diff mercurial/dagop.py @ 32921:27932a76a88d
dagop: split module hosting DAG-related algorithms from revset
This module hosts the following functions. They are somewhat similar (e.g.
scanning revisions using a heap queue or a stack) and seem non-trivial from an
algorithmic point of view.
- _revancestors()
- _revdescendants()
- reachableroots()
- _toposort()
I was thinking of adding a revset._fileancestors() generator for a better
follow() implementation, but it would be called from context.py as well, so I
decided to create a new module.
Naming is hard. I couldn't come up with any better module name, so it's called
"dag operation" now. I rejected the following candidates:
- ancestor.py - existing, revlog-level DAG algorithm
- ancestorset.py - doesn't always return a set
- dagalgorithm.py - hard to type
- dagutil.py - existing
- revancestor.py - I want to add fileancestors()
% wc -l mercurial/dagop.py mercurial/revset.py
339 mercurial/dagop.py
2020 mercurial/revset.py
2359 total
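
For illustration only, here is a hedged sketch of how the relocated helpers are
meant to be called (the loaded `repo` object and the example revision numbers
are assumptions; the real callers are revset.py and, eventually, context.py):

```python
# Hedged sketch: assumes an already-loaded `repo` (localrepository) and that
# revisions 5 and 7 exist in it.
from mercurial import dagop, smartset

ancs = dagop.revancestors(repo, smartset.baseset([5, 7]), followfirst=False)
descs = dagop.revdescendants(repo, smartset.baseset([5, 7]), followfirst=False)
# Both return lazy smartsets (generatorset): ancestors iterate newest-first,
# descendants oldest-first.
```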
| author   | Yuya Nishihara <yuya@tcha.org>   |
| -------- | -------------------------------- |
| date     | Sun, 16 Oct 2016 18:03:24 +0900  |
| parents  | mercurial/revset.py@8e02829bec61 |
| children | 582080a4a812                     |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dagop.py	Sun Oct 16 18:03:24 2016 +0900
@@ -0,0 +1,339 @@
+# dagop.py - graph ancestry and topology algorithm for revset
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import heapq
+
+from . import (
+    error,
+    node,
+    smartset,
+)
+
+baseset = smartset.baseset
+generatorset = smartset.generatorset
+
+def revancestors(repo, revs, followfirst):
+    """Like revlog.ancestors(), but supports followfirst."""
+    if followfirst:
+        cut = 1
+    else:
+        cut = None
+    cl = repo.changelog
+
+    def iterate():
+        revs.sort(reverse=True)
+        irevs = iter(revs)
+        h = []
+
+        inputrev = next(irevs, None)
+        if inputrev is not None:
+            heapq.heappush(h, -inputrev)
+
+        seen = set()
+        while h:
+            current = -heapq.heappop(h)
+            if current == inputrev:
+                inputrev = next(irevs, None)
+                if inputrev is not None:
+                    heapq.heappush(h, -inputrev)
+            if current not in seen:
+                seen.add(current)
+                yield current
+                try:
+                    for parent in cl.parentrevs(current)[:cut]:
+                        if parent != node.nullrev:
+                            heapq.heappush(h, -parent)
+                except error.WdirUnsupported:
+                    for parent in repo[current].parents()[:cut]:
+                        if parent.rev() != node.nullrev:
+                            heapq.heappush(h, -parent.rev())
+
+    return generatorset(iterate(), iterasc=False)
+
+def revdescendants(repo, revs, followfirst):
+    """Like revlog.descendants() but supports followfirst."""
+    if followfirst:
+        cut = 1
+    else:
+        cut = None
+
+    def iterate():
+        cl = repo.changelog
+        # XXX this should be 'parentset.min()' assuming 'parentset' is a
+        # smartset (and if it is not, it should.)
+        first = min(revs)
+        nullrev = node.nullrev
+        if first == nullrev:
+            # Are there nodes with a null first parent and a non-null
+            # second one? Maybe. Do we care? Probably not.
+            for i in cl:
+                yield i
+        else:
+            seen = set(revs)
+            for i in cl.revs(first + 1):
+                for x in cl.parentrevs(i)[:cut]:
+                    if x != nullrev and x in seen:
+                        seen.add(i)
+                        yield i
+                        break
+
+    return generatorset(iterate(), iterasc=True)
+
+def _reachablerootspure(repo, minroot, roots, heads, includepath):
+    """return (heads(::<roots> and ::<heads>))
+
+    If includepath is True, return (<roots>::<heads>)."""
+    if not roots:
+        return []
+    parentrevs = repo.changelog.parentrevs
+    roots = set(roots)
+    visit = list(heads)
+    reachable = set()
+    seen = {}
+    # prefetch all the things! (because python is slow)
+    reached = reachable.add
+    dovisit = visit.append
+    nextvisit = visit.pop
+    # open-code the post-order traversal due to the tiny size of
+    # sys.getrecursionlimit()
+    while visit:
+        rev = nextvisit()
+        if rev in roots:
+            reached(rev)
+            if not includepath:
+                continue
+        parents = parentrevs(rev)
+        seen[rev] = parents
+        for parent in parents:
+            if parent >= minroot and parent not in seen:
+                dovisit(parent)
+    if not reachable:
+        return baseset()
+    if not includepath:
+        return reachable
+    for rev in sorted(seen):
+        for parent in seen[rev]:
+            if parent in reachable:
+                reached(rev)
+    return reachable
+
+def reachableroots(repo, roots, heads, includepath=False):
+    """return (heads(::<roots> and ::<heads>))
+
+    If includepath is True, return (<roots>::<heads>)."""
+    if not roots:
+        return baseset()
+    minroot = roots.min()
+    roots = list(roots)
+    heads = list(heads)
+    try:
+        revs = repo.changelog.reachableroots(minroot, heads, roots,
+                                             includepath)
+    except AttributeError:
+        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
+    revs = baseset(revs)
+    revs.sort()
+    return revs
+
+def toposort(revs, parentsfunc, firstbranch=()):
+    """Yield revisions from heads to roots one (topo) branch at a time.
+
+    This function aims to be used by a graph generator that wishes to minimize
+    the number of parallel branches and their interleaving.
+
+    Example iteration order (numbers show the "true" order in a changelog):
+
+      o  4
+      |
+      o  1
+      |
+      | o  3
+      | |
+      | o  2
+      |/
+      o  0
+
+    Note that the ancestors of merges are understood by the current
+    algorithm to be on the same branch. This means no reordering will
+    occur behind a merge.
+    """
+
+    ### Quick summary of the algorithm
+    #
+    # This function is based around a "retention" principle. We keep revisions
+    # in memory until we are ready to emit a whole branch that immediately
+    # "merges" into an existing one. This reduces the number of parallel
+    # branches with interleaved revisions.
+    #
+    # During iteration revs are split into two groups:
+    # A) revision already emitted
+    # B) revision in "retention". They are stored as different subgroups.
+    #
+    # for each REV, we do the following logic:
+    #
+    #   1) if REV is a parent of (A), we will emit it. If there is a
+    #   retention group ((B) above) that is blocked on REV being
+    #   available, we emit all the revisions out of that retention
+    #   group first.
+    #
+    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
+    #   available, if such subgroup exist, we add REV to it and the subgroup
+    #   is now awaiting for REV.parents() to be available.
+    #
+    #   3) finally if no such group existed in (B), we create a new subgroup.
+    #
+    #
+    # To bootstrap the algorithm, we emit the tipmost revision (which
+    # puts it in group (A) from above).
+
+    revs.sort(reverse=True)
+
+    # Set of parents of revision that have been emitted. They can be
+    # considered unblocked as the graph generator is already aware of them so
+    # there is no need to delay the revisions that reference them.
+    #
+    # If someone wants to prioritize a branch over the others, pre-filling
+    # this set will force all other branches to wait until this branch is
+    # ready to be emitted.
+    unblocked = set(firstbranch)
+
+    # list of groups waiting to be displayed, each group is defined by:
+    #
+    #   (revs:    lists of revs waiting to be displayed,
+    #    blocked: set of that cannot be displayed before those in 'revs')
+    #
+    # The second value ('blocked') correspond to parents of any revision in
+    # the group ('revs') that is not itself contained in the group. The main
+    # idea of this algorithm is to delay as much as possible the emission of
+    # any revision. This means waiting for the moment we are about to display
+    # these parents to display the revs in a group.
+    #
+    # This first implementation is smart until it encounters a merge: it will
+    # emit revs as soon as any parent is about to be emitted and can grow an
+    # arbitrary number of revs in 'blocked'. In practice this mean we properly
+    # retains new branches but gives up on any special ordering for ancestors
+    # of merges. The implementation can be improved to handle this better.
+    #
+    # The first subgroup is special. It corresponds to all the revision that
+    # were already emitted. The 'revs' lists is expected to be empty and the
+    # 'blocked' set contains the parents revisions of already emitted
+    # revision.
+    #
+    # You could pre-seed the <parents> set of groups[0] to a specific
+    # changesets to select what the first emitted branch should be.
+    groups = [([], unblocked)]
+    pendingheap = []
+    pendingset = set()
+
+    heapq.heapify(pendingheap)
+    heappop = heapq.heappop
+    heappush = heapq.heappush
+    for currentrev in revs:
+        # Heap works with smallest element, we want highest so we invert
+        if currentrev not in pendingset:
+            heappush(pendingheap, -currentrev)
+            pendingset.add(currentrev)
+        # iterates on pending rev until after the current rev have been
+        # processed.
+        rev = None
+        while rev != currentrev:
+            rev = -heappop(pendingheap)
+            pendingset.remove(rev)
+
+            # Seek for a subgroup blocked, waiting for the current revision.
+            matching = [i for i, g in enumerate(groups) if rev in g[1]]
+
+            if matching:
+                # The main idea is to gather together all sets that are
+                # blocked on the same revision.
+                #
+                # Groups are merged when a common blocking ancestor is
+                # observed. For example, given two groups:
+                #
+                # revs [5, 4] waiting for 1
+                # revs [3, 2] waiting for 1
+                #
+                # These two groups will be merged when we process
+                # 1. In theory, we could have merged the groups when
+                # we added 2 to the group it is now in (we could have
+                # noticed the groups were both blocked on 1 then), but
+                # the way it works now makes the algorithm simpler.
+                #
+                # We also always keep the oldest subgroup first. We can
+                # probably improve the behavior by having the longest set
+                # first. That way, graph algorithms could minimise the length
+                # of parallel lines their drawing. This is currently not done.
+                targetidx = matching.pop(0)
+                trevs, tparents = groups[targetidx]
+                for i in matching:
+                    gr = groups[i]
+                    trevs.extend(gr[0])
+                    tparents |= gr[1]
+                # delete all merged subgroups (except the one we kept)
+                # (starting from the last subgroup for performance and
+                # sanity reasons)
+                for i in reversed(matching):
+                    del groups[i]
+            else:
+                # This is a new head. We create a new subgroup for it.
+                targetidx = len(groups)
+                groups.append(([], {rev}))
+
+            gr = groups[targetidx]
+
+            # We now add the current nodes to this subgroups. This is done
+            # after the subgroup merging because all elements from a subgroup
+            # that relied on this rev must precede it.
+            #
+            # we also update the <parents> set to include the parents of the
+            # new nodes.
+            if rev == currentrev: # only display stuff in rev
+                gr[0].append(rev)
+            gr[1].remove(rev)
+            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
+            gr[1].update(parents)
+            for p in parents:
+                if p not in pendingset:
+                    pendingset.add(p)
+                    heappush(pendingheap, -p)
+
+            # Look for a subgroup to display
+            #
+            # When unblocked is empty (if clause), we were not waiting for any
+            # revisions during the first iteration (if no priority was given)
+            # or if we emitted a whole disconnected set of the graph (reached
+            # a root). In that case we arbitrarily take the oldest known
+            # subgroup. The heuristic could probably be better.
+            #
+            # Otherwise (elif clause) if the subgroup is blocked on
+            # a revision we just emitted, we can safely emit it as
+            # well.
+            if not unblocked:
+                if len(groups) > 1:  # display other subset
+                    targetidx = 1
+                    gr = groups[1]
+            elif not gr[1] & unblocked:
+                gr = None
+
+            if gr is not None:
+                # update the set of awaited revisions with the one from the
+                # subgroup
+                unblocked |= gr[1]
+                # output all revisions in the subgroup
+                for r in gr[0]:
+                    yield r
+                # delete the subgroup that you just output
+                # unless it is groups[0] in which case you just empty it.
+                if targetidx:
+                    del groups[targetidx]
+                else:
+                    gr[0][:] = []
+    # Check if we have some subgroup waiting for revisions we are not going to
+    # iterate over
+    for g in groups:
+        for r in g[0]:
+            yield r
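
As a quick check of the branch-at-a-time ordering described in toposort()'s
docstring, here is a hedged, self-contained sketch. The toy parent map is an
assumption mirroring the docstring's example graph, and it assumes the
mercurial package is importable:

```python
from mercurial import dagop

# rev -> parent revs for the docstring's graph: rev 0 is the root, revs 1/4
# and 2/3 form two branches on top of it (nullrev parents simply omitted).
parents = {0: [], 1: [0], 2: [0], 3: [2], 4: [1]}

# toposort() sorts the list it is given in place, so pass a throwaway list;
# parentsfunc only needs to map a rev to its parent revs.
order = list(dagop.toposort([0, 1, 2, 3, 4], parents.__getitem__))
print(order)  # [4, 1, 3, 2, 0] -- one topo branch at a time, heads to roots
```

The negated-revision trick inside the function (pushing `-rev` onto `heapq`)
is what turns Python's min-heap into the max-heap the head-to-root walk needs.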