diff mercurial/dagop.py @ 32921:27932a76a88d
dagop: split module hosting DAG-related algorithms from revset
This module hosts the following functions. They are somewhat similar (e.g.
scanning revisions using a heap queue or a stack) and seem non-trivial from an
algorithmic point of view.
- _revancestors()
- _revdescendants()
- reachableroots()
- _toposort()
I was thinking of adding a revset._fileancestors() generator for a better
follow() implementation, but it would be called from context.py as well, so I
decided to create a new module.
Naming is hard. I couldn't come up with any better module name, so it's called
"dag operation" now. I rejected the following candidates:
- ancestor.py - existing, revlog-level DAG algorithm
- ancestorset.py - doesn't always return a set
- dagalgorithm.py - hard to type
- dagutil.py - existing
- revancestor.py - I want to add fileancestors()
% wc -l mercurial/dagop.py mercurial/revset.py
339 mercurial/dagop.py
2020 mercurial/revset.py
2359 total
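
For illustration only, here is a hedged sketch of how the relocated helpers are
meant to be called (the loaded `repo` object and the example revision numbers
are assumptions; the real callers are revset.py and, eventually, context.py):

```python
# Hedged sketch: assumes an already-loaded `repo` (localrepository) and that
# revisions 5 and 7 exist in it.
from mercurial import dagop, smartset

ancs = dagop.revancestors(repo, smartset.baseset([5, 7]), followfirst=False)
descs = dagop.revdescendants(repo, smartset.baseset([5, 7]), followfirst=False)
# Both return lazy smartsets (generatorset): ancestors iterate newest-first,
# descendants oldest-first.
```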
| author   | Yuya Nishihara <yuya@tcha.org>   |
| -------- | -------------------------------- |
| date     | Sun, 16 Oct 2016 18:03:24 +0900  |
| parents  | mercurial/revset.py@8e02829bec61 |
| children | 582080a4a812                     |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dagop.py	Sun Oct 16 18:03:24 2016 +0900
@@ -0,0 +1,339 @@
+# dagop.py - graph ancestry and topology algorithm for revset
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import heapq
+
+from . import (
+    error,
+    node,
+    smartset,
+)
+
+baseset = smartset.baseset
+generatorset = smartset.generatorset
+
+def revancestors(repo, revs, followfirst):
+    """Like revlog.ancestors(), but supports followfirst."""
+    if followfirst:
+        cut = 1
+    else:
+        cut = None
+    cl = repo.changelog
+
+    def iterate():
+        revs.sort(reverse=True)
+        irevs = iter(revs)
+        h = []
+
+        inputrev = next(irevs, None)
+        if inputrev is not None:
+            heapq.heappush(h, -inputrev)
+
+        seen = set()
+        while h:
+            current = -heapq.heappop(h)
+            if current == inputrev:
+                inputrev = next(irevs, None)
+                if inputrev is not None:
+                    heapq.heappush(h, -inputrev)
+            if current not in seen:
+                seen.add(current)
+                yield current
+                try:
+                    for parent in cl.parentrevs(current)[:cut]:
+                        if parent != node.nullrev:
+                            heapq.heappush(h, -parent)
+                except error.WdirUnsupported:
+                    for parent in repo[current].parents()[:cut]:
+                        if parent.rev() != node.nullrev:
+                            heapq.heappush(h, -parent.rev())
+
+    return generatorset(iterate(), iterasc=False)
+
+def revdescendants(repo, revs, followfirst):
+    """Like revlog.descendants() but supports followfirst."""
+    if followfirst:
+        cut = 1
+    else:
+        cut = None
+
+    def iterate():
+        cl = repo.changelog
+        # XXX this should be 'parentset.min()' assuming 'parentset' is a
+        # smartset (and if it is not, it should.)
+        first = min(revs)
+        nullrev = node.nullrev
+        if first == nullrev:
+            # Are there nodes with a null first parent and a non-null
+            # second one? Maybe. Do we care? Probably not.
+            for i in cl:
+                yield i
+        else:
+            seen = set(revs)
+            for i in cl.revs(first + 1):
+                for x in cl.parentrevs(i)[:cut]:
+                    if x != nullrev and x in seen:
+                        seen.add(i)
+                        yield i
+                        break
+
+    return generatorset(iterate(), iterasc=True)
+
+def _reachablerootspure(repo, minroot, roots, heads, includepath):
+    """return (heads(::<roots> and ::<heads>))
+
+    If includepath is True, return (<roots>::<heads>)."""
+    if not roots:
+        return []
+    parentrevs = repo.changelog.parentrevs
+    roots = set(roots)
+    visit = list(heads)
+    reachable = set()
+    seen = {}
+    # prefetch all the things! (because python is slow)
+    reached = reachable.add
+    dovisit = visit.append
+    nextvisit = visit.pop
+    # open-code the post-order traversal due to the tiny size of
+    # sys.getrecursionlimit()
+    while visit:
+        rev = nextvisit()
+        if rev in roots:
+            reached(rev)
+            if not includepath:
+                continue
+        parents = parentrevs(rev)
+        seen[rev] = parents
+        for parent in parents:
+            if parent >= minroot and parent not in seen:
+                dovisit(parent)
+    if not reachable:
+        return baseset()
+    if not includepath:
+        return reachable
+    for rev in sorted(seen):
+        for parent in seen[rev]:
+            if parent in reachable:
+                reached(rev)
+    return reachable
+
+def reachableroots(repo, roots, heads, includepath=False):
+    """return (heads(::<roots> and ::<heads>))
+
+    If includepath is True, return (<roots>::<heads>)."""
+    if not roots:
+        return baseset()
+    minroot = roots.min()
+    roots = list(roots)
+    heads = list(heads)
+    try:
+        revs = repo.changelog.reachableroots(minroot, heads, roots,
+                                             includepath)
+    except AttributeError:
+        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
+    revs = baseset(revs)
+    revs.sort()
+    return revs
+
+def toposort(revs, parentsfunc, firstbranch=()):
+    """Yield revisions from heads to roots one (topo) branch at a time.
+
+    This function aims to be used by a graph generator that wishes to minimize
+    the number of parallel branches and their interleaving.
+
+    Example iteration order (numbers show the "true" order in a changelog):
+
+      o  4
+      |
+      o  1
+      |
+      | o  3
+      | |
+      | o  2
+      |/
+      o  0
+
+    Note that the ancestors of merges are understood by the current
+    algorithm to be on the same branch. This means no reordering will
+    occur behind a merge.
+    """
+
+    ### Quick summary of the algorithm
+    #
+    # This function is based around a "retention" principle. We keep revisions
+    # in memory until we are ready to emit a whole branch that immediately
+    # "merges" into an existing one. This reduces the number of parallel
+    # branches with interleaved revisions.
+    #
+    # During iteration revs are split into two groups:
+    # A) revision already emitted
+    # B) revision in "retention". They are stored as different subgroups.
+    #
+    # for each REV, we do the following logic:
+    #
+    #   1) if REV is a parent of (A), we will emit it. If there is a
+    #   retention group ((B) above) that is blocked on REV being
+    #   available, we emit all the revisions out of that retention
+    #   group first.
+    #
+    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
+    #   available, if such subgroup exist, we add REV to it and the subgroup
+    #   is now awaiting for REV.parents() to be available.
+    #
+    #   3) finally if no such group existed in (B), we create a new subgroup.
+    #
+    #
+    # To bootstrap the algorithm, we emit the tipmost revision (which
+    # puts it in group (A) from above).
+
+    revs.sort(reverse=True)
+
+    # Set of parents of revision that have been emitted. They can be
+    # considered unblocked as the graph generator is already aware of them so
+    # there is no need to delay the revisions that reference them.
+    #
+    # If someone wants to prioritize a branch over the others, pre-filling
+    # this set will force all other branches to wait until this branch is
+    # ready to be emitted.
+    unblocked = set(firstbranch)
+
+    # list of groups waiting to be displayed, each group is defined by:
+    #
+    #   (revs:    lists of revs waiting to be displayed,
+    #    blocked: set of that cannot be displayed before those in 'revs')
+    #
+    # The second value ('blocked') correspond to parents of any revision in
+    # the group ('revs') that is not itself contained in the group. The main
+    # idea of this algorithm is to delay as much as possible the emission of
+    # any revision. This means waiting for the moment we are about to display
+    # these parents to display the revs in a group.
+    #
+    # This first implementation is smart until it encounters a merge: it will
+    # emit revs as soon as any parent is about to be emitted and can grow an
+    # arbitrary number of revs in 'blocked'. In practice this mean we properly
+    # retains new branches but gives up on any special ordering for ancestors
+    # of merges. The implementation can be improved to handle this better.
+    #
+    # The first subgroup is special. It corresponds to all the revision that
+    # were already emitted. The 'revs' lists is expected to be empty and the
+    # 'blocked' set contains the parents revisions of already emitted
+    # revision.
+    #
+    # You could pre-seed the <parents> set of groups[0] to a specific
+    # changesets to select what the first emitted branch should be.
+    groups = [([], unblocked)]
+    pendingheap = []
+    pendingset = set()
+
+    heapq.heapify(pendingheap)
+    heappop = heapq.heappop
+    heappush = heapq.heappush
+    for currentrev in revs:
+        # Heap works with smallest element, we want highest so we invert
+        if currentrev not in pendingset:
+            heappush(pendingheap, -currentrev)
+            pendingset.add(currentrev)
+        # iterates on pending rev until after the current rev have been
+        # processed.
+        rev = None
+        while rev != currentrev:
+            rev = -heappop(pendingheap)
+            pendingset.remove(rev)
+
+            # Seek for a subgroup blocked, waiting for the current revision.
+            matching = [i for i, g in enumerate(groups) if rev in g[1]]
+
+            if matching:
+                # The main idea is to gather together all sets that are
+                # blocked on the same revision.
+                #
+                # Groups are merged when a common blocking ancestor is
+                # observed. For example, given two groups:
+                #
+                # revs [5, 4] waiting for 1
+                # revs [3, 2] waiting for 1
+                #
+                # These two groups will be merged when we process
+                # 1. In theory, we could have merged the groups when
+                # we added 2 to the group it is now in (we could have
+                # noticed the groups were both blocked on 1 then), but
+                # the way it works now makes the algorithm simpler.
+                #
+                # We also always keep the oldest subgroup first. We can
+                # probably improve the behavior by having the longest set
+                # first. That way, graph algorithms could minimise the length
+                # of parallel lines their drawing. This is currently not done.
+                targetidx = matching.pop(0)
+                trevs, tparents = groups[targetidx]
+                for i in matching:
+                    gr = groups[i]
+                    trevs.extend(gr[0])
+                    tparents |= gr[1]
+                # delete all merged subgroups (except the one we kept)
+                # (starting from the last subgroup for performance and
+                # sanity reasons)
+                for i in reversed(matching):
+                    del groups[i]
+            else:
+                # This is a new head. We create a new subgroup for it.
+                targetidx = len(groups)
+                groups.append(([], {rev}))
+
+            gr = groups[targetidx]
+
+            # We now add the current nodes to this subgroups. This is done
+            # after the subgroup merging because all elements from a subgroup
+            # that relied on this rev must precede it.
+            #
+            # we also update the <parents> set to include the parents of the
+            # new nodes.
+            if rev == currentrev: # only display stuff in rev
+                gr[0].append(rev)
+            gr[1].remove(rev)
+            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
+            gr[1].update(parents)
+            for p in parents:
+                if p not in pendingset:
+                    pendingset.add(p)
+                    heappush(pendingheap, -p)
+
+            # Look for a subgroup to display
+            #
+            # When unblocked is empty (if clause), we were not waiting for any
+            # revisions during the first iteration (if no priority was given)
+            # or if we emitted a whole disconnected set of the graph (reached
+            # a root). In that case we arbitrarily take the oldest known
+            # subgroup. The heuristic could probably be better.
+            #
+            # Otherwise (elif clause) if the subgroup is blocked on
+            # a revision we just emitted, we can safely emit it as
+            # well.
+            if not unblocked:
+                if len(groups) > 1:  # display other subset
+                    targetidx = 1
+                    gr = groups[1]
+            elif not gr[1] & unblocked:
+                gr = None
+
+            if gr is not None:
+                # update the set of awaited revisions with the one from the
+                # subgroup
+                unblocked |= gr[1]
+                # output all revisions in the subgroup
+                for r in gr[0]:
+                    yield r
+                # delete the subgroup that you just output
+                # unless it is groups[0] in which case you just empty it.
+                if targetidx:
+                    del groups[targetidx]
+                else:
+                    gr[0][:] = []
+    # Check if we have some subgroup waiting for revisions we are not going to
+    # iterate over
+    for g in groups:
+        for r in g[0]:
+            yield r
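
As a quick check of the branch-at-a-time ordering described in toposort()'s
docstring, here is a hedged, self-contained sketch. The toy parent map is an
assumption mirroring the docstring's example graph, and it assumes the
mercurial package is importable:

```python
from mercurial import dagop

# rev -> parent revs for the docstring's graph: rev 0 is the root, revs 1/4
# and 2/3 form two branches on top of it (nullrev parents simply omitted).
parents = {0: [], 1: [0], 2: [0], 3: [2], 4: [1]}

# toposort() sorts the list it is given in place, so pass a throwaway list;
# parentsfunc only needs to map a rev to its parent revs.
order = list(dagop.toposort([0, 1, 2, 3, 4], parents.__getitem__))
print(order)  # [4, 1, 3, 2, 0] -- one topo branch at a time, heads to roots
```

The negated-revision trick inside the function (pushing `-rev` onto `heapq`)
is what turns Python's min-heap into the max-heap the head-to-root walk needs.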