mercurial/dagop.py
changeset 32903 27932a76a88d
parent 32885 8e02829bec61
child 32904 582080a4a812
equal deleted inserted replaced
32902:642feee29d70 32903:27932a76a88d
       
     1 # dagop.py - graph ancestry and topology algorithm for revset
       
     2 #
       
     3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from __future__ import absolute_import
       
     9 
       
    10 import heapq
       
    11 
       
    12 from . import (
       
    13     error,
       
    14     node,
       
    15     smartset,
       
    16 )
       
    17 
       
    18 baseset = smartset.baseset
       
    19 generatorset = smartset.generatorset
       
    20 
       
    21 def revancestors(repo, revs, followfirst):
       
    22     """Like revlog.ancestors(), but supports followfirst."""
       
    23     if followfirst:
       
    24         cut = 1
       
    25     else:
       
    26         cut = None
       
    27     cl = repo.changelog
       
    28 
       
    29     def iterate():
       
    30         revs.sort(reverse=True)
       
    31         irevs = iter(revs)
       
    32         h = []
       
    33 
       
    34         inputrev = next(irevs, None)
       
    35         if inputrev is not None:
       
    36             heapq.heappush(h, -inputrev)
       
    37 
       
    38         seen = set()
       
    39         while h:
       
    40             current = -heapq.heappop(h)
       
    41             if current == inputrev:
       
    42                 inputrev = next(irevs, None)
       
    43                 if inputrev is not None:
       
    44                     heapq.heappush(h, -inputrev)
       
    45             if current not in seen:
       
    46                 seen.add(current)
       
    47                 yield current
       
    48                 try:
       
    49                     for parent in cl.parentrevs(current)[:cut]:
       
    50                         if parent != node.nullrev:
       
    51                             heapq.heappush(h, -parent)
       
    52                 except error.WdirUnsupported:
       
    53                     for parent in repo[current].parents()[:cut]:
       
    54                         if parent.rev() != node.nullrev:
       
    55                             heapq.heappush(h, -parent.rev())
       
    56 
       
    57     return generatorset(iterate(), iterasc=False)
       
    58 
       
    59 def revdescendants(repo, revs, followfirst):
       
    60     """Like revlog.descendants() but supports followfirst."""
       
    61     if followfirst:
       
    62         cut = 1
       
    63     else:
       
    64         cut = None
       
    65 
       
    66     def iterate():
       
    67         cl = repo.changelog
       
    68         # XXX this should be 'parentset.min()' assuming 'parentset' is a
       
    69         # smartset (and if it is not, it should.)
       
    70         first = min(revs)
       
    71         nullrev = node.nullrev
       
    72         if first == nullrev:
       
    73             # Are there nodes with a null first parent and a non-null
       
    74             # second one? Maybe. Do we care? Probably not.
       
    75             for i in cl:
       
    76                 yield i
       
    77         else:
       
    78             seen = set(revs)
       
    79             for i in cl.revs(first + 1):
       
    80                 for x in cl.parentrevs(i)[:cut]:
       
    81                     if x != nullrev and x in seen:
       
    82                         seen.add(i)
       
    83                         yield i
       
    84                         break
       
    85 
       
    86     return generatorset(iterate(), iterasc=True)
       
    87 
       
    88 def _reachablerootspure(repo, minroot, roots, heads, includepath):
       
    89     """return (heads(::<roots> and ::<heads>))
       
    90 
       
    91     If includepath is True, return (<roots>::<heads>)."""
       
    92     if not roots:
       
    93         return []
       
    94     parentrevs = repo.changelog.parentrevs
       
    95     roots = set(roots)
       
    96     visit = list(heads)
       
    97     reachable = set()
       
    98     seen = {}
       
    99     # prefetch all the things! (because python is slow)
       
   100     reached = reachable.add
       
   101     dovisit = visit.append
       
   102     nextvisit = visit.pop
       
   103     # open-code the post-order traversal due to the tiny size of
       
   104     # sys.getrecursionlimit()
       
   105     while visit:
       
   106         rev = nextvisit()
       
   107         if rev in roots:
       
   108             reached(rev)
       
   109             if not includepath:
       
   110                 continue
       
   111         parents = parentrevs(rev)
       
   112         seen[rev] = parents
       
   113         for parent in parents:
       
   114             if parent >= minroot and parent not in seen:
       
   115                 dovisit(parent)
       
   116     if not reachable:
       
   117         return baseset()
       
   118     if not includepath:
       
   119         return reachable
       
   120     for rev in sorted(seen):
       
   121         for parent in seen[rev]:
       
   122             if parent in reachable:
       
   123                 reached(rev)
       
   124     return reachable
       
   125 
       
   126 def reachableroots(repo, roots, heads, includepath=False):
       
   127     """return (heads(::<roots> and ::<heads>))
       
   128 
       
   129     If includepath is True, return (<roots>::<heads>)."""
       
   130     if not roots:
       
   131         return baseset()
       
   132     minroot = roots.min()
       
   133     roots = list(roots)
       
   134     heads = list(heads)
       
   135     try:
       
   136         revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
       
   137     except AttributeError:
       
   138         revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
       
   139     revs = baseset(revs)
       
   140     revs.sort()
       
   141     return revs
       
   142 
       
   143 def toposort(revs, parentsfunc, firstbranch=()):
       
   144     """Yield revisions from heads to roots one (topo) branch at a time.
       
   145 
       
   146     This function aims to be used by a graph generator that wishes to minimize
       
   147     the number of parallel branches and their interleaving.
       
   148 
       
   149     Example iteration order (numbers show the "true" order in a changelog):
       
   150 
       
   151       o  4
       
   152       |
       
   153       o  1
       
   154       |
       
   155       | o  3
       
   156       | |
       
   157       | o  2
       
   158       |/
       
   159       o  0
       
   160 
       
   161     Note that the ancestors of merges are understood by the current
       
   162     algorithm to be on the same branch. This means no reordering will
       
   163     occur behind a merge.
       
   164     """
       
   165 
       
   166     ### Quick summary of the algorithm
       
   167     #
       
   168     # This function is based around a "retention" principle. We keep revisions
       
   169     # in memory until we are ready to emit a whole branch that immediately
       
   170     # "merges" into an existing one. This reduces the number of parallel
       
   171     # branches with interleaved revisions.
       
   172     #
       
   173     # During iteration revs are split into two groups:
       
   174     # A) revision already emitted
       
   175     # B) revision in "retention". They are stored as different subgroups.
       
   176     #
       
   177     # for each REV, we do the following logic:
       
   178     #
       
   179     #   1) if REV is a parent of (A), we will emit it. If there is a
       
   180     #   retention group ((B) above) that is blocked on REV being
       
   181     #   available, we emit all the revisions out of that retention
       
   182     #   group first.
       
   183     #
       
   184     #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
       
   185     #   available, if such subgroup exist, we add REV to it and the subgroup is
       
   186     #   now awaiting for REV.parents() to be available.
       
   187     #
       
   188     #   3) finally if no such group existed in (B), we create a new subgroup.
       
   189     #
       
   190     #
       
   191     # To bootstrap the algorithm, we emit the tipmost revision (which
       
   192     # puts it in group (A) from above).
       
   193 
       
   194     revs.sort(reverse=True)
       
   195 
       
   196     # Set of parents of revision that have been emitted. They can be considered
       
   197     # unblocked as the graph generator is already aware of them so there is no
       
   198     # need to delay the revisions that reference them.
       
   199     #
       
   200     # If someone wants to prioritize a branch over the others, pre-filling this
       
   201     # set will force all other branches to wait until this branch is ready to be
       
   202     # emitted.
       
   203     unblocked = set(firstbranch)
       
   204 
       
   205     # list of groups waiting to be displayed, each group is defined by:
       
   206     #
       
   207     #   (revs:    lists of revs waiting to be displayed,
       
   208     #    blocked: set of that cannot be displayed before those in 'revs')
       
   209     #
       
   210     # The second value ('blocked') correspond to parents of any revision in the
       
   211     # group ('revs') that is not itself contained in the group. The main idea
       
   212     # of this algorithm is to delay as much as possible the emission of any
       
   213     # revision.  This means waiting for the moment we are about to display
       
   214     # these parents to display the revs in a group.
       
   215     #
       
   216     # This first implementation is smart until it encounters a merge: it will
       
   217     # emit revs as soon as any parent is about to be emitted and can grow an
       
   218     # arbitrary number of revs in 'blocked'. In practice this mean we properly
       
   219     # retains new branches but gives up on any special ordering for ancestors
       
   220     # of merges. The implementation can be improved to handle this better.
       
   221     #
       
   222     # The first subgroup is special. It corresponds to all the revision that
       
   223     # were already emitted. The 'revs' lists is expected to be empty and the
       
   224     # 'blocked' set contains the parents revisions of already emitted revision.
       
   225     #
       
   226     # You could pre-seed the <parents> set of groups[0] to a specific
       
   227     # changesets to select what the first emitted branch should be.
       
   228     groups = [([], unblocked)]
       
   229     pendingheap = []
       
   230     pendingset = set()
       
   231 
       
   232     heapq.heapify(pendingheap)
       
   233     heappop = heapq.heappop
       
   234     heappush = heapq.heappush
       
   235     for currentrev in revs:
       
   236         # Heap works with smallest element, we want highest so we invert
       
   237         if currentrev not in pendingset:
       
   238             heappush(pendingheap, -currentrev)
       
   239             pendingset.add(currentrev)
       
   240         # iterates on pending rev until after the current rev have been
       
   241         # processed.
       
   242         rev = None
       
   243         while rev != currentrev:
       
   244             rev = -heappop(pendingheap)
       
   245             pendingset.remove(rev)
       
   246 
       
   247             # Seek for a subgroup blocked, waiting for the current revision.
       
   248             matching = [i for i, g in enumerate(groups) if rev in g[1]]
       
   249 
       
   250             if matching:
       
   251                 # The main idea is to gather together all sets that are blocked
       
   252                 # on the same revision.
       
   253                 #
       
   254                 # Groups are merged when a common blocking ancestor is
       
   255                 # observed. For example, given two groups:
       
   256                 #
       
   257                 # revs [5, 4] waiting for 1
       
   258                 # revs [3, 2] waiting for 1
       
   259                 #
       
   260                 # These two groups will be merged when we process
       
   261                 # 1. In theory, we could have merged the groups when
       
   262                 # we added 2 to the group it is now in (we could have
       
   263                 # noticed the groups were both blocked on 1 then), but
       
   264                 # the way it works now makes the algorithm simpler.
       
   265                 #
       
   266                 # We also always keep the oldest subgroup first. We can
       
   267                 # probably improve the behavior by having the longest set
       
   268                 # first. That way, graph algorithms could minimise the length
       
   269                 # of parallel lines their drawing. This is currently not done.
       
   270                 targetidx = matching.pop(0)
       
   271                 trevs, tparents = groups[targetidx]
       
   272                 for i in matching:
       
   273                     gr = groups[i]
       
   274                     trevs.extend(gr[0])
       
   275                     tparents |= gr[1]
       
   276                 # delete all merged subgroups (except the one we kept)
       
   277                 # (starting from the last subgroup for performance and
       
   278                 # sanity reasons)
       
   279                 for i in reversed(matching):
       
   280                     del groups[i]
       
   281             else:
       
   282                 # This is a new head. We create a new subgroup for it.
       
   283                 targetidx = len(groups)
       
   284                 groups.append(([], {rev}))
       
   285 
       
   286             gr = groups[targetidx]
       
   287 
       
   288             # We now add the current nodes to this subgroups. This is done
       
   289             # after the subgroup merging because all elements from a subgroup
       
   290             # that relied on this rev must precede it.
       
   291             #
       
   292             # we also update the <parents> set to include the parents of the
       
   293             # new nodes.
       
   294             if rev == currentrev: # only display stuff in rev
       
   295                 gr[0].append(rev)
       
   296             gr[1].remove(rev)
       
   297             parents = [p for p in parentsfunc(rev) if p > node.nullrev]
       
   298             gr[1].update(parents)
       
   299             for p in parents:
       
   300                 if p not in pendingset:
       
   301                     pendingset.add(p)
       
   302                     heappush(pendingheap, -p)
       
   303 
       
   304             # Look for a subgroup to display
       
   305             #
       
   306             # When unblocked is empty (if clause), we were not waiting for any
       
   307             # revisions during the first iteration (if no priority was given) or
       
   308             # if we emitted a whole disconnected set of the graph (reached a
       
   309             # root).  In that case we arbitrarily take the oldest known
       
   310             # subgroup. The heuristic could probably be better.
       
   311             #
       
   312             # Otherwise (elif clause) if the subgroup is blocked on
       
   313             # a revision we just emitted, we can safely emit it as
       
   314             # well.
       
   315             if not unblocked:
       
   316                 if len(groups) > 1:  # display other subset
       
   317                     targetidx = 1
       
   318                     gr = groups[1]
       
   319             elif not gr[1] & unblocked:
       
   320                 gr = None
       
   321 
       
   322             if gr is not None:
       
   323                 # update the set of awaited revisions with the one from the
       
   324                 # subgroup
       
   325                 unblocked |= gr[1]
       
   326                 # output all revisions in the subgroup
       
   327                 for r in gr[0]:
       
   328                     yield r
       
   329                 # delete the subgroup that you just output
       
   330                 # unless it is groups[0] in which case you just empty it.
       
   331                 if targetidx:
       
   332                     del groups[targetidx]
       
   333                 else:
       
   334                     gr[0][:] = []
       
   335     # Check if we have some subgroup waiting for revisions we are not going to
       
   336     # iterate over
       
   337     for g in groups:
       
   338         for r in g[0]:
       
   339             yield r