diff mercurial/obsutil.py @ 33142:4f49810a1011

obsutil: move 'successorssets' to the new modules We have a new 'obsutil' module now. We move this high level utility there to bring 'obsolete.py' back to a more reasonable size.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 27 Jun 2017 01:03:01 +0200
parents 1858fc2327ef
children d09ae850296d
line wrap: on
line diff
--- a/mercurial/obsutil.py	Thu Jun 29 11:29:19 2017 -0700
+++ b/mercurial/obsutil.py	Tue Jun 27 01:03:01 2017 +0200
@@ -34,3 +34,208 @@
                 yield precnodeid
             else:
                 stack.append(precnodeid)
+
+def successorssets(repo, initialnode, cache=None):
+    """Return set of all latest successors of initial nodes
+
+    The successors set of a changeset A are the group of revisions that succeed
+    A. It succeeds A as a consistent whole, each revision being only a partial
+    replacement. The successors set contains non-obsolete changesets only.
+
+    This function returns the full list of successor sets which is why it
+    returns a list of tuples and not just a single tuple. Each tuple is a valid
+    successors set. Note that (A,) may be a valid successors set for changeset A
+    (see below).
+
+    In most cases, a changeset A will have a single element (e.g. the changeset
+    A is replaced by A') in its successors set. Though, it is also common for a
+    changeset A to have no elements in its successor set (e.g. the changeset
+    has been pruned). Therefore, the returned list of successors sets will be
+    [(A',)] or [], respectively.
+
+    When a changeset A is split into A' and B', however, it will result in a
+    successors set containing more than a single element, i.e. [(A',B')].
+    Divergent changesets will result in multiple successors sets, i.e. [(A',),
+    (A'')].
+
+    If a changeset A is not obsolete, then it will conceptually have no
+    successors set. To distinguish this from a pruned changeset, the successor
+    set will contain itself only, i.e. [(A,)].
+
+    Finally, successors unknown locally are considered to be pruned (obsoleted
+    without any successors).
+
+    The optional `cache` parameter is a dictionary that may contain precomputed
+    successors sets. It is meant to reuse the computation of a previous call to
+    `successorssets` when multiple calls are made at the same time. The cache
+    dictionary is updated in place. The caller is responsible for its life
+    span. Code that makes multiple calls to `successorssets` *must* use this
+    cache mechanism or suffer terrible performance.
+    """
+
+    succmarkers = repo.obsstore.successors
+
+    # Stack of nodes we search successors sets for
+    toproceed = [initialnode]
+    # set version of above list for fast loop detection
+    # element added to "toproceed" must be added here
+    stackedset = set(toproceed)
+    if cache is None:
+        cache = {}
+
+    # This while loop is the flattened version of a recursive search for
+    # successors sets
+    #
+    # def successorssets(x):
+    #    successors = directsuccessors(x)
+    #    ss = [[]]
+    #    for succ in directsuccessors(x):
+    #        # product as in itertools cartesian product
+    #        ss = product(ss, successorssets(succ))
+    #    return ss
+    #
+    # But we can not use plain recursive calls here:
+    # - that would blow the python call stack
+    # - obsolescence markers may have cycles, we need to handle them.
+    #
+    # The `toproceed` list act as our call stack. Every node we search
+    # successors set for are stacked there.
+    #
+    # The `stackedset` is set version of this stack used to check if a node is
+    # already stacked. This check is used to detect cycles and prevent infinite
+    # loop.
+    #
+    # successors set of all nodes are stored in the `cache` dictionary.
+    #
+    # After this while loop ends we use the cache to return the successors sets
+    # for the node requested by the caller.
+    while toproceed:
+        # Every iteration tries to compute the successors sets of the topmost
+        # node of the stack: CURRENT.
+        #
+        # There are four possible outcomes:
+        #
+        # 1) We already know the successors sets of CURRENT:
+        #    -> mission accomplished, pop it from the stack.
+        # 2) Node is not obsolete:
+        #    -> the node is its own successors sets. Add it to the cache.
+        # 3) We do not know successors set of direct successors of CURRENT:
+        #    -> We add those successors to the stack.
+        # 4) We know successors sets of all direct successors of CURRENT:
+        #    -> We can compute CURRENT successors set and add it to the
+        #       cache.
+        #
+        current = toproceed[-1]
+        if current in cache:
+            # case (1): We already know the successors sets
+            stackedset.remove(toproceed.pop())
+        elif current not in succmarkers:
+            # case (2): The node is not obsolete.
+            if current in repo:
+                # We have a valid last successors.
+                cache[current] = [(current,)]
+            else:
+                # Final obsolete version is unknown locally.
+                # Do not count that as a valid successors
+                cache[current] = []
+        else:
+            # cases (3) and (4)
+            #
+            # We proceed in two phases. Phase 1 aims to distinguish case (3)
+            # from case (4):
+            #
+            #     For each direct successors of CURRENT, we check whether its
+            #     successors sets are known. If they are not, we stack the
+            #     unknown node and proceed to the next iteration of the while
+            #     loop. (case 3)
+            #
+            #     During this step, we may detect obsolescence cycles: a node
+            #     with unknown successors sets but already in the call stack.
+            #     In such a situation, we arbitrary set the successors sets of
+            #     the node to nothing (node pruned) to break the cycle.
+            #
+            #     If no break was encountered we proceed to phase 2.
+            #
+            # Phase 2 computes successors sets of CURRENT (case 4); see details
+            # in phase 2 itself.
+            #
+            # Note the two levels of iteration in each phase.
+            # - The first one handles obsolescence markers using CURRENT as
+            #   precursor (successors markers of CURRENT).
+            #
+            #   Having multiple entry here means divergence.
+            #
+            # - The second one handles successors defined in each marker.
+            #
+            #   Having none means pruned node, multiple successors means split,
+            #   single successors are standard replacement.
+            #
+            for mark in sorted(succmarkers[current]):
+                for suc in mark[1]:
+                    if suc not in cache:
+                        if suc in stackedset:
+                            # cycle breaking
+                            cache[suc] = []
+                        else:
+                            # case (3) If we have not computed successors sets
+                            # of one of those successors we add it to the
+                            # `toproceed` stack and stop all work for this
+                            # iteration.
+                            toproceed.append(suc)
+                            stackedset.add(suc)
+                            break
+                else:
+                    continue
+                break
+            else:
+                # case (4): we know all successors sets of all direct
+                # successors
+                #
+                # Successors set contributed by each marker depends on the
+                # successors sets of all its "successors" node.
+                #
+                # Each different marker is a divergence in the obsolescence
+                # history. It contributes successors sets distinct from other
+                # markers.
+                #
+                # Within a marker, a successor may have divergent successors
+                # sets. In such a case, the marker will contribute multiple
+                # divergent successors sets. If multiple successors have
+                # divergent successors sets, a Cartesian product is used.
+                #
+                # At the end we post-process successors sets to remove
+                # duplicated entry and successors set that are strict subset of
+                # another one.
+                succssets = []
+                for mark in sorted(succmarkers[current]):
+                    # successors sets contributed by this marker
+                    markss = [[]]
+                    for suc in mark[1]:
+                        # cardinal product with previous successors
+                        productresult = []
+                        for prefix in markss:
+                            for suffix in cache[suc]:
+                                newss = list(prefix)
+                                for part in suffix:
+                                    # do not duplicated entry in successors set
+                                    # first entry wins.
+                                    if part not in newss:
+                                        newss.append(part)
+                                productresult.append(newss)
+                        markss = productresult
+                    succssets.extend(markss)
+                # remove duplicated and subset
+                seen = []
+                final = []
+                candidate = sorted(((set(s), s) for s in succssets if s),
+                                   key=lambda x: len(x[1]), reverse=True)
+                for setversion, listversion in candidate:
+                    for seenset in seen:
+                        if setversion.issubset(seenset):
+                            break
+                    else:
+                        final.append(listversion)
+                        seen.append(setversion)
+                final.reverse() # put small successors set first
+                cache[current] = final
+    return cache[initialnode]