diff mercurial/ancestor.py @ 18091:f7f8159caad3

ancestor: add lazy membership testing to lazyancestors This also makes the perfancestorset command use lazy membership testing. In a linear repository with over 400,000 commits, without this patch, hg perfancestorset takes 0.80 seconds no matter how far behind we're looking. With this patch, hg perfancestorset -- X takes: Rev X Time -1 0.00s -4000 0.01s -20000 0.04s -80000 0.17s -200000 0.43s -300000 0.69s 0 0.88s Thus, for revisions close to tip, we're up to several orders of magnitude faster. At 0 we're around 10% slower.
author Siddharth Agarwal <sid0@fb.com>
date Tue, 18 Dec 2012 12:47:20 -0800
parents 9abc55ef85b5
children 2f7186400a07
line wrap: on
line diff
--- a/mercurial/ancestor.py	Tue Dec 18 10:14:01 2012 -0800
+++ b/mercurial/ancestor.py	Tue Dec 18 12:47:20 2012 -0800
@@ -177,8 +177,8 @@
         """Create a new object generating ancestors for the given revs. Does
         not generate revs lower than stoprev.
 
-        This is computed lazily starting from revs. The object only supports
-        iteration.
+        This is computed lazily starting from revs. The object supports
+        iteration and membership.
 
         cl should be a changelog and revs should be an iterable. inclusive is
         a boolean that indicates whether revs should be included. Revs lower
@@ -190,6 +190,19 @@
         self._stoprev = stoprev
         self._inclusive = inclusive
 
+        # Initialize data structures for __contains__.
+        # For __contains__, we use a heap rather than a deque because
+        # (a) it minimizes the number of parentrevs calls made
+        # (b) it makes the loop termination condition obvious
+        # Python's heap is a min-heap. Multiply all values by -1 to convert it
+        # into a max-heap.
+        self._containsvisit = [-rev for rev in revs]
+        heapq.heapify(self._containsvisit)
+        if inclusive:
+            self._containsseen = set(revs)
+        else:
+            self._containsseen = set()
+
     def __iter__(self):
         """Generate the ancestors of _initrevs in reverse topological order.
 
@@ -219,3 +232,33 @@
                     visit.append(parent)
                     seen.add(parent)
                     yield parent
+
+    def __contains__(self, target):
+        """Test whether target is an ancestor of self._initrevs."""
+        # Trying to do both __iter__ and __contains__ using the same visit
+        # heap and seen set is complex enough that it slows down both. Keep
+        # them separate.
+        seen = self._containsseen
+        if target in seen:
+            return True
+
+        parentrevs = self._parentrevs
+        visit = self._containsvisit
+        stoprev = self._stoprev
+        heappop = heapq.heappop
+        heappush = heapq.heappush
+
+        targetseen = False
+
+        while visit and -visit[0] > target and not targetseen:
+            for parent in parentrevs(-heappop(visit)):
+                if parent < stoprev or parent in seen:
+                    continue
+                # We need to make sure we push all parents into the heap so
+                # that we leave it in a consistent state for future calls.
+                heappush(visit, -parent)
+                seen.add(parent)
+                if parent == target:
+                    targetseen = True
+
+        return targetseen