diff mercurial/revlogutils/deltas.py @ 39522:c6b8eab5db19

snapshot: also consider the snapshot chain of one unrelated revision. To maximize the chance of good delta chain reuse, we inject an unrelated delta chain into our search. To do so, we search for the highest revision unrelated to the parents of the current revision and use its snapshot chain too. Adding this extra snapshot into the mix can have a performance impact. We'll deal with the performance impact in a later series.
author Boris Feld <boris.feld@octobus.net>
date Fri, 07 Sep 2018 11:18:45 -0400
parents 05a165dc4f55
children bdb41eaa8b59
line wrap: on
line diff
--- a/mercurial/revlogutils/deltas.py	Fri Sep 07 11:17:37 2018 -0400
+++ b/mercurial/revlogutils/deltas.py	Fri Sep 07 11:18:45 2018 -0400
@@ -719,6 +719,36 @@
                 parents_snaps[idx].add(s)
         snapfloor = min(parents_snaps[0]) + 1
         _findsnapshots(revlog, snapshots, snapfloor)
+        # search for the highest "unrelated" revision
+        #
+        # Adding snapshots used by an "unrelated" revision increases the odds
+        # that we reuse an independent, yet better, snapshot chain.
+        #
+        # XXX instead of building a set of revisions, we could lazily enumerate
+        # over the chains. That would be more efficient, however we stick to
+        # simple code for now.
+        all_revs = set()
+        for chain in candidate_chains:
+            all_revs.update(chain)
+        other = None
+        for r in revlog.revs(prev, snapfloor):
+            if r not in all_revs:
+                other = r
+                break
+        if other is not None:
+            # To avoid unfair competition, we won't use unrelated intermediate
+            # snapshots that are deeper than the ones from the parent delta
+            # chain.
+            max_depth = max(parents_snaps.keys())
+            chain = deltachain(other)
+            for idx, s in enumerate(chain):
+                if s < snapfloor:
+                    continue
+                if max_depth < idx:
+                    break
+                if not revlog.issnapshot(s):
+                    break
+                parents_snaps[idx].add(s)
         # Test them as possible intermediate snapshot base
         # We test them from highest to lowest level. High-level ones are more
         # likely to result in a small delta.
@@ -756,9 +786,10 @@
         # more and more snapshot as the repository grow.
         yield tuple(snapshots[nullrev])
 
-    # other approach failed try against prev to hopefully save us a
-    # fulltext.
-    yield (prev,)
+    if not sparse:
+        # other approach failed try against prev to hopefully save us a
+        # fulltext.
+        yield (prev,)
 
 class deltacomputer(object):
     def __init__(self, revlog):