Mercurial > public > mercurial-scm > hg

diff mercurial/revlogutils/deltas.py @ 49657:f5f113f1b011
delta-find: add a way to control the number of bases tested at the same time See inline comment for details. The feature is currently disabled, but should be enabled by default to mitigate some existing pathological cases. Also see the next changeset for details.
author: Pierre-Yves David <pierre-yves.david@octobus.net>
date: Sun, 06 Nov 2022 14:47:17 -0500
parents: 4956942c0416
children: 5af4a0a73e4c
--- a/mercurial/revlogutils/deltas.py	Wed Nov 23 21:11:46 2022 -0500
+++ b/mercurial/revlogutils/deltas.py	Sun Nov 06 14:47:17 2022 -0500
@@ -680,6 +680,7 @@
     good = None
 
     deltas_limit = textlen * LIMIT_DELTA2TEXT
+    group_chunk_size = revlog._candidate_group_chunk_size
 
     tested = {nullrev}
     candidates = _refinedgroups(
@@ -770,11 +771,30 @@
 
             group.append(rev)
         if group:
-            # XXX: in the sparse revlog case, group can become large,
-            #      impacting performances. Some bounding or slicing mecanism
-            #      would help to reduce this impact.
-            tested.update(group)
-            good = yield tuple(group)
+            # When the size of the candidate group is big, it can result in a
+            # quite significant performance impact. To reduce this, we can send
+            # them in smaller batches until the new batch does not provide any
+            # improvements.
+            #
+            # This might reduce the overall efficiency of the compression in
+            # some corner cases, but that should also prevent very pathological
+            # cases from being an issue. (eg. 20 000 candidates).
+            #
+            # XXX note that the ordering of the group becomes important as it
+            # now impacts the final result. The current order is unprocessed
+            # and can be improved.
+            if group_chunk_size == 0:
+                tested.update(group)
+                good = yield tuple(group)
+            else:
+                prev_good = good
+                for start in range(0, len(group), group_chunk_size):
+                    sub_group = group[start : start + group_chunk_size]
+                    tested.update(sub_group)
+                    good = yield tuple(sub_group)
+                    if prev_good == good:
+                        break
+
     yield None
author	Pierre-Yves David <pierre-yves.david@octobus.net>
date	Sun, 06 Nov 2022 14:47:17 -0500
parents	4956942c0416
children	5af4a0a73e4c