diff mercurial/revlogutils/deltas.py @ 50694:a41eeb877d07

branching: merge with stable
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sun, 18 Jun 2023 00:09:39 +0200
parents e77ca247b85b
children f1b57672cb94
line wrap: on
line diff
--- a/mercurial/revlogutils/deltas.py	Tue Jun 20 02:36:52 2023 +0200
+++ b/mercurial/revlogutils/deltas.py	Sun Jun 18 00:09:39 2023 +0200
@@ -1087,10 +1087,17 @@
     ):
         self.revlog = revlog
         self._write_debug = write_debug
-        self._debug_search = debug_search
+        if write_debug is None:
+            self._debug_search = False
+        else:
+            self._debug_search = debug_search
         self._debug_info = debug_info
         self._snapshot_cache = SnapshotCache()
 
+    @property
+    def _gather_debug(self):
+        return self._write_debug is not None or self._debug_info is not None
+
     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision
 
@@ -1136,7 +1143,6 @@
     def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
         # can we use the cached delta?
         revlog = self.revlog
-        debug_search = self._write_debug is not None and self._debug_search
         chainbase = revlog.chainbase(base)
         if revlog._generaldelta:
             deltabase = base
@@ -1173,7 +1179,7 @@
                 delta = revinfo.cachedelta[1]
         if delta is None:
             delta = self._builddeltadiff(base, revinfo, fh)
-        if debug_search:
+        if self._debug_search:
             msg = b"DBG-DELTAS-SEARCH:     uncompressed-delta-size=%d\n"
             msg %= len(delta)
             self._write_debug(msg)
@@ -1181,17 +1187,17 @@
         if revlog.upperboundcomp is not None and snapshotdepth:
             lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
             snapshotlimit = revinfo.textlen >> snapshotdepth
-            if debug_search:
+            if self._debug_search:
                 msg = b"DBG-DELTAS-SEARCH:     projected-lower-size=%d\n"
                 msg %= lowestrealisticdeltalen
                 self._write_debug(msg)
             if snapshotlimit < lowestrealisticdeltalen:
-                if debug_search:
+                if self._debug_search:
                     msg = b"DBG-DELTAS-SEARCH:     DISCARDED (snapshot limit)\n"
                     self._write_debug(msg)
                 return None
             if revlog.length(base) < lowestrealisticdeltalen:
-                if debug_search:
+                if self._debug_search:
                     msg = b"DBG-DELTAS-SEARCH:     DISCARDED (prev size)\n"
                     self._write_debug(msg)
                 return None
@@ -1253,41 +1259,34 @@
         if target_rev is None:
             target_rev = len(self.revlog)
 
-        if not revinfo.textlen:
-            return self._fullsnapshotinfo(fh, revinfo, target_rev)
+        gather_debug = self._gather_debug
+        cachedelta = revinfo.cachedelta
+        revlog = self.revlog
+        p1r = p2r = None
 
         if excluded_bases is None:
             excluded_bases = set()
 
-        # no delta for flag processor revision (see "candelta" for why)
-        # not calling candelta since only one revision needs test, also to
-        # avoid overhead fetching flags again.
-        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
-            return self._fullsnapshotinfo(fh, revinfo, target_rev)
-
-        gather_debug = (
-            self._write_debug is not None or self._debug_info is not None
-        )
-        debug_search = self._write_debug is not None and self._debug_search
-
         if gather_debug:
             start = util.timer()
-
-        # count the number of different delta we tried (for debug purpose)
-        dbg_try_count = 0
-        # count the number of "search round" we did. (for debug purpose)
-        dbg_try_rounds = 0
-        dbg_type = b'unknown'
-
-        cachedelta = revinfo.cachedelta
-        p1 = revinfo.p1
-        p2 = revinfo.p2
-        revlog = self.revlog
-
-        deltainfo = None
-        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
-
-        if gather_debug:
+            dbg = self._one_dbg_data()
+            dbg['revision'] = target_rev
+            target_revlog = b"UNKNOWN"
+            target_type = self.revlog.target[0]
+            target_key = self.revlog.target[1]
+            if target_type == KIND_CHANGELOG:
+                target_revlog = b'CHANGELOG:'
+            elif target_type == KIND_MANIFESTLOG:
+                target_revlog = b'MANIFESTLOG:'
+                if target_key:
+                    target_revlog += b'%s:' % target_key
+            elif target_type == KIND_FILELOG:
+                target_revlog = b'FILELOG:'
+                if target_key:
+                    target_revlog += b'%s:' % target_key
+            dbg['target-revlog'] = target_revlog
+            p1r = revlog.rev(revinfo.p1)
+            p2r = revlog.rev(revinfo.p2)
             if p1r != nullrev:
                 p1_chain_len = revlog._chaininfo(p1r)[0]
             else:
@@ -1296,7 +1295,109 @@
                 p2_chain_len = revlog._chaininfo(p2r)[0]
             else:
                 p2_chain_len = -1
-        if debug_search:
+            dbg['p1-chain-len'] = p1_chain_len
+            dbg['p2-chain-len'] = p2_chain_len
+
+        # 1) if the revision is empty, no amount of delta can beat it
+        #
+        # 2) no delta for flag processor revision (see "candelta" for why)
+        # not calling candelta since only one revision needs test, also to
+        # avoid overhead fetching flags again.
+        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
+            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+            if gather_debug:
+                end = util.timer()
+                dbg['duration'] = end - start
+                dbg[
+                    'delta-base'
+                ] = deltainfo.base  # pytype: disable=attribute-error
+                dbg['search_round_count'] = 0
+                dbg['using-cached-base'] = False
+                dbg['delta_try_count'] = 0
+                dbg['type'] = b"full"
+                dbg['snapshot-depth'] = 0
+                self._dbg_process_data(dbg)
+            return deltainfo
+
+        deltainfo = None
+
+        # If the source delta is to be forcibly reused, let us comply early.
+        if (
+            revlog._generaldelta
+            and revinfo.cachedelta is not None
+            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
+        ):
+            base = revinfo.cachedelta[0]
+            if base == nullrev:
+                dbg_type = b"full"
+                deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+                if gather_debug:
+                    snapshotdepth = 0
+            elif base not in excluded_bases:
+                delta = revinfo.cachedelta[1]
+                header, data = revlog.compress(delta)
+                deltalen = len(header) + len(data)
+                if gather_debug:
+                    offset = revlog.end(len(revlog) - 1)
+                    chainbase = revlog.chainbase(base)
+                    distance = deltalen + offset - revlog.start(chainbase)
+                    chainlen, compresseddeltalen = revlog._chaininfo(base)
+                    chainlen += 1
+                    compresseddeltalen += deltalen
+                    if base == p1r or base == p2r:
+                        dbg_type = b"delta"
+                        snapshotdepth = None
+                    elif not revlog.issnapshot(base):
+                        snapshotdepth = None
+                    else:
+                        dbg_type = b"snapshot"
+                        snapshotdepth = revlog.snapshotdepth(base) + 1
+                else:
+                    distance = None
+                    chainbase = None
+                    chainlen = None
+                    compresseddeltalen = None
+                    snapshotdepth = None
+                deltainfo = _deltainfo(
+                    distance=distance,
+                    deltalen=deltalen,
+                    data=(header, data),
+                    base=base,
+                    chainbase=chainbase,
+                    chainlen=chainlen,
+                    compresseddeltalen=compresseddeltalen,
+                    snapshotdepth=snapshotdepth,
+                )
+
+            if deltainfo is not None:
+                if gather_debug:
+                    end = util.timer()
+                    dbg['duration'] = end - start
+                    dbg[
+                        'delta-base'
+                    ] = deltainfo.base  # pytype: disable=attribute-error
+                    dbg['search_round_count'] = 0
+                    dbg['using-cached-base'] = True
+                    dbg['delta_try_count'] = 0
+                    dbg['type'] = b"full"
+                    if snapshotdepth is None:
+                        dbg['snapshot-depth'] = 0
+                    else:
+                        dbg['snapshot-depth'] = snapshotdepth
+                    self._dbg_process_data(dbg)
+                return deltainfo
+
+        # count the number of different deltas we tried (for debug purposes)
+        dbg_try_count = 0
+        # count the number of "search round" we did. (for debug purpose)
+        dbg_try_rounds = 0
+        dbg_type = b'unknown'
+
+        if p1r is None:
+            p1r = revlog.rev(revinfo.p1)
+            p2r = revlog.rev(revinfo.p2)
+
+        if self._debug_search:
             msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
             msg %= target_rev
             self._write_debug(msg)
@@ -1314,7 +1415,7 @@
         candidaterevs = next(groups)
         while candidaterevs is not None:
             dbg_try_rounds += 1
-            if debug_search:
+            if self._debug_search:
                 prev = None
                 if deltainfo is not None:
                     prev = deltainfo.base
@@ -1325,7 +1426,7 @@
                     and cachedelta[0] in candidaterevs
                 ):
                     round_type = b"cached-delta"
-                elif p1 in candidaterevs or p2 in candidaterevs:
+                elif p1r in candidaterevs or p2r in candidaterevs:
                     round_type = b"parents"
                 elif prev is not None and all(c < prev for c in candidaterevs):
                     round_type = b"refine-down"
@@ -1338,7 +1439,7 @@
                 self._write_debug(msg)
             nominateddeltas = []
             if deltainfo is not None:
-                if debug_search:
+                if self._debug_search:
                     msg = (
                         b"DBG-DELTAS-SEARCH:   CONTENDER: rev=%d - length=%d\n"
                     )
@@ -1348,14 +1449,14 @@
                 # challenge it against refined candidates
                 nominateddeltas.append(deltainfo)
             for candidaterev in candidaterevs:
-                if debug_search:
+                if self._debug_search:
                     msg = b"DBG-DELTAS-SEARCH:   CANDIDATE: rev=%d\n"
                     msg %= candidaterev
                     self._write_debug(msg)
                     candidate_type = None
-                    if candidaterev == p1:
+                    if candidaterev == p1r:
                         candidate_type = b"p1"
-                    elif candidaterev == p2:
+                    elif candidaterev == p2r:
                         candidate_type = b"p2"
                     elif self.revlog.issnapshot(candidaterev):
                         candidate_type = b"snapshot-%d"
@@ -1376,7 +1477,7 @@
 
                 dbg_try_count += 1
 
-                if debug_search:
+                if self._debug_search:
                     delta_start = util.timer()
                 candidatedelta = self._builddeltainfo(
                     revinfo,
@@ -1384,23 +1485,23 @@
                     fh,
                     target_rev=target_rev,
                 )
-                if debug_search:
+                if self._debug_search:
                     delta_end = util.timer()
                     msg = b"DBG-DELTAS-SEARCH:     delta-search-time=%f\n"
                     msg %= delta_end - delta_start
                     self._write_debug(msg)
                 if candidatedelta is not None:
                     if is_good_delta_info(self.revlog, candidatedelta, revinfo):
-                        if debug_search:
+                        if self._debug_search:
                             msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (GOOD)\n"
                             msg %= candidatedelta.deltalen
                             self._write_debug(msg)
                         nominateddeltas.append(candidatedelta)
-                    elif debug_search:
+                    elif self._debug_search:
                         msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (BAD)\n"
                         msg %= candidatedelta.deltalen
                         self._write_debug(msg)
-                elif debug_search:
+                elif self._debug_search:
                     msg = b"DBG-DELTAS-SEARCH:     NO-DELTA\n"
                     self._write_debug(msg)
             if nominateddeltas:
@@ -1434,17 +1535,14 @@
                     and dbg_try_count == 1
                     and deltainfo.base == cachedelta[0]
                 )
-            dbg = {
-                'duration': end - start,
-                'revision': target_rev,
-                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
-                'search_round_count': dbg_try_rounds,
-                'using-cached-base': used_cached,
-                'delta_try_count': dbg_try_count,
-                'type': dbg_type,
-                'p1-chain-len': p1_chain_len,
-                'p2-chain-len': p2_chain_len,
-            }
+            dbg['duration'] = end - start
+            dbg[
+                'delta-base'
+            ] = deltainfo.base  # pytype: disable=attribute-error
+            dbg['search_round_count'] = dbg_try_rounds
+            dbg['using-cached-base'] = used_cached
+            dbg['delta_try_count'] = dbg_try_count
+            dbg['type'] = dbg_type
             if (
                 deltainfo.snapshotdepth  # pytype: disable=attribute-error
                 is not None
@@ -1454,55 +1552,58 @@
                 ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
             else:
                 dbg['snapshot-depth'] = 0
-            target_revlog = b"UNKNOWN"
-            target_type = self.revlog.target[0]
-            target_key = self.revlog.target[1]
-            if target_type == KIND_CHANGELOG:
-                target_revlog = b'CHANGELOG:'
-            elif target_type == KIND_MANIFESTLOG:
-                target_revlog = b'MANIFESTLOG:'
-                if target_key:
-                    target_revlog += b'%s:' % target_key
-            elif target_type == KIND_FILELOG:
-                target_revlog = b'FILELOG:'
-                if target_key:
-                    target_revlog += b'%s:' % target_key
-            dbg['target-revlog'] = target_revlog
+            self._dbg_process_data(dbg)
+        return deltainfo
 
-            if self._debug_info is not None:
-                self._debug_info.append(dbg)
+    def _one_dbg_data(self):
+        return {
+            'duration': None,
+            'revision': None,
+            'delta-base': None,
+            'search_round_count': None,
+            'using-cached-base': None,
+            'delta_try_count': None,
+            'type': None,
+            'p1-chain-len': None,
+            'p2-chain-len': None,
+            'snapshot-depth': None,
+            'target-revlog': None,
+        }
+
+    def _dbg_process_data(self, dbg):
+        if self._debug_info is not None:
+            self._debug_info.append(dbg)
 
-            if self._write_debug is not None:
-                msg = (
-                    b"DBG-DELTAS:"
-                    b" %-12s"
-                    b" rev=%d:"
-                    b" delta-base=%d"
-                    b" is-cached=%d"
-                    b" - search-rounds=%d"
-                    b" try-count=%d"
-                    b" - delta-type=%-6s"
-                    b" snap-depth=%d"
-                    b" - p1-chain-length=%d"
-                    b" p2-chain-length=%d"
-                    b" - duration=%f"
-                    b"\n"
-                )
-                msg %= (
-                    dbg["target-revlog"],
-                    dbg["revision"],
-                    dbg["delta-base"],
-                    dbg["using-cached-base"],
-                    dbg["search_round_count"],
-                    dbg["delta_try_count"],
-                    dbg["type"],
-                    dbg["snapshot-depth"],
-                    dbg["p1-chain-len"],
-                    dbg["p2-chain-len"],
-                    dbg["duration"],
-                )
-                self._write_debug(msg)
-        return deltainfo
+        if self._write_debug is not None:
+            msg = (
+                b"DBG-DELTAS:"
+                b" %-12s"
+                b" rev=%d:"
+                b" delta-base=%d"
+                b" is-cached=%d"
+                b" - search-rounds=%d"
+                b" try-count=%d"
+                b" - delta-type=%-6s"
+                b" snap-depth=%d"
+                b" - p1-chain-length=%d"
+                b" p2-chain-length=%d"
+                b" - duration=%f"
+                b"\n"
+            )
+            msg %= (
+                dbg["target-revlog"],
+                dbg["revision"],
+                dbg["delta-base"],
+                dbg["using-cached-base"],
+                dbg["search_round_count"],
+                dbg["delta_try_count"],
+                dbg["type"],
+                dbg["snapshot-depth"],
+                dbg["p1-chain-len"],
+                dbg["p2-chain-len"],
+                dbg["duration"],
+            )
+            self._write_debug(msg)
 
 
 def delta_compression(default_compression_header, deltainfo):