Mercurial > public > mercurial-scm > hg-stable
diff mercurial/revlogutils/deltas.py @ 50694:a41eeb877d07
branching: merge with stable
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sun, 18 Jun 2023 00:09:39 +0200 |
parents | e77ca247b85b |
children | f1b57672cb94 |
line wrap: on
line diff
--- a/mercurial/revlogutils/deltas.py Tue Jun 20 02:36:52 2023 +0200 +++ b/mercurial/revlogutils/deltas.py Sun Jun 18 00:09:39 2023 +0200 @@ -1087,10 +1087,17 @@ ): self.revlog = revlog self._write_debug = write_debug - self._debug_search = debug_search + if write_debug is None: + self._debug_search = False + else: + self._debug_search = debug_search self._debug_info = debug_info self._snapshot_cache = SnapshotCache() + @property + def _gather_debug(self): + return self._write_debug is not None or self._debug_info is not None + def buildtext(self, revinfo, fh): """Builds a fulltext version of a revision @@ -1136,7 +1143,6 @@ def _builddeltainfo(self, revinfo, base, fh, target_rev=None): # can we use the cached delta? revlog = self.revlog - debug_search = self._write_debug is not None and self._debug_search chainbase = revlog.chainbase(base) if revlog._generaldelta: deltabase = base @@ -1173,7 +1179,7 @@ delta = revinfo.cachedelta[1] if delta is None: delta = self._builddeltadiff(base, revinfo, fh) - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n" msg %= len(delta) self._write_debug(msg) @@ -1181,17 +1187,17 @@ if revlog.upperboundcomp is not None and snapshotdepth: lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp snapshotlimit = revinfo.textlen >> snapshotdepth - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n" msg %= lowestrealisticdeltalen self._write_debug(msg) if snapshotlimit < lowestrealisticdeltalen: - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n" self._write_debug(msg) return None if revlog.length(base) < lowestrealisticdeltalen: - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n" self._write_debug(msg) return None @@ -1253,41 +1259,34 @@ if target_rev is None: target_rev = len(self.revlog) - if not revinfo.textlen: - return self._fullsnapshotinfo(fh, revinfo, target_rev) + gather_debug = self._gather_debug + cachedelta = revinfo.cachedelta + revlog = self.revlog + p1r = p2r = None if excluded_bases is None: excluded_bases = set() - # no delta for flag processor revision (see "candelta" for why) - # not calling candelta since only one revision needs test, also to - # avoid overhead fetching flags again. - if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS: - return self._fullsnapshotinfo(fh, revinfo, target_rev) - - gather_debug = ( - self._write_debug is not None or self._debug_info is not None - ) - debug_search = self._write_debug is not None and self._debug_search - if gather_debug: start = util.timer() - - # count the number of different delta we tried (for debug purpose) - dbg_try_count = 0 - # count the number of "search round" we did. (for debug purpose) - dbg_try_rounds = 0 - dbg_type = b'unknown' - - cachedelta = revinfo.cachedelta - p1 = revinfo.p1 - p2 = revinfo.p2 - revlog = self.revlog - - deltainfo = None - p1r, p2r = revlog.rev(p1), revlog.rev(p2) - - if gather_debug: + dbg = self._one_dbg_data() + dbg['revision'] = target_rev + target_revlog = b"UNKNOWN" + target_type = self.revlog.target[0] + target_key = self.revlog.target[1] + if target_type == KIND_CHANGELOG: + target_revlog = b'CHANGELOG:' + elif target_type == KIND_MANIFESTLOG: + target_revlog = b'MANIFESTLOG:' + if target_key: + target_revlog += b'%s:' % target_key + elif target_type == KIND_FILELOG: + target_revlog = b'FILELOG:' + if target_key: + target_revlog += b'%s:' % target_key + dbg['target-revlog'] = target_revlog + p1r = revlog.rev(revinfo.p1) + p2r = revlog.rev(revinfo.p2) if p1r != nullrev: p1_chain_len = revlog._chaininfo(p1r)[0] else: @@ -1296,7 +1295,109 @@ p2_chain_len = revlog._chaininfo(p2r)[0] else: p2_chain_len = -1 - if debug_search: + dbg['p1-chain-len'] = p1_chain_len + dbg['p2-chain-len'] = p2_chain_len + + # 1) if the revision is empty, no amount of delta can beat it + # + # 2) no delta for flag processor revision (see "candelta" for why) + # not calling candelta since only one revision needs test, also to + # avoid overhead fetching flags again. + if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS: + deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) + if gather_debug: + end = util.timer() + dbg['duration'] = end - start + dbg[ + 'delta-base' + ] = deltainfo.base # pytype: disable=attribute-error + dbg['search_round_count'] = 0 + dbg['using-cached-base'] = False + dbg['delta_try_count'] = 0 + dbg['type'] = b"full" + dbg['snapshot-depth'] = 0 + self._dbg_process_data(dbg) + return deltainfo + + deltainfo = None + + # If this source delta are to be forcibly reuse, let us comply early. + if ( + revlog._generaldelta + and revinfo.cachedelta is not None + and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE + ): + base = revinfo.cachedelta[0] + if base == nullrev: + dbg_type = b"full" + deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) + if gather_debug: + snapshotdepth = 0 + elif base not in excluded_bases: + delta = revinfo.cachedelta[1] + header, data = revlog.compress(delta) + deltalen = len(header) + len(data) + if gather_debug: + offset = revlog.end(len(revlog) - 1) + chainbase = revlog.chainbase(base) + distance = deltalen + offset - revlog.start(chainbase) + chainlen, compresseddeltalen = revlog._chaininfo(base) + chainlen += 1 + compresseddeltalen += deltalen + if base == p1r or base == p2r: + dbg_type = b"delta" + snapshotdepth = None + elif not revlog.issnapshot(base): + snapshotdepth = None + else: + dbg_type = b"snapshot" + snapshotdepth = revlog.snapshotdepth(base) + 1 + else: + distance = None + chainbase = None + chainlen = None + compresseddeltalen = None + snapshotdepth = None + deltainfo = _deltainfo( + distance=distance, + deltalen=deltalen, + data=(header, data), + base=base, + chainbase=chainbase, + chainlen=chainlen, + compresseddeltalen=compresseddeltalen, + snapshotdepth=snapshotdepth, + ) + + if deltainfo is not None: + if gather_debug: + end = util.timer() + dbg['duration'] = end - start + dbg[ + 'delta-base' + ] = deltainfo.base # pytype: disable=attribute-error + dbg['search_round_count'] = 0 + dbg['using-cached-base'] = True + dbg['delta_try_count'] = 0 + dbg['type'] = b"full" + if snapshotdepth is None: + dbg['snapshot-depth'] = 0 + else: + dbg['snapshot-depth'] = snapshotdepth + self._dbg_process_data(dbg) + return deltainfo + + # count the number of different delta we tried (for debug purpose) + dbg_try_count = 0 + # count the number of "search round" we did. (for debug purpose) + dbg_try_rounds = 0 + dbg_type = b'unknown' + + if p1r is None: + p1r = revlog.rev(revinfo.p1) + p2r = revlog.rev(revinfo.p2) + + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n" msg %= target_rev self._write_debug(msg) @@ -1314,7 +1415,7 @@ candidaterevs = next(groups) while candidaterevs is not None: dbg_try_rounds += 1 - if debug_search: + if self._debug_search: prev = None if deltainfo is not None: prev = deltainfo.base @@ -1325,7 +1426,7 @@ and cachedelta[0] in candidaterevs ): round_type = b"cached-delta" - elif p1 in candidaterevs or p2 in candidaterevs: + elif p1r in candidaterevs or p2r in candidaterevs: round_type = b"parents" elif prev is not None and all(c < prev for c in candidaterevs): round_type = b"refine-down" @@ -1338,7 +1439,7 @@ self._write_debug(msg) nominateddeltas = [] if deltainfo is not None: - if debug_search: + if self._debug_search: msg = ( b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n" ) @@ -1348,14 +1449,14 @@ # challenge it against refined candidates nominateddeltas.append(deltainfo) for candidaterev in candidaterevs: - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n" msg %= candidaterev self._write_debug(msg) candidate_type = None - if candidaterev == p1: + if candidaterev == p1r: candidate_type = b"p1" - elif candidaterev == p2: + elif candidaterev == p2r: candidate_type = b"p2" elif self.revlog.issnapshot(candidaterev): candidate_type = b"snapshot-%d" @@ -1376,7 +1477,7 @@ dbg_try_count += 1 - if debug_search: + if self._debug_search: delta_start = util.timer() candidatedelta = self._builddeltainfo( revinfo, @@ -1384,23 +1485,23 @@ fh, target_rev=target_rev, ) - if debug_search: + if self._debug_search: delta_end = util.timer() msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n" msg %= delta_end - delta_start self._write_debug(msg) if candidatedelta is not None: if is_good_delta_info(self.revlog, candidatedelta, revinfo): - if debug_search: + if self._debug_search: msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n" msg %= candidatedelta.deltalen self._write_debug(msg) nominateddeltas.append(candidatedelta) - elif debug_search: + elif self._debug_search: msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n" msg %= candidatedelta.deltalen self._write_debug(msg) - elif debug_search: + elif self._debug_search: msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n" self._write_debug(msg) if nominateddeltas: @@ -1434,17 +1535,14 @@ and dbg_try_count == 1 and deltainfo.base == cachedelta[0] ) - dbg = { - 'duration': end - start, - 'revision': target_rev, - 'delta-base': deltainfo.base, # pytype: disable=attribute-error - 'search_round_count': dbg_try_rounds, - 'using-cached-base': used_cached, - 'delta_try_count': dbg_try_count, - 'type': dbg_type, - 'p1-chain-len': p1_chain_len, - 'p2-chain-len': p2_chain_len, - } + dbg['duration'] = end - start + dbg[ + 'delta-base' + ] = deltainfo.base # pytype: disable=attribute-error + dbg['search_round_count'] = dbg_try_rounds + dbg['using-cached-base'] = used_cached + dbg['delta_try_count'] = dbg_try_count + dbg['type'] = dbg_type if ( deltainfo.snapshotdepth # pytype: disable=attribute-error is not None @@ -1454,55 +1552,58 @@ ] = deltainfo.snapshotdepth # pytype: disable=attribute-error else: dbg['snapshot-depth'] = 0 - target_revlog = b"UNKNOWN" - target_type = self.revlog.target[0] - target_key = self.revlog.target[1] - if target_type == KIND_CHANGELOG: - target_revlog = b'CHANGELOG:' - elif target_type == KIND_MANIFESTLOG: - target_revlog = b'MANIFESTLOG:' - if target_key: - target_revlog += b'%s:' % target_key - elif target_type == KIND_FILELOG: - target_revlog = b'FILELOG:' - if target_key: - target_revlog += b'%s:' % target_key - dbg['target-revlog'] = target_revlog + self._dbg_process_data(dbg) + return deltainfo - if self._debug_info is not None: - self._debug_info.append(dbg) + def _one_dbg_data(self): + return { + 'duration': None, + 'revision': None, + 'delta-base': None, + 'search_round_count': None, + 'using-cached-base': None, + 'delta_try_count': None, + 'type': None, + 'p1-chain-len': None, + 'p2-chain-len': None, + 'snapshot-depth': None, + 'target-revlog': None, + } + + def _dbg_process_data(self, dbg): + if self._debug_info is not None: + self._debug_info.append(dbg) - if self._write_debug is not None: - msg = ( - b"DBG-DELTAS:" - b" %-12s" - b" rev=%d:" - b" delta-base=%d" - b" is-cached=%d" - b" - search-rounds=%d" - b" try-count=%d" - b" - delta-type=%-6s" - b" snap-depth=%d" - b" - p1-chain-length=%d" - b" p2-chain-length=%d" - b" - duration=%f" - b"\n" - ) - msg %= ( - dbg["target-revlog"], - dbg["revision"], - dbg["delta-base"], - dbg["using-cached-base"], - dbg["search_round_count"], - dbg["delta_try_count"], - dbg["type"], - dbg["snapshot-depth"], - dbg["p1-chain-len"], - dbg["p2-chain-len"], - dbg["duration"], - ) - self._write_debug(msg) - return deltainfo + if self._write_debug is not None: + msg = ( + b"DBG-DELTAS:" + b" %-12s" + b" rev=%d:" + b" delta-base=%d" + b" is-cached=%d" + b" - search-rounds=%d" + b" try-count=%d" + b" - delta-type=%-6s" + b" snap-depth=%d" + b" - p1-chain-length=%d" + b" p2-chain-length=%d" + b" - duration=%f" + b"\n" + ) + msg %= ( + dbg["target-revlog"], + dbg["revision"], + dbg["delta-base"], + dbg["using-cached-base"], + dbg["search_round_count"], + dbg["delta_try_count"], + dbg["type"], + dbg["snapshot-depth"], + dbg["p1-chain-len"], + dbg["p2-chain-len"], + dbg["duration"], + ) + self._write_debug(msg) def delta_compression(default_compression_header, deltainfo):