Mercurial > public > mercurial-scm > hg
view mercurial/revlogutils/debug.py @ 49661:511106bcb16c
debug-revlog: details about non-ancestors delta-bases
Deltas against a base that is not an ancestor of the revision that owns this
delta are notable.
For example, they introduce complexity during the bundling process as the base
might not exist on the unbundling side.
We detect them in `hg debugrevlog` and print information about them.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 07 Nov 2022 14:38:52 -0500 |
parents | bd3b6f363fb9 |
children | 7aea9babac5d |
line wrap: on
line source
# revlogutils/debug.py - utility used for revlog debugging
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2022 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# NOTE(review): this copy of the file was recovered from a flattened web view;
# runs of whitespace inside the label string literals (column padding) may have
# been collapsed by the tooling that produced it — verify alignment upstream.
import collections
import string

from .. import (
    node as nodemod,
    util,
)

from . import (
    constants,
)

# registry of all column definitions used by `debug_index`, populated by the
# `debug_column` decorator below
INDEX_ENTRY_DEBUG_COLUMN = []

# sentinel: the column is as wide as a (possibly shortened) node id
NODE_SIZE = object()


class _column_base:
    """contains the definition of a revlog column

    name: the column header,
    value_func: the function called to get a value,
    size: the width of the column,
    verbose_only: only include the column in verbose mode.
    """

    def __init__(self, name, value_func, size=None, verbose=False):
        self.name = name
        self.value_func = value_func
        if size is not NODE_SIZE:
            if size is None:
                size = 8  # arbitrary default
            # never narrower than the header itself
            size = max(len(name), size)
        self._size = size
        self.verbose_only = verbose

    def get_size(self, node_size):
        """return the column width, resolving NODE_SIZE to ``node_size``"""
        if self._size is NODE_SIZE:
            return node_size
        else:
            return self._size


def debug_column(name, size=None, verbose=False):
    """decorated function is registered as a column

    name: the name of the column,
    size: the expected size of the column.
    """

    def register(func):
        entry = _column_base(
            name=name,
            value_func=func,
            size=size,
            verbose=verbose,
        )
        INDEX_ENTRY_DEBUG_COLUMN.append(entry)
        return entry

    return register


@debug_column(b"rev", size=6)
def _rev(index, rev, entry, hexfn):
    return b"%d" % rev


@debug_column(b"rank", size=6, verbose=True)
def rank(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_RANK]


@debug_column(b"linkrev", size=6)
def _linkrev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_LINK_REV]


@debug_column(b"nodeid", size=NODE_SIZE)
def _nodeid(index, rev, entry, hexfn):
    return hexfn(entry[constants.ENTRY_NODE_ID])


@debug_column(b"p1-rev", size=6, verbose=True)
def _p1_rev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_PARENT_1]


@debug_column(b"p1-nodeid", size=NODE_SIZE)
def _p1_node(index, rev, entry, hexfn):
    parent = entry[constants.ENTRY_PARENT_1]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"p2-rev", size=6, verbose=True)
def _p2_rev(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_PARENT_2]


@debug_column(b"p2-nodeid", size=NODE_SIZE)
def _p2_node(index, rev, entry, hexfn):
    parent = entry[constants.ENTRY_PARENT_2]
    p_entry = index[parent]
    return hexfn(p_entry[constants.ENTRY_NODE_ID])


@debug_column(b"full-size", size=20, verbose=True)
def full_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]


@debug_column(b"delta-base", size=6, verbose=True)
def delta_base(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DELTA_BASE]


@debug_column(b"flags", size=2, verbose=True)
def flags(index, rev, entry, hexfn):
    # the low 16 bits of the offset field carry the revision flags
    field = entry[constants.ENTRY_DATA_OFFSET]
    field &= 0xFFFF
    return b"%d" % field


@debug_column(b"comp-mode", size=4, verbose=True)
def compression_mode(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]


@debug_column(b"data-offset", size=20, verbose=True)
def data_offset(index, rev, entry, hexfn):
    # the actual data offset lives above the 16 flag bits
    field = entry[constants.ENTRY_DATA_OFFSET]
    field >>= 16
    return b"%d" % field


@debug_column(b"chunk-size", size=10, verbose=True)
def data_chunk_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]


@debug_column(b"sd-comp-mode", size=7, verbose=True)
def sidedata_compression_mode(index, rev, entry, hexfn):
    compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
    if compression == constants.COMP_MODE_PLAIN:
        return b"plain"
    elif compression == constants.COMP_MODE_DEFAULT:
        return b"default"
    elif compression == constants.COMP_MODE_INLINE:
        return b"inline"
    else:
        return b"%d" % compression


@debug_column(b"sidedata-offset", size=20, verbose=True)
def sidedata_offset(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]


@debug_column(b"sd-chunk-size", size=10, verbose=True)
def sidedata_chunk_size(index, rev, entry, hexfn):
    return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]


def debug_index(
    ui,
    repo,
    formatter,
    revlog,
    full_node,
):
    """display index data for a revlog"""
    if full_node:
        hexfn = nodemod.hex
    else:
        hexfn = nodemod.short

    # width of a node id column: measure the first one, default to 12
    idlen = 12
    for i in revlog:
        idlen = len(hexfn(revlog.node(i)))
        break

    fm = formatter

    # emit the header line, skipping verbose-only columns in non-verbose mode
    header_pieces = []
    for column in INDEX_ENTRY_DEBUG_COLUMN:
        if column.verbose_only and not ui.verbose:
            continue
        size = column.get_size(idlen)
        name = column.name
        header_pieces.append(name.rjust(size))

    fm.plain(b' '.join(header_pieces) + b'\n')

    index = revlog.index

    # one formatter item per revision, one write per column
    for rev in revlog:
        fm.startitem()
        entry = index[rev]
        first = True
        for column in INDEX_ENTRY_DEBUG_COLUMN:
            if column.verbose_only and not ui.verbose:
                continue
            if not first:
                fm.plain(b' ')
            first = False

            size = column.get_size(idlen)
            value = column.value_func(index, rev, entry, hexfn)
            display = b"%%%ds" % size
            fm.write(column.name, display, value)
        fm.plain(b'\n')

    fm.end()


def dump(ui, revlog):
    """perform the work for `hg debugrevlog --dump`"""
    # XXX seems redundant with debug index ?
    r = revlog
    numrevs = len(r)
    ui.write(
        (
            b"# rev p1rev p2rev start end deltastart base p1 p2"
            b" rawsize totalsize compression heads chainlen\n"
        )
    )
    ts = 0
    heads = set()

    for rev in range(numrevs):
        dbase = r.deltaparent(rev)
        if dbase == -1:
            # full revision: use the revision itself as its own base
            dbase = rev
        cbase = r.chainbase(rev)
        clen = r.chainlen(rev)
        p1, p2 = r.parentrevs(rev)
        rs = r.rawsize(rev)
        ts = ts + rs
        # a head is a revision that is not (yet) anyone's parent
        heads -= set(r.parentrevs(rev))
        heads.add(rev)
        try:
            compression = ts / r.end(rev)
        except ZeroDivisionError:
            compression = 0
        ui.write(
            b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
            b"%11d %5d %8d\n"
            % (
                rev,
                p1,
                p2,
                r.start(rev),
                r.end(rev),
                r.start(dbase),
                r.start(cbase),
                r.start(p1),
                r.start(p2),
                rs,
                ts,
                compression,
                len(heads),
                clen,
            )
        )


def debug_revlog(ui, revlog):
    """code for `hg debugrevlog`"""
    r = revlog
    format = r._format_version
    v = r._format_flags
    flags = []
    gdelta = False
    if v & constants.FLAG_INLINE_DATA:
        flags.append(b'inline')
    if v & constants.FLAG_GENERALDELTA:
        gdelta = True
        flags.append(b'generaldelta')
    if not flags:
        flags = [b'(none)']

    ### tracks merge vs single parent
    nummerges = 0

    ### tracks ways the "delta" are build
    # nodelta
    numempty = 0
    numemptytext = 0
    numemptydelta = 0
    # full file content
    numfull = 0
    # intermediate snapshot against a prior snapshot
    numsemi = 0
    # snapshot count per depth
    numsnapdepth = collections.defaultdict(lambda: 0)
    # number of snapshots with a non-ancestor delta
    numsnapdepth_nad = collections.defaultdict(lambda: 0)
    # delta against previous revision
    numprev = 0
    # delta against prev, where prev is a non-ancestor
    numprev_nad = 0
    # delta against first or second parent (not prev)
    nump1 = 0
    nump2 = 0
    # delta against neither prev nor parents
    numother = 0
    # delta against other that is a non-ancestor
    numother_nad = 0
    # delta against prev that are also first or second parent
    # (details of `numprev`)
    nump1prev = 0
    nump2prev = 0

    # data about delta chain of each revs
    chainlengths = []
    chainbases = []
    chainspans = []

    # data about each revision; [min, max, total] accumulators
    datasize = [None, 0, 0]
    fullsize = [None, 0, 0]
    semisize = [None, 0, 0]
    # snapshot count per depth
    snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
    deltasize = [None, 0, 0]
    chunktypecounts = {}
    chunktypesizes = {}

    def addsize(size, l):
        # fold `size` into a [min, max, total] accumulator
        if l[0] is None or size < l[0]:
            l[0] = size
        if size > l[1]:
            l[1] = size
        l[2] += size

    numrevs = len(r)
    for rev in range(numrevs):
        p1, p2 = r.parentrevs(rev)
        delta = r.deltaparent(rev)
        if format > 0:
            addsize(r.rawsize(rev), datasize)
        if p2 != nodemod.nullrev:
            nummerges += 1
        size = r.length(rev)
        if delta == nodemod.nullrev:
            # full snapshot: starts a new delta chain
            chainlengths.append(0)
            chainbases.append(r.start(rev))
            chainspans.append(size)
            if size == 0:
                numempty += 1
                numemptytext += 1
            else:
                numfull += 1
                numsnapdepth[0] += 1
                addsize(size, fullsize)
                addsize(size, snapsizedepth[0])
        else:
            # is the delta base a Non-Ancestor Delta-base? such bases are
            # notable because they might not exist on the receiving side of
            # a bundle.
            nad = (
                delta != p1
                and delta != p2
                and not r.isancestorrev(delta, rev)
            )
            chainlengths.append(chainlengths[delta] + 1)
            baseaddr = chainbases[delta]
            revaddr = r.start(rev)
            chainbases.append(baseaddr)
            chainspans.append((revaddr - baseaddr) + size)
            if size == 0:
                numempty += 1
                numemptydelta += 1
            elif r.issnapshot(rev):
                addsize(size, semisize)
                numsemi += 1
                depth = r.snapshotdepth(rev)
                numsnapdepth[depth] += 1
                if nad:
                    numsnapdepth_nad[depth] += 1
                addsize(size, snapsizedepth[depth])
            else:
                addsize(size, deltasize)
                if delta == rev - 1:
                    numprev += 1
                    if delta == p1:
                        nump1prev += 1
                    elif delta == p2:
                        nump2prev += 1
                    elif nad:
                        numprev_nad += 1
                elif delta == p1:
                    nump1 += 1
                elif delta == p2:
                    nump2 += 1
                elif delta != nodemod.nullrev:
                    numother += 1
                    # BUGFIX: only count non-ancestor bases; the original
                    # incremented unconditionally, which contradicts the
                    # variable's declared meaning and made the
                    # "deltas against ancs" statistic always zero.
                    if nad:
                        numother_nad += 1

        # Obtain data on the raw chunks in the revlog.
        if util.safehasattr(r, '_getsegmentforrevs'):
            segment = r._getsegmentforrevs(rev, rev)[1]
        else:
            segment = r._revlog._getsegmentforrevs(rev, rev)[1]
        if segment:
            chunktype = bytes(segment[0:1])
        else:
            chunktype = b'empty'

        if chunktype not in chunktypecounts:
            chunktypecounts[chunktype] = 0
            chunktypesizes[chunktype] = 0

        chunktypecounts[chunktype] += 1
        chunktypesizes[chunktype] += size

    # Adjust size min value for empty cases
    for size in (datasize, fullsize, semisize, deltasize):
        if size[0] is None:
            size[0] = 0

    numdeltas = numrevs - numfull - numempty - numsemi
    numoprev = numprev - nump1prev - nump2prev - numprev_nad
    num_other_ancestors = numother - numother_nad
    totalrawsize = datasize[2]
    datasize[2] /= numrevs
    fulltotal = fullsize[2]
    if numfull == 0:
        fullsize[2] = 0
    else:
        fullsize[2] /= numfull
    semitotal = semisize[2]
    snaptotal = {}
    if numsemi > 0:
        semisize[2] /= numsemi
    for depth in snapsizedepth:
        snaptotal[depth] = snapsizedepth[depth][2]
        snapsizedepth[depth][2] /= numsnapdepth[depth]

    deltatotal = deltasize[2]
    if numdeltas > 0:
        deltasize[2] /= numdeltas
    totalsize = fulltotal + semitotal + deltatotal
    avgchainlen = sum(chainlengths) / numrevs
    maxchainlen = max(chainlengths)
    maxchainspan = max(chainspans)
    compratio = 1
    if totalsize:
        compratio = totalrawsize / totalsize

    basedfmtstr = b'%%%dd\n'
    basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'

    def dfmtstr(maxval):
        # decimal format wide enough for `maxval`
        return basedfmtstr % len(str(maxval))

    def pcfmtstr(maxval, padding=0):
        # "count (percent%)" format wide enough for `maxval`
        return basepcfmtstr % (len(str(maxval)), b' ' * padding)

    def pcfmt(value, total):
        # (value, percentage-of-total) pair for the format strings above
        if total:
            return (value, 100 * float(value) / total)
        else:
            return value, 100.0

    ui.writenoi18n(b'format : %d\n' % format)
    ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))

    ui.write(b'\n')
    fmt = pcfmtstr(totalsize)
    fmt2 = dfmtstr(totalsize)
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
    ui.writenoi18n(
        b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
    )
    ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
    ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
    ui.writenoi18n(
        b' text : '
        + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' delta : '
        + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
    )
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
    )
    for depth in sorted(numsnapdepth):
        base = b' lvl-%-3d : ' % depth
        count = fmt % pcfmt(numsnapdepth[depth], numrevs)
        pieces = [base, count]
        if numsnapdepth_nad[depth]:
            pieces[-1] = count = count[:-1]  # drop the final '\n'
            more = b' non-ancestor-bases: '
            anc_count = fmt
            anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
            pieces.append(more)
            pieces.append(anc_count)
        ui.write(b''.join(pieces))
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
    ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
    ui.writenoi18n(
        b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
    )
    for depth in sorted(numsnapdepth):
        ui.write(
            (b' lvl-%-3d : ' % depth)
            + fmt % pcfmt(snaptotal[depth], totalsize)
        )
    ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))

    letters = string.ascii_letters.encode('ascii')

    def fmtchunktype(chunktype):
        # human-readable label for a one-byte chunk-type marker
        if chunktype == b'empty':
            return b' %s : ' % chunktype
        elif chunktype in letters:
            return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
        else:
            return b' 0x%s : ' % nodemod.hex(chunktype)

    ui.write(b'\n')
    ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
    ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
    for chunktype in sorted(chunktypecounts):
        ui.write(fmtchunktype(chunktype))
        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))

    ui.write(b'\n')
    fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
    ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
    ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
    ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
    ui.writenoi18n(b'compression ratio : ' + fmt % compratio)

    if format > 0:
        ui.write(b'\n')
        ui.writenoi18n(
            b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
            % tuple(datasize)
        )
        ui.writenoi18n(
            b'full revision size (min/max/avg) : %d / %d / %d\n'
            % tuple(fullsize)
        )
        ui.writenoi18n(
            b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
            % tuple(semisize)
        )
        for depth in sorted(snapsizedepth):
            if depth == 0:
                continue
            ui.writenoi18n(
                b' level-%-3d (min/max/avg) : %d / %d / %d\n'
                % ((depth,) + tuple(snapsizedepth[depth]))
            )
        ui.writenoi18n(
            b'delta size (min/max/avg) : %d / %d / %d\n' % tuple(deltasize)
        )

    if numdeltas > 0:
        ui.write(b'\n')
        fmt = pcfmtstr(numdeltas)
        fmt2 = pcfmtstr(numdeltas, 4)
        ui.writenoi18n(
            b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
        )
        if numprev > 0:
            ui.writenoi18n(
                b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
            )
            ui.writenoi18n(
                b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
            )
            ui.writenoi18n(
                b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
            )
            # BUGFIX: the original printed `numoprev` here too (a copy-paste
            # of the line above), leaving `numprev_nad` unreported; the
            # "unrelated" row is the prev-is-not-an-ancestor count.
            ui.writenoi18n(
                b' unrelated : ' + fmt2 % pcfmt(numprev_nad, numprev)
            )
        if gdelta:
            ui.writenoi18n(
                b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against ancs : '
                + fmt % pcfmt(num_other_ancestors, numdeltas)
            )
            ui.writenoi18n(
                b'deltas against other : '
                + fmt % pcfmt(numother_nad, numdeltas)
            )