Mercurial > public > mercurial-scm > hg-stable
diff mercurial/changegroup.py @ 49739:35d4c2124073
debug: add an option to display statistic about a unbundling operation
This will helps a lot to understand how the bundling decision have actually
impacted pull/unbundle on the other side.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Mon, 17 Oct 2022 22:19:43 +0200 |
parents | 9cac281eb9c0 |
children | 152d9c011bcd |
line wrap: on
line diff
--- a/mercurial/changegroup.py Tue Nov 15 16:25:23 2022 +0100 +++ b/mercurial/changegroup.py Mon Oct 17 22:19:43 2022 +0200 @@ -105,6 +105,164 @@ os.unlink(cleanup) +def _dbg_ubdl_line( + ui, + indent, + key, + base_value=None, + percentage_base=None, + percentage_key=None, +): + """Print one line of debug_unbundle_debug_info""" + line = b"DEBUG-UNBUNDLING: " + line += b' ' * (2 * indent) + key += b":" + padding = b'' + if base_value is not None: + assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH + line += key.ljust(_KEY_PART_WIDTH - (2 * indent)) + if isinstance(base_value, float): + line += b"%14.3f seconds" % base_value + else: + line += b"%10d" % base_value + padding = b' ' + else: + line += key + + if percentage_base is not None: + line += padding + padding = b'' + assert base_value is not None + percentage = base_value * 100 // percentage_base + if percentage_key is not None: + line += b" (%3d%% of %s)" % ( + percentage, + percentage_key, + ) + else: + line += b" (%3d%%)" % percentage + + line += b'\n' + ui.write_err(line) + + +def _sumf(items): + # python < 3.8 does not support a `start=0.0` argument to sum + # So we have to cheat a bit until we drop support for those version + if not items: + return 0.0 + return sum(items) + + +def display_unbundle_debug_info(ui, debug_info): + """display an unbundling report from debug information""" + cl_info = [] + mn_info = [] + fl_info = [] + _dispatch = [ + (b'CHANGELOG:', cl_info), + (b'MANIFESTLOG:', mn_info), + (b'FILELOG:', fl_info), + ] + for e in debug_info: + for prefix, info in _dispatch: + if e["target-revlog"].startswith(prefix): + info.append(e) + break + else: + assert False, 'unreachable' + each_info = [ + (b'changelog', cl_info), + (b'manifests', mn_info), + (b'files', fl_info), + ] + + # General Revision Countss + _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info)) + for key, info in each_info: + if not info: + continue + _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info)) + + # General Time spent + all_durations = [e['duration'] for e in debug_info] + all_durations.sort() + total_duration = _sumf(all_durations) + _dbg_ubdl_line(ui, 0, b'total-time', total_duration) + + for key, info in each_info: + if not info: + continue + durations = [e['duration'] for e in info] + durations.sort() + _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration) + + # Count and cache reuse per delta types + each_types = {} + for key, info in each_info: + each_types[key] = types = { + b'full': 0, + b'full-cached': 0, + b'snapshot': 0, + b'snapshot-cached': 0, + b'delta': 0, + b'delta-cached': 0, + b'unknown': 0, + b'unknown-cached': 0, + } + for e in info: + types[e['type']] += 1 + if e['using-cached-base']: + types[e['type'] + b'-cached'] += 1 + + EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown') + if debug_info: + _dbg_ubdl_line(ui, 0, b'type-count') + for key, info in each_info: + if info: + _dbg_ubdl_line(ui, 1, key) + t = each_types[key] + for tn in EXPECTED_TYPES: + if t[tn]: + tc = tn + b'-cached' + _dbg_ubdl_line(ui, 2, tn, t[tn]) + _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn]) + + # time perf delta types and reuse + each_type_time = {} + for key, info in each_info: + each_type_time[key] = t = { + b'full': [], + b'full-cached': [], + b'snapshot': [], + b'snapshot-cached': [], + b'delta': [], + b'delta-cached': [], + b'unknown': [], + b'unknown-cached': [], + } + for e in info: + t[e['type']].append(e['duration']) + if e['using-cached-base']: + t[e['type'] + b'-cached'].append(e['duration']) + for t_key, value in list(t.items()): + value.sort() + t[t_key] = _sumf(value) + + if debug_info: + _dbg_ubdl_line(ui, 0, b'type-time') + for key, info in each_info: + if info: + _dbg_ubdl_line(ui, 1, key) + t = each_type_time[key] + td = total_duration # to same space on next lines + for tn in EXPECTED_TYPES: + if t[tn]: + tc = tn + b'-cached' + _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total") + _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total") + + class cg1unpacker: """Unpacker for cg1 changegroup streams. @@ -254,7 +412,15 @@ pos = next yield closechunk() - def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None): + def _unpackmanifests( + self, + repo, + revmap, + trp, + prog, + addrevisioncb=None, + debug_info=None, + ): self.callback = prog.increment # no need to check for empty manifest group here: # if the result of the merge of 1 and 2 is the same in 3 and 4, @@ -263,7 +429,13 @@ self.manifestheader() deltas = self.deltaiter() storage = repo.manifestlog.getstorage(b'') - storage.addgroup(deltas, revmap, trp, addrevisioncb=addrevisioncb) + storage.addgroup( + deltas, + revmap, + trp, + addrevisioncb=addrevisioncb, + debug_info=debug_info, + ) prog.complete() self.callback = None @@ -292,6 +464,10 @@ """ repo = repo.unfiltered() + debug_info = None + if repo.ui.configbool(b'debug', b'unbundling-stats'): + debug_info = [] + # Only useful if we're adding sidedata categories. If both peers have # the same categories, then we simply don't do anything. adding_sidedata = ( @@ -366,6 +542,7 @@ alwayscache=True, addrevisioncb=onchangelog, duplicaterevisioncb=ondupchangelog, + debug_info=debug_info, ): repo.ui.develwarn( b'applied empty changelog from changegroup', @@ -413,6 +590,7 @@ trp, progress, addrevisioncb=on_manifest_rev, + debug_info=debug_info, ) needfiles = {} @@ -449,6 +627,7 @@ efiles, needfiles, addrevisioncb=on_filelog_rev, + debug_info=debug_info, ) if sidedata_helpers: @@ -567,6 +746,8 @@ b'changegroup-runhooks-%020i' % clstart, lambda tr: repo._afterlock(runhooks), ) + if debug_info is not None: + display_unbundle_debug_info(repo.ui, debug_info) finally: repo.ui.flush() # never return 0 here: @@ -626,9 +807,22 @@ protocol_flags = 0 return node, p1, p2, deltabase, cs, flags, protocol_flags - def _unpackmanifests(self, repo, revmap, trp, prog, addrevisioncb=None): + def _unpackmanifests( + self, + repo, + revmap, + trp, + prog, + addrevisioncb=None, + debug_info=None, + ): super(cg3unpacker, self)._unpackmanifests( - repo, revmap, trp, prog, addrevisioncb=addrevisioncb + repo, + revmap, + trp, + prog, + addrevisioncb=addrevisioncb, + debug_info=debug_info, ) for chunkdata in iter(self.filelogheader, {}): # If we get here, there are directory manifests in the changegroup @@ -636,7 +830,11 @@ repo.ui.debug(b"adding %s revisions\n" % d) deltas = self.deltaiter() if not repo.manifestlog.getstorage(d).addgroup( - deltas, revmap, trp, addrevisioncb=addrevisioncb + deltas, + revmap, + trp, + addrevisioncb=addrevisioncb, + debug_info=debug_info, ): raise error.Abort(_(b"received dir revlog group is empty")) @@ -2173,6 +2371,7 @@ expectedfiles, needfiles, addrevisioncb=None, + debug_info=None, ): revisions = 0 files = 0 @@ -2193,6 +2392,7 @@ revmap, trp, addrevisioncb=addrevisioncb, + debug_info=debug_info, ) if not added: raise error.Abort(_(b"received file revlog group is empty"))