diff hgext/remotefilelog/debugcommands.py @ 40545:3a333a582d7b
remotefilelog: import pruned-down remotefilelog extension from hg-experimental
This is remotefilelog as of my recent patches for compatibility with
current tip of hg, minus support for old versions of Mercurial and
some FB-specific features like their treemanifest extension and
fetching linkrev data from a patched Phabricator. The file extutil.py
moved from hgext3rd to remotefilelog.
This is not yet ready to be landed; consider it a preview for
now. Planned changes include:
* replace lz4 with zstd
* rename some capabilities, requirements and wireproto commands to mark
them as experimental
* consolidate bits of shallowutil with related functions (e.g. readfile)
I'm certainly open to other (small) changes, but my rough mission is
to land this largely as-is so we can use it as a model of the
functionality we need going forward for lazy-fetching of file contents
from a server.
# no-check-commit because of a few foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D4782
author    Augie Fackler <augie@google.com>
date      Thu, 27 Sep 2018 13:03:19 -0400
parents
children  e2a1584e9e3f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/remotefilelog/debugcommands.py	Thu Sep 27 13:03:19 2018 -0400
@@ -0,0 +1,375 @@
+# debugcommands.py - debug logic for remotefilelog
+#
+# Copyright 2013 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import hashlib
+import os
+
+from mercurial.node import bin, hex, nullid, short
+from mercurial.i18n import _
+from mercurial import (
+    error,
+    filelog,
+    revlog,
+)
+from . import (
+    constants,
+    datapack,
+    extutil,
+    fileserverclient,
+    historypack,
+    lz4wrapper,
+    repack,
+    shallowrepo,
+    shallowutil,
+)
+
+def debugremotefilelog(ui, path, **opts):
+    decompress = opts.get('decompress')
+
+    size, firstnode, mapping = parsefileblob(path, decompress)
+
+    ui.status(_("size: %s bytes\n") % (size))
+    ui.status(_("path: %s \n") % (path))
+    ui.status(_("key: %s \n") % (short(firstnode)))
+    ui.status(_("\n"))
+    ui.status(_("%12s => %12s %13s %13s %12s\n") %
+              ("node", "p1", "p2", "linknode", "copyfrom"))
+
+    queue = [firstnode]
+    while queue:
+        node = queue.pop(0)
+        p1, p2, linknode, copyfrom = mapping[node]
+        ui.status(_("%s => %s %s %s %s\n") %
+            (short(node), short(p1), short(p2), short(linknode), copyfrom))
+        if p1 != nullid:
+            queue.append(p1)
+        if p2 != nullid:
+            queue.append(p2)
+
+def buildtemprevlog(repo, file):
+    # get filename key
+    filekey = hashlib.sha1(file).hexdigest()
+    filedir = os.path.join(repo.path, 'store/data', filekey)
+
+    # sort all entries based on linkrev
+    fctxs = []
+    for filenode in os.listdir(filedir):
+        if '_old' not in filenode:
+            fctxs.append(repo.filectx(file, fileid=bin(filenode)))
+
+    fctxs = sorted(fctxs, key=lambda x: x.linkrev())
+
+    # add to revlog
+    temppath = repo.sjoin('data/temprevlog.i')
+    if os.path.exists(temppath):
+        os.remove(temppath)
+    r = filelog.filelog(repo.svfs, 'temprevlog')
+
+    class faket(object):
+        def add(self, a, b, c):
+            pass
+    t = faket()
+    for fctx in fctxs:
+        if fctx.node() not in repo:
+            continue
+
+        p = fctx.filelog().parents(fctx.filenode())
+        meta = {}
+        if fctx.renamed():
+            meta['copy'] = fctx.renamed()[0]
+            meta['copyrev'] = hex(fctx.renamed()[1])
+
+        r.add(fctx.data(), meta, t, fctx.linkrev(), p[0], p[1])
+
+    return r
+
+def debugindex(orig, ui, repo, file_=None, **opts):
+    """dump the contents of an index file"""
+    if (opts.get('changelog') or
+        opts.get('manifest') or
+        opts.get('dir') or
+        not shallowrepo.requirement in repo.requirements or
+        not repo.shallowmatch(file_)):
+        return orig(ui, repo, file_, **opts)
+
+    r = buildtemprevlog(repo, file_)
+
+    # debugindex like normal
+    format = opts.get('format', 0)
+    if format not in (0, 1):
+        raise error.Abort(_("unknown format %d") % format)
+
+    generaldelta = r.version & revlog.FLAG_GENERALDELTA
+    if generaldelta:
+        basehdr = ' delta'
+    else:
+        basehdr = ' base'
+
+    if format == 0:
+        ui.write((" rev offset length " + basehdr + " linkrev"
+                  " nodeid p1 p2\n"))
+    elif format == 1:
+        ui.write((" rev flag offset length"
+                  " size " + basehdr + " link p1 p2"
+                  " nodeid\n"))
+
+    for i in r:
+        node = r.node(i)
+        if generaldelta:
+            base = r.deltaparent(i)
+        else:
+            base = r.chainbase(i)
+        if format == 0:
+            try:
+                pp = r.parents(node)
+            except Exception:
+                pp = [nullid, nullid]
+            ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
+                i, r.start(i), r.length(i), base, r.linkrev(i),
+                short(node), short(pp[0]), short(pp[1])))
+        elif format == 1:
+            pr = r.parentrevs(i)
+            ui.write("% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d % 6d %s\n" % (
+                i, r.flags(i), r.start(i), r.length(i), r.rawsize(i),
+                base, r.linkrev(i), pr[0], pr[1], short(node)))
+
+def debugindexdot(orig, ui, repo, file_):
+    """dump an index DAG as a graphviz dot file"""
+    if not shallowrepo.requirement in repo.requirements:
+        return orig(ui, repo, file_)
+
+    r = buildtemprevlog(repo, os.path.basename(file_)[:-2])
+
+    ui.write(("digraph G {\n"))
+    for i in r:
+        node = r.node(i)
+        pp = r.parents(node)
+        ui.write("\t%d -> %d\n" % (r.rev(pp[0]), i))
+        if pp[1] != nullid:
+            ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
+    ui.write("}\n")
+
+def verifyremotefilelog(ui, path, **opts):
+    decompress = opts.get('decompress')
+
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            if file == "repos":
+                continue
+            filepath = os.path.join(root, file)
+            size, firstnode, mapping = parsefileblob(filepath, decompress)
+            for p1, p2, linknode, copyfrom in mapping.itervalues():
+                if linknode == nullid:
+                    actualpath = os.path.relpath(root, path)
+                    key = fileserverclient.getcachekey("reponame", actualpath,
+                                                       file)
+                    ui.status("%s %s\n" % (key, os.path.relpath(filepath,
+                                                                path)))
+
+def parsefileblob(path, decompress):
+    raw = None
+    f = open(path, "r")
+    try:
+        raw = f.read()
+    finally:
+        f.close()
+
+    if decompress:
+        raw = lz4wrapper.lz4decompress(raw)
+
+    offset, size, flags = shallowutil.parsesizeflags(raw)
+    start = offset + size
+
+    firstnode = None
+
+    mapping = {}
+    while start < len(raw):
+        divider = raw.index('\0', start + 80)
+
+        currentnode = raw[start:(start + 20)]
+        if not firstnode:
+            firstnode = currentnode
+
+        p1 = raw[(start + 20):(start + 40)]
+        p2 = raw[(start + 40):(start + 60)]
+        linknode = raw[(start + 60):(start + 80)]
+        copyfrom = raw[(start + 80):divider]
+
+        mapping[currentnode] = (p1, p2, linknode, copyfrom)
+        start = divider + 1
+
+    return size, firstnode, mapping
+
+def debugdatapack(ui, *paths, **opts):
+    for path in paths:
+        if '.data' in path:
+            path = path[:path.index('.data')]
+        ui.write("%s:\n" % path)
+        dpack = datapack.datapack(path)
+        node = opts.get('node')
+        if node:
+            deltachain = dpack.getdeltachain('', bin(node))
+            dumpdeltachain(ui, deltachain, **opts)
+            return
+
+        if opts.get('long'):
+            hashformatter = hex
+            hashlen = 42
+        else:
+            hashformatter = short
+            hashlen = 14
+
+        lastfilename = None
+        totaldeltasize = 0
+        totalblobsize = 0
+        def printtotals():
+            if lastfilename is not None:
+                ui.write("\n")
+            if not totaldeltasize or not totalblobsize:
+                return
+            difference = totalblobsize - totaldeltasize
+            deltastr = "%0.1f%% %s" % (
+                (100.0 * abs(difference) / totalblobsize),
+                ("smaller" if difference > 0 else "bigger"))
+
+            ui.write(("Total:%s%s %s (%s)\n") % (
+                "".ljust(2 * hashlen - len("Total:")),
+                str(totaldeltasize).ljust(12),
+                str(totalblobsize).ljust(9),
+                deltastr
+            ))
+
+        bases = {}
+        nodes = set()
+        failures = 0
+        for filename, node, deltabase, deltalen in dpack.iterentries():
+            bases[node] = deltabase
+            if node in nodes:
+                ui.write(("Bad entry: %s appears twice\n" % short(node)))
+                failures += 1
+            nodes.add(node)
+            if filename != lastfilename:
+                printtotals()
+                name = '(empty name)' if filename == '' else filename
+                ui.write("%s:\n" % name)
+                ui.write("%s%s%s%s\n" % (
+                    "Node".ljust(hashlen),
+                    "Delta Base".ljust(hashlen),
+                    "Delta Length".ljust(14),
+                    "Blob Size".ljust(9)))
+                lastfilename = filename
+                totalblobsize = 0
+                totaldeltasize = 0
+
+            # Metadata could be missing, in which case it will be an empty dict.
+            meta = dpack.getmeta(filename, node)
+            if constants.METAKEYSIZE in meta:
+                blobsize = meta[constants.METAKEYSIZE]
+                totaldeltasize += deltalen
+                totalblobsize += blobsize
+            else:
+                blobsize = "(missing)"
+            ui.write("%s %s %s%s\n" % (
+                hashformatter(node),
+                hashformatter(deltabase),
+                str(deltalen).ljust(14),
+                blobsize))
+
+        if filename is not None:
+            printtotals()
+
+        failures += _sanitycheck(ui, set(nodes), bases)
+        if failures > 1:
+            ui.warn(("%d failures\n" % failures))
+            return 1
+
+def _sanitycheck(ui, nodes, bases):
+    """
+    Does some basic sanity checking on a packfiles with ``nodes`` ``bases`` (a
+    mapping of node->base):
+
+    - Each deltabase must itself be a node elsewhere in the pack
+    - There must be no cycles
+    """
+    failures = 0
+    for node in nodes:
+        seen = set()
+        current = node
+        deltabase = bases[current]
+
+        while deltabase != nullid:
+            if deltabase not in nodes:
+                ui.warn(("Bad entry: %s has an unknown deltabase (%s)\n" %
+                        (short(node), short(deltabase))))
+                failures += 1
+                break
+
+            if deltabase in seen:
+                ui.warn(("Bad entry: %s has a cycle (at %s)\n" %
+                        (short(node), short(deltabase))))
+                failures += 1
+                break
+
+            current = deltabase
+            seen.add(current)
+            deltabase = bases[current]
+        # Since ``node`` begins a valid chain, reset/memoize its base to nullid
+        # so we don't traverse it again.
+        bases[node] = nullid
+    return failures
+
+def dumpdeltachain(ui, deltachain, **opts):
+    hashformatter = hex
+    hashlen = 40
+
+    lastfilename = None
+    for filename, node, filename, deltabasenode, delta in deltachain:
+        if filename != lastfilename:
+            ui.write("\n%s\n" % filename)
+            lastfilename = filename
+        ui.write("%s %s %s %s\n" % (
+            "Node".ljust(hashlen),
+            "Delta Base".ljust(hashlen),
+            "Delta SHA1".ljust(hashlen),
+            "Delta Length".ljust(6),
+        ))
+
+        ui.write("%s %s %s %s\n" % (
+            hashformatter(node),
+            hashformatter(deltabasenode),
+            hashlib.sha1(delta).hexdigest(),
+            len(delta)))
+
+def debughistorypack(ui, path):
+    if '.hist' in path:
+        path = path[:path.index('.hist')]
+    hpack = historypack.historypack(path)
+
+    lastfilename = None
+    for entry in hpack.iterentries():
+        filename, node, p1node, p2node, linknode, copyfrom = entry
+        if filename != lastfilename:
+            ui.write("\n%s\n" % filename)
+            ui.write("%s%s%s%s%s\n" % (
+                "Node".ljust(14),
+                "P1 Node".ljust(14),
+                "P2 Node".ljust(14),
+                "Link Node".ljust(14),
+                "Copy From"))
+            lastfilename = filename
+        ui.write("%s %s %s %s %s\n" % (short(node), short(p1node),
+                 short(p2node), short(linknode), copyfrom))
+
+def debugwaitonrepack(repo):
+    with extutil.flock(repack.repacklockvfs(repo).join('repacklock'), ''):
+        return
+
+def debugwaitonprefetch(repo):
+    with repo._lock(repo.svfs, "prefetchlock", True, None,
+                    None, _('prefetching in %s') % repo.origroot):
+        pass
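As a rough illustration of the on-disk layout that parsefileblob() above walks, here is a minimal, self-contained Python 3 sketch. It is not part of the change itself: the header handling is an assumption (a legacy "<decimal size>\0" prefix followed by the file text, standing in for what shallowutil.parsesizeflags() does), and the helper names makeblob/parseblob are hypothetical. Only the fixed-width ancestor records (20-byte node, p1, p2, linknode, then copyfrom up to a NUL) mirror the loop in the patch.

nullid = b"\0" * 20

def makeblob(text, entries):
    # entries: list of (node, p1, p2, linknode, copyfrom) tuples,
    # where node/p1/p2/linknode are exactly 20 bytes each.
    blob = b"%d\0" % len(text) + text          # assumed legacy header
    for node, p1, p2, linknode, copyfrom in entries:
        blob += node + p1 + p2 + linknode + copyfrom + b"\0"
    return blob

def parseblob(raw):
    # Mirrors parsefileblob(): skip the header and file text, then read a
    # fixed 80-byte node/p1/p2/linknode prefix and a NUL-terminated
    # copyfrom for each ancestor record.
    headerend = raw.index(b"\0") + 1
    size = int(raw[:headerend - 1])
    start = headerend + size
    mapping = {}
    while start < len(raw):
        divider = raw.index(b"\0", start + 80)
        node = raw[start:start + 20]
        p1 = raw[start + 20:start + 40]
        p2 = raw[start + 40:start + 60]
        linknode = raw[start + 60:start + 80]
        copyfrom = raw[start + 80:divider]
        mapping[node] = (p1, p2, linknode, copyfrom)
        start = divider + 1
    return size, mapping

if __name__ == "__main__":
    node = b"\x11" * 20
    linknode = b"\x22" * 20
    blob = makeblob(b"hello\n", [(node, nullid, nullid, linknode, b"")])
    print(parseblob(blob))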