diff mercurial/verify.py @ 28203:7297e9e13a8a

verify: check directory manifests In repos with treemanifests, there is no specific verification of directory manifest revlogs. It simply collects all file nodes by reading each manifest delta. With treemanifests, that's means calling the manifest._slowreaddelta(). If there are missing revlog entries in a subdirectory revlog, 'hg verify' will simply report the exception that occurred while trying to read the root manifest: manifest@0: reading delta 1700e2e92882: meta/b/00manifest.i@67688a370455: no node This patch changes the verify code to load only the root manifest at first and verify all revisions of it, then verify all revisions of each direct subdirectory, and so on, recursively. The above message becomes b/@0: parent-directory manifest refers to unknown revision 67688a370455 Since the new algorithm reads a single revlog at a time and in order, 'hg verify' on a treemanifest version of the hg core repo goes from ~50s to ~14s. As expected, there is no significant difference on a repo with flat manifests.
author Martin von Zweigbergk <martinvonz@google.com>
date Sun, 07 Feb 2016 21:13:24 -0800
parents bd279da57e4b
children 962921c330b0
line wrap: on
line diff
--- a/mercurial/verify.py	Sat Feb 20 17:44:29 2016 -0800
+++ b/mercurial/verify.py	Sun Feb 07 21:13:24 2016 -0800
@@ -197,46 +197,73 @@
         ui.progress(_('checking'), None)
         return mflinkrevs, filelinkrevs
 
-    def _verifymanifest(self, mflinkrevs):
+    def _verifymanifest(self, mflinkrevs, dir=""):
         repo = self.repo
         ui = self.ui
-        mf = self.repo.manifest
+        mf = self.repo.manifest.dirlog(dir)
 
-        ui.status(_("checking manifests\n"))
+        if not dir:
+            self.ui.status(_("checking manifests\n"))
+
         filenodes = {}
+        subdirnodes = {}
         seen = {}
         label = "manifest"
+        if dir:
+            label = dir
         if self.refersmf:
             # Do not check manifest if there are only changelog entries with
             # null manifests.
             self.checklog(mf, label, 0)
         total = len(mf)
         for i in mf:
-            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
+            if not dir:
+                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
             n = mf.node(i)
             lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
             if n in mflinkrevs:
                 del mflinkrevs[n]
+            elif dir:
+                self.err(lr, _("%s not in parent-directory manifest") %
+                         short(n), label)
             else:
                 self.err(lr, _("%s not in changesets") % short(n), label)
 
             try:
-                for f, fn in mf.readdelta(n).iteritems():
+                for f, fn, fl in mf.readshallowdelta(n).iterentries():
                     if not f:
-                        self.err(lr, _("file without name in manifest"))
-                    elif f != "/dev/null": # ignore this in very old repos
-                        if _validpath(repo, f):
-                            filenodes.setdefault(
-                                _normpath(f), {}).setdefault(fn, lr)
+                        self.err(lr, _("entry without name in manifest"))
+                    elif f == "/dev/null":  # ignore this in very old repos
+                        continue
+                    fullpath = dir + _normpath(f)
+                    if not _validpath(repo, fullpath):
+                        continue
+                    if fl == 't':
+                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
+                            fn, []).append(lr)
+                    else:
+                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
             except Exception as inst:
                 self.exc(lr, _("reading delta %s") % short(n), inst, label)
-        ui.progress(_('checking'), None)
+        if not dir:
+            ui.progress(_('checking'), None)
 
         if self.havemf:
             for c, m in sorted([(c, m) for m in mflinkrevs
                         for c in mflinkrevs[m]]):
-                self.err(c, _("changeset refers to unknown revision %s") %
-                         short(m), label)
+                if dir:
+                    self.err(c, _("parent-directory manifest refers to unknown "
+                                  "revision %s") % short(m), label)
+                else:
+                    self.err(c, _("changeset refers to unknown revision %s") %
+                             short(m), label)
+
+        if not dir and subdirnodes:
+            self.ui.status(_("checking directory manifests\n"))
+        for subdir, linkrevs in subdirnodes.iteritems():
+            subdirfilenodes = self._verifymanifest(linkrevs, subdir)
+            for f, onefilenodes in subdirfilenodes.iteritems():
+                filenodes.setdefault(f, {}).update(onefilenodes)
 
         return filenodes