comparison mercurial/verify.py @ 27443:937e73a6e4ff

verify: move verify logic into a class. In order to allow extensions to hook into the verification logic more easily, we need to refactor it into multiple functions. The first step is to move it to a class so that the shared state can be more easily accessed.
author Durham Goode <durham@fb.com>
date Fri, 18 Dec 2015 16:42:39 -0800
parents d1c741644d25
children 6647401858ab
comparison
equal deleted inserted replaced
27442:f67c6d8cc606 27443:937e73a6e4ff
22 ) 22 )
23 23
24 def verify(repo): 24 def verify(repo):
25 lock = repo.lock() 25 lock = repo.lock()
26 try: 26 try:
27 return _verify(repo) 27 return verifier().verify(repo)
28 finally: 28 finally:
29 lock.release() 29 lock.release()
30 30
31 def _normpath(f): 31 def _normpath(f):
32 # under hg < 2.4, convert didn't sanitize paths properly, so a 32 # under hg < 2.4, convert didn't sanitize paths properly, so a
44 filelogs will be missing, and changelog entries may mention 44 filelogs will be missing, and changelog entries may mention
45 modified files that are outside the narrow scope. 45 modified files that are outside the narrow scope.
46 """ 46 """
47 return True 47 return True
48 48
49 def _verify(repo): 49 class verifier(object):
50 repo = repo.unfiltered() 50 def verify(self, repo):
51 mflinkrevs = {} 51 repo = repo.unfiltered()
52 filelinkrevs = {} 52 mflinkrevs = {}
53 filenodes = {} 53 filelinkrevs = {}
54 revisions = 0 54 filenodes = {}
55 badrevs = set() 55 revisions = 0
56 errors = [0] 56 badrevs = set()
57 warnings = [0] 57 errors = [0]
58 ui = repo.ui 58 warnings = [0]
59 cl = repo.changelog 59 ui = repo.ui
60 mf = repo.manifest 60 cl = repo.changelog
61 lrugetctx = util.lrucachefunc(repo.changectx) 61 mf = repo.manifest
62 62 lrugetctx = util.lrucachefunc(repo.changectx)
63 if not repo.url().startswith('file:'): 63
64 raise error.Abort(_("cannot verify bundle or remote repos")) 64 if not repo.url().startswith('file:'):
65 65 raise error.Abort(_("cannot verify bundle or remote repos"))
66 def err(linkrev, msg, filename=None): 66
67 if linkrev is not None: 67 def err(linkrev, msg, filename=None):
68 badrevs.add(linkrev) 68 if linkrev is not None:
69 else: 69 badrevs.add(linkrev)
70 linkrev = '?'
71 msg = "%s: %s" % (linkrev, msg)
72 if filename:
73 msg = "%s@%s" % (filename, msg)
74 ui.warn(" " + msg + "\n")
75 errors[0] += 1
76
77 def exc(linkrev, msg, inst, filename=None):
78 if isinstance(inst, KeyboardInterrupt):
79 ui.warn(_("interrupted"))
80 raise
81 if not str(inst):
82 inst = repr(inst)
83 err(linkrev, "%s: %s" % (msg, inst), filename)
84
85 def warn(msg):
86 ui.warn(msg + "\n")
87 warnings[0] += 1
88
89 def checklog(obj, name, linkrev):
90 if not len(obj) and (havecl or havemf):
91 err(linkrev, _("empty or missing %s") % name)
92 return
93
94 d = obj.checksize()
95 if d[0]:
96 err(None, _("data length off by %d bytes") % d[0], name)
97 if d[1]:
98 err(None, _("index contains %d extra bytes") % d[1], name)
99
100 if obj.version != revlog.REVLOGV0:
101 if not revlogv1:
102 warn(_("warning: `%s' uses revlog format 1") % name)
103 elif revlogv1:
104 warn(_("warning: `%s' uses revlog format 0") % name)
105
106 def checkentry(obj, i, node, seen, linkrevs, f):
107 lr = obj.linkrev(obj.rev(node))
108 if lr < 0 or (havecl and lr not in linkrevs):
109 if lr < 0 or lr >= len(cl):
110 msg = _("rev %d points to nonexistent changeset %d")
111 else: 70 else:
112 msg = _("rev %d points to unexpected changeset %d") 71 linkrev = '?'
113 err(None, msg % (i, lr), f) 72 msg = "%s: %s" % (linkrev, msg)
73 if filename:
74 msg = "%s@%s" % (filename, msg)
75 ui.warn(" " + msg + "\n")
76 errors[0] += 1
77
78 def exc(linkrev, msg, inst, filename=None):
79 if isinstance(inst, KeyboardInterrupt):
80 ui.warn(_("interrupted"))
81 raise
82 if not str(inst):
83 inst = repr(inst)
84 err(linkrev, "%s: %s" % (msg, inst), filename)
85
86 def warn(msg):
87 ui.warn(msg + "\n")
88 warnings[0] += 1
89
90 def checklog(obj, name, linkrev):
91 if not len(obj) and (havecl or havemf):
92 err(linkrev, _("empty or missing %s") % name)
93 return
94
95 d = obj.checksize()
96 if d[0]:
97 err(None, _("data length off by %d bytes") % d[0], name)
98 if d[1]:
99 err(None, _("index contains %d extra bytes") % d[1], name)
100
101 if obj.version != revlog.REVLOGV0:
102 if not revlogv1:
103 warn(_("warning: `%s' uses revlog format 1") % name)
104 elif revlogv1:
105 warn(_("warning: `%s' uses revlog format 0") % name)
106
107 def checkentry(obj, i, node, seen, linkrevs, f):
108 lr = obj.linkrev(obj.rev(node))
109 if lr < 0 or (havecl and lr not in linkrevs):
110 if lr < 0 or lr >= len(cl):
111 msg = _("rev %d points to nonexistent changeset %d")
112 else:
113 msg = _("rev %d points to unexpected changeset %d")
114 err(None, msg % (i, lr), f)
115 if linkrevs:
116 if f and len(linkrevs) > 1:
117 try:
118 # attempt to filter down to real linkrevs
119 linkrevs = [l for l in linkrevs
120 if lrugetctx(l)[f].filenode() == node]
121 except Exception:
122 pass
123 warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
124 lr = None # can't be trusted
125
126 try:
127 p1, p2 = obj.parents(node)
128 if p1 not in seen and p1 != nullid:
129 err(lr, _("unknown parent 1 %s of %s") %
130 (short(p1), short(node)), f)
131 if p2 not in seen and p2 != nullid:
132 err(lr, _("unknown parent 2 %s of %s") %
133 (short(p2), short(node)), f)
134 except Exception as inst:
135 exc(lr, _("checking parents of %s") % short(node), inst, f)
136
137 if node in seen:
138 err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
139 seen[node] = i
140 return lr
141
142 if os.path.exists(repo.sjoin("journal")):
143 ui.warn(_("abandoned transaction found - run hg recover\n"))
144
145 revlogv1 = cl.version != revlog.REVLOGV0
146 if ui.verbose or not revlogv1:
147 ui.status(_("repository uses revlog format %d\n") %
148 (revlogv1 and 1 or 0))
149
150 havecl = len(cl) > 0
151 havemf = len(mf) > 0
152
153 ui.status(_("checking changesets\n"))
154 refersmf = False
155 seen = {}
156 checklog(cl, "changelog", 0)
157 total = len(repo)
158 for i in repo:
159 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
160 n = cl.node(i)
161 checkentry(cl, i, n, seen, [i], "changelog")
162
163 try:
164 changes = cl.read(n)
165 if changes[0] != nullid:
166 mflinkrevs.setdefault(changes[0], []).append(i)
167 refersmf = True
168 for f in changes[3]:
169 if _validpath(repo, f):
170 filelinkrevs.setdefault(_normpath(f), []).append(i)
171 except Exception as inst:
172 refersmf = True
173 exc(i, _("unpacking changeset %s") % short(n), inst)
174 ui.progress(_('checking'), None)
175
176 ui.status(_("checking manifests\n"))
177 seen = {}
178 if refersmf:
179 # Do not check manifest if there are only changelog entries with
180 # null manifests.
181 checklog(mf, "manifest", 0)
182 total = len(mf)
183 for i in mf:
184 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
185 n = mf.node(i)
186 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
187 if n in mflinkrevs:
188 del mflinkrevs[n]
189 else:
190 err(lr, _("%s not in changesets") % short(n), "manifest")
191
192 try:
193 for f, fn in mf.readdelta(n).iteritems():
194 if not f:
195 err(lr, _("file without name in manifest"))
196 elif f != "/dev/null": # ignore this in very old repos
197 if _validpath(repo, f):
198 filenodes.setdefault(
199 _normpath(f), {}).setdefault(fn, lr)
200 except Exception as inst:
201 exc(lr, _("reading manifest delta %s") % short(n), inst)
202 ui.progress(_('checking'), None)
203
204 ui.status(_("crosschecking files in changesets and manifests\n"))
205
206 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
207 count = 0
208 if havemf:
209 for c, m in sorted([(c, m) for m in mflinkrevs
210 for c in mflinkrevs[m]]):
211 count += 1
212 if m == nullid:
213 continue
214 ui.progress(_('crosschecking'), count, total=total)
215 err(c, _("changeset refers to unknown manifest %s") % short(m))
216 mflinkrevs = None # del is bad here due to scope issues
217
218 for f in sorted(filelinkrevs):
219 count += 1
220 ui.progress(_('crosschecking'), count, total=total)
221 if f not in filenodes:
222 lr = filelinkrevs[f][0]
223 err(lr, _("in changeset but not in manifest"), f)
224
225 if havecl:
226 for f in sorted(filenodes):
227 count += 1
228 ui.progress(_('crosschecking'), count, total=total)
229 if f not in filelinkrevs:
230 try:
231 fl = repo.file(f)
232 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
233 except Exception:
234 lr = None
235 err(lr, _("in manifest but not in changeset"), f)
236
237 ui.progress(_('crosschecking'), None)
238
239 ui.status(_("checking files\n"))
240
241 storefiles = set()
242 for f, f2, size in repo.store.datafiles():
243 if not f:
244 err(None, _("cannot decode filename '%s'") % f2)
245 elif size > 0 or not revlogv1:
246 storefiles.add(_normpath(f))
247
248 fncachewarned = False
249 files = sorted(set(filenodes) | set(filelinkrevs))
250 total = len(files)
251 for i, f in enumerate(files):
252 ui.progress(_('checking'), i, item=f, total=total)
253 try:
254 linkrevs = filelinkrevs[f]
255 except KeyError:
256 # in manifest but not in changelog
257 linkrevs = []
258
114 if linkrevs: 259 if linkrevs:
115 if f and len(linkrevs) > 1: 260 lr = linkrevs[0]
116 try: 261 else:
117 # attempt to filter down to real linkrevs 262 lr = None
118 linkrevs = [l for l in linkrevs 263
119 if lrugetctx(l)[f].filenode() == node] 264 try:
120 except Exception: 265 fl = repo.file(f)
121 pass 266 except error.RevlogError as e:
122 warn(_(" (expected %s)") % " ".join(map(str, linkrevs))) 267 err(lr, _("broken revlog! (%s)") % e, f)
123 lr = None # can't be trusted
124
125 try:
126 p1, p2 = obj.parents(node)
127 if p1 not in seen and p1 != nullid:
128 err(lr, _("unknown parent 1 %s of %s") %
129 (short(p1), short(node)), f)
130 if p2 not in seen and p2 != nullid:
131 err(lr, _("unknown parent 2 %s of %s") %
132 (short(p2), short(node)), f)
133 except Exception as inst:
134 exc(lr, _("checking parents of %s") % short(node), inst, f)
135
136 if node in seen:
137 err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
138 seen[node] = i
139 return lr
140
141 if os.path.exists(repo.sjoin("journal")):
142 ui.warn(_("abandoned transaction found - run hg recover\n"))
143
144 revlogv1 = cl.version != revlog.REVLOGV0
145 if ui.verbose or not revlogv1:
146 ui.status(_("repository uses revlog format %d\n") %
147 (revlogv1 and 1 or 0))
148
149 havecl = len(cl) > 0
150 havemf = len(mf) > 0
151
152 ui.status(_("checking changesets\n"))
153 refersmf = False
154 seen = {}
155 checklog(cl, "changelog", 0)
156 total = len(repo)
157 for i in repo:
158 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
159 n = cl.node(i)
160 checkentry(cl, i, n, seen, [i], "changelog")
161
162 try:
163 changes = cl.read(n)
164 if changes[0] != nullid:
165 mflinkrevs.setdefault(changes[0], []).append(i)
166 refersmf = True
167 for f in changes[3]:
168 if _validpath(repo, f):
169 filelinkrevs.setdefault(_normpath(f), []).append(i)
170 except Exception as inst:
171 refersmf = True
172 exc(i, _("unpacking changeset %s") % short(n), inst)
173 ui.progress(_('checking'), None)
174
175 ui.status(_("checking manifests\n"))
176 seen = {}
177 if refersmf:
178 # Do not check manifest if there are only changelog entries with
179 # null manifests.
180 checklog(mf, "manifest", 0)
181 total = len(mf)
182 for i in mf:
183 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
184 n = mf.node(i)
185 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
186 if n in mflinkrevs:
187 del mflinkrevs[n]
188 else:
189 err(lr, _("%s not in changesets") % short(n), "manifest")
190
191 try:
192 for f, fn in mf.readdelta(n).iteritems():
193 if not f:
194 err(lr, _("file without name in manifest"))
195 elif f != "/dev/null": # ignore this in very old repos
196 if _validpath(repo, f):
197 filenodes.setdefault(
198 _normpath(f), {}).setdefault(fn, lr)
199 except Exception as inst:
200 exc(lr, _("reading manifest delta %s") % short(n), inst)
201 ui.progress(_('checking'), None)
202
203 ui.status(_("crosschecking files in changesets and manifests\n"))
204
205 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
206 count = 0
207 if havemf:
208 for c, m in sorted([(c, m) for m in mflinkrevs
209 for c in mflinkrevs[m]]):
210 count += 1
211 if m == nullid:
212 continue 268 continue
213 ui.progress(_('crosschecking'), count, total=total) 269
214 err(c, _("changeset refers to unknown manifest %s") % short(m)) 270 for ff in fl.files():
215 mflinkrevs = None # del is bad here due to scope issues
216
217 for f in sorted(filelinkrevs):
218 count += 1
219 ui.progress(_('crosschecking'), count, total=total)
220 if f not in filenodes:
221 lr = filelinkrevs[f][0]
222 err(lr, _("in changeset but not in manifest"), f)
223
224 if havecl:
225 for f in sorted(filenodes):
226 count += 1
227 ui.progress(_('crosschecking'), count, total=total)
228 if f not in filelinkrevs:
229 try: 271 try:
230 fl = repo.file(f) 272 storefiles.remove(ff)
231 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]]) 273 except KeyError:
232 except Exception: 274 warn(_(" warning: revlog '%s' not in fncache!") % ff)
233 lr = None 275 fncachewarned = True
234 err(lr, _("in manifest but not in changeset"), f) 276
235 277 checklog(fl, f, lr)
236 ui.progress(_('crosschecking'), None) 278 seen = {}
237 279 rp = None
238 ui.status(_("checking files\n")) 280 for i in fl:
239 281 revisions += 1
240 storefiles = set() 282 n = fl.node(i)
241 for f, f2, size in repo.store.datafiles(): 283 lr = checkentry(fl, i, n, seen, linkrevs, f)
242 if not f: 284 if f in filenodes:
243 err(None, _("cannot decode filename '%s'") % f2) 285 if havemf and n not in filenodes[f]:
244 elif size > 0 or not revlogv1: 286 err(lr, _("%s not in manifests") % (short(n)), f)
245 storefiles.add(_normpath(f)) 287 else:
246 288 del filenodes[f][n]
247 fncachewarned = False 289
248 files = sorted(set(filenodes) | set(filelinkrevs)) 290 # verify contents
249 total = len(files) 291 try:
250 for i, f in enumerate(files): 292 l = len(fl.read(n))
251 ui.progress(_('checking'), i, item=f, total=total) 293 rp = fl.renamed(n)
252 try: 294 if l != fl.size(i):
253 linkrevs = filelinkrevs[f] 295 if len(fl.revision(n)) != fl.size(i):
254 except KeyError: 296 err(lr, _("unpacked size is %s, %s expected") %
255 # in manifest but not in changelog 297 (l, fl.size(i)), f)
256 linkrevs = [] 298 except error.CensoredNodeError:
257 299 # experimental config: censor.policy
258 if linkrevs: 300 if ui.config("censor", "policy", "abort") == "abort":
259 lr = linkrevs[0] 301 err(lr, _("censored file data"), f)
260 else: 302 except Exception as inst:
261 lr = None 303 exc(lr, _("unpacking %s") % short(n), inst, f)
262 304
263 try: 305 # check renames
264 fl = repo.file(f) 306 try:
265 except error.RevlogError as e: 307 if rp:
266 err(lr, _("broken revlog! (%s)") % e, f) 308 if lr is not None and ui.verbose:
267 continue 309 ctx = lrugetctx(lr)
268 310 found = False
269 for ff in fl.files(): 311 for pctx in ctx.parents():
270 try: 312 if rp[0] in pctx:
271 storefiles.remove(ff) 313 found = True
272 except KeyError: 314 break
273 warn(_(" warning: revlog '%s' not in fncache!") % ff) 315 if not found:
274 fncachewarned = True 316 warn(_("warning: copy source of '%s' not"
275 317 " in parents of %s") % (f, ctx))
276 checklog(fl, f, lr) 318 fl2 = repo.file(rp[0])
277 seen = {} 319 if not len(fl2):
278 rp = None 320 err(lr, _("empty or missing copy source revlog "
279 for i in fl: 321 "%s:%s") % (rp[0], short(rp[1])), f)
280 revisions += 1 322 elif rp[1] == nullid:
281 n = fl.node(i) 323 ui.note(_("warning: %s@%s: copy source"
282 lr = checkentry(fl, i, n, seen, linkrevs, f) 324 " revision is nullid %s:%s\n")
325 % (f, lr, rp[0], short(rp[1])))
326 else:
327 fl2.rev(rp[1])
328 except Exception as inst:
329 exc(lr, _("checking rename of %s") % short(n), inst, f)
330
331 # cross-check
283 if f in filenodes: 332 if f in filenodes:
284 if havemf and n not in filenodes[f]: 333 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
285 err(lr, _("%s not in manifests") % (short(n)), f) 334 for lr, node in sorted(fns):
286 else: 335 err(lr, _("%s in manifests not found") % short(node), f)
287 del filenodes[f][n] 336 ui.progress(_('checking'), None)
288 337
289 # verify contents 338 for f in storefiles:
290 try: 339 warn(_("warning: orphan revlog '%s'") % f)
291 l = len(fl.read(n)) 340
292 rp = fl.renamed(n) 341 ui.status(_("%d files, %d changesets, %d total revisions\n") %
293 if l != fl.size(i): 342 (len(files), len(cl), revisions))
294 if len(fl.revision(n)) != fl.size(i): 343 if warnings[0]:
295 err(lr, _("unpacked size is %s, %s expected") % 344 ui.warn(_("%d warnings encountered!\n") % warnings[0])
296 (l, fl.size(i)), f) 345 if fncachewarned:
297 except error.CensoredNodeError: 346 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
298 # experimental config: censor.policy 347 'corrupt fncache\n'))
299 if ui.config("censor", "policy", "abort") == "abort": 348 if errors[0]:
300 err(lr, _("censored file data"), f) 349 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
301 except Exception as inst: 350 if badrevs:
302 exc(lr, _("unpacking %s") % short(n), inst, f) 351 ui.warn(_("(first damaged changeset appears to be %d)\n")
303 352 % min(badrevs))
304 # check renames 353 return 1
305 try:
306 if rp:
307 if lr is not None and ui.verbose:
308 ctx = lrugetctx(lr)
309 found = False
310 for pctx in ctx.parents():
311 if rp[0] in pctx:
312 found = True
313 break
314 if not found:
315 warn(_("warning: copy source of '%s' not"
316 " in parents of %s") % (f, ctx))
317 fl2 = repo.file(rp[0])
318 if not len(fl2):
319 err(lr, _("empty or missing copy source revlog %s:%s")
320 % (rp[0], short(rp[1])), f)
321 elif rp[1] == nullid:
322 ui.note(_("warning: %s@%s: copy source"
323 " revision is nullid %s:%s\n")
324 % (f, lr, rp[0], short(rp[1])))
325 else:
326 fl2.rev(rp[1])
327 except Exception as inst:
328 exc(lr, _("checking rename of %s") % short(n), inst, f)
329
330 # cross-check
331 if f in filenodes:
332 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
333 for lr, node in sorted(fns):
334 err(lr, _("%s in manifests not found") % short(node), f)
335 ui.progress(_('checking'), None)
336
337 for f in storefiles:
338 warn(_("warning: orphan revlog '%s'") % f)
339
340 ui.status(_("%d files, %d changesets, %d total revisions\n") %
341 (len(files), len(cl), revisions))
342 if warnings[0]:
343 ui.warn(_("%d warnings encountered!\n") % warnings[0])
344 if fncachewarned:
345 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
346 'corrupt fncache\n'))
347 if errors[0]:
348 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
349 if badrevs:
350 ui.warn(_("(first damaged changeset appears to be %d)\n")
351 % min(badrevs))
352 return 1