Mercurial > public > mercurial-scm > hg
comparison mercurial/verify.py @ 6752:e79a8f36c2a5
verify: lots of refactoring
- simplify finding first bad rev
- no need to count changesets
- add exc function to simplify exception handling
- combine checksize and checkversion to checklog
- unify missing revlog detection in checklog
- add checkentry to consolidate
  - linkrev lookup
  - detailed check of linkrev
  - detailed check of parents
  - duplicate checking
- use checkentry for changelog, manifest, and files
- simplify havecl and havemf
- track all changesets referring to a manifest
- move unnamed file check somewhere more useful
- reorder crosschecks
- fix filenodes crosscheck and add exception handling
- check unpacked size field
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 26 Jun 2008 14:35:50 -0500 |
parents | 7424a75f919a |
children | f67d1468ac50 |
comparison
equal
deleted
inserted
replaced
6751:7424a75f919a | 6752:e79a8f36c2a5 |
---|---|
15 return _verify(repo) | 15 return _verify(repo) |
16 finally: | 16 finally: |
17 del lock | 17 del lock |
18 | 18 |
19 def _verify(repo): | 19 def _verify(repo): |
20 mflinkrevs = {} | |
20 filelinkrevs = {} | 21 filelinkrevs = {} |
21 filenodes = {} | 22 filenodes = {} |
22 changesets = revisions = files = 0 | 23 revisions = 0 |
23 firstbad = [None] | 24 badrevs = {} |
24 errors = [0] | 25 errors = [0] |
25 warnings = [0] | 26 warnings = [0] |
26 neededmanifests = {} | |
27 ui = repo.ui | 27 ui = repo.ui |
28 cl = repo.changelog | 28 cl = repo.changelog |
29 mf = repo.manifest | 29 mf = repo.manifest |
30 | 30 |
31 def err(linkrev, msg, filename=None): | 31 def err(linkrev, msg, filename=None): |
32 if linkrev != None: | 32 if linkrev != None: |
33 if firstbad[0] != None: | 33 badrevs[linkrev] = True |
34 firstbad[0] = min(firstbad[0], linkrev) | |
35 else: | |
36 firstbad[0] = linkrev | |
37 else: | 34 else: |
38 linkrev = "?" | 35 linkrev = '?' |
39 msg = "%s: %s" % (linkrev, msg) | 36 msg = "%s: %s" % (linkrev, msg) |
40 if filename: | 37 if filename: |
41 msg = "%s@%s" % (filename, msg) | 38 msg = "%s@%s" % (filename, msg) |
42 ui.warn(" " + msg + "\n") | 39 ui.warn(" " + msg + "\n") |
43 errors[0] += 1 | 40 errors[0] += 1 |
44 | 41 |
42 def exc(linkrev, msg, inst, filename=None): | |
43 if isinstance(inst, KeyboardInterrupt): | |
44 ui.warn(_("interrupted")) | |
45 raise | |
46 err(linkrev, "%s: %s" % (msg, inst), filename) | |
47 | |
45 def warn(msg): | 48 def warn(msg): |
46 ui.warn(msg + "\n") | 49 ui.warn(msg + "\n") |
47 warnings[0] += 1 | 50 warnings[0] += 1 |
48 | 51 |
49 def checksize(obj, name): | 52 def checklog(obj, name): |
53 if not len(obj) and (havecl or havemf): | |
54 err(0, _("empty or missing %s") % name) | |
55 return | |
56 | |
50 d = obj.checksize() | 57 d = obj.checksize() |
51 if d[0]: | 58 if d[0]: |
52 err(None, _("data length off by %d bytes") % d[0], name) | 59 err(None, _("data length off by %d bytes") % d[0], name) |
53 if d[1]: | 60 if d[1]: |
54 err(None, _("index contains %d extra bytes") % d[1], name) | 61 err(None, _("index contains %d extra bytes") % d[1], name) |
55 | 62 |
56 def checkversion(obj, name): | |
57 if obj.version != revlog.REVLOGV0: | 63 if obj.version != revlog.REVLOGV0: |
58 if not revlogv1: | 64 if not revlogv1: |
59 warn(_("warning: `%s' uses revlog format 1") % name) | 65 warn(_("warning: `%s' uses revlog format 1") % name) |
60 elif revlogv1: | 66 elif revlogv1: |
61 warn(_("warning: `%s' uses revlog format 0") % name) | 67 warn(_("warning: `%s' uses revlog format 0") % name) |
62 | 68 |
69 def checkentry(obj, i, node, seen, linkrevs, f): | |
70 lr = obj.linkrev(node) | |
71 if lr < 0 or (havecl and lr not in linkrevs): | |
72 t = "unexpected" | |
73 if lr < 0 or lr >= len(cl): | |
74 t = "nonexistent" | |
75 err(None, _("rev %d point to %s changeset %d") % (i, t, lr), f) | |
76 if linkrevs: | |
77 warn(_(" (expected %s)") % " ".join(map(str,linkrevs))) | |
78 lr = None # can't be trusted | |
79 | |
80 try: | |
81 p1, p2 = obj.parents(node) | |
82 if p1 not in seen and p1 != nullid: | |
83 err(lr, _("unknown parent 1 %s of %s") % | |
84 (short(p1), short(n)), f) | |
85 if p2 not in seen and p2 != nullid: | |
86 err(lr, _("unknown parent 2 %s of %s") % | |
87 (short(p2), short(p1)), f) | |
88 except Exception, inst: | |
89 exc(lr, _("checking parents of %s") % short(node), inst, f) | |
90 | |
91 if node in seen: | |
92 err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f) | |
93 seen[n] = i | |
94 return lr | |
95 | |
63 revlogv1 = cl.version != revlog.REVLOGV0 | 96 revlogv1 = cl.version != revlog.REVLOGV0 |
64 if ui.verbose or not revlogv1: | 97 if ui.verbose or not revlogv1: |
65 ui.status(_("repository uses revlog format %d\n") % | 98 ui.status(_("repository uses revlog format %d\n") % |
66 (revlogv1 and 1 or 0)) | 99 (revlogv1 and 1 or 0)) |
67 | 100 |
68 havecl = havemf = 1 | 101 havecl = len(cl) > 0 |
102 havemf = len(mf) > 0 | |
103 | |
104 ui.status(_("checking changesets\n")) | |
69 seen = {} | 105 seen = {} |
70 ui.status(_("checking changesets\n")) | 106 checklog(cl, "changelog") |
71 if not len(cl) and len(mf): | |
72 havecl = 0 | |
73 err(0, _("empty or missing 00changelog.i")) | |
74 else: | |
75 checksize(cl, "changelog") | |
76 | |
77 for i in repo: | 107 for i in repo: |
78 changesets += 1 | |
79 n = cl.node(i) | 108 n = cl.node(i) |
80 l = cl.linkrev(n) | 109 checkentry(cl, i, n, seen, [i], "changelog") |
81 if l != i: | 110 |
82 err(i, _("incorrect link (%d) for changeset") %(l)) | |
83 if n in seen: | |
84 err(i, _("duplicates changeset at revision %d") % seen[n]) | |
85 seen[n] = i | |
86 | |
87 for p in cl.parents(n): | |
88 if p not in cl.nodemap: | |
89 err(i, _("changeset has unknown parent %s") % short(p)) | |
90 try: | 111 try: |
91 changes = cl.read(n) | 112 changes = cl.read(n) |
92 except KeyboardInterrupt: | 113 mflinkrevs.setdefault(changes[0], []).append(i) |
93 ui.warn(_("interrupted")) | 114 for f in changes[3]: |
94 raise | 115 filelinkrevs.setdefault(f, []).append(i) |
95 except Exception, inst: | 116 except Exception, inst: |
96 err(i, _("unpacking changeset: %s") % inst) | 117 exc(i, _("unpacking changeset %s") % short(n), inst) |
97 continue | 118 |
98 | 119 ui.status(_("checking manifests\n")) |
99 if changes[0] not in neededmanifests: | |
100 neededmanifests[changes[0]] = i | |
101 | |
102 for f in changes[3]: | |
103 filelinkrevs.setdefault(f, []).append(i) | |
104 | |
105 seen = {} | 120 seen = {} |
106 ui.status(_("checking manifests\n")) | 121 checklog(mf, "manifest") |
107 if len(cl) and not len(mf): | |
108 havemf = 0 | |
109 err(0, _("empty or missing 00manifest.i")) | |
110 else: | |
111 checkversion(mf, "manifest") | |
112 checksize(mf, "manifest") | |
113 | |
114 for i in mf: | 122 for i in mf: |
115 n = mf.node(i) | 123 n = mf.node(i) |
116 l = mf.linkrev(n) | 124 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest") |
117 | 125 if n in mflinkrevs: |
118 if l < 0 or (havecl and l >= len(cl)): | 126 del mflinkrevs[n] |
119 err(None, _("bad link (%d) at manifest revision %d") % (l, i)) | |
120 | |
121 if n in neededmanifests: | |
122 del neededmanifests[n] | |
123 | |
124 if n in seen: | |
125 err(l, _("duplicates manifest from %d") % seen[n]) | |
126 | |
127 seen[n] = l | |
128 | |
129 for p in mf.parents(n): | |
130 if p not in mf.nodemap: | |
131 err(l, _("manifest has unknown parent %s") % short(p)) | |
132 | 127 |
133 try: | 128 try: |
134 for f, fn in mf.readdelta(n).iteritems(): | 129 for f, fn in mf.readdelta(n).iteritems(): |
135 fns = filenodes.setdefault(f, {}) | 130 if not f: |
136 if fn not in fns: | 131 err(lr, _("file without name in manifest")) |
137 fns[fn] = n | 132 elif f != "/dev/null": |
138 except KeyboardInterrupt: | 133 fns = filenodes.setdefault(f, {}) |
139 ui.warn(_("interrupted")) | 134 if fn not in fns: |
140 raise | 135 fns[fn] = n |
141 except Exception, inst: | 136 except Exception, inst: |
142 err(l, _("reading manifest delta: %s") % inst) | 137 exc(lr, _("reading manifest delta %s") % short(n), inst) |
143 continue | |
144 | 138 |
145 ui.status(_("crosschecking files in changesets and manifests\n")) | 139 ui.status(_("crosschecking files in changesets and manifests\n")) |
146 | 140 |
147 if havemf > 0: | 141 if havemf: |
148 nm = [(c, m) for m, c in neededmanifests.items()] | 142 nm = [] |
143 for m in mflinkrevs: | |
144 for c in mflinkrevs[m]: | |
145 nm.append((c, m)) | |
149 nm.sort() | 146 nm.sort() |
150 for c, m in nm: | 147 for c, m in nm: |
151 err(c, _("changeset refers to unknown manifest %s") % short(m)) | 148 err(c, _("changeset refers to unknown manifest %s") % short(m)) |
152 del neededmanifests, nm | 149 del mflinkrevs, nm |
153 | 150 |
154 if havecl: | |
155 fl = filenodes.keys() | |
156 fl.sort() | |
157 for f in fl: | |
158 if f not in filelinkrevs: | |
159 lrs = [mf.linkrev(n) for n in filenodes[f]] | |
160 lrs.sort() | |
161 err(lrs[0], _("in manifest but not in changeset"), f) | |
162 del fl | |
163 | |
164 if havemf: | |
165 fl = filelinkrevs.keys() | 151 fl = filelinkrevs.keys() |
166 fl.sort() | 152 fl.sort() |
167 for f in fl: | 153 for f in fl: |
168 if f not in filenodes: | 154 if f not in filenodes: |
169 lr = filelinkrevs[f][0] | 155 lr = filelinkrevs[f][0] |
170 err(lr, _("in changeset but not in manifest"), f) | 156 err(lr, _("in changeset but not in manifest"), f) |
171 del fl | 157 del fl |
172 | 158 |
159 if havecl: | |
160 fl = filenodes.keys() | |
161 fl.sort() | |
162 for f in fl: | |
163 if f not in filelinkrevs: | |
164 try: | |
165 lr = min([repo.file(f).linkrev(n) for n in filenodes[f]]) | |
166 except: | |
167 lr = None | |
168 err(lr, _("in manifest but not in changeset"), f) | |
169 del fl | |
170 | |
173 ui.status(_("checking files\n")) | 171 ui.status(_("checking files\n")) |
174 ff = dict.fromkeys(filenodes.keys() + filelinkrevs.keys()).keys() | 172 files = dict.fromkeys(filenodes.keys() + filelinkrevs.keys()).keys() |
175 ff.sort() | 173 files.sort() |
176 for f in ff: | 174 for f in files: |
177 if f == "/dev/null": | |
178 continue | |
179 files += 1 | |
180 if not f: | |
181 lr = filelinkrevs[f][0] | |
182 err(lr, _("file without name in manifest")) | |
183 continue | |
184 fl = repo.file(f) | 175 fl = repo.file(f) |
185 checkversion(fl, f) | 176 checklog(fl, f) |
186 checksize(fl, f) | |
187 | |
188 if not len(fl): | |
189 err(filelinkrevs[f][0], _("empty or missing revlog"), f) | |
190 continue | |
191 | |
192 seen = {} | 177 seen = {} |
193 nodes = {nullid: 1} | |
194 for i in fl: | 178 for i in fl: |
195 revisions += 1 | 179 revisions += 1 |
196 n = fl.node(i) | 180 n = fl.node(i) |
197 flr = fl.linkrev(n) | 181 lr = checkentry(fl, i, n, seen, filelinkrevs.get(f, []), f) |
198 | |
199 if flr < 0 or (havecl and flr not in filelinkrevs.get(f, [])): | |
200 if flr < 0 or flr >= len(repo): | |
201 err(None, _("rev %d point to nonexistent changeset %d") | |
202 % (i, flr), f) | |
203 else: | |
204 err(None, _("rev %d points to unexpected changeset %d") | |
205 % (i, flr), f) | |
206 if f in filelinkrevs: | |
207 warn(_(" (expected %s)") % filelinkrevs[f][0]) | |
208 flr = None # can't be trusted | |
209 else: | |
210 if havecl: | |
211 filelinkrevs[f].remove(flr) | |
212 | |
213 if n in seen: | |
214 err(flr, _("duplicate revision %d") % i, f) | |
215 if f in filenodes: | 182 if f in filenodes: |
216 if havemf and n not in filenodes[f]: | 183 if havemf and n not in filenodes[f]: |
217 err(flr, _("%s not in manifests") % (short(n)), f) | 184 err(lr, _("%s not in manifests") % (short(n)), f) |
218 else: | 185 else: |
219 del filenodes[f][n] | 186 del filenodes[f][n] |
220 | 187 |
221 # verify contents | 188 # verify contents |
222 try: | 189 try: |
223 t = fl.read(n) | 190 t = fl.read(n) |
224 except KeyboardInterrupt: | 191 rp = fl.renamed(n) |
225 ui.warn(_("interrupted")) | 192 if len(t) != fl.size(i): |
226 raise | 193 if not fl._readmeta(n): # ancient copy? |
194 err(lr, _("unpacked size is %s, %s expected") % | |
195 (len(t), fl.size(i)), f) | |
227 except Exception, inst: | 196 except Exception, inst: |
228 err(flr, _("unpacking %s: %s") % (short(n), inst), f) | 197 exc(lr, _("unpacking %s") % short(n), inst, f) |
229 | |
230 # verify parents | |
231 try: | |
232 (p1, p2) = fl.parents(n) | |
233 if p1 not in nodes: | |
234 err(flr, _("unknown parent 1 %s of %s") % | |
235 (short(p1), short(n)), f) | |
236 if p2 not in nodes: | |
237 err(flr, _("unknown parent 2 %s of %s") % | |
238 (short(p2), short(p1)), f) | |
239 except KeyboardInterrupt: | |
240 ui.warn(_("interrupted")) | |
241 raise | |
242 except Exception, inst: | |
243 err(flr, _("checking parents of %s: %s") % (short(n), inst), f) | |
244 nodes[n] = 1 | |
245 | 198 |
246 # check renames | 199 # check renames |
247 try: | 200 try: |
248 rp = fl.renamed(n) | |
249 if rp: | 201 if rp: |
250 fl2 = repo.file(rp[0]) | 202 fl2 = repo.file(rp[0]) |
251 if not len(fl2): | 203 if not len(fl2): |
252 err(flr, _("empty or missing copy source revlog %s:%s") | 204 err(lr, _("empty or missing copy source revlog %s:%s") |
253 % (rp[0], short(rp[1])), f) | 205 % (rp[0], short(rp[1])), f) |
254 elif rp[1] == nullid: | 206 elif rp[1] == nullid: |
255 err(flr, _("copy source revision is nullid %s:%s") | 207 err(lr, _("copy source revision is nullid %s:%s") |
256 % (rp[0], short(rp[1])), f) | 208 % (rp[0], short(rp[1])), f) |
257 else: | 209 else: |
258 rev = fl2.rev(rp[1]) | 210 rev = fl2.rev(rp[1]) |
259 except KeyboardInterrupt: | |
260 ui.warn(_("interrupted")) | |
261 raise | |
262 except Exception, inst: | 211 except Exception, inst: |
263 err(flr, _("checking rename of %s: %s") % | 212 exc(lr, _("checking rename of %s") % short(n), inst, f) |
264 (short(n), inst), f) | |
265 | 213 |
266 # cross-check | 214 # cross-check |
267 if f in filenodes: | 215 if f in filenodes: |
268 fns = [(mf.linkrev(filenodes[f][n]), n) | 216 fns = [(mf.linkrev(l), n) for n,l in filenodes[f].items()] |
269 for n in filenodes[f]] | |
270 fns.sort() | 217 fns.sort() |
271 for lr, node in fns: | 218 for lr, node in fns: |
272 err(lr, _("%s in manifests not found") % short(node), f) | 219 err(lr, _("%s in manifests not found") % short(node), f) |
273 | 220 |
274 ui.status(_("%d files, %d changesets, %d total revisions\n") % | 221 ui.status(_("%d files, %d changesets, %d total revisions\n") % |
275 (files, changesets, revisions)) | 222 (len(files), len(cl), revisions)) |
276 | |
277 if warnings[0]: | 223 if warnings[0]: |
278 ui.warn(_("%d warnings encountered!\n") % warnings[0]) | 224 ui.warn(_("%d warnings encountered!\n") % warnings[0]) |
279 if errors[0]: | 225 if errors[0]: |
280 ui.warn(_("%d integrity errors encountered!\n") % errors[0]) | 226 ui.warn(_("%d integrity errors encountered!\n") % errors[0]) |
281 if firstbad[0]: | 227 if badrevs: |
282 ui.warn(_("(first damaged changeset appears to be %d)\n") | 228 ui.warn(_("(first damaged changeset appears to be %d)\n") |
283 % firstbad[0]) | 229 % min(badrevs)) |
284 return 1 | 230 return 1 |