Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 4977:6cb30bc4ca32
revlog: parse revlogv0 indexes into v1 internally
This lets us eliminate lots of special case code in our
performance-sensitive index accessors.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Mon, 23 Jul 2007 20:44:08 -0500 |
parents | 79c39cc9ff69 |
children | 93d48a8fa496 |
comparison
equal
deleted
inserted
replaced
4976:79c39cc9ff69 | 4977:6cb30bc4ca32 |
---|---|
290 return long(long(offset) << 16 | type) | 290 return long(long(offset) << 16 | type) |
291 | 291 |
292 class revlogoldio(object): | 292 class revlogoldio(object): |
293 def __init__(self): | 293 def __init__(self): |
294 self.chunkcache = None | 294 self.chunkcache = None |
295 self.size = struct.calcsize(indexformatv0) | |
295 | 296 |
296 def parseindex(self, fp, st, inline): | 297 def parseindex(self, fp, st, inline): |
297 s = struct.calcsize(indexformatv0) | 298 s = self.size |
298 index = [] | 299 index = [] |
299 nodemap = {nullid: nullrev} | 300 nodemap = {nullid: nullrev} |
300 n = off = 0 | 301 n = off = 0 |
301 data = fp.read() | 302 data = fp.read() |
302 l = len(data) | 303 l = len(data) |
303 while off + s <= l: | 304 while off + s <= l: |
304 cur = data[off:off + s] | 305 cur = data[off:off + s] |
305 off += s | 306 off += s |
306 e = struct.unpack(indexformatv0, cur) | 307 e = struct.unpack(indexformatv0, cur) |
307 index.append(e) | 308 # transform to revlogv1 format |
308 nodemap[e[-1]] = n | 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3], |
310 nodemap[e[4]], nodemap[e[5]], e[6]) | |
311 index.append(e2) | |
312 nodemap[e[6]] = n | |
309 n += 1 | 313 n += 1 |
310 | 314 |
311 return index, nodemap | 315 return index, nodemap |
312 | 316 |
313 class revlogio(object): | 317 class revlogio(object): |
314 def __init__(self): | 318 def __init__(self): |
315 self.chunkcache = None | 319 self.chunkcache = None |
320 self.size = struct.calcsize(indexformatng) | |
316 | 321 |
317 def parseindex(self, fp, st, inline): | 322 def parseindex(self, fp, st, inline): |
318 if (lazyparser.safe_to_use and not inline and | 323 if (lazyparser.safe_to_use and not inline and |
319 st and st.st_size > 1000000): | 324 st and st.st_size > 1000000): |
320 # big index, let's parse it on demand | 325 # big index, let's parse it on demand |
325 type = gettype(e[0]) | 330 type = gettype(e[0]) |
326 e[0] = offset_type(0, type) | 331 e[0] = offset_type(0, type) |
327 index[0] = e | 332 index[0] = e |
328 return index, nodemap | 333 return index, nodemap |
329 | 334 |
330 s = struct.calcsize(indexformatng) | 335 s = self.size |
331 index = [] | 336 index = [] |
332 nodemap = {nullid: nullrev} | 337 nodemap = {nullid: nullrev} |
333 n = off = 0 | 338 n = off = 0 |
334 # if we're not using lazymap, always read the whole index | 339 # if we're not using lazymap, always read the whole index |
335 data = fp.read() | 340 data = fp.read() |
440 % (self.indexfile, fmt)) | 445 % (self.indexfile, fmt)) |
441 self.version = v | 446 self.version = v |
442 self.nodemap = {nullid: nullrev} | 447 self.nodemap = {nullid: nullrev} |
443 self.index = [] | 448 self.index = [] |
444 self._io = revlogio() | 449 self._io = revlogio() |
445 self.indexformat = indexformatng | |
446 if self.version == REVLOGV0: | 450 if self.version == REVLOGV0: |
447 self._io = revlogoldio() | 451 self._io = revlogoldio() |
448 self.indexformat = indexformatv0 | |
449 if i: | 452 if i: |
450 self.index, self.nodemap = self._io.parseindex(f, st, self._inline()) | 453 self.index, self.nodemap = self._io.parseindex(f, st, self._inline()) |
451 | 454 |
452 def _loadindex(self, start, end): | 455 def _loadindex(self, start, end): |
453 """load a block of indexes all at once from the lazy parser""" | 456 """load a block of indexes all at once from the lazy parser""" |
482 return (node == nullid) and nullrev or self.index[self.rev(node)][-4] | 485 return (node == nullid) and nullrev or self.index[self.rev(node)][-4] |
483 def parents(self, node): | 486 def parents(self, node): |
484 if node == nullid: return (nullid, nullid) | 487 if node == nullid: return (nullid, nullid) |
485 r = self.rev(node) | 488 r = self.rev(node) |
486 d = self.index[r][-3:-1] | 489 d = self.index[r][-3:-1] |
487 if self.version == REVLOGV0: | |
488 return d | |
489 return (self.node(d[0]), self.node(d[1])) | 490 return (self.node(d[0]), self.node(d[1])) |
490 def parentrevs(self, rev): | 491 def parentrevs(self, rev): |
491 if rev == nullrev: | 492 if rev == nullrev: |
492 return (nullrev, nullrev) | 493 return (nullrev, nullrev) |
493 d = self.index[rev][-3:-1] | 494 d = self.index[rev][-3:-1] |
494 if self.version == REVLOGV0: | |
495 return (self.rev(d[0]), self.rev(d[1])) | |
496 return d | 495 return d |
497 def start(self, rev): | 496 def start(self, rev): |
498 if rev == nullrev: | 497 if rev == nullrev: |
499 return 0 | 498 return 0 |
500 if self.version != REVLOGV0: | 499 return getoffset(self.index[rev][0]) |
501 return getoffset(self.index[rev][0]) | |
502 return self.index[rev][0] | |
503 | 500 |
504 def end(self, rev): return self.start(rev) + self.length(rev) | 501 def end(self, rev): return self.start(rev) + self.length(rev) |
505 | 502 |
506 def size(self, rev): | 503 def size(self, rev): |
507 """return the length of the uncompressed text for a given revision""" | 504 """return the length of the uncompressed text for a given revision""" |
508 if rev == nullrev: | 505 if rev == nullrev: |
509 return 0 | 506 return 0 |
510 l = -1 | 507 l = self.index[rev][2] |
511 if self.version != REVLOGV0: | |
512 l = self.index[rev][2] | |
513 if l >= 0: | 508 if l >= 0: |
514 return l | 509 return l |
515 | 510 |
516 t = self.revision(self.node(rev)) | 511 t = self.revision(self.node(rev)) |
517 return len(t) | 512 return len(t) |
842 | 837 |
843 def chunk(self, rev, df=None, cachelen=4096): | 838 def chunk(self, rev, df=None, cachelen=4096): |
844 start, length = self.start(rev), self.length(rev) | 839 start, length = self.start(rev), self.length(rev) |
845 inline = self._inline() | 840 inline = self._inline() |
846 if inline: | 841 if inline: |
847 start += (rev + 1) * struct.calcsize(self.indexformat) | 842 start += (rev + 1) * self._io.size |
848 end = start + length | 843 end = start + length |
849 def loadcache(df): | 844 def loadcache(df): |
850 cache_length = max(cachelen, length) # 4k | 845 cache_length = max(cachelen, length) # 4k |
851 if not df: | 846 if not df: |
852 if inline: | 847 if inline: |
946 trindex = trinfo[2] | 941 trindex = trinfo[2] |
947 dataoff = self.start(trindex) | 942 dataoff = self.start(trindex) |
948 | 943 |
949 tr.add(self.datafile, dataoff) | 944 tr.add(self.datafile, dataoff) |
950 df = self.opener(self.datafile, 'w') | 945 df = self.opener(self.datafile, 'w') |
951 calc = struct.calcsize(self.indexformat) | 946 calc = self._io.size |
952 for r in xrange(self.count()): | 947 for r in xrange(self.count()): |
953 start = self.start(r) + (r + 1) * calc | 948 start = self.start(r) + (r + 1) * calc |
954 length = self.length(r) | 949 length = self.length(r) |
955 fp.seek(start) | 950 fp.seek(start) |
956 d = fp.read(length) | 951 d = fp.read(length) |
959 df.close() | 954 df.close() |
960 fp = self.opener(self.indexfile, 'w', atomictemp=True) | 955 fp = self.opener(self.indexfile, 'w', atomictemp=True) |
961 self.version &= ~(REVLOGNGINLINEDATA) | 956 self.version &= ~(REVLOGNGINLINEDATA) |
962 if self.count(): | 957 if self.count(): |
963 x = self.index[0] | 958 x = self.index[0] |
964 e = struct.pack(self.indexformat, *x)[4:] | 959 e = struct.pack(indexformatng, *x)[4:] |
965 l = struct.pack(versionformat, self.version) | 960 l = struct.pack(versionformat, self.version) |
966 fp.write(l) | 961 fp.write(l) |
967 fp.write(e) | 962 fp.write(e) |
968 | 963 |
969 for i in xrange(1, self.count()): | 964 for i in xrange(1, self.count()): |
970 x = self.index[i] | 965 x = self.index[i] |
971 e = struct.pack(self.indexformat, *x) | 966 e = struct.pack(indexformatng, *x) |
972 fp.write(e) | 967 fp.write(e) |
973 | 968 |
974 # if we don't call rename, the temp file will never replace the | 969 # if we don't call rename, the temp file will never replace the |
975 # real index | 970 # real index |
976 fp.rename() | 971 fp.rename() |
1029 | 1024 |
1030 offset = 0 | 1025 offset = 0 |
1031 if t >= 0: | 1026 if t >= 0: |
1032 offset = self.end(t) | 1027 offset = self.end(t) |
1033 | 1028 |
1029 e = (offset_type(offset, 0), l, len(text), | |
1030 base, link, self.rev(p1), self.rev(p2), node) | |
1031 | |
1032 self.index.append(e) | |
1033 self.nodemap[node] = n | |
1034 | |
1034 if self.version == REVLOGV0: | 1035 if self.version == REVLOGV0: |
1035 e = (offset, l, base, link, p1, p2, node) | 1036 e = (offset, l, base, link, p1, p2, node) |
1036 else: | 1037 entry = struct.pack(indexformatv0, *e) |
1037 e = (offset_type(offset, 0), l, len(text), | 1038 else: |
1038 base, link, self.rev(p1), self.rev(p2), node) | 1039 entry = struct.pack(indexformatng, *e) |
1039 | |
1040 self.index.append(e) | |
1041 self.nodemap[node] = n | |
1042 entry = struct.pack(self.indexformat, *e) | |
1043 | 1040 |
1044 if not self._inline(): | 1041 if not self._inline(): |
1045 transaction.add(self.datafile, offset) | 1042 transaction.add(self.datafile, offset) |
1046 transaction.add(self.indexfile, n * len(entry)) | 1043 transaction.add(self.indexfile, n * len(entry)) |
1047 if data[0]: | 1044 if data[0]: |
1192 ifh = self.opener(self.indexfile, "a") | 1189 ifh = self.opener(self.indexfile, "a") |
1193 if chk != node: | 1190 if chk != node: |
1194 raise RevlogError(_("consistency error adding group")) | 1191 raise RevlogError(_("consistency error adding group")) |
1195 textlen = len(text) | 1192 textlen = len(text) |
1196 else: | 1193 else: |
1197 if self.version == REVLOGV0: | 1194 e = (offset_type(end, 0), len(cdelta), textlen, base, |
1198 e = (end, len(cdelta), base, link, p1, p2, node) | 1195 link, self.rev(p1), self.rev(p2), node) |
1199 else: | |
1200 e = (offset_type(end, 0), len(cdelta), textlen, base, | |
1201 link, self.rev(p1), self.rev(p2), node) | |
1202 self.index.append(e) | 1196 self.index.append(e) |
1203 self.nodemap[node] = r | 1197 self.nodemap[node] = r |
1204 if self._inline(): | 1198 if self._inline(): |
1205 ifh.write(struct.pack(self.indexformat, *e)) | 1199 ifh.write(struct.pack(indexformatng, *e)) |
1206 ifh.write(cdelta) | 1200 ifh.write(cdelta) |
1207 self.checkinlinesize(transaction, ifh) | 1201 self.checkinlinesize(transaction, ifh) |
1208 if not self._inline(): | 1202 if not self._inline(): |
1209 dfh = self.opener(self.datafile, "a") | 1203 dfh = self.opener(self.datafile, "a") |
1210 ifh = self.opener(self.indexfile, "a") | 1204 ifh = self.opener(self.indexfile, "a") |
1211 else: | 1205 else: |
1206 if self.version == REVLOGV0: | |
1207 e = (end, len(cdelta), base, link, p1, p2, node) | |
1208 entry = struct.pack(indexformatv0, *e) | |
1209 else: | |
1210 entry = struct.pack(indexformatng, *e) | |
1212 dfh.write(cdelta) | 1211 dfh.write(cdelta) |
1213 ifh.write(struct.pack(self.indexformat, *e)) | 1212 ifh.write(entry) |
1214 | 1213 |
1215 t, r, chain, prev = r, r + 1, node, node | 1214 t, r, chain, prev = r, r + 1, node, node |
1216 base = self.base(t) | 1215 base = self.base(t) |
1217 start = self.start(base) | 1216 start = self.start(base) |
1218 end = self.end(t) | 1217 end = self.end(t) |
1238 # first truncate the files on disk | 1237 # first truncate the files on disk |
1239 end = self.start(rev) | 1238 end = self.start(rev) |
1240 if not self._inline(): | 1239 if not self._inline(): |
1241 df = self.opener(self.datafile, "a") | 1240 df = self.opener(self.datafile, "a") |
1242 df.truncate(end) | 1241 df.truncate(end) |
1243 end = rev * struct.calcsize(self.indexformat) | 1242 end = rev * self._io.size |
1244 else: | 1243 else: |
1245 end += rev * struct.calcsize(self.indexformat) | 1244 end += rev * self._io.size |
1246 | 1245 |
1247 indexf = self.opener(self.indexfile, "a") | 1246 indexf = self.opener(self.indexfile, "a") |
1248 indexf.truncate(end) | 1247 indexf.truncate(end) |
1249 | 1248 |
1250 # then reset internal state in memory to forget those revisions | 1249 # then reset internal state in memory to forget those revisions |
1272 | 1271 |
1273 try: | 1272 try: |
1274 f = self.opener(self.indexfile) | 1273 f = self.opener(self.indexfile) |
1275 f.seek(0, 2) | 1274 f.seek(0, 2) |
1276 actual = f.tell() | 1275 actual = f.tell() |
1277 s = struct.calcsize(self.indexformat) | 1276 s = self._io.size |
1278 i = actual / s | 1277 i = actual / s |
1279 di = actual - (i * s) | 1278 di = actual - (i * s) |
1280 if self._inline(): | 1279 if self._inline(): |
1281 databytes = 0 | 1280 databytes = 0 |
1282 for r in xrange(self.count()): | 1281 for r in xrange(self.count()): |