comparison mercurial/revlog.py @ 4977:6cb30bc4ca32

revlog: parse revlogv0 indexes into v1 internally This lets us eliminate lots of special case code in our performance-sensitive index accessors.
author Matt Mackall <mpm@selenic.com>
date Mon, 23 Jul 2007 20:44:08 -0500
parents 79c39cc9ff69
children 93d48a8fa496
comparison
equal deleted inserted replaced
4976:79c39cc9ff69 4977:6cb30bc4ca32
290 return long(long(offset) << 16 | type) 290 return long(long(offset) << 16 | type)
291 291
292 class revlogoldio(object): 292 class revlogoldio(object):
293 def __init__(self): 293 def __init__(self):
294 self.chunkcache = None 294 self.chunkcache = None
295 self.size = struct.calcsize(indexformatv0)
295 296
296 def parseindex(self, fp, st, inline): 297 def parseindex(self, fp, st, inline):
297 s = struct.calcsize(indexformatv0) 298 s = self.size
298 index = [] 299 index = []
299 nodemap = {nullid: nullrev} 300 nodemap = {nullid: nullrev}
300 n = off = 0 301 n = off = 0
301 data = fp.read() 302 data = fp.read()
302 l = len(data) 303 l = len(data)
303 while off + s <= l: 304 while off + s <= l:
304 cur = data[off:off + s] 305 cur = data[off:off + s]
305 off += s 306 off += s
306 e = struct.unpack(indexformatv0, cur) 307 e = struct.unpack(indexformatv0, cur)
307 index.append(e) 308 # transform to revlogv1 format
308 nodemap[e[-1]] = n 309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 nodemap[e[4]], nodemap[e[5]], e[6])
311 index.append(e2)
312 nodemap[e[6]] = n
309 n += 1 313 n += 1
310 314
311 return index, nodemap 315 return index, nodemap
312 316
313 class revlogio(object): 317 class revlogio(object):
314 def __init__(self): 318 def __init__(self):
315 self.chunkcache = None 319 self.chunkcache = None
320 self.size = struct.calcsize(indexformatng)
316 321
317 def parseindex(self, fp, st, inline): 322 def parseindex(self, fp, st, inline):
318 if (lazyparser.safe_to_use and not inline and 323 if (lazyparser.safe_to_use and not inline and
319 st and st.st_size > 1000000): 324 st and st.st_size > 1000000):
320 # big index, let's parse it on demand 325 # big index, let's parse it on demand
325 type = gettype(e[0]) 330 type = gettype(e[0])
326 e[0] = offset_type(0, type) 331 e[0] = offset_type(0, type)
327 index[0] = e 332 index[0] = e
328 return index, nodemap 333 return index, nodemap
329 334
330 s = struct.calcsize(indexformatng) 335 s = self.size
331 index = [] 336 index = []
332 nodemap = {nullid: nullrev} 337 nodemap = {nullid: nullrev}
333 n = off = 0 338 n = off = 0
334 # if we're not using lazymap, always read the whole index 339 # if we're not using lazymap, always read the whole index
335 data = fp.read() 340 data = fp.read()
440 % (self.indexfile, fmt)) 445 % (self.indexfile, fmt))
441 self.version = v 446 self.version = v
442 self.nodemap = {nullid: nullrev} 447 self.nodemap = {nullid: nullrev}
443 self.index = [] 448 self.index = []
444 self._io = revlogio() 449 self._io = revlogio()
445 self.indexformat = indexformatng
446 if self.version == REVLOGV0: 450 if self.version == REVLOGV0:
447 self._io = revlogoldio() 451 self._io = revlogoldio()
448 self.indexformat = indexformatv0
449 if i: 452 if i:
450 self.index, self.nodemap = self._io.parseindex(f, st, self._inline()) 453 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
451 454
452 def _loadindex(self, start, end): 455 def _loadindex(self, start, end):
453 """load a block of indexes all at once from the lazy parser""" 456 """load a block of indexes all at once from the lazy parser"""
482 return (node == nullid) and nullrev or self.index[self.rev(node)][-4] 485 return (node == nullid) and nullrev or self.index[self.rev(node)][-4]
483 def parents(self, node): 486 def parents(self, node):
484 if node == nullid: return (nullid, nullid) 487 if node == nullid: return (nullid, nullid)
485 r = self.rev(node) 488 r = self.rev(node)
486 d = self.index[r][-3:-1] 489 d = self.index[r][-3:-1]
487 if self.version == REVLOGV0:
488 return d
489 return (self.node(d[0]), self.node(d[1])) 490 return (self.node(d[0]), self.node(d[1]))
490 def parentrevs(self, rev): 491 def parentrevs(self, rev):
491 if rev == nullrev: 492 if rev == nullrev:
492 return (nullrev, nullrev) 493 return (nullrev, nullrev)
493 d = self.index[rev][-3:-1] 494 d = self.index[rev][-3:-1]
494 if self.version == REVLOGV0:
495 return (self.rev(d[0]), self.rev(d[1]))
496 return d 495 return d
497 def start(self, rev): 496 def start(self, rev):
498 if rev == nullrev: 497 if rev == nullrev:
499 return 0 498 return 0
500 if self.version != REVLOGV0: 499 return getoffset(self.index[rev][0])
501 return getoffset(self.index[rev][0])
502 return self.index[rev][0]
503 500
504 def end(self, rev): return self.start(rev) + self.length(rev) 501 def end(self, rev): return self.start(rev) + self.length(rev)
505 502
506 def size(self, rev): 503 def size(self, rev):
507 """return the length of the uncompressed text for a given revision""" 504 """return the length of the uncompressed text for a given revision"""
508 if rev == nullrev: 505 if rev == nullrev:
509 return 0 506 return 0
510 l = -1 507 l = self.index[rev][2]
511 if self.version != REVLOGV0:
512 l = self.index[rev][2]
513 if l >= 0: 508 if l >= 0:
514 return l 509 return l
515 510
516 t = self.revision(self.node(rev)) 511 t = self.revision(self.node(rev))
517 return len(t) 512 return len(t)
842 837
843 def chunk(self, rev, df=None, cachelen=4096): 838 def chunk(self, rev, df=None, cachelen=4096):
844 start, length = self.start(rev), self.length(rev) 839 start, length = self.start(rev), self.length(rev)
845 inline = self._inline() 840 inline = self._inline()
846 if inline: 841 if inline:
847 start += (rev + 1) * struct.calcsize(self.indexformat) 842 start += (rev + 1) * self._io.size
848 end = start + length 843 end = start + length
849 def loadcache(df): 844 def loadcache(df):
850 cache_length = max(cachelen, length) # 4k 845 cache_length = max(cachelen, length) # 4k
851 if not df: 846 if not df:
852 if inline: 847 if inline:
946 trindex = trinfo[2] 941 trindex = trinfo[2]
947 dataoff = self.start(trindex) 942 dataoff = self.start(trindex)
948 943
949 tr.add(self.datafile, dataoff) 944 tr.add(self.datafile, dataoff)
950 df = self.opener(self.datafile, 'w') 945 df = self.opener(self.datafile, 'w')
951 calc = struct.calcsize(self.indexformat) 946 calc = self._io.size
952 for r in xrange(self.count()): 947 for r in xrange(self.count()):
953 start = self.start(r) + (r + 1) * calc 948 start = self.start(r) + (r + 1) * calc
954 length = self.length(r) 949 length = self.length(r)
955 fp.seek(start) 950 fp.seek(start)
956 d = fp.read(length) 951 d = fp.read(length)
959 df.close() 954 df.close()
960 fp = self.opener(self.indexfile, 'w', atomictemp=True) 955 fp = self.opener(self.indexfile, 'w', atomictemp=True)
961 self.version &= ~(REVLOGNGINLINEDATA) 956 self.version &= ~(REVLOGNGINLINEDATA)
962 if self.count(): 957 if self.count():
963 x = self.index[0] 958 x = self.index[0]
964 e = struct.pack(self.indexformat, *x)[4:] 959 e = struct.pack(indexformatng, *x)[4:]
965 l = struct.pack(versionformat, self.version) 960 l = struct.pack(versionformat, self.version)
966 fp.write(l) 961 fp.write(l)
967 fp.write(e) 962 fp.write(e)
968 963
969 for i in xrange(1, self.count()): 964 for i in xrange(1, self.count()):
970 x = self.index[i] 965 x = self.index[i]
971 e = struct.pack(self.indexformat, *x) 966 e = struct.pack(indexformatng, *x)
972 fp.write(e) 967 fp.write(e)
973 968
974 # if we don't call rename, the temp file will never replace the 969 # if we don't call rename, the temp file will never replace the
975 # real index 970 # real index
976 fp.rename() 971 fp.rename()
1029 1024
1030 offset = 0 1025 offset = 0
1031 if t >= 0: 1026 if t >= 0:
1032 offset = self.end(t) 1027 offset = self.end(t)
1033 1028
1029 e = (offset_type(offset, 0), l, len(text),
1030 base, link, self.rev(p1), self.rev(p2), node)
1031
1032 self.index.append(e)
1033 self.nodemap[node] = n
1034
1034 if self.version == REVLOGV0: 1035 if self.version == REVLOGV0:
1035 e = (offset, l, base, link, p1, p2, node) 1036 e = (offset, l, base, link, p1, p2, node)
1036 else: 1037 entry = struct.pack(indexformatv0, *e)
1037 e = (offset_type(offset, 0), l, len(text), 1038 else:
1038 base, link, self.rev(p1), self.rev(p2), node) 1039 entry = struct.pack(indexformatng, *e)
1039
1040 self.index.append(e)
1041 self.nodemap[node] = n
1042 entry = struct.pack(self.indexformat, *e)
1043 1040
1044 if not self._inline(): 1041 if not self._inline():
1045 transaction.add(self.datafile, offset) 1042 transaction.add(self.datafile, offset)
1046 transaction.add(self.indexfile, n * len(entry)) 1043 transaction.add(self.indexfile, n * len(entry))
1047 if data[0]: 1044 if data[0]:
1192 ifh = self.opener(self.indexfile, "a") 1189 ifh = self.opener(self.indexfile, "a")
1193 if chk != node: 1190 if chk != node:
1194 raise RevlogError(_("consistency error adding group")) 1191 raise RevlogError(_("consistency error adding group"))
1195 textlen = len(text) 1192 textlen = len(text)
1196 else: 1193 else:
1197 if self.version == REVLOGV0: 1194 e = (offset_type(end, 0), len(cdelta), textlen, base,
1198 e = (end, len(cdelta), base, link, p1, p2, node) 1195 link, self.rev(p1), self.rev(p2), node)
1199 else:
1200 e = (offset_type(end, 0), len(cdelta), textlen, base,
1201 link, self.rev(p1), self.rev(p2), node)
1202 self.index.append(e) 1196 self.index.append(e)
1203 self.nodemap[node] = r 1197 self.nodemap[node] = r
1204 if self._inline(): 1198 if self._inline():
1205 ifh.write(struct.pack(self.indexformat, *e)) 1199 ifh.write(struct.pack(indexformatng, *e))
1206 ifh.write(cdelta) 1200 ifh.write(cdelta)
1207 self.checkinlinesize(transaction, ifh) 1201 self.checkinlinesize(transaction, ifh)
1208 if not self._inline(): 1202 if not self._inline():
1209 dfh = self.opener(self.datafile, "a") 1203 dfh = self.opener(self.datafile, "a")
1210 ifh = self.opener(self.indexfile, "a") 1204 ifh = self.opener(self.indexfile, "a")
1211 else: 1205 else:
1206 if self.version == REVLOGV0:
1207 e = (end, len(cdelta), base, link, p1, p2, node)
1208 entry = struct.pack(indexformatv0, *e)
1209 else:
1210 entry = struct.pack(indexformatng, *e)
1212 dfh.write(cdelta) 1211 dfh.write(cdelta)
1213 ifh.write(struct.pack(self.indexformat, *e)) 1212 ifh.write(entry)
1214 1213
1215 t, r, chain, prev = r, r + 1, node, node 1214 t, r, chain, prev = r, r + 1, node, node
1216 base = self.base(t) 1215 base = self.base(t)
1217 start = self.start(base) 1216 start = self.start(base)
1218 end = self.end(t) 1217 end = self.end(t)
1238 # first truncate the files on disk 1237 # first truncate the files on disk
1239 end = self.start(rev) 1238 end = self.start(rev)
1240 if not self._inline(): 1239 if not self._inline():
1241 df = self.opener(self.datafile, "a") 1240 df = self.opener(self.datafile, "a")
1242 df.truncate(end) 1241 df.truncate(end)
1243 end = rev * struct.calcsize(self.indexformat) 1242 end = rev * self._io.size
1244 else: 1243 else:
1245 end += rev * struct.calcsize(self.indexformat) 1244 end += rev * self._io.size
1246 1245
1247 indexf = self.opener(self.indexfile, "a") 1246 indexf = self.opener(self.indexfile, "a")
1248 indexf.truncate(end) 1247 indexf.truncate(end)
1249 1248
1250 # then reset internal state in memory to forget those revisions 1249 # then reset internal state in memory to forget those revisions
1272 1271
1273 try: 1272 try:
1274 f = self.opener(self.indexfile) 1273 f = self.opener(self.indexfile)
1275 f.seek(0, 2) 1274 f.seek(0, 2)
1276 actual = f.tell() 1275 actual = f.tell()
1277 s = struct.calcsize(self.indexformat) 1276 s = self._io.size
1278 i = actual / s 1277 i = actual / s
1279 di = actual - (i * s) 1278 di = actual - (i * s)
1280 if self._inline(): 1279 if self._inline():
1281 databytes = 0 1280 databytes = 0
1282 for r in xrange(self.count()): 1281 for r in xrange(self.count()):