comparison mercurial/revlog.py @ 4972:8d0cf46e0dc6

revlog: add revlogio interface. This lets us separate the logic for different on-disk revlog formats from the main revlog logic.
author Matt Mackall <mpm@selenic.com>
date Mon, 23 Jul 2007 20:44:07 -0500
parents 3e6dae278c99
children a386a6e4fe46
comparison
equal deleted inserted replaced
4971:3e6dae278c99 4972:8d0cf46e0dc6
290 return int(q & 0xFFFF) 290 return int(q & 0xFFFF)
291 291
def offset_type(offset, type):
    """Pack offset and type into a single long.

    offset is shifted left by 16 bits and type is OR-ed into the result.
    No masking is applied to type here.
    """
    shifted = long(offset) << 16
    return long(shifted | type)
294 294
class revlogoldio(object):
    """Index reader that unpacks records using indexformatv0."""

    def __init__(self):
        # revlog keeps its chunk cache on the io object; this reader never
        # fills it in itself
        self.chunkcache = None

    def parseindex(self, fp, st, inline):
        """Read all complete index records from fp.

        fp     - file object to read from
        st     - truth value selecting the read strategy: when true the
                 file is read in 64k pieces, when false it is read with a
                 single read() call
        inline - not used by this reader

        Returns (index, nodemap): the list of unpacked records, and a dict
        mapping the last field of each record to its position in that list
        (pre-seeded with nullid -> nullrev).  A trailing partial record is
        dropped.
        """
        recsize = struct.calcsize(indexformatv0)
        entries = []
        nodes = {nullid: nullrev}
        pending = None
        while True:
            if st:
                buf = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                buf = fp.read()
            if not buf:
                break
            if pending:
                # prepend the partial record carried over from the last read
                buf = pending + buf
                pending = None
            size = len(buf)
            pos = 0
            while pos < size:
                if size - pos < recsize:
                    # not enough bytes for a whole record, keep for later
                    pending = buf[pos:]
                    break
                entry = struct.unpack(indexformatv0, buf[pos:pos + recsize])
                pos += recsize
                # the record's position equals the number already stored
                nodes[entry[-1]] = len(entries)
                entries.append(entry)
            if not st:
                break

        return entries, nodes
332
class revlogio(object):
    """Index reader that unpacks records using indexformatng.

    revlog keeps its chunk cache on this object (chunkcache); parseindex
    seeds it with the first block read when the index is inline.
    """

    def __init__(self):
        # (start offset, data) once populated, None while empty
        self.chunkcache = None

    def _zerofirstoffset(self, index):
        """Replace entry 0 with a copy whose offset is 0, keeping its type.

        NOTE(review): presumably needed because the first record's offset
        field holds something other than a real offset on disk - confirm.
        """
        first = list(index[0])
        first[0] = offset_type(0, gettype(first[0]))
        index[0] = first

    def parseindex(self, fp, st, inline):
        """Build the index and node map from the index file fp.

        fp     - file object to read from
        st     - stat-like object (st_size is consulted) or a false value;
                 when false the file is read with a single read() call
        inline - true when each index record is followed in the same file
                 by e[1] bytes that must be skipped to reach the next one

        Returns (index, nodemap).  For a large non-inline index with a
        usable lazyparser these are lazy objects; otherwise a list of
        unpacked records and a dict mapping each record's last field to
        its position (pre-seeded with nullid -> nullrev).  Entry 0 is
        stored as a list with its offset reset to 0.

        Raises IndexError when no complete record could be read.
        """
        if (lazyparser.safe_to_use and not inline and
            st and st.st_size > 10000):
            # big index, let's parse it on demand
            parser = lazyparser(fp, st.st_size, indexformatng, ngshaoffset)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            self._zerofirstoffset(index)
            return index, nodemap

        s = struct.calcsize(indexformatng)
        index = []
        nodemap = {nullid: nullrev}
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if leftover:
                data = leftover + data
                leftover = None
            if n == 0 and inline:
                # cache the first chunk.  This is done after merging any
                # carried-over bytes: while no record has been parsed,
                # everything read so far is still in data, so it really
                # does start at offset 0 of the file.
                self.chunkcache = (0, data)
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    # partial record, finish it on the next read
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(indexformatng, cur)
                index.append(e)
                nodemap[e[-1]] = n
                n += 1
                if inline:
                    if e[1] < 0:
                        break
                    # skip the bytes stored right after this record
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
                        break
            if not st:
                break

        self._zerofirstoffset(index)
        return index, nodemap
398
295 class revlog(object): 399 class revlog(object):
296 """ 400 """
297 the underlying revision storage object 401 the underlying revision storage object
298 402
299 A revlog consists of two parts, an index and the revision data. 403 A revlog consists of two parts, an index and the revision data.
328 self.datafile = indexfile[:-2] + ".d" 432 self.datafile = indexfile[:-2] + ".d"
329 self.opener = opener 433 self.opener = opener
330 434
331 self.indexstat = None 435 self.indexstat = None
332 self.cache = None 436 self.cache = None
333 self.chunkcache = None
334 self.defversion = REVLOG_DEFAULT_VERSION 437 self.defversion = REVLOG_DEFAULT_VERSION
335 if hasattr(opener, "defversion"): 438 if hasattr(opener, "defversion"):
336 self.defversion = opener.defversion 439 self.defversion = opener.defversion
337 if self.defversion & REVLOGNG: 440 if self.defversion & REVLOGNG:
338 self.defversion |= REVLOGNGINLINEDATA 441 self.defversion |= REVLOGNGINLINEDATA
378 raise RevlogError(_("index %s unknown format %d") 481 raise RevlogError(_("index %s unknown format %d")
379 % (self.indexfile, fmt)) 482 % (self.indexfile, fmt))
380 self.version = v 483 self.version = v
381 self.nodemap = {nullid: nullrev} 484 self.nodemap = {nullid: nullrev}
382 self.index = [] 485 self.index = []
486 self._io = revlogio()
383 self.indexformat = indexformatng 487 self.indexformat = indexformatng
384 if self.version == REVLOGV0: 488 if self.version == REVLOGV0:
489 self._io = revlogoldio()
385 self.indexformat = indexformatv0 490 self.indexformat = indexformatv0
386 if i: 491 if i:
387 self._parseindex(f, st) 492 self.index, self.nodemap = self._io.parseindex(f, st, self._inline())
388
def _parseindex(self, fp, st):
    """Fill self.index and self.nodemap from the index file fp.

    fp - file object to read from
    st - stat-like object (st_size is consulted) or a false value; when
         false the file is read with a single read() call

    A large non-inline index with a usable lazyparser is wrapped in lazy
    objects; otherwise every complete record is unpacked with
    self.indexformat.  For an inline index the first block read is kept
    in self.chunkcache.  Unless the revlog is REVLOGV0, entry 0 is
    rewritten so that its offset is 0.
    """
    v0 = self.version == REVLOGV0
    # evaluated once and reused for every inline check below
    inline = self._inline()

    if (lazyparser.safe_to_use and not inline and
        st and st.st_size > 10000):
        # big index, let's parse it on demand
        shaoffset = ngshaoffset
        if v0:
            shaoffset = v0shaoffset
        parser = lazyparser(fp, st.st_size, self.indexformat, shaoffset)
        self.index = lazyindex(parser)
        self.nodemap = lazymap(parser)
    else:
        s = struct.calcsize(self.indexformat)
        self.index = []
        self.nodemap = {nullid: nullrev}
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if leftover:
                data = leftover + data
                leftover = None
            if n == 0 and inline:
                # cache the first chunk.  Done after merging carried-over
                # bytes so the cached data really starts at offset 0.
                self.chunkcache = (0, data)
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    # partial record, finish it on the next read
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(self.indexformat, cur)
                self.index.append(e)
                self.nodemap[e[-1]] = n
                n += 1
                if inline:
                    if e[1] < 0:
                        break
                    # skip the bytes stored right after this record
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
                        break
            if not st:
                break

    if not v0:
        # shared by both paths: reset entry 0's offset, keep its type
        e = list(self.index[0])
        e[0] = offset_type(0, gettype(e[0]))
        self.index[0] = e
455 493
456 def _loadindex(self, start, end): 494 def _loadindex(self, start, end):
457 """load a block of indexes all at once from the lazy parser""" 495 """load a block of indexes all at once from the lazy parser"""
458 if isinstance(self.index, lazyindex): 496 if isinstance(self.index, lazyindex):
459 self.index.p.loadindex(start, end) 497 self.index.p.loadindex(start, end)
856 if inline: 894 if inline:
857 df = self.opener(self.indexfile) 895 df = self.opener(self.indexfile)
858 else: 896 else:
859 df = self.opener(self.datafile) 897 df = self.opener(self.datafile)
860 df.seek(start) 898 df.seek(start)
861 self.chunkcache = (start, df.read(cache_length)) 899 self._io.chunkcache = (start, df.read(cache_length))
862 900
863 if not self.chunkcache: 901 if not self._io.chunkcache:
864 loadcache(df) 902 loadcache(df)
865 903
866 cache_start = self.chunkcache[0] 904 cache_start = self._io.chunkcache[0]
867 cache_end = cache_start + len(self.chunkcache[1]) 905 cache_end = cache_start + len(self._io.chunkcache[1])
868 if start >= cache_start and end <= cache_end: 906 if start >= cache_start and end <= cache_end:
869 # it is cached 907 # it is cached
870 offset = start - cache_start 908 offset = start - cache_start
871 else: 909 else:
872 loadcache(df) 910 loadcache(df)
875 #def checkchunk(): 913 #def checkchunk():
876 # df = self.opener(self.datafile) 914 # df = self.opener(self.datafile)
877 # df.seek(start) 915 # df.seek(start)
878 # return df.read(length) 916 # return df.read(length)
879 #assert s == checkchunk() 917 #assert s == checkchunk()
880 return decompress(self.chunkcache[1][offset:offset + length]) 918 return decompress(self._io.chunkcache[1][offset:offset + length])
881 919
def delta(self, node):
    """return or calculate a delta between a node and its predecessor

    The predecessor is the revision numbered one below node's revision;
    the result is whatever self.revdiff gives for that pair.
    """
    rev = self.rev(node)
    prev = rev - 1
    return self.revdiff(prev, rev)
978 # if we don't call rename, the temp file will never replace the 1016 # if we don't call rename, the temp file will never replace the
979 # real index 1017 # real index
980 fp.rename() 1018 fp.rename()
981 1019
982 tr.replace(self.indexfile, trindex * calc) 1020 tr.replace(self.indexfile, trindex * calc)
983 self.chunkcache = None 1021 self._io.chunkcache = None
984 1022
985 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None): 1023 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
986 """add a revision to the log 1024 """add a revision to the log
987 1025
988 text - the revision data to add 1026 text - the revision data to add
1251 indexf = self.opener(self.indexfile, "a") 1289 indexf = self.opener(self.indexfile, "a")
1252 indexf.truncate(end) 1290 indexf.truncate(end)
1253 1291
1254 # then reset internal state in memory to forget those revisions 1292 # then reset internal state in memory to forget those revisions
1255 self.cache = None 1293 self.cache = None
1256 self.chunkcache = None 1294 self._io.chunkcache = None
1257 for x in xrange(rev, self.count()): 1295 for x in xrange(rev, self.count()):
1258 del self.nodemap[self.node(x)] 1296 del self.nodemap[self.node(x)]
1259 1297
1260 del self.index[rev:] 1298 del self.index[rev:]
1261 1299