comparison mercurial/revlog.py @ 8316:d593922cf480

revlog: clean up the chunk caching code
author Matt Mackall <mpm@selenic.com>
date Thu, 07 May 2009 19:39:45 -0500
parents c8493310ad9b
children 5cdf4067857a
comparison
equal deleted inserted replaced
8315:c8493310ad9b 8316:d593922cf480
424 """ 424 """
425 self.indexfile = indexfile 425 self.indexfile = indexfile
426 self.datafile = indexfile[:-2] + ".d" 426 self.datafile = indexfile[:-2] + ".d"
427 self.opener = opener 427 self.opener = opener
428 self._cache = None 428 self._cache = None
429 self._chunkcache = None 429 self._chunkcache = (0, '')
430 self.nodemap = {nullid: nullrev} 430 self.nodemap = {nullid: nullrev}
431 self.index = [] 431 self.index = []
432 432
433 v = REVLOG_DEFAULT_VERSION 433 v = REVLOG_DEFAULT_VERSION
434 if hasattr(opener, "defversion"): 434 if hasattr(opener, "defversion"):
467 try: 467 try:
468 d = self._io.parseindex(f, i, self._inline) 468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError), e: 469 except (ValueError, IndexError), e:
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile)) 470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d 471 self.index, self.nodemap, self._chunkcache = d
472 if not self._chunkcache:
473 self._chunkcache = (0, '')
472 474
473 # add the magic null revision at -1 (if it hasn't been done already) 475 # add the magic null revision at -1 (if it hasn't been done already)
474 if (self.index == [] or isinstance(self.index, lazyindex) or 476 if (self.index == [] or isinstance(self.index, lazyindex) or
475 self.index[-1][7] != nullid) : 477 self.index[-1][7] != nullid) :
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) 478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
908 def cmp(self, node, text): 910 def cmp(self, node, text):
909 """compare text with a given file revision""" 911 """compare text with a given file revision"""
910 p1, p2 = self.parents(node) 912 p1, p2 = self.parents(node)
911 return hash(text, p1, p2) != node 913 return hash(text, p1, p2) != node
912 914
915 def _addchunk(self, offset, data):
916 o, d = self._chunkcache
917 # try to add to existing cache
918 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
919 self._chunkcache = o, d + data
920 else:
921 self._chunkcache = offset, data
922
923 def _loadchunk(self, offset, length, df=None):
924 if not df:
925 if self._inline:
926 df = self.opener(self.indexfile)
927 else:
928 df = self.opener(self.datafile)
929
930 readahead = max(65536, length)
931 df.seek(offset)
932 d = df.read(readahead)
933 self._addchunk(offset, d)
934 if readahead > length:
935 return d[:length]
936 return d
937
938 def _getchunk(self, offset, length, df=None):
939 o, d = self._chunkcache
940 l = len(d)
941
942 # is it in the cache?
943 cachestart = offset - o
944 cacheend = cachestart + length
945 if cachestart >= 0 and cacheend <= l:
946 if cachestart == 0 and cacheend == l:
947 return d # avoid a copy
948 return d[cachestart:cacheend]
949
950 return self._loadchunk(offset, length, df)
951
913 def chunk(self, rev, df=None): 952 def chunk(self, rev, df=None):
914 def loadcache(df):
915 if not df:
916 if self._inline:
917 df = self.opener(self.indexfile)
918 else:
919 df = self.opener(self.datafile)
920 df.seek(start)
921 self._chunkcache = (start, df.read(cache_length))
922
923 start, length = self.start(rev), self.length(rev) 953 start, length = self.start(rev), self.length(rev)
924 if self._inline: 954 if self._inline:
925 start += (rev + 1) * self._io.size 955 start += (rev + 1) * self._io.size
926 end = start + length 956 return decompress(self._getchunk(start, length, df))
927
928 offset = 0
929 if not self._chunkcache:
930 cache_length = max(65536, length)
931 loadcache(df)
932 else:
933 cache_start = self._chunkcache[0]
934 cache_length = len(self._chunkcache[1])
935 cache_end = cache_start + cache_length
936 if start >= cache_start and end <= cache_end:
937 # it is cached
938 offset = start - cache_start
939 else:
940 cache_length = max(65536, length)
941 loadcache(df)
942
943 # avoid copying large chunks
944 c = self._chunkcache[1]
945 if cache_length != length:
946 c = c[offset:offset + length]
947
948 return decompress(c)
949 957
950 def revdiff(self, rev1, rev2): 958 def revdiff(self, rev1, rev2):
951 """return or calculate a delta between two revisions""" 959 """return or calculate a delta between two revisions"""
952 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2): 960 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
953 return self.chunk(rev2) 961 return self.chunk(rev2)
1037 # if we don't call rename, the temp file will never replace the 1045 # if we don't call rename, the temp file will never replace the
1038 # real index 1046 # real index
1039 fp.rename() 1047 fp.rename()
1040 1048
1041 tr.replace(self.indexfile, trindex * calc) 1049 tr.replace(self.indexfile, trindex * calc)
1042 self._chunkcache = None 1050 self._chunkcache = (0, '')
1043 1051
1044 def addrevision(self, text, transaction, link, p1, p2, d=None): 1052 def addrevision(self, text, transaction, link, p1, p2, d=None):
1045 """add a revision to the log 1053 """add a revision to the log
1046 1054
1047 text - the revision data to add 1055 text - the revision data to add
1322 1330
1323 transaction.add(self.indexfile, end) 1331 transaction.add(self.indexfile, end)
1324 1332
1325 # then reset internal state in memory to forget those revisions 1333 # then reset internal state in memory to forget those revisions
1326 self._cache = None 1334 self._cache = None
1327 self._chunkcache = None 1335 self._chunkcache = (0, '')
1328 for x in xrange(rev, len(self)): 1336 for x in xrange(rev, len(self)):
1329 del self.nodemap[self.node(x)] 1337 del self.nodemap[self.node(x)]
1330 1338
1331 del self.index[rev:-1] 1339 del self.index[rev:-1]
1332 1340