comparison mercurial/revlog.py @ 8650:ef393d6ec030

revlog: refactor chunk cache interface again - chunk to _chunk - _prime to _chunkraw - _chunkclear for cache clearing - _chunk calls _chunkraw - clean up _prime a bit - simplify users in revision and checkinlinesize - drop file descriptor passing (we're better off opening fds lazily)
author Matt Mackall <mpm@selenic.com>
date Wed, 27 May 2009 16:01:34 -0500
parents 648af8a6aa41
children 782a85ee686f
comparison
equal deleted inserted replaced
8649:2c097e22492c 8650:ef393d6ec030
468 d = self._io.parseindex(f, i, self._inline) 468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError), e: 469 except (ValueError, IndexError), e:
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile)) 470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d 471 self.index, self.nodemap, self._chunkcache = d
472 if not self._chunkcache: 472 if not self._chunkcache:
473 self._chunkcache = (0, '') 473 self._chunkclear()
474 474
475 # add the magic null revision at -1 (if it hasn't been done already) 475 # add the magic null revision at -1 (if it hasn't been done already)
476 if (self.index == [] or isinstance(self.index, lazyindex) or 476 if (self.index == [] or isinstance(self.index, lazyindex) or
477 self.index[-1][7] != nullid) : 477 self.index[-1][7] != nullid) :
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) 478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
549 else: 549 else:
550 text = self.revision(self.node(base)) 550 text = self.revision(self.node(base))
551 551
552 l = len(text) 552 l = len(text)
553 for x in xrange(base + 1, rev + 1): 553 for x in xrange(base + 1, rev + 1):
554 l = mdiff.patchedsize(l, self.chunk(x)) 554 l = mdiff.patchedsize(l, self._chunk(x))
555 return l 555 return l
556 """ 556 """
557 557
558 def reachable(self, node, stop=None): 558 def reachable(self, node, stop=None):
559 """return the set of all nodes ancestral to a given node, including 559 """return the set of all nodes ancestral to a given node, including
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize: 917 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 self._chunkcache = o, d + data 918 self._chunkcache = o, d + data
919 else: 919 else:
920 self._chunkcache = offset, data 920 self._chunkcache = offset, data
921 921
922 def _loadchunk(self, offset, length, df=None): 922 def _loadchunk(self, offset, length):
923 if not df: 923 if self._inline:
924 if self._inline: 924 df = self.opener(self.indexfile)
925 df = self.opener(self.indexfile) 925 else:
926 else: 926 df = self.opener(self.datafile)
927 df = self.opener(self.datafile)
928 927
929 readahead = max(65536, length) 928 readahead = max(65536, length)
930 df.seek(offset) 929 df.seek(offset)
931 d = df.read(readahead) 930 d = df.read(readahead)
932 self._addchunk(offset, d) 931 self._addchunk(offset, d)
933 if readahead > length: 932 if readahead > length:
934 return d[:length] 933 return d[:length]
935 return d 934 return d
936 935
937 def _getchunk(self, offset, length, df=None): 936 def _getchunk(self, offset, length):
938 o, d = self._chunkcache 937 o, d = self._chunkcache
939 l = len(d) 938 l = len(d)
940 939
941 # is it in the cache? 940 # is it in the cache?
942 cachestart = offset - o 941 cachestart = offset - o
944 if cachestart >= 0 and cacheend <= l: 943 if cachestart >= 0 and cacheend <= l:
945 if cachestart == 0 and cacheend == l: 944 if cachestart == 0 and cacheend == l:
946 return d # avoid a copy 945 return d # avoid a copy
947 return d[cachestart:cacheend] 946 return d[cachestart:cacheend]
948 947
949 return self._loadchunk(offset, length, df) 948 return self._loadchunk(offset, length)
950 949
951 def _prime(self, startrev, endrev, df): 950 def _chunkraw(self, startrev, endrev):
952 start = self.start(startrev) 951 start = self.start(startrev)
953 end = self.end(endrev) 952 length = self.end(endrev) - start
954 if self._inline: 953 if self._inline:
955 start += (startrev + 1) * self._io.size 954 start += (startrev + 1) * self._io.size
956 end += (startrev + 1) * self._io.size 955 return self._getchunk(start, length)
957 self._loadchunk(start, end - start, df) 956
958 957 def _chunk(self, rev):
959 def chunk(self, rev, df=None): 958 return decompress(self._chunkraw(rev, rev))
960 start, length = self.start(rev), self.length(rev) 959
961 if self._inline: 960 def _chunkclear(self):
962 start += (rev + 1) * self._io.size 961 self._chunkcache = (0, '')
963 return decompress(self._getchunk(start, length, df))
964 962
965 def revdiff(self, rev1, rev2): 963 def revdiff(self, rev1, rev2):
966 """return or calculate a delta between two revisions""" 964 """return or calculate a delta between two revisions"""
967 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2): 965 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
968 return self.chunk(rev2) 966 return self._chunk(rev2)
969 967
970 return mdiff.textdiff(self.revision(self.node(rev1)), 968 return mdiff.textdiff(self.revision(self.node(rev1)),
971 self.revision(self.node(rev2))) 969 self.revision(self.node(rev2)))
972 970
973 def revision(self, node): 971 def revision(self, node):
985 # check rev flags 983 # check rev flags
986 if self.index[rev][0] & 0xFFFF: 984 if self.index[rev][0] & 0xFFFF:
987 raise RevlogError(_('incompatible revision flag %x') % 985 raise RevlogError(_('incompatible revision flag %x') %
988 (self.index[rev][0] & 0xFFFF)) 986 (self.index[rev][0] & 0xFFFF))
989 987
990 df = None
991
992 # do we have useful data cached? 988 # do we have useful data cached?
993 if self._cache and self._cache[1] >= base and self._cache[1] < rev: 989 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
994 base = self._cache[1] 990 base = self._cache[1]
995 text = str(self._cache[2]) 991 text = str(self._cache[2])
996 self._loadindex(base, rev + 1) 992
997 if not self._inline and rev > base + 1: 993 self._loadindex(base, rev + 1)
998 df = self.opener(self.datafile) 994 self._chunkraw(base, rev)
999 self._prime(base, rev, df) 995 if text is None:
1000 else: 996 text = self._chunk(base)
1001 self._loadindex(base, rev + 1) 997
1002 if not self._inline and rev > base: 998 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
1003 df = self.opener(self.datafile)
1004 self._prime(base, rev, df)
1005 text = self.chunk(base, df=df)
1006
1007 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
1008 text = mdiff.patches(text, bins) 999 text = mdiff.patches(text, bins)
1009 p1, p2 = self.parents(node) 1000 p1, p2 = self.parents(node)
1010 if node != hash(text, p1, p2): 1001 if node != hash(text, p1, p2):
1011 raise RevlogError(_("integrity check failed on %s:%d") 1002 raise RevlogError(_("integrity check failed on %s:%d")
1012 % (self.indexfile, rev)) 1003 % (self.indexfile, rev))
1032 fp.flush() 1023 fp.flush()
1033 fp.close() 1024 fp.close()
1034 1025
1035 df = self.opener(self.datafile, 'w') 1026 df = self.opener(self.datafile, 'w')
1036 try: 1027 try:
1037 calc = self._io.size
1038 for r in self: 1028 for r in self:
1039 start = self.start(r) + (r + 1) * calc 1029 df.write(self._chunkraw(r, r))
1040 length = self.length(r)
1041 d = self._getchunk(start, length)
1042 df.write(d)
1043 finally: 1030 finally:
1044 df.close() 1031 df.close()
1045 1032
1046 fp = self.opener(self.indexfile, 'w', atomictemp=True) 1033 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1047 self.version &= ~(REVLOGNGINLINEDATA) 1034 self.version &= ~(REVLOGNGINLINEDATA)
1052 1039
1053 # if we don't call rename, the temp file will never replace the 1040 # if we don't call rename, the temp file will never replace the
1054 # real index 1041 # real index
1055 fp.rename() 1042 fp.rename()
1056 1043
1057 tr.replace(self.indexfile, trindex * calc) 1044 tr.replace(self.indexfile, trindex * self._io.size)
1058 self._chunkcache = (0, '') 1045 self._chunkclear()
1059 1046
1060 def addrevision(self, text, transaction, link, p1, p2, d=None): 1047 def addrevision(self, text, transaction, link, p1, p2, d=None):
1061 """add a revision to the log 1048 """add a revision to the log
1062 1049
1063 text - the revision data to add 1050 text - the revision data to add
1339 1326
1340 transaction.add(self.indexfile, end) 1327 transaction.add(self.indexfile, end)
1341 1328
1342 # then reset internal state in memory to forget those revisions 1329 # then reset internal state in memory to forget those revisions
1343 self._cache = None 1330 self._cache = None
1344 self._chunkcache = (0, '') 1331 self._chunkclear()
1345 for x in xrange(rev, len(self)): 1332 for x in xrange(rev, len(self)):
1346 del self.nodemap[self.node(x)] 1333 del self.nodemap[self.node(x)]
1347 1334
1348 del self.index[rev:-1] 1335 del self.index[rev:-1]
1349 1336