Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 8650:ef393d6ec030
revlog: refactor chunk cache interface again
- chunk to _chunk
- _prime to _chunkraw
- _chunkclear for cache clearing
- _chunk calls _chunkraw
- clean up _prime a bit
- simplify users in revision and checkinlinesize
- drop file descriptor passing (we're better off opening fds lazily)
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Wed, 27 May 2009 16:01:34 -0500 |
parents | 648af8a6aa41 |
children | 782a85ee686f |
comparison
equal
deleted
inserted
replaced
8649:2c097e22492c | 8650:ef393d6ec030 |
---|---|
468 d = self._io.parseindex(f, i, self._inline) | 468 d = self._io.parseindex(f, i, self._inline) |
469 except (ValueError, IndexError), e: | 469 except (ValueError, IndexError), e: |
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile)) | 470 raise RevlogError(_("index %s is corrupted") % (self.indexfile)) |
471 self.index, self.nodemap, self._chunkcache = d | 471 self.index, self.nodemap, self._chunkcache = d |
472 if not self._chunkcache: | 472 if not self._chunkcache: |
473 self._chunkcache = (0, '') | 473 self._chunkclear() |
474 | 474 |
475 # add the magic null revision at -1 (if it hasn't been done already) | 475 # add the magic null revision at -1 (if it hasn't been done already) |
476 if (self.index == [] or isinstance(self.index, lazyindex) or | 476 if (self.index == [] or isinstance(self.index, lazyindex) or |
477 self.index[-1][7] != nullid) : | 477 self.index[-1][7] != nullid) : |
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) | 478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid)) |
549 else: | 549 else: |
550 text = self.revision(self.node(base)) | 550 text = self.revision(self.node(base)) |
551 | 551 |
552 l = len(text) | 552 l = len(text) |
553 for x in xrange(base + 1, rev + 1): | 553 for x in xrange(base + 1, rev + 1): |
554 l = mdiff.patchedsize(l, self.chunk(x)) | 554 l = mdiff.patchedsize(l, self._chunk(x)) |
555 return l | 555 return l |
556 """ | 556 """ |
557 | 557 |
558 def reachable(self, node, stop=None): | 558 def reachable(self, node, stop=None): |
559 """return the set of all nodes ancestral to a given node, including | 559 """return the set of all nodes ancestral to a given node, including |
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize: | 917 if o + len(d) == offset and len(d) + len(data) < _prereadsize: |
918 self._chunkcache = o, d + data | 918 self._chunkcache = o, d + data |
919 else: | 919 else: |
920 self._chunkcache = offset, data | 920 self._chunkcache = offset, data |
921 | 921 |
922 def _loadchunk(self, offset, length, df=None): | 922 def _loadchunk(self, offset, length): |
923 if not df: | 923 if self._inline: |
924 if self._inline: | 924 df = self.opener(self.indexfile) |
925 df = self.opener(self.indexfile) | 925 else: |
926 else: | 926 df = self.opener(self.datafile) |
927 df = self.opener(self.datafile) | |
928 | 927 |
929 readahead = max(65536, length) | 928 readahead = max(65536, length) |
930 df.seek(offset) | 929 df.seek(offset) |
931 d = df.read(readahead) | 930 d = df.read(readahead) |
932 self._addchunk(offset, d) | 931 self._addchunk(offset, d) |
933 if readahead > length: | 932 if readahead > length: |
934 return d[:length] | 933 return d[:length] |
935 return d | 934 return d |
936 | 935 |
937 def _getchunk(self, offset, length, df=None): | 936 def _getchunk(self, offset, length): |
938 o, d = self._chunkcache | 937 o, d = self._chunkcache |
939 l = len(d) | 938 l = len(d) |
940 | 939 |
941 # is it in the cache? | 940 # is it in the cache? |
942 cachestart = offset - o | 941 cachestart = offset - o |
944 if cachestart >= 0 and cacheend <= l: | 943 if cachestart >= 0 and cacheend <= l: |
945 if cachestart == 0 and cacheend == l: | 944 if cachestart == 0 and cacheend == l: |
946 return d # avoid a copy | 945 return d # avoid a copy |
947 return d[cachestart:cacheend] | 946 return d[cachestart:cacheend] |
948 | 947 |
949 return self._loadchunk(offset, length, df) | 948 return self._loadchunk(offset, length) |
950 | 949 |
951 def _prime(self, startrev, endrev, df): | 950 def _chunkraw(self, startrev, endrev): |
952 start = self.start(startrev) | 951 start = self.start(startrev) |
953 end = self.end(endrev) | 952 length = self.end(endrev) - start |
954 if self._inline: | 953 if self._inline: |
955 start += (startrev + 1) * self._io.size | 954 start += (startrev + 1) * self._io.size |
956 end += (startrev + 1) * self._io.size | 955 return self._getchunk(start, length) |
957 self._loadchunk(start, end - start, df) | 956 |
958 | 957 def _chunk(self, rev): |
959 def chunk(self, rev, df=None): | 958 return decompress(self._chunkraw(rev, rev)) |
960 start, length = self.start(rev), self.length(rev) | 959 |
961 if self._inline: | 960 def _chunkclear(self): |
962 start += (rev + 1) * self._io.size | 961 self._chunkcache = (0, '') |
963 return decompress(self._getchunk(start, length, df)) | |
964 | 962 |
965 def revdiff(self, rev1, rev2): | 963 def revdiff(self, rev1, rev2): |
966 """return or calculate a delta between two revisions""" | 964 """return or calculate a delta between two revisions""" |
967 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2): | 965 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2): |
968 return self.chunk(rev2) | 966 return self._chunk(rev2) |
969 | 967 |
970 return mdiff.textdiff(self.revision(self.node(rev1)), | 968 return mdiff.textdiff(self.revision(self.node(rev1)), |
971 self.revision(self.node(rev2))) | 969 self.revision(self.node(rev2))) |
972 | 970 |
973 def revision(self, node): | 971 def revision(self, node): |
985 # check rev flags | 983 # check rev flags |
986 if self.index[rev][0] & 0xFFFF: | 984 if self.index[rev][0] & 0xFFFF: |
987 raise RevlogError(_('incompatible revision flag %x') % | 985 raise RevlogError(_('incompatible revision flag %x') % |
988 (self.index[rev][0] & 0xFFFF)) | 986 (self.index[rev][0] & 0xFFFF)) |
989 | 987 |
990 df = None | |
991 | |
992 # do we have useful data cached? | 988 # do we have useful data cached? |
993 if self._cache and self._cache[1] >= base and self._cache[1] < rev: | 989 if self._cache and self._cache[1] >= base and self._cache[1] < rev: |
994 base = self._cache[1] | 990 base = self._cache[1] |
995 text = str(self._cache[2]) | 991 text = str(self._cache[2]) |
996 self._loadindex(base, rev + 1) | 992 |
997 if not self._inline and rev > base + 1: | 993 self._loadindex(base, rev + 1) |
998 df = self.opener(self.datafile) | 994 self._chunkraw(base, rev) |
999 self._prime(base, rev, df) | 995 if text is None: |
1000 else: | 996 text = self._chunk(base) |
1001 self._loadindex(base, rev + 1) | 997 |
1002 if not self._inline and rev > base: | 998 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)] |
1003 df = self.opener(self.datafile) | |
1004 self._prime(base, rev, df) | |
1005 text = self.chunk(base, df=df) | |
1006 | |
1007 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)] | |
1008 text = mdiff.patches(text, bins) | 999 text = mdiff.patches(text, bins) |
1009 p1, p2 = self.parents(node) | 1000 p1, p2 = self.parents(node) |
1010 if node != hash(text, p1, p2): | 1001 if node != hash(text, p1, p2): |
1011 raise RevlogError(_("integrity check failed on %s:%d") | 1002 raise RevlogError(_("integrity check failed on %s:%d") |
1012 % (self.indexfile, rev)) | 1003 % (self.indexfile, rev)) |
1032 fp.flush() | 1023 fp.flush() |
1033 fp.close() | 1024 fp.close() |
1034 | 1025 |
1035 df = self.opener(self.datafile, 'w') | 1026 df = self.opener(self.datafile, 'w') |
1036 try: | 1027 try: |
1037 calc = self._io.size | |
1038 for r in self: | 1028 for r in self: |
1039 start = self.start(r) + (r + 1) * calc | 1029 df.write(self._chunkraw(r, r)) |
1040 length = self.length(r) | |
1041 d = self._getchunk(start, length) | |
1042 df.write(d) | |
1043 finally: | 1030 finally: |
1044 df.close() | 1031 df.close() |
1045 | 1032 |
1046 fp = self.opener(self.indexfile, 'w', atomictemp=True) | 1033 fp = self.opener(self.indexfile, 'w', atomictemp=True) |
1047 self.version &= ~(REVLOGNGINLINEDATA) | 1034 self.version &= ~(REVLOGNGINLINEDATA) |
1052 | 1039 |
1053 # if we don't call rename, the temp file will never replace the | 1040 # if we don't call rename, the temp file will never replace the |
1054 # real index | 1041 # real index |
1055 fp.rename() | 1042 fp.rename() |
1056 | 1043 |
1057 tr.replace(self.indexfile, trindex * calc) | 1044 tr.replace(self.indexfile, trindex * self._io.size) |
1058 self._chunkcache = (0, '') | 1045 self._chunkclear() |
1059 | 1046 |
1060 def addrevision(self, text, transaction, link, p1, p2, d=None): | 1047 def addrevision(self, text, transaction, link, p1, p2, d=None): |
1061 """add a revision to the log | 1048 """add a revision to the log |
1062 | 1049 |
1063 text - the revision data to add | 1050 text - the revision data to add |
1339 | 1326 |
1340 transaction.add(self.indexfile, end) | 1327 transaction.add(self.indexfile, end) |
1341 | 1328 |
1342 # then reset internal state in memory to forget those revisions | 1329 # then reset internal state in memory to forget those revisions |
1343 self._cache = None | 1330 self._cache = None |
1344 self._chunkcache = (0, '') | 1331 self._chunkclear() |
1345 for x in xrange(rev, len(self)): | 1332 for x in xrange(rev, len(self)): |
1346 del self.nodemap[self.node(x)] | 1333 del self.nodemap[self.node(x)] |
1347 | 1334 |
1348 del self.index[rev:-1] | 1335 del self.index[rev:-1] |
1349 | 1336 |