comparison mercurial/revlog.py @ 26377:dfef0d3be65e

revlog: support using an existing file handle when reading revlogs

Currently, the low-level revlog reading code always opens a new file handle. In some key scenarios, the revlog is already opened and an existing file handle could be used to read. This patch paves the road to that by teaching various revlog reading functions to accept an optional existing file handle to read from.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 27 Sep 2015 15:48:35 -0700
parents 344a1621674b
children e749707f0afb
comparison
equal deleted inserted replaced
26376:344a1621674b 26377:dfef0d3be65e
930 if o + len(d) == offset and len(d) + len(data) < _chunksize: 930 if o + len(d) == offset and len(d) + len(data) < _chunksize:
931 self._chunkcache = o, d + data 931 self._chunkcache = o, d + data
932 else: 932 else:
933 self._chunkcache = offset, data 933 self._chunkcache = offset, data
934 934
935 def _loadchunk(self, offset, length): 935 def _loadchunk(self, offset, length, df=None):
936 if self._inline: 936 """Load a chunk/segment from the revlog.
937 df = self.opener(self.indexfile) 937
938 Accepts absolute offset, length to read, and an optional existing
939 file handle to read from.
940
941 If an existing file handle is passed, it will be seeked and the
942 original seek position will NOT be restored.
943 """
944 if df is not None:
945 closehandle = False
938 else: 946 else:
939 df = self.opener(self.datafile) 947 if self._inline:
948 df = self.opener(self.indexfile)
949 else:
950 df = self.opener(self.datafile)
951 closehandle = True
940 952
941 # Cache data both forward and backward around the requested 953 # Cache data both forward and backward around the requested
942 # data, in a fixed size window. This helps speed up operations 954 # data, in a fixed size window. This helps speed up operations
943 # involving reading the revlog backwards. 955 # involving reading the revlog backwards.
944 cachesize = self._chunkcachesize 956 cachesize = self._chunkcachesize
945 realoffset = offset & ~(cachesize - 1) 957 realoffset = offset & ~(cachesize - 1)
946 reallength = (((offset + length + cachesize) & ~(cachesize - 1)) 958 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
947 - realoffset) 959 - realoffset)
948 df.seek(realoffset) 960 df.seek(realoffset)
949 d = df.read(reallength) 961 d = df.read(reallength)
950 df.close() 962 if closehandle:
963 df.close()
951 self._addchunk(realoffset, d) 964 self._addchunk(realoffset, d)
952 if offset != realoffset or reallength != length: 965 if offset != realoffset or reallength != length:
953 return util.buffer(d, offset - realoffset, length) 966 return util.buffer(d, offset - realoffset, length)
954 return d 967 return d
955 968
956 def _getchunk(self, offset, length): 969 def _getchunk(self, offset, length, df=None):
957 o, d = self._chunkcache 970 o, d = self._chunkcache
958 l = len(d) 971 l = len(d)
959 972
960 # is it in the cache? 973 # is it in the cache?
961 cachestart = offset - o 974 cachestart = offset - o
963 if cachestart >= 0 and cacheend <= l: 976 if cachestart >= 0 and cacheend <= l:
964 if cachestart == 0 and cacheend == l: 977 if cachestart == 0 and cacheend == l:
965 return d # avoid a copy 978 return d # avoid a copy
966 return util.buffer(d, cachestart, cacheend - cachestart) 979 return util.buffer(d, cachestart, cacheend - cachestart)
967 980
968 return self._loadchunk(offset, length) 981 return self._loadchunk(offset, length, df=df)
969 982
970 def _chunkraw(self, startrev, endrev): 983 def _chunkraw(self, startrev, endrev, df=None):
971 start = self.start(startrev) 984 start = self.start(startrev)
972 end = self.end(endrev) 985 end = self.end(endrev)
973 if self._inline: 986 if self._inline:
974 start += (startrev + 1) * self._io.size 987 start += (startrev + 1) * self._io.size
975 end += (endrev + 1) * self._io.size 988 end += (endrev + 1) * self._io.size
976 length = end - start 989 length = end - start
977 return self._getchunk(start, length) 990 return self._getchunk(start, length, df=df)
978 991
979 def _chunk(self, rev): 992 def _chunk(self, rev, df=None):
980 return decompress(self._chunkraw(rev, rev)) 993 return decompress(self._chunkraw(rev, rev, df=df))
981 994
982 def _chunks(self, revs): 995 def _chunks(self, revs, df=None):
983 '''faster version of [self._chunk(rev) for rev in revs] 996 '''faster version of [self._chunk(rev) for rev in revs]
984 997
985 Assumes that revs is in ascending order.''' 998 Assumes that revs is in ascending order.'''
986 if not revs: 999 if not revs:
987 return [] 1000 return []
997 # preload the cache 1010 # preload the cache
998 try: 1011 try:
999 while True: 1012 while True:
1000 # ensure that the cache doesn't change out from under us 1013 # ensure that the cache doesn't change out from under us
1001 _cache = self._chunkcache 1014 _cache = self._chunkcache
1002 self._chunkraw(revs[0], revs[-1]) 1015 self._chunkraw(revs[0], revs[-1], df=df)
1003 if _cache == self._chunkcache: 1016 if _cache == self._chunkcache:
1004 break 1017 break
1005 offset, data = _cache 1018 offset, data = _cache
1006 except OverflowError: 1019 except OverflowError:
1007 # issue4215 - we can't cache a run of chunks greater than 1020 # issue4215 - we can't cache a run of chunks greater than
1008 # 2G on Windows 1021 # 2G on Windows
1009 return [self._chunk(rev) for rev in revs] 1022 return [self._chunk(rev, df=df) for rev in revs]
1010 1023
1011 for rev in revs: 1024 for rev in revs:
1012 chunkstart = start(rev) 1025 chunkstart = start(rev)
1013 if inline: 1026 if inline:
1014 chunkstart += (rev + 1) * iosize 1027 chunkstart += (rev + 1) * iosize
1036 return str(self._chunk(rev2)) 1049 return str(self._chunk(rev2))
1037 1050
1038 return mdiff.textdiff(self.revision(rev1), 1051 return mdiff.textdiff(self.revision(rev1),
1039 self.revision(rev2)) 1052 self.revision(rev2))
1040 1053
1041 def revision(self, nodeorrev): 1054 def revision(self, nodeorrev, _df=None):
1042 """return an uncompressed revision of a given node or revision 1055 """return an uncompressed revision of a given node or revision
1043 number. 1056 number.
1057
1058 _df is an existing file handle to read from. It is meant to only be
1059 used internally.
1044 """ 1060 """
1045 if isinstance(nodeorrev, int): 1061 if isinstance(nodeorrev, int):
1046 rev = nodeorrev 1062 rev = nodeorrev
1047 node = self.node(rev) 1063 node = self.node(rev)
1048 else: 1064 else:
1089 chain.reverse() 1105 chain.reverse()
1090 1106
1091 # drop cache to save memory 1107 # drop cache to save memory
1092 self._cache = None 1108 self._cache = None
1093 1109
1094 bins = self._chunks(chain) 1110 bins = self._chunks(chain, df=_df)
1095 if text is None: 1111 if text is None:
1096 text = str(bins[0]) 1112 text = str(bins[0])
1097 bins = bins[1:] 1113 bins = bins[1:]
1098 1114
1099 text = mdiff.patches(text, bins) 1115 text = mdiff.patches(text, bins)