Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 26377:dfef0d3be65e
revlog: support using an existing file handle when reading revlogs
Currently, the low-level revlog reading code always opens a new file
handle. In some key scenarios, the revlog is already opened and an
existing file handle could be used to read. This patch paves the
road to that by teaching various revlog reading functions to accept
an optional existing file handle to read from.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sun, 27 Sep 2015 15:48:35 -0700 |
parents | 344a1621674b |
children | e749707f0afb |
comparison
equal
deleted
inserted
replaced
26376:344a1621674b | 26377:dfef0d3be65e |
---|---|
930 if o + len(d) == offset and len(d) + len(data) < _chunksize: | 930 if o + len(d) == offset and len(d) + len(data) < _chunksize: |
931 self._chunkcache = o, d + data | 931 self._chunkcache = o, d + data |
932 else: | 932 else: |
933 self._chunkcache = offset, data | 933 self._chunkcache = offset, data |
934 | 934 |
935 def _loadchunk(self, offset, length): | 935 def _loadchunk(self, offset, length, df=None): |
936 if self._inline: | 936 """Load a chunk/segment from the revlog. |
937 df = self.opener(self.indexfile) | 937 |
| | 938 Accepts absolute offset, length to read, and an optional existing |
| | 939 file handle to read from. |
| | 940 |
| | 941 If an existing file handle is passed, it will be seeked and the |
| | 942 original seek position will NOT be restored. |
| | 943 """ |
| | 944 if df is not None: |
| | 945 closehandle = False |
938 else: | 946 else: |
939 df = self.opener(self.datafile) | 947 if self._inline: |
| | 948 df = self.opener(self.indexfile) |
| | 949 else: |
| | 950 df = self.opener(self.datafile) |
| | 951 closehandle = True |
940 | 952 |
941 # Cache data both forward and backward around the requested | 953 # Cache data both forward and backward around the requested |
942 # data, in a fixed size window. This helps speed up operations | 954 # data, in a fixed size window. This helps speed up operations |
943 # involving reading the revlog backwards. | 955 # involving reading the revlog backwards. |
944 cachesize = self._chunkcachesize | 956 cachesize = self._chunkcachesize |
945 realoffset = offset & ~(cachesize - 1) | 957 realoffset = offset & ~(cachesize - 1) |
946 reallength = (((offset + length + cachesize) & ~(cachesize - 1)) | 958 reallength = (((offset + length + cachesize) & ~(cachesize - 1)) |
947 - realoffset) | 959 - realoffset) |
948 df.seek(realoffset) | 960 df.seek(realoffset) |
949 d = df.read(reallength) | 961 d = df.read(reallength) |
950 df.close() | 962 if closehandle: |
| | 963 df.close() |
951 self._addchunk(realoffset, d) | 964 self._addchunk(realoffset, d) |
952 if offset != realoffset or reallength != length: | 965 if offset != realoffset or reallength != length: |
953 return util.buffer(d, offset - realoffset, length) | 966 return util.buffer(d, offset - realoffset, length) |
954 return d | 967 return d |
955 | 968 |
956 def _getchunk(self, offset, length): | 969 def _getchunk(self, offset, length, df=None): |
957 o, d = self._chunkcache | 970 o, d = self._chunkcache |
958 l = len(d) | 971 l = len(d) |
959 | 972 |
960 # is it in the cache? | 973 # is it in the cache? |
961 cachestart = offset - o | 974 cachestart = offset - o |
963 if cachestart >= 0 and cacheend <= l: | 976 if cachestart >= 0 and cacheend <= l: |
964 if cachestart == 0 and cacheend == l: | 977 if cachestart == 0 and cacheend == l: |
965 return d # avoid a copy | 978 return d # avoid a copy |
966 return util.buffer(d, cachestart, cacheend - cachestart) | 979 return util.buffer(d, cachestart, cacheend - cachestart) |
967 | 980 |
968 return self._loadchunk(offset, length) | 981 return self._loadchunk(offset, length, df=df) |
969 | 982 |
970 def _chunkraw(self, startrev, endrev): | 983 def _chunkraw(self, startrev, endrev, df=None): |
971 start = self.start(startrev) | 984 start = self.start(startrev) |
972 end = self.end(endrev) | 985 end = self.end(endrev) |
973 if self._inline: | 986 if self._inline: |
974 start += (startrev + 1) * self._io.size | 987 start += (startrev + 1) * self._io.size |
975 end += (endrev + 1) * self._io.size | 988 end += (endrev + 1) * self._io.size |
976 length = end - start | 989 length = end - start |
977 return self._getchunk(start, length) | 990 return self._getchunk(start, length, df=df) |
978 | 991 |
979 def _chunk(self, rev): | 992 def _chunk(self, rev, df=None): |
980 return decompress(self._chunkraw(rev, rev)) | 993 return decompress(self._chunkraw(rev, rev, df=df)) |
981 | 994 |
982 def _chunks(self, revs): | 995 def _chunks(self, revs, df=None): |
983 '''faster version of [self._chunk(rev) for rev in revs] | 996 '''faster version of [self._chunk(rev) for rev in revs] |
984 | 997 |
985 Assumes that revs is in ascending order.''' | 998 Assumes that revs is in ascending order.''' |
986 if not revs: | 999 if not revs: |
987 return [] | 1000 return [] |
997 # preload the cache | 1010 # preload the cache |
998 try: | 1011 try: |
999 while True: | 1012 while True: |
1000 # ensure that the cache doesn't change out from under us | 1013 # ensure that the cache doesn't change out from under us |
1001 _cache = self._chunkcache | 1014 _cache = self._chunkcache |
1002 self._chunkraw(revs[0], revs[-1]) | 1015 self._chunkraw(revs[0], revs[-1], df=df) |
1003 if _cache == self._chunkcache: | 1016 if _cache == self._chunkcache: |
1004 break | 1017 break |
1005 offset, data = _cache | 1018 offset, data = _cache |
1006 except OverflowError: | 1019 except OverflowError: |
1007 # issue4215 - we can't cache a run of chunks greater than | 1020 # issue4215 - we can't cache a run of chunks greater than |
1008 # 2G on Windows | 1021 # 2G on Windows |
1009 return [self._chunk(rev) for rev in revs] | 1022 return [self._chunk(rev, df=df) for rev in revs] |
1010 | 1023 |
1011 for rev in revs: | 1024 for rev in revs: |
1012 chunkstart = start(rev) | 1025 chunkstart = start(rev) |
1013 if inline: | 1026 if inline: |
1014 chunkstart += (rev + 1) * iosize | 1027 chunkstart += (rev + 1) * iosize |
1036 return str(self._chunk(rev2)) | 1049 return str(self._chunk(rev2)) |
1037 | 1050 |
1038 return mdiff.textdiff(self.revision(rev1), | 1051 return mdiff.textdiff(self.revision(rev1), |
1039 self.revision(rev2)) | 1052 self.revision(rev2)) |
1040 | 1053 |
1041 def revision(self, nodeorrev): | 1054 def revision(self, nodeorrev, _df=None): |
1042 """return an uncompressed revision of a given node or revision | 1055 """return an uncompressed revision of a given node or revision |
1043 number. | 1056 number. |
| | 1057 |
| | 1058 _df is an existing file handle to read from. It is meant to only be |
| | 1059 used internally. |
1044 """ | 1060 """ |
1045 if isinstance(nodeorrev, int): | 1061 if isinstance(nodeorrev, int): |
1046 rev = nodeorrev | 1062 rev = nodeorrev |
1047 node = self.node(rev) | 1063 node = self.node(rev) |
1048 else: | 1064 else: |
1089 chain.reverse() | 1105 chain.reverse() |
1090 | 1106 |
1091 # drop cache to save memory | 1107 # drop cache to save memory |
1092 self._cache = None | 1108 self._cache = None |
1093 | 1109 |
1094 bins = self._chunks(chain) | 1110 bins = self._chunks(chain, df=_df) |
1095 if text is None: | 1111 if text is None: |
1096 text = str(bins[0]) | 1112 text = str(bins[0]) |
1097 bins = bins[1:] | 1113 bins = bins[1:] |
1098 | 1114 |
1099 text = mdiff.patches(text, bins) | 1115 text = mdiff.patches(text, bins) |