Mercurial > public > mercurial-scm > hg-stable
diff mercurial/revlog.py @ 47395:e6292eb33384
revlog: store sidedata in their own file
This makes sidedata manipulation simpler and results in more compact data when
traversing either data or sidedata.
Differential Revision: https://phab.mercurial-scm.org/D10787
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 28 May 2021 23:41:17 +0200 |
parents | 75e1104f23a2 |
children | 33d626910374 |
line wrap: on
line diff
--- a/mercurial/revlog.py Fri May 28 23:41:12 2021 +0200 +++ b/mercurial/revlog.py Fri May 28 23:41:17 2021 +0200 @@ -1,4 +1,5 @@ # revlog.py - storage back-end for mercurial +# coding: utf8 # # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> # @@ -260,6 +261,11 @@ b'partial read of revlog %s; expected %d bytes from offset %d, got %d' ) +FILE_TOO_SHORT_MSG = _( + b'cannot read from revlog %s;' + b' expected %d bytes from offset %d, data size is %d' +) + class revlog(object): """ @@ -401,6 +407,7 @@ self._docket_file = None self._indexfile = None self._datafile = None + self._sidedatafile = None self._nodemap_file = None self.postfix = postfix self._trypending = trypending @@ -445,7 +452,7 @@ # custom flags. self._flagprocessors = dict(flagutil.flagprocessors) - # 2-tuple of file handles being used for active writing. + # 3-tuple of file handles being used for active writing. self._writinghandles = None # prevent nesting of addgroup self._adding_group = None @@ -634,6 +641,7 @@ if self._docket is not None: self._datafile = self._docket.data_filepath() + self._sidedatafile = self._docket.sidedata_filepath() elif self.postfix is None: self._datafile = b'%s.d' % self.radix else: @@ -803,9 +811,14 @@ with func() as fp: yield fp + @contextlib.contextmanager def _sidedatareadfp(self): """file object suitable to read sidedata""" - return self._datareadfp() + if self._writinghandles: + yield self._writinghandles[2] + else: + with self.opener(self._sidedatafile) as fp: + yield fp def tiprev(self): return len(self.index) - 1 @@ -909,6 +922,23 @@ def start(self, rev): return int(self.index[rev][0] >> 16) + def sidedata_cut_off(self, rev): + sd_cut_off = self.index[rev][8] + if sd_cut_off != 0: + return sd_cut_off + # This is some annoying dance, because entries without sidedata + # currently use 0 as their ofsset. (instead of previous-offset + + # previous-size) + # + # We should reconsider this sidedata → 0 sidata_offset policy. + # In the meantime, we need this. + while 0 <= rev: + e = self.index[rev] + if e[9] != 0: + return e[8] + e[9] + rev -= 1 + return 0 + def flags(self, rev): return self.index[rev][0] & 0xFFFF @@ -2074,11 +2104,19 @@ # XXX this need caching, as we do for data with self._sidedatareadfp() as sdf: - sdf.seek(sidedata_offset) + if self._docket.sidedata_end < sidedata_offset + sidedata_size: + filename = self._sidedatafile + end = self._docket.sidedata_end + offset = sidedata_offset + length = sidedata_size + m = FILE_TOO_SHORT_MSG % (filename, length, offset, end) + raise error.RevlogError(m) + + sdf.seek(sidedata_offset, os.SEEK_SET) comp_segment = sdf.read(sidedata_size) if len(comp_segment) < sidedata_size: - filename = self._datafile + filename = self._sidedatafile length = sidedata_size offset = sidedata_offset got = len(comp_segment) @@ -2215,7 +2253,7 @@ if existing_handles: # switched from inline to conventional reopen the index ifh = self.__index_write_fp() - self._writinghandles = (ifh, new_dfh) + self._writinghandles = (ifh, new_dfh, None) new_dfh = None finally: if new_dfh is not None: @@ -2233,7 +2271,7 @@ if self._writinghandles is not None: yield else: - ifh = dfh = None + ifh = dfh = sdfh = None try: r = len(self) # opening the data file. @@ -2253,6 +2291,17 @@ raise dfh = self._datafp(b"w+") transaction.add(self._datafile, dsize) + if self._sidedatafile is not None: + try: + sdfh = self.opener(self._sidedatafile, mode=b"r+") + dfh.seek(self._docket.sidedata_end, os.SEEK_SET) + except IOError as inst: + if inst.errno != errno.ENOENT: + raise + sdfh = self.opener(self._sidedatafile, mode=b"w+") + transaction.add( + self._sidedatafile, self._docket.sidedata_end + ) # opening the index file. isize = r * self.index.entry_size @@ -2262,7 +2311,7 @@ else: transaction.add(self._indexfile, isize) # exposing all file handle for writing. - self._writinghandles = (ifh, dfh) + self._writinghandles = (ifh, dfh, sdfh) yield if self._docket is not None: self._write_docket(transaction) @@ -2270,6 +2319,8 @@ self._writinghandles = None if dfh is not None: dfh.close() + if sdfh is not None: + dfh.close() # closing the index file last to avoid exposing referent to # potential unflushed data content. if ifh is not None: @@ -2513,7 +2564,8 @@ offset = self._get_data_offset(prev) if self._concurrencychecker: - ifh, dfh = self._writinghandles + ifh, dfh, sdfh = self._writinghandles + # XXX no checking for the sidedata file if self._inline: # offset is "as if" it were in the .d file, so we need to add on # the size of the entry metadata. @@ -2570,7 +2622,7 @@ if sidedata and self.hassidedata: sidedata_compression_mode = COMP_MODE_PLAIN serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) - sidedata_offset = offset + deltainfo.deltalen + sidedata_offset = self._docket.sidedata_end h, comp_sidedata = self.compress(serialized_sidedata) if ( h != b'u' @@ -2622,6 +2674,7 @@ link, offset, serialized_sidedata, + sidedata_offset, ) rawtext = btext[0] @@ -2648,7 +2701,9 @@ else: return self._docket.data_end - def _writeentry(self, transaction, entry, data, link, offset, sidedata): + def _writeentry( + self, transaction, entry, data, link, offset, sidedata, sidedata_offset + ): # Files opened in a+ mode have inconsistent behavior on various # platforms. Windows requires that a file positioning call be made # when the file handle transitions between reads and writes. See @@ -2664,7 +2719,7 @@ if self._writinghandles is None: msg = b'adding revision outside `revlog._writing` context' raise error.ProgrammingError(msg) - ifh, dfh = self._writinghandles + ifh, dfh, sdfh = self._writinghandles if self._docket is None: ifh.seek(0, os.SEEK_END) else: @@ -2674,16 +2729,20 @@ dfh.seek(0, os.SEEK_END) else: dfh.seek(self._docket.data_end, os.SEEK_SET) + if sdfh: + sdfh.seek(self._docket.sidedata_end, os.SEEK_SET) curr = len(self) - 1 if not self._inline: transaction.add(self._datafile, offset) + if self._sidedatafile: + transaction.add(self._sidedatafile, sidedata_offset) transaction.add(self._indexfile, curr * len(entry)) if data[0]: dfh.write(data[0]) dfh.write(data[1]) if sidedata: - dfh.write(sidedata) + sdfh.write(sidedata) ifh.write(entry) else: offset += curr * self.index.entry_size @@ -2691,12 +2750,12 @@ ifh.write(entry) ifh.write(data[0]) ifh.write(data[1]) - if sidedata: - ifh.write(sidedata) + assert not sidedata self._enforceinlinesize(transaction) if self._docket is not None: self._docket.index_end = self._writinghandles[0].tell() self._docket.data_end = self._writinghandles[1].tell() + self._docket.sidedata_end = self._writinghandles[2].tell() nodemaputil.setup_persistent_nodemap(transaction, self) @@ -2866,12 +2925,17 @@ else: end = data_end + (rev * self.index.entry_size) + if self._sidedatafile: + sidedata_end = self.sidedata_cut_off(rev) + transaction.add(self._sidedatafile, sidedata_end) + transaction.add(self._indexfile, end) if self._docket is not None: # XXX we could, leverage the docket while stripping. However it is # not powerfull enough at the time of this comment self._docket.index_end = end self._docket.data_end = data_end + self._docket.sidedata_end = sidedata_end self._docket.write(transaction, stripping=True) # then reset internal state in memory to forget those revisions @@ -3398,13 +3462,10 @@ new_entries = [] # append the new sidedata with self._writing(transaction): - ifh, dfh = self._writinghandles - if self._docket is not None: - dfh.seek(self._docket.data_end, os.SEEK_SET) - else: - dfh.seek(0, os.SEEK_END) - - current_offset = dfh.tell() + ifh, dfh, sdfh = self._writinghandles + dfh.seek(self._docket.sidedata_end, os.SEEK_SET) + + current_offset = sdfh.tell() for rev in range(startrev, endrev + 1): entry = self.index[rev] new_sidedata, flags = sidedatautil.run_sidedata_helpers( @@ -3455,12 +3516,11 @@ ) # the sidedata computation might have move the file cursors around - dfh.seek(current_offset, os.SEEK_SET) - dfh.write(serialized_sidedata) + sdfh.seek(current_offset, os.SEEK_SET) + sdfh.write(serialized_sidedata) new_entries.append(entry_update) current_offset += len(serialized_sidedata) - if self._docket is not None: - self._docket.data_end = dfh.tell() + self._docket.sidedata_end = sdfh.tell() # rewrite the new index entries ifh.seek(startrev * self.index.entry_size)