mercurial/revlog.py
changeset 51083 c3748f38dcd0
parent 51082 118c99c6092b
child 51084 df50a1592e0c
--- a/mercurial/revlog.py	Tue Oct 17 05:17:02 2023 +0200
+++ b/mercurial/revlog.py	Tue Oct 17 06:02:33 2023 +0200
@@ -337,6 +337,178 @@
     lazy_delta_base = attr.ib(default=False)
 
 
+class _InnerRevlog:
+    """An inner layer of the revlog object
+
+    That layer exist to be able to delegate some operation to Rust, its
+    boundaries are arbitrary and based on what we can delegate to Rust.
+    """
+
+    def __init__(
+        self,
+        opener,
+        index,
+        index_file,
+        data_file,
+        sidedata_file,
+        inline,
+        data_config,
+        chunk_cache,
+    ):
+        self.opener = opener
+        self.index = index
+
+        self.index_file = index_file
+        self.data_file = data_file
+        self.sidedata_file = sidedata_file
+        self.inline = inline
+        self.data_config = data_config
+
+        # index
+
+        # 3-tuple of file handles being used for active writing.
+        self._writinghandles = None
+
+        self._segmentfile = randomaccessfile.randomaccessfile(
+            self.opener,
+            (self.index_file if self.inline else self.data_file),
+            self.data_config.chunk_cache_size,
+            chunk_cache,
+        )
+        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
+            self.opener,
+            self.sidedata_file,
+            self.data_config.chunk_cache_size,
+        )
+
+    # Derived from index values.
+
+    def start(self, rev):
+        """the offset of the data chunk for this revision"""
+        return int(self.index[rev][0] >> 16)
+
+    def length(self, rev):
+        """the length of the data chunk for this revision"""
+        return self.index[rev][1]
+
+    def end(self, rev):
+        """the end of the data chunk for this revision"""
+        return self.start(rev) + self.length(rev)
+
+    @contextlib.contextmanager
+    def reading(self):
+        """Context manager that keeps data and sidedata files open for reading"""
+        if len(self.index) == 0:
+            yield  # nothing to be read
+        else:
+            with self._segmentfile.reading():
+                with self._segmentfile_sidedata.reading():
+                    yield
+
+    @property
+    def is_writing(self):
+        """True is a writing context is open"""
+        return self._writinghandles is not None
+
+    @contextlib.contextmanager
+    def writing(self, transaction, data_end=None, sidedata_end=None):
+        """Open the revlog files for writing
+
+        Add content to a revlog should be done within such context.
+        """
+        if self.is_writing:
+            yield
+        else:
+            ifh = dfh = sdfh = None
+            try:
+                r = len(self.index)
+                # opening the data file.
+                dsize = 0
+                if r:
+                    dsize = self.end(r - 1)
+                dfh = None
+                if not self.inline:
+                    try:
+                        dfh = self.opener(self.data_file, mode=b"r+")
+                        if data_end is None:
+                            dfh.seek(0, os.SEEK_END)
+                        else:
+                            dfh.seek(data_end, os.SEEK_SET)
+                    except FileNotFoundError:
+                        dfh = self.opener(self.data_file, mode=b"w+")
+                    transaction.add(self.data_file, dsize)
+                if self.sidedata_file is not None:
+                    assert sidedata_end is not None
+                    # revlog-v2 does not inline, help Pytype
+                    assert dfh is not None
+                    try:
+                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
+                        dfh.seek(sidedata_end, os.SEEK_SET)
+                    except FileNotFoundError:
+                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
+                    transaction.add(self.sidedata_file, sidedata_end)
+
+                # opening the index file.
+                isize = r * self.index.entry_size
+                ifh = self.__index_write_fp()
+                if self.inline:
+                    transaction.add(self.index_file, dsize + isize)
+                else:
+                    transaction.add(self.index_file, isize)
+                # exposing all file handle for writing.
+                self._writinghandles = (ifh, dfh, sdfh)
+                self._segmentfile.writing_handle = ifh if self.inline else dfh
+                self._segmentfile_sidedata.writing_handle = sdfh
+                yield
+            finally:
+                self._writinghandles = None
+                self._segmentfile.writing_handle = None
+                self._segmentfile_sidedata.writing_handle = None
+                if dfh is not None:
+                    dfh.close()
+                if sdfh is not None:
+                    sdfh.close()
+                # closing the index file last to avoid exposing referent to
+                # potential unflushed data content.
+                if ifh is not None:
+                    ifh.close()
+
+    def __index_write_fp(self, index_end=None):
+        """internal method to open the index file for writing
+
+        You should not use this directly and use `_writing` instead
+        """
+        try:
+            f = self.opener(
+                self.index_file,
+                mode=b"r+",
+                checkambig=self.data_config.check_ambig,
+            )
+            if index_end is None:
+                f.seek(0, os.SEEK_END)
+            else:
+                f.seek(index_end, os.SEEK_SET)
+            return f
+        except FileNotFoundError:
+            return self.opener(
+                self.index_file,
+                mode=b"w+",
+                checkambig=self.data_config.check_ambig,
+            )
+
+    def __index_new_fp(self):
+        """internal method to create a new index file for writing
+
+        You should not use this unless you are upgrading from inline revlog
+        """
+        return self.opener(
+            self.index_file,
+            mode=b"w",
+            checkambig=self.data_config.check_ambig,
+            atomictemp=True,
+        )
+
+
 class revlog:
     """
     the underlying revision storage object
@@ -477,13 +649,11 @@
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
-
-        # 3-tuple of file handles being used for active writing.
-        self._writinghandles = None
         # prevent nesting of addgroup
         self._adding_group = None
 
-        self._loadindex()
+        chunk_cache = self._loadindex()
+        self._load_inner(chunk_cache)
 
         self._concurrencychecker = concurrencychecker
 
@@ -1007,22 +1177,25 @@
                 _(b"index %s is corrupted") % self.display_id
             )
         self.index = index
-        self._segmentfile = randomaccessfile.randomaccessfile(
-            self.opener,
-            (self._indexfile if self._inline else self._datafile),
-            self.data_config.chunk_cache_size,
-            chunkcache,
-        )
-        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
-            self.opener,
-            self._sidedatafile,
-            self.data_config.chunk_cache_size,
-        )
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
         # revlog header -> revlog compressor
         self._decompressors = {}
 
+        return chunkcache
+
+    def _load_inner(self, chunk_cache):
+        self._inner = _InnerRevlog(
+            opener=self.opener,
+            index=self.index,
+            index_file=self._indexfile,
+            data_file=self._datafile,
+            sidedata_file=self._sidedatafile,
+            inline=self._inline,
+            data_config=self.data_config,
+            chunk_cache=chunk_cache,
+        )
+
     def get_revlog(self):
         """simple function to mirror API of other not-really-revlog API"""
         return self
@@ -1073,35 +1246,6 @@
         c = self._get_decompressor(t)
         return c.decompress
 
-    def __index_write_fp(self):
-        # You should not use this directly and use `_writing` instead
-        try:
-            f = self.opener(
-                self._indexfile,
-                mode=b"r+",
-                checkambig=self.data_config.check_ambig,
-            )
-            if self._docket is None:
-                f.seek(0, os.SEEK_END)
-            else:
-                f.seek(self._docket.index_end, os.SEEK_SET)
-            return f
-        except FileNotFoundError:
-            return self.opener(
-                self._indexfile,
-                mode=b"w+",
-                checkambig=self.data_config.check_ambig,
-            )
-
-    def __index_new_fp(self):
-        # You should not use this unless you are upgrading from inline revlog
-        return self.opener(
-            self._indexfile,
-            mode=b"w",
-            checkambig=self.data_config.check_ambig,
-            atomictemp=True,
-        )
-
     def _datafp(self, mode=b'r'):
         """file object for the revlog's data file"""
         return self.opener(self._datafile, mode=mode)
@@ -1160,8 +1304,8 @@
         """Clear in-memory caches"""
         self._revisioncache = None
         self._chainbasecache.clear()
-        self._segmentfile.clear_cache()
-        self._segmentfile_sidedata.clear_cache()
+        self._inner._segmentfile.clear_cache()
+        self._inner._segmentfile_sidedata.clear_cache()
         self._pcache = {}
         self._nodemap_docket = None
         self.index.clearcaches()
@@ -2041,7 +2185,7 @@
             end += (endrev + 1) * self.index.entry_size
         length = end - start
 
-        return start, self._segmentfile.read_chunk(start, length)
+        return start, self._inner._segmentfile.read_chunk(start, length)
 
     def _chunk(self, rev):
         """Obtain a single decompressed chunk for a revision.
@@ -2318,7 +2462,7 @@
             m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
             raise error.RevlogError(m)
 
-        comp_segment = self._segmentfile_sidedata.read_chunk(
+        comp_segment = self._inner._segmentfile_sidedata.read_chunk(
             sidedata_offset, sidedata_size
         )
 
@@ -2423,15 +2567,15 @@
         tr.add(self._datafile, 0)
 
         existing_handles = False
-        if self._writinghandles is not None:
+        if self._inner._writinghandles is not None:
             existing_handles = True
-            fp = self._writinghandles[0]
+            fp = self._inner._writinghandles[0]
             fp.flush()
             fp.close()
             # We can't use the cached file handle after close(). So prevent
             # its usage.
-            self._writinghandles = None
-            self._segmentfile.writing_handle = None
+            self._inner._writinghandles = None
+            self._inner._segmentfile.writing_handle = None
             # No need to deal with sidedata writing handle as it is only
             # relevant with revlog-v2 which is never inline, not reaching
             # this code
@@ -2451,11 +2595,13 @@
                 maybe_self = weak_self()
                 if maybe_self is not None:
                     maybe_self._indexfile = old_index_file_path
+                    maybe_self._inner.index_file = maybe_self._indexfile
 
             def abort_callback(tr):
                 maybe_self = weak_self()
                 if maybe_self is not None:
                     maybe_self._indexfile = old_index_file_path
+                    maybe_self._inner.index_file = old_index_file_path
 
             tr.registertmp(new_index_file_path)
             if self.target[1] is not None:
@@ -2475,9 +2621,11 @@
 
             if side_write:
                 self._indexfile = new_index_file_path
-            with self.__index_new_fp() as fp:
+                self._inner.index_file = self._indexfile
+            with self._inner._InnerRevlog__index_new_fp() as fp:
                 self._format_flags &= ~FLAG_INLINE_DATA
                 self._inline = False
+                self._inner.inline = False
                 for i in self:
                     e = self.index.entry_binary(i)
                     if i == 0 and self._docket is None:
@@ -2492,7 +2640,7 @@
                 # index when we exit the context manager
 
             nodemaputil.setup_persistent_nodemap(tr, self)
-            self._segmentfile = randomaccessfile.randomaccessfile(
+            self._inner._segmentfile = randomaccessfile.randomaccessfile(
                 self.opener,
                 self._datafile,
                 self.data_config.chunk_cache_size,
@@ -2500,9 +2648,14 @@
 
             if existing_handles:
                 # switched from inline to conventional reopen the index
-                ifh = self.__index_write_fp()
-                self._writinghandles = (ifh, new_dfh, None)
-                self._segmentfile.writing_handle = new_dfh
+                index_end = None
+                if self._docket is not None:
+                    index_end = self._docket.index_end
+                ifh = self._inner._InnerRevlog__index_write_fp(
+                    index_end=index_end
+                )
+                self._inner._writinghandles = (ifh, new_dfh, None)
+                self._inner._segmentfile.writing_handle = new_dfh
                 new_dfh = None
                 # No need to deal with sidedata writing handle as it is only
                 # relevant with revlog-v2 which is never inline, not reaching
@@ -2516,13 +2669,8 @@
 
     @contextlib.contextmanager
     def reading(self):
-        """Context manager that keeps data and sidedata files open for reading"""
-        if len(self.index) == 0:
-            yield  # nothing to be read
-        else:
-            with self._segmentfile.reading():
-                with self._segmentfile_sidedata.reading():
-                    yield
+        with self._inner.reading():
+            yield
 
     @contextlib.contextmanager
     def _writing(self, transaction):
@@ -2530,65 +2678,22 @@
             msg = b'try to write in a `trypending` revlog: %s'
             msg %= self.display_id
             raise error.ProgrammingError(msg)
-        if self._writinghandles is not None:
+        if self._inner.is_writing:
             yield
         else:
-            ifh = dfh = sdfh = None
-            try:
-                r = len(self)
-                # opening the data file.
-                dsize = 0
-                if r:
-                    dsize = self.end(r - 1)
-                dfh = None
-                if not self._inline:
-                    try:
-                        dfh = self._datafp(b"r+")
-                        if self._docket is None:
-                            dfh.seek(0, os.SEEK_END)
-                        else:
-                            dfh.seek(self._docket.data_end, os.SEEK_SET)
-                    except FileNotFoundError:
-                        dfh = self._datafp(b"w+")
-                    transaction.add(self._datafile, dsize)
-                if self._sidedatafile is not None:
-                    # revlog-v2 does not inline, help Pytype
-                    assert dfh is not None
-                    try:
-                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
-                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
-                    except FileNotFoundError:
-                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
-                    transaction.add(
-                        self._sidedatafile, self._docket.sidedata_end
-                    )
-
-                # opening the index file.
-                isize = r * self.index.entry_size
-                ifh = self.__index_write_fp()
-                if self._inline:
-                    transaction.add(self._indexfile, dsize + isize)
-                else:
-                    transaction.add(self._indexfile, isize)
-                # exposing all file handle for writing.
-                self._writinghandles = (ifh, dfh, sdfh)
-                self._segmentfile.writing_handle = ifh if self._inline else dfh
-                self._segmentfile_sidedata.writing_handle = sdfh
+            data_end = None
+            sidedata_end = None
+            if self._docket is not None:
+                data_end = self._docket.data_end
+                sidedata_end = self._docket.sidedata_end
+            with self._inner.writing(
+                transaction,
+                data_end=data_end,
+                sidedata_end=sidedata_end,
+            ):
                 yield
                 if self._docket is not None:
                     self._write_docket(transaction)
-            finally:
-                self._writinghandles = None
-                self._segmentfile.writing_handle = None
-                self._segmentfile_sidedata.writing_handle = None
-                if dfh is not None:
-                    dfh.close()
-                if sdfh is not None:
-                    sdfh.close()
-                # closing the index file last to avoid exposing referent to
-                # potential unflushed data content.
-                if ifh is not None:
-                    ifh.close()
 
     def _write_docket(self, transaction):
         """write the current docket on disk
@@ -2811,7 +2916,7 @@
             raise error.RevlogError(
                 _(b"%s: attempt to add wdir revision") % self.display_id
             )
-        if self._writinghandles is None:
+        if self._inner._writinghandles is None:
             msg = b'adding revision outside `revlog._writing` context'
             raise error.ProgrammingError(msg)
 
@@ -2823,7 +2928,7 @@
         offset = self._get_data_offset(prev)
 
         if self._concurrencychecker:
-            ifh, dfh, sdfh = self._writinghandles
+            ifh, dfh, sdfh = self._inner._writinghandles
             # XXX no checking for the sidedata file
             if self._inline:
                 # offset is "as if" it were in the .d file, so we need to add on
@@ -3007,10 +3112,10 @@
         # Note: This is likely not necessary on Python 3. However, because
         # the file handle is reused for reads and may be seeked there, we need
         # to be careful before changing this.
-        if self._writinghandles is None:
+        if self._inner._writinghandles is None:
             msg = b'adding revision outside `revlog._writing` context'
             raise error.ProgrammingError(msg)
-        ifh, dfh, sdfh = self._writinghandles
+        ifh, dfh, sdfh = self._inner._writinghandles
         if self._docket is None:
             ifh.seek(0, os.SEEK_END)
         else:
@@ -3045,9 +3150,9 @@
             self._enforceinlinesize(transaction)
         if self._docket is not None:
             # revlog-v2 always has 3 writing handles, help Pytype
-            wh1 = self._writinghandles[0]
-            wh2 = self._writinghandles[1]
-            wh3 = self._writinghandles[2]
+            wh1 = self._inner._writinghandles[0]
+            wh2 = self._inner._writinghandles[1]
+            wh3 = self._inner._writinghandles[2]
             assert wh1 is not None
             assert wh2 is not None
             assert wh3 is not None
@@ -3259,8 +3364,8 @@
         # then reset internal state in memory to forget those revisions
         self._revisioncache = None
         self._chaininfocache = util.lrucachedict(500)
-        self._segmentfile.clear_cache()
-        self._segmentfile_sidedata.clear_cache()
+        self._inner._segmentfile.clear_cache()
+        self._inner._segmentfile_sidedata.clear_cache()
 
         del self.index[rev:-1]
 
@@ -3723,7 +3828,7 @@
         new_entries = []
         # append the new sidedata
         with self._writing(transaction):
-            ifh, dfh, sdfh = self._writinghandles
+            ifh, dfh, sdfh = self._inner._writinghandles
             dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
 
             current_offset = sdfh.tell()