diff mercurial/revlog.py @ 50637:9caa860dcbec

stream-clone: implement decidated `get_streams` method for revlog For revlog, we can do better using the maximum linkrev expected. This approach open the way to dealing with a much larger set of non-trivial changes, like splitting of inline revlogs. We will actually tackle this issue in the next changesets (thanks to this one).
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 29 May 2023 14:07:58 +0200
parents 32837c7e2e4b
children 5460424092e2
line wrap: on
line diff
--- a/mercurial/revlog.py	Sun May 28 05:52:58 2023 +0200
+++ b/mercurial/revlog.py	Mon May 29 14:07:58 2023 +0200
@@ -506,6 +506,74 @@
         except FileNotFoundError:
             return b''
 
+    def get_streams(self, max_linkrev):
+        n = len(self)
+        index = self.index
+        while n > 0:
+            linkrev = index[n - 1][4]
+            if linkrev < max_linkrev:
+                break
+            # note: this loop will rarely go through multiple iterations, since
+            # it only traverses commits created during the current streaming
+            # pull operation.
+            #
+            # If this become a problem, using a binary search should cap the
+            # runtime of this.
+            n = n - 1
+        if n == 0:
+            # no data to send
+            return []
+        index_size = n * index.entry_size
+        data_size = self.end(n - 1)
+
+        # XXX we might have been split (or stripped) since the object
+        # initialization, We need to close this race too, but having a way to
+        # pre-open the file we feed to the revlog and never closing them before
+        # we are done streaming.
+
+        if self._inline:
+
+            def get_stream():
+                with self._indexfp() as fp:
+                    yield None
+                    size = index_size + data_size
+                    if size <= 65536:
+                        yield fp.read(size)
+                    else:
+                        yield from util.filechunkiter(fp, limit=size)
+
+            inline_stream = get_stream()
+            next(inline_stream)
+            return [
+                (self._indexfile, inline_stream, index_size + data_size),
+            ]
+        else:
+
+            def get_index_stream():
+                with self._indexfp() as fp:
+                    yield None
+                    if index_size <= 65536:
+                        yield fp.read(index_size)
+                    else:
+                        yield from util.filechunkiter(fp, limit=index_size)
+
+            def get_data_stream():
+                with self._datafp() as fp:
+                    yield None
+                    if data_size <= 65536:
+                        yield fp.read(data_size)
+                    else:
+                        yield from util.filechunkiter(fp, limit=data_size)
+
+            index_stream = get_index_stream()
+            next(index_stream)
+            data_stream = get_data_stream()
+            next(data_stream)
+            return [
+                (self._datafile, data_stream, data_size),
+                (self._indexfile, index_stream, index_size),
+            ]
+
     def _loadindex(self, docket=None):
 
         new_header, mmapindexthreshold, force_nodemap = self._init_opts()