changeset 52941:5b8f6e198a6e

stream-clone-v2: centralize preparation for streamed files The vfs does some operations to prepare the creation of a streamed file: it creates the necessary directories and it adds the file to the fncache. The directory creation is much more expensive than it should be, and the same directories are being checked over and over. The other part, "adding the file to the fncache", increases the amount of time spent holding the GIL. So we add a function to the vfs to do this preparation, and we centralise it in the "parsing" thread. This is done so that each directory is only "created" once. This does not yield a speedup yet, because the writer thread still goes through the same slow logic. This will be dealt with in the next changesets.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 29 Jan 2025 02:17:33 +0100
parents 7fc882f7fada
children 22e264ac7f60
files mercurial/streamclone.py mercurial/vfs.py
diffstat 2 files changed, 26 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/streamclone.py	Mon Jan 20 12:41:20 2025 +0100
+++ b/mercurial/streamclone.py	Wed Jan 29 02:17:33 2025 +0100
@@ -1168,6 +1168,7 @@
                     files = _v2_parse_files(
                         repo,
                         raw_data,
+                        vfsmap,
                         filecount,
                         progress,
                         report,
@@ -1495,6 +1496,7 @@
 def _v2_parse_files(
     repo,
     fp: bundle2mod.unbundlepart,
+    vfs_map,
     file_count: int,
     progress: scmutil.progress,
     report: V2Report,
@@ -1505,6 +1507,7 @@
 
     The parsed information are yield result for consumption by the "writer"
     """
+    known_dirs = set()  # set of directories that we know to exist
     progress.update(0)
     for i in range(file_count):
         src = util.readexactly(fp, 1)
@@ -1517,6 +1520,8 @@
             repo.ui.debug(
                 b'adding [%s] %s (%s)\n' % (src, name, util.bytecount(datalen))
             )
+        vfs = vfs_map[src]
+        vfs.prepare_streamed_file(name, known_dirs)
         if datalen <= util.DEFAULT_FILE_CHUNK:
             c = fp.read(datalen)
             offset = fp.tell()
--- a/mercurial/vfs.py	Mon Jan 20 12:41:20 2025 +0100
+++ b/mercurial/vfs.py	Wed Jan 29 02:17:33 2025 +0100
@@ -25,6 +25,7 @@
     List,
     MutableMapping,
     Optional,
+    Set,
     Tuple,
     Type,
     TypeVar,
@@ -86,6 +87,9 @@
     # by the Rust code
     rust_compatible = True
 
+    # createmode is always available on subclasses
+    createmode: int
+
     # TODO: type return, which is util.posixfile wrapped by a proxy
     @abc.abstractmethod
     def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs) -> Any:
@@ -456,6 +460,23 @@
     def register_file(self, path: bytes) -> None:
         """generic hook point to lets fncache steer its stew"""
 
+    def prepare_streamed_file(
+        self, path: bytes, known_directories: Set[bytes]
+    ) -> None:
+        """make sure we are ready to write a file from a stream clone
+
+        This audits `path`, registers it, and creates its parent directory
+        unless that directory is already in "known_directories". That set
+        avoids re-creating the same directories; it is updated in place.
+        """
+        self._auditpath(path, b'wb')
+        self.register_file(path)
+        real_path = self.join(path)
+        dirname, basename = util.split(real_path)
+        if dirname not in known_directories:
+            util.makedirs(dirname, self.createmode, True)
+            known_directories.add(dirname)
+
 
 class vfs(abstractvfs):
     """Operate files relative to a base directory