# HG changeset patch # User Pierre-Yves David # Date 1738113453 -3600 # Node ID 5b8f6e198a6e00d9184c86396eb53b91e7b352b4 # Parent 7fc882f7fadab9d8326353c72f4227ae56323d12 stream-clone-v2: centralize preparation for streamed files The vfs performs some operations to prepare the creation of a streamed file: it creates the necessary directories and it adds the file to the fncache. The directory creation is much more expensive than it should be, and the same directories are being checked over and over. The other parts, like "adding the file to the fncache", increase the amount of GIL holding time. So we add a function to the vfs to do such preparation, and we centralize it in the "parsing" thread. This is done so that each directory is only "created" once. This does not yield a speedup yet, because the writer thread still goes through the same slow logic. This will be dealt with in the next changesets. diff -r 7fc882f7fada -r 5b8f6e198a6e mercurial/streamclone.py --- a/mercurial/streamclone.py Mon Jan 20 12:41:20 2025 +0100 +++ b/mercurial/streamclone.py Wed Jan 29 02:17:33 2025 +0100 @@ -1168,6 +1168,7 @@ files = _v2_parse_files( repo, raw_data, + vfsmap, filecount, progress, report, @@ -1495,6 +1496,7 @@ def _v2_parse_files( repo, fp: bundle2mod.unbundlepart, + vfs_map, file_count: int, progress: scmutil.progress, report: V2Report, @@ -1505,6 +1507,7 @@ The parsed information are yield result for consumption by the "writer" """ + known_dirs = set() # set of directory that we know to exists progress.update(0) for i in range(file_count): src = util.readexactly(fp, 1) @@ -1517,6 +1520,8 @@ repo.ui.debug( b'adding [%s] %s (%s)\n' % (src, name, util.bytecount(datalen)) ) + vfs = vfs_map[src] + vfs.prepare_streamed_file(name, known_dirs) if datalen <= util.DEFAULT_FILE_CHUNK: c = fp.read(datalen) offset = fp.tell() diff -r 7fc882f7fada -r 5b8f6e198a6e mercurial/vfs.py --- a/mercurial/vfs.py Mon Jan 20 12:41:20 2025 +0100 +++ b/mercurial/vfs.py Wed Jan 29 02:17:33 2025 +0100 @@ 
-25,6 +25,7 @@ List, MutableMapping, Optional, + Set, Tuple, Type, TypeVar, @@ -86,6 +87,9 @@ # by the Rust code rust_compatible = True + # createmode is always available on subclasses + createmode: int + # TODO: type return, which is util.posixfile wrapped by a proxy @abc.abstractmethod def __call__(self, path: bytes, mode: bytes = b'rb', **kwargs) -> Any: @@ -456,6 +460,23 @@ def register_file(self, path: bytes) -> None: """generic hook point to lets fncache steer its stew""" + def prepare_streamed_file( + self, path: bytes, known_directories: Set[bytes] + ) -> None: + """make sure we are ready to write a file from a stream clone + + The "known_directories" variable is here to avoid trying to create the + same directories over and over during a stream clone. It will be + updated by this function. + """ + self._auditpath(path, b'wb') + self.register_file(path) + real_path = self.join(path) + dirname, basename = util.split(real_path) + if dirname not in known_directories: + util.makedirs(dirname, self.createmode, True) + known_directories.add(dirname) + class vfs(abstractvfs): """Operate files relative to a base directory