Mercurial > public > mercurial-scm > hg-stable
diff mercurial/streamclone.py @ 52942:22e264ac7f60
stream-clone-v2: bypass the vfs to write the file on disk
Now that all preparation is done in the main thread, we no longer need the vfs
to be involved at the writing end.
This very significantly speeds things up.
### benchmark.name = hg.perf.exchange.stream.consume
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.memory-target = default
# benchmark.variants.num-writer = default
# benchmark.variants.parallel-processing = yes
# benchmark.variants.progress = no
# benchmark.variants.read-from-memory = yes
# benchmark.variants.version = v2
## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog
prev-change: 0.217293 ~~~~~
this-change: 0.154647 (-28.83%, -0.06)
## data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog
prev-change: 11.222771 ~~~~~
this-change: 7.843554 (-30.11%, -3.38)
## data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog
prev-change: 4.465113 ~~~~~
this-change: 3.040664 (-31.90%, -1.42)
## data-env-vars.name = netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog
prev-change: 4.667360 ~~~~~
this-change: 3.070976 (-34.20%, -1.60)
## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog
prev-change: 2.559670 ~~~~~
this-change: 1.832118 (-28.42%, -0.73)
## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog
prev-change: 6.123469 ~~~~~
this-change: 4.478754 (-26.86%, -1.64)
## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog
prev-change: 44.781498 ~~~~~
this-change: 30.349379 (-32.23%, -14.43)
## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog
prev-change: 44.396959 ~~~~~
this-change: 31.179906 (-29.77%, -13.22)
## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog
# benchmark.variants.read-from-memory = no
prev-change: 108.552706 ~~~~~
this-change: 91.454508 (-15.75%, -17.10)
| author   | Pierre-Yves David <pierre-yves.david@octobus.net> |
|----------|---------------------------------------------------|
| date     | Wed, 29 Jan 2025 02:23:02 +0100                   |
| parents  | 5b8f6e198a6e                                      |
| children | c124308e3cd4                                      |
line wrap: on
line diff
--- a/mercurial/streamclone.py Wed Jan 29 02:17:33 2025 +0100 +++ b/mercurial/streamclone.py Wed Jan 29 02:23:02 2025 +0100 @@ -1176,14 +1176,14 @@ mark_used=mark_used, ) if not threaded: - _write_files(vfsmap, files) + _write_files(files) else: info_queue = _FileInfoQueue(files) for __ in range(num_writer): w = threading.Thread( target=_write_files, - args=(vfsmap, info_queue), + args=(info_queue,), ) workers.append(w) w.start() @@ -1217,8 +1217,8 @@ FileChunksT = Iterator[bytes] # Contains the information necessary to write stream file on disk FileInfoT = Tuple[ - bytes, # vfs key - bytes, # file name (non-vfs-encoded) + bytes, # real fs path + Optional[int], # permission to give to chmod FileChunksT, # content ] @@ -1521,14 +1521,14 @@ b'adding [%s] %s (%s)\n' % (src, name, util.bytecount(datalen)) ) vfs = vfs_map[src] - vfs.prepare_streamed_file(name, known_dirs) + path, mode = vfs.prepare_streamed_file(name, known_dirs) if datalen <= util.DEFAULT_FILE_CHUNK: c = fp.read(datalen) offset = fp.tell() report.byte_count += len(c) progress.increment(step=len(c)) chunks = _trivial_file(c, mark_used, offset) - yield (src, name, iter(chunks)) + yield (path, mode, iter(chunks)) else: chunks = file_chunker( fp, @@ -1537,25 +1537,26 @@ report, mark_used=mark_used, ) - yield (src, name, iter(chunks)) + yield (path, mode, iter(chunks)) # make sure we read all the chunk before moving to the next file chunks.fill() -def _write_files(vfsmap, info: Iterable[FileInfoT]): +def _write_files(info: Iterable[FileInfoT]): """write files from parsed data""" - for src, name, data in info: - vfs = vfsmap[src] + for path, mode, data in info: # we disable the internal Python buffering because the streamed data # are assume to have been written with large enough block for it to not # matters. So we only have more memory copy and GIL holding time to # gain with the Python buffering. 
-        with vfs(name, b'w', buffering=0) as ofp:
+        with open(path, 'wb', buffering=0) as ofp:
             for chunk in data:
                 written = ofp.write(chunk)
                 # write missing pieces if the write was interrupted
                 while written < len(chunk):
                     written += ofp.write(chunk[written:])
+        if mode is not None:
+            os.chmod(path, mode & 0o666)


 def consumev3(repo, fp) -> None: