Mercurial > public > mercurial-scm > hg-stable
changeset 52937:119cddd02b8c
stream-clone-v2: simplify the handling of small files
We don't need to use a thread-safe queue for files that come in one block. In
such cases we can significantly simplify the code and avoid using a Queue and
locks for each small file.
This yields a very significant speedup. The threaded version is finally faster
than our baseline.
### benchmark.name = hg.perf.exchange.stream.consume
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.memory-target = default
# benchmark.variants.num-writer = default
# benchmark.variants.parallel-processing = yes
# benchmark.variants.progress = no
# benchmark.variants.read-from-memory = yes
# benchmark.variants.version = v2
## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog
baseline: 0.249693 ~~~~~
prev-change: 0.259730 (+4.02%, +0.01)
this-change: 0.234764 (-5.98%, -0.01)
## data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog
baseline: 13.136674 ~~~~~
prev-change: 13.768783 (+4.81%, +0.63)
this-change: 12.037305 (-8.37%, -1.10)
## data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog
baseline: 5.317709 ~~~~~
prev-change: 5.679052 (+6.80%, +0.36)
this-change: 4.763710 (-10.42%, -0.55)
## data-env-vars.name = netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog
baseline: 5.398368 ~~~~~
prev-change: 5.616571 (+4.04%, +0.22)
this-change: 4.816293 (-10.78%, -0.58)
## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog
baseline: 3.0685 (approx., original value garbled) ~~~~~
prev-change: 3.164637 (+3.13%, +0.10)
this-change: 2.801525 (-8.70%, -0.27)
## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog
baseline: 7.244015 ~~~~~
prev-change: 7.729637 (+6.70%, +0.49)
this-change: 6.611609 (-8.73%, -0.63)
## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog
baseline: 51.934795 ~~~~~
prev-change: 56.567493 (+8.92%, +4.63)
this-change: 47.733498 (-8.09%, -4.20)
## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog
baseline: 52.253858 ~~~~~
prev-change: 56.093516 (+7.35%, +3.84)
this-change: 48.270778 (-7.62%, -3.98)
## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog
# benchmark.variants.read-from-memory = no
baseline: 130.584329 ~~~~~
prev-change: 125.985212 (-3.52%, -4.60)
this-change: 115.240169 (-11.75%, -15.34)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 05 Feb 2025 21:59:32 +0100 |
parents | c92f9edd362a |
children | 34fa51c25112 |
files | mercurial/streamclone.py |
diffstat | 1 files changed, 29 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/streamclone.py Wed Feb 05 21:58:05 2025 +0100
+++ b/mercurial/streamclone.py Wed Feb 05 21:59:32 2025 +0100
@@ -1481,6 +1481,17 @@
             yield chunk
 
 
+def _trivial_file(
+    chunk: bytes,
+    mark_used: Optional[Callable[[int], None]],
+    offset: int,
+) -> FileChunksT:
+    """used for single chunk file,"""
+    if mark_used is not None:
+        mark_used(offset)
+    yield chunk
+
+
 def _v2_parse_files(
     repo,
     fp: bundle2mod.unbundlepart,
@@ -1506,16 +1517,24 @@
             repo.ui.debug(
                 b'adding [%s] %s (%s)\n' % (src, name, util.bytecount(datalen))
             )
-        chunks = file_chunker(
-            fp,
-            datalen,
-            progress,
-            report,
-            mark_used=mark_used,
-        )
-        yield (src, name, iter(chunks))
-        # make sure we read all the chunk before moving to the next file
-        chunks.fill()
+        if datalen <= util.DEFAULT_FILE_CHUNK:
+            c = fp.read(datalen)
+            offset = fp.tell()
+            report.byte_count += len(c)
+            progress.increment(step=len(c))
+            chunks = _trivial_file(c, mark_used, offset)
+            yield (src, name, iter(chunks))
+        else:
+            chunks = file_chunker(
+                fp,
+                datalen,
+                progress,
+                report,
+                mark_used=mark_used,
+            )
+            yield (src, name, iter(chunks))
+            # make sure we read all the chunk before moving to the next file
+            chunks.fill()
 
 
 def _write_files(vfsmap, info: Iterable[FileInfoT]):