changeset 52937:119cddd02b8c

stream-clone-v2: simplify the handling of small files We don't need to use a thread-safe queue for files that come in one block. In such cases we can significantly simplify the code to avoid using a Queue and locks for each small file. This yields a very significant speedup. The threaded version is finally faster than our baseline. ### benchmark.name = hg.perf.exchange.stream.consume # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.memory-target = default # benchmark.variants.num-writer = default # benchmark.variants.parallel-processing = yes # benchmark.variants.progress = no # benchmark.variants.read-from-memory = yes # benchmark.variants.version = v2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog baseline: 0.249693 ~~~~~ prev-change: 0.259730 (+4.02%, +0.01) this-change: 0.234764 (-5.98%, -0.01) ## data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog baseline: 13.136674 ~~~~~ prev-change: 13.768783 (+4.81%, +0.63) this-change: 12.037305 (-8.37%, -1.10) ## data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog baseline: 5.317709 ~~~~~ prev-change: 5.679052 (+6.80%, +0.36) this-change: 4.763710 (-10.42%, -0.55) ## data-env-vars.name = netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog baseline: 5.398368 ~~~~~ prev-change: 5.616571 (+4.04%, +0.22) this-change: 4.816293 (-10.78%, -0.58) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog baseline: 3.068555 ~~~~~ prev-change: 3.164637 (+3.13%, +0.10) this-change: 2.801525 (-8.70%, -0.27) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog baseline: 7.244015 ~~~~~ prev-change: 7.729637 (+6.70%, +0.49) this-change: 6.611609 (-8.73%, -0.63) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog baseline: 51.934795 ~~~~~ prev-change: 56.567493 (+8.92%, +4.63) this-change: 47.733498 (-8.09%, -4.20) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog baseline: 52.253858 ~~~~~ 
prev-change: 56.093516 (+7.35%, +3.84) this-change: 48.270778 (-7.62%, -3.98) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # benchmark.variants.read-from-memory = no baseline: 130.584329 ~~~~~ prev-change: 125.985212 (-3.52%, -4.60) this-change: 115.240169 (-11.75%, -15.34)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 05 Feb 2025 21:59:32 +0100
parents c92f9edd362a
children 34fa51c25112
files mercurial/streamclone.py
diffstat 1 files changed, 29 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/streamclone.py	Wed Feb 05 21:58:05 2025 +0100
+++ b/mercurial/streamclone.py	Wed Feb 05 21:59:32 2025 +0100
@@ -1481,6 +1481,17 @@
             yield chunk
 
 
+def _trivial_file(
+    chunk: bytes,
+    mark_used: Optional[Callable[[int], None]],
+    offset: int,
+) -> FileChunksT:
+    """used for single chunk file,"""
+    if mark_used is not None:
+        mark_used(offset)
+    yield chunk
+
+
 def _v2_parse_files(
     repo,
     fp: bundle2mod.unbundlepart,
@@ -1506,16 +1517,24 @@
             repo.ui.debug(
                 b'adding [%s] %s (%s)\n' % (src, name, util.bytecount(datalen))
             )
-        chunks = file_chunker(
-            fp,
-            datalen,
-            progress,
-            report,
-            mark_used=mark_used,
-        )
-        yield (src, name, iter(chunks))
-        # make sure we read all the chunk before moving to the next file
-        chunks.fill()
+        if datalen <= util.DEFAULT_FILE_CHUNK:
+            c = fp.read(datalen)
+            offset = fp.tell()
+            report.byte_count += len(c)
+            progress.increment(step=len(c))
+            chunks = _trivial_file(c, mark_used, offset)
+            yield (src, name, iter(chunks))
+        else:
+            chunks = file_chunker(
+                fp,
+                datalen,
+                progress,
+                report,
+                mark_used=mark_used,
+            )
+            yield (src, name, iter(chunks))
+            # make sure we read all the chunk before moving to the next file
+            chunks.fill()
 
 
 def _write_files(vfsmap, info: Iterable[FileInfoT]):