changeset 52931:c124308e3cd4

stream-clone-v2: directly use the os module for file operation Sadly this is still significantly faster. I am not sure what the python file object does internally, but bypassing it provides a good speedup. Probably because we spend less time with the GIL. This is the end of this first round of optimization. Overall, the threaded version is almost twice as fast as the baseline on our benchmark machine (and three times faster than the naive implementation) (see first batch of benchmarks below). In reality, the various improvements we made also help the non-threaded case. Our benchmarks show about 30% improvement after this series. (second benchmark batch below) However, the threaded version remains faster in our benchmark (third benchmark). Further improvements can probably be achieved by speeding up the directory creation process and by moving to a compiled language (both faster in general and without a GIL). ########### Comparing evolution of the threaded code ########################## ### benchmark.name = hg.perf.exchange.stream.consume # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.memory-target = default # benchmark.variants.num-writer = default # benchmark.variants.parallel-processing = yes # benchmark.variants.progress = no # benchmark.variants.read-from-memory = yes # benchmark.variants.version = v2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog baseline: 0.249693 ~~~~~ naive-thread: 0.305973 (+22.54%, +0.06) prev-change: 0.154647 (-38.07%, -0.10) this-change: 0.143486 (-42.54%, -0.11) ## data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog baseline: 13.136674 ~~~~~ naive-thread: 18.467590 (+40.58%, +5.33) prev-change: 7.843554 (-40.29%, -5.29) this-change: 7.423071 (-43.49%, -5.71) ## data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog baseline: 5.317709 ~~~~~ naive-thread: 7.338505 (+38.00%, +2.02) prev-change: 3.040664 (-42.82%, -2.28) this-change: 2.776906 (-47.78%, 
-2.54) ## data-env-vars.name = netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog baseline: 5.398368 ~~~~~ naive-thread: 7.333354 (+35.84%, +1.93) prev-change: 3.070976 (-43.11%, -2.33) this-change: 2.795593 (-48.21%, -2.60) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog baseline: ~3.0687 ~~~~~ naive-thread: 4.238172 (+38.11%, +1.17) prev-change: 1.832118 (-40.30%, -1.24) this-change: 1.640016 (-46.56%, -1.43) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog baseline: 7.244015 ~~~~~ naive-thread: 9.901032 (+36.68%, +2.66) prev-change: 4.478754 (-38.17%, -2.77) this-change: 3.946716 (-45.52%, -3.30) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog baseline: 51.934795 ~~~~~ naive-thread: 78.194540 (+50.56%, +26.26) prev-change: 30.349379 (-41.56%, -21.59) this-change: 27.228980 (-47.57%, -24.71) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog baseline: 52.253858 ~~~~~ naive-thread: 77.492938 (+48.30%, +25.24) prev-change: 31.179906 (-40.33%, -21.07) this-change: 28.110867 (-46.20%, -24.14) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # benchmark.variants.read-from-memory = no baseline: 130.584329 ~~~~~ naive-thread: 164.366925 (+25.87%, +33.78) prev-change: 91.454508 (-29.97%, -39.13) this-change: 91.002420 (-30.31%, -39.58) ########### Comparing evolution of the non-threaded code ###################### ### benchmark.name = hg.perf.exchange.stream.consume # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.memory-target = default # benchmark.variants.num-writer = default # benchmark.variants.parallel-processing = no # benchmark.variants.progress = no # benchmark.variants.read-from-memory = yes # benchmark.variants.version = v2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog base-line: 0.247757 ~~~~~ this-change: 0.168695 (-31.91%, -0.08) ### data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog 
base-line: 13.195582 ~~~~~ this-change: 9.240878 (-29.97%, -3.95) ## data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog base-line: 5.369459 ~~~~~ this-change: 3.637088 (-32.26%, -1.73) ## data-env-vars.name = netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog base-line: 5.345086 ~~~~~ this-change: 3.587604 (-32.88%, -1.76) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog base-line: 3.110852 ~~~~~ this-change: 2.069172 (-33.49%, -1.04) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog base-line: 7.293048 ~~~~~ this-change: 4.931636 (-32.38%, -2.36) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog base-line: 52.067856 ~~~~~ this-change: 35.360483 (-32.09%, -16.71) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog base-line: 52.347182 ~~~~~ this-change: 35.465007 (-32.25%, -16.88) ## data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # benchmark.variants.read-from-memory = no base-line: 143.083922 ~~~~~ this-change: 107.609183 (-24.79%, -35.47) ########### Comparing non-threaded vs threaded ################################ ### benchmark.name = hg.perf.exchange.stream.consume # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = fec3492130e0a5ef797ad518e434816ee8d54ec6 # benchmark.variants.memory-target = default # benchmark.variants.num-writer = default # benchmark.variants.progress = no # benchmark.variants.read-from-memory = yes # benchmark.variants.version = v2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog not-threaded: 0.168695 ~~~~~ threaded: 0.143486 (-14.94%, -0.03) ### data-env-vars.name = netbeans-2019-11-07-zstd-sparse-revlog not-threaded: 9.240878 ~~~~~ threaded: 7.423071 (-19.67%, -1.82) ### data-env-vars.name = netbsd-xsrc-all-2024-09-19-zstd-sparse-revlog not-threaded: 3.637088 ~~~~~ threaded: 2.776906 (-23.65%, -0.86) ### data-env-vars.name = 
netbsd-xsrc-draft-2024-09-19-zstd-sparse-revlog not-threaded: 3.587604 ~~~~~ threaded: 2.795593 (-22.08%, -0.79) ### data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog not-threaded: 2.069172 ~~~~~ threaded: 1.640016 (-20.74%, -0.43) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog not-threaded: 4.931636 ~~~~~ threaded: 3.946716 (-19.97%, -0.98) ### data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog not-threaded: 35.360483 ~~~~~ threaded: 27.228980 (-23.00%, -8.13) ### data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog not-threaded: 35.465007 ~~~~~ threaded: 28.110867 (-20.74%, -7.35) ### data-env-vars.name = mozilla-try-2024-03-26-zstd-sparse-revlog # benchmark.variants.read-from-memory = no not-threaded: 107.609183 ~~~~~ threaded: 91.002420 (-15.43%, -16.61)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 29 Jan 2025 02:27:05 +0100
parents 22e264ac7f60
children 1825f5593eb2
files mercurial/streamclone.py
diffstat 1 files changed, 8 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/streamclone.py	Wed Jan 29 02:23:02 2025 +0100
+++ b/mercurial/streamclone.py	Wed Jan 29 02:27:05 2025 +0100
@@ -1545,18 +1545,17 @@
 def _write_files(info: Iterable[FileInfoT]):
     """write files from parsed data"""
     for path, mode, data in info:
-        # we disable the internal Python buffering because the streamed data
-        # are assume to have been written with large enough block for it to not
-        # matters. So we only have more memory copy and GIL holding time to
-        # gain with the Python buffering.
-        with open(path, 'wb', buffering=0) as ofp:
+        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+        try:
+            if mode is not None:
+                os.fchmod(fd, mode & 0o666)
             for chunk in data:
-                written = ofp.write(chunk)
+                written = os.write(fd, chunk)
                 # write missing pieces if the write was interrupted
                 while written < len(chunk):
-                    written += ofp.write(chunk[written:])
-        if mode is not None:
-            os.chmod(path, mode & 0o666)
+                    written += os.write(fd, chunk[written:])
+        finally:
+            os.close(fd)
 
 
 def consumev3(repo, fp) -> None: