diff contrib/perf.py @ 52723:42dd6ec73c28

perf: add a --in-memory-bundle option to perf::stream-consume This avoid doing read access at the same time as write access and abstract the read time. It consume "quite some" memory for large repository through. Doing this speedup the command on a mozilla try clone by about 15%, helping us to focus on processing time instead.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 30 Nov 2024 03:25:22 +0100
parents 492de5c0ecdd
children 6a7b0d0f090d
line wrap: on
line diff
--- a/contrib/perf.py	Fri Nov 29 22:21:18 2024 +0100
+++ b/contrib/perf.py	Sat Nov 30 03:25:22 2024 +0100
@@ -2182,6 +2182,12 @@
     [
         (
             b'',
+            b'in-memory-bundle',
+            False,
+            b'load the full bundle in userspace memory before proceeding',
+        ),
+        (
+            b'',
             b'unbundle-progress',
             False,
             b"compute and display progress during stream processing",
@@ -2225,7 +2231,7 @@
     if not (os.path.isfile(filename) and os.access(filename, os.R_OK)):
         raise error.Abort("not a readable file: %s" % filename)
 
-    run_variables = [None, None]
+    run_variables = [None, None, None]
 
     # we create the new repository next to the other one for two reasons:
     # - this way we use the same file system, which are relevant for benchmark
@@ -2235,20 +2241,29 @@
     @contextlib.contextmanager
     def context():
         with open(filename, mode='rb') as bundle:
+            bundle_name = bundle.name
+            if opts.get(b'in_memory_bundle'):
+                # you hate memory, don't you?
+                import io
+
+                bundle = io.BytesIO(bundle.read())
             with tempfile.TemporaryDirectory(
                 prefix=b'hg-perf-stream-consume-',
                 dir=source_repo_dir,
             ) as tmp_dir:
                 tmp_dir = fsencode(tmp_dir)
                 run_variables[0] = bundle
-                run_variables[1] = tmp_dir
+                run_variables[1] = bundle_name
+                run_variables[2] = tmp_dir
                 yield
                 run_variables[0] = None
                 run_variables[1] = None
+                run_variables[2] = None
 
     def runone():
         bundle = run_variables[0]
-        tmp_dir = run_variables[1]
+        bundle_name = run_variables[1]
+        tmp_dir = run_variables[2]
 
         # we actually wants to copy all config to ensure the repo config is
         # taken in account during the benchmark
@@ -2263,7 +2278,7 @@
         # benchmark.
         show_progress = bool(opts.get("show_progress"))
         target.ui.setconfig(b"progress", b"disable", not show_progress)
-        gen = exchange.readbundle(target.ui, bundle, bundle.name)
+        gen = exchange.readbundle(target.ui, bundle, bundle_name)
         # stream v1
         if util.safehasattr(gen, 'apply'):
             gen.apply(target)