tests/artifacts/scripts/generate-churning-bundle.py
changeset 39491 4ca7a67c94c8
child 41331 56a0de3d581c
child 41795 b59676077654
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/artifacts/scripts/generate-churning-bundle.py	Mon Sep 10 09:08:24 2018 -0700
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+#
+# generate-churning-bundle - generate a bundle for a "large" branchy repository
+#
+# Copyright 2018 Octobus, contact@octobus.net
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+# This script generates a repository suitable for testing delta computation
+# strategies.
+#
+# The repository updates a single "large" file many times. One fixed part of
+# the file always gets updated while the rest of the lines get updated over
+# time. These updates happen over many topological branches, some getting
+# merged back.
+#
+# Running with `chg` in your path and `CHGHG` set is recommended for speed.
+
+from __future__ import absolute_import, print_function
+
+import hashlib
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+BUNDLE_NAME = 'big-file-churn.hg'
+
+# constants for generating the repository
+NB_CHANGESET = 5000  # number of commits created after the initial one
+PERIOD_MERGING = 8  # every 8th commit merges with min(head())
+PERIOD_BRANCHING = 7  # every 7th commit first moves back in history
+MOVE_BACK_MIN = 3  # moving back goes MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
+MOVE_BACK_RANGE = 5  # ancestors up from the working copy parent
+
+# constants for generating the large file we keep updating
+#
+# At each revision, the beginning of the file changes,
+# and a set of other lines changes too.
+FILENAME = 'SPARSE-REVLOG-TEST-FILE'
+NB_LINES = 10500  # total number of lines in the file
+ALWAYS_CHANGE_LINES = 500  # lines up to this index change at every revision
+# past that point, one line out of OTHER_CHANGES is rewritten per revision
+OTHER_CHANGES = 300
+
+def nextcontent(previous_content):
+    """derive the next content: the md5 hex digest of <previous_content>"""
+    hasher = hashlib.md5()
+    hasher.update(previous_content)
+    return hasher.hexdigest()
+
+def filecontent(iteridx, oldcontent):
+    """yield, one by one, the NB_LINES lines of the file for one iteration
+
+    <iteridx> is the iteration index (None for the initial call) and
+    <oldcontent> the list of lines from the previous iteration (None when
+    there is none, in which case every line is generated).
+
+    Generated lines form a chain: the first one is str(<iteridx>) (or the
+    empty string on the initial call) and each following one is
+    nextcontent() of the previous generated line. Lines that are not
+    regenerated are copied from <oldcontent> at the same position."""
+    # seed of the chain of generated lines
+    seed = '' if iteridx is None else str(iteridx)
+
+    for lineno in xrange(NB_LINES):
+        keep_old = False
+        if oldcontent is not None and lineno > ALWAYS_CHANGE_LINES:
+            # past the always-changing head of the file, only the lines
+            # congruent to <iteridx> modulo OTHER_CHANGES are rewritten
+            keep_old = (lineno - iteridx) % OTHER_CHANGES != 0
+
+        if keep_old:
+            yield oldcontent[lineno]
+            continue
+
+        line = seed + '\n'
+        seed = nextcontent(seed)
+        yield line
+
+def updatefile(filename, idx):
+    """rewrite <filename> with the content matching iteration <idx>
+
+    When <idx> is None the file is written from scratch. Otherwise its
+    current lines are loaded first and handed to filecontent() so that the
+    unchanged lines can be carried over."""
+    previous = None
+    if idx is not None:
+        # read everything up front: the file is truncated just below
+        with open(filename, 'rb') as source:
+            previous = source.readlines()
+    with open(filename, 'wb') as output:
+        output.writelines(filecontent(idx, previous))
+
+def hg(command, *args):
+    """run the mercurial <command> with <args> and a controlled environment
+
+    `chg` is used instead of `hg` when CHGHG is set in the environment. The
+    command runs with --quiet and an empty HGRCPATH. Returns the result of
+    subprocess.check_call(), which raises on a non-zero exit status."""
+    env = os.environ.copy()
+    env['HGRCPATH'] = ''
+    executable = 'chg' if 'CHGHG' in env else 'hg'
+
+    # options forced for specific commands
+    forced = {
+        # reproducible commit metadata
+        'commit': ['--date', '0 0', '--user', 'test'],
+        # avoid conflicts by picking the local variant
+        'merge': ['--tool', ':merge-local'],
+    }
+
+    full_cmd = [executable, '--quiet', command]
+    full_cmd += forced.get(command, [])
+    full_cmd += args
+    return subprocess.check_call(full_cmd, env=env)
+
+def run(target):
+    """generate the churning repository and bundle it at <target>
+
+    A scratch repository is created in a temporary directory. It receives an
+    initial commit followed by NB_CHANGESET commits, each one updating
+    FILENAME; every PERIOD_BRANCHING commits the working copy first moves
+    back in history and every PERIOD_MERGING commits it first merges with
+    min(head()). The whole repository is then bundled into <target> and the
+    md5 of the bundle is written to <target>.md5.
+
+    The temporary directory is removed and the original working directory
+    restored in all cases. Always returns 0; a failing hg command raises
+    subprocess.CalledProcessError."""
+    origcwd = os.getcwd()
+    # anchor a relative <target> to the current directory before we leave
+    # it (an absolute <target> is left untouched by the join)
+    target = os.path.join(origcwd, target)
+    tmpdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
+    try:
+        os.chdir(tmpdir)
+        hg('init')
+        updatefile(FILENAME, None)
+        hg('commit', '--addremove', '--message', 'initial commit')
+        for idx in xrange(1, NB_CHANGESET + 1):
+            if sys.stdout.isatty():
+                print("generating commit #%d/%d" % (idx, NB_CHANGESET))
+            if (idx % PERIOD_BRANCHING) == 0:
+                # start a new topological branch from an older changeset
+                move_back = MOVE_BACK_MIN + (idx % MOVE_BACK_RANGE)
+                hg('update', ".~%d" % move_back)
+            if (idx % PERIOD_MERGING) == 0:
+                hg('merge', 'min(head())')
+            updatefile(FILENAME, idx)
+            hg('commit', '--message', 'commit #%d' % idx)
+        hg('bundle', '--all', target)
+        with open(target, 'rb') as bundle:
+            data = bundle.read()
+            digest = hashlib.md5(data).hexdigest()
+        with open(target + '.md5', 'wb') as md5file:
+            md5file.write(digest + '\n')
+        if sys.stdout.isatty():
+            print('bundle generated at "%s" md5: %s' % (target, digest))
+    finally:
+        # step out of the scratch directory before deleting it: removing the
+        # current working directory fails on some platforms and would leave
+        # the process in a directory that no longer exists
+        os.chdir(origcwd)
+        shutil.rmtree(tmpdir)
+    return 0
+
+if __name__ == '__main__':
+    # the bundle goes in the `cache` directory, sibling of the directory
+    # holding this script
+    scriptdir = os.path.dirname(sys.argv[0])
+    bundlepath = os.path.join(
+        os.path.realpath(scriptdir), os.pardir, 'cache', BUNDLE_NAME)
+    sys.exit(run(bundlepath))
+