diff mercurial/wireprotov1server.py @ 50675:60f9602b413e

clonebundles: add support for inline (streaming) clonebundles

The idea behind inline clonebundles is to send them through the ssh or https connection to the Mercurial server. We've been using this specifically for streaming clonebundles, although it works for 'regular' clonebundles as well (but is less relevant, since pullbundles exist). We've had this enabled for around 9 months for some of our users.

A few benefits are:
- no need to secure an external system, since everything goes through the same Mercurial server
- easier scaling (in our case: no risk of inconsistencies between multiple mercurial-server mirrors and nginx clonebundles hosts)

Remaining topics/questions right now:
- The inline clonebundles don't work for https yet. This is because httppeer doesn't seem to support sending client capabilities. I didn't focus on that, as my main goal was to get this working for ssh.
author Mathias De Mare <mathias.de_mare@nokia.com>
date Wed, 08 Mar 2023 14:23:43 +0100
parents 9e24f8442640
children
line wrap: on
line diff
--- a/mercurial/wireprotov1server.py	Wed May 31 18:08:56 2023 +0100
+++ b/mercurial/wireprotov1server.py	Wed Mar 08 14:23:43 2023 +0100
@@ -21,6 +21,7 @@
     encoding,
     error,
     exchange,
+    hook,
     pushkey as pushkeymod,
     pycompat,
     repoview,
@@ -264,6 +265,40 @@
     return wireprototypes.bytesresponse(b''.join(r))
 
 
+@wireprotocommand(b'get_inline_clone_bundle', b'path', permission=b'pull')
+def get_inline_clone_bundle(repo, proto, path):
+    """Server command to stream the clonebundle at ``path`` to the client.
+
+    ``path`` is client-supplied; Abort unless it resolves inside the cache.
+    """
+    hookname = b'pretransmit-inline-clone-bundle'
+    if hook.hashook(repo.ui, hookname):
+        hook.hook(repo.ui, repo, hookname, throw=True, clonebundlepath=path)
+
+    bundle_dir = repo.vfs.join(bundlecaches.BUNDLE_CACHE_DIR)
+    clonebundlepath = repo.vfs.join(bundle_dir, path)
+
+    # Check containment before probing the filesystem, and compare against
+    # the directory *with* a trailing separator: a bare startswith() also
+    # accepts sibling directories sharing the prefix ("<cache>-other/").
+    clonebundles_dir = os.path.join(os.path.realpath(bundle_dir), b'')
+    if not os.path.realpath(clonebundlepath).startswith(clonebundles_dir):
+        raise error.Abort(b'clonebundle %s is using an illegal path' % path)
+    if not repo.vfs.exists(clonebundlepath):
+        raise error.Abort(b'clonebundle %s does not exist' % path)
+
+    def generator(vfs, bundle_path):
+        with vfs(bundle_path) as f:
+            # Send the file size first (st_size is index 6), then the data.
+            length = os.fstat(f.fileno())[6]
+            yield util.uvarintencode(length)
+            for chunk in util.filechunkiter(f):
+                yield chunk
+
+    stream = generator(repo.vfs, clonebundlepath)
+    return wireprototypes.streamres(gen=stream, prefer_uncompressed=True)
+
+
 @wireprotocommand(b'clonebundles', b'', permission=b'pull')
 def clonebundles(repo, proto):
     """Server command for returning info for available bundles to seed clones.
@@ -273,9 +308,21 @@
     Extensions may wrap this command to filter or dynamically emit data
     depending on the request. e.g. you could advertise URLs for the closest
     data center given the client's IP address.
+
+    The only filter on the server side is filtering out inline clonebundles
+    in case a client does not support them.
+    Otherwise, older clients would retrieve and error out on those.
     """
-    manifest = bundlecaches.get_manifest(repo)
-    return wireprototypes.bytesresponse(manifest)
+    manifest_contents = bundlecaches.get_manifest(repo)
+    clientcapabilities = proto.getprotocaps()
+    if b'inlineclonebundles' in clientcapabilities:
+        return wireprototypes.bytesresponse(manifest_contents)
+    modified_manifest = []
+    for line in manifest_contents.splitlines():
+        if line.startswith(bundlecaches.CLONEBUNDLESCHEME):
+            continue
+        modified_manifest.append(line)
+    return wireprototypes.bytesresponse(b'\n'.join(modified_manifest))
 
 
 wireprotocaps = [