mercurial/wireprotov2server.py
changeset 40021 c537144fdbef
parent 39995 582676acaf6d
child 40023 10cf8b116dd8
--- a/mercurial/wireprotov2server.py	Wed Sep 26 17:16:27 2018 -0700
+++ b/mercurial/wireprotov2server.py	Wed Sep 26 17:16:56 2018 -0700
@@ -7,6 +7,7 @@
 from __future__ import absolute_import
 
 import contextlib
+import hashlib
 
 from .i18n import _
 from .node import (
@@ -25,6 +26,7 @@
     wireprototypes,
 )
 from .utils import (
+    cborutil,
     interfaceutil,
     stringutil,
 )
@@ -35,6 +37,11 @@
 
 COMMANDS = wireprototypes.commanddict()
 
+# Version number mixed into every cache key derived by
+# makecommandcachekeyfn(). Bump it to force new cache keys for every command
+# request, e.g. when there is a change to how caching works.
+GLOBAL_CACHE_VERSION = 1
+
 def handlehttpv2request(rctx, req, res, checkperm, urlparts):
     from .hgweb import common as hgwebcommon
 
@@ -333,12 +340,64 @@
     return repo.filtered('served')
 
 def dispatch(repo, proto, command):
+    """Run a wire protocol command, consulting the response cacher if any.
+
+    Generator of the objects that will be sent to the client.
+    """
     repo = getdispatchrepo(repo, proto, command)
 
-    func, spec = COMMANDS[command]
+    entry = COMMANDS[command]
+    func = entry.func
+    spec = entry.args
+
     args = proto.getargs(spec)
 
-    return func(repo, proto, **pycompat.strkwargs(args))
+    # Keyword names must be native strings for ``**`` expansion on Python 3.
+    strargs = pycompat.strkwargs(args)
+
+    # The call-and-emit boilerplate is repeated below. The alternative is a
+    # pyramid of indented code, since many code paths end up not using the
+    # cacher.
+    callcommand = lambda: func(repo, proto, **strargs)
+
+    # Request is not cacheable. Don't bother instantiating a cacher.
+    if not entry.cachekeyfn:
+        for o in callcommand():
+            yield o
+        return
+
+    cacher = makeresponsecacher(repo, proto, command, args,
+                                cborutil.streamencode)
+
+    # But we have no cacher. Do default handling.
+    if not cacher:
+        for o in callcommand():
+            yield o
+        return
+
+    with cacher:
+        cachekey = entry.cachekeyfn(repo, proto, cacher, **strargs)
+
+        # No cache key or the cacher doesn't like it. Do default handling.
+        if cachekey is None or not cacher.setcachekey(cachekey):
+            for o in callcommand():
+                yield o
+            return
+
+        # Serve it from the cache, if possible.
+        cached = cacher.lookup()
+        if cached:
+            for o in cached['objs']:
+                yield o
+            return
+
+        # Else run the command, letting the cacher buffer/mutate its output.
+        for o in callcommand():
+            for out in cacher.onobject(o):
+                yield out
+
+        for o in cacher.onfinished():
+            yield o
 
 @interfaceutil.implementer(wireprototypes.baseprotocolhandler)
 class httpv2protocolhandler(object):
@@ -460,7 +519,7 @@
 
     return proto.addcapabilities(repo, caps)
 
-def wireprotocommand(name, args=None, permission='push'):
+def wireprotocommand(name, args=None, permission='push', cachekeyfn=None):
     """Decorator to declare a wire protocol command.
 
     ``name`` is the name of the wire protocol command being provided.
@@ -489,11 +548,21 @@
     because otherwise commands not declaring their permissions could modify
     a repository that is supposed to be read-only.
 
+    ``cachekeyfn`` defines an optional callable that can derive the
+    cache key for this request.
+
     Wire protocol commands are generators of objects to be serialized and
     sent to the client.
 
     If a command raises an uncaught exception, this will be translated into
     a command error.
+
+    All commands can opt in to being cacheable by defining a function
+    (``cachekeyfn``) that is called to derive a cache key. This function
+    receives the same arguments as the command itself plus a ``cacher``
+    argument containing the active cacher for the request. It returns a
+    bytes value holding the key under which the response to this command may
+    be cached, or ``None`` when the response should not be cached.
     """
     transports = {k for k, v in wireprototypes.TRANSPORTS.items()
                   if v['version'] == 2}
@@ -543,12 +612,97 @@
                                          'for version 2' % name)
 
         COMMANDS[name] = wireprototypes.commandentry(
-            func, args=args, transports=transports, permission=permission)
+            func, args=args, transports=transports, permission=permission,
+            cachekeyfn=cachekeyfn)
 
         return func
 
     return register
 
+def makecommandcachekeyfn(command, localversion=None, allargs=False):
+    """Construct a cache key derivation function with common features.
+
+    By default, the cache key is a SHA-1 hash of the CBOR encoding of:
+
+    * The command name.
+    * A global cache version number.
+    * A local cache version number (passed via ``localversion``).
+    * All the arguments passed to the command.
+    * The media type used.
+    * Wire protocol version string.
+    * The repository path.
+    """
+    if not allargs:
+        raise error.ProgrammingError('only allargs=True is currently supported')
+
+    if localversion is None:
+        raise error.ProgrammingError('must set localversion argument value')
+
+    def cachekeyfn(repo, proto, cacher, **args):
+        spec = COMMANDS[command]
+
+        # Commands that mutate the repo can not be cached.
+        if spec.permission == 'push':
+            return None
+
+        # TODO config option to disable caching.
+
+        # Our key derivation strategy is to construct a data structure
+        # holding everything that could influence cacheability and to hash
+        # the CBOR representation of that. Using CBOR seems like it might
+        # be overkill. However, simpler hashing mechanisms are prone to
+        # duplicate input issues. e.g. if you just concatenate two values,
+        # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides
+        # "padding" between values and prevents these problems.
+
+        # Seed the hash with various data.
+        state = {
+            # To invalidate all cache keys.
+            b'globalversion': GLOBAL_CACHE_VERSION,
+            # More granular cache key invalidation.
+            b'localversion': localversion,
+            # Cache keys are segmented by command.
+            b'command': pycompat.sysbytes(command),
+            # Throw in the media type and API version strings so changes
+            # to exchange semantics invalidate the cache.
+            b'mediatype': FRAMINGTYPE,
+            b'version': HTTP_WIREPROTO_V2,
+            # So same requests for different repos don't share cache keys.
+            b'repo': repo.root,
+        }
+
+        # The arguments passed to us have already been normalized (default
+        # values set, etc). This matters because a client sending an explicit
+        # argument and one relying on the default value normalize to the same
+        # set of arguments on the server and therefore the same cache key.
+        #
+        # Arguments by their very nature must support being encoded to CBOR,
+        # and the CBOR encoder is deterministic, so the arguments are hashed
+        # by feeding their CBOR representation into the hasher.
+        if allargs:
+            state[b'args'] = pycompat.byteskwargs(args)
+
+        # Let the active cacher adjust the state before it is hashed.
+        cacher.adjustcachekeystate(state)
+
+        hasher = hashlib.sha1()
+        for chunk in cborutil.streamencode(state):
+            hasher.update(chunk)
+
+        return pycompat.sysbytes(hasher.hexdigest())
+
+    return cachekeyfn
+
+def makeresponsecacher(repo, proto, command, args, objencoderfn):
+    """Construct a cacher for a cacheable command.
+
+    Returns an ``iwireprotocolcommandcacher`` instance, or ``None`` when no
+    caching backend is available, in which case ``dispatch()`` runs the
+    command without caching. This default implementation always returns
+    ``None``; extensions can monkeypatch it to provide custom backends.
+    """
+    return None
+
 @wireprotocommand('branchmap', permission='pull')
 def branchmapv2(repo, proto):
     yield {encoding.fromlocal(k): v
@@ -755,7 +909,11 @@
             'example': b'foo.txt',
         }
     },
-    permission='pull')
+    permission='pull',
+    # TODO censoring a file revision won't invalidate the cache.
+    # Figure out a way to take censoring into account when deriving
+    # the cache key.
+    cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True))
 def filedata(repo, proto, haveparents, nodes, fields, path):
     try:
         # Extensions may wish to access the protocol handler.
@@ -893,7 +1051,8 @@
             'example': b'',
         },
     },
-    permission='pull')
+    permission='pull',
+    cachekeyfn=makecommandcachekeyfn('manifestdata', 1, allargs=True))
 def manifestdata(repo, proto, haveparents, nodes, fields, tree):
     store = repo.manifestlog.getstorage(tree)