diff -r ed919b90acda -r c537144fdbef mercurial/wireprotov2server.py --- a/mercurial/wireprotov2server.py Wed Sep 26 17:16:27 2018 -0700 +++ b/mercurial/wireprotov2server.py Wed Sep 26 17:16:56 2018 -0700 @@ -7,6 +7,7 @@ from __future__ import absolute_import import contextlib +import hashlib from .i18n import _ from .node import ( @@ -25,6 +26,7 @@ wireprototypes, ) from .utils import ( + cborutil, interfaceutil, stringutil, ) @@ -35,6 +37,11 @@ COMMANDS = wireprototypes.commanddict() +# Value inserted into cache key computation function. Change the value to +# force new cache keys for every command request. This should be done when +# there is a change to how caching works, etc. +GLOBAL_CACHE_VERSION = 1 + def handlehttpv2request(rctx, req, res, checkperm, urlparts): from .hgweb import common as hgwebcommon @@ -333,12 +340,64 @@ return repo.filtered('served') def dispatch(repo, proto, command): + """Run a wire protocol command. + + Returns an iterable of objects that will be sent to the client. + """ repo = getdispatchrepo(repo, proto, command) - func, spec = COMMANDS[command] + entry = COMMANDS[command] + func = entry.func + spec = entry.args + args = proto.getargs(spec) - return func(repo, proto, **pycompat.strkwargs(args)) + # There is some duplicate boilerplate code here for calling the command and + # emitting objects. It is either that or a lot of indented code that looks + # like a pyramid (since there are a lot of code paths that result in not + # using the cacher). + callcommand = lambda: func(repo, proto, **pycompat.strkwargs(args)) + + # Request is not cacheable. Don't bother instantiating a cacher. + if not entry.cachekeyfn: + for o in callcommand(): + yield o + return + + cacher = makeresponsecacher(repo, proto, command, args, + cborutil.streamencode) + + # But we have no cacher. Do default handling. + if not cacher: + for o in callcommand(): + yield o + return + + with cacher: + cachekey = entry.cachekeyfn(repo, proto, cacher, **args) + + # No cache key or the cacher doesn't like it. Do default handling. + if cachekey is None or not cacher.setcachekey(cachekey): + for o in callcommand(): + yield o + return + + # Serve it from the cache, if possible. + cached = cacher.lookup() + + if cached: + for o in cached['objs']: + yield o + return + + # Else call the command and feed its output into the cacher, allowing + # the cacher to buffer/mutate objects as it desires. + for o in callcommand(): + for o in cacher.onobject(o): + yield o + + for o in cacher.onfinished(): + yield o @interfaceutil.implementer(wireprototypes.baseprotocolhandler) class httpv2protocolhandler(object): @@ -460,7 +519,7 @@ return proto.addcapabilities(repo, caps) -def wireprotocommand(name, args=None, permission='push'): +def wireprotocommand(name, args=None, permission='push', cachekeyfn=None): """Decorator to declare a wire protocol command. ``name`` is the name of the wire protocol command being provided. @@ -489,11 +548,21 @@ because otherwise commands not declaring their permissions could modify a repository that is supposed to be read-only. + ``cachekeyfn`` defines an optional callable that can derive the + cache key for this request. + Wire protocol commands are generators of objects to be serialized and sent to the client. If a command raises an uncaught exception, this will be translated into a command error. + + All commands can opt in to being cacheable by defining a function + (``cachekeyfn``) that is called to derive a cache key. This function + receives the same arguments as the command itself plus a ``cacher`` + argument containing the active cacher for the request and returns a bytes + containing the key in a cache the response to this command may be cached + under. """ transports = {k for k, v in wireprototypes.TRANSPORTS.items() if v['version'] == 2} @@ -543,12 +612,97 @@ 'for version 2' % name) COMMANDS[name] = wireprototypes.commandentry( - func, args=args, transports=transports, permission=permission) + func, args=args, transports=transports, permission=permission, + cachekeyfn=cachekeyfn) return func return register +def makecommandcachekeyfn(command, localversion=None, allargs=False): + """Construct a cache key derivation function with common features. + + By default, the cache key is a hash of: + + * The command name. + * A global cache version number. + * A local cache version number (passed via ``localversion``). + * All the arguments passed to the command. + * The media type used. + * Wire protocol version string. + * The repository path. + """ + if not allargs: + raise error.ProgrammingError('only allargs=True is currently supported') + + if localversion is None: + raise error.ProgrammingError('must set localversion argument value') + + def cachekeyfn(repo, proto, cacher, **args): + spec = COMMANDS[command] + + # Commands that mutate the repo can not be cached. + if spec.permission == 'push': + return None + + # TODO config option to disable caching. + + # Our key derivation strategy is to construct a data structure + # holding everything that could influence cacheability and to hash + # the CBOR representation of that. Using CBOR seems like it might + # be overkill. However, simpler hashing mechanisms are prone to + # duplicate input issues. e.g. if you just concatenate two values, + # "foo"+"bar" is identical to "fo"+"obar". Using CBOR provides + # "padding" between values and prevents these problems. + + # Seed the hash with various data. + state = { + # To invalidate all cache keys. + b'globalversion': GLOBAL_CACHE_VERSION, + # More granular cache key invalidation. + b'localversion': localversion, + # Cache keys are segmented by command. + b'command': pycompat.sysbytes(command), + # Throw in the media type and API version strings so changes + # to exchange semantics invalid cache. + b'mediatype': FRAMINGTYPE, + b'version': HTTP_WIREPROTO_V2, + # So same requests for different repos don't share cache keys. + b'repo': repo.root, + } + + # The arguments passed to us will have already been normalized. + # Default values will be set, etc. This is important because it + # means that it doesn't matter if clients send an explicit argument + # or rely on the default value: it will all normalize to the same + # set of arguments on the server and therefore the same cache key. + # + # Arguments by their very nature must support being encoded to CBOR. + # And the CBOR encoder is deterministic. So we hash the arguments + # by feeding the CBOR of their representation into the hasher. + if allargs: + state[b'args'] = pycompat.byteskwargs(args) + + cacher.adjustcachekeystate(state) + + hasher = hashlib.sha1() + for chunk in cborutil.streamencode(state): + hasher.update(chunk) + + return pycompat.sysbytes(hasher.hexdigest()) + + return cachekeyfn + +def makeresponsecacher(repo, proto, command, args, objencoderfn): + """Construct a cacher for a cacheable command. + + Returns an ``iwireprotocolcommandcacher`` instance. + + Extensions can monkeypatch this function to provide custom caching + backends. + """ + return None + @wireprotocommand('branchmap', permission='pull') def branchmapv2(repo, proto): yield {encoding.fromlocal(k): v @@ -755,7 +909,11 @@ 'example': b'foo.txt', } }, - permission='pull') + permission='pull', + # TODO censoring a file revision won't invalidate the cache. + # Figure out a way to take censoring into account when deriving + # the cache key. + cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True)) def filedata(repo, proto, haveparents, nodes, fields, path): try: # Extensions may wish to access the protocol handler. @@ -893,7 +1051,8 @@ 'example': b'', }, }, - permission='pull') + permission='pull', + cachekeyfn=makecommandcachekeyfn('manifestdata', 1, allargs=True)) def manifestdata(repo, proto, haveparents, nodes, fields, tree): store = repo.manifestlog.getstorage(tree)