Mercurial > public > mercurial-scm > hg
comparison mercurial/wireprotov2server.py @ 40178:46a40bce3ae0
wireprotov2: define and implement "filesdata" command
Previously, the only way to access file revision data was the
"filedata" command. That command is useful to have, but it only
allowed resolving revision data for a single file. This meant that
clients needed to send one command for each tracked path they were
seeking data on. Furthermore, each of those commands needed to enumerate
the exact file nodes the client wanted data for.
This approach meant that clients were sending a lot of data to
remotes in order to request file data. For example, if there were 1M
file revisions, a client would need at least 20,000,000 bytes (20 bytes
per node) just to encode the file nodes! Many clients on the internet
don't have that kind of upload capacity.
In order to limit the amount of data that clients must send, we'll
need more efficient ways to request repository data.
This commit defines and implements a new "filesdata" command. This
command allows the retrieval of data for multiple files by specifying
changeset revisions and optional file patterns. The command figures
out what file revisions are "relevant" and sends them in bulk.
The logic for choosing which file revisions to send when haveparents
is not set is overly simple and will over-send files: it walks the
manifest of every requested changeset and sends every matching file
revision it encounters. We will need more smarts here eventually.
(Specifically, the client will need to tell the server which revisions
it knows about.) This work is deferred until a later time.
Differential Revision: https://phab.mercurial-scm.org/D4981
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Wed, 03 Oct 2018 12:54:39 -0700 |
parents | 41e2633bcd00 |
children | ed55a0077490 |
comparison
equal
deleted
inserted
replaced
40177:41e2633bcd00 | 40178:46a40bce3ae0 |
---|---|
4 # This software may be used and distributed according to the terms of the | 4 # This software may be used and distributed according to the terms of the |
5 # GNU General Public License version 2 or any later version. | 5 # GNU General Public License version 2 or any later version. |
6 | 6 |
7 from __future__ import absolute_import | 7 from __future__ import absolute_import |
8 | 8 |
9 import collections | |
9 import contextlib | 10 import contextlib |
10 import hashlib | 11 import hashlib |
11 | 12 |
12 from .i18n import _ | 13 from .i18n import _ |
13 from .node import ( | 14 from .node import ( |
16 ) | 17 ) |
17 from . import ( | 18 from . import ( |
18 discovery, | 19 discovery, |
19 encoding, | 20 encoding, |
20 error, | 21 error, |
22 match as matchmod, | |
21 narrowspec, | 23 narrowspec, |
22 pycompat, | 24 pycompat, |
23 wireprotoframing, | 25 wireprotoframing, |
24 wireprototypes, | 26 wireprototypes, |
25 ) | 27 ) |
1002 yield d | 1004 yield d |
1003 | 1005 |
1004 for extra in followingdata: | 1006 for extra in followingdata: |
1005 yield extra | 1007 yield extra |
1006 | 1008 |
1009 def makefilematcher(repo, pathfilter): | |
1010 """Construct a matcher from a path filter dict.""" | |
1011 | |
1012 # Validate values. | |
1013 if pathfilter: | |
1014 for key in (b'include', b'exclude'): | |
1015 for pattern in pathfilter.get(key, []): | |
1016 if not pattern.startswith((b'path:', b'rootfilesin:')): | |
1017 raise error.WireprotoCommandError( | |
1018 '%s pattern must begin with `path:` or `rootfilesin:`; ' | |
1019 'got %s', (key, pattern)) | |
1020 | |
1021 if pathfilter: | |
1022 matcher = matchmod.match(repo.root, b'', | |
1023 include=pathfilter.get(b'include', []), | |
1024 exclude=pathfilter.get(b'exclude', [])) | |
1025 else: | |
1026 matcher = matchmod.match(repo.root, b'') | |
1027 | |
1028 # Requested patterns could include files not in the local store. So | |
1029 # filter those out. | |
1030 return matchmod.intersectmatchers(repo.narrowmatch(), matcher) | |
1031 | |
1007 @wireprotocommand( | 1032 @wireprotocommand( |
1008 'filedata', | 1033 'filedata', |
1009 args={ | 1034 args={ |
1010 'haveparents': { | 1035 'haveparents': { |
1011 'type': 'bool', | 1036 'type': 'bool', |
1031 # TODO censoring a file revision won't invalidate the cache. | 1056 # TODO censoring a file revision won't invalidate the cache. |
1032 # Figure out a way to take censoring into account when deriving | 1057 # Figure out a way to take censoring into account when deriving |
1033 # the cache key. | 1058 # the cache key. |
1034 cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True)) | 1059 cachekeyfn=makecommandcachekeyfn('filedata', 1, allargs=True)) |
1035 def filedata(repo, proto, haveparents, nodes, fields, path): | 1060 def filedata(repo, proto, haveparents, nodes, fields, path): |
1061 # TODO this API allows access to file revisions that are attached to | |
1062 # secret changesets. filesdata does not have this problem. Maybe this | |
1063 # API should be deleted? | |
1064 | |
1036 try: | 1065 try: |
1037 # Extensions may wish to access the protocol handler. | 1066 # Extensions may wish to access the protocol handler. |
1038 store = getfilestore(repo, proto, path) | 1067 store = getfilestore(repo, proto, path) |
1039 except FileAccessError as e: | 1068 except FileAccessError as e: |
1040 raise error.WireprotoCommandError(e.msg, e.args) | 1069 raise error.WireprotoCommandError(e.msg, e.args) |
1055 b'totalitems': len(nodes), | 1084 b'totalitems': len(nodes), |
1056 } | 1085 } |
1057 | 1086 |
1058 for o in emitfilerevisions(revisions, fields): | 1087 for o in emitfilerevisions(revisions, fields): |
1059 yield o | 1088 yield o |
1089 | |
1090 def filesdatacapabilities(repo, proto): | |
1091 batchsize = repo.ui.configint( | |
1092 b'experimental', b'server.filesdata.recommended-batch-size') | |
1093 return { | |
1094 b'recommendedbatchsize': batchsize, | |
1095 } | |
1096 | |
1097 @wireprotocommand( | |
1098 'filesdata', | |
1099 args={ | |
1100 'haveparents': { | |
1101 'type': 'bool', | |
1102 'default': lambda: False, | |
1103 'example': True, | |
1104 }, | |
1105 'fields': { | |
1106 'type': 'set', | |
1107 'default': set, | |
1108 'example': {b'parents', b'revision'}, | |
1109 'validvalues': {b'firstchangeset', b'parents', b'revision'}, | |
1110 }, | |
1111 'pathfilter': { | |
1112 'type': 'dict', | |
1113 'default': lambda: None, | |
1114 'example': {b'include': [b'path:tests']}, | |
1115 }, | |
1116 'revisions': { | |
1117 'type': 'list', | |
1118 'example': [{ | |
1119 b'type': b'changesetexplicit', | |
1120 b'nodes': [b'abcdef...'], | |
1121 }], | |
1122 }, | |
1123 }, | |
1124 permission='pull', | |
1125 # TODO censoring a file revision won't invalidate the cache. | |
1126 # Figure out a way to take censoring into account when deriving | |
1127 # the cache key. | |
1128 cachekeyfn=makecommandcachekeyfn('filesdata', 1, allargs=True), | |
1129 extracapabilitiesfn=filesdatacapabilities) | |
1130 def filesdata(repo, proto, haveparents, fields, pathfilter, revisions): | |
1131 # TODO This should operate on a repo that exposes obsolete changesets. There | |
1132 # is a race between a client making a push that obsoletes a changeset and | |
1133 # another client fetching files data for that changeset. If a client has a | |
1134 # changeset, it should probably be allowed to access files data for that | |
1135 # changeset. | |
1136 | |
1137 cl = repo.changelog | |
1138 outgoing = resolvenodes(repo, revisions) | |
1139 filematcher = makefilematcher(repo, pathfilter) | |
1140 | |
1141 # Figure out what needs to be emitted. | |
1142 changedpaths = set() | |
1143 fnodes = collections.defaultdict(set) | |
1144 | |
1145 for node in outgoing: | |
1146 ctx = repo[node] | |
1147 changedpaths.update(ctx.files()) | |
1148 | |
1149 changedpaths = sorted(p for p in changedpaths if filematcher(p)) | |
1150 | |
1151 # If ancestors are known, we send file revisions having a linkrev in the | |
1152 # outgoing set of changeset revisions. | |
1153 if haveparents: | |
1154 outgoingclrevs = set(cl.rev(n) for n in outgoing) | |
1155 | |
1156 for path in changedpaths: | |
1157 try: | |
1158 store = getfilestore(repo, proto, path) | |
1159 except FileAccessError as e: | |
1160 raise error.WireprotoCommandError(e.msg, e.args) | |
1161 | |
1162 for rev in store: | |
1163 linkrev = store.linkrev(rev) | |
1164 | |
1165 if linkrev in outgoingclrevs: | |
1166 fnodes[path].add(store.node(rev)) | |
1167 | |
1168 # If ancestors aren't known, we walk the manifests and send all | |
1169 # encountered file revisions. | |
1170 else: | |
1171 for node in outgoing: | |
1172 mctx = repo[node].manifestctx() | |
1173 | |
1174 for path, fnode in mctx.read().items(): | |
1175 if filematcher(path): | |
1176 fnodes[path].add(fnode) | |
1177 | |
1178 yield { | |
1179 b'totalpaths': len(fnodes), | |
1180 b'totalitems': sum(len(v) for v in fnodes.values()) | |
1181 } | |
1182 | |
1183 for path, filenodes in sorted(fnodes.items()): | |
1184 try: | |
1185 store = getfilestore(repo, proto, path) | |
1186 except FileAccessError as e: | |
1187 raise error.WireprotoCommandError(e.msg, e.args) | |
1188 | |
1189 yield { | |
1190 b'path': path, | |
1191 b'totalitems': len(filenodes), | |
1192 } | |
1193 | |
1194 revisions = store.emitrevisions(filenodes, | |
1195 revisiondata=b'revision' in fields, | |
1196 assumehaveparentrevisions=haveparents) | |
1197 | |
1198 for o in emitfilerevisions(revisions, fields): | |
1199 yield o | |
1060 | 1200 |
1061 @wireprotocommand( | 1201 @wireprotocommand( |
1062 'heads', | 1202 'heads', |
1063 args={ | 1203 args={ |
1064 'publiconly': { | 1204 'publiconly': { |