Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 30745:c1b7b2285522
revlog: flag processor
Add the ability for revlog objects to process revision flags and apply
registered transforms on read/write operations.
This patch introduces:
- the 'revlog._processflags()' method, which looks at revision flags and applies
the flag processors registered on them. Because transforms may be
non-commutative, they are applied in a stable order, and that order is
reversed between read and write operations.
- the 'addflagprocessor()' function, which allows processors to be registered on flags.
Flag processors are defined as a 3-tuple of (read, write, raw) functions to be
applied depending on the operation being performed.
- an update to the behavior of 'revlog.addrevision()'. The current flagprocessor
design relies on extensions wrapping 'addrevision()' to set flags on revision
data, and on the flagprocessor to perform the actual transformation of its
contents. In the lfs case, this means we need to process flags before we reach
the 2GB size check, so 'addrevision()' now performs the following steps in order:
- if flags are set on the revision data, we assume some extension might go on
to modify the contents using the flag processor, so we compute the
node from the original revision data (still allowing extensions to override
the node by wrapping 'addrevision()').
- we then invoke the flag processor to apply registered transforms (in lfs's
case, drastically reducing the size of large blobs).
- finally, we proceed with the 2GB size check.
Note: If a cachedelta is passed to 'addrevision()' and we detect that the
flag processor modified the revision data, we choose to trust the flag processor
and drop the cachedelta.
author | Remi Chaintron <remi@fb.com> |
---|---|
date | Tue, 10 Jan 2017 16:15:21 +0000 |
parents | e12c0fa1f65b |
children | 9cb0bb0f29f0 |
comparison
equal
deleted
inserted
replaced
30744:e12c0fa1f65b | 30745:c1b7b2285522 |
---|---|
53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA | 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA |
54 | 54 |
55 # revlog index flags | 55 # revlog index flags |
56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified | 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified |
57 REVIDX_DEFAULT_FLAGS = 0 | 57 REVIDX_DEFAULT_FLAGS = 0 |
58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED | 58 # stable order in which flags need to be processed and their processors applied |
59 REVIDX_FLAGS_ORDER = [ | |
60 REVIDX_ISCENSORED, | |
61 ] | |
62 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER) | |
59 | 63 |
60 # max size of revlog with inline data | 64 # max size of revlog with inline data |
61 _maxinline = 131072 | 65 _maxinline = 131072 |
62 _chunksize = 1048576 | 66 _chunksize = 1048576 |
63 | 67 |
64 RevlogError = error.RevlogError | 68 RevlogError = error.RevlogError |
65 LookupError = error.LookupError | 69 LookupError = error.LookupError |
66 CensoredNodeError = error.CensoredNodeError | 70 CensoredNodeError = error.CensoredNodeError |
71 ProgrammingError = error.ProgrammingError | |
72 | |
73 # Store flag processors (cf. 'addflagprocessor()' to register) | |
74 _flagprocessors = { | |
75 REVIDX_ISCENSORED: None, | |
76 } | |
77 | |
78 def addflagprocessor(flag, processor): | |
79 """Register a flag processor on a revision data flag. | |
80 | |
81 Invariant: | |
82 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER. | |
83 - Only one flag processor can be registered on a specific flag. | |
84 - flagprocessors must be 3-tuples of functions (read, write, raw) with the | |
85 following signatures: | |
86 - (read) f(self, text) -> newtext, bool | |
87 - (write) f(self, text) -> newtext, bool | |
88 - (raw) f(self, text) -> bool | |
89 The boolean returned by these transforms is used to determine whether | |
90 'newtext' can be used for hash integrity checking. | |
91 | |
92 Note: The 'raw' transform is used for changegroup generation and in some | |
93 debug commands. In this case the transform only indicates whether the | |
94 contents can be used for hash integrity checks. | |
95 """ | |
96 if not flag & REVIDX_KNOWN_FLAGS: | |
97 msg = _("cannot register processor on unknown flag '%#x'.") % (flag) | |
98 raise ProgrammingError(msg) | |
99 if flag not in REVIDX_FLAGS_ORDER: | |
100 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag) | |
101 raise ProgrammingError(msg) | |
102 if flag in _flagprocessors: | |
103 msg = _("cannot register multiple processors on flag '%#x'.") % (flag) | |
104 raise error.Abort(msg) | |
105 _flagprocessors[flag] = processor | |
67 | 106 |
68 def getoffset(q): | 107 def getoffset(q): |
69 return int(q >> 16) | 108 return int(q >> 16) |
70 | 109 |
71 def gettype(q): | 110 def gettype(q): |
1229 # look up what we need to read | 1268 # look up what we need to read |
1230 text = None | 1269 text = None |
1231 if rev is None: | 1270 if rev is None: |
1232 rev = self.rev(node) | 1271 rev = self.rev(node) |
1233 | 1272 |
1234 # check rev flags | |
1235 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS: | |
1236 raise RevlogError(_('incompatible revision flag %x') % | |
1237 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS)) | |
1238 | |
1239 chain, stopped = self._deltachain(rev, stoprev=cachedrev) | 1273 chain, stopped = self._deltachain(rev, stoprev=cachedrev) |
1240 if stopped: | 1274 if stopped: |
1241 text = self._cache[2] | 1275 text = self._cache[2] |
1242 | 1276 |
1243 # drop cache to save memory | 1277 # drop cache to save memory |
1247 if text is None: | 1281 if text is None: |
1248 text = str(bins[0]) | 1282 text = str(bins[0]) |
1249 bins = bins[1:] | 1283 bins = bins[1:] |
1250 | 1284 |
1251 text = mdiff.patches(text, bins) | 1285 text = mdiff.patches(text, bins) |
1252 self.checkhash(text, node, rev=rev) | 1286 |
1287 text, validatehash = self._processflags(text, self.flags(rev), 'read', | |
1288 raw=raw) | |
1289 if validatehash: | |
1290 self.checkhash(text, node, rev=rev) | |
1291 | |
1253 self._cache = (node, rev, text) | 1292 self._cache = (node, rev, text) |
1254 return text | 1293 return text |
1255 | 1294 |
1256 def hash(self, text, p1, p2): | 1295 def hash(self, text, p1, p2): |
1257 """Compute a node hash. | 1296 """Compute a node hash. |
1258 | 1297 |
1259 Available as a function so that subclasses can replace the hash | 1298 Available as a function so that subclasses can replace the hash |
1260 as needed. | 1299 as needed. |
1261 """ | 1300 """ |
1262 return hash(text, p1, p2) | 1301 return hash(text, p1, p2) |
1302 | |
1303 def _processflags(self, text, flags, operation, raw=False): | |
1304 """Inspect revision data flags and applies transforms defined by | |
1305 registered flag processors. | |
1306 | |
1307 ``text`` - the revision data to process | |
1308 ``flags`` - the revision flags | |
1309 ``operation`` - the operation being performed (read or write) | |
1310 ``raw`` - an optional argument describing if the raw transform should be | |
1311 applied. | |
1312 | |
1313 This method processes the flags in the order (or reverse order if | |
1314 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the | |
1315 flag processors registered for present flags. The order of flags defined | |
1316 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity. | |
1317 | |
1318 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the | |
1319 processed text and ``validatehash`` is a bool indicating whether the | |
1320 returned text should be checked for hash integrity. | |
1321 | |
1322 Note: If the ``raw`` argument is set, it has precedence over the | |
1323 operation and will only update the value of ``validatehash``. | |
1324 """ | |
1325 if not operation in ('read', 'write'): | |
1326 raise ProgrammingError(_("invalid '%s' operation ") % (operation)) | |
1327 # Check all flags are known. | |
1328 if flags & ~REVIDX_KNOWN_FLAGS: | |
1329 raise RevlogError(_("incompatible revision flag '%#x'") % | |
1330 (flags & ~REVIDX_KNOWN_FLAGS)) | |
1331 validatehash = True | |
1332 # Depending on the operation (read or write), the order might be | |
1333 # reversed due to non-commutative transforms. | |
1334 orderedflags = REVIDX_FLAGS_ORDER | |
1335 if operation == 'write': | |
1336 orderedflags = reversed(orderedflags) | |
1337 | |
1338 for flag in orderedflags: | |
1339 # If a flagprocessor has been registered for a known flag, apply the | |
1340 # related operation transform and update result tuple. | |
1341 if flag & flags: | |
1342 vhash = True | |
1343 | |
1344 if flag not in _flagprocessors: | |
1345 message = _("missing processor for flag '%#x'") % (flag) | |
1346 raise RevlogError(message) | |
1347 | |
1348 processor = _flagprocessors[flag] | |
1349 if processor is not None: | |
1350 readtransform, writetransform, rawtransform = processor | |
1351 | |
1352 if raw: | |
1353 vhash = rawtransform(self, text) | |
1354 elif operation == 'read': | |
1355 text, vhash = readtransform(self, text) | |
1356 else: # write operation | |
1357 text, vhash = writetransform(self, text) | |
1358 validatehash = validatehash and vhash | |
1359 | |
1360 return text, validatehash | |
1263 | 1361 |
1264 def checkhash(self, text, node, p1=None, p2=None, rev=None): | 1362 def checkhash(self, text, node, p1=None, p2=None, rev=None): |
1265 """Check node hash integrity. | 1363 """Check node hash integrity. |
1266 | 1364 |
1267 Available as a function so that subclasses can extend hash mismatch | 1365 Available as a function so that subclasses can extend hash mismatch |
1343 """ | 1441 """ |
1344 if link == nullrev: | 1442 if link == nullrev: |
1345 raise RevlogError(_("attempted to add linkrev -1 to %s") | 1443 raise RevlogError(_("attempted to add linkrev -1 to %s") |
1346 % self.indexfile) | 1444 % self.indexfile) |
1347 | 1445 |
1446 if flags: | |
1447 node = node or self.hash(text, p1, p2) | |
1448 | |
1449 newtext, validatehash = self._processflags(text, flags, 'write') | |
1450 | |
1451 # If the flag processor modifies the revision data, ignore any provided | |
1452 # cachedelta. | |
1453 if newtext != text: | |
1454 cachedelta = None | |
1455 text = newtext | |
1456 | |
1348 if len(text) > _maxentrysize: | 1457 if len(text) > _maxentrysize: |
1349 raise RevlogError( | 1458 raise RevlogError( |
1350 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB") | 1459 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB") |
1351 % (self.indexfile, len(text))) | 1460 % (self.indexfile, len(text))) |
1352 | 1461 |
1353 node = node or self.hash(text, p1, p2) | 1462 node = node or self.hash(text, p1, p2) |
1354 if node in self.nodemap: | 1463 if node in self.nodemap: |
1355 return node | 1464 return node |
1465 | |
1466 if validatehash: | |
1467 self.checkhash(text, node, p1=p1, p2=p2) | |
1356 | 1468 |
1357 dfh = None | 1469 dfh = None |
1358 if not self._inline: | 1470 if not self._inline: |
1359 dfh = self.opener(self.datafile, "a+") | 1471 dfh = self.opener(self.datafile, "a+") |
1360 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig) | 1472 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig) |
1446 fh = dfh | 1558 fh = dfh |
1447 basetext = self.revision(self.node(baserev), _df=fh, raw=raw) | 1559 basetext = self.revision(self.node(baserev), _df=fh, raw=raw) |
1448 btext[0] = mdiff.patch(basetext, delta) | 1560 btext[0] = mdiff.patch(basetext, delta) |
1449 | 1561 |
1450 try: | 1562 try: |
1451 self.checkhash(btext[0], node, p1=p1, p2=p2) | 1563 res = self._processflags(btext[0], flags, 'read', raw=raw) |
1564 btext[0], validatehash = res | |
1565 if validatehash: | |
1566 self.checkhash(btext[0], node, p1=p1, p2=p2) | |
1452 if flags & REVIDX_ISCENSORED: | 1567 if flags & REVIDX_ISCENSORED: |
1453 raise RevlogError(_('node %s is not censored') % node) | 1568 raise RevlogError(_('node %s is not censored') % node) |
1454 except CensoredNodeError: | 1569 except CensoredNodeError: |
1455 # must pass the censored index flag to add censored revisions | 1570 # must pass the censored index flag to add censored revisions |
1456 if not flags & REVIDX_ISCENSORED: | 1571 if not flags & REVIDX_ISCENSORED: |