--- a/mercurial/revlog.py Thu Jan 05 17:16:07 2017 +0000
+++ b/mercurial/revlog.py Tue Jan 10 16:15:21 2017 +0000
@@ -55,7 +55,11 @@
# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
-REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
+# Stable order in which flags are processed and their processors applied.
+# _processflags() walks this list front to back for 'read' operations and in
+# reverse for 'write' operations, so the position of an entry is significant.
+REVIDX_FLAGS_ORDER = [
+ REVIDX_ISCENSORED,
+]
+# Mask of every flag this module accepts, derived from REVIDX_FLAGS_ORDER.
+# NOTE(review): presumably util.bitsfrom() ORs the entries together; its
+# definition is not visible here -- confirm.
+REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
# max size of revlog with inline data
_maxinline = 131072
@@ -64,6 +68,41 @@
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
+ProgrammingError = error.ProgrammingError
+
+# Registry of flag processors, keyed by revision flag (use
+# 'addflagprocessor()' to register one). A value of None marks a flag that
+# has no transform: _processflags() leaves the text untouched for it and keeps
+# hash validation enabled. Flags seeded here count as already registered, so
+# 'addflagprocessor()' refuses to replace them.
+_flagprocessors = {
+ REVIDX_ISCENSORED: None,
+}
+
+def addflagprocessor(flag, processor):
+ """Register a flag processor on a revision data flag.
+
+ Invariants:
+ - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
+ - Only one flag processor can be registered on a specific flag.
+ - A flag processor must be a 3-tuple of functions (read, write, raw) with
+ the following signatures:
+ - (read) f(self, text) -> newtext, bool
+ - (write) f(self, text) -> newtext, bool
+ - (raw) f(self, text) -> bool
+ The boolean returned by these transforms is used to determine whether
+ 'newtext' can be used for hash integrity checking.
+
+ Note: The 'raw' transform is used for changegroup generation and in some
+ debug commands. In this case the transform only indicates whether the
+ contents can be used for hash integrity checks.
+
+ The shape of ``processor`` is not checked here; it is only unpacked when
+ _processflags() applies it.
+
+ Raises ProgrammingError if ``flag`` shares no bit with REVIDX_KNOWN_FLAGS
+ or is not listed in REVIDX_FLAGS_ORDER, and error.Abort if ``flag`` already
+ has an entry in the processor registry.
+ """
+ # Reject flags that share no bit with the known-flags mask.
+ if not flag & REVIDX_KNOWN_FLAGS:
+ msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
+ raise ProgrammingError(msg)
+ # _processflags() only visits flags listed in REVIDX_FLAGS_ORDER, so a
+ # processor for an unlisted flag would never be applied.
+ if flag not in REVIDX_FLAGS_ORDER:
+ msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
+ raise ProgrammingError(msg)
+ # Any existing key counts as registered, including entries seeded with
+ # None in the registry itself.
+ if flag in _flagprocessors:
+ msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
+ raise error.Abort(msg)
+ _flagprocessors[flag] = processor
def getoffset(q):
return int(q >> 16)
@@ -1231,11 +1270,6 @@
if rev is None:
rev = self.rev(node)
- # check rev flags
- if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
- raise RevlogError(_('incompatible revision flag %x') %
- (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
-
chain, stopped = self._deltachain(rev, stoprev=cachedrev)
if stopped:
text = self._cache[2]
@@ -1249,7 +1283,12 @@
bins = bins[1:]
text = mdiff.patches(text, bins)
- self.checkhash(text, node, rev=rev)
+
+ text, validatehash = self._processflags(text, self.flags(rev), 'read',
+ raw=raw)
+ if validatehash:
+ self.checkhash(text, node, rev=rev)
+
self._cache = (node, rev, text)
return text
@@ -1261,6 +1300,65 @@
"""
return hash(text, p1, p2)
+ def _processflags(self, text, flags, operation, raw=False):
+ """Inspect revision data flags and apply the transforms defined by
+ registered flag processors.
+
+ ``text`` - the revision data to process
+ ``flags`` - the revision flags
+ ``operation`` - the operation being performed ('read' or 'write')
+ ``raw`` - an optional argument describing if the raw transform should be
+ applied.
+
+ This method processes the flags in the order (or reverse order if
+ ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
+ flag processors registered for present flags. The order of flags defined
+ in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
+
+ Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
+ processed text and ``validatehash`` is a bool indicating whether the
+ returned text should be checked for hash integrity.
+
+ Note: If the ``raw`` argument is set, it has precedence over the
+ operation and will only update the value of ``validatehash``.
+
+ Raises ProgrammingError if ``operation`` is neither 'read' nor 'write',
+ and RevlogError if ``flags`` contains a bit outside REVIDX_KNOWN_FLAGS or
+ a present flag has no entry in the processor registry.
+ """
+ if not operation in ('read', 'write'):
+ raise ProgrammingError(_("invalid '%s' operation ") % (operation))
+ # Check all flags are known.
+ if flags & ~REVIDX_KNOWN_FLAGS:
+ raise RevlogError(_("incompatible revision flag '%#x'") %
+ (flags & ~REVIDX_KNOWN_FLAGS))
+ # Hash validation stays enabled unless an applied transform disables it.
+ validatehash = True
+ # Depending on the operation (read or write), the order might be
+ # reversed due to non-commutative transforms.
+ orderedflags = REVIDX_FLAGS_ORDER
+ if operation == 'write':
+ orderedflags = reversed(orderedflags)
+
+ for flag in orderedflags:
+ # If a flagprocessor has been registered for a known flag, apply the
+ # related operation transform and update result tuple.
+ if flag & flags:
+ # Default verdict, kept when the registered processor is None.
+ vhash = True
+
+ if flag not in _flagprocessors:
+ message = _("missing processor for flag '%#x'") % (flag)
+ raise RevlogError(message)
+
+ processor = _flagprocessors[flag]
+ if processor is not None:
+ readtransform, writetransform, rawtransform = processor
+
+ # 'raw' wins over the operation and never rewrites the text.
+ if raw:
+ vhash = rawtransform(self, text)
+ elif operation == 'read':
+ text, vhash = readtransform(self, text)
+ else: # write operation
+ text, vhash = writetransform(self, text)
+ # One transform vetoing validation disables it for the final text.
+ validatehash = validatehash and vhash
+
+ return text, validatehash
+
def checkhash(self, text, node, p1=None, p2=None, rev=None):
"""Check node hash integrity.
@@ -1345,6 +1443,17 @@
raise RevlogError(_("attempted to add linkrev -1 to %s")
% self.indexfile)
+ if flags:
+ node = node or self.hash(text, p1, p2)
+
+ newtext, validatehash = self._processflags(text, flags, 'write')
+
+ # If the flag processor modifies the revision data, ignore any provided
+ # cachedelta.
+ if newtext != text:
+ cachedelta = None
+ text = newtext
+
if len(text) > _maxentrysize:
raise RevlogError(
_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
@@ -1354,6 +1463,9 @@
if node in self.nodemap:
return node
+ if validatehash:
+ self.checkhash(text, node, p1=p1, p2=p2)
+
dfh = None
if not self._inline:
dfh = self.opener(self.datafile, "a+")
@@ -1448,7 +1560,10 @@
btext[0] = mdiff.patch(basetext, delta)
try:
- self.checkhash(btext[0], node, p1=p1, p2=p2)
+ res = self._processflags(btext[0], flags, 'read', raw=raw)
+ btext[0], validatehash = res
+ if validatehash:
+ self.checkhash(btext[0], node, p1=p1, p2=p2)
if flags & REVIDX_ISCENSORED:
raise RevlogError(_('node %s is not censored') % node)
except CensoredNodeError: