diff mercurial/revlog.py @ 39867:5a9ab91e0a45

revlog: new API to emit revision data I recently refactored changegroup generation code to make it more storage agnostic. I made significant progress. But there is still a bit of work to be done. Specifically: * Changegroup code is looking at low-level storage attributes to influence sorting. Sorting should be done at the storage layer. * The linknode lookup and sorting code for ellipsis is very complicated. * Linknodes are just generally wonky because e.g. file storage doesn't know how to translate a linkrev to a changelog node. * We regressed performance when introducing the request-response objects. Having thought about this problem a bit, I think I've come up with a better interface for emitting revision deltas. This commit defines and implements that interface. See the docstring in repository.py for more info. This API adds 3 notable features over the previous one. First, it defers node ordering to the storage implementation in the common case but allows overriding as necessary. We have a facility for requesting an exact ordering (used in ellipsis mode). We have another facility for storage order (used for changelog). Second, we have an argument specifying assumptions about parent revisions. This can be used to force a fulltext revision when we don't know the receiver has a parent revision to delta against. Third, we can control whether revision data is emitted. This makes the API suitable as a generic "index data retrieval" API as well as for producing revision deltas - possibly in the same operation! The new API is much simpler: we no longer need a complicated "request" object to encapsulate the delta generation request. I'm optimistic this will recover the performance lost to emitrevisiondeltas(). Storage unit tests for the new API have been implemented. Future commits will port existing consumers of emitrevisiondeltas() to the new API and then remove emitrevisiondeltas(). Differential Revision: https://phab.mercurial-scm.org/D4722
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 21 Sep 2018 14:28:21 -0700
parents e6d3d39cc1c7
children e23c03dc5cf9
line wrap: on
line diff
--- a/mercurial/revlog.py	Mon Sep 24 09:41:42 2018 -0700
+++ b/mercurial/revlog.py	Fri Sep 21 14:28:21 2018 -0700
@@ -59,6 +59,7 @@
 )
 from . import (
     ancestor,
+    dagop,
     error,
     mdiff,
     policy,
@@ -242,17 +243,17 @@
     flags = attr.ib()
 
 @interfaceutil.implementer(repository.irevisiondelta)
-@attr.s(slots=True, frozen=True)
+@attr.s(slots=True)  # no longer frozen, so instances are mutable
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
-    linknode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
+    linknode = attr.ib(default=None)  # optional; emitrevisions() omits it
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
@@ -2374,6 +2375,156 @@
 
             prevrev = rev
 
+    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
+                      assumehaveparentrevisions=False, deltaprevious=False):
+        """Emit ``revlogrevisiondelta`` objects for ``nodes``.
+
+        This is a generator. One object is yielded per resolved revision,
+        except that the null revision is skipped. Passing no nodes yields
+        nothing.
+
+        ``nodesorder`` selects the emission order: ``'nodes'`` keeps the
+        order of ``nodes``, ``'storage'`` sorts by revision number, and
+        ``None`` uses storage order for non-generaldelta revlogs and the
+        order computed by ``dagop.linearize()`` otherwise. Any other value
+        raises ``error.ProgrammingError`` once iteration starts.
+
+        ``revisiondata`` controls whether revision content is computed.
+        When false, ``revision``, ``delta`` and ``baserevisionsize`` are all
+        ``None`` on the emitted objects. When true, a revision is emitted
+        either as a raw fulltext in ``revision`` or as a ``delta`` against
+        ``basenode``. If the revision or its chosen base is censored, the
+        fulltext (or the censorship tombstone) is emitted instead of a
+        delta.
+
+        ``assumehaveparentrevisions`` permits reusing a stored delta whose
+        base is a parent of the revision even if that parent has not been
+        emitted by this call.
+
+        ``deltaprevious`` makes each revision use the previously emitted
+        revision as its delta base (the first parent of the first revision
+        to start with). If ``candelta()`` rejects that base, the null
+        revision is used as the base instead.
+
+        ``linknode`` is not populated on the emitted objects.
+        """
+        if nodesorder not in ('nodes', 'storage', None):
+            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
+                                         nodesorder)
+
+        if nodesorder is None and not self._generaldelta:
+            nodesorder = 'storage'
+
+        frev = self.rev
+        fnode = self.node
+
+        if nodesorder == 'nodes':
+            revs = [frev(n) for n in nodes]
+        elif nodesorder == 'storage':
+            revs = sorted(frev(n) for n in nodes)
+        else:
+            assert self._generaldelta
+            revs = set(frev(n) for n in nodes)
+            # Materialize: revs is truth-tested and indexed below.
+            revs = list(dagop.linearize(revs, self.parentrevs))
+
+        prevrev = None
+
+        # An empty request has no first revision to seed prevrev from;
+        # nothing will be emitted in that case.
+        if revs and (deltaprevious or assumehaveparentrevisions):
+            prevrev = self.parentrevs(revs[0])[0]
+
+        # Set of revs available to delta against.
+        available = set()
+
+        for rev in revs:
+            if rev == nullrev:
+                continue
+
+            node = fnode(rev)
+            deltaparentrev = self.deltaparent(rev)
+            p1rev, p2rev = self.parentrevs(rev)
+
+            # Forced delta against previous mode.
+            if deltaprevious:
+                baserev = prevrev
+
+            # Revlog is configured to use full snapshots. Stick to that.
+            elif not self._storedeltachains:
+                baserev = nullrev
+
+            # There is a delta in storage. We try to use that because it
+            # amounts to effectively copying data from storage and is
+            # therefore the fastest.
+            elif deltaparentrev != nullrev:
+                # Base revision was already emitted in this group. We can
+                # always safely use the delta.
+                if deltaparentrev in available:
+                    baserev = deltaparentrev
+
+                # Base revision is a parent that hasn't been emitted already.
+                # Use it if we can assume the receiver has the parent revision.
+                elif (assumehaveparentrevisions
+                      and deltaparentrev in (p1rev, p2rev)):
+                    baserev = deltaparentrev
+
+                # No guarantee the receiver has the delta parent. Send delta
+                # against last revision (if possible), which in the common case
+                # should be similar enough to this revision that the delta is
+                # reasonable.
+                elif prevrev is not None:
+                    baserev = prevrev
+                else:
+                    baserev = nullrev
+
+            # Storage has a fulltext revision.
+
+            # Let's use the previous revision, which is as good a guess as any.
+            # There is definitely room to improve this logic.
+            elif prevrev is not None:
+                baserev = prevrev
+            else:
+                baserev = nullrev
+
+            # But we can't actually use our chosen delta base for whatever
+            # reason. Reset to fulltext.
+            if baserev != nullrev and not self.candelta(baserev, rev):
+                baserev = nullrev
+
+            revision = None
+            delta = None
+            baserevisionsize = None
+
+            if revisiondata:
+                if self.iscensored(baserev) or self.iscensored(rev):
+                    try:
+                        revision = self.revision(node, raw=True)
+                    except error.CensoredNodeError as e:
+                        revision = e.tombstone
+
+                    if baserev != nullrev:
+                        baserevisionsize = self.rawsize(baserev)
+
+                elif baserev == nullrev and not deltaprevious:
+                    revision = self.revision(node, raw=True)
+                    available.add(rev)
+                else:
+                    delta = self.revdiff(baserev, rev)
+                    available.add(rev)
+
+            yield revlogrevisiondelta(
+                node=node,
+                p1node=fnode(p1rev),
+                p2node=fnode(p2rev),
+                basenode=fnode(baserev),
+                flags=self.flags(rev),
+                baserevisionsize=baserevisionsize,
+                revision=revision,
+                delta=delta)
+
+            prevrev = rev
+
     DELTAREUSEALWAYS = 'always'
     DELTAREUSESAMEREVS = 'samerevs'
     DELTAREUSENEVER = 'never'