comparison mercurial/revlog.py @ 39862:5a9ab91e0a45

revlog: new API to emit revision data

I recently refactored changegroup generation code to make it more storage agnostic. I made significant progress. But there is still a bit of work to be done. Specifically:

* Changegroup code is looking at low-level storage attributes to influence sorting. Sorting should be done at the storage layer.
* The linknode lookup and sorting code for ellipsis is very complicated.
* Linknodes are just generally wonky because e.g. file storage doesn't know how to translate a linkrev to a changelog node.
* We regressed performance when introducing the request-response objects.

Having thought about this problem a bit, I think I've come up with a better interface for emitting revision deltas. This commit defines and implements that interface. See the docstring in repository.py for more info.

This API adds 3 notable features over the previous one.

First, it defers node ordering to the storage implementation in the common case but allows overriding as necessary. We have a facility for requesting an exact ordering (used in ellipsis mode). We have another facility for storage order (used for changelog).

Second, we have an argument specifying assumptions about parent revisions. This can be used to force a fulltext revision when we don't know the receiver has a parent revision to delta against.

Third, we can control whether revision data is emitted. This makes the API suitable as a generic "index data retrieval" API as well as for producing revision deltas - possibly in the same operation!

The new API is much simpler: we no longer need a complicated "request" object to encapsulate the delta generation request. I'm optimistic this will recover the performance loss associated with emitrevisiondeltas().

Storage unit tests for the new API have been implemented.

Future commits will port existing consumers of emitrevisiondeltas() to the new API then remove emitrevisiondeltas().

Differential Revision: https://phab.mercurial-scm.org/D4722
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 21 Sep 2018 14:28:21 -0700
parents e6d3d39cc1c7
children e23c03dc5cf9
comparison
equal deleted inserted replaced
39861:db5501d93bcf 39862:5a9ab91e0a45
57 from .thirdparty import ( 57 from .thirdparty import (
58 attr, 58 attr,
59 ) 59 )
60 from . import ( 60 from . import (
61 ancestor, 61 ancestor,
62 dagop,
62 error, 63 error,
63 mdiff, 64 mdiff,
64 policy, 65 policy,
65 pycompat, 66 pycompat,
66 repository, 67 repository,
240 textlen = attr.ib() 241 textlen = attr.ib()
241 cachedelta = attr.ib() 242 cachedelta = attr.ib()
242 flags = attr.ib() 243 flags = attr.ib()
243 244
244 @interfaceutil.implementer(repository.irevisiondelta) 245 @interfaceutil.implementer(repository.irevisiondelta)
245 @attr.s(slots=True, frozen=True) 246 @attr.s(slots=True)
246 class revlogrevisiondelta(object): 247 class revlogrevisiondelta(object):
247 node = attr.ib() 248 node = attr.ib()
248 p1node = attr.ib() 249 p1node = attr.ib()
249 p2node = attr.ib() 250 p2node = attr.ib()
250 basenode = attr.ib() 251 basenode = attr.ib()
251 linknode = attr.ib()
252 flags = attr.ib() 252 flags = attr.ib()
253 baserevisionsize = attr.ib() 253 baserevisionsize = attr.ib()
254 revision = attr.ib() 254 revision = attr.ib()
255 delta = attr.ib() 255 delta = attr.ib()
256 linknode = attr.ib(default=None)
256 257
257 @interfaceutil.implementer(repository.iverifyproblem) 258 @interfaceutil.implementer(repository.iverifyproblem)
258 @attr.s(frozen=True) 259 @attr.s(frozen=True)
259 class revlogproblem(object): 260 class revlogproblem(object):
260 warning = attr.ib(default=None) 261 warning = attr.ib(default=None)
2372 revision=revision, 2373 revision=revision,
2373 delta=delta) 2374 delta=delta)
2374 2375
2375 prevrev = rev 2376 prevrev = rev
2376 2377
def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                  assumehaveparentrevisions=False, deltaprevious=False):
    """Generate a ``revlogrevisiondelta`` for each node in ``nodes``.

    This is a generator: nothing (including argument validation) runs
    until the first item is requested.

    ``nodes`` is an iterable of nodes to emit. Entries resolving to the
    null revision are skipped. An empty ``nodes`` yields nothing.

    ``nodesorder`` controls emission order:

    * ``'nodes'``: emit in the order given by ``nodes``.
    * ``'storage'``: emit sorted by revision number.
    * ``None``: behaves like ``'storage'`` when the revlog is not
      generaldelta; otherwise the order is whatever
      ``dagop.linearize()`` returns for the requested revisions.

    ``revisiondata`` controls whether revision data is produced. When
    false, the ``revision``, ``delta`` and ``baserevisionsize`` fields of
    every emitted object are ``None``; ``basenode`` is still populated
    with the delta base that was selected.

    ``assumehaveparentrevisions`` allows a delta stored against a parent
    to be reused even if that parent was not emitted earlier by this
    call.

    ``deltaprevious`` forces every revision to be expressed against the
    previously emitted revision. The first revision is expressed against
    its own first parent.

    Emitted objects do not have ``linknode`` passed explicitly; it keeps
    the default declared on ``revlogrevisiondelta``.

    Raises ``error.ProgrammingError`` if ``nodesorder`` is not one of the
    accepted values.
    """
    if nodesorder not in ('nodes', 'storage', None):
        raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                     nodesorder)

    # Without generaldelta there is no DAG-aware default ordering; fall
    # back to storage order.
    if nodesorder is None and not self._generaldelta:
        nodesorder = 'storage'

    frev = self.rev
    fnode = self.node

    if nodesorder == 'nodes':
        revs = [frev(n) for n in nodes]
    elif nodesorder == 'storage':
        revs = sorted(frev(n) for n in nodes)
    else:
        assert self._generaldelta
        revs = set(frev(n) for n in nodes)
        revs = dagop.linearize(revs, self.parentrevs)

    # Nothing was requested. Bail out before ``revs[0]`` is accessed
    # below, which would otherwise raise IndexError on an empty request.
    if not revs:
        return

    prevrev = None

    # Seed the "previous" revision with the first parent of the first
    # revision so the first emitted entry has something to delta against.
    if deltaprevious or assumehaveparentrevisions:
        prevrev = self.parentrevs(revs[0])[0]

    # Set of revs available to delta against.
    available = set()

    for rev in revs:
        if rev == nullrev:
            continue

        node = fnode(rev)
        deltaparentrev = self.deltaparent(rev)
        p1rev, p2rev = self.parentrevs(rev)

        # Forced delta against previous mode.
        if deltaprevious:
            baserev = prevrev

        # Revlog is configured to use full snapshots. Stick to that.
        elif not self._storedeltachains:
            baserev = nullrev

        # There is a delta in storage. We try to use that because it
        # amounts to effectively copying data from storage and is
        # therefore the fastest.
        elif deltaparentrev != nullrev:
            # Base revision was already emitted in this group. We can
            # always safely use the delta.
            if deltaparentrev in available:
                baserev = deltaparentrev

            # Base revision is a parent that hasn't been emitted already.
            # Use it if we can assume the receiver has the parent revision.
            elif (assumehaveparentrevisions
                  and deltaparentrev in (p1rev, p2rev)):
                baserev = deltaparentrev

            # No guarantee the receiver has the delta parent. Send delta
            # against last revision (if possible), which in the common case
            # should be similar enough to this revision that the delta is
            # reasonable.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

        # Storage has a fulltext revision.

        # Let's use the previous revision, which is as good a guess as any.
        # There is definitely room to improve this logic.
        elif prevrev is not None:
            baserev = prevrev
        else:
            baserev = nullrev

        # But we can't actually use our chosen delta base for whatever
        # reason. Reset to fulltext.
        if baserev != nullrev and not self.candelta(baserev, rev):
            baserev = nullrev

        revision = None
        delta = None
        baserevisionsize = None

        if revisiondata:
            # A censored base or target cannot be diffed: emit the raw
            # fulltext, or the tombstone if reading the revision raises.
            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            # Fulltext. In deltaprevious mode a delta is emitted even
            # when the base is the null revision, so that case falls
            # through to the branch below.
            elif baserev == nullrev and not deltaprevious:
                revision = self.revision(node, raw=True)
                available.add(rev)
            else:
                delta = self.revdiff(baserev, rev)
                available.add(rev)

        yield revlogrevisiondelta(
            node=node,
            p1node=fnode(p1rev),
            p2node=fnode(p2rev),
            basenode=fnode(baserev),
            flags=self.flags(rev),
            baserevisionsize=baserevisionsize,
            revision=revision,
            delta=delta)

        prevrev = rev
2493
2377 DELTAREUSEALWAYS = 'always' 2494 DELTAREUSEALWAYS = 'always'
2378 DELTAREUSESAMEREVS = 'samerevs' 2495 DELTAREUSESAMEREVS = 'samerevs'
2379 DELTAREUSENEVER = 'never' 2496 DELTAREUSENEVER = 'never'
2380 2497
2381 DELTAREUSEFULLADD = 'fulladd' 2498 DELTAREUSEFULLADD = 'fulladd'