Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 39862:5a9ab91e0a45
revlog: new API to emit revision data
I recently refactored changegroup generation code to make it more
storage agnostic. I made significant progress. But there is still
a bit of work to be done. Specifically:
* Changegroup code is looking at low-level storage attributes to
influence sorting. Sorting should be done at the storage layer.
* The linknode lookup and sorting code for ellipsis is very
complicated.
* Linknodes are just generally wonky because e.g. file storage doesn't
know how to translate a linkrev to a changelog node.
* We regressed performance when introducing the request-response
objects.
Having thought about this problem a bit, I think I've come up with
a better interface for emitting revision deltas.
This commit defines and implements that interface. See the docstring
in repository.py for more info.
This API adds 3 notable features over the previous one.
First, it defers node ordering to the storage implementation in
the common case but allows overriding as necessary. We have a
facility for requesting an exact ordering (used in ellipsis
mode). We have another facility for storage order (used for changelog).
Second, we have an argument specifying assumptions about parent
revisions. This can be used to force a fulltext revision when we
don't know the receiver has a parent revision to delta against.
Third, we can control whether revision data is emitted. This makes
the API suitable as a generic "index data retrieval" API as well
as for producing revision deltas - possibly in the same operation!
The new API is much simpler: we no longer need a complicated "request"
object to encapsulate the delta generation request. I'm optimistic
this will recover the performance lost with
emitrevisiondeltas().
Storage unit tests for the new API have been implemented.
Future commits will port existing consumers of emitrevisiondeltas()
to the new API and then remove emitrevisiondeltas().
Differential Revision: https://phab.mercurial-scm.org/D4722
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Fri, 21 Sep 2018 14:28:21 -0700 |
parents | e6d3d39cc1c7 |
children | e23c03dc5cf9 |
comparison
equal
deleted
inserted
replaced
39861:db5501d93bcf | 39862:5a9ab91e0a45 |
---|---|
57 from .thirdparty import ( | 57 from .thirdparty import ( |
58 attr, | 58 attr, |
59 ) | 59 ) |
60 from . import ( | 60 from . import ( |
61 ancestor, | 61 ancestor, |
62 dagop, | |
62 error, | 63 error, |
63 mdiff, | 64 mdiff, |
64 policy, | 65 policy, |
65 pycompat, | 66 pycompat, |
66 repository, | 67 repository, |
240 textlen = attr.ib() | 241 textlen = attr.ib() |
241 cachedelta = attr.ib() | 242 cachedelta = attr.ib() |
242 flags = attr.ib() | 243 flags = attr.ib() |
243 | 244 |
244 @interfaceutil.implementer(repository.irevisiondelta) | 245 @interfaceutil.implementer(repository.irevisiondelta) |
245 @attr.s(slots=True, frozen=True) | 246 @attr.s(slots=True) |
246 class revlogrevisiondelta(object): | 247 class revlogrevisiondelta(object): |
247 node = attr.ib() | 248 node = attr.ib() |
248 p1node = attr.ib() | 249 p1node = attr.ib() |
249 p2node = attr.ib() | 250 p2node = attr.ib() |
250 basenode = attr.ib() | 251 basenode = attr.ib() |
251 linknode = attr.ib() | |
252 flags = attr.ib() | 252 flags = attr.ib() |
253 baserevisionsize = attr.ib() | 253 baserevisionsize = attr.ib() |
254 revision = attr.ib() | 254 revision = attr.ib() |
255 delta = attr.ib() | 255 delta = attr.ib() |
256 linknode = attr.ib(default=None) | |
256 | 257 |
257 @interfaceutil.implementer(repository.iverifyproblem) | 258 @interfaceutil.implementer(repository.iverifyproblem) |
258 @attr.s(frozen=True) | 259 @attr.s(frozen=True) |
259 class revlogproblem(object): | 260 class revlogproblem(object): |
260 warning = attr.ib(default=None) | 261 warning = attr.ib(default=None) |
2372 revision=revision, | 2373 revision=revision, |
2373 delta=delta) | 2374 delta=delta) |
2374 | 2375 |
2375 prevrev = rev | 2376 prevrev = rev |
2376 | 2377 |
2378 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False, | |
2379 assumehaveparentrevisions=False, deltaprevious=False): | |
2380 if nodesorder not in ('nodes', 'storage', None): | |
2381 raise error.ProgrammingError('unhandled value for nodesorder: %s' % | |
2382 nodesorder) | |
2383 | |
2384 if nodesorder is None and not self._generaldelta: | |
2385 nodesorder = 'storage' | |
2386 | |
2387 frev = self.rev | |
2388 fnode = self.node | |
2389 | |
2390 if nodesorder == 'nodes': | |
2391 revs = [frev(n) for n in nodes] | |
2392 elif nodesorder == 'storage': | |
2393 revs = sorted(frev(n) for n in nodes) | |
2394 else: | |
2395 assert self._generaldelta | |
2396 revs = set(frev(n) for n in nodes) | |
2397 revs = dagop.linearize(revs, self.parentrevs) | |
2398 | |
2399 prevrev = None | |
2400 | |
2401 if deltaprevious or assumehaveparentrevisions: | |
2402 prevrev = self.parentrevs(revs[0])[0] | |
2403 | |
2404 # Set of revs available to delta against. | |
2405 available = set() | |
2406 | |
2407 for rev in revs: | |
2408 if rev == nullrev: | |
2409 continue | |
2410 | |
2411 node = fnode(rev) | |
2412 deltaparentrev = self.deltaparent(rev) | |
2413 p1rev, p2rev = self.parentrevs(rev) | |
2414 | |
2415 # Forced delta against previous mode. | |
2416 if deltaprevious: | |
2417 baserev = prevrev | |
2418 | |
2419 # Revlog is configured to use full snapshots. Stick to that. | |
2420 elif not self._storedeltachains: | |
2421 baserev = nullrev | |
2422 | |
2423 # There is a delta in storage. We try to use that because it | |
2424 # amounts to effectively copying data from storage and is | |
2425 # therefore the fastest. | |
2426 elif deltaparentrev != nullrev: | |
2427 # Base revision was already emitted in this group. We can | |
2428 # always safely use the delta. | |
2429 if deltaparentrev in available: | |
2430 baserev = deltaparentrev | |
2431 | |
2432 # Base revision is a parent that hasn't been emitted already. | |
2433 # Use it if we can assume the receiver has the parent revision. | |
2434 elif (assumehaveparentrevisions | |
2435 and deltaparentrev in (p1rev, p2rev)): | |
2436 baserev = deltaparentrev | |
2437 | |
2438 # No guarantee the receiver has the delta parent. Send delta | |
2439 # against last revision (if possible), which in the common case | |
2440 # should be similar enough to this revision that the delta is | |
2441 # reasonable. | |
2442 elif prevrev is not None: | |
2443 baserev = prevrev | |
2444 else: | |
2445 baserev = nullrev | |
2446 | |
2447 # Storage has a fulltext revision. | |
2448 | |
2449 # Let's use the previous revision, which is as good a guess as any. | |
2450 # There is definitely room to improve this logic. | |
2451 elif prevrev is not None: | |
2452 baserev = prevrev | |
2453 else: | |
2454 baserev = nullrev | |
2455 | |
2456 # But we can't actually use our chosen delta base for whatever | |
2457 # reason. Reset to fulltext. | |
2458 if baserev != nullrev and not self.candelta(baserev, rev): | |
2459 baserev = nullrev | |
2460 | |
2461 revision = None | |
2462 delta = None | |
2463 baserevisionsize = None | |
2464 | |
2465 if revisiondata: | |
2466 if self.iscensored(baserev) or self.iscensored(rev): | |
2467 try: | |
2468 revision = self.revision(node, raw=True) | |
2469 except error.CensoredNodeError as e: | |
2470 revision = e.tombstone | |
2471 | |
2472 if baserev != nullrev: | |
2473 baserevisionsize = self.rawsize(baserev) | |
2474 | |
2475 elif baserev == nullrev and not deltaprevious: | |
2476 revision = self.revision(node, raw=True) | |
2477 available.add(rev) | |
2478 else: | |
2479 delta = self.revdiff(baserev, rev) | |
2480 available.add(rev) | |
2481 | |
2482 yield revlogrevisiondelta( | |
2483 node=node, | |
2484 p1node=fnode(p1rev), | |
2485 p2node=fnode(p2rev), | |
2486 basenode=fnode(baserev), | |
2487 flags=self.flags(rev), | |
2488 baserevisionsize=baserevisionsize, | |
2489 revision=revision, | |
2490 delta=delta) | |
2491 | |
2492 prevrev = rev | |
2493 | |
2377 DELTAREUSEALWAYS = 'always' | 2494 DELTAREUSEALWAYS = 'always' |
2378 DELTAREUSESAMEREVS = 'samerevs' | 2495 DELTAREUSESAMEREVS = 'samerevs' |
2379 DELTAREUSENEVER = 'never' | 2496 DELTAREUSENEVER = 'never' |
2380 | 2497 |
2381 DELTAREUSEFULLADD = 'fulladd' | 2498 DELTAREUSEFULLADD = 'fulladd' |