Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/utils/storageutil.py @ 49786:e92de86cf4f8
emitrevision: consider ancestors of the revisions to emit as available bases
This should make more delta bases valid. This notably affects:
* case where we skipped some parent with an empty delta to delta directly against
an ancestor
* case where an intermediate snapshot is stored.
This change means we could send largish intermediate snapshots over the wire.
However, this is actually a sub-goal here. Sending snapshots over the wire means
the client has a high chance of simply storing the pre-computed delta instead of
doing a lengthy process that will probably end up producing the same intermediate snapshot.
In addition, the overall size of snapshots (of any level) is "only" a fraction of the
overall delta size (0.17% for my Mercurial clone, 20% for my clone of Mozilla
try). So sending them over the wire is unlikely to have a large impact on the
bandwidth used.
If we decide that minimising the bandwidth is an explicit goal, we should
introduce new logic to filter out snapshots as deltas. The current code has no
explicit notion of snapshots so far; they just tended to fall into the wobbly
filtering options.
In some cases, this patch can yield a large improvement to the bundling time:
### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog
# benchmark.name = perf-bundle
# benchmark.variants.revs = last-100000
before: 68.787066 seconds
after: 47.552677 seconds (-30.87%)
That translates to a large improvement in the pull time:
### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog
# benchmark.name = pull
# benchmark.variants.issue6528 = disabled
# benchmark.variants.revs = last-100000
before: 142.186625 seconds
after: 75.897745 seconds (-46.62%)
No significant negative impact has been observed.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sat, 12 Nov 2022 00:18:41 +0100 |
parents | 2fd8750f3722 |
children | e1953a34c110 |
comparison
equal
deleted
inserted
replaced
49785:92c65bd0c4d6 | 49786:e92de86cf4f8 |
---|---|
377 process (if present, see config: debug.bundling.stats. | 377 process (if present, see config: debug.bundling.stats. |
378 """ | 378 """ |
379 | 379 |
380 fnode = store.node | 380 fnode = store.node |
381 frev = store.rev | 381 frev = store.rev |
382 parents = store.parentrevs | |
382 | 383 |
383 if nodesorder == b'nodes': | 384 if nodesorder == b'nodes': |
384 revs = [frev(n) for n in nodes] | 385 revs = [frev(n) for n in nodes] |
385 elif nodesorder == b'linear': | 386 elif nodesorder == b'linear': |
386 revs = {frev(n) for n in nodes} | 387 revs = {frev(n) for n in nodes} |
389 revs = sorted(frev(n) for n in nodes) | 390 revs = sorted(frev(n) for n in nodes) |
390 | 391 |
391 prevrev = None | 392 prevrev = None |
392 | 393 |
393 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions: | 394 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions: |
394 prevrev = store.parentrevs(revs[0])[0] | 395 prevrev = parents(revs[0])[0] |
395 | 396 |
396 # Set of revs available to delta against. | 397 # Sets of revs available to delta against. |
398 emitted = set() | |
397 available = set() | 399 available = set() |
398 parents = [] | 400 if assumehaveparentrevisions: |
401 common_heads = set(p for r in revs for p in parents(r)) | |
402 common_heads.difference_update(revs) | |
403 available = store.ancestors(common_heads, inclusive=True) | |
399 | 404 |
400 def is_usable_base(rev): | 405 def is_usable_base(rev): |
401 """Is a delta against this revision usable over the wire""" | 406 """Is a delta against this revision usable over the wire""" |
402 if rev == nullrev: | 407 if rev == nullrev: |
403 return False | 408 return False |
404 # Base revision was already emitted in this group. | 409 return rev in emitted or rev in available |
405 if rev in available: | |
406 return True | |
407 # Base revision is a parent that hasn't been emitted already. | |
408 if assumehaveparentrevisions and rev in parents: | |
409 return True | |
410 return False | |
411 | 410 |
412 for rev in revs: | 411 for rev in revs: |
413 if rev == nullrev: | 412 if rev == nullrev: |
414 continue | 413 continue |
415 | 414 |
416 debug_delta_source = None | 415 debug_delta_source = None |
417 if debug_info is not None: | 416 if debug_info is not None: |
418 debug_info['revision-total'] += 1 | 417 debug_info['revision-total'] += 1 |
419 | 418 |
420 node = fnode(rev) | 419 node = fnode(rev) |
421 parents[:] = p1rev, p2rev = store.parentrevs(rev) | 420 p1rev, p2rev = parents(rev) |
422 | 421 |
423 if debug_info is not None: | 422 if debug_info is not None: |
424 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev: | 423 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev: |
425 debug_info['merge-total'] += 1 | 424 debug_info['merge-total'] += 1 |
426 | 425 |
529 ): | 528 ): |
530 if debug_info is not None: | 529 if debug_info is not None: |
531 debug_info['computed-delta'] += 1 # close enough | 530 debug_info['computed-delta'] += 1 # close enough |
532 debug_info['delta-full'] += 1 | 531 debug_info['delta-full'] += 1 |
533 revision = store.rawdata(node) | 532 revision = store.rawdata(node) |
534 available.add(rev) | 533 emitted.add(rev) |
535 else: | 534 else: |
536 if revdifffn: | 535 if revdifffn: |
537 if debug_info is not None: | 536 if debug_info is not None: |
538 if debug_delta_source == "full": | 537 if debug_delta_source == "full": |
539 debug_info['computed-delta'] += 1 | 538 debug_info['computed-delta'] += 1 |
569 assert False, 'unreachable' | 568 assert False, 'unreachable' |
570 delta = mdiff.textdiff( | 569 delta = mdiff.textdiff( |
571 store.rawdata(baserev), store.rawdata(rev) | 570 store.rawdata(baserev), store.rawdata(rev) |
572 ) | 571 ) |
573 | 572 |
574 available.add(rev) | 573 emitted.add(rev) |
575 | 574 |
576 serialized_sidedata = None | 575 serialized_sidedata = None |
577 sidedata_flags = (0, 0) | 576 sidedata_flags = (0, 0) |
578 if sidedata_helpers: | 577 if sidedata_helpers: |
579 try: | 578 try: |