comparison mercurial/revlogutils/deltas.py @ 39336:1c6ff52fe9cf

revlogdeltas: split candidate groups selection from the filtering logic The group iteration has two main components: * walking candidates, the logic that we are about to extend to build intermediate snapshots, * Making sure we test diffs against interesting bases. No duplicated tests, skipping empty revisions, etc. We split `_candidategroups` to separate the two components. This achieves two goals: * It will be simpler to update the walking logic for intermediate snapshots, * We can gather the filtering logic from `finddeltainfo` into `_candidategroups` to centralize it.
author Boris Feld <boris.feld@octobus.net>
date Wed, 29 Aug 2018 09:55:11 -0700
parents 1441eb38849f
children 37957e07138c
comparison
equal deleted inserted replaced
39335:1441eb38849f 39336:1c6ff52fe9cf
567 return False 567 return False
568 568
569 return True 569 return True
570 570
571 def _candidategroups(revlog, p1, p2, cachedelta): 571 def _candidategroups(revlog, p1, p2, cachedelta):
572 """Provides group of revision to be tested as delta base
573
574 This top level function focus on emitting groups with unique and worthwhile
575 content. See _raw_candidate_groups for details about the group order.
572 """ 576 """
573 Provides revisions that present an interest to be diffed against, 577 # should we try to build a delta?
574 grouped by level of easiness. 578 if not (len(revlog) and revlog._storedeltachains):
579 return
580
581 tested = set([nullrev])
582 for group in _rawgroups(revlog, p1, p2, cachedelta):
583 group = tuple(r for r in group if r not in tested)
584 tested.update(group)
585 if group:
586 yield group
587
588 def _rawgroups(revlog, p1, p2, cachedelta):
589 """Provides group of revision to be tested as delta base
590
591 This lower level function focus on emitting delta theorically interresting
592 without looking it any practical details.
593
594 The group order aims at providing fast or small candidates first.
575 """ 595 """
576 gdelta = revlog._generaldelta 596 gdelta = revlog._generaldelta
577 curr = len(revlog) 597 curr = len(revlog)
578 prev = curr - 1 598 prev = curr - 1
579 599
588 # Assume what we received from the server is a good choice 608 # Assume what we received from the server is a good choice
589 # build delta will reuse the cache 609 # build delta will reuse the cache
590 yield (cachedelta[0],) 610 yield (cachedelta[0],)
591 tested.add(cachedelta[0]) 611 tested.add(cachedelta[0])
592 612
593 if gdelta: 613 # This condition is true most of the time when processing
594 # exclude already lazy tested base if any 614 # changegroup data into a generaldelta repo. The only time it
595 parents = [p for p in (p1, p2) 615 # isn't true is if this is the first revision in a delta chain
596 if p != nullrev and p not in tested] 616 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
597 617 if cachedelta and gdelta and revlog._lazydeltabase:
598 if not revlog._deltabothparents and len(parents) == 2: 618 # Assume what we received from the server is a good choice
599 parents.sort() 619 # build delta will reuse the cache
600 # To minimize the chance of having to build a fulltext, 620 yield (cachedelta[0],)
601 # pick first whichever parent is closest to us (max rev) 621
602 yield (parents[1],) 622 if gdelta:
603 # then the other one (min rev) if the first did not fit 623 # exclude already lazy tested base if any
604 yield (parents[0],) 624 parents = [p for p in (p1, p2) if p != nullrev]
605 tested.update(parents) 625
606 elif len(parents) > 0: 626 if not revlog._deltabothparents and len(parents) == 2:
607 # Test all parents (1 or 2), and keep the best candidate 627 parents.sort()
608 yield parents 628 # To minimize the chance of having to build a fulltext,
609 tested.update(parents) 629 # pick first whichever parent is closest to us (max rev)
610 630 yield (parents[1],)
611 if prev not in tested: 631 # then the other one (min rev) if the first did not fit
612 # other approach failed try against prev to hopefully save us a 632 yield (parents[0],)
613 # fulltext. 633 elif len(parents) > 0:
614 yield (prev,) 634 # Test all parents (1 or 2), and keep the best candidate
615 tested.add(prev) 635 yield parents
636
637 # other approach failed try against prev to hopefully save us a
638 # fulltext.
639 yield (prev,)
616 640
617 class deltacomputer(object): 641 class deltacomputer(object):
618 def __init__(self, revlog): 642 def __init__(self, revlog):
619 self.revlog = revlog 643 self.revlog = revlog
620 644