Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlogutils/deltas.py @ 51345:02cc19f4f091
delta-find: move pre-filtering of candidates in its own function
This organise the code further and open the way to specialization via
sub-classing. Something important for the coming changes.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 23 Nov 2023 04:21:07 +0100 |
parents | d0d869fccd20 |
children | 898c212e1b2f |
comparison
equal
deleted
inserted
replaced
51344:d0d869fccd20 | 51345:02cc19f4f091 |
---|---|
673 # before general delta, there is only one possible delta base | 673 # before general delta, there is only one possible delta base |
674 yield (self.target_rev - 1,) | 674 yield (self.target_rev - 1,) |
675 yield None | 675 yield None |
676 return | 676 return |
677 | 677 |
678 deltalength = self.revlog.length | |
679 deltaparent = self.revlog.deltaparent | |
680 sparse = self.revlog.delta_config.sparse_revlog | |
681 good = None | 678 good = None |
682 | 679 |
683 deltas_limit = self.textlen * LIMIT_DELTA2TEXT | |
684 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size | 680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size |
685 | 681 |
686 tested = self.tested # prefetch for speed and code compactness | 682 tested = self.tested # prefetch for speed and code compactness |
687 candidates = self._refined_groups() | 683 candidates = self._refined_groups() |
688 while True: | 684 while True: |
689 temptative = candidates.send(good) | 685 temptative = candidates.send(good) |
690 if temptative is None: | 686 if temptative is None: |
691 break | 687 break |
692 group = [] | 688 group = self._pre_filter_candidate_revs(temptative) |
693 for rev in temptative: | |
694 # skip over empty delta (no need to include them in a chain) | |
695 while not (rev == nullrev or rev in tested or deltalength(rev)): | |
696 tested.add(rev) | |
697 rev = deltaparent(rev) | |
698 # no need to try a delta against nullrev, this will be done as | |
699 # a last resort. | |
700 if rev == nullrev: | |
701 continue | |
702 # filter out revision we tested already | |
703 if rev in tested: | |
704 continue | |
705 | |
706 # an higher authority deamed the base unworthy (e.g. censored) | |
707 if ( | |
708 self.excluded_bases is not None | |
709 and rev in self.excluded_bases | |
710 ): | |
711 tested.add(rev) | |
712 continue | |
713 # We are in some recomputation cases and that rev is too high | |
714 # in the revlog | |
715 if self.target_rev is not None and rev >= self.target_rev: | |
716 tested.add(rev) | |
717 continue | |
718 # filter out delta base that will never produce good delta | |
719 # | |
720 # if the delta of that base is already bigger than the limit | |
721 # for the delta chain size, doing a delta is hopeless. | |
722 if deltas_limit < self.revlog.length(rev): | |
723 tested.add(rev) | |
724 continue | |
725 if sparse and self.revlog.rawsize(rev) < ( | |
726 self.textlen // LIMIT_BASE2TEXT | |
727 ): | |
728 tested.add(rev) | |
729 continue | |
730 # no delta for rawtext-changing revs (see "candelta" for why) | |
731 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: | |
732 tested.add(rev) | |
733 continue | |
734 | |
735 # If we reach here, we are about to build and test a delta. | |
736 # The delta building process will compute the chaininfo in all | |
737 # case, since that computation is cached, it is fine to access | |
738 # it here too. | |
739 chainlen, chainsize = self.revlog._chaininfo(rev) | |
740 # if chain will be too long, skip base | |
741 if ( | |
742 self.revlog.delta_config.max_chain_len | |
743 and chainlen >= self.revlog.delta_config.max_chain_len | |
744 ): | |
745 tested.add(rev) | |
746 continue | |
747 # if chain already have too much data, skip base | |
748 if deltas_limit < chainsize: | |
749 tested.add(rev) | |
750 continue | |
751 if ( | |
752 sparse | |
753 and self.revlog.delta_config.upper_bound_comp is not None | |
754 ): | |
755 maxcomp = self.revlog.delta_config.upper_bound_comp | |
756 basenotsnap = (self.p1, self.p2, nullrev) | |
757 if rev not in basenotsnap and self.revlog.issnapshot(rev): | |
758 snapshotdepth = self.revlog.snapshotdepth(rev) | |
759 # If text is significantly larger than the base, we can | |
760 # expect the resulting delta to be proportional to the size | |
761 # difference | |
762 revsize = self.revlog.rawsize(rev) | |
763 rawsizedistance = max(self.textlen - revsize, 0) | |
764 # use an estimate of the compression upper bound. | |
765 lowestrealisticdeltalen = rawsizedistance // maxcomp | |
766 | |
767 # check the absolute constraint on the delta size | |
768 snapshotlimit = self.textlen >> snapshotdepth | |
769 if snapshotlimit < lowestrealisticdeltalen: | |
770 # delta lower bound is larger than accepted upper | |
771 # bound | |
772 tested.add(rev) | |
773 continue | |
774 | |
775 # check the relative constraint on the delta size | |
776 revlength = self.revlog.length(rev) | |
777 if revlength < lowestrealisticdeltalen: | |
778 # delta probable lower bound is larger than target | |
779 # base | |
780 tested.add(rev) | |
781 continue | |
782 | |
783 group.append(rev) | |
784 if group: | 689 if group: |
785 # When the size of the candidate group is big, it can result in | 690 # When the size of the candidate group is big, it can result in |
786 # a quite significant performance impact. To reduce this, we | 691 # a quite significant performance impact. To reduce this, we |
787 # can send them in smaller batches until the new batch does not | 692 # can send them in smaller batches until the new batch does not |
788 # provide any improvements. | 693 # provide any improvements. |
806 good = yield tuple(sub_group) | 711 good = yield tuple(sub_group) |
807 if prev_good == good: | 712 if prev_good == good: |
808 break | 713 break |
809 | 714 |
810 yield None | 715 yield None |
716 | |
717 def _pre_filter_candidate_revs(self, temptative): | |
718 """filter possible candidate before computing a delta | |
719 | |
720 This function use various criteria to pre-filter candidate delta base | |
721 before we compute a delta and evaluate its quality. | |
722 | |
723 Such pre-filter limit the number of computed delta, an expensive operation. | |
724 | |
725 return the updated list of revision to test | |
726 """ | |
727 deltalength = self.revlog.length | |
728 deltaparent = self.revlog.deltaparent | |
729 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT | |
730 | |
731 sparse = self.revlog.delta_config.sparse_revlog | |
732 tested = self.tested | |
733 group = [] | |
734 for rev in temptative: | |
735 # skip over empty delta (no need to include them in a chain) | |
736 while not (rev == nullrev or rev in tested or deltalength(rev)): | |
737 tested.add(rev) | |
738 rev = deltaparent(rev) | |
739 # no need to try a delta against nullrev, this will be done as | |
740 # a last resort. | |
741 if rev == nullrev: | |
742 continue | |
743 # filter out revision we tested already | |
744 if rev in tested: | |
745 continue | |
746 | |
747 # an higher authority deamed the base unworthy (e.g. censored) | |
748 if self.excluded_bases is not None and rev in self.excluded_bases: | |
749 tested.add(rev) | |
750 continue | |
751 # We are in some recomputation cases and that rev is too high | |
752 # in the revlog | |
753 if self.target_rev is not None and rev >= self.target_rev: | |
754 tested.add(rev) | |
755 continue | |
756 # filter out delta base that will never produce good delta | |
757 # | |
758 # if the delta of that base is already bigger than the limit | |
759 # for the delta chain size, doing a delta is hopeless. | |
760 if deltas_limit < self.revlog.length(rev): | |
761 tested.add(rev) | |
762 continue | |
763 | |
764 # if the revision we test again is too small, the resulting delta | |
765 # will be large anyway as that amount of data to be added is big | |
766 if sparse and self.revlog.rawsize(rev) < ( | |
767 self.textlen // LIMIT_BASE2TEXT | |
768 ): | |
769 tested.add(rev) | |
770 continue | |
771 | |
772 # no delta for rawtext-changing revs (see "candelta" for why) | |
773 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: | |
774 tested.add(rev) | |
775 continue | |
776 | |
777 # If we reach here, we are about to build and test a delta. | |
778 # The delta building process will compute the chaininfo in all | |
779 # case, since that computation is cached, it is fine to access | |
780 # it here too. | |
781 chainlen, chainsize = self.revlog._chaininfo(rev) | |
782 # if chain will be too long, skip base | |
783 if ( | |
784 self.revlog.delta_config.max_chain_len | |
785 and chainlen >= self.revlog.delta_config.max_chain_len | |
786 ): | |
787 tested.add(rev) | |
788 continue | |
789 # if chain already have too much data, skip base | |
790 if deltas_limit < chainsize: | |
791 tested.add(rev) | |
792 continue | |
793 if sparse and self.revlog.delta_config.upper_bound_comp is not None: | |
794 maxcomp = self.revlog.delta_config.upper_bound_comp | |
795 basenotsnap = (self.p1, self.p2, nullrev) | |
796 if rev not in basenotsnap and self.revlog.issnapshot(rev): | |
797 snapshotdepth = self.revlog.snapshotdepth(rev) | |
798 # If text is significantly larger than the base, we can | |
799 # expect the resulting delta to be proportional to the size | |
800 # difference | |
801 revsize = self.revlog.rawsize(rev) | |
802 rawsizedistance = max(self.textlen - revsize, 0) | |
803 # use an estimate of the compression upper bound. | |
804 lowestrealisticdeltalen = rawsizedistance // maxcomp | |
805 | |
806 # check the absolute constraint on the delta size | |
807 snapshotlimit = self.textlen >> snapshotdepth | |
808 if snapshotlimit < lowestrealisticdeltalen: | |
809 # delta lower bound is larger than accepted upper | |
810 # bound | |
811 tested.add(rev) | |
812 continue | |
813 | |
814 # check the relative constraint on the delta size | |
815 revlength = self.revlog.length(rev) | |
816 if revlength < lowestrealisticdeltalen: | |
817 # delta probable lower bound is larger than target | |
818 # base | |
819 tested.add(rev) | |
820 continue | |
821 | |
822 group.append(rev) | |
823 return group | |
811 | 824 |
812 def _refined_groups(self): | 825 def _refined_groups(self): |
813 good = None | 826 good = None |
814 # First we try to reuse a the delta contained in the bundle. (or from | 827 # First we try to reuse a the delta contained in the bundle. (or from |
815 # the source revlog) | 828 # the source revlog) |