673 # before general delta, there is only one possible delta base |
673 # before general delta, there is only one possible delta base |
674 yield (self.target_rev - 1,) |
674 yield (self.target_rev - 1,) |
675 yield None |
675 yield None |
676 return |
676 return |
677 |
677 |
678 deltalength = self.revlog.length |
|
679 deltaparent = self.revlog.deltaparent |
|
680 sparse = self.revlog.delta_config.sparse_revlog |
|
681 good = None |
678 good = None |
682 |
679 |
683 deltas_limit = self.textlen * LIMIT_DELTA2TEXT |
|
684 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size |
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size |
685 |
681 |
686 tested = self.tested # prefetch for speed and code compactness |
682 tested = self.tested # prefetch for speed and code compactness |
687 candidates = self._refined_groups() |
683 candidates = self._refined_groups() |
688 while True: |
684 while True: |
689 temptative = candidates.send(good) |
685 temptative = candidates.send(good) |
690 if temptative is None: |
686 if temptative is None: |
691 break |
687 break |
692 group = [] |
688 group = self._pre_filter_candidate_revs(temptative) |
693 for rev in temptative: |
|
694 # skip over empty delta (no need to include them in a chain) |
|
695 while not (rev == nullrev or rev in tested or deltalength(rev)): |
|
696 tested.add(rev) |
|
697 rev = deltaparent(rev) |
|
698 # no need to try a delta against nullrev, this will be done as |
|
699 # a last resort. |
|
700 if rev == nullrev: |
|
701 continue |
|
702 # filter out revision we tested already |
|
703 if rev in tested: |
|
704 continue |
|
705 |
|
706 # an higher authority deamed the base unworthy (e.g. censored) |
|
707 if ( |
|
708 self.excluded_bases is not None |
|
709 and rev in self.excluded_bases |
|
710 ): |
|
711 tested.add(rev) |
|
712 continue |
|
713 # We are in some recomputation cases and that rev is too high |
|
714 # in the revlog |
|
715 if self.target_rev is not None and rev >= self.target_rev: |
|
716 tested.add(rev) |
|
717 continue |
|
718 # filter out delta base that will never produce good delta |
|
719 # |
|
720 # if the delta of that base is already bigger than the limit |
|
721 # for the delta chain size, doing a delta is hopeless. |
|
722 if deltas_limit < self.revlog.length(rev): |
|
723 tested.add(rev) |
|
724 continue |
|
725 if sparse and self.revlog.rawsize(rev) < ( |
|
726 self.textlen // LIMIT_BASE2TEXT |
|
727 ): |
|
728 tested.add(rev) |
|
729 continue |
|
730 # no delta for rawtext-changing revs (see "candelta" for why) |
|
731 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: |
|
732 tested.add(rev) |
|
733 continue |
|
734 |
|
735 # If we reach here, we are about to build and test a delta. |
|
736 # The delta building process will compute the chaininfo in all |
|
737 # case, since that computation is cached, it is fine to access |
|
738 # it here too. |
|
739 chainlen, chainsize = self.revlog._chaininfo(rev) |
|
740 # if chain will be too long, skip base |
|
741 if ( |
|
742 self.revlog.delta_config.max_chain_len |
|
743 and chainlen >= self.revlog.delta_config.max_chain_len |
|
744 ): |
|
745 tested.add(rev) |
|
746 continue |
|
747 # if chain already have too much data, skip base |
|
748 if deltas_limit < chainsize: |
|
749 tested.add(rev) |
|
750 continue |
|
751 if ( |
|
752 sparse |
|
753 and self.revlog.delta_config.upper_bound_comp is not None |
|
754 ): |
|
755 maxcomp = self.revlog.delta_config.upper_bound_comp |
|
756 basenotsnap = (self.p1, self.p2, nullrev) |
|
757 if rev not in basenotsnap and self.revlog.issnapshot(rev): |
|
758 snapshotdepth = self.revlog.snapshotdepth(rev) |
|
759 # If text is significantly larger than the base, we can |
|
760 # expect the resulting delta to be proportional to the size |
|
761 # difference |
|
762 revsize = self.revlog.rawsize(rev) |
|
763 rawsizedistance = max(self.textlen - revsize, 0) |
|
764 # use an estimate of the compression upper bound. |
|
765 lowestrealisticdeltalen = rawsizedistance // maxcomp |
|
766 |
|
767 # check the absolute constraint on the delta size |
|
768 snapshotlimit = self.textlen >> snapshotdepth |
|
769 if snapshotlimit < lowestrealisticdeltalen: |
|
770 # delta lower bound is larger than accepted upper |
|
771 # bound |
|
772 tested.add(rev) |
|
773 continue |
|
774 |
|
775 # check the relative constraint on the delta size |
|
776 revlength = self.revlog.length(rev) |
|
777 if revlength < lowestrealisticdeltalen: |
|
778 # delta probable lower bound is larger than target |
|
779 # base |
|
780 tested.add(rev) |
|
781 continue |
|
782 |
|
783 group.append(rev) |
|
784 if group: |
689 if group: |
785 # When the size of the candidate group is big, it can result in |
690 # When the size of the candidate group is big, it can result in |
786 # a quite significant performance impact. To reduce this, we |
691 # a quite significant performance impact. To reduce this, we |
787 # can send them in smaller batches until the new batch does not |
692 # can send them in smaller batches until the new batch does not |
788 # provide any improvements. |
693 # provide any improvements. |
806 good = yield tuple(sub_group) |
711 good = yield tuple(sub_group) |
807 if prev_good == good: |
712 if prev_good == good: |
808 break |
713 break |
809 |
714 |
810 yield None |
715 yield None |
|
716 |
|
717 def _pre_filter_candidate_revs(self, temptative): |
|
718 """filter possible candidate before computing a delta |
|
719 |
|
720 This function use various criteria to pre-filter candidate delta base |
|
721 before we compute a delta and evaluate its quality. |
|
722 |
|
723 Such pre-filter limit the number of computed delta, an expensive operation. |
|
724 |
|
725 return the updated list of revision to test |
|
726 """ |
|
727 deltalength = self.revlog.length |
|
728 deltaparent = self.revlog.deltaparent |
|
729 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT |
|
730 |
|
731 sparse = self.revlog.delta_config.sparse_revlog |
|
732 tested = self.tested |
|
733 group = [] |
|
734 for rev in temptative: |
|
735 # skip over empty delta (no need to include them in a chain) |
|
736 while not (rev == nullrev or rev in tested or deltalength(rev)): |
|
737 tested.add(rev) |
|
738 rev = deltaparent(rev) |
|
739 # no need to try a delta against nullrev, this will be done as |
|
740 # a last resort. |
|
741 if rev == nullrev: |
|
742 continue |
|
743 # filter out revision we tested already |
|
744 if rev in tested: |
|
745 continue |
|
746 |
|
747 # an higher authority deamed the base unworthy (e.g. censored) |
|
748 if self.excluded_bases is not None and rev in self.excluded_bases: |
|
749 tested.add(rev) |
|
750 continue |
|
751 # We are in some recomputation cases and that rev is too high |
|
752 # in the revlog |
|
753 if self.target_rev is not None and rev >= self.target_rev: |
|
754 tested.add(rev) |
|
755 continue |
|
756 # filter out delta base that will never produce good delta |
|
757 # |
|
758 # if the delta of that base is already bigger than the limit |
|
759 # for the delta chain size, doing a delta is hopeless. |
|
760 if deltas_limit < self.revlog.length(rev): |
|
761 tested.add(rev) |
|
762 continue |
|
763 |
|
764 # if the revision we test again is too small, the resulting delta |
|
765 # will be large anyway as that amount of data to be added is big |
|
766 if sparse and self.revlog.rawsize(rev) < ( |
|
767 self.textlen // LIMIT_BASE2TEXT |
|
768 ): |
|
769 tested.add(rev) |
|
770 continue |
|
771 |
|
772 # no delta for rawtext-changing revs (see "candelta" for why) |
|
773 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: |
|
774 tested.add(rev) |
|
775 continue |
|
776 |
|
777 # If we reach here, we are about to build and test a delta. |
|
778 # The delta building process will compute the chaininfo in all |
|
779 # case, since that computation is cached, it is fine to access |
|
780 # it here too. |
|
781 chainlen, chainsize = self.revlog._chaininfo(rev) |
|
782 # if chain will be too long, skip base |
|
783 if ( |
|
784 self.revlog.delta_config.max_chain_len |
|
785 and chainlen >= self.revlog.delta_config.max_chain_len |
|
786 ): |
|
787 tested.add(rev) |
|
788 continue |
|
789 # if chain already have too much data, skip base |
|
790 if deltas_limit < chainsize: |
|
791 tested.add(rev) |
|
792 continue |
|
793 if sparse and self.revlog.delta_config.upper_bound_comp is not None: |
|
794 maxcomp = self.revlog.delta_config.upper_bound_comp |
|
795 basenotsnap = (self.p1, self.p2, nullrev) |
|
796 if rev not in basenotsnap and self.revlog.issnapshot(rev): |
|
797 snapshotdepth = self.revlog.snapshotdepth(rev) |
|
798 # If text is significantly larger than the base, we can |
|
799 # expect the resulting delta to be proportional to the size |
|
800 # difference |
|
801 revsize = self.revlog.rawsize(rev) |
|
802 rawsizedistance = max(self.textlen - revsize, 0) |
|
803 # use an estimate of the compression upper bound. |
|
804 lowestrealisticdeltalen = rawsizedistance // maxcomp |
|
805 |
|
806 # check the absolute constraint on the delta size |
|
807 snapshotlimit = self.textlen >> snapshotdepth |
|
808 if snapshotlimit < lowestrealisticdeltalen: |
|
809 # delta lower bound is larger than accepted upper |
|
810 # bound |
|
811 tested.add(rev) |
|
812 continue |
|
813 |
|
814 # check the relative constraint on the delta size |
|
815 revlength = self.revlog.length(rev) |
|
816 if revlength < lowestrealisticdeltalen: |
|
817 # delta probable lower bound is larger than target |
|
818 # base |
|
819 tested.add(rev) |
|
820 continue |
|
821 |
|
822 group.append(rev) |
|
823 return group |
811 |
824 |
812 def _refined_groups(self): |
825 def _refined_groups(self): |
813 good = None |
826 good = None |
814 # First we try to reuse a the delta contained in the bundle. (or from |
827 # First we try to reuse a the delta contained in the bundle. (or from |
815 # the source revlog) |
828 # the source revlog) |