mercurial/revlogutils/deltas.py
changeset 51332 02cc19f4f091
parent 51331 d0d869fccd20
child 51333 898c212e1b2f
equal deleted inserted replaced
51331:d0d869fccd20 51332:02cc19f4f091
   673             # before general delta, there is only one possible delta base
   673             # before general delta, there is only one possible delta base
   674             yield (self.target_rev - 1,)
   674             yield (self.target_rev - 1,)
   675             yield None
   675             yield None
   676             return
   676             return
   677 
   677 
   678         deltalength = self.revlog.length
       
   679         deltaparent = self.revlog.deltaparent
       
   680         sparse = self.revlog.delta_config.sparse_revlog
       
   681         good = None
   678         good = None
   682 
   679 
   683         deltas_limit = self.textlen * LIMIT_DELTA2TEXT
       
   684         group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
   680         group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
   685 
   681 
   686         tested = self.tested  # prefetch for speed and code compactness
   682         tested = self.tested  # prefetch for speed and code compactness
   687         candidates = self._refined_groups()
   683         candidates = self._refined_groups()
   688         while True:
   684         while True:
   689             temptative = candidates.send(good)
   685             temptative = candidates.send(good)
   690             if temptative is None:
   686             if temptative is None:
   691                 break
   687                 break
   692             group = []
   688             group = self._pre_filter_candidate_revs(temptative)
   693             for rev in temptative:
       
   694                 # skip over empty delta (no need to include them in a chain)
       
   695                 while not (rev == nullrev or rev in tested or deltalength(rev)):
       
   696                     tested.add(rev)
       
   697                     rev = deltaparent(rev)
       
   698                 # no need to try a delta against nullrev, this will be done as
       
   699                 # a last resort.
       
   700                 if rev == nullrev:
       
   701                     continue
       
   702                 # filter out revision we tested already
       
   703                 if rev in tested:
       
   704                     continue
       
   705 
       
   706                 # an higher authority deamed the base unworthy (e.g. censored)
       
   707                 if (
       
   708                     self.excluded_bases is not None
       
   709                     and rev in self.excluded_bases
       
   710                 ):
       
   711                     tested.add(rev)
       
   712                     continue
       
   713                 # We are in some recomputation cases and that rev is too high
       
   714                 # in the revlog
       
   715                 if self.target_rev is not None and rev >= self.target_rev:
       
   716                     tested.add(rev)
       
   717                     continue
       
   718                 # filter out delta base that will never produce good delta
       
   719                 #
       
   720                 # if the delta of that base is already bigger than the limit
       
   721                 # for the delta chain size, doing a delta is hopeless.
       
   722                 if deltas_limit < self.revlog.length(rev):
       
   723                     tested.add(rev)
       
   724                     continue
       
   725                 if sparse and self.revlog.rawsize(rev) < (
       
   726                     self.textlen // LIMIT_BASE2TEXT
       
   727                 ):
       
   728                     tested.add(rev)
       
   729                     continue
       
   730                 # no delta for rawtext-changing revs (see "candelta" for why)
       
   731                 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
       
   732                     tested.add(rev)
       
   733                     continue
       
   734 
       
   735                 # If we reach here, we are about to build and test a delta.
       
   736                 # The delta building process will compute the chaininfo in all
       
   737                 # case, since that computation is cached, it is fine to access
       
   738                 # it here too.
       
   739                 chainlen, chainsize = self.revlog._chaininfo(rev)
       
   740                 # if chain will be too long, skip base
       
   741                 if (
       
   742                     self.revlog.delta_config.max_chain_len
       
   743                     and chainlen >= self.revlog.delta_config.max_chain_len
       
   744                 ):
       
   745                     tested.add(rev)
       
   746                     continue
       
   747                 # if chain already have too much data, skip base
       
   748                 if deltas_limit < chainsize:
       
   749                     tested.add(rev)
       
   750                     continue
       
   751                 if (
       
   752                     sparse
       
   753                     and self.revlog.delta_config.upper_bound_comp is not None
       
   754                 ):
       
   755                     maxcomp = self.revlog.delta_config.upper_bound_comp
       
   756                     basenotsnap = (self.p1, self.p2, nullrev)
       
   757                     if rev not in basenotsnap and self.revlog.issnapshot(rev):
       
   758                         snapshotdepth = self.revlog.snapshotdepth(rev)
       
   759                         # If text is significantly larger than the base, we can
       
   760                         # expect the resulting delta to be proportional to the size
       
   761                         # difference
       
   762                         revsize = self.revlog.rawsize(rev)
       
   763                         rawsizedistance = max(self.textlen - revsize, 0)
       
   764                         # use an estimate of the compression upper bound.
       
   765                         lowestrealisticdeltalen = rawsizedistance // maxcomp
       
   766 
       
   767                         # check the absolute constraint on the delta size
       
   768                         snapshotlimit = self.textlen >> snapshotdepth
       
   769                         if snapshotlimit < lowestrealisticdeltalen:
       
   770                             # delta lower bound is larger than accepted upper
       
   771                             # bound
       
   772                             tested.add(rev)
       
   773                             continue
       
   774 
       
   775                         # check the relative constraint on the delta size
       
   776                         revlength = self.revlog.length(rev)
       
   777                         if revlength < lowestrealisticdeltalen:
       
   778                             # delta probable lower bound is larger than target
       
   779                             # base
       
   780                             tested.add(rev)
       
   781                             continue
       
   782 
       
   783                 group.append(rev)
       
   784             if group:
   689             if group:
   785                 # When the size of the candidate group is big, it can result in
   690                 # When the size of the candidate group is big, it can result in
   786                 # a quite significant performance impact. To reduce this, we
   691                 # a quite significant performance impact. To reduce this, we
   787                 # can send them in smaller batches until the new batch does not
   692                 # can send them in smaller batches until the new batch does not
   788                 # provide any improvements.
   693                 # provide any improvements.
   806                         good = yield tuple(sub_group)
   711                         good = yield tuple(sub_group)
   807                         if prev_good == good:
   712                         if prev_good == good:
   808                             break
   713                             break
   809 
   714 
   810         yield None
   715         yield None
       
   716 
       
   717     def _pre_filter_candidate_revs(self, temptative):
       
   718         """filter possible candidate before computing a delta
       
   719 
       
   720         This function use various criteria to pre-filter candidate delta base
       
   721         before we compute a delta and evaluate its quality.
       
   722 
       
   723         Such pre-filter limit the number of computed delta, an expensive operation.
       
   724 
       
   725         return the updated list of revision to test
       
   726         """
       
   727         deltalength = self.revlog.length
       
   728         deltaparent = self.revlog.deltaparent
       
   729         deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
       
   730 
       
   731         sparse = self.revlog.delta_config.sparse_revlog
       
   732         tested = self.tested
       
   733         group = []
       
   734         for rev in temptative:
       
   735             # skip over empty delta (no need to include them in a chain)
       
   736             while not (rev == nullrev or rev in tested or deltalength(rev)):
       
   737                 tested.add(rev)
       
   738                 rev = deltaparent(rev)
       
   739             # no need to try a delta against nullrev, this will be done as
       
   740             # a last resort.
       
   741             if rev == nullrev:
       
   742                 continue
       
   743             # filter out revision we tested already
       
   744             if rev in tested:
       
   745                 continue
       
   746 
       
   747             # an higher authority deamed the base unworthy (e.g. censored)
       
   748             if self.excluded_bases is not None and rev in self.excluded_bases:
       
   749                 tested.add(rev)
       
   750                 continue
       
   751             # We are in some recomputation cases and that rev is too high
       
   752             # in the revlog
       
   753             if self.target_rev is not None and rev >= self.target_rev:
       
   754                 tested.add(rev)
       
   755                 continue
       
   756             # filter out delta base that will never produce good delta
       
   757             #
       
   758             # if the delta of that base is already bigger than the limit
       
   759             # for the delta chain size, doing a delta is hopeless.
       
   760             if deltas_limit < self.revlog.length(rev):
       
   761                 tested.add(rev)
       
   762                 continue
       
   763 
       
   764             # if the revision we test again is too small, the resulting delta
       
   765             # will be large anyway as that amount of data to be added is big
       
   766             if sparse and self.revlog.rawsize(rev) < (
       
   767                 self.textlen // LIMIT_BASE2TEXT
       
   768             ):
       
   769                 tested.add(rev)
       
   770                 continue
       
   771 
       
   772             # no delta for rawtext-changing revs (see "candelta" for why)
       
   773             if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
       
   774                 tested.add(rev)
       
   775                 continue
       
   776 
       
   777             # If we reach here, we are about to build and test a delta.
       
   778             # The delta building process will compute the chaininfo in all
       
   779             # case, since that computation is cached, it is fine to access
       
   780             # it here too.
       
   781             chainlen, chainsize = self.revlog._chaininfo(rev)
       
   782             # if chain will be too long, skip base
       
   783             if (
       
   784                 self.revlog.delta_config.max_chain_len
       
   785                 and chainlen >= self.revlog.delta_config.max_chain_len
       
   786             ):
       
   787                 tested.add(rev)
       
   788                 continue
       
   789             # if chain already have too much data, skip base
       
   790             if deltas_limit < chainsize:
       
   791                 tested.add(rev)
       
   792                 continue
       
   793             if sparse and self.revlog.delta_config.upper_bound_comp is not None:
       
   794                 maxcomp = self.revlog.delta_config.upper_bound_comp
       
   795                 basenotsnap = (self.p1, self.p2, nullrev)
       
   796                 if rev not in basenotsnap and self.revlog.issnapshot(rev):
       
   797                     snapshotdepth = self.revlog.snapshotdepth(rev)
       
   798                     # If text is significantly larger than the base, we can
       
   799                     # expect the resulting delta to be proportional to the size
       
   800                     # difference
       
   801                     revsize = self.revlog.rawsize(rev)
       
   802                     rawsizedistance = max(self.textlen - revsize, 0)
       
   803                     # use an estimate of the compression upper bound.
       
   804                     lowestrealisticdeltalen = rawsizedistance // maxcomp
       
   805 
       
   806                     # check the absolute constraint on the delta size
       
   807                     snapshotlimit = self.textlen >> snapshotdepth
       
   808                     if snapshotlimit < lowestrealisticdeltalen:
       
   809                         # delta lower bound is larger than accepted upper
       
   810                         # bound
       
   811                         tested.add(rev)
       
   812                         continue
       
   813 
       
   814                     # check the relative constraint on the delta size
       
   815                     revlength = self.revlog.length(rev)
       
   816                     if revlength < lowestrealisticdeltalen:
       
   817                         # delta probable lower bound is larger than target
       
   818                         # base
       
   819                         tested.add(rev)
       
   820                         continue
       
   821 
       
   822             group.append(rev)
       
   823         return group
   811 
   824 
   812     def _refined_groups(self):
   825     def _refined_groups(self):
   813         good = None
   826         good = None
   814         # First we try to reuse a the delta contained in the bundle.  (or from
   827         # First we try to reuse a the delta contained in the bundle.  (or from
   815         # the source revlog)
   828         # the source revlog)