comparison mercurial/revlogutils/deltas.py @ 51356:a224ce5694b3

delta-find: move sparse-revlog pre-filtering in the associated class. Let's move the specialized code into the specialized class.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sun, 07 Jan 2024 05:20:00 +0100
parents 383e99f6bc99
children 99869dcf3ba0
comparison
equal deleted inserted replaced
51355:383e99f6bc99 51356:a224ce5694b3
936 # no delta for rawtext-changing revs (see "candelta" for why) 936 # no delta for rawtext-changing revs (see "candelta" for why)
937 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: 937 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
938 return False 938 return False
939 return True 939 return True
940 940
941 def _pre_filter_rev_sparse(self, rev):
942 """pre filtering that is needed in sparse revlog cases
943
944 return True if it seems okay to test a rev, False otherwise.
945
946 used by _pre_filter_rev.
947 """
948 assert self.revlog.delta_config.sparse_revlog
949 # if the revision we test against is too small, the resulting delta
950 # will be large anyway as the amount of data to be added is big
951 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
952 return False
953
954 if self.revlog.delta_config.upper_bound_comp is not None:
955 maxcomp = self.revlog.delta_config.upper_bound_comp
956 basenotsnap = (self.p1, self.p2, nullrev)
957 if rev not in basenotsnap and self.revlog.issnapshot(rev):
958 snapshotdepth = self.revlog.snapshotdepth(rev)
959 # If text is significantly larger than the base, we can
960 # expect the resulting delta to be proportional to the size
961 # difference
962 revsize = self.revlog.rawsize(rev)
963 rawsizedistance = max(self.textlen - revsize, 0)
964 # use an estimate of the compression upper bound.
965 lowestrealisticdeltalen = rawsizedistance // maxcomp
966
967 # check the absolute constraint on the delta size
968 snapshotlimit = self.textlen >> snapshotdepth
969 if snapshotlimit < lowestrealisticdeltalen:
970 # delta lower bound is larger than accepted upper
971 # bound
972 return False
973
974 # check the relative constraint on the delta size
975 revlength = self.revlog.length(rev)
976 if revlength < lowestrealisticdeltalen:
977 # delta probable lower bound is larger than target
978 # base
979 return False
980 return True
981
982 def _pre_filter_rev_delta_chain(self, rev): 941 def _pre_filter_rev_delta_chain(self, rev):
983 """pre filtering that is needed in sparse revlog cases 942 """pre filtering that is needed in sparse revlog cases
984 943
985 return True if it seems okay to test a rev, False otherwise. 944 return True if it seems okay to test a rev, False otherwise.
986 945
1014 """return True if it seems okay to test a rev, False otherwise""" 973 """return True if it seems okay to test a rev, False otherwise"""
1015 if not self._pre_filter_rev_universal(rev): 974 if not self._pre_filter_rev_universal(rev):
1016 return False 975 return False
1017 if not self._pre_filter_rev_delta_chain(rev): 976 if not self._pre_filter_rev_delta_chain(rev):
1018 return False 977 return False
1019 if self.revlog.delta_config.sparse_revlog:
1020 if not self._pre_filter_rev_sparse(rev):
1021 return False
1022
1023 return True 978 return True
1024 979
1025 def _iter_parents(self): 980 def _iter_parents(self):
1026 # exclude already lazy tested base if any 981 # exclude already lazy tested base if any
1027 parents = [p for p in (self.p1, self.p2) if p != nullrev] 982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1100 # bad delta if new intermediate snapshot is larger than the previous 1055 # bad delta if new intermediate snapshot is larger than the previous
1101 # snapshot 1056 # snapshot
1102 if self.revlog.length(deltainfo.base) < deltainfo.deltalen: 1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1103 return False 1058 return False
1104 1059
1060 return True
1061
1062 def _pre_filter_rev(self, rev):
1063 """return True if it seems okay to test a rev, False otherwise"""
1064 if not self._pre_filter_rev_universal(rev):
1065 return False
1066 if not self._pre_filter_rev_delta_chain(rev):
1067 return False
1068 if not self._pre_filter_rev_sparse(rev):
1069 return False
1070 return True
1071
1072 def _pre_filter_rev_sparse(self, rev):
1073 """pre filtering that is needed in sparse revlog cases
1074
1075 return True if it seems okay to test a rev, False otherwise.
1076
1077 used by _pre_filter_rev.
1078 """
1079 assert self.revlog.delta_config.sparse_revlog
1080 # if the revision we test against is too small, the resulting delta
1081 # will be large anyway as the amount of data to be added is big
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 return False
1084
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 basenotsnap = (self.p1, self.p2, nullrev)
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 # If text is significantly larger than the base, we can
1091 # expect the resulting delta to be proportional to the size
1092 # difference
1093 revsize = self.revlog.rawsize(rev)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1095 # use an estimate of the compression upper bound.
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097
1098 # check the absolute constraint on the delta size
1099 snapshotlimit = self.textlen >> snapshotdepth
1100 if snapshotlimit < lowestrealisticdeltalen:
1101 # delta lower bound is larger than accepted upper
1102 # bound
1103 return False
1104
1105 # check the relative constraint on the delta size
1106 revlength = self.revlog.length(rev)
1107 if revlength < lowestrealisticdeltalen:
1108 # delta probable lower bound is larger than target
1109 # base
1110 return False
1105 return True 1111 return True
1106 1112
1107 def _iter_snapshots_base(self): 1113 def _iter_snapshots_base(self):
1108 assert self.revlog.delta_config.sparse_revlog 1114 assert self.revlog.delta_config.sparse_revlog
1109 assert self.current_stage == _STAGE_SNAPSHOT 1115 assert self.current_stage == _STAGE_SNAPSHOT