Mercurial > public > mercurial-scm > hg
comparison mercurial/revlogutils/deltas.py @ 51352:fac6038b11f5
delta-find: move the base of the delta search in its own function
That logic is complicated enough that is is worth puting in its own function. Another method will be introduced in the next changeset to deal with the actual refining.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 23 Nov 2023 22:29:02 +0100 |
parents | 94fe4474b053 |
children | 701caeabbee7 |
comparison
equal
deleted
inserted
replaced
51351:94fe4474b053 | 51352:fac6038b11f5 |
---|---|
1078 # other approach failed try against prev to hopefully save us a | 1078 # other approach failed try against prev to hopefully save us a |
1079 # fulltext. | 1079 # fulltext. |
1080 self.current_stage = _STAGE_PREV | 1080 self.current_stage = _STAGE_PREV |
1081 yield (self.target_rev - 1,) | 1081 yield (self.target_rev - 1,) |
1082 | 1082 |
1083 def _iter_snapshots_base(self): | |
1084 assert self.revlog.delta_config.sparse_revlog | |
1085 prev = self.target_rev - 1 | |
1086 deltachain = lambda rev: self.revlog._deltachain(rev)[0] | |
1087 | |
1088 parents = [p for p in (self.p1, self.p2) if p != nullrev] | |
1089 if not parents: | |
1090 return | |
1091 self.current_stage = _STAGE_SNAPSHOT | |
1092 # See if we can use an existing snapshot in the parent chains to | |
1093 # use as a base for a new intermediate-snapshot | |
1094 # | |
1095 # search for snapshot in parents delta chain map: snapshot-level: | |
1096 # snapshot-rev | |
1097 parents_snaps = collections.defaultdict(set) | |
1098 candidate_chains = [deltachain(p) for p in parents] | |
1099 for chain in candidate_chains: | |
1100 for idx, s in enumerate(chain): | |
1101 if not self.revlog.issnapshot(s): | |
1102 break | |
1103 parents_snaps[idx].add(s) | |
1104 snapfloor = min(parents_snaps[0]) + 1 | |
1105 self.snapshot_cache.update(self.revlog, snapfloor) | |
1106 # search for the highest "unrelated" revision | |
1107 # | |
1108 # Adding snapshots used by "unrelated" revision increase the odd we | |
1109 # reuse an independant, yet better snapshot chain. | |
1110 # | |
1111 # XXX instead of building a set of revisions, we could lazily | |
1112 # enumerate over the chains. That would be more efficient, however | |
1113 # we stick to simple code for now. | |
1114 all_revs = set() | |
1115 for chain in candidate_chains: | |
1116 all_revs.update(chain) | |
1117 other = None | |
1118 for r in self.revlog.revs(prev, snapfloor): | |
1119 if r not in all_revs: | |
1120 other = r | |
1121 break | |
1122 if other is not None: | |
1123 # To avoid unfair competition, we won't use unrelated | |
1124 # intermediate snapshot that are deeper than the ones from the | |
1125 # parent delta chain. | |
1126 max_depth = max(parents_snaps.keys()) | |
1127 chain = deltachain(other) | |
1128 for depth, s in enumerate(chain): | |
1129 if s < snapfloor: | |
1130 continue | |
1131 if max_depth < depth: | |
1132 break | |
1133 if not self.revlog.issnapshot(s): | |
1134 break | |
1135 parents_snaps[depth].add(s) | |
1136 # Test them as possible intermediate snapshot base We test them | |
1137 # from highest to lowest level. High level one are more likely to | |
1138 # result in small delta | |
1139 floor = None | |
1140 for idx, snaps in sorted(parents_snaps.items(), reverse=True): | |
1141 siblings = set() | |
1142 for s in snaps: | |
1143 siblings.update(self.snapshot_cache.snapshots[s]) | |
1144 # Before considering making a new intermediate snapshot, we | |
1145 # check if an existing snapshot, children of base we consider, | |
1146 # would be suitable. | |
1147 # | |
1148 # It give a change to reuse a delta chain "unrelated" to the | |
1149 # current revision instead of starting our own. Without such | |
1150 # re-use, topological branches would keep reopening new chains. | |
1151 # Creating more and more snapshot as the repository grow. | |
1152 | |
1153 if floor is not None: | |
1154 # We only do this for siblings created after the one in our | |
1155 # parent's delta chain. Those created before has less | |
1156 # chances to be valid base since our ancestors had to | |
1157 # create a new snapshot. | |
1158 siblings = [r for r in siblings if floor < r] | |
1159 yield tuple(sorted(siblings)) | |
1160 # then test the base from our parent's delta chain. | |
1161 yield tuple(sorted(snaps)) | |
1162 floor = min(snaps) | |
1163 # No suitable base found in the parent chain, search if any full | |
1164 # snapshots emitted since parent's base would be a suitable base | |
1165 # for an intermediate snapshot. | |
1166 # | |
1167 # It give a chance to reuse a delta chain unrelated to the current | |
1168 # revisions instead of starting our own. Without such re-use, | |
1169 # topological branches would keep reopening new full chains. | |
1170 # Creating more and more snapshot as the repository grow. | |
1171 full = [ | |
1172 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r | |
1173 ] | |
1174 yield tuple(sorted(full)) | |
1175 | |
1083 def _refined_groups(self): | 1176 def _refined_groups(self): |
1084 good = None | 1177 good = None |
1085 groups = self._raw_groups() | 1178 groups = self._raw_groups() |
1086 for candidates in groups: | 1179 for candidates in groups: |
1087 good = yield candidates | 1180 good = yield candidates |
1131 interresting without looking it any practical details. | 1224 interresting without looking it any practical details. |
1132 | 1225 |
1133 The group order aims at providing fast or small candidates first. | 1226 The group order aims at providing fast or small candidates first. |
1134 """ | 1227 """ |
1135 yield from self._iter_parents() | 1228 yield from self._iter_parents() |
1136 sparse = self.revlog.delta_config.sparse_revlog | 1229 if self.revlog.delta_config.sparse_revlog: |
1137 prev = self.target_rev - 1 | 1230 yield from self._iter_snapshots_base() |
1138 deltachain = lambda rev: self.revlog._deltachain(rev)[0] | 1231 else: |
1139 | |
1140 parents = [p for p in (self.p1, self.p2) if p != nullrev] | |
1141 if sparse and parents: | |
1142 self.current_stage = _STAGE_SNAPSHOT | |
1143 # See if we can use an existing snapshot in the parent chains to | |
1144 # use as a base for a new intermediate-snapshot | |
1145 # | |
1146 # search for snapshot in parents delta chain map: snapshot-level: | |
1147 # snapshot-rev | |
1148 parents_snaps = collections.defaultdict(set) | |
1149 candidate_chains = [deltachain(p) for p in parents] | |
1150 for chain in candidate_chains: | |
1151 for idx, s in enumerate(chain): | |
1152 if not self.revlog.issnapshot(s): | |
1153 break | |
1154 parents_snaps[idx].add(s) | |
1155 snapfloor = min(parents_snaps[0]) + 1 | |
1156 self.snapshot_cache.update(self.revlog, snapfloor) | |
1157 # search for the highest "unrelated" revision | |
1158 # | |
1159 # Adding snapshots used by "unrelated" revision increase the odd we | |
1160 # reuse an independant, yet better snapshot chain. | |
1161 # | |
1162 # XXX instead of building a set of revisions, we could lazily | |
1163 # enumerate over the chains. That would be more efficient, however | |
1164 # we stick to simple code for now. | |
1165 all_revs = set() | |
1166 for chain in candidate_chains: | |
1167 all_revs.update(chain) | |
1168 other = None | |
1169 for r in self.revlog.revs(prev, snapfloor): | |
1170 if r not in all_revs: | |
1171 other = r | |
1172 break | |
1173 if other is not None: | |
1174 # To avoid unfair competition, we won't use unrelated | |
1175 # intermediate snapshot that are deeper than the ones from the | |
1176 # parent delta chain. | |
1177 max_depth = max(parents_snaps.keys()) | |
1178 chain = deltachain(other) | |
1179 for depth, s in enumerate(chain): | |
1180 if s < snapfloor: | |
1181 continue | |
1182 if max_depth < depth: | |
1183 break | |
1184 if not self.revlog.issnapshot(s): | |
1185 break | |
1186 parents_snaps[depth].add(s) | |
1187 # Test them as possible intermediate snapshot base We test them | |
1188 # from highest to lowest level. High level one are more likely to | |
1189 # result in small delta | |
1190 floor = None | |
1191 for idx, snaps in sorted(parents_snaps.items(), reverse=True): | |
1192 siblings = set() | |
1193 for s in snaps: | |
1194 siblings.update(self.snapshot_cache.snapshots[s]) | |
1195 # Before considering making a new intermediate snapshot, we | |
1196 # check if an existing snapshot, children of base we consider, | |
1197 # would be suitable. | |
1198 # | |
1199 # It give a change to reuse a delta chain "unrelated" to the | |
1200 # current revision instead of starting our own. Without such | |
1201 # re-use, topological branches would keep reopening new chains. | |
1202 # Creating more and more snapshot as the repository grow. | |
1203 | |
1204 if floor is not None: | |
1205 # We only do this for siblings created after the one in our | |
1206 # parent's delta chain. Those created before has less | |
1207 # chances to be valid base since our ancestors had to | |
1208 # create a new snapshot. | |
1209 siblings = [r for r in siblings if floor < r] | |
1210 yield tuple(sorted(siblings)) | |
1211 # then test the base from our parent's delta chain. | |
1212 yield tuple(sorted(snaps)) | |
1213 floor = min(snaps) | |
1214 # No suitable base found in the parent chain, search if any full | |
1215 # snapshots emitted since parent's base would be a suitable base | |
1216 # for an intermediate snapshot. | |
1217 # | |
1218 # It give a chance to reuse a delta chain unrelated to the current | |
1219 # revisions instead of starting our own. Without such re-use, | |
1220 # topological branches would keep reopening new full chains. | |
1221 # Creating more and more snapshot as the repository grow. | |
1222 full = [ | |
1223 r | |
1224 for r in self.snapshot_cache.snapshots[nullrev] | |
1225 if snapfloor <= r | |
1226 ] | |
1227 yield tuple(sorted(full)) | |
1228 | |
1229 if not sparse: | |
1230 yield from self._iter_prev() | 1232 yield from self._iter_prev() |
1231 | 1233 |
1232 | 1234 |
1233 class SnapshotCache: | 1235 class SnapshotCache: |
1234 __slots__ = ('snapshots', '_start_rev', '_end_rev') | 1236 __slots__ = ('snapshots', '_start_rev', '_end_rev') |