Mercurial > public > mercurial-scm > hg
comparison mercurial/revlogutils/deltas.py @ 49228:b909dd35d9ab
deltas: add a debug-delta-find command to analyse delta search
See command documentation for details.
For some reason, pytype is confused by our usage of the None/deltainfo variable, so
I had to silence its warnings.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 20 May 2022 14:27:46 +0200 |
parents | e6b7c6fbeb48 |
children | 78ba41878f2e |
comparison
equal
deleted
inserted
replaced
49227:2bcf5e14bb7e | 49228:b909dd35d9ab |
---|---|
929 # fulltext. | 929 # fulltext. |
930 yield (prev,) | 930 yield (prev,) |
931 | 931 |
932 | 932 |
933 class deltacomputer: | 933 class deltacomputer: |
934 def __init__(self, revlog, write_debug=None): | 934 def __init__(self, revlog, write_debug=None, debug_search=False): |
935 self.revlog = revlog | 935 self.revlog = revlog |
936 self._write_debug = write_debug | 936 self._write_debug = write_debug |
937 self._debug_search = debug_search | |
937 | 938 |
938 def buildtext(self, revinfo, fh): | 939 def buildtext(self, revinfo, fh): |
939 """Builds a fulltext version of a revision | 940 """Builds a fulltext version of a revision |
940 | 941 |
941 revinfo: revisioninfo instance that contains all needed info | 942 revinfo: revisioninfo instance that contains all needed info |
978 return delta | 979 return delta |
979 | 980 |
980 def _builddeltainfo(self, revinfo, base, fh): | 981 def _builddeltainfo(self, revinfo, base, fh): |
981 # can we use the cached delta? | 982 # can we use the cached delta? |
982 revlog = self.revlog | 983 revlog = self.revlog |
984 debug_search = self._write_debug is not None and self._debug_search | |
983 chainbase = revlog.chainbase(base) | 985 chainbase = revlog.chainbase(base) |
984 if revlog._generaldelta: | 986 if revlog._generaldelta: |
985 deltabase = base | 987 deltabase = base |
986 else: | 988 else: |
987 deltabase = chainbase | 989 deltabase = chainbase |
1007 currentbase = self.revlog.deltaparent(currentbase) | 1009 currentbase = self.revlog.deltaparent(currentbase) |
1008 if self.revlog._lazydelta and currentbase == base: | 1010 if self.revlog._lazydelta and currentbase == base: |
1009 delta = revinfo.cachedelta[1] | 1011 delta = revinfo.cachedelta[1] |
1010 if delta is None: | 1012 if delta is None: |
1011 delta = self._builddeltadiff(base, revinfo, fh) | 1013 delta = self._builddeltadiff(base, revinfo, fh) |
1014 if debug_search: | |
1015 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n" | |
1016 msg %= len(delta) | |
1017 self._write_debug(msg) | |
1012 # snapshotdept need to be neither None nor 0 level snapshot | 1018 # snapshotdept need to be neither None nor 0 level snapshot |
1013 if revlog.upperboundcomp is not None and snapshotdepth: | 1019 if revlog.upperboundcomp is not None and snapshotdepth: |
1014 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp | 1020 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp |
1015 snapshotlimit = revinfo.textlen >> snapshotdepth | 1021 snapshotlimit = revinfo.textlen >> snapshotdepth |
1022 if debug_search: | |
1023 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n" | |
1024 msg %= lowestrealisticdeltalen | |
1025 self._write_debug(msg) | |
1016 if snapshotlimit < lowestrealisticdeltalen: | 1026 if snapshotlimit < lowestrealisticdeltalen: |
1027 if debug_search: | |
1028 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n" | |
1029 self._write_debug(msg) | |
1017 return None | 1030 return None |
1018 if revlog.length(base) < lowestrealisticdeltalen: | 1031 if revlog.length(base) < lowestrealisticdeltalen: |
1032 if debug_search: | |
1033 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n" | |
1034 self._write_debug(msg) | |
1019 return None | 1035 return None |
1020 header, data = revlog.compress(delta) | 1036 header, data = revlog.compress(delta) |
1021 deltalen = len(header) + len(data) | 1037 deltalen = len(header) + len(data) |
1022 offset = revlog.end(len(revlog) - 1) | 1038 offset = revlog.end(len(revlog) - 1) |
1023 dist = deltalen + offset - revlog.start(chainbase) | 1039 dist = deltalen + offset - revlog.start(chainbase) |
1088 return self._fullsnapshotinfo(fh, revinfo, target_rev) | 1104 return self._fullsnapshotinfo(fh, revinfo, target_rev) |
1089 | 1105 |
1090 if self._write_debug is not None: | 1106 if self._write_debug is not None: |
1091 start = util.timer() | 1107 start = util.timer() |
1092 | 1108 |
1109 debug_search = self._write_debug is not None and self._debug_search | |
1110 | |
1093 # count the number of different delta we tried (for debug purpose) | 1111 # count the number of different delta we tried (for debug purpose) |
1094 dbg_try_count = 0 | 1112 dbg_try_count = 0 |
1095 # count the number of "search round" we did. (for debug purpose) | 1113 # count the number of "search round" we did. (for debug purpose) |
1096 dbg_try_rounds = 0 | 1114 dbg_try_rounds = 0 |
1097 dbg_type = b'unknown' | 1115 dbg_type = b'unknown' |
1111 p1_chain_len = -1 | 1129 p1_chain_len = -1 |
1112 if p2r != nullrev: | 1130 if p2r != nullrev: |
1113 p2_chain_len = revlog._chaininfo(p2r)[0] | 1131 p2_chain_len = revlog._chaininfo(p2r)[0] |
1114 else: | 1132 else: |
1115 p2_chain_len = -1 | 1133 p2_chain_len = -1 |
1134 if debug_search: | |
1135 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n" | |
1136 msg %= target_rev | |
1137 self._write_debug(msg) | |
1116 | 1138 |
1117 groups = _candidategroups( | 1139 groups = _candidategroups( |
1118 self.revlog, revinfo.textlen, p1r, p2r, cachedelta | 1140 self.revlog, revinfo.textlen, p1r, p2r, cachedelta |
1119 ) | 1141 ) |
1120 candidaterevs = next(groups) | 1142 candidaterevs = next(groups) |
1121 while candidaterevs is not None: | 1143 while candidaterevs is not None: |
1122 dbg_try_rounds += 1 | 1144 dbg_try_rounds += 1 |
1145 if debug_search: | |
1146 prev = None | |
1147 if deltainfo is not None: | |
1148 prev = deltainfo.base | |
1149 | |
1150 if p1 in candidaterevs or p2 in candidaterevs: | |
1151 round_type = b"parents" | |
1152 elif prev is not None and all(c < prev for c in candidaterevs): | |
1153 round_type = b"refine-down" | |
1154 elif prev is not None and all(c > prev for c in candidaterevs): | |
1155 round_type = b"refine-up" | |
1156 else: | |
1157 round_type = b"search-down" | |
1158 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n" | |
1159 msg %= (dbg_try_rounds, len(candidaterevs), round_type) | |
1160 self._write_debug(msg) | |
1123 nominateddeltas = [] | 1161 nominateddeltas = [] |
1124 if deltainfo is not None: | 1162 if deltainfo is not None: |
1163 if debug_search: | |
1164 msg = ( | |
1165 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n" | |
1166 ) | |
1167 msg %= (deltainfo.base, deltainfo.deltalen) | |
1168 self._write_debug(msg) | |
1125 # if we already found a good delta, | 1169 # if we already found a good delta, |
1126 # challenge it against refined candidates | 1170 # challenge it against refined candidates |
1127 nominateddeltas.append(deltainfo) | 1171 nominateddeltas.append(deltainfo) |
1128 for candidaterev in candidaterevs: | 1172 for candidaterev in candidaterevs: |
1173 if debug_search: | |
1174 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n" | |
1175 msg %= candidaterev | |
1176 self._write_debug(msg) | |
1177 candidate_type = None | |
1178 if candidaterev == p1: | |
1179 candidate_type = b"p1" | |
1180 elif candidaterev == p2: | |
1181 candidate_type = b"p2" | |
1182 elif self.revlog.issnapshot(candidaterev): | |
1183 candidate_type = b"snapshot-%d" | |
1184 candidate_type %= self.revlog.snapshotdepth( | |
1185 candidaterev | |
1186 ) | |
1187 | |
1188 if candidate_type is not None: | |
1189 msg = b"DBG-DELTAS-SEARCH: type=%s\n" | |
1190 msg %= candidate_type | |
1191 self._write_debug(msg) | |
1192 msg = b"DBG-DELTAS-SEARCH: size=%d\n" | |
1193 msg %= self.revlog.length(candidaterev) | |
1194 self._write_debug(msg) | |
1195 msg = b"DBG-DELTAS-SEARCH: base=%d\n" | |
1196 msg %= self.revlog.deltaparent(candidaterev) | |
1197 self._write_debug(msg) | |
1129 if candidaterev in excluded_bases: | 1198 if candidaterev in excluded_bases: |
1199 if debug_search: | |
1200 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n" | |
1201 self._write_debug(msg) | |
1130 continue | 1202 continue |
1131 if candidaterev >= target_rev: | 1203 if candidaterev >= target_rev: |
1204 if debug_search: | |
1205 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n" | |
1206 self._write_debug(msg) | |
1132 continue | 1207 continue |
1133 dbg_try_count += 1 | 1208 dbg_try_count += 1 |
1209 | |
1210 if debug_search: | |
1211 delta_start = util.timer() | |
1134 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) | 1212 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) |
1213 if debug_search: | |
1214 delta_end = util.timer() | |
1215 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n" | |
1216 msg %= delta_end - delta_start | |
1217 self._write_debug(msg) | |
1135 if candidatedelta is not None: | 1218 if candidatedelta is not None: |
1136 if isgooddeltainfo(self.revlog, candidatedelta, revinfo): | 1219 if isgooddeltainfo(self.revlog, candidatedelta, revinfo): |
1220 if debug_search: | |
1221 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n" | |
1222 msg %= candidatedelta.deltalen | |
1223 self._write_debug(msg) | |
1137 nominateddeltas.append(candidatedelta) | 1224 nominateddeltas.append(candidatedelta) |
1225 elif debug_search: | |
1226 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n" | |
1227 msg %= candidatedelta.deltalen | |
1228 self._write_debug(msg) | |
1229 elif debug_search: | |
1230 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n" | |
1231 self._write_debug(msg) | |
1138 if nominateddeltas: | 1232 if nominateddeltas: |
1139 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) | 1233 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) |
1140 if deltainfo is not None: | 1234 if deltainfo is not None: |
1141 candidaterevs = groups.send(deltainfo.base) | 1235 candidaterevs = groups.send(deltainfo.base) |
1142 else: | 1236 else: |
1143 candidaterevs = next(groups) | 1237 candidaterevs = next(groups) |
1144 | 1238 |
1145 if deltainfo is None: | 1239 if deltainfo is None: |
1146 dbg_type = b"full" | 1240 dbg_type = b"full" |
1147 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) | 1241 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) |
1148 elif deltainfo.snapshotdepth: | 1242 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error |
1149 dbg_type = b"snapshot" | 1243 dbg_type = b"snapshot" |
1150 else: | 1244 else: |
1151 dbg_type = b"delta" | 1245 dbg_type = b"delta" |
1152 | 1246 |
1153 if self._write_debug is not None: | 1247 if self._write_debug is not None: |
1159 'delta_try_count': dbg_try_count, | 1253 'delta_try_count': dbg_try_count, |
1160 'type': dbg_type, | 1254 'type': dbg_type, |
1161 'p1-chain-len': p1_chain_len, | 1255 'p1-chain-len': p1_chain_len, |
1162 'p2-chain-len': p2_chain_len, | 1256 'p2-chain-len': p2_chain_len, |
1163 } | 1257 } |
1164 if deltainfo.snapshotdepth is not None: | 1258 if ( |
1165 dbg['snapshot-depth'] = deltainfo.snapshotdepth | 1259 deltainfo.snapshotdepth # pytype: disable=attribute-error |
1260 is not None | |
1261 ): | |
1262 dbg[ | |
1263 'snapshot-depth' | |
1264 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error | |
1166 else: | 1265 else: |
1167 dbg['snapshot-depth'] = 0 | 1266 dbg['snapshot-depth'] = 0 |
1168 target_revlog = b"UNKNOWN" | 1267 target_revlog = b"UNKNOWN" |
1169 target_type = self.revlog.target[0] | 1268 target_type = self.revlog.target[0] |
1170 target_key = self.revlog.target[1] | 1269 target_key = self.revlog.target[1] |