comparison mercurial/revlogutils/deltas.py @ 49228:b909dd35d9ab

deltas: add a debug-delta-find command to analyse delta search. See the command documentation for details. For some reason, pytype is confused by our usage of the None/deltainfo variable, so I had to silence it.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 20 May 2022 14:27:46 +0200
parents e6b7c6fbeb48
children 78ba41878f2e
comparison
equal deleted inserted replaced
49227:2bcf5e14bb7e 49228:b909dd35d9ab
929 # fulltext. 929 # fulltext.
930 yield (prev,) 930 yield (prev,)
931 931
932 932
933 class deltacomputer: 933 class deltacomputer:
934 def __init__(self, revlog, write_debug=None): 934 def __init__(self, revlog, write_debug=None, debug_search=False):
935 self.revlog = revlog 935 self.revlog = revlog
936 self._write_debug = write_debug 936 self._write_debug = write_debug
937 self._debug_search = debug_search
937 938
938 def buildtext(self, revinfo, fh): 939 def buildtext(self, revinfo, fh):
939 """Builds a fulltext version of a revision 940 """Builds a fulltext version of a revision
940 941
941 revinfo: revisioninfo instance that contains all needed info 942 revinfo: revisioninfo instance that contains all needed info
978 return delta 979 return delta
979 980
980 def _builddeltainfo(self, revinfo, base, fh): 981 def _builddeltainfo(self, revinfo, base, fh):
981 # can we use the cached delta? 982 # can we use the cached delta?
982 revlog = self.revlog 983 revlog = self.revlog
984 debug_search = self._write_debug is not None and self._debug_search
983 chainbase = revlog.chainbase(base) 985 chainbase = revlog.chainbase(base)
984 if revlog._generaldelta: 986 if revlog._generaldelta:
985 deltabase = base 987 deltabase = base
986 else: 988 else:
987 deltabase = chainbase 989 deltabase = chainbase
1007 currentbase = self.revlog.deltaparent(currentbase) 1009 currentbase = self.revlog.deltaparent(currentbase)
1008 if self.revlog._lazydelta and currentbase == base: 1010 if self.revlog._lazydelta and currentbase == base:
1009 delta = revinfo.cachedelta[1] 1011 delta = revinfo.cachedelta[1]
1010 if delta is None: 1012 if delta is None:
1011 delta = self._builddeltadiff(base, revinfo, fh) 1013 delta = self._builddeltadiff(base, revinfo, fh)
1014 if debug_search:
1015 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1016 msg %= len(delta)
1017 self._write_debug(msg)
1012 # snapshotdept need to be neither None nor 0 level snapshot 1018 # snapshotdept need to be neither None nor 0 level snapshot
1013 if revlog.upperboundcomp is not None and snapshotdepth: 1019 if revlog.upperboundcomp is not None and snapshotdepth:
1014 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp 1020 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1015 snapshotlimit = revinfo.textlen >> snapshotdepth 1021 snapshotlimit = revinfo.textlen >> snapshotdepth
1022 if debug_search:
1023 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1024 msg %= lowestrealisticdeltalen
1025 self._write_debug(msg)
1016 if snapshotlimit < lowestrealisticdeltalen: 1026 if snapshotlimit < lowestrealisticdeltalen:
1027 if debug_search:
1028 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1029 self._write_debug(msg)
1017 return None 1030 return None
1018 if revlog.length(base) < lowestrealisticdeltalen: 1031 if revlog.length(base) < lowestrealisticdeltalen:
1032 if debug_search:
1033 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1034 self._write_debug(msg)
1019 return None 1035 return None
1020 header, data = revlog.compress(delta) 1036 header, data = revlog.compress(delta)
1021 deltalen = len(header) + len(data) 1037 deltalen = len(header) + len(data)
1022 offset = revlog.end(len(revlog) - 1) 1038 offset = revlog.end(len(revlog) - 1)
1023 dist = deltalen + offset - revlog.start(chainbase) 1039 dist = deltalen + offset - revlog.start(chainbase)
1088 return self._fullsnapshotinfo(fh, revinfo, target_rev) 1104 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1089 1105
1090 if self._write_debug is not None: 1106 if self._write_debug is not None:
1091 start = util.timer() 1107 start = util.timer()
1092 1108
1109 debug_search = self._write_debug is not None and self._debug_search
1110
1093 # count the number of different delta we tried (for debug purpose) 1111 # count the number of different delta we tried (for debug purpose)
1094 dbg_try_count = 0 1112 dbg_try_count = 0
1095 # count the number of "search round" we did. (for debug purpose) 1113 # count the number of "search round" we did. (for debug purpose)
1096 dbg_try_rounds = 0 1114 dbg_try_rounds = 0
1097 dbg_type = b'unknown' 1115 dbg_type = b'unknown'
1111 p1_chain_len = -1 1129 p1_chain_len = -1
1112 if p2r != nullrev: 1130 if p2r != nullrev:
1113 p2_chain_len = revlog._chaininfo(p2r)[0] 1131 p2_chain_len = revlog._chaininfo(p2r)[0]
1114 else: 1132 else:
1115 p2_chain_len = -1 1133 p2_chain_len = -1
1134 if debug_search:
1135 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1136 msg %= target_rev
1137 self._write_debug(msg)
1116 1138
1117 groups = _candidategroups( 1139 groups = _candidategroups(
1118 self.revlog, revinfo.textlen, p1r, p2r, cachedelta 1140 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1119 ) 1141 )
1120 candidaterevs = next(groups) 1142 candidaterevs = next(groups)
1121 while candidaterevs is not None: 1143 while candidaterevs is not None:
1122 dbg_try_rounds += 1 1144 dbg_try_rounds += 1
1145 if debug_search:
1146 prev = None
1147 if deltainfo is not None:
1148 prev = deltainfo.base
1149
1150 if p1 in candidaterevs or p2 in candidaterevs:
1151 round_type = b"parents"
1152 elif prev is not None and all(c < prev for c in candidaterevs):
1153 round_type = b"refine-down"
1154 elif prev is not None and all(c > prev for c in candidaterevs):
1155 round_type = b"refine-up"
1156 else:
1157 round_type = b"search-down"
1158 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1159 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1160 self._write_debug(msg)
1123 nominateddeltas = [] 1161 nominateddeltas = []
1124 if deltainfo is not None: 1162 if deltainfo is not None:
1163 if debug_search:
1164 msg = (
1165 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1166 )
1167 msg %= (deltainfo.base, deltainfo.deltalen)
1168 self._write_debug(msg)
1125 # if we already found a good delta, 1169 # if we already found a good delta,
1126 # challenge it against refined candidates 1170 # challenge it against refined candidates
1127 nominateddeltas.append(deltainfo) 1171 nominateddeltas.append(deltainfo)
1128 for candidaterev in candidaterevs: 1172 for candidaterev in candidaterevs:
1173 if debug_search:
1174 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1175 msg %= candidaterev
1176 self._write_debug(msg)
1177 candidate_type = None
1178 if candidaterev == p1:
1179 candidate_type = b"p1"
1180 elif candidaterev == p2:
1181 candidate_type = b"p2"
1182 elif self.revlog.issnapshot(candidaterev):
1183 candidate_type = b"snapshot-%d"
1184 candidate_type %= self.revlog.snapshotdepth(
1185 candidaterev
1186 )
1187
1188 if candidate_type is not None:
1189 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1190 msg %= candidate_type
1191 self._write_debug(msg)
1192 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1193 msg %= self.revlog.length(candidaterev)
1194 self._write_debug(msg)
1195 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1196 msg %= self.revlog.deltaparent(candidaterev)
1197 self._write_debug(msg)
1129 if candidaterev in excluded_bases: 1198 if candidaterev in excluded_bases:
1199 if debug_search:
1200 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
1201 self._write_debug(msg)
1130 continue 1202 continue
1131 if candidaterev >= target_rev: 1203 if candidaterev >= target_rev:
1204 if debug_search:
1205 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
1206 self._write_debug(msg)
1132 continue 1207 continue
1133 dbg_try_count += 1 1208 dbg_try_count += 1
1209
1210 if debug_search:
1211 delta_start = util.timer()
1134 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh) 1212 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1213 if debug_search:
1214 delta_end = util.timer()
1215 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1216 msg %= delta_end - delta_start
1217 self._write_debug(msg)
1135 if candidatedelta is not None: 1218 if candidatedelta is not None:
1136 if isgooddeltainfo(self.revlog, candidatedelta, revinfo): 1219 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1220 if debug_search:
1221 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1222 msg %= candidatedelta.deltalen
1223 self._write_debug(msg)
1137 nominateddeltas.append(candidatedelta) 1224 nominateddeltas.append(candidatedelta)
1225 elif debug_search:
1226 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1227 msg %= candidatedelta.deltalen
1228 self._write_debug(msg)
1229 elif debug_search:
1230 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1231 self._write_debug(msg)
1138 if nominateddeltas: 1232 if nominateddeltas:
1139 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) 1233 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1140 if deltainfo is not None: 1234 if deltainfo is not None:
1141 candidaterevs = groups.send(deltainfo.base) 1235 candidaterevs = groups.send(deltainfo.base)
1142 else: 1236 else:
1143 candidaterevs = next(groups) 1237 candidaterevs = next(groups)
1144 1238
1145 if deltainfo is None: 1239 if deltainfo is None:
1146 dbg_type = b"full" 1240 dbg_type = b"full"
1147 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev) 1241 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 elif deltainfo.snapshotdepth: 1242 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1149 dbg_type = b"snapshot" 1243 dbg_type = b"snapshot"
1150 else: 1244 else:
1151 dbg_type = b"delta" 1245 dbg_type = b"delta"
1152 1246
1153 if self._write_debug is not None: 1247 if self._write_debug is not None:
1159 'delta_try_count': dbg_try_count, 1253 'delta_try_count': dbg_try_count,
1160 'type': dbg_type, 1254 'type': dbg_type,
1161 'p1-chain-len': p1_chain_len, 1255 'p1-chain-len': p1_chain_len,
1162 'p2-chain-len': p2_chain_len, 1256 'p2-chain-len': p2_chain_len,
1163 } 1257 }
1164 if deltainfo.snapshotdepth is not None: 1258 if (
1165 dbg['snapshot-depth'] = deltainfo.snapshotdepth 1259 deltainfo.snapshotdepth # pytype: disable=attribute-error
1260 is not None
1261 ):
1262 dbg[
1263 'snapshot-depth'
1264 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1166 else: 1265 else:
1167 dbg['snapshot-depth'] = 0 1266 dbg['snapshot-depth'] = 0
1168 target_revlog = b"UNKNOWN" 1267 target_revlog = b"UNKNOWN"
1169 target_type = self.revlog.target[0] 1268 target_type = self.revlog.target[0]
1170 target_key = self.revlog.target[1] 1269 target_key = self.revlog.target[1]