1085 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]] |
1079 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]] |
1086 ] = (), |
1080 ] = (), |
1087 closednodes: Optional[Set[bytes]] = None, |
1081 closednodes: Optional[Set[bytes]] = None, |
1088 ) -> None: |
1082 ) -> None: |
1089 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes) |
1083 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes) |
1090 |
|
1091 |
|
# Revision branch info cache
#
# On-disk layout constants shared by rbcrevs and revbranchcache.

# suffix appended to both cache file names
_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion
_rbcrevs = b'rbc-revs' + _rbcversion

# number of leading node hash bytes kept in each record
_rbcnodelen = 4
# record layout, big-endian:
# [4 byte hash prefix][4 byte branch name index, high bit set on branch close]
_rbcrecfmt = b'>%dsI' % _rbcnodelen
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize

# the top bit of the 32-bit field is the close flag, the low 31 bits are the
# index into the branch names list
_rbccloseflag = 1 << 31
_rbcbranchidxmask = _rbccloseflag - 1
|
1104 |
|
1105 |
|
class rbcrevs:
    """A byte string made of an immutable prefix followed by a mutable suffix.

    The prefix is whatever buffer is handed to the constructor and is never
    modified in place.  All writes go to ``_rest``, a ``bytearray``.  When a
    write or truncation has to touch the prefix, the prefix is first copied
    into ``_rest`` (see ``make_mutable``) and replaced by an empty
    ``bytearray``.

    Offsets taken by the methods are byte offsets into the logical
    concatenation ``prefix + rest``.
    """

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """Unpack the ``_rbcrecfmt`` record starting at byte ``rbcrevidx``."""
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        """Fold the prefix into the mutable suffix, leaving the prefix empty."""
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        """Drop every byte from offset ``pos`` onwards."""
        # after make_mutable() the prefix is empty, so ``pos`` indexes
        # ``_rest`` directly
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Pack ``(node, branchidx)`` as a record at byte ``rbcrevidx``.

        The buffer is grown with zero bytes when the record does not fit.
        """
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        """Append ``extension`` to the mutable suffix."""
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return a copy of the bytes in ``[begin, end)`` as a ``bytearray``.

        The range may lie in the prefix, in the suffix, or span both.
        """
        prefixlen = len(self._prefix)
        # Clamp the suffix-relative offsets at 0: a negative value would be
        # interpreted by Python slicing as counting from the end of ``_rest``
        # and pull in the wrong bytes when the range starts or ends inside
        # the prefix.
        rest_begin = max(begin - prefixlen, 0)
        rest_end = max(end - prefixlen, 0)
        if begin >= prefixlen:
            return self._rest[rest_begin:rest_end]
        acc = bytearray()
        acc[:] = self._prefix[begin:end]
        acc.extend(self._rest[rest_begin:rest_end])
        return acc
|
1172 |
|
1173 |
|
class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the remaining (high) bit indicates that it is a branch close
    commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        """Load rbc-names and rbc-revs from ``repo.cachevfs``.

        ``repo`` must be unfiltered.  When the names file cannot be read and
        ``readonly`` is true, ``branchinfo`` is replaced on the instance by
        the uncached ``_branchinfo`` slow path.
        """
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # byte length of the rbc-names data on disk
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap')
                with repo.cachevfs(_rbcrevs) as fp:
                    if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
                        data = util.buffer(util.mmapread(fp))
                    else:
                        data = fp.read()
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            # without any usable record the names read from disk are useless
            self._names = []
        # number of names that are known to be on disk (in the first
        # _rbcsnameslen bytes of rbc-names)
        self._rbcnamescount = len(self._names)

    def _clear(self):
        """Reset the in-memory cache to all-zero records and no names."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        """Mapping from branch name to its index in ``_names``."""
        return {b: r for r, b in enumerate(self._names)}

    def _branchidx(self, branch):
        """Return the index of ``branch`` in ``_names``, adding it if new."""
        namesreverse = self._namesreverse
        if branch in namesreverse:
            return namesreverse[branch]
        branchidx = len(self._names)
        self._names.append(branch)
        namesreverse[branch] = branchidx
        return branchidx

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            # all-zero hash prefix: the record was never filled in
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            # _rbcrevslen counts records, not bytes: records before ``rev``
            # are the only ones that can still be trusted on disk
            self._rbcrevslen = min(self._rbcrevslen, rev)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        branchidx = self._branchidx(b)
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        branchidx = self._branchidx(branch)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        # everything from ``rev`` onwards now has to be (re)written to disk
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty.

        ``tr`` is accepted so the method can be registered as a transaction
        finalize callback; it is not used.  Failures to lock or write are
        reported through ``ui.debug`` and otherwise ignored.
        """
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        f = None
        try:
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, b'ab')
                if f.tell() == self._rbcsnameslen:
                    # file is as we left it: append after a separator
                    f.write(b'\0')
                else:
                    f.close()
                    f = None
                    repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, b'wb')
            f.write(
                b'\0'.join(
                    encoding.fromlocal(b)
                    for b in self._names[self._rbcnamescount :]
                )
            )
            self._rbcsnameslen = f.tell()
        finally:
            # always release the handle, even when a write fails
            if f is not None:
                f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    # could not position at ``start``: rewrite from scratch
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
|