Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/branchmap.py @ 51938:f0e07efc199f
rev-branch-cache: move the code into a dedicated module
The branchmap module is getting huge and the rev branch cache is fully
independent, let's move it elsewhere.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sun, 22 Sep 2024 15:55:46 +0200 |
parents | f4733654f144 |
children | dd3ccda3abc8 |
comparison
equal
deleted
inserted
replaced
51937:77a9c7d8a7ba | 51938:f0e07efc199f |
---|---|
4 # | 4 # |
5 # This software may be used and distributed according to the terms of the | 5 # This software may be used and distributed according to the terms of the |
6 # GNU General Public License version 2 or any later version. | 6 # GNU General Public License version 2 or any later version. |
7 | 7 |
8 from __future__ import annotations | 8 from __future__ import annotations |
9 | |
10 import struct | |
11 | 9 |
12 from .node import ( | 10 from .node import ( |
13 bin, | 11 bin, |
14 hex, | 12 hex, |
15 nullrev, | 13 nullrev, |
46 from . import localrepo | 44 from . import localrepo |
47 | 45 |
48 assert [localrepo] | 46 assert [localrepo] |
49 | 47 |
50 subsettable = repoviewutil.subsettable | 48 subsettable = repoviewutil.subsettable |
51 | |
52 calcsize = struct.calcsize | |
53 pack_into = struct.pack_into | |
54 unpack_from = struct.unpack_from | |
55 | 49 |
56 | 50 |
57 class BranchMapCache: | 51 class BranchMapCache: |
58 """mapping of filtered views of repo with their branchcache""" | 52 """mapping of filtered views of repo with their branchcache""" |
59 | 53 |
1085 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]] | 1079 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]] |
1086 ] = (), | 1080 ] = (), |
1087 closednodes: Optional[Set[bytes]] = None, | 1081 closednodes: Optional[Set[bytes]] = None, |
1088 ) -> None: | 1082 ) -> None: |
1089 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes) | 1083 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes) |
1090 | |
1091 | |
1092 # Revision branch info cache | |
1093 | |
1094 _rbcversion = b'-v1' | |
1095 _rbcnames = b'rbc-names' + _rbcversion | |
1096 _rbcrevs = b'rbc-revs' + _rbcversion | |
1097 # [4 byte hash prefix][4 byte branch name number with sign bit indicating close] | |
1098 _rbcrecfmt = b'>4sI' | |
1099 _rbcrecsize = calcsize(_rbcrecfmt) | |
1100 _rbcmininc = 64 * _rbcrecsize | |
1101 _rbcnodelen = 4 | |
1102 _rbcbranchidxmask = 0x7FFFFFFF | |
1103 _rbccloseflag = 0x80000000 | |
1104 | |
1105 | |
1106 class rbcrevs: | |
1107 """a byte string consisting of an immutable prefix followed by a mutable suffix""" | |
1108 | |
1109 def __init__(self, revs): | |
1110 self._prefix = revs | |
1111 self._rest = bytearray() | |
1112 | |
1113 def __len__(self): | |
1114 return len(self._prefix) + len(self._rest) | |
1115 | |
1116 def unpack_record(self, rbcrevidx): | |
1117 if rbcrevidx < len(self._prefix): | |
1118 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx) | |
1119 else: | |
1120 return unpack_from( | |
1121 _rbcrecfmt, | |
1122 util.buffer(self._rest), | |
1123 rbcrevidx - len(self._prefix), | |
1124 ) | |
1125 | |
1126 def make_mutable(self): | |
1127 if len(self._prefix) > 0: | |
1128 entirety = bytearray() | |
1129 entirety[:] = self._prefix | |
1130 entirety.extend(self._rest) | |
1131 self._rest = entirety | |
1132 self._prefix = bytearray() | |
1133 | |
1134 def truncate(self, pos): | |
1135 self.make_mutable() | |
1136 del self._rest[pos:] | |
1137 | |
1138 def pack_into(self, rbcrevidx, node, branchidx): | |
1139 if rbcrevidx < len(self._prefix): | |
1140 self.make_mutable() | |
1141 buf = self._rest | |
1142 start_offset = rbcrevidx - len(self._prefix) | |
1143 end_offset = start_offset + _rbcrecsize | |
1144 | |
1145 if len(self._rest) < end_offset: | |
1146 # bytearray doesn't allocate extra space at least in Python 3.7. | |
1147 # When multiple changesets are added in a row, precise resize would | |
1148 # result in quadratic complexity. Overallocate to compensate by | |
1149 # using the classic doubling technique for dynamic arrays instead. | |
1150 # If there was a gap in the map before, less space will be reserved. | |
1151 self._rest.extend(b'\0' * end_offset) | |
1152 return pack_into( | |
1153 _rbcrecfmt, | |
1154 buf, | |
1155 start_offset, | |
1156 node, | |
1157 branchidx, | |
1158 ) | |
1159 | |
1160 def extend(self, extension): | |
1161 return self._rest.extend(extension) | |
1162 | |
1163 def slice(self, begin, end): | |
1164 if begin < len(self._prefix): | |
1165 acc = bytearray() | |
1166 acc[:] = self._prefix[begin:end] | |
1167 acc.extend( | |
1168 self._rest[begin - len(self._prefix) : end - len(self._prefix)] | |
1169 ) | |
1170 return acc | |
1171 return self._rest[begin - len(self._prefix) : end - len(self._prefix)] | |
1172 | |
1173 | |
1174 class revbranchcache: | |
1175 """Persistent cache, mapping from revision number to branch name and close. | |
1176 This is a low level cache, independent of filtering. | |
1177 | |
1178 Branch names are stored in rbc-names in internal encoding separated by 0. | |
1179 rbc-names is append-only, and each branch name is only stored once and will | |
1180 thus have a unique index. | |
1181 | |
1182 The branch info for each revision is stored in rbc-revs as constant size | |
1183 records. The whole file is read into memory, but it is only 'parsed' on | |
1184 demand. The file is usually append-only but will be truncated if repo | |
1185 modification is detected. | |
1186 The record for each revision contains the first 4 bytes of the | |
1187 corresponding node hash, and the record is only used if it still matches. | |
1188 Even a completely trashed rbc-revs will thus still give the right result | |
1189 while converging towards full recovery ... assuming no incorrectly matching | |
1190 node hashes. | |
1191 The record also contains 4 bytes where 31 bits contain the index of the | |
1192 branch and the high bit indicates that it is a branch close commit. | |
1193 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i | |
1194 and will grow with it but be 1/8th of its size. | |
1195 """ | |
1196 | |
1197 def __init__(self, repo, readonly=True): | |
1198 assert repo.filtername is None | |
1199 self._repo = repo | |
1200 self._names = [] # branch names in local encoding with static index | |
1201 self._rbcrevs = rbcrevs(bytearray()) | |
1202 self._rbcsnameslen = 0 # length of the rbc-names data read from disk | |
1203 try: | |
1204 bndata = repo.cachevfs.read(_rbcnames) | |
1205 self._rbcsnameslen = len(bndata) # for verification before writing | |
1206 if bndata: | |
1207 self._names = [ | |
1208 encoding.tolocal(bn) for bn in bndata.split(b'\0') | |
1209 ] | |
1210 except (IOError, OSError): | |
1211 if readonly: | |
1212 # don't try to use cache - fall back to the slow path | |
1213 self.branchinfo = self._branchinfo | |
1214 | |
1215 if self._names: | |
1216 try: | |
1217 usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap') | |
1218 with repo.cachevfs(_rbcrevs) as fp: | |
1219 if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs): | |
1220 data = util.buffer(util.mmapread(fp)) | |
1221 else: | |
1222 data = fp.read() | |
1223 self._rbcrevs = rbcrevs(data) | |
1224 except (IOError, OSError) as inst: | |
1225 repo.ui.debug( | |
1226 b"couldn't read revision branch cache: %s\n" | |
1227 % stringutil.forcebytestr(inst) | |
1228 ) | |
1229 # remember number of good records on disk | |
1230 self._rbcrevslen = min( | |
1231 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog) | |
1232 ) | |
1233 if self._rbcrevslen == 0: | |
1234 self._names = [] | |
1235 self._rbcnamescount = len(self._names) # number of names read at | |
1236 # _rbcsnameslen | |
1237 | |
1238 def _clear(self): | |
1239 self._rbcsnameslen = 0 | |
1240 del self._names[:] | |
1241 self._rbcnamescount = 0 | |
1242 self._rbcrevslen = len(self._repo.changelog) | |
1243 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize)) | |
1244 util.clearcachedproperty(self, b'_namesreverse') | |
1245 | |
1246 @util.propertycache | |
1247 def _namesreverse(self): | |
1248 return {b: r for r, b in enumerate(self._names)} | |
1249 | |
1250 def branchinfo(self, rev): | |
1251 """Return branch name and close flag for rev, using and updating | |
1252 persistent cache.""" | |
1253 changelog = self._repo.changelog | |
1254 rbcrevidx = rev * _rbcrecsize | |
1255 | |
1256 # avoid negative index, changelog.read(nullrev) is fast without cache | |
1257 if rev == nullrev: | |
1258 return changelog.branchinfo(rev) | |
1259 | |
1260 # if requested rev isn't allocated, grow and cache the rev info | |
1261 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize: | |
1262 return self._branchinfo(rev) | |
1263 | |
1264 # fast path: extract data from cache, use it if node is matching | |
1265 reponode = changelog.node(rev)[:_rbcnodelen] | |
1266 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx) | |
1267 close = bool(branchidx & _rbccloseflag) | |
1268 if close: | |
1269 branchidx &= _rbcbranchidxmask | |
1270 if cachenode == b'\0\0\0\0': | |
1271 pass | |
1272 elif cachenode == reponode: | |
1273 try: | |
1274 return self._names[branchidx], close | |
1275 except IndexError: | |
1276 # recover from invalid reference to unknown branch | |
1277 self._repo.ui.debug( | |
1278 b"referenced branch names not found" | |
1279 b" - rebuilding revision branch cache from scratch\n" | |
1280 ) | |
1281 self._clear() | |
1282 else: | |
1283 # rev/node map has changed, invalidate the cache from here up | |
1284 self._repo.ui.debug( | |
1285 b"history modification detected - truncating " | |
1286 b"revision branch cache to revision %d\n" % rev | |
1287 ) | |
1288 truncate = rbcrevidx + _rbcrecsize | |
1289 self._rbcrevs.truncate(truncate) | |
1290 self._rbcrevslen = min(self._rbcrevslen, truncate) | |
1291 | |
1292 # fall back to slow path and make sure it will be written to disk | |
1293 return self._branchinfo(rev) | |
1294 | |
1295 def _branchinfo(self, rev): | |
1296 """Retrieve branch info from changelog and update _rbcrevs""" | |
1297 changelog = self._repo.changelog | |
1298 b, close = changelog.branchinfo(rev) | |
1299 if b in self._namesreverse: | |
1300 branchidx = self._namesreverse[b] | |
1301 else: | |
1302 branchidx = len(self._names) | |
1303 self._names.append(b) | |
1304 self._namesreverse[b] = branchidx | |
1305 reponode = changelog.node(rev) | |
1306 if close: | |
1307 branchidx |= _rbccloseflag | |
1308 self._setcachedata(rev, reponode, branchidx) | |
1309 return b, close | |
1310 | |
1311 def setdata(self, rev, changelogrevision): | |
1312 """add new data information to the cache""" | |
1313 branch, close = changelogrevision.branchinfo | |
1314 | |
1315 if branch in self._namesreverse: | |
1316 branchidx = self._namesreverse[branch] | |
1317 else: | |
1318 branchidx = len(self._names) | |
1319 self._names.append(branch) | |
1320 self._namesreverse[branch] = branchidx | |
1321 if close: | |
1322 branchidx |= _rbccloseflag | |
1323 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx) | |
1324 # If no cache data were readable (none exist, bad permission, etc.) | |
1325 # the cache was bypassing itself by setting: | |
1326 # | |
1327 # self.branchinfo = self._branchinfo | |
1328 # | |
1329 # Since we now have data in the cache, we need to drop this bypassing. | |
1330 if 'branchinfo' in vars(self): | |
1331 del self.branchinfo | |
1332 | |
1333 def _setcachedata(self, rev, node, branchidx): | |
1334 """Writes the node's branch data to the in-memory cache data.""" | |
1335 if rev == nullrev: | |
1336 return | |
1337 rbcrevidx = rev * _rbcrecsize | |
1338 self._rbcrevs.pack_into(rbcrevidx, node, branchidx) | |
1339 self._rbcrevslen = min(self._rbcrevslen, rev) | |
1340 | |
1341 tr = self._repo.currenttransaction() | |
1342 if tr: | |
1343 tr.addfinalize(b'write-revbranchcache', self.write) | |
1344 | |
1345 def write(self, tr=None): | |
1346 """Save branch cache if it is dirty.""" | |
1347 repo = self._repo | |
1348 wlock = None | |
1349 step = b'' | |
1350 try: | |
1351 # write the new names | |
1352 if self._rbcnamescount < len(self._names): | |
1353 wlock = repo.wlock(wait=False) | |
1354 step = b' names' | |
1355 self._writenames(repo) | |
1356 | |
1357 # write the new revs | |
1358 start = self._rbcrevslen * _rbcrecsize | |
1359 if start != len(self._rbcrevs): | |
1360 step = b'' | |
1361 if wlock is None: | |
1362 wlock = repo.wlock(wait=False) | |
1363 self._writerevs(repo, start) | |
1364 | |
1365 except (IOError, OSError, error.Abort, error.LockError) as inst: | |
1366 repo.ui.debug( | |
1367 b"couldn't write revision branch cache%s: %s\n" | |
1368 % (step, stringutil.forcebytestr(inst)) | |
1369 ) | |
1370 finally: | |
1371 if wlock is not None: | |
1372 wlock.release() | |
1373 | |
1374 def _writenames(self, repo): | |
1375 """write the new branch names to revbranchcache""" | |
1376 if self._rbcnamescount != 0: | |
1377 f = repo.cachevfs.open(_rbcnames, b'ab') | |
1378 if f.tell() == self._rbcsnameslen: | |
1379 f.write(b'\0') | |
1380 else: | |
1381 f.close() | |
1382 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames) | |
1383 self._rbcnamescount = 0 | |
1384 self._rbcrevslen = 0 | |
1385 if self._rbcnamescount == 0: | |
1386 # before rewriting names, make sure references are removed | |
1387 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True) | |
1388 f = repo.cachevfs.open(_rbcnames, b'wb') | |
1389 f.write( | |
1390 b'\0'.join( | |
1391 encoding.fromlocal(b) | |
1392 for b in self._names[self._rbcnamescount :] | |
1393 ) | |
1394 ) | |
1395 self._rbcsnameslen = f.tell() | |
1396 f.close() | |
1397 self._rbcnamescount = len(self._names) | |
1398 | |
1399 def _writerevs(self, repo, start): | |
1400 """write the new revs to revbranchcache""" | |
1401 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize) | |
1402 with repo.cachevfs.open(_rbcrevs, b'ab') as f: | |
1403 if f.tell() != start: | |
1404 repo.ui.debug( | |
1405 b"truncating cache/%s to %d\n" % (_rbcrevs, start) | |
1406 ) | |
1407 f.seek(start) | |
1408 if f.tell() != start: | |
1409 start = 0 | |
1410 f.seek(start) | |
1411 f.truncate() | |
1412 end = revs * _rbcrecsize | |
1413 f.write(self._rbcrevs.slice(start, end)) | |
1414 self._rbcrevslen = revs |