comparison mercurial/branchmap.py @ 51938:f0e07efc199f

rev-branch-cache: move the code into a dedicated module. The branchmap module is getting huge and the rev branch cache is fully independent, so let's move it elsewhere.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sun, 22 Sep 2024 15:55:46 +0200
parents f4733654f144
children dd3ccda3abc8
comparison
equal deleted inserted replaced
51937:77a9c7d8a7ba 51938:f0e07efc199f
4 # 4 #
5 # This software may be used and distributed according to the terms of the 5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import annotations 8 from __future__ import annotations
9
10 import struct
11 9
12 from .node import ( 10 from .node import (
13 bin, 11 bin,
14 hex, 12 hex,
15 nullrev, 13 nullrev,
46 from . import localrepo 44 from . import localrepo
47 45
48 assert [localrepo] 46 assert [localrepo]
49 47
50 subsettable = repoviewutil.subsettable 48 subsettable = repoviewutil.subsettable
51
52 calcsize = struct.calcsize
53 pack_into = struct.pack_into
54 unpack_from = struct.unpack_from
55 49
56 50
57 class BranchMapCache: 51 class BranchMapCache:
58 """mapping of filtered views of repo with their branchcache""" 52 """mapping of filtered views of repo with their branchcache"""
59 53
1085 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]] 1079 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
1086 ] = (), 1080 ] = (),
1087 closednodes: Optional[Set[bytes]] = None, 1081 closednodes: Optional[Set[bytes]] = None,
1088 ) -> None: 1082 ) -> None:
1089 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes) 1083 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
1090
1091
# Revision branch info cache

# suffix shared by both cache file names
_rbcversion = b'-v1'
# cache file holding the branch names, separated by b'\0'
_rbcnames = b'rbc-names' + _rbcversion
# cache file holding one fixed-size record per revision
_rbcrevs = b'rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with the high bit indicating
# a branch close]
_rbcrecfmt = b'>4sI'
# size in bytes of one packed record
_rbcrecsize = calcsize(_rbcrecfmt)
# NOTE(review): not referenced by the code visible here; presumably a minimum
# growth increment of 64 records - TODO confirm
_rbcmininc = 64 * _rbcrecsize
# number of leading node hash bytes compared against a cached record
_rbcnodelen = 4
# mask extracting the branch name index from the second record field
_rbcbranchidxmask = 0x7FFFFFFF
# bit set in the second record field when the revision closes its branch
_rbccloseflag = 0x80000000
1104
1105
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix

    The prefix is the buffer handed to the constructor; it is never modified
    in place.  The suffix is a ``bytearray`` that receives every write.  All
    offsets taken by the methods below are byte offsets into the logical
    concatenation ``prefix + suffix``.
    """

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """unpack the `_rbcrecfmt` record starting at byte offset `rbcrevidx`"""
        if rbcrevidx < len(self._prefix):
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        else:
            return unpack_from(
                _rbcrecfmt,
                util.buffer(self._rest),
                rbcrevidx - len(self._prefix),
            )

    def make_mutable(self):
        """copy the prefix into the mutable suffix so every byte is writable"""
        if len(self._prefix) > 0:
            entirety = bytearray()
            entirety[:] = self._prefix
            entirety.extend(self._rest)
            self._rest = entirety
            self._prefix = bytearray()

    def truncate(self, pos):
        """drop every byte at offset `pos` and beyond"""
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """write a `_rbcrecfmt` record at byte offset `rbcrevidx`

        Writing inside the prefix first converts the whole content to the
        mutable form.
        """
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(self._rest) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate:
            # growing by `end_offset` bytes at least doubles the suffix, as in
            # the classic doubling technique for dynamic arrays.
            # If there was a gap in the map before, less space will be reserved.
            self._rest.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        """append `extension` to the mutable suffix"""
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """return the bytes in `[begin, end)` of the logical content

        `begin` and `end` are expected to be non-negative byte offsets.
        """
        prefix_len = len(self._prefix)
        # Clamp at zero: a bound that falls inside the prefix would otherwise
        # become negative and be interpreted by the bytearray slice as an
        # offset counted from the end of the suffix.
        rest = self._rest[max(begin - prefix_len, 0) : max(end - prefix_len, 0)]
        if begin < prefix_len:
            acc = bytearray()
            acc[:] = self._prefix[begin:end]
            acc.extend(rest)
            return acc
        return rest
1173
class revbranchcache:
    """Persistent cache, mapping from revision number to branch name and close.
    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and will
    thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch close commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo, readonly=True):
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        # byte length of the names file as last read or written
        self._rbcsnameslen = 0
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        if self._names:
            try:
                usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap')
                with repo.cachevfs(_rbcrevs) as fp:
                    if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
                        data = util.buffer(util.mmapread(fp))
                    else:
                        data = fp.read()
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        # number of names already present in the names file
        self._rbcnamescount = len(self._names)

    def _clear(self):
        """Forget everything and start over with an all-zero record area."""
        self._rbcsnameslen = 0
        del self._names[:]
        self._rbcnamescount = 0
        self._rbcrevslen = len(self._repo.changelog)
        self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
        util.clearcachedproperty(self, b'_namesreverse')

    @util.propertycache
    def _namesreverse(self):
        """Mapping from branch name to its index in `_names`."""
        return {b: r for r, b in enumerate(self._names)}

    def _getbranchidx(self, branch):
        """Return the index of `branch` in `_names`, registering it if new."""
        branchidx = self._namesreverse.get(branch)
        if branchidx is None:
            branchidx = len(self._names)
            self._names.append(branch)
            self._namesreverse[branch] = branchidx
        return branchidx

    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            # empty record: nothing cached for this revision yet
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            # keep the slot of `rev` itself: it is rewritten just below
            self._rbcrevs.truncate(rbcrevidx + _rbcrecsize)
            # `_rbcrevslen` counts records, not bytes: only the records
            # before `rev` can still be trusted on disk
            self._rbcrevslen = min(self._rbcrevslen, rev)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)

    def _branchinfo(self, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        changelog = self._repo.changelog
        b, close = changelog.branchinfo(rev)
        branchidx = self._getbranchidx(b)
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, reponode, branchidx)
        return b, close

    def setdata(self, rev, changelogrevision):
        """add new data information to the cache"""
        branch, close = changelogrevision.branchinfo

        branchidx = self._getbranchidx(branch)
        if close:
            branchidx |= _rbccloseflag
        self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
        # If no cache data were readable (non exists, bad permission, etc)
        # the cache was bypassing itself by setting:
        #
        #   self.branchinfo = self._branchinfo
        #
        # Since we now have data in the cache, we need to drop this bypassing.
        if 'branchinfo' in vars(self):
            del self.branchinfo

    def _setcachedata(self, rev, node, branchidx):
        """Writes the node's branch data to the in-memory cache data."""
        if rev == nullrev:
            return
        rbcrevidx = rev * _rbcrecsize
        self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
        # everything from `rev` on must be (re)written to disk
        self._rbcrevslen = min(self._rbcrevslen, rev)

        tr = self._repo.currenttransaction()
        if tr:
            tr.addfinalize(b'write-revbranchcache', self.write)

    def write(self, tr=None):
        """Save branch cache if it is dirty."""
        repo = self._repo
        wlock = None
        step = b''
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # the cache is best effort: report the failure and carry on
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()

    def _writenames(self, repo):
        """write the new branch names to revbranchcache"""
        f = None
        try:
            if self._rbcnamescount != 0:
                f = repo.cachevfs.open(_rbcnames, b'ab')
                if f.tell() == self._rbcsnameslen:
                    # file is as we left it: append after a separator
                    f.write(b'\0')
                else:
                    stale, f = f, None
                    stale.close()
                    repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
                    self._rbcnamescount = 0
                    self._rbcrevslen = 0
            if self._rbcnamescount == 0:
                # before rewriting names, make sure references are removed
                repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
                f = repo.cachevfs.open(_rbcnames, b'wb')
            f.write(
                b'\0'.join(
                    encoding.fromlocal(b)
                    for b in self._names[self._rbcnamescount :]
                )
            )
            self._rbcsnameslen = f.tell()
        finally:
            # never leak the handle, even when a write fails
            if f is not None:
                f.close()
        self._rbcnamescount = len(self._names)

    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache"""
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    # could not position at `start`: rewrite from the beginning
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs