mercurial/branchmap.py
changeset 51897 f0e07efc199f
parent 51859 f4733654f144
child 51968 dd3ccda3abc8
equal deleted inserted replaced
51896:77a9c7d8a7ba 51897:f0e07efc199f
     4 #
     4 #
     5 # This software may be used and distributed according to the terms of the
     5 # This software may be used and distributed according to the terms of the
     6 # GNU General Public License version 2 or any later version.
     6 # GNU General Public License version 2 or any later version.
     7 
     7 
     8 from __future__ import annotations
     8 from __future__ import annotations
     9 
       
    10 import struct
       
    11 
     9 
    12 from .node import (
    10 from .node import (
    13     bin,
    11     bin,
    14     hex,
    12     hex,
    15     nullrev,
    13     nullrev,
    46     from . import localrepo
    44     from . import localrepo
    47 
    45 
    48     assert [localrepo]
    46     assert [localrepo]
    49 
    47 
    50 subsettable = repoviewutil.subsettable
    48 subsettable = repoviewutil.subsettable
    51 
       
    52 calcsize = struct.calcsize
       
    53 pack_into = struct.pack_into
       
    54 unpack_from = struct.unpack_from
       
    55 
    49 
    56 
    50 
    57 class BranchMapCache:
    51 class BranchMapCache:
    58     """mapping of filtered views of repo with their branchcache"""
    52     """mapping of filtered views of repo with their branchcache"""
    59 
    53 
  1085             Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
  1079             Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
  1086         ] = (),
  1080         ] = (),
  1087         closednodes: Optional[Set[bytes]] = None,
  1081         closednodes: Optional[Set[bytes]] = None,
  1088     ) -> None:
  1082     ) -> None:
  1089         super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
  1083         super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
  1090 
       
  1091 
       
  1092 # Revision branch info cache
       
  1093 
       
  1094 _rbcversion = b'-v1'
       
  1095 _rbcnames = b'rbc-names' + _rbcversion
       
  1096 _rbcrevs = b'rbc-revs' + _rbcversion
       
  1097 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
       
  1098 _rbcrecfmt = b'>4sI'
       
  1099 _rbcrecsize = calcsize(_rbcrecfmt)
       
  1100 _rbcmininc = 64 * _rbcrecsize
       
  1101 _rbcnodelen = 4
       
  1102 _rbcbranchidxmask = 0x7FFFFFFF
       
  1103 _rbccloseflag = 0x80000000
       
  1104 
       
  1105 
       
  1106 class rbcrevs:
       
  1107     """a byte string consisting of an immutable prefix followed by a mutable suffix"""
       
  1108 
       
  1109     def __init__(self, revs):
       
  1110         self._prefix = revs
       
  1111         self._rest = bytearray()
       
  1112 
       
  1113     def __len__(self):
       
  1114         return len(self._prefix) + len(self._rest)
       
  1115 
       
  1116     def unpack_record(self, rbcrevidx):
       
  1117         if rbcrevidx < len(self._prefix):
       
  1118             return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
       
  1119         else:
       
  1120             return unpack_from(
       
  1121                 _rbcrecfmt,
       
  1122                 util.buffer(self._rest),
       
  1123                 rbcrevidx - len(self._prefix),
       
  1124             )
       
  1125 
       
  1126     def make_mutable(self):
       
  1127         if len(self._prefix) > 0:
       
  1128             entirety = bytearray()
       
  1129             entirety[:] = self._prefix
       
  1130             entirety.extend(self._rest)
       
  1131             self._rest = entirety
       
  1132             self._prefix = bytearray()
       
  1133 
       
  1134     def truncate(self, pos):
       
  1135         self.make_mutable()
       
  1136         del self._rest[pos:]
       
  1137 
       
  1138     def pack_into(self, rbcrevidx, node, branchidx):
       
  1139         if rbcrevidx < len(self._prefix):
       
  1140             self.make_mutable()
       
  1141         buf = self._rest
       
  1142         start_offset = rbcrevidx - len(self._prefix)
       
  1143         end_offset = start_offset + _rbcrecsize
       
  1144 
       
  1145         if len(self._rest) < end_offset:
       
  1146             # bytearray doesn't allocate extra space at least in Python 3.7.
       
  1147             # When multiple changesets are added in a row, precise resize would
       
  1148             # result in quadratic complexity. Overallocate to compensate by
       
  1149             # using the classic doubling technique for dynamic arrays instead.
       
  1150             # If there was a gap in the map before, less space will be reserved.
       
  1151             self._rest.extend(b'\0' * end_offset)
       
  1152         return pack_into(
       
  1153             _rbcrecfmt,
       
  1154             buf,
       
  1155             start_offset,
       
  1156             node,
       
  1157             branchidx,
       
  1158         )
       
  1159 
       
  1160     def extend(self, extension):
       
  1161         return self._rest.extend(extension)
       
  1162 
       
  1163     def slice(self, begin, end):
       
  1164         if begin < len(self._prefix):
       
  1165             acc = bytearray()
       
  1166             acc[:] = self._prefix[begin:end]
       
  1167             acc.extend(
       
  1168                 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
       
  1169             )
       
  1170             return acc
       
  1171         return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
       
  1172 
       
  1173 
       
  1174 class revbranchcache:
       
  1175     """Persistent cache, mapping from revision number to branch name and close.
       
  1176     This is a low level cache, independent of filtering.
       
  1177 
       
  1178     Branch names are stored in rbc-names in internal encoding separated by 0.
       
  1179     rbc-names is append-only, and each branch name is only stored once and will
       
  1180     thus have a unique index.
       
  1181 
       
  1182     The branch info for each revision is stored in rbc-revs as constant size
       
  1183     records. The whole file is read into memory, but it is only 'parsed' on
       
  1184     demand. The file is usually append-only but will be truncated if repo
       
  1185     modification is detected.
       
  1186     The record for each revision contains the first 4 bytes of the
       
  1187     corresponding node hash, and the record is only used if it still matches.
       
  1188     Even a completely trashed rbc-revs file will thus still give the right result
       
  1189     while converging towards full recovery ... assuming no incorrectly matching
       
  1190     node hashes.
       
  1191     The record also contains 4 bytes where 31 bits contains the index of the
       
  1192     branch and the last bit indicate that it is a branch close commit.
       
  1193     The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
       
  1194     and will grow with it but be 1/8th of its size.
       
  1195     """
       
  1196 
       
  1197     def __init__(self, repo, readonly=True):
       
  1198         assert repo.filtername is None
       
  1199         self._repo = repo
       
  1200         self._names = []  # branch names in local encoding with static index
       
  1201         self._rbcrevs = rbcrevs(bytearray())
       
  1202         self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
       
  1203         try:
       
  1204             bndata = repo.cachevfs.read(_rbcnames)
       
  1205             self._rbcsnameslen = len(bndata)  # for verification before writing
       
  1206             if bndata:
       
  1207                 self._names = [
       
  1208                     encoding.tolocal(bn) for bn in bndata.split(b'\0')
       
  1209                 ]
       
  1210         except (IOError, OSError):
       
  1211             if readonly:
       
  1212                 # don't try to use cache - fall back to the slow path
       
  1213                 self.branchinfo = self._branchinfo
       
  1214 
       
  1215         if self._names:
       
  1216             try:
       
  1217                 usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap')
       
  1218                 with repo.cachevfs(_rbcrevs) as fp:
       
  1219                     if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
       
  1220                         data = util.buffer(util.mmapread(fp))
       
  1221                     else:
       
  1222                         data = fp.read()
       
  1223                 self._rbcrevs = rbcrevs(data)
       
  1224             except (IOError, OSError) as inst:
       
  1225                 repo.ui.debug(
       
  1226                     b"couldn't read revision branch cache: %s\n"
       
  1227                     % stringutil.forcebytestr(inst)
       
  1228                 )
       
  1229         # remember number of good records on disk
       
  1230         self._rbcrevslen = min(
       
  1231             len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
       
  1232         )
       
  1233         if self._rbcrevslen == 0:
       
  1234             self._names = []
       
  1235         self._rbcnamescount = len(self._names)  # number of names read at
       
  1236         # _rbcsnameslen
       
  1237 
       
  1238     def _clear(self):
       
  1239         self._rbcsnameslen = 0
       
  1240         del self._names[:]
       
  1241         self._rbcnamescount = 0
       
  1242         self._rbcrevslen = len(self._repo.changelog)
       
  1243         self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
       
  1244         util.clearcachedproperty(self, b'_namesreverse')
       
  1245 
       
  1246     @util.propertycache
       
  1247     def _namesreverse(self):
       
  1248         return {b: r for r, b in enumerate(self._names)}
       
  1249 
       
  1250     def branchinfo(self, rev):
       
  1251         """Return branch name and close flag for rev, using and updating
       
  1252         persistent cache."""
       
  1253         changelog = self._repo.changelog
       
  1254         rbcrevidx = rev * _rbcrecsize
       
  1255 
       
  1256         # avoid negative index, changelog.read(nullrev) is fast without cache
       
  1257         if rev == nullrev:
       
  1258             return changelog.branchinfo(rev)
       
  1259 
       
  1260         # if requested rev isn't allocated, grow and cache the rev info
       
  1261         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
       
  1262             return self._branchinfo(rev)
       
  1263 
       
  1264         # fast path: extract data from cache, use it if node is matching
       
  1265         reponode = changelog.node(rev)[:_rbcnodelen]
       
  1266         cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
       
  1267         close = bool(branchidx & _rbccloseflag)
       
  1268         if close:
       
  1269             branchidx &= _rbcbranchidxmask
       
  1270         if cachenode == b'\0\0\0\0':
       
  1271             pass
       
  1272         elif cachenode == reponode:
       
  1273             try:
       
  1274                 return self._names[branchidx], close
       
  1275             except IndexError:
       
  1276                 # recover from invalid reference to unknown branch
       
  1277                 self._repo.ui.debug(
       
  1278                     b"referenced branch names not found"
       
  1279                     b" - rebuilding revision branch cache from scratch\n"
       
  1280                 )
       
  1281                 self._clear()
       
  1282         else:
       
  1283             # rev/node map has changed, invalidate the cache from here up
       
  1284             self._repo.ui.debug(
       
  1285                 b"history modification detected - truncating "
       
  1286                 b"revision branch cache to revision %d\n" % rev
       
  1287             )
       
  1288             truncate = rbcrevidx + _rbcrecsize
       
  1289             self._rbcrevs.truncate(truncate)
       
  1290             self._rbcrevslen = min(self._rbcrevslen, truncate)
       
  1291 
       
  1292         # fall back to slow path and make sure it will be written to disk
       
  1293         return self._branchinfo(rev)
       
  1294 
       
  1295     def _branchinfo(self, rev):
       
  1296         """Retrieve branch info from changelog and update _rbcrevs"""
       
  1297         changelog = self._repo.changelog
       
  1298         b, close = changelog.branchinfo(rev)
       
  1299         if b in self._namesreverse:
       
  1300             branchidx = self._namesreverse[b]
       
  1301         else:
       
  1302             branchidx = len(self._names)
       
  1303             self._names.append(b)
       
  1304             self._namesreverse[b] = branchidx
       
  1305         reponode = changelog.node(rev)
       
  1306         if close:
       
  1307             branchidx |= _rbccloseflag
       
  1308         self._setcachedata(rev, reponode, branchidx)
       
  1309         return b, close
       
  1310 
       
  1311     def setdata(self, rev, changelogrevision):
       
  1312         """add new data information to the cache"""
       
  1313         branch, close = changelogrevision.branchinfo
       
  1314 
       
  1315         if branch in self._namesreverse:
       
  1316             branchidx = self._namesreverse[branch]
       
  1317         else:
       
  1318             branchidx = len(self._names)
       
  1319             self._names.append(branch)
       
  1320             self._namesreverse[branch] = branchidx
       
  1321         if close:
       
  1322             branchidx |= _rbccloseflag
       
  1323         self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
       
  1324         # If no cache data were readable (non exists, bad permission, etc)
       
  1325         # the cache was bypassing itself by setting:
       
  1326         #
       
  1327         #   self.branchinfo = self._branchinfo
       
  1328         #
       
  1329         # Since we now have data in the cache, we need to drop this bypassing.
       
  1330         if 'branchinfo' in vars(self):
       
  1331             del self.branchinfo
       
  1332 
       
  1333     def _setcachedata(self, rev, node, branchidx):
       
  1334         """Writes the node's branch data to the in-memory cache data."""
       
  1335         if rev == nullrev:
       
  1336             return
       
  1337         rbcrevidx = rev * _rbcrecsize
       
  1338         self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
       
  1339         self._rbcrevslen = min(self._rbcrevslen, rev)
       
  1340 
       
  1341         tr = self._repo.currenttransaction()
       
  1342         if tr:
       
  1343             tr.addfinalize(b'write-revbranchcache', self.write)
       
  1344 
       
  1345     def write(self, tr=None):
       
  1346         """Save branch cache if it is dirty."""
       
  1347         repo = self._repo
       
  1348         wlock = None
       
  1349         step = b''
       
  1350         try:
       
  1351             # write the new names
       
  1352             if self._rbcnamescount < len(self._names):
       
  1353                 wlock = repo.wlock(wait=False)
       
  1354                 step = b' names'
       
  1355                 self._writenames(repo)
       
  1356 
       
  1357             # write the new revs
       
  1358             start = self._rbcrevslen * _rbcrecsize
       
  1359             if start != len(self._rbcrevs):
       
  1360                 step = b''
       
  1361                 if wlock is None:
       
  1362                     wlock = repo.wlock(wait=False)
       
  1363                 self._writerevs(repo, start)
       
  1364 
       
  1365         except (IOError, OSError, error.Abort, error.LockError) as inst:
       
  1366             repo.ui.debug(
       
  1367                 b"couldn't write revision branch cache%s: %s\n"
       
  1368                 % (step, stringutil.forcebytestr(inst))
       
  1369             )
       
  1370         finally:
       
  1371             if wlock is not None:
       
  1372                 wlock.release()
       
  1373 
       
  1374     def _writenames(self, repo):
       
  1375         """write the new branch names to revbranchcache"""
       
  1376         if self._rbcnamescount != 0:
       
  1377             f = repo.cachevfs.open(_rbcnames, b'ab')
       
  1378             if f.tell() == self._rbcsnameslen:
       
  1379                 f.write(b'\0')
       
  1380             else:
       
  1381                 f.close()
       
  1382                 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
       
  1383                 self._rbcnamescount = 0
       
  1384                 self._rbcrevslen = 0
       
  1385         if self._rbcnamescount == 0:
       
  1386             # before rewriting names, make sure references are removed
       
  1387             repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
       
  1388             f = repo.cachevfs.open(_rbcnames, b'wb')
       
  1389         f.write(
       
  1390             b'\0'.join(
       
  1391                 encoding.fromlocal(b)
       
  1392                 for b in self._names[self._rbcnamescount :]
       
  1393             )
       
  1394         )
       
  1395         self._rbcsnameslen = f.tell()
       
  1396         f.close()
       
  1397         self._rbcnamescount = len(self._names)
       
  1398 
       
  1399     def _writerevs(self, repo, start):
       
  1400         """write the new revs to revbranchcache"""
       
  1401         revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
       
  1402         with repo.cachevfs.open(_rbcrevs, b'ab') as f:
       
  1403             if f.tell() != start:
       
  1404                 repo.ui.debug(
       
  1405                     b"truncating cache/%s to %d\n" % (_rbcrevs, start)
       
  1406                 )
       
  1407                 f.seek(start)
       
  1408                 if f.tell() != start:
       
  1409                     start = 0
       
  1410                     f.seek(start)
       
  1411                 f.truncate()
       
  1412             end = revs * _rbcrecsize
       
  1413             f.write(self._rbcrevs.slice(start, end))
       
  1414         self._rbcrevslen = revs