mercurial/branching/rev_cache.py
changeset 51897 f0e07efc199f
parent 51859 f4733654f144
child 51898 1eb2317c1762
equal deleted inserted replaced
51896:77a9c7d8a7ba 51897:f0e07efc199f
       
     1 # rev_cache.py - caching branch information per revision
       
     2 #
       
     3 # This software may be used and distributed according to the terms of the
       
     4 # GNU General Public License version 2 or any later version.
       
     5 from __future__ import annotations
       
     6 
       
     7 import struct
       
     8 
       
     9 from ..node import (
       
    10     nullrev,
       
    11 )
       
    12 
       
    13 from .. import (
       
    14     encoding,
       
    15     error,
       
    16     util,
       
    17 )
       
    18 
       
    19 from ..utils import (
       
    20     stringutil,
       
    21 )
       
    22 
       
    23 calcsize = struct.calcsize
       
    24 pack_into = struct.pack_into
       
    25 unpack_from = struct.unpack_from
       
    26 
       
    27 
       
    28 # Revision branch info cache
       
    29 
       
    30 _rbcversion = b'-v1'
       
    31 _rbcnames = b'rbc-names' + _rbcversion
       
    32 _rbcrevs = b'rbc-revs' + _rbcversion
       
    33 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
       
    34 _rbcrecfmt = b'>4sI'
       
    35 _rbcrecsize = calcsize(_rbcrecfmt)
       
    36 _rbcmininc = 64 * _rbcrecsize
       
    37 _rbcnodelen = 4
       
    38 _rbcbranchidxmask = 0x7FFFFFFF
       
    39 _rbccloseflag = 0x80000000
       
    40 
       
    41 
       
    42 class rbcrevs:
       
    43     """a byte string consisting of an immutable prefix followed by a mutable suffix"""
       
    44 
       
    45     def __init__(self, revs):
       
    46         self._prefix = revs
       
    47         self._rest = bytearray()
       
    48 
       
    49     def __len__(self):
       
    50         return len(self._prefix) + len(self._rest)
       
    51 
       
    52     def unpack_record(self, rbcrevidx):
       
    53         if rbcrevidx < len(self._prefix):
       
    54             return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
       
    55         else:
       
    56             return unpack_from(
       
    57                 _rbcrecfmt,
       
    58                 util.buffer(self._rest),
       
    59                 rbcrevidx - len(self._prefix),
       
    60             )
       
    61 
       
    62     def make_mutable(self):
       
    63         if len(self._prefix) > 0:
       
    64             entirety = bytearray()
       
    65             entirety[:] = self._prefix
       
    66             entirety.extend(self._rest)
       
    67             self._rest = entirety
       
    68             self._prefix = bytearray()
       
    69 
       
    70     def truncate(self, pos):
       
    71         self.make_mutable()
       
    72         del self._rest[pos:]
       
    73 
       
    74     def pack_into(self, rbcrevidx, node, branchidx):
       
    75         if rbcrevidx < len(self._prefix):
       
    76             self.make_mutable()
       
    77         buf = self._rest
       
    78         start_offset = rbcrevidx - len(self._prefix)
       
    79         end_offset = start_offset + _rbcrecsize
       
    80 
       
    81         if len(self._rest) < end_offset:
       
    82             # bytearray doesn't allocate extra space at least in Python 3.7.
       
    83             # When multiple changesets are added in a row, precise resize would
       
    84             # result in quadratic complexity. Overallocate to compensate by
       
    85             # using the classic doubling technique for dynamic arrays instead.
       
    86             # If there was a gap in the map before, less space will be reserved.
       
    87             self._rest.extend(b'\0' * end_offset)
       
    88         return pack_into(
       
    89             _rbcrecfmt,
       
    90             buf,
       
    91             start_offset,
       
    92             node,
       
    93             branchidx,
       
    94         )
       
    95 
       
    96     def extend(self, extension):
       
    97         return self._rest.extend(extension)
       
    98 
       
    99     def slice(self, begin, end):
       
   100         if begin < len(self._prefix):
       
   101             acc = bytearray()
       
   102             acc[:] = self._prefix[begin:end]
       
   103             acc.extend(
       
   104                 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
       
   105             )
       
   106             return acc
       
   107         return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
       
   108 
       
   109 
       
   110 class revbranchcache:
       
   111     """Persistent cache, mapping from revision number to branch name and close.
       
   112     This is a low level cache, independent of filtering.
       
   113 
       
   114     Branch names are stored in rbc-names in internal encoding separated by 0.
       
   115     rbc-names is append-only, and each branch name is only stored once and will
       
   116     thus have a unique index.
       
   117 
       
   118     The branch info for each revision is stored in rbc-revs as constant size
       
   119     records. The whole file is read into memory, but it is only 'parsed' on
       
   120     demand. The file is usually append-only but will be truncated if repo
       
   121     modification is detected.
       
   122     The record for each revision contains the first 4 bytes of the
       
   123     corresponding node hash, and the record is only used if it still matches.
       
   124     Even a completely trashed rbc-revs fill thus still give the right result
       
   125     while converging towards full recovery ... assuming no incorrectly matching
       
   126     node hashes.
       
   127     The record also contains 4 bytes where 31 bits contains the index of the
       
   128     branch and the last bit indicate that it is a branch close commit.
       
   129     The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
       
   130     and will grow with it but be 1/8th of its size.
       
   131     """
       
   132 
       
   133     def __init__(self, repo, readonly=True):
       
   134         assert repo.filtername is None
       
   135         self._repo = repo
       
   136         self._names = []  # branch names in local encoding with static index
       
   137         self._rbcrevs = rbcrevs(bytearray())
       
   138         self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
       
   139         try:
       
   140             bndata = repo.cachevfs.read(_rbcnames)
       
   141             self._rbcsnameslen = len(bndata)  # for verification before writing
       
   142             if bndata:
       
   143                 self._names = [
       
   144                     encoding.tolocal(bn) for bn in bndata.split(b'\0')
       
   145                 ]
       
   146         except (IOError, OSError):
       
   147             if readonly:
       
   148                 # don't try to use cache - fall back to the slow path
       
   149                 self.branchinfo = self._branchinfo
       
   150 
       
   151         if self._names:
       
   152             try:
       
   153                 usemmap = repo.ui.configbool(b'storage', b'revbranchcache.mmap')
       
   154                 with repo.cachevfs(_rbcrevs) as fp:
       
   155                     if usemmap and repo.cachevfs.is_mmap_safe(_rbcrevs):
       
   156                         data = util.buffer(util.mmapread(fp))
       
   157                     else:
       
   158                         data = fp.read()
       
   159                 self._rbcrevs = rbcrevs(data)
       
   160             except (IOError, OSError) as inst:
       
   161                 repo.ui.debug(
       
   162                     b"couldn't read revision branch cache: %s\n"
       
   163                     % stringutil.forcebytestr(inst)
       
   164                 )
       
   165         # remember number of good records on disk
       
   166         self._rbcrevslen = min(
       
   167             len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
       
   168         )
       
   169         if self._rbcrevslen == 0:
       
   170             self._names = []
       
   171         self._rbcnamescount = len(self._names)  # number of names read at
       
   172         # _rbcsnameslen
       
   173 
       
   174     def _clear(self):
       
   175         self._rbcsnameslen = 0
       
   176         del self._names[:]
       
   177         self._rbcnamescount = 0
       
   178         self._rbcrevslen = len(self._repo.changelog)
       
   179         self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
       
   180         util.clearcachedproperty(self, b'_namesreverse')
       
   181 
       
   182     @util.propertycache
       
   183     def _namesreverse(self):
       
   184         return {b: r for r, b in enumerate(self._names)}
       
   185 
       
   186     def branchinfo(self, rev):
       
   187         """Return branch name and close flag for rev, using and updating
       
   188         persistent cache."""
       
   189         changelog = self._repo.changelog
       
   190         rbcrevidx = rev * _rbcrecsize
       
   191 
       
   192         # avoid negative index, changelog.read(nullrev) is fast without cache
       
   193         if rev == nullrev:
       
   194             return changelog.branchinfo(rev)
       
   195 
       
   196         # if requested rev isn't allocated, grow and cache the rev info
       
   197         if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
       
   198             return self._branchinfo(rev)
       
   199 
       
   200         # fast path: extract data from cache, use it if node is matching
       
   201         reponode = changelog.node(rev)[:_rbcnodelen]
       
   202         cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
       
   203         close = bool(branchidx & _rbccloseflag)
       
   204         if close:
       
   205             branchidx &= _rbcbranchidxmask
       
   206         if cachenode == b'\0\0\0\0':
       
   207             pass
       
   208         elif cachenode == reponode:
       
   209             try:
       
   210                 return self._names[branchidx], close
       
   211             except IndexError:
       
   212                 # recover from invalid reference to unknown branch
       
   213                 self._repo.ui.debug(
       
   214                     b"referenced branch names not found"
       
   215                     b" - rebuilding revision branch cache from scratch\n"
       
   216                 )
       
   217                 self._clear()
       
   218         else:
       
   219             # rev/node map has changed, invalidate the cache from here up
       
   220             self._repo.ui.debug(
       
   221                 b"history modification detected - truncating "
       
   222                 b"revision branch cache to revision %d\n" % rev
       
   223             )
       
   224             truncate = rbcrevidx + _rbcrecsize
       
   225             self._rbcrevs.truncate(truncate)
       
   226             self._rbcrevslen = min(self._rbcrevslen, truncate)
       
   227 
       
   228         # fall back to slow path and make sure it will be written to disk
       
   229         return self._branchinfo(rev)
       
   230 
       
   231     def _branchinfo(self, rev):
       
   232         """Retrieve branch info from changelog and update _rbcrevs"""
       
   233         changelog = self._repo.changelog
       
   234         b, close = changelog.branchinfo(rev)
       
   235         if b in self._namesreverse:
       
   236             branchidx = self._namesreverse[b]
       
   237         else:
       
   238             branchidx = len(self._names)
       
   239             self._names.append(b)
       
   240             self._namesreverse[b] = branchidx
       
   241         reponode = changelog.node(rev)
       
   242         if close:
       
   243             branchidx |= _rbccloseflag
       
   244         self._setcachedata(rev, reponode, branchidx)
       
   245         return b, close
       
   246 
       
   247     def setdata(self, rev, changelogrevision):
       
   248         """add new data information to the cache"""
       
   249         branch, close = changelogrevision.branchinfo
       
   250 
       
   251         if branch in self._namesreverse:
       
   252             branchidx = self._namesreverse[branch]
       
   253         else:
       
   254             branchidx = len(self._names)
       
   255             self._names.append(branch)
       
   256             self._namesreverse[branch] = branchidx
       
   257         if close:
       
   258             branchidx |= _rbccloseflag
       
   259         self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
       
   260         # If no cache data were readable (non exists, bad permission, etc)
       
   261         # the cache was bypassing itself by setting:
       
   262         #
       
   263         #   self.branchinfo = self._branchinfo
       
   264         #
       
   265         # Since we now have data in the cache, we need to drop this bypassing.
       
   266         if 'branchinfo' in vars(self):
       
   267             del self.branchinfo
       
   268 
       
   269     def _setcachedata(self, rev, node, branchidx):
       
   270         """Writes the node's branch data to the in-memory cache data."""
       
   271         if rev == nullrev:
       
   272             return
       
   273         rbcrevidx = rev * _rbcrecsize
       
   274         self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
       
   275         self._rbcrevslen = min(self._rbcrevslen, rev)
       
   276 
       
   277         tr = self._repo.currenttransaction()
       
   278         if tr:
       
   279             tr.addfinalize(b'write-revbranchcache', self.write)
       
   280 
       
   281     def write(self, tr=None):
       
   282         """Save branch cache if it is dirty."""
       
   283         repo = self._repo
       
   284         wlock = None
       
   285         step = b''
       
   286         try:
       
   287             # write the new names
       
   288             if self._rbcnamescount < len(self._names):
       
   289                 wlock = repo.wlock(wait=False)
       
   290                 step = b' names'
       
   291                 self._writenames(repo)
       
   292 
       
   293             # write the new revs
       
   294             start = self._rbcrevslen * _rbcrecsize
       
   295             if start != len(self._rbcrevs):
       
   296                 step = b''
       
   297                 if wlock is None:
       
   298                     wlock = repo.wlock(wait=False)
       
   299                 self._writerevs(repo, start)
       
   300 
       
   301         except (IOError, OSError, error.Abort, error.LockError) as inst:
       
   302             repo.ui.debug(
       
   303                 b"couldn't write revision branch cache%s: %s\n"
       
   304                 % (step, stringutil.forcebytestr(inst))
       
   305             )
       
   306         finally:
       
   307             if wlock is not None:
       
   308                 wlock.release()
       
   309 
       
   310     def _writenames(self, repo):
       
   311         """write the new branch names to revbranchcache"""
       
   312         if self._rbcnamescount != 0:
       
   313             f = repo.cachevfs.open(_rbcnames, b'ab')
       
   314             if f.tell() == self._rbcsnameslen:
       
   315                 f.write(b'\0')
       
   316             else:
       
   317                 f.close()
       
   318                 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
       
   319                 self._rbcnamescount = 0
       
   320                 self._rbcrevslen = 0
       
   321         if self._rbcnamescount == 0:
       
   322             # before rewriting names, make sure references are removed
       
   323             repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
       
   324             f = repo.cachevfs.open(_rbcnames, b'wb')
       
   325         f.write(
       
   326             b'\0'.join(
       
   327                 encoding.fromlocal(b)
       
   328                 for b in self._names[self._rbcnamescount :]
       
   329             )
       
   330         )
       
   331         self._rbcsnameslen = f.tell()
       
   332         f.close()
       
   333         self._rbcnamescount = len(self._names)
       
   334 
       
   335     def _writerevs(self, repo, start):
       
   336         """write the new revs to revbranchcache"""
       
   337         revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
       
   338         with repo.cachevfs.open(_rbcrevs, b'ab') as f:
       
   339             if f.tell() != start:
       
   340                 repo.ui.debug(
       
   341                     b"truncating cache/%s to %d\n" % (_rbcrevs, start)
       
   342                 )
       
   343                 f.seek(start)
       
   344                 if f.tell() != start:
       
   345                     start = 0
       
   346                     f.seek(start)
       
   347                 f.truncate()
       
   348             end = revs * _rbcrecsize
       
   349             f.write(self._rbcrevs.slice(start, end))
       
   350         self._rbcrevslen = revs