mercurial/branchmap.py
changeset 23785 cb99bacb9b4e
parent 22357 9c3c3dc14a65
child 23786 7d63398fbfd1
--- a/mercurial/branchmap.py	Fri Jan 09 22:53:38 2015 +0800
+++ b/mercurial/branchmap.py	Thu Jan 08 00:01:03 2015 +0100
@@ -9,6 +9,8 @@
 import encoding
 import util
 import time
+from array import array
+from struct import calcsize, pack, unpack
 
 def _filename(repo):
     """name of a branchcache file for a given repo or repoview"""
@@ -285,3 +287,150 @@
         duration = time.time() - starttime
         repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                     repo.filtername, duration)
+
+# Revision branch info cache
+
+_rbcversion = '-v1'
+_rbcnames = 'cache/rbc-names' + _rbcversion
+_rbcrevs = 'cache/rbc-revs' + _rbcversion
+# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
+_rbcrecfmt = '>4sI'
+_rbcrecsize = calcsize(_rbcrecfmt)
+_rbcnodelen = 4
+_rbcbranchidxmask = 0x7fffffff
+_rbccloseflag = 0x80000000
+
+class revbranchcache(object):
+    """Persistent cache, mapping from revision number to branch name and close.
+    This is a low level cache, independent of filtering.
+
+    Branch names are stored in rbc-names in internal encoding separated by 0.
+    rbc-names is append-only, and each branch name is only stored once and will
+    thus have a unique index.
+
+    The branch info for each revision is stored in rbc-revs as constant size
+    records. The whole file is read into memory, but it is only 'parsed' on
+    demand. The file is usually append-only but will be truncated if repo
+    modification is detected.
+    The record for each revision contains the first 4 bytes of the
+    corresponding node hash, and the record is only used if it still matches.
+    Even a completely trashed rbc-revs fill thus still give the right result
+    while converging towards full recovery ... assuming no incorrectly matching
+    node hashes.
+    The record also contains 4 bytes where 31 bits contains the index of the
+    branch and the last bit indicate that it is a branch close commit.
+    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
+    and will grow with it but be 1/8th of its size.
+    """
+
+    def __init__(self, repo):
+        assert repo.filtername is None
+        self._names = [] # branch names in local encoding with static index
+        self._rbcrevs = array('c') # structs of type _rbcrecfmt
+        self._rbcsnameslen = 0
+        try:
+            bndata = repo.vfs.read(_rbcnames)
+            self._rbcsnameslen = len(bndata) # for verification before writing
+            self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
+        except (IOError, OSError), inst:
+            repo.ui.debug("couldn't read revision branch cache names: %s\n" %
+                          inst)
+        if self._names:
+            try:
+                data = repo.vfs.read(_rbcrevs)
+                self._rbcrevs.fromstring(data)
+            except (IOError, OSError), inst:
+                repo.ui.debug("couldn't read revision branch cache: %s\n" %
+                              inst)
+        # remember number of good records on disk
+        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
+                               len(repo.changelog))
+        if self._rbcrevslen == 0:
+            self._names = []
+        self._rbcnamescount = len(self._names) # number of good names on disk
+        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
+
+    def branchinfo(self, changelog, rev):
+        """Return branch name and close flag for rev, using and updating
+        persistent cache."""
+        rbcrevidx = rev * _rbcrecsize
+
+        # if requested rev is missing, add and populate all missing revs
+        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
+            first = len(self._rbcrevs) // _rbcrecsize
+            self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
+                                         len(self._rbcrevs)))
+            for r in xrange(first, len(changelog)):
+                self._branchinfo(r)
+
+        # fast path: extract data from cache, use it if node is matching
+        reponode = changelog.node(rev)[:_rbcnodelen]
+        cachenode, branchidx = unpack(
+            _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
+        close = bool(branchidx & _rbccloseflag)
+        if close:
+            branchidx &= _rbcbranchidxmask
+        if cachenode == reponode:
+            return self._names[branchidx], close
+        # fall back to slow path and make sure it will be written to disk
+        self._rbcrevslen = min(self._rbcrevslen, rev)
+        return self._branchinfo(rev)
+
+    def _branchinfo(self, changelog, rev):
+        """Retrieve branch info from changelog and update _rbcrevs"""
+        b, close = changelog.branchinfo(rev)
+        if b in self._namesreverse:
+            branchidx = self._namesreverse[b]
+        else:
+            branchidx = len(self._names)
+            self._names.append(b)
+            self._namesreverse[b] = branchidx
+        reponode = changelog.node(rev)
+        if close:
+            branchidx |= _rbccloseflag
+        rbcrevidx = rev * _rbcrecsize
+        rec = array('c')
+        rec.fromstring(pack(_rbcrecfmt, reponode, branchidx))
+        self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
+        return b, close
+
+    def write(self, repo):
+        """Save branch cache if it is dirty."""
+        if self._rbcnamescount < len(self._names):
+            try:
+                if self._rbcnamescount != 0:
+                    f = repo.vfs.open(_rbcnames, 'ab')
+                    if f.tell() == self._rbcsnameslen:
+                        f.write('\0')
+                    else:
+                        f.close()
+                        self._rbcnamescount = 0
+                        self._rbcrevslen = 0
+                if self._rbcnamescount == 0:
+                    f = repo.vfs.open(_rbcnames, 'wb')
+                f.write('\0'.join(encoding.fromlocal(b)
+                                  for b in self._names[self._rbcnamescount:]))
+                self._rbcsnameslen = f.tell()
+                f.close()
+            except (IOError, OSError, util.Abort), inst:
+                repo.ui.debug("couldn't write revision branch cache names: "
+                              "%s\n" % inst)
+                return
+            self._rbcnamescount = len(self._names)
+
+        start = self._rbcrevslen * _rbcrecsize
+        if start != len(self._rbcrevs):
+            self._rbcrevslen = min(len(repo.changelog),
+                                   len(self._rbcrevs) // _rbcrecsize)
+            try:
+                f = repo.vfs.open(_rbcrevs, 'ab')
+                if f.tell() != start:
+                    f.seek(start)
+                    f.truncate()
+                end = self._rbcrevslen * _rbcrecsize
+                f.write(self._rbcrevs[start:end])
+                f.close()
+            except (IOError, OSError, util.Abort), inst:
+                repo.ui.debug("couldn't write revision branch cache: %s\n" %
+                              inst)
+                return