Mercurial > public > mercurial-scm > hg-stable

diff hgext/fastannotate/context.py @ 39238:1ddb296e0dee
fastannotate: initial import from Facebook's hg-experimental I made as few changes as I could to get the tests to pass, but this was a bit involved due to some churn in the blame code since someone last gave fastannotate any TLC. There's still follow-up work here to rip out support for old versions of hg and to integrate the protocol with modern standards. Some performance numbers (all on my 2016 MacBook Pro with a 2.6Ghz i7): Mercurial mercurial/manifest.py traditional blame time: real 1.050 secs (user 0.990+0.000 sys 0.060+0.000) build cache time: real 5.900 secs (user 5.720+0.000 sys 0.110+0.000) fastannotate time: real 0.120 secs (user 0.100+0.000 sys 0.020+0.000) Mercurial mercurial/localrepo.py traditional blame time: real 3.330 secs (user 3.220+0.000 sys 0.070+0.000) build cache time: real 30.610 secs (user 30.190+0.000 sys 0.230+0.000) fastannotate time: real 0.180 secs (user 0.160+0.000 sys 0.020+0.000) mozilla-central dom/ipc/ContentParent.cpp traditional blame time: real 7.640 secs (user 7.210+0.000 sys 0.380+0.000) build cache time: real 98.650 secs (user 97.000+0.000 sys 0.950+0.000) fastannotate time: real 1.580 secs (user 1.340+0.000 sys 0.240+0.000) mozilla-central dom/base/nsDocument.cpp traditional blame time: real 17.110 secs (user 16.490+0.000 sys 0.500+0.000) build cache time: real 399.750 secs (user 394.520+0.000 sys 2.610+0.000) fastannotate time: real 1.780 secs (user 1.530+0.000 sys 0.240+0.000) So building the cache is expensive (but might be faster with xdiff enabled), but the blame results are *way* faster. Differential Revision: https://phab.mercurial-scm.org/D3994
author: Augie Fackler <augie@google.com>
date: Mon, 30 Jul 2018 22:50:00 -0400
children: 1099d9bbdf9a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fastannotate/context.py	Mon Jul 30 22:50:00 2018 -0400
@@ -0,0 +1,823 @@
+# Copyright 2016-present Facebook. All Rights Reserved.
+#
+# context: context needed to annotate a file
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import collections
+import contextlib
+import hashlib
+import os
+
+from mercurial.i18n import _
+from mercurial import (
+    error,
+    linelog as linelogmod,
+    lock as lockmod,
+    mdiff,
+    node,
+    pycompat,
+    scmutil,
+    util,
+)
+
+from . import (
+    error as faerror,
+    revmap as revmapmod,
+)
+
+# filelog lookup by path; results are cached through util.lrucachefunc
+@util.lrucachefunc
+def _getflog(repo, path):
+    """(repo, str) -> filelog, as returned by repo.file(path)"""
+    flog = repo.file(path)
+    return flog
+
+# extracted from mercurial.context.basefilectx.annotate
+def _parents(f, follow=True):
+    """(fctx, bool) -> [fctx]
+
+    return the parents of f. if follow is False, parents whose path differs
+    from f.path() (i.e. renames) are filtered out. every returned parent has
+    a '_filelog' attribute set.
+    """
+    # Cut _descendantrev here to mitigate the penalty of lazy linkrev
+    # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
+    # from the topmost introrev (= srcrev) down to p.linkrev() if it
+    # isn't an ancestor of the srcrev.
+    # (the bare attribute access below is deliberate; its value is unused)
+    f._changeid
+    pl = f.parents()
+
+    # Don't return renamed parents if we aren't following.
+    if not follow:
+        pl = [p for p in pl if p.path() == f.path()]
+
+    # renamed filectx won't have a filelog yet, so set it
+    # from the cache to save time
+    for p in pl:
+        if '_filelog' not in p.__dict__:
+            p._filelog = _getflog(f._repo, p.path())
+
+    return pl
+
+# extracted from mercurial.context.basefilectx.annotate. slightly modified
+# so it takes a fctx instead of a pair of text and fctx.
+def _decorate(fctx):
+    """(fctx) -> ([(fctx, linenum)], text)
+
+    pair every line of fctx.data() with (fctx, its 0-based line number).
+    """
+    text = fctx.data()
+    newlines = text.count('\n')
+    # a last line that is not newline-terminated still counts as a line
+    if text and not text.endswith('\n'):
+        linecount = newlines + 1
+    else:
+        linecount = newlines
+    annotated = [(fctx, lineno) for lineno in pycompat.xrange(linecount)]
+    return (annotated, text)
+
+# extracted from mercurial.context.basefilectx.annotate. slightly modified
+# so it takes an extra "blocks" parameter calculated elsewhere, instead of
+# calculating diff here.
+def _pair(parent, child, blocks):
+    """copy annotations of unchanged ('=') blocks from parent into child
+
+    parent and child are (annotatedlines, text) pairs. child's annotated
+    line list is modified in place and child is returned.
+    """
+    for (pstart, pend, cstart, cend), kind in blocks:
+        # Changed blocks ('!') or blocks made only of blank lines ('~')
+        # belong to the child, so only equal blocks are taken over.
+        if kind != '=':
+            continue
+        child[0][cstart:cend] = parent[0][pstart:pend]
+    return child
+
+# like scmutil.revsingle, but wrapped in an lru cache, so the state of the
+# returned objects (like manifests) can be reused
+_revsingle = util.lrucachefunc(scmutil.revsingle)
+
+def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
+    """(repo, str, str) -> fctx
+
+    efficiently build the filectx for path at rev in repo.
+
+    resolverev: when True, "rev" is a revset expression (unless it is an int
+    or None); when False, "rev" is a nodeid or a revision number that
+    repo.__getitem__ accepts.
+
+    adjustctx: when not None, the returned fctx points to the changeset that
+    introduces the change (last modified the file). the special value
+    'linkrev' trusts the linkrev without adjusting it, which is noticeably
+    faster for big repos but is incorrect for some cases.
+    """
+    userevset = resolverev and rev is not None and not isinstance(rev, int)
+    if userevset:
+        ctx = _revsingle(repo, rev)
+    else:
+        ctx = repo[rev]
+
+    if adjustctx is None:
+        # No linkrev adjustment needed: build the filectx from the changectx
+        # rev rather than from ctx[path]. It then already carries the
+        # changectx information, so blame -u can look directly at the
+        # commitctx object instead of resolving it through the manifest. In
+        # a lazy-manifest world this can prevent us from downloading a lot
+        # of data.
+        ctxrev = ctx.rev()
+        if ctxrev is None:
+            # a rev of None means the working copy, which is a special case
+            return ctx[path]
+        return repo.filectx(path, changeid=ctxrev)
+
+    fctx = ctx[path]
+    introrev = fctx.linkrev() if adjustctx == 'linkrev' else fctx.introrev()
+    if introrev != ctx.rev():
+        # re-point the fctx at the changeset that introduced this revision
+        fctx._changeid = introrev
+        fctx._changectx = repo[introrev]
+    return fctx
+
+# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
+def encodedir(path):
+    """append '.hg' to directory components ending in .hg, .l, .m or .lock
+
+    only components followed by '/' are rewritten, e.g. 'a.l/b' becomes
+    'a.l.hg/b' while 'a.l' is returned unchanged.
+    """
+    # the replacements are applied one after another, in this order
+    for suffix in ('.hg', '.l', '.m', '.lock'):
+        path = path.replace(suffix + '/', suffix + '.hg/')
+    return path
+
+def hashdiffopts(diffopts):
+    """(diffopts) -> 6-char hex string
+
+    hash the sorted (name, value) pairs of diffopts, taking the names from
+    mdiff.diffopts.defaults.
+    """
+    optnames = mdiff.diffopts.defaults.iterkeys()
+    pairs = sorted((name, getattr(diffopts, name)) for name in optnames)
+    digest = hashlib.sha1(str(pairs)).hexdigest()
+    return digest[:6]
+
+_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)
+
+class annotateopts(object):
+    """like mercurial.mdiff.diffopts, but is for annotate
+
+    diffopts: a mdiff.diffopts instance, or None
+    followrename: follow renames, like "hg annotate -f"
+    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
+    """
+
+    defaults = {
+        'diffopts': None,
+        'followrename': True,
+        'followmerge': True,
+    }
+
+    def __init__(self, **opts):
+        # every known option becomes an attribute; keywords that are not in
+        # "defaults" are ignored
+        for name, default in self.defaults.iteritems():
+            value = opts.get(name, default)
+            setattr(self, name, value)
+
+    @util.propertycache
+    def shortstr(self):
+        """represent opts in a short string, suitable for a directory name"""
+        parts = []
+        if not self.followrename:
+            parts.append('r0')
+        if not self.followmerge:
+            parts.append('m0')
+        if self.diffopts is not None:
+            assert isinstance(self.diffopts, mdiff.diffopts)
+            diffopthash = hashdiffopts(self.diffopts)
+            # diffopts that hash like the defaults do not affect the name
+            if diffopthash != _defaultdiffopthash:
+                parts.append('i' + diffopthash)
+        return ''.join(parts) or 'default'
+
+defaultopts = annotateopts()
+
+class _annotatecontext(object):
+    """do not use this class directly as it does not use lock to protect
+    writes. use "with annotatecontext(...)" instead.
+    """
+
+    def __init__(self, repo, path, linelogpath, revmappath, opts):
+        """remember the repo, the annotated path, the two cache file paths
+        and the annotate options. nothing is read from disk here; linelog
+        and revmap are loaded on first access.
+        """
+        self.repo = repo
+        self.ui = repo.ui
+        self.path = path
+        self.opts = opts
+        # on-disk locations of the two cache structures
+        self.linelogpath = linelogpath
+        self.revmappath = revmappath
+        # lazily loaded, see the "linelog" and "revmap" properties
+        self._linelog = None
+        self._revmap = None
+        # {str: str}, node -> path
+        self._node2path = {}
+
+    @property
+    def linelog(self):
+        """the linelog, loaded from self.linelogpath on first access.
+
+        a missing file, or one that raises LineLogError while being parsed,
+        results in a new empty linelog.
+        """
+        if self._linelog is not None:
+            return self._linelog
+        if not os.path.exists(self.linelogpath):
+            self._linelog = linelogmod.linelog()
+            return self._linelog
+        with open(self.linelogpath, 'rb') as fp:
+            try:
+                self._linelog = linelogmod.linelog.fromdata(fp.read())
+            except linelogmod.LineLogError:
+                # unparsable content: start over with an empty linelog
+                self._linelog = linelogmod.linelog()
+        return self._linelog
+
+    @property
+    def revmap(self):
+        """the revmap, created from self.revmappath on first access"""
+        loaded = self._revmap
+        if loaded is None:
+            loaded = self._revmap = revmapmod.revmap(self.revmappath)
+        return loaded
+
+    def close(self):
+        """write loaded state back and forget it.
+
+        a loaded revmap is flushed, a loaded linelog is encoded and written
+        to self.linelogpath. structures that were never loaded are left
+        untouched, so calling close again is a no-op.
+        """
+        if self._revmap is not None:
+            self._revmap.flush()
+            self._revmap = None
+        if self._linelog is not None:
+            with open(self.linelogpath, 'wb') as f:
+                f.write(self._linelog.encode())
+            self._linelog = None
+
+    # closing also happens when the object is garbage collected
+    __del__ = close
+
+    def rebuild(self):
+        """delete linelog and revmap, useful for rebuilding"""
+        # close() first writes back and drops whatever is loaded, then the
+        # files are removed (best-effort, see _unlinkpaths)
+        self.close()
+        self._node2path.clear()
+        _unlinkpaths([self.revmappath, self.linelogpath])
+
+    @property
+    def lastnode(self):
+        """return last node in revmap, or None if revmap is empty"""
+        loaded = self._revmap
+        if loaded is not None:
+            return loaded.rev2hsh(loaded.maxrev)
+        # fast path, read revmap without loading its full content
+        return revmapmod.getlastnode(self.revmappath)
+
+    def isuptodate(self, master, strict=True):
+        """return True if the revmap / linelog is up-to-date, or the file
+        does not exist in the master revision. False otherwise.
+
+        it tries to be fast and could return false negatives, because of the
+        use of linkrev instead of introrev.
+
+        useful for both server and client to decide whether to update
+        fastannotate cache or not.
+
+        if strict is True, even if fctx exists in the revmap, but is not the
+        last node, isuptodate will return False. it's good for performance - no
+        expensive check was done.
+
+        if strict is False, if fctx exists in the revmap, this function may
+        return True. this is useful for the client to skip downloading the
+        cache if the client's master is behind the server's.
+
+        a LookupError raised while resolving or checking is treated as
+        "up-to-date" (True is returned).
+        """
+        lastnode = self.lastnode
+        try:
+            f = self._resolvefctx(master, resolverev=True)
+            # choose linkrev instead of introrev as the check is meant to be
+            # *fast*.
+            linknode = self.repo.changelog.node(f.linkrev())
+            if not strict and lastnode and linknode != lastnode:
+                # check if linknode is in the revmap. note: this loads the
+                # revmap and can be slow.
+                return self.revmap.hsh2rev(linknode) is not None
+            # avoid resolving old manifest, or slow adjustlinkrev to be fast,
+            # false negatives are acceptable in this case.
+            return linknode == lastnode
+        except LookupError:
+            # master does not have the file, or the revmap is ahead
+            return True
+
+    def annotate(self, rev, master=None, showpath=False, showlines=False):
+        """incrementally update the cache so it includes revisions in the main
+        branch till 'master'. and run annotate on 'rev', which may or may not be
+        included in the main branch.
+
+        if master is None, do not update linelog.
+
+        the first value returned is the annotate result, it is [(node, linenum)]
+        by default. [(node, linenum, path)] if showpath is True.
+
+        if showlines is True, a second value will be returned, it is a list of
+        corresponding line contents.
+
+        raises error.Abort if master resolves to the working copy, and
+        whatever _checklastmasterhead raises if the linelog cannot be
+        continued from where it stopped.
+        """
+
+        # the fast path test requires commit hash, convert rev number to hash,
+        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
+        # command could give us a revision number even if the user passes a
+        # commit hash.
+        if isinstance(rev, int):
+            rev = node.hex(self.repo.changelog.node(rev))
+
+        # fast path: if rev is in the main branch already
+        directly, revfctx = self.canannotatedirectly(rev)
+        if directly:
+            if self.ui.debugflag:
+                self.ui.debug('fastannotate: %s: using fast path '
+                              '(resolved fctx: %s)\n'
+                              % (self.path, util.safehasattr(revfctx, 'node')))
+            return self.annotatedirectly(revfctx, showpath, showlines)
+
+        # resolve master
+        masterfctx = None
+        if master:
+            try:
+                masterfctx = self._resolvefctx(master, resolverev=True,
+                                               adjustctx=True)
+            except LookupError: # master does not have the file
+                pass
+            else:
+                if masterfctx in self.revmap: # no need to update linelog
+                    masterfctx = None
+
+        #                  ... - @ <- rev (can be an arbitrary changeset,
+        #                 /                not necessarily a descendant
+        #      master -> o                 of master)
+        #                |
+        #     a merge -> o         'o': new changesets in the main branch
+        #                |\        '#': revisions in the main branch that
+        #                o *            exist in linelog / revmap
+        #                | .       '*': changesets in side branches, or
+        # last master -> # .            descendants of master
+        #                | .
+        #                # *       joint: '#', and is a parent of a '*'
+        #                |/
+        #     a joint -> # ^^^^ --- side branches
+        #                |
+        #                ^ --- main branch (in linelog)
+
+        # these DFSes are similar to the traditional annotate algorithm.
+        # we cannot really reuse the code for perf reason.
+
+        # 1st DFS calculates merges, joint points, and needed.
+        # "needed" is a simple reference counting dict to free items in
+        # "hist", reducing its memory usage otherwise could be huge.
+        initvisit = [revfctx]
+        if masterfctx:
+            if masterfctx.rev() is None:
+                raise error.Abort(_('cannot update linelog to wdir()'),
+                                  hint=_('set fastannotate.mainbranch'))
+            initvisit.append(masterfctx)
+        visit = initvisit[:]
+        pcache = {}
+        needed = {revfctx: 1}
+        hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
+        while visit:
+            f = visit.pop()
+            if f in pcache or f in hist:
+                continue
+            if f in self.revmap: # in the old main branch, it's a joint
+                llrev = self.revmap.hsh2rev(f.node())
+                self.linelog.annotate(llrev)
+                result = self.linelog.annotateresult
+                hist[f] = (result, f.data())
+                continue
+            pl = self._parentfunc(f)
+            pcache[f] = pl
+            for p in pl:
+                needed[p] = needed.get(p, 0) + 1
+                if p not in pcache:
+                    visit.append(p)
+
+        # 2nd (simple) DFS calculates new changesets in the main branch
+        # ('o' nodes in the above graph), so we know when to update linelog.
+        # only the first parent is followed.
+        newmainbranch = set()
+        f = masterfctx
+        while f and f not in self.revmap:
+            newmainbranch.add(f)
+            pl = pcache[f]
+            if pl:
+                f = pl[0]
+            else:
+                f = None
+                break
+
+        # f, if present, is the position where the last build stopped at, and
+        # should be the "master" last time. check to see if we can continue
+        # building the linelog incrementally. (we cannot if diverged)
+        if masterfctx is not None:
+            self._checklastmasterhead(f)
+
+        if self.ui.debugflag:
+            if newmainbranch:
+                self.ui.debug('fastannotate: %s: %d new changesets in the main'
+                              ' branch\n' % (self.path, len(newmainbranch)))
+            elif not hist: # no joints, no updates
+                self.ui.debug('fastannotate: %s: linelog cannot help in '
+                              'annotating this revision\n' % self.path)
+
+        # prepare annotateresult so we can update linelog incrementally
+        self.linelog.annotate(self.linelog.maxrev)
+
+        # 3rd DFS does the actual annotate
+        # a node stays on the stack until all of its parents are in "hist"
+        visit = initvisit[:]
+        progress = 0
+        while visit:
+            f = visit[-1]
+            if f in hist:
+                visit.pop()
+                continue
+
+            ready = True
+            pl = pcache[f]
+            for p in pl:
+                if p not in hist:
+                    ready = False
+                    visit.append(p)
+            if not ready:
+                continue
+
+            visit.pop()
+            blocks = None # mdiff blocks, used for appending linelog
+            ismainbranch = (f in newmainbranch)
+            # curr is the same as the traditional annotate algorithm,
+            # if we only care about linear history (do not follow merge),
+            # then curr is not actually used.
+            assert f not in hist
+            curr = _decorate(f)
+            for i, p in enumerate(pl):
+                bs = list(self._diffblocks(hist[p][1], curr[1]))
+                # only the diff against the first parent goes to the linelog
+                if i == 0 and ismainbranch:
+                    blocks = bs
+                curr = _pair(hist[p], curr, bs)
+                # drop a parent from "hist" once its last child has used it
+                if needed[p] == 1:
+                    del hist[p]
+                    del needed[p]
+                else:
+                    needed[p] -= 1
+
+            hist[f] = curr
+            del pcache[f]
+
+            if ismainbranch: # need to write to linelog
+                if not self.ui.quiet:
+                    progress += 1
+                    self.ui.progress(_('building cache'), progress,
+                                     total=len(newmainbranch))
+                bannotated = None
+                if len(pl) == 2 and self.opts.followmerge: # merge
+                    bannotated = curr[0]
+                if blocks is None: # no parents, add an empty one
+                    blocks = list(self._diffblocks('', curr[1]))
+                self._appendrev(f, blocks, bannotated)
+            elif showpath: # not append linelog, but we need to record path
+                self._node2path[f.node()] = f.path()
+
+        # NOTE(review): ui.write() is called without arguments here - confirm
+        # that this is enough to clear the progress bar
+        if progress: # clean progress bar
+            self.ui.write()
+
+        # entries of "hist" hold either linelog revs (ints) or fctxs; both
+        # are converted to nodes here
+        result = [
+            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
+            for fr, l in hist[revfctx][0]] # [(node, linenumber)]
+        return self._refineannotateresult(result, revfctx, showpath, showlines)
+
+    def canannotatedirectly(self, rev):
+        """(str) -> bool, fctx or node.
+        return (True, f) if we can annotate without updating the linelog, pass
+        f to annotatedirectly.
+        return (False, f) if we need extra calculation. f is the fctx resolved
+        from rev.
+        """
+        result = True
+        f = None
+        if not isinstance(rev, int) and rev is not None:
+            # a rev of length 20 is taken as-is (bytes), one of length 40
+            # goes through node.bin; any other length gives None
+            hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
+            if hsh is not None and (hsh, self.path) in self.revmap:
+                # the node is known to the revmap, no fctx has to be built
+                f = hsh
+        if f is None:
+            adjustctx = 'linkrev' if self._perfhack else True
+            f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
+            result = f in self.revmap
+            if not result and self._perfhack:
+                # redo the resolution without perfhack - as we are going to
+                # do write operations, we need a correct fctx.
+                f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
+        return result, f
+
+    def annotatealllines(self, rev, showpath=False, showlines=False):
+        """(rev : str) -> [(node : str, linenum : int, path : str)]
+
+        the result has the same format as annotate, but includes all
+        (including deleted) lines up to rev. call this after calling
+        annotate(rev, ...) for better performance and accuracy.
+
+        if showlines is True, (result, linecontents) is returned instead.
+        """
+        revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)
+
+        # find a chain from rev to anything in the mainbranch
+        if revfctx not in self.revmap:
+            chain = [revfctx]
+            # "a" is the text the oldest chain element is diffed against; it
+            # stays empty if the chain ends at a revision without parents
+            a = ''
+            while True:
+                f = chain[-1]
+                pl = self._parentfunc(f)
+                if not pl:
+                    break
+                if pl[0] in self.revmap:
+                    a = pl[0].data()
+                    break
+                chain.append(pl[0])
+
+            # both self.linelog and self.revmap are backed by the filesystem.
+            # now we want to modify them but do not want to write changes
+            # back to files. so we create in-memory objects and copy them.
+            # it's like a "fork".
+            linelog = linelogmod.linelog()
+            linelog.copyfrom(self.linelog)
+            linelog.annotate(linelog.maxrev)
+            revmap = revmapmod.revmap()
+            revmap.copyfrom(self.revmap)
+
+            # append the chain, oldest first, to the in-memory copies
+            for f in reversed(chain):
+                b = f.data()
+                blocks = list(self._diffblocks(a, b))
+                self._doappendrev(linelog, revmap, f, blocks)
+                a = b
+        else:
+            # fastpath: use existing linelog, revmap as we don't write to them
+            linelog = self.linelog
+            revmap = self.revmap
+
+        lines = linelog.getalllines()
+        hsh = revfctx.node()
+        llrev = revmap.hsh2rev(hsh)
+        # keep only lines whose linelog rev is not newer than rev
+        result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
+        # cannot use _refineannotateresult since we need custom logic for
+        # resolving line contents
+        if showpath:
+            result = self._addpathtoresult(result, revmap)
+        if showlines:
+            linecontents = self._resolvelines(result, revmap, linelog)
+            result = (result, linecontents)
+        return result
+
+    def _resolvelines(self, annotateresult, revmap, linelog):
+        """(annotateresult) -> [line]. designed for annotatealllines.
+        this is probably the most inefficient code in the whole fastannotate
+        directory. but we have made a decision that the linelog does not
+        store line contents. so getting them requires random accesses to
+        the revlog data, since they can be many, it can be very slow.
+
+        raises faerror.CorruptedFileError if the number of lines of a file
+        revision differs from the number of lines the linelog annotates for
+        it.
+        """
+        # [llrev]
+        revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
+        result = [None] * len(annotateresult)
+        # {(rev, linenum): [lineindex]}
+        key2idxs = collections.defaultdict(list)
+        for i in pycompat.xrange(len(result)):
+            key2idxs[(revs[i], annotateresult[i][1])].append(i)
+        while key2idxs:
+            # find an unresolved line and its linelog rev to annotate
+            hsh = None
+            # NOTE(review): a for loop does not raise StopIteration by itself,
+            # so the handler below looks unreachable unless rev2flag raises
+            # it - confirm
+            try:
+                for (rev, _linenum), idxs in key2idxs.iteritems():
+                    if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
+                        continue
+                    hsh = annotateresult[idxs[0]][0]
+                    break
+            except StopIteration: # no more unresolved lines
+                return result
+            if hsh is None:
+                # the remaining key2idxs are not in main branch, resolving them
+                # using the hard way...
+                revlines = {}
+                for (rev, linenum), idxs in key2idxs.iteritems():
+                    if rev not in revlines:
+                        hsh = annotateresult[idxs[0]][0]
+                        if self.ui.debugflag:
+                            self.ui.debug('fastannotate: reading %s line #%d '
+                                          'to resolve lines %r\n'
+                                          % (node.short(hsh), linenum, idxs))
+                        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
+                        lines = mdiff.splitnewlines(fctx.data())
+                        revlines[rev] = lines
+                    for idx in idxs:
+                        result[idx] = revlines[rev][linenum]
+                assert all(x is not None for x in result)
+                return result
+
+            # run the annotate and the lines should match to the file content
+            self.ui.debug('fastannotate: annotate %s to resolve lines\n'
+                          % node.short(hsh))
+            # "rev" is the loop variable left over from the search above,
+            # i.e. the linelog rev that belongs to hsh
+            linelog.annotate(rev)
+            fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
+            annotated = linelog.annotateresult
+            lines = mdiff.splitnewlines(fctx.data())
+            if len(lines) != len(annotated):
+                raise faerror.CorruptedFileError('unexpected annotated lines')
+            # resolve lines from the annotate result
+            for i, line in enumerate(lines):
+                k = annotated[i]
+                if k in key2idxs:
+                    for idx in key2idxs[k]:
+                        result[idx] = line
+                    del key2idxs[k]
+        return result
+
+    def annotatedirectly(self, f, showpath, showlines):
+        """annotate a revision that is already in the linelog main branch.
+
+        f is either a 20-char str (node) or a fctx. passing a node is the
+        cheapest: neither the filelog is read nor a filecontext constructed.
+
+        raises faerror.CorruptedFileError if the node is not in the revmap,
+        or is only known there as a side branch revision.
+        """
+        hsh = f if isinstance(f, str) else f.node()
+        llrev = self.revmap.hsh2rev(hsh)
+        if not llrev:
+            raise faerror.CorruptedFileError('%s is not in revmap'
+                                             % node.hex(hsh))
+        flag = self.revmap.rev2flag(llrev)
+        if (flag & revmapmod.sidebranchflag) != 0:
+            raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
+                                             % node.hex(hsh))
+        self.linelog.annotate(llrev)
+        # translate linelog revs back to nodes
+        result = []
+        for r, l in self.linelog.annotateresult:
+            result.append((self.revmap.rev2hsh(r), l))
+        return self._refineannotateresult(result, f, showpath, showlines)
+
+    def _refineannotateresult(self, result, f, showpath, showlines):
+        """optionally extend result with paths and line contents, both of
+        which can be expensive to get. f could be either node or fctx.
+
+        returns result, or (result, lines) if showlines is True.
+        """
+        if showpath:
+            result = self._addpathtoresult(result)
+        if not showlines:
+            return result
+
+        if isinstance(f, str): # f: node or fctx
+            llrev = self.revmap.hsh2rev(f)
+            fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
+        else:
+            fctx = f
+        lines = mdiff.splitnewlines(fctx.data())
+        if len(lines) != len(result): # linelog is probably corrupted
+            raise faerror.CorruptedFileError()
+        return (result, lines)
+
+    def _appendrev(self, fctx, blocks, bannotated=None):
+        """append fctx to this context's own linelog and revmap"""
+        self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
+
+    def _diffblocks(self, a, b):
+        """diff text a against text b with mdiff.allblocks, using the
+        diffopts of this context's annotate options"""
+        return mdiff.allblocks(a, b, self.opts.diffopts)
+
+    @staticmethod
+    def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
+        """append a revision to linelog and revmap
+
+        blocks are the diff blocks leading to fctx's content. if bannotated
+        (the annotated lines of fctx, [(linelog rev or fctx, linenum)]) is
+        given, changed lines keep their own origin and fctxs found there are
+        added to revmap as side branch revisions first.
+        """
+
+        def getllrev(f):
+            """(fctx) -> int"""
+            # f should not be a linelog revision
+            if isinstance(f, int):
+                raise error.ProgrammingError('f should not be an int')
+            # f is a fctx, allocate linelog rev on demand
+            hsh = f.node()
+            rev = revmap.hsh2rev(hsh)
+            if rev is None:
+                rev = revmap.append(hsh, sidebranch=True, path=f.path())
+            return rev
+
+        # append sidebranch revisions to revmap
+        siderevs = []
+        siderevmap = {} # node: int
+        if bannotated is not None:
+            for (a1, a2, b1, b2), op in blocks:
+                if op != '=':
+                    # f could be either linelog rev, or fctx.
+                    siderevs += [f for f, l in bannotated[b1:b2]
+                                 if not isinstance(f, int)]
+        siderevs = set(siderevs)
+        if fctx in siderevs: # mainnode must be appended separately
+            siderevs.remove(fctx)
+        for f in siderevs:
+            siderevmap[f] = getllrev(f)
+
+        # the changeset in the main branch, could be a merge
+        llrev = revmap.append(fctx.node(), path=fctx.path())
+        siderevmap[fctx] = llrev
+
+        # changed blocks are applied from the last one to the first one
+        for (a1, a2, b1, b2), op in reversed(blocks):
+            if op == '=':
+                continue
+            if bannotated is None:
+                linelog.replacelines(llrev, a1, a2, b1, b2)
+            else:
+                # linelog revs are kept, fctxs are mapped to their llrev
+                blines = [((r if isinstance(r, int) else siderevmap[r]), l)
+                          for r, l in bannotated[b1:b2]]
+                linelog.replacelines_vec(llrev, a1, a2, blines)
+
+    def _addpathtoresult(self, annotateresult, revmap=None):
+        """([(node, linenum)], revmap) -> [(node, linenum, path)]
+
+        revmap defaults to self.revmap. looked up paths are remembered in
+        self._node2path.
+        """
+        if revmap is None:
+            revmap = self.revmap
+
+        def _getpath(nodeid):
+            # consult the node -> path cache first, fall back to the revmap
+            path = self._node2path.get(nodeid)
+            if path is None:
+                path = revmap.rev2path(revmap.hsh2rev(nodeid))
+                self._node2path[nodeid] = path
+            return path
+
+        return [(n, l, _getpath(n)) for n, l in annotateresult]
+
+    def _checklastmasterhead(self, fctx):
+        """raise faerror.CannotReuseError unless fctx is where the linelog
+        currently ends, i.e. it was the master's head last time.
+
+        a fctx of None stands for linelog rev 0.
+        """
+        expected = 0
+        if fctx is not None:
+            expected = self.revmap.hsh2rev(fctx.node())
+            if not expected:
+                raise faerror.CannotReuseError()
+        if self.linelog.maxrev != expected:
+            raise faerror.CannotReuseError()
+
+    @util.propertycache
+    def _parentfunc(self):
+        """-> (fctx) -> [fctx]
+
+        build the function used to get the parents of a fctx, honoring the
+        followrename and followmerge options.
+        """
+        followrename = self.opts.followrename
+        followmerge = self.opts.followmerge
+
+        def parents(f):
+            found = _parents(f, follow=followrename)
+            # without followmerge only the first parent is kept
+            return found if followmerge else found[:1]
+
+        return parents
+
+    @util.propertycache
+    def _perfhack(self):
+        """value of the boolean config option fastannotate.perfhack"""
+        return self.ui.configbool('fastannotate', 'perfhack')
+
+    def _resolvefctx(self, rev, path=None, **kwds):
+        """resolvefctx for this context's repo; path defaults to self.path"""
+        return resolvefctx(self.repo, rev, (path or self.path), **kwds)
+
+def _unlinkpaths(paths):
+    """silent, best-effort unlink"""
+    for path in paths:
+        try:
+            util.unlink(path)
+        except OSError:
+            # deliberately ignored: a path that cannot be removed must not
+            # stop the remaining ones from being tried
+            pass
+
+class pathhelper(object):
+    """helper for getting paths for lockfile, linelog and revmap
+
+    all paths live below 'fastannotate/<opts.shortstr>/' in the repo vfs and
+    are derived from the encoded (see encodedir) path of the annotated file.
+    """
+
+    def __init__(self, repo, path, opts=defaultopts):
+        # different options use different directories
+        self._vfspath = os.path.join('fastannotate',
+                                     opts.shortstr, encodedir(path))
+        self._repo = repo
+
+    @property
+    def dirname(self):
+        """directory containing the cache files"""
+        return os.path.dirname(self._repo.vfs.join(self._vfspath))
+
+    @property
+    def linelogpath(self):
+        """path of the linelog file (suffix '.l')"""
+        return self._repo.vfs.join(self._vfspath + '.l')
+
+    def lock(self):
+        """return a lockmod.lock on the '.lock' file in the repo vfs"""
+        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')
+
+    @contextlib.contextmanager
+    def _lockflock(self):
+        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
+        creating temporary symlinks.
+
+        the linelog file itself is flock-ed (and created if missing).
+        """
+        import fcntl
+        lockpath = self.linelogpath
+        util.makedirs(os.path.dirname(lockpath))
+        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
+        # from here on the descriptor must be closed whatever happens, even
+        # if taking or releasing the lock fails
+        try:
+            fcntl.flock(lockfd, fcntl.LOCK_EX)
+            try:
+                yield
+            finally:
+                fcntl.flock(lockfd, fcntl.LOCK_UN)
+        finally:
+            os.close(lockfd)
+
+    @property
+    def revmappath(self):
+        """path of the revmap file (suffix '.m')"""
+        return self._repo.vfs.join(self._vfspath + '.m')
+
+@contextlib.contextmanager
+def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
+    """context needed to perform (fast) annotate on a file
+
+    an annotatecontext of a single file consists of two structures: the
+    linelog and the revmap. this function takes care of locking. only 1
+    process is allowed to write that file's linelog and revmap at a time.
+
+    when something goes wrong, this function will assume the linelog and the
+    revmap are in a bad state, and remove them from disk.
+
+    if rebuild is True, the existing linelog and revmap are deleted before
+    the context is handed out.
+
+    use this function in the following way:
+
+        with annotatecontext(...) as actx:
+            actx. ....
+    """
+    helper = pathhelper(repo, path, opts)
+    util.makedirs(helper.dirname)
+    revmappath = helper.revmappath
+    linelogpath = helper.linelogpath
+    with helper.lock():
+        actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
+        # error handling and the final close() stay inside the lock, so
+        # every write to (or removal of) the cache files is protected by it
+        try:
+            if rebuild:
+                actx.rebuild()
+            yield actx
+        except Exception:
+            actx.rebuild()
+            repo.ui.debug('fastannotate: %s: cache broken and deleted\n'
+                          % path)
+            raise
+        finally:
+            actx.close()
+
+def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
+    """build an annotatecontext from a fctx instead of a (repo, path) pair.
+    convenient when used in fctx.annotate.
+
+    the config option fastannotate.forcefollow (on by default) overrides
+    "follow" with True.
+    """
+    repo = fctx._repo
+    path = fctx._path
+    forcefollow = repo.ui.configbool('fastannotate', 'forcefollow', True)
+    followrename = True if forcefollow else follow
+    aopts = annotateopts(diffopts=diffopts, followrename=followrename)
+    return annotatecontext(repo, path, aopts, rebuild)
author	Augie Fackler <augie@google.com>
date	Mon, 30 Jul 2018 22:50:00 -0400
parents
children	1099d9bbdf9a