diff hgext/fastannotate/commands.py @ 39210:1ddb296e0dee

fastannotate: initial import from Facebook's hg-experimental I made as few changes as I could to get the tests to pass, but this was a bit involved due to some churn in the blame code since someone last gave fastannotate any TLC. There's still follow-up work here to rip out support for old versions of hg and to integrate the protocol with modern standards. Some performance numbers (all on my 2016 MacBook Pro with a 2.6Ghz i7): Mercurial mercurial/manifest.py traditional blame time: real 1.050 secs (user 0.990+0.000 sys 0.060+0.000) build cache time: real 5.900 secs (user 5.720+0.000 sys 0.110+0.000) fastannotate time: real 0.120 secs (user 0.100+0.000 sys 0.020+0.000) Mercurial mercurial/localrepo.py traditional blame time: real 3.330 secs (user 3.220+0.000 sys 0.070+0.000) build cache time: real 30.610 secs (user 30.190+0.000 sys 0.230+0.000) fastannotate time: real 0.180 secs (user 0.160+0.000 sys 0.020+0.000) mozilla-central dom/ipc/ContentParent.cpp traditional blame time: real 7.640 secs (user 7.210+0.000 sys 0.380+0.000) build cache time: real 98.650 secs (user 97.000+0.000 sys 0.950+0.000) fastannotate time: real 1.580 secs (user 1.340+0.000 sys 0.240+0.000) mozilla-central dom/base/nsDocument.cpp traditional blame time: real 17.110 secs (user 16.490+0.000 sys 0.500+0.000) build cache time: real 399.750 secs (user 394.520+0.000 sys 2.610+0.000) fastannotate time: real 1.780 secs (user 1.530+0.000 sys 0.240+0.000) So building the cache is expensive (but might be faster with xdiff enabled), but the blame results are *way* faster. Differential Revision: https://phab.mercurial-scm.org/D3994
author Augie Fackler <augie@google.com>
date Mon, 30 Jul 2018 22:50:00 -0400
parents
children b3572f733dbd
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fastannotate/commands.py	Mon Jul 30 22:50:00 2018 -0400
@@ -0,0 +1,281 @@
+# Copyright 2016-present Facebook. All Rights Reserved.
+#
+# commands: fastannotate commands
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import os
+
+from mercurial.i18n import _
+from mercurial import (
+    commands,
+    error,
+    extensions,
+    patch,
+    pycompat,
+    registrar,
+    scmutil,
+    util,
+)
+
+from . import (
+    context as facontext,
+    error as faerror,
+    formatter as faformatter,
+)
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+def _matchpaths(repo, rev, pats, opts, aopts=facontext.defaultopts):
+    """generate paths matching given patterns"""
+    perfhack = repo.ui.configbool('fastannotate', 'perfhack')
+
+    # disable perfhack if:
+    # a) any walkopt is used
+    # b) if we treat pats as plain file names, some of them do not have
+    #    corresponding linelog files
+    if perfhack:
+        # cwd related to reporoot
+        reporoot = os.path.dirname(repo.path)
+        reldir = os.path.relpath(pycompat.getcwd(), reporoot)
+        if reldir == '.':
+            reldir = ''
+        if any(opts.get(o[1]) for o in commands.walkopts): # a)
+            perfhack = False
+        else: # b)
+            relpats = [os.path.relpath(p, reporoot) if os.path.isabs(p) else p
+                       for p in pats]
+            # disable perfhack on '..' since it allows escaping from the repo
+            if any(('..' in f or
+                    not os.path.isfile(
+                        facontext.pathhelper(repo, f, aopts).linelogpath))
+                   for f in relpats):
+                perfhack = False
+
+    # perfhack: emit paths directory without checking with manifest
+    # this can be incorrect if the rev dos not have file.
+    if perfhack:
+        for p in relpats:
+            yield os.path.join(reldir, p)
+    else:
+        def bad(x, y):
+            raise error.Abort("%s: %s" % (x, y))
+        ctx = scmutil.revsingle(repo, rev)
+        m = scmutil.match(ctx, pats, opts, badfn=bad)
+        for p in ctx.walk(m):
+            yield p
+
+fastannotatecommandargs = {
+    'options': [
+        ('r', 'rev', '.', _('annotate the specified revision'), _('REV')),
+        ('u', 'user', None, _('list the author (long with -v)')),
+        ('f', 'file', None, _('list the filename')),
+        ('d', 'date', None, _('list the date (short with -q)')),
+        ('n', 'number', None, _('list the revision number (default)')),
+        ('c', 'changeset', None, _('list the changeset')),
+        ('l', 'line-number', None, _('show line number at the first '
+                                     'appearance')),
+        ('e', 'deleted', None, _('show deleted lines (slow) (EXPERIMENTAL)')),
+        ('', 'no-content', None, _('do not show file content (EXPERIMENTAL)')),
+        ('', 'no-follow', None, _("don't follow copies and renames")),
+        ('', 'linear', None, _('enforce linear history, ignore second parent '
+                               'of merges (EXPERIMENTAL)')),
+        ('', 'long-hash', None, _('show long changeset hash (EXPERIMENTAL)')),
+        ('', 'rebuild', None, _('rebuild cache even if it exists '
+                                '(EXPERIMENTAL)')),
+    ] + commands.diffwsopts + commands.walkopts + commands.formatteropts,
+    'synopsis': _('[-r REV] [-f] [-a] [-u] [-d] [-n] [-c] [-l] FILE...'),
+    'inferrepo': True,
+}
+
+def fastannotate(ui, repo, *pats, **opts):
+    """show changeset information by line for each file
+
+    List changes in files, showing the revision id responsible for each line.
+
+    This command is useful for discovering when a change was made and by whom.
+
+    By default this command prints revision numbers. If you include --file,
+    --user, or --date, the revision number is suppressed unless you also
+    include --number. The default format can also be customized by setting
+    fastannotate.defaultformat.
+
+    Returns 0 on success.
+
+    .. container:: verbose
+
+        This command uses an implementation different from the vanilla annotate
+        command, which may produce slightly different (while still reasonable)
+        outputs for some cases.
+
+        Unlike the vanilla anootate, fastannotate follows rename regardless of
+        the existence of --file.
+
+        For the best performance when running on a full repo, use -c, -l,
+        avoid -u, -d, -n. Use --linear and --no-content to make it even faster.
+
+        For the best performance when running on a shallow (remotefilelog)
+        repo, avoid --linear, --no-follow, or any diff options. As the server
+        won't be able to populate annotate cache when non-default options
+        affecting results are used.
+    """
+    if not pats:
+        raise error.Abort(_('at least one filename or pattern is required'))
+
+    # performance hack: filtered repo can be slow. unfilter by default.
+    if ui.configbool('fastannotate', 'unfilteredrepo'):
+        repo = repo.unfiltered()
+
+    rev = opts.get('rev', '.')
+    rebuild = opts.get('rebuild', False)
+
+    diffopts = patch.difffeatureopts(ui, opts, section='annotate',
+                                     whitespace=True)
+    aopts = facontext.annotateopts(
+        diffopts=diffopts,
+        followmerge=not opts.get('linear', False),
+        followrename=not opts.get('no_follow', False))
+
+    if not any(opts.get(s)
+               for s in ['user', 'date', 'file', 'number', 'changeset']):
+        # default 'number' for compatibility. but fastannotate is more
+        # efficient with "changeset", "line-number" and "no-content".
+        for name in ui.configlist('fastannotate', 'defaultformat', ['number']):
+            opts[name] = True
+
+    ui.pager('fastannotate')
+    template = opts.get('template')
+    if template == 'json':
+        formatter = faformatter.jsonformatter(ui, repo, opts)
+    else:
+        formatter = faformatter.defaultformatter(ui, repo, opts)
+    showdeleted = opts.get('deleted', False)
+    showlines = not bool(opts.get('no_content'))
+    showpath = opts.get('file', False)
+
+    # find the head of the main (master) branch
+    master = ui.config('fastannotate', 'mainbranch') or rev
+
+    # paths will be used for prefetching and the real annotating
+    paths = list(_matchpaths(repo, rev, pats, opts, aopts))
+
+    # for client, prefetch from the server
+    if util.safehasattr(repo, 'prefetchfastannotate'):
+        repo.prefetchfastannotate(paths)
+
+    for path in paths:
+        result = lines = existinglines = None
+        while True:
+            try:
+                with facontext.annotatecontext(repo, path, aopts, rebuild) as a:
+                    result = a.annotate(rev, master=master, showpath=showpath,
+                                        showlines=(showlines and
+                                                   not showdeleted))
+                    if showdeleted:
+                        existinglines = set((l[0], l[1]) for l in result)
+                        result = a.annotatealllines(
+                            rev, showpath=showpath, showlines=showlines)
+                break
+            except (faerror.CannotReuseError, faerror.CorruptedFileError):
+                # happens if master moves backwards, or the file was deleted
+                # and readded, or renamed to an existing name, or corrupted.
+                if rebuild: # give up since we have tried rebuild already
+                    raise
+                else: # try a second time rebuilding the cache (slow)
+                    rebuild = True
+                    continue
+
+        if showlines:
+            result, lines = result
+
+        formatter.write(result, lines, existinglines=existinglines)
+    formatter.end()
+
+_newopts = set([])
+_knownopts = set([opt[1].replace('-', '_') for opt in
+                  (fastannotatecommandargs['options'] + commands.globalopts)])
+
+def _annotatewrapper(orig, ui, repo, *pats, **opts):
+    """used by wrapdefault"""
+    # we need this hack until the obsstore has 0.0 seconds perf impact
+    if ui.configbool('fastannotate', 'unfilteredrepo'):
+        repo = repo.unfiltered()
+
+    # treat the file as text (skip the isbinary check)
+    if ui.configbool('fastannotate', 'forcetext'):
+        opts['text'] = True
+
+    # check if we need to do prefetch (client-side)
+    rev = opts.get('rev')
+    if util.safehasattr(repo, 'prefetchfastannotate') and rev is not None:
+        paths = list(_matchpaths(repo, rev, pats, opts))
+        repo.prefetchfastannotate(paths)
+
+    return orig(ui, repo, *pats, **opts)
+
+def registercommand():
+    """register the fastannotate command"""
+    name = '^fastannotate|fastblame|fa'
+    command(name, **fastannotatecommandargs)(fastannotate)
+
+def wrapdefault():
+    """wrap the default annotate command, to be aware of the protocol"""
+    extensions.wrapcommand(commands.table, 'annotate', _annotatewrapper)
+
+@command('debugbuildannotatecache',
+         [('r', 'rev', '', _('build up to the specific revision'), _('REV'))
+         ] + commands.walkopts,
+         _('[-r REV] FILE...'))
+def debugbuildannotatecache(ui, repo, *pats, **opts):
+    """incrementally build fastannotate cache up to REV for specified files
+
+    If REV is not specified, use the config 'fastannotate.mainbranch'.
+
+    If fastannotate.client is True, download the annotate cache from the
+    server. Otherwise, build the annotate cache locally.
+
+    The annotate cache will be built using the default diff and follow
+    options and lives in '.hg/fastannotate/default'.
+    """
+    rev = opts.get('REV') or ui.config('fastannotate', 'mainbranch')
+    if not rev:
+        raise error.Abort(_('you need to provide a revision'),
+                          hint=_('set fastannotate.mainbranch or use --rev'))
+    if ui.configbool('fastannotate', 'unfilteredrepo'):
+        repo = repo.unfiltered()
+    ctx = scmutil.revsingle(repo, rev)
+    m = scmutil.match(ctx, pats, opts)
+    paths = list(ctx.walk(m))
+    if util.safehasattr(repo, 'prefetchfastannotate'):
+        # client
+        if opts.get('REV'):
+            raise error.Abort(_('--rev cannot be used for client'))
+        repo.prefetchfastannotate(paths)
+    else:
+        # server, or full repo
+        for i, path in enumerate(paths):
+            ui.progress(_('building'), i, total=len(paths))
+            with facontext.annotatecontext(repo, path) as actx:
+                try:
+                    if actx.isuptodate(rev):
+                        continue
+                    actx.annotate(rev, rev)
+                except (faerror.CannotReuseError, faerror.CorruptedFileError):
+                    # the cache is broken (could happen with renaming so the
+                    # file history gets invalidated). rebuild and try again.
+                    ui.debug('fastannotate: %s: rebuilding broken cache\n'
+                             % path)
+                    actx.rebuild()
+                    try:
+                        actx.annotate(rev, rev)
+                    except Exception as ex:
+                        # possibly a bug, but should not stop us from building
+                        # cache for other files.
+                        ui.warn(_('fastannotate: %s: failed to '
+                                  'build cache: %r\n') % (path, ex))
+        # clear the progress bar
+        ui.write()