Mercurial > public > mercurial-scm > hg-stable
view contrib/churn.py @ 4531:b51a8138292a
Avoid extra filelogs entries.
Right now, there are some situations in which localrepo.filecommit can
create filelog entries even though they're not needed. For example:
- permissions for a file have changed;
- qrefresh can create a filelog entry identical to its parent (see the
added test);
- convert-repo creates extra filelog entries in every merge where the
first parent has added files (for example, changeset ebebe9577a1a of
the kernel repo added extra filelog entries to files in the
arch/blackfin directory, even though the merge should only touch the
drivers/ata directory). This makes "hg log file" in a converted repo
less useful than it could be, since it may mention many merges that
don't actually touch that specific file.
They all come from the same basic problem: localrepo.commit (through
filecommit) creates new filelog entries for all files passed to it
(except for some cases during a merge).
Patch and test case provided by Benoit.
This should fix issue351.
author | Alexis S. L. Carvalho <alexis@cecm.usp.br> |
---|---|
date | Sat, 09 Jun 2007 01:04:28 -0300 |
parents | ba45041827a2 |
children | 9bbc0217209b |
line wrap: on
line source
# churn.py - create a graph showing who changed the most lines
#
# Copyright 2006 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
#
#
# Aliases map file format is simple one alias per line in the following
# format:
#
# <alias email> <actual email>

import sys
from mercurial.i18n import gettext as _
from mercurial import hg, mdiff, cmdutil, ui, util, templater, node

def __gather(ui, repo, node1, node2):
    """Count the lines changed between node1 and node2.

    Returns a (who, lines) tuple: 'who' is the email extracted from the
    user field of the node2 changeset, 'lines' is the number of added
    plus removed lines over every file reported by repo.status().
    """
    def filetext(f, mmap):
        # A false manifest (None or empty) means "no such side" for this
        # file; empty file contents are likewise handed to unidiff as None.
        if not mmap:
            return None
        return repo.file(f).read(mmap[f]) or None

    def dirtywork(f, mmap1, mmap2):
        # Diff one file between the two manifests and count changed lines.
        lines = 0

        to = filetext(f, mmap1)
        tn = filetext(f, mmap2)

        diff = mdiff.unidiff(to, "", tn, "", f).split("\n")

        for line in diff:
            if not line:
                continue # skip EOF
            if line.startswith(" "):
                continue # context line
            # NOTE(review): this prefix test also skips a removed/added
            # line whose own text starts with "-- " / "++ ".
            if line.startswith("--- ") or line.startswith("+++ "):
                continue # beginning of diff
            if line.startswith("@@ "):
                continue # info line

            # changed lines
            lines += 1

        return lines

    ##

    lines = 0

    changes = repo.status(node1, node2, None, util.always)[:5]

    modified, added, removed, deleted, unknown = changes

    who = repo.changelog.read(node2)[1]
    who = templater.email(who) # get the email of the person

    mmap1 = repo.manifest.read(repo.changelog.read(node1)[0])
    mmap2 = repo.manifest.read(repo.changelog.read(node2)[0])
    for f in modified:
        lines += dirtywork(f, mmap1, mmap2)

    # added files have no old side, removed files have no new side
    for f in added:
        lines += dirtywork(f, None, mmap2)

    for f in removed:
        lines += dirtywork(f, mmap1, None)

    for f in deleted:
        lines += dirtywork(f, mmap1, mmap2)

    for f in unknown:
        lines += dirtywork(f, mmap1, mmap2)

    return (who, lines)

def gather_stats(ui, repo, amap, revs=None, progress=False):
    """Accumulate changed-line counts per author.

    amap maps alias emails to the email they should be counted under.
    revs is a sequence of revision numbers; when empty or None every
    revision in the changelog is examined.  Merge changesets are skipped.
    When progress is true, percentage markers are written through
    ui.write().

    Returns a dict mapping author email to total changed lines.
    """
    stats = {}

    cl = repo.changelog

    if not revs:
        revs = range(0, cl.count())

    nr_revs = len(revs)
    cur_rev = 0

    for rev in revs:
        cur_rev += 1 # next revision

        node2 = cl.node(rev)
        parents = cl.parents(node2)
        node1 = parents[0]

        if parents[1] != node.nullid:
            ui.note(_('Revision %d is a merge, ignoring...\n') % (rev,))
            continue

        who, lines = __gather(ui, repo, node1, node2)

        # remap the owner if possible
        if who in amap:
            ui.note("using '%s' alias for '%s'\n" % (amap[who], who))
            who = amap[who]

        stats[who] = stats.get(who, 0) + lines

        ui.note("rev %d: %d lines by %s\n" % (rev, lines, who))

        if progress:
            # only emit a marker when the integer percentage advances
            if int(100.0*(cur_rev - 1)/nr_revs) < int(100.0*cur_rev/nr_revs):
                ui.write("%d%%.." % (int(100.0*cur_rev/nr_revs),))
                sys.stdout.flush()

    if progress:
        ui.write("done\n")
        sys.stdout.flush()

    return stats

def churn(ui, repo, **opts):
    "Graphs the number of lines changed"
    # The docstring above is left untouched: it is the only description
    # this function carries.  Options read from opts: 'rev' (list of
    # revision specs), 'aliases' (path to an alias map file) and
    # 'progress' (flag).

    def pad(s, l):
        # left-justify s in exactly l columns, truncating if needed
        if len(s) < l:
            return s + " " * (l-len(s))
        return s[0:l]

    def graph(n, maximum, width, char):
        # a bar of 'char' proportional to n/maximum, at most 'width' wide
        if not maximum:
            # every author changed 0 lines; avoid dividing by zero
            return ""
        n = int(n * width / float(maximum))
        return char * (n)

    def get_aliases(f):
        # parse "<alias email> <actual email>" lines into a dict
        aliases = {}
        for l in f.readlines():
            fields = l.split()
            if not fields:
                continue # blank line
            if len(fields) != 2:
                ui.warn("ignoring malformed alias line: %s\n" % l.strip())
                continue
            alias, actual = fields
            aliases[alias] = actual
        return aliases

    amap = {}
    aliases = opts.get('aliases')
    if aliases:
        try:
            f = open(aliases,"r")
        except (IOError, OSError):
            # open() reports failures as IOError on Python 2; the
            # exception is formatted with %s because it cannot be
            # concatenated to a string.
            ui.warn("Error: %s\n" % (sys.exc_info()[1],))
            return

        try:
            amap = get_aliases(f)
        finally:
            f.close()

    revs = [int(r) for r in cmdutil.revrange(repo, opts['rev'])]
    revs.sort()
    stats = gather_stats(ui, repo, amap, revs, opts.get('progress'))

    if not stats:
        # nothing but merges (or no revisions at all): nothing to graph
        return

    # make a list of tuples (name, lines) and sort it in descending order
    ordered = stats.items()
    ordered.sort(lambda x, y: cmp(y[1], x[1]))

    maximum = ordered[0][1]

    ui.note("Assuming 80 character terminal\n")
    width = 80 - 1

    # columns: 20 for the name, 6 for the count, the rest for the bar
    barwidth = width - 20 - 1 - 6 - 2 - 2

    for person, lines in ordered:
        ui.write("%s %6d %s\n" % (pad(person, 20), lines,
                                  graph(lines, maximum, barwidth, '*')))

cmdtable = {
    "churn":
    (churn,
     [('r', 'rev', [], _('limit statistics to the specified revisions')),
      ('', 'aliases', '', _('file with email aliases')),
      ('', 'progress', None, _('show progress'))],
    'hg churn [-r revision range] [--aliases file] [--progress]'),
}