Mercurial > public > mercurial-scm > hg-stable
diff mercurial/cmdutil.py @ 4135:6cb6cfe43c5d
Avoid some false positives for addremove -s
The original code uses the similarity score
1 - len(diff(after, before)) / len(after)
The diff can at most be the size of the 'before' file, so any small
'before' file would be considered very similar. Removing an empty file
would cause all files added in the same revision to be considered
copies of the removed file.
This changes the metric to
2 * bytes_overlap(before, after) / len(before + after)
i.e. the actual percentage of bytes shared between the two files.
author | Erling Ellingsen <erlingalf@gmail.com> |
---|---|
date | Sun, 18 Feb 2007 20:39:25 +0100 |
parents | 431f3c1d3a37 |
children | eb5d4fec1487 |
line wrap: on
line diff
--- a/mercurial/cmdutil.py Sun Mar 04 09:03:21 2007 -0300 +++ b/mercurial/cmdutil.py Sun Feb 18 20:39:25 2007 +0100 @@ -7,7 +7,7 @@ from node import * from i18n import _ -import os, sys, mdiff, util, templater, patch +import os, sys, mdiff, bdiff, util, templater, patch revrangesep = ':' @@ -146,20 +146,29 @@ yield src, fn, util.pathto(repo.getcwd(), fn), fn in exact def findrenames(repo, added=None, removed=None, threshold=0.5): + '''find renamed files -- yields (before, after, score) tuples''' if added is None or removed is None: added, removed = repo.status()[1:3] ctx = repo.changectx() for a in added: aa = repo.wread(a) - bestscore, bestname = None, None + bestname, bestscore = None, threshold for r in removed: rr = ctx.filectx(r).data() - delta = mdiff.textdiff(aa, rr) - if len(delta) < len(aa): - myscore = 1.0 - (float(len(delta)) / len(aa)) - if bestscore is None or myscore > bestscore: - bestscore, bestname = myscore, r - if bestname and bestscore >= threshold: + + # bdiff.blocks() returns blocks of matching lines + # count the number of bytes in each + equal = 0 + alines = mdiff.splitnewlines(aa) + matches = bdiff.blocks(aa, rr) + for x1,x2,y1,y2 in matches: + for line in alines[x1:x2]: + equal += len(line) + + myscore = equal*2.0 / (len(aa)+len(rr)) + if myscore >= bestscore: + bestname, bestscore = r, myscore + if bestname: yield bestname, a, bestscore def addremove(repo, pats=[], opts={}, wlock=None, dry_run=None,