comparison mercurial/cmdutil.py @ 9925:9dfe34bf42c7

findrenames: first loop over the removed files, it's faster

Getting the file from the working dir is less expensive than getting it from the repo history, hence the speedup.

Benchmarked on the crew repo with:
    rm -rf * ; hg up -C ; for i in `find . -name "*.py"` ; do mv $i $i.new; done
followed by:
    hg addremove -s 100

before: Time: real 68.760 secs (user 65.760+0.000 sys 2.490+0.000)
after : Time: real 28.890 secs (user 26.920+0.000 sys 1.450+0.000)
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Tue, 24 Nov 2009 17:26:42 +0100
parents 2c2f7593ffc4
children 4b044b81cb54
comparison
equal deleted inserted replaced
9916:3d718761157b 9925:9dfe34bf42c7
268 def matchfiles(repo, files): 268 def matchfiles(repo, files):
269 return _match.exact(repo.root, repo.getcwd(), files) 269 return _match.exact(repo.root, repo.getcwd(), files)
270 270
271 def findrenames(repo, added, removed, threshold): 271 def findrenames(repo, added, removed, threshold):
272 '''find renamed files -- yields (before, after, score) tuples''' 272 '''find renamed files -- yields (before, after, score) tuples'''
273 copies = {}
273 ctx = repo['.'] 274 ctx = repo['.']
274 for a in added: 275 for r in removed:
275 aa = repo.wread(a) 276 if r not in ctx:
276 bestname, bestscore = None, threshold 277 continue
277 for r in removed: 278 fctx = ctx.filectx(r)
278 if r not in ctx: 279 rr = fctx.data()
279 continue 280 for a in added:
280 rr = ctx.filectx(r).data() 281 bestscore = copies.get(a, (None, threshold))[1]
281 282 aa = repo.wread(a)
282 # bdiff.blocks() returns blocks of matching lines 283 # bdiff.blocks() returns blocks of matching lines
283 # count the number of bytes in each 284 # count the number of bytes in each
284 equal = 0 285 equal = 0
285 alines = mdiff.splitnewlines(aa) 286 alines = mdiff.splitnewlines(aa)
286 matches = bdiff.blocks(aa, rr) 287 matches = bdiff.blocks(aa, rr)
290 291
291 lengths = len(aa) + len(rr) 292 lengths = len(aa) + len(rr)
292 if lengths: 293 if lengths:
293 myscore = equal*2.0 / lengths 294 myscore = equal*2.0 / lengths
294 if myscore >= bestscore: 295 if myscore >= bestscore:
295 bestname, bestscore = r, myscore 296 copies[a] = (r, myscore)
296 if bestname: 297 for dest, v in copies.iteritems():
297 yield bestname, a, bestscore 298 source, score = v
299 yield source, dest, score
298 300
299 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None): 301 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
300 if dry_run is None: 302 if dry_run is None:
301 dry_run = opts.get('dry_run') 303 dry_run = opts.get('dry_run')
302 if similarity is None: 304 if similarity is None: