comparison mercurial/cmdutil.py @ 11059:ef4aa90b1e58

Move 'findrenames' code into its own file. The next few patches will increase the size of the 'findrenames' functionality. This patch simply moves the function out of 'cmdutil.py' into its own module ('similar', now called as 'similar.findrenames') to avoid clutter building up in 'cmdutil.py'.
author David Greenaway <hg-dev@davidgreenaway.com>
date Sat, 03 Apr 2010 11:58:16 +1100
parents 5d35f7d93514
children 51d0387523c6
comparison
equal deleted inserted replaced
11058:f6dcbeb5babe 11059:ef4aa90b1e58
8 from node import hex, nullid, nullrev, short 8 from node import hex, nullid, nullrev, short
9 from i18n import _ 9 from i18n import _
10 import os, sys, errno, re, glob, tempfile 10 import os, sys, errno, re, glob, tempfile
11 import mdiff, bdiff, util, templater, patch, error, encoding, templatekw 11 import mdiff, bdiff, util, templater, patch, error, encoding, templatekw
12 import match as _match 12 import match as _match
13 import similar
13 14
14 revrangesep = ':' 15 revrangesep = ':'
15 16
16 def parsealiases(cmd): 17 def parsealiases(cmd):
17 return cmd.lstrip("^").split("|") 18 return cmd.lstrip("^").split("|")
283 def matchall(repo): 284 def matchall(repo):
284 return _match.always(repo.root, repo.getcwd()) 285 return _match.always(repo.root, repo.getcwd())
285 286
286 def matchfiles(repo, files): 287 def matchfiles(repo, files):
287 return _match.exact(repo.root, repo.getcwd(), files) 288 return _match.exact(repo.root, repo.getcwd(), files)
288
289 def findrenames(repo, added, removed, threshold):
290 '''find renamed files -- yields (before, after, score) tuples'''
291 copies = {}
292 ctx = repo['.']
293 for i, r in enumerate(removed):
294 repo.ui.progress(_('searching'), i, total=len(removed))
295 if r not in ctx:
296 continue
297 fctx = ctx.filectx(r)
298
299 # lazily load text
300 @util.cachefunc
301 def data():
302 orig = fctx.data()
303 return orig, mdiff.splitnewlines(orig)
304
305 def score(text):
306 if not len(text):
307 return 0.0
308 if not fctx.cmp(text):
309 return 1.0
310 if threshold == 1.0:
311 return 0.0
312 orig, lines = data()
313 # bdiff.blocks() returns blocks of matching lines
314 # count the number of bytes in each
315 equal = 0
316 matches = bdiff.blocks(text, orig)
317 for x1, x2, y1, y2 in matches:
318 for line in lines[y1:y2]:
319 equal += len(line)
320
321 lengths = len(text) + len(orig)
322 return equal * 2.0 / lengths
323
324 for a in added:
325 bestscore = copies.get(a, (None, threshold))[1]
326 myscore = score(repo.wread(a))
327 if myscore >= bestscore:
328 copies[a] = (r, myscore)
329 repo.ui.progress(_('searching'), None)
330
331 for dest, v in copies.iteritems():
332 source, score = v
333 yield source, dest, score
334 289
335 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None): 290 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
336 if dry_run is None: 291 if dry_run is None:
337 dry_run = opts.get('dry_run') 292 dry_run = opts.get('dry_run')
338 if similarity is None: 293 if similarity is None:
364 removed.append(abs) 319 removed.append(abs)
365 elif repo.dirstate[abs] == 'a': 320 elif repo.dirstate[abs] == 'a':
366 added.append(abs) 321 added.append(abs)
367 copies = {} 322 copies = {}
368 if similarity > 0: 323 if similarity > 0:
369 for old, new, score in findrenames(repo, added + unknown, 324 for old, new, score in similar.findrenames(repo,
370 removed + deleted, similarity): 325 added + unknown, removed + deleted, similarity):
371 if repo.ui.verbose or not m.exact(old) or not m.exact(new): 326 if repo.ui.verbose or not m.exact(old) or not m.exact(new):
372 repo.ui.status(_('recording removal of %s as rename to %s ' 327 repo.ui.status(_('recording removal of %s as rename to %s '
373 '(%d%% similar)\n') % 328 '(%d%% similar)\n') %
374 (m.rel(old), m.rel(new), score * 100)) 329 (m.rel(old), m.rel(new), score * 100))
375 copies[new] = old 330 copies[new] = old