diff -r 0cfa7d9b889c -r 036d47d7cf39 mercurial/copies.py --- a/mercurial/copies.py Fri Jun 30 03:36:46 2017 +0200 +++ b/mercurial/copies.py Sun Sep 03 03:49:15 2017 +0530 @@ -7,7 +7,9 @@ from __future__ import absolute_import +import collections import heapq +import os from . import ( match as matchmod, @@ -364,6 +366,8 @@ # rebase. if copytracing == 'off': return {}, {}, {}, {}, {} + elif copytracing == 'heuristics': + return _heuristicscopytracing(repo, c1, c2, base) else: return _fullcopytracing(repo, c1, c2, base) @@ -599,6 +603,94 @@ return copy, movewithdir, diverge, renamedelete, dirmove +def _heuristicscopytracing(repo, c1, c2, base): + """ Fast copytracing using filename heuristics + + Assumes that moves or renames are of following two types: + + 1) Inside a directory only (same directory name but different filenames) + 2) Move from one directory to another + (same filenames but different directory names) + + Works only when there are no merge commits in the "source branch". + Source branch is commits from base up to c2 not including base. + + If merge is involved it fallbacks to _fullcopytracing(). + + Can be used by setting the following config: + + [experimental] + copytrace = heuristics + """ + + if c1.rev() is None: + c1 = c1.p1() + if c2.rev() is None: + c2 = c2.p1() + + copies = {} + + changedfiles = set() + m1 = c1.manifest() + if not repo.revs('%d::%d', base.rev(), c2.rev()): + # If base is not in c2 branch, we switch to fullcopytracing + repo.ui.debug("switching to full copytracing as base is not " + "an ancestor of c2\n") + return _fullcopytracing(repo, c1, c2, base) + + ctx = c2 + while ctx != base: + if len(ctx.parents()) == 2: + # To keep things simple let's not handle merges + repo.ui.debug("switching to full copytracing because of merges\n") + return _fullcopytracing(repo, c1, c2, base) + changedfiles.update(ctx.files()) + ctx = ctx.p1() + + cp = _forwardcopies(base, c2) + for dst, src in cp.iteritems(): + if src in m1: + copies[dst] = src + + # file is missing if it isn't present in the destination, but is present in + # the base and present in the source. + # Presence in the base is important to exclude added files, presence in the + # source is important to exclude removed files. + missingfiles = filter(lambda f: f not in m1 and f in base and f in c2, + changedfiles) + + if missingfiles: + basenametofilename = collections.defaultdict(list) + dirnametofilename = collections.defaultdict(list) + + for f in m1.filesnotin(base.manifest()): + basename = os.path.basename(f) + dirname = os.path.dirname(f) + basenametofilename[basename].append(f) + dirnametofilename[dirname].append(f) + + # in case of a rebase/graft, base may not be a common ancestor + anc = c1.ancestor(c2) + + for f in missingfiles: + basename = os.path.basename(f) + dirname = os.path.dirname(f) + samebasename = basenametofilename[basename] + samedirname = dirnametofilename[dirname] + movecandidates = samebasename + samedirname + # f is guaranteed to be present in c2, that's why + # c2.filectx(f) won't fail + f2 = c2.filectx(f) + for candidate in movecandidates: + f1 = c1.filectx(candidate) + if _related(f1, f2, anc.rev()): + # if there are a few related copies then we'll merge + # changes into all of them. This matches the behaviour + # of upstream copytracing + copies[candidate] = f + + return copies, {}, {}, {}, {} + def _related(f1, f2, limit): """return True if f1 and f2 filectx have a common ancestor