mercurial/copies.py
changeset 42222 57203e0210f8
parent 42210 390ec72b8ea4
child 42223 d69bc8ffbe6f
--- a/mercurial/copies.py	Mon Apr 29 14:38:54 2019 -0700
+++ b/mercurial/copies.py	Thu Apr 11 23:22:54 2019 -0700
@@ -373,57 +373,6 @@
 
     return u1, u2
 
-def _makegetfctx(ctx):
-    """return a 'getfctx' function suitable for _checkcopies usage
-
-    We have to re-setup the function building 'filectx' for each
-    '_checkcopies' to ensure the linkrev adjustment is properly setup for
-    each. Linkrev adjustment is important to avoid bug in rename
-    detection. Moreover, having a proper '_ancestrycontext' setup ensures
-    the performance impact of this adjustment is kept limited. Without it,
-    each file could do a full dag traversal making the time complexity of
-    the operation explode (see issue4537).
-
-    This function exists here mostly to limit the impact on stable. Feel
-    free to refactor on default.
-    """
-    rev = ctx.rev()
-    repo = ctx._repo
-    ac = getattr(ctx, '_ancestrycontext', None)
-    if ac is None:
-        revs = [rev]
-        if rev is None:
-            revs = [p.rev() for p in ctx.parents()]
-        ac = repo.changelog.ancestors(revs, inclusive=True)
-        ctx._ancestrycontext = ac
-    def makectx(f, n):
-        if n in node.wdirfilenodeids:  # in a working context?
-            if ctx.rev() is None:
-                return ctx.filectx(f)
-            return repo[None][f]
-        fctx = repo.filectx(f, fileid=n)
-        # setup only needed for filectx not create from a changectx
-        fctx._ancestrycontext = ac
-        fctx._descendantrev = rev
-        return fctx
-    return util.lrucachefunc(makectx)
-
-def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
-    """combine partial copy paths"""
-    remainder = {}
-    for f in copyfrom:
-        if f in copyto:
-            finalcopy[copyto[f]] = copyfrom[f]
-            del copyto[f]
-    for f in incompletediverge:
-        assert f not in diverge
-        ic = incompletediverge[f]
-        if ic[0] in copyto:
-            diverge[f] = [copyto[ic[0]], ic[1]]
-        else:
-            remainder[f] = ic
-    return remainder
-
 def mergecopies(repo, c1, c2, base):
     """
     Finds moves and copies between context c1 and c2 that are relevant for
@@ -518,6 +467,23 @@
         return commits < sourcecommitlimit
     return False
 
+def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
+                           copy, renamedelete):
+    if src not in m2:
+        # deleted on side 2
+        if src not in m1:
+            # renamed on side 1, deleted on side 2
+            renamedelete[src] = dsts1
+    elif m2[src] != mb[src]:
+        if not _related(c2[src], base[src]):
+            return
+        # modified on side 2
+        for dst in dsts1:
+            if dst not in m2:
+                # dst not added on side 2 (handle as regular
+                # "both created" case in manifestmerge otherwise)
+                copy[dst] = src
+
 def _fullcopytracing(repo, c1, c2, base):
     """ The full copytracing algorithm which finds all the new files that were
     added from merge base up to the top commit and for each file it checks if
@@ -526,168 +492,73 @@
     This is pretty slow when a lot of changesets are involved but will track all
     the copies.
     """
-    # In certain scenarios (e.g. graft, update or rebase), base can be
-    # overridden We still need to know a real common ancestor in this case We
-    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
-    # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
-    # caller may not know if the revision passed in lieu of the CA is a genuine
-    # common ancestor or not without explicitly checking it, it's better to
-    # determine that here.
-    #
-    # base.isancestorof(wc) is False, work around that
-    _c1 = c1.p1() if c1.rev() is None else c1
-    _c2 = c2.p1() if c2.rev() is None else c2
-    # an endpoint is "dirty" if it isn't a descendant of the merge base
-    # if we have a dirty endpoint, we need to trigger graft logic, and also
-    # keep track of which endpoint is dirty
-    dirtyc1 = not base.isancestorof(_c1)
-    dirtyc2 = not base.isancestorof(_c2)
-    graft = dirtyc1 or dirtyc2
-    tca = base
-    if graft:
-        tca = _c1.ancestor(_c2)
-
-    limit = _findlimit(repo, c1, c2)
-
     m1 = c1.manifest()
     m2 = c2.manifest()
     mb = base.manifest()
 
-    # gather data from _checkcopies:
-    # - diverge = record all diverges in this dict
-    # - copy = record all non-divergent copies in this dict
-    # - fullcopy = record all copies in this dict
-    # - incomplete = record non-divergent partial copies here
-    # - incompletediverge = record divergent partial copies here
-    diverge = {} # divergence data is shared
-    incompletediverge  = {}
-    data1 = {'copy': {},
-             'fullcopy': {},
-             'incomplete': {},
-             'diverge': diverge,
-             'incompletediverge': incompletediverge,
-            }
-    data2 = {'copy': {},
-             'fullcopy': {},
-             'incomplete': {},
-             'diverge': diverge,
-             'incompletediverge': incompletediverge,
-            }
+    copies1 = pathcopies(base, c1)
+    copies2 = pathcopies(base, c2)
+
+    inversecopies1 = {}
+    inversecopies2 = {}
+    for dst, src in copies1.items():
+        inversecopies1.setdefault(src, []).append(dst)
+    for dst, src in copies2.items():
+        inversecopies2.setdefault(src, []).append(dst)
+
+    copy = {}
+    diverge = {}
+    renamedelete = {}
+    allsources = set(inversecopies1) | set(inversecopies2)
+    for src in allsources:
+        dsts1 = inversecopies1.get(src)
+        dsts2 = inversecopies2.get(src)
+        if dsts1 and dsts2:
+            # copied/renamed on both sides
+            if src not in m1 and src not in m2:
+                # renamed on both sides
+                dsts1 = set(dsts1)
+                dsts2 = set(dsts2)
+                # If there's some overlap in the rename destinations, we
+                # consider it not divergent. For example, if side 1 copies 'a'
+                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
+                # and 'd' and deletes 'a'.
+                if dsts1 & dsts2:
+                    for dst in (dsts1 & dsts2):
+                        copy[dst] = src
+                else:
+                    diverge[src] = sorted(dsts1 | dsts2)
+            elif src in m1 and src in m2:
+                # copied on both sides
+                dsts1 = set(dsts1)
+                dsts2 = set(dsts2)
+                for dst in (dsts1 & dsts2):
+                    copy[dst] = src
+            # TODO: Handle cases where it was renamed on one side and copied
+            # on the other side
+        elif dsts1:
+            # copied/renamed only on side 1
+            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
+                                   copy, renamedelete)
+        elif dsts2:
+            # copied/renamed only on side 2
+            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
+                                   copy, renamedelete)
+
+    renamedeleteset = set()
+    divergeset = set()
+    for src, dsts in diverge.items():
+        divergeset.update(dsts)
+    for src, dsts in renamedelete.items():
+        renamedeleteset.update(dsts)
 
     # find interesting file sets from manifests
     addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
     addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
-    bothnew = sorted(addedinm1 & addedinm2)
-    if tca == base:
-        # unmatched file from base
-        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
-        u1u, u2u = u1r, u2r
-    else:
-        # unmatched file from base (DAG rotation in the graft case)
-        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
-        # unmatched file from topological common ancestors (no DAG rotation)
-        # need to recompute this for directory move handling when grafting
-        mta = tca.manifest()
-        u1u, u2u = _computenonoverlap(repo, c1, c2,
-                                      m1.filesnotin(mta, repo.narrowmatch()),
-                                      m2.filesnotin(mta, repo.narrowmatch()),
-                                      debug=False)
-
-    for f in u1u:
-        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
-
-    for f in u2u:
-        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
-
-    copy = dict(data1['copy'])
-    copy.update(data2['copy'])
-    fullcopy = dict(data1['fullcopy'])
-    fullcopy.update(data2['fullcopy'])
-
-    if dirtyc1:
-        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
-                       incompletediverge)
-    if dirtyc2:
-        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
-                       incompletediverge)
-
-    renamedelete = {}
-    renamedeleteset = set()
-    divergeset = set()
-    for of, fl in list(diverge.items()):
-        if len(fl) == 1 or of in c1 or of in c2:
-            del diverge[of] # not actually divergent, or not a rename
-            if of not in c1 and of not in c2:
-                # renamed on one side, deleted on the other side, but filter
-                # out files that have been renamed and then deleted
-                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
-                renamedeleteset.update(fl) # reverse map for below
-        else:
-            divergeset.update(fl) # reverse map for below
+    u1, u2 = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
 
-    bothdiverge = {}
-    bothincompletediverge = {}
-    remainder = {}
-    both1 = {'copy': {},
-             'fullcopy': {},
-             'incomplete': {},
-             'diverge': bothdiverge,
-             'incompletediverge': bothincompletediverge
-            }
-    both2 = {'copy': {},
-             'fullcopy': {},
-             'incomplete': {},
-             'diverge': bothdiverge,
-             'incompletediverge': bothincompletediverge
-            }
-    for f in bothnew:
-        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
-        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
-    if dirtyc1 and dirtyc2:
-        remainder = _combinecopies(both2['incomplete'], both1['incomplete'],
-                                   copy, bothdiverge, bothincompletediverge)
-        remainder1 = _combinecopies(both1['incomplete'], both2['incomplete'],
-                                   copy, bothdiverge, bothincompletediverge)
-        remainder.update(remainder1)
-    elif dirtyc1:
-        # incomplete copies may only be found on the "dirty" side for bothnew
-        assert not both2['incomplete']
-        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
-                                   bothincompletediverge)
-    elif dirtyc2:
-        assert not both1['incomplete']
-        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
-                                   bothincompletediverge)
-    else:
-        # incomplete copies and divergences can't happen outside grafts
-        assert not both1['incomplete']
-        assert not both2['incomplete']
-        assert not bothincompletediverge
-    for f in remainder:
-        assert f not in bothdiverge
-        ic = remainder[f]
-        if ic[0] in (m1 if dirtyc1 else m2):
-            # backed-out rename on one side, but watch out for deleted files
-            bothdiverge[f] = ic
-    for of, fl in bothdiverge.items():
-        if len(fl) == 2 and fl[0] == fl[1]:
-            copy[fl[0]] = of # not actually divergent, just matching renames
-
-    # Sometimes we get invalid copies here (the "and not remotebase" in
-    # _checkcopies() seems suspicious). Filter them out.
-    for dst, src in fullcopy.copy().items():
-        if src not in mb:
-            del fullcopy[dst]
-    # Sometimes we forget to add entries from "copy" to "fullcopy", so fix
-    # that up here
-    for dst, src in copy.items():
-        fullcopy[dst] = src
-    # Sometimes we forget to add entries from "diverge" to "fullcopy", so fix
-    # that up here
-    for src, dsts in diverge.items():
-        for dst in dsts:
-            fullcopy[dst] = src
-
+    fullcopy = copies1.copy()
+    fullcopy.update(copies2)
     if not fullcopy:
         return copy, {}, diverge, renamedelete, {}
 
@@ -752,7 +623,7 @@
 
     movewithdir = {}
     # check unaccounted nonoverlapping files against directory moves
-    for f in u1r + u2r:
+    for f in u1 + u2:
         if f not in fullcopy:
             for d in dirmove:
                 if f.startswith(d):
@@ -899,99 +770,6 @@
     except StopIteration:
         return False
 
-def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
-    """
-    check possible copies of f from msrc to mdst
-
-    srcctx = starting context for f in msrc
-    dstctx = destination context for f in mdst
-    f = the filename to check (as in msrc)
-    base = the changectx used as a merge base
-    tca = topological common ancestor for graft-like scenarios
-    remotebase = True if base is outside tca::srcctx, False otherwise
-    limit = the rev number to not search beyond
-    data = dictionary of dictionary to store copy data. (see mergecopies)
-
-    note: limit is only an optimization, and provides no guarantee that
-    irrelevant revisions will not be visited
-    there is no easy way to make this algorithm stop in a guaranteed way
-    once it "goes behind a certain revision".
-    """
-
-    msrc = srcctx.manifest()
-    mdst = dstctx.manifest()
-    mb = base.manifest()
-    mta = tca.manifest()
-    # Might be true if this call is about finding backward renames,
-    # This happens in the case of grafts because the DAG is then rotated.
-    # If the file exists in both the base and the source, we are not looking
-    # for a rename on the source side, but on the part of the DAG that is
-    # traversed backwards.
-    #
-    # In the case there is both backward and forward renames (before and after
-    # the base) this is more complicated as we must detect a divergence.
-    # We use 'backwards = False' in that case.
-    backwards = not remotebase and base != tca and f in mb
-    getsrcfctx = _makegetfctx(srcctx)
-    getdstfctx = _makegetfctx(dstctx)
-
-    if msrc[f] == mb.get(f) and not remotebase:
-        # Nothing to merge
-        return
-
-    of = None
-    seen = {f}
-    for oc in getsrcfctx(f, msrc[f]).ancestors():
-        of = oc.path()
-        if of in seen:
-            # check limit late - grab last rename before
-            if oc.linkrev() < limit:
-                break
-            continue
-        seen.add(of)
-
-        # remember for dir rename detection
-        if backwards:
-            data['fullcopy'][of] = f # grafting backwards through renames
-        else:
-            data['fullcopy'][f] = of
-        if of not in mdst:
-            continue # no match, keep looking
-        if mdst[of] == mb.get(of):
-            return # no merge needed, quit early
-        c2 = getdstfctx(of, mdst[of])
-        # c2 might be a plain new file on added on destination side that is
-        # unrelated to the droids we are looking for.
-        cr = _related(oc, c2)
-        if cr and (of == f or of == c2.path()): # non-divergent
-            if backwards:
-                data['copy'][of] = f
-            elif of in mb:
-                data['copy'][f] = of
-            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
-                data['copy'][of] = f
-                del data['fullcopy'][f]
-                data['fullcopy'][of] = f
-            else: # divergence w.r.t. graft CA on one side of topological CA
-                for sf in seen:
-                    if sf in mb:
-                        assert sf not in data['diverge']
-                        data['diverge'][sf] = [f, of]
-                        break
-            return
-
-    if of in mta:
-        if backwards or remotebase:
-            data['incomplete'][of] = f
-        else:
-            for sf in seen:
-                if sf in mb:
-                    if tca == base:
-                        data['diverge'].setdefault(sf, []).append(f)
-                    else:
-                        data['incompletediverge'][sf] = [of, f]
-                    return
-
 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
     """reproduce copies from fromrev to rev in the dirstate