mercurial/setdiscovery.py
changeset 14164 cb98fed52495
child 14206 2bf60f158ecb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/setdiscovery.py	Mon May 02 19:21:30 2011 +0200
@@ -0,0 +1,178 @@
+# setdiscovery.py - improved discovery of common nodeset for mercurial
+#
+# Copyright 2010 Benoit Boissinot <bboissin@gmail.com>
+# and Peter Arrenbrecht <peter@arrenbrecht.ch>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from node import nullid
+from i18n import _
+import random, collections, util, dagutil
+
+def _updatesample(dag, nodes, sample, always, quicksamplesize=0):
+    # if nodes is empty we scan the entire graph
+    if nodes:
+        heads = dag.headsetofconnecteds(nodes)
+    else:
+        heads = dag.heads()
+    dist = {}
+    visit = collections.deque(heads)
+    seen = set()
+    factor = 1
+    while visit:
+        curr = visit.popleft()
+        if curr in seen:
+            continue
+        d = dist.setdefault(curr, 1)
+        if d > factor:
+            factor *= 2
+        if d == factor:
+            if curr not in always: # need this check for the early exit below
+                sample.add(curr)
+                if quicksamplesize and (len(sample) >= quicksamplesize):
+                    return
+        seen.add(curr)
+        for p in dag.parents(curr):
+            if not nodes or p in nodes:
+                dist.setdefault(p, d + 1)
+                visit.append(p)
+
+def _setupsample(dag, nodes, size):
+    if len(nodes) <= size:
+        return set(nodes), None, 0
+    always = set(dag.heads())
+    desiredlen = size - len(always)
+    if desiredlen <= 0:
+        # This could be bad if there are very many heads, all unknown to the
+        # server. We're counting on long request support here.
+        return always, None, desiredlen
+    return always, set(), desiredlen
+
+def _takequicksample(dag, nodes, size, initial):
+    always, sample, desiredlen = _setupsample(dag, nodes, size)
+    if sample is None:
+        return always
+    if initial:
+        fromset = None
+    else:
+        fromset = nodes
+    _updatesample(dag, fromset, sample, always, quicksamplesize=desiredlen)
+    sample.update(always)
+    return sample
+
+def _takefullsample(dag, nodes, size):
+    always, sample, desiredlen = _setupsample(dag, nodes, size)
+    if sample is None:
+        return always
+    # update from heads
+    _updatesample(dag, nodes, sample, always)
+    # update from roots
+    _updatesample(dag.inverse(), nodes, sample, always)
+    assert sample
+    if len(sample) > desiredlen:
+        sample = set(random.sample(sample, desiredlen))
+    elif len(sample) < desiredlen:
+        more = desiredlen - len(sample)
+        sample.update(random.sample(list(nodes - sample - always), more))
+    sample.update(always)
+    return sample
+
+def findcommonheads(ui, local, remote,
+                    initialsamplesize=100,
+                    fullsamplesize=200,
+                    abortwhenunrelated=True):
+    '''Return a tuple (common, anyincoming, remoteheads) used to identify missing
+    nodes from or in remote.
+
+    shortcutlocal determines whether we try use direct access to localrepo if
+    remote is actually local.
+    '''
+    roundtrips = 0
+    cl = local.changelog
+    dag = dagutil.revlogdag(cl)
+    nodes = dag.nodeset()
+
+    # early exit if we know all the specified server heads already
+    ui.debug("query 1; heads\n")
+    roundtrips += 1
+    srvheadhashes = remote.heads()
+
+    ## TODO We might want to request an additional random sample of the server's
+    ## nodes batched with the heads query here.
+
+    if cl.tip() == nullid:
+        if srvheadhashes != [nullid]:
+            return [nullid], True, srvheadhashes
+        return [nullid], False, []
+
+    # start actual discovery (we note this before the next "if" for compatibility
+    # reasons)
+    ui.status(_("searching for changes\n"))
+
+    srvheads = dag.internalizeall(srvheadhashes, filterunknown=True)
+    if len(srvheads) == len(srvheadhashes):
+        ui.note("all remote heads known locally\n")
+        return (srvheadhashes, False, srvheadhashes,)
+
+    # full blown discovery
+    undecided = nodes # own nodes where I don't know if the server knows them
+    common = set() # own nodes I know we both know
+    missing = set() # own nodes I know the server lacks
+
+    # treat remote heads as a first implicit sample response
+    common.update(dag.ancestorset(srvheads))
+    undecided.difference_update(common)
+    # use cheapish initial sample
+    if common:
+        ui.debug("taking initial sample\n")
+        sample = _takefullsample(dag, undecided, size=fullsamplesize)
+    else:
+        ui.debug("taking quick initial sample\n")
+        sample = _takequicksample(dag, nodes, size=initialsamplesize,
+                                  initial=True)
+
+    roundtrips += 1
+    ui.progress(_('searching'), roundtrips, unit=_('queries'))
+    ui.debug("query %i; still undecided: %i, sample size is: %i\n"
+             % (roundtrips, len(undecided), len(sample)))
+    # indices between sample and externalized version must match
+    sample = list(sample)
+    yesno = remote.known(dag.externalizeall(sample))
+
+    while undecided:
+        commoninsample = set(n for i, n in enumerate(sample) if yesno[i])
+        common.update(dag.ancestorset(commoninsample, common))
+
+        missinginsample = [n for i, n in enumerate(sample) if not yesno[i]]
+        missing.update(dag.descendantset(missinginsample, missing))
+
+        undecided.difference_update(missing)
+        undecided.difference_update(common)
+
+        if not undecided:
+            break
+
+        ui.note("sampling from both directions\n")
+        sample = _takefullsample(dag, undecided, size=fullsamplesize)
+
+        roundtrips += 1
+        ui.progress(_('searching'), roundtrips, unit=_('queries'))
+        ui.debug("query %i; still undecided: %i, sample size is: %i\n"
+                 % (roundtrips, len(undecided), len(sample)))
+        # indices between sample and externalized version must match
+        sample = list(sample)
+        yesno = remote.known(dag.externalizeall(sample))
+
+    result = dag.headsetofconnecteds(common)
+    ui.progress(_('searching'), None)
+    ui.debug("%d total queries\n" % roundtrips)
+
+    if not result and srvheadhashes != [nullid]:
+        if abortwhenunrelated:
+            raise util.Abort(_("repository is unrelated"))
+        else:
+            ui.warn(_("warning: repository is unrelated\n"))
+        return (set([nullid]), True, srvheadhashes,)
+
+    return (dag.externalizeall(result), True, srvheadhashes,)