comparison mercurial/branchmap.py @ 46254:c4b792fa109e

branchmap: avoid ancestor computations in absence of non-continous branches The branchhead computation is one of the more heavy operations for bigger repositories as it has to scan all changesets and potentially involves the expensive computation of the ancestor sets. Redo the computation to handle the common cases directly and use tighter conditions for when the ancestor scan is necessary. Most importantly, avoid it completely if the non-continous branches are processed in one update as seen in the initial computation after a clone. For the Mercurial repository, it gives a small 2-3% performance boost. For the NetBSD test repository, it cuts the time in half. Differential Revision: https://phab.mercurial-scm.org/D9631
author Joerg Sonnenberger <joerg@bec.de>
date Fri, 18 Dec 2020 14:45:28 +0100
parents 89a2afe31e82
children 1726a53a8494 799973a44c82
comparison
equal deleted inserted replaced
46253:1cebad969d88 46254:c4b792fa109e
441 branch, closesbranch = getbranchinfo(r) 441 branch, closesbranch = getbranchinfo(r)
442 newbranches.setdefault(branch, []).append(r) 442 newbranches.setdefault(branch, []).append(r)
443 if closesbranch: 443 if closesbranch:
444 self._closednodes.add(cl.node(r)) 444 self._closednodes.add(cl.node(r))
445 445
446 # fetch current topological heads to speed up filtering
447 topoheads = set(cl.headrevs())
448
449 # new tip revision which we found after iterating items from new 446 # new tip revision which we found after iterating items from new
450 # branches 447 # branches
451 ntiprev = self.tiprev 448 ntiprev = self.tiprev
452 449
453 # if older branchheads are reachable from new ones, they aren't 450 # Delay fetching the topological heads until they are needed.
454 # really branchheads. Note checking parents is insufficient: 451 # A repository without non-continous branches can skip this part.
455 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) 452 topoheads = None
453
454 # If a changeset is visible, its parents must be visible too, so
455 # use the faster unfiltered parent accessor.
456 parentrevs = repo.unfiltered().changelog.parentrevs
457
456 for branch, newheadrevs in pycompat.iteritems(newbranches): 458 for branch, newheadrevs in pycompat.iteritems(newbranches):
459 # For every branch, compute the new branchheads.
460 # A branchhead is a revision such that no descendant is on
461 # the same branch.
462 #
463 # The branchheads are computed iteratively in revision order.
464 # This ensures topological order, i.e. parents are processed
465 # before their children. Ancestors are inclusive here, i.e.
466 # any revision is an ancestor of itself.
467 #
468 # Core observations:
469 # - The current revision is always a branchhead for the
470 # repository up to that point.
471 # - It is the first revision of the branch if and only if
472 # there was no branchhead before. In that case, it is the
473 # only branchhead as there are no possible ancestors on
474 # the same branch.
475 # - If a parent is on the same branch, a branchhead can
476 # only be an ancestor of that parent, if it is parent
477 # itself. Otherwise it would have been removed as ancestor
478 # of that parent before.
479 # - Therefore, if all parents are on the same branch, they
480 # can just be removed from the branchhead set.
481 # - If one parent is on the same branch and the other is not
482 # and there was exactly one branchhead known, the existing
483 # branchhead can only be an ancestor if it is the parent.
484 # Otherwise it would have been removed as ancestor of
485 # the parent before. The other parent therefore can't have
486 # a branchhead as ancestor.
487 # - In all other cases, the parents on different branches
488 # could have a branchhead as ancestor. Those parents are
489 # kept in the "uncertain" set. If all branchheads are also
490 # topological heads, they can't have descendants and further
491 # checks can be skipped. Otherwise, the ancestors of the
492 # "uncertain" set are removed from branchheads.
493 # This computation is heavy and avoided if at all possible.
457 bheads = self._entries.setdefault(branch, []) 494 bheads = self._entries.setdefault(branch, [])
458 bheadset = {cl.rev(node) for node in bheads} 495 bheadset = {cl.rev(node) for node in bheads}
459 496 uncertain = set()
460 # This have been tested True on all internal usage of this function. 497 for newrev in sorted(newheadrevs):
461 # run it again in case of doubt 498 if not bheadset:
462 # assert not (set(bheadrevs) & set(newheadrevs)) 499 bheadset.add(newrev)
463 bheadset.update(newheadrevs) 500 continue
464 501
465 # This prunes out two kinds of heads - heads that are superseded by 502 parents = [p for p in parentrevs(newrev) if p != nullrev]
466 # a head in newheadrevs, and newheadrevs that are not heads because 503 samebranch = set()
467 # an existing head is their descendant. 504 otherbranch = set()
468 uncertain = bheadset - topoheads 505 for p in parents:
506 if p in bheadset or getbranchinfo(p)[0] == branch:
507 samebranch.add(p)
508 else:
509 otherbranch.add(p)
510 if otherbranch and not (len(bheadset) == len(samebranch) == 1):
511 uncertain.update(otherbranch)
512 bheadset.difference_update(samebranch)
513 bheadset.add(newrev)
514
469 if uncertain: 515 if uncertain:
470 floorrev = min(uncertain) 516 if topoheads is None:
471 ancestors = set(cl.ancestors(newheadrevs, floorrev)) 517 topoheads = set(cl.headrevs())
472 bheadset -= ancestors 518 if bheadset - topoheads:
519 floorrev = min(bheadset)
520 ancestors = set(cl.ancestors(newheadrevs, floorrev))
521 bheadset -= ancestors
473 bheadrevs = sorted(bheadset) 522 bheadrevs = sorted(bheadset)
474 self[branch] = [cl.node(rev) for rev in bheadrevs] 523 self[branch] = [cl.node(rev) for rev in bheadrevs]
475 tiprev = bheadrevs[-1] 524 tiprev = bheadrevs[-1]
476 if tiprev > ntiprev: 525 if tiprev > ntiprev:
477 ntiprev = tiprev 526 ntiprev = tiprev