comparison mercurial/localrepo.py @ 17013:c8eda7bbdcab

strip: incrementally update the branchheads cache after a strip This function augments strip to incrementally update the branchheads cache rather than recompute it from scratch. This speeds up the performance of strip and rebase on repos with long history. The performance optimization only happens if the revisions stripped are all on the same branch and the parents of the stripped revisions are also on that same branch. This adds a few test cases, particularly one that reproduces the extra heads that mpm observed.
author Joshua Redstone <joshua.redstone@fb.com>
date Fri, 18 May 2012 12:45:47 -0700
parents ea97744c4801
children ad0d6c2b3279
comparison
equal deleted inserted replaced
17012:ea97744c4801 17013:c8eda7bbdcab
548 for l in lines: 548 for l in lines:
549 if not l: 549 if not l:
550 continue 550 continue
551 node, label = l.split(" ", 1) 551 node, label = l.split(" ", 1)
552 label = encoding.tolocal(label.strip()) 552 label = encoding.tolocal(label.strip())
553 if not node in self:
554 raise ValueError('invalidating branch cache because node '+
555 '%s does not exist' % node)
553 partial.setdefault(label, []).append(bin(node)) 556 partial.setdefault(label, []).append(bin(node))
554 except KeyboardInterrupt: 557 except KeyboardInterrupt:
555 raise 558 raise
556 except Exception, inst: 559 except Exception, inst:
557 if self.ui.debugflag: 560 if self.ui.debugflag:
569 f.close() 572 f.close()
570 except (IOError, OSError): 573 except (IOError, OSError):
571 pass 574 pass
572 575
573 def _updatebranchcache(self, partial, ctxgen): 576 def _updatebranchcache(self, partial, ctxgen):
577 """Given a branchhead cache, partial, that may have extra nodes or be
578 missing heads, and a generator of nodes that are at least a superset of
579 heads missing, this function updates partial to be correct.
580 """
574 # collect new branch entries 581 # collect new branch entries
575 newbranches = {} 582 newbranches = {}
576 for c in ctxgen: 583 for c in ctxgen:
577 newbranches.setdefault(c.branch(), []).append(c.rev()) 584 newbranches.setdefault(c.branch(), []).append(c.node())
578 # if older branchheads are reachable from new ones, they aren't 585 # if older branchheads are reachable from new ones, they aren't
579 # really branchheads. Note checking parents is insufficient: 586 # really branchheads. Note checking parents is insufficient:
580 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) 587 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
581 for branch, newrevs in newbranches.iteritems(): 588 for branch, newnodes in newbranches.iteritems():
582 bheadrevs = [self.changelog.rev(node) for node in 589 bheads = partial.setdefault(branch, [])
583 partial.setdefault(branch, [])] 590 # Remove candidate heads that no longer are in the repo (e.g., as
584 bheadrevs.extend(newrevs) 591 # the result of a strip that just happened). Avoid using 'node in
585 bheadrevs.sort() 592 # self' here because that dives down into branchcache code somewhat
586 # starting from tip means fewer passes over ancestors 593 # recrusively.
587 newrevs.sort() 594 bheadrevs = [self.changelog.rev(node) for node in bheads
588 while newrevs: 595 if self.changelog.hasnode(node)]
589 latest = newrevs.pop() 596 newheadrevs = [self.changelog.rev(node) for node in newnodes
597 if self.changelog.hasnode(node)]
598 ctxisnew = bheadrevs and min(newheadrevs) > max(bheadrevs)
599 # Remove duplicates - nodes that are in newheadrevs and are already
600 # in bheadrevs. This can happen if you strip a node whose parent
601 # was already a head (because they're on different branches).
602 bheadrevs = sorted(set(bheadrevs).union(newheadrevs))
603
604 # Starting from tip means fewer passes over reachable. If we know
605 # the new candidates are not ancestors of existing heads, we don't
606 # have to examine ancestors of existing heads
607 if ctxisnew:
608 iterrevs = sorted(newheadrevs)
609 else:
610 iterrevs = list(bheadrevs)
611
612 # This loop prunes out two kinds of heads - heads that are
613 # superceded by a head in newheadrevs, and newheadrevs that are not
614 # heads because an existing head is their descendant.
615 while iterrevs:
616 latest = iterrevs.pop()
590 if latest not in bheadrevs: 617 if latest not in bheadrevs:
591 continue 618 continue
592 ancestors = set(self.changelog.ancestors([latest], 619 ancestors = set(self.changelog.ancestors([latest],
593 bheadrevs[0])) 620 bheadrevs[0]))
594 if ancestors: 621 if ancestors:
595 bheadrevs = [b for b in bheadrevs if b not in ancestors] 622 bheadrevs = [b for b in bheadrevs if b not in ancestors]
596 partial[branch] = [self.changelog.node(rev) for rev in bheadrevs] 623 partial[branch] = [self.changelog.node(rev) for rev in bheadrevs]
624
625 # There may be branches that cease to exist when the last commit in the
626 # branch was stripped. This code filters them out. Note that the
627 # branch that ceased to exist may not be in newbranches because
628 # newbranches is the set of candidate heads, which when you strip the
629 # last commit in a branch will be the parent branch.
630 for branch in partial:
631 nodes = [head for head in partial[branch]
632 if self.changelog.hasnode(head)]
633 if not nodes:
634 del partial[branch]
597 635
598 def lookup(self, key): 636 def lookup(self, key):
599 return self[key].node() 637 return self[key].node()
600 638
601 def lookupbranch(self, key, remote=None): 639 def lookupbranch(self, key, remote=None):
855 ui.status(_('working directory now based on ' 893 ui.status(_('working directory now based on '
856 'revisions %d and %d\n') % parents) 894 'revisions %d and %d\n') % parents)
857 else: 895 else:
858 ui.status(_('working directory now based on ' 896 ui.status(_('working directory now based on '
859 'revision %d\n') % parents) 897 'revision %d\n') % parents)
898 # TODO: if we know which new heads may result from this rollback, pass
899 # them to destroy(), which will prevent the branchhead cache from being
900 # invalidated.
860 self.destroyed() 901 self.destroyed()
861 return 0 902 return 0
862 903
863 def invalidatecaches(self): 904 def invalidatecaches(self):
864 def delcache(name): 905 def delcache(name):
1301 finally: 1342 finally:
1302 if tr: 1343 if tr:
1303 tr.release() 1344 tr.release()
1304 lock.release() 1345 lock.release()
1305 1346
1306 def destroyed(self): 1347 def destroyed(self, newheadnodes=None):
1307 '''Inform the repository that nodes have been destroyed. 1348 '''Inform the repository that nodes have been destroyed.
1308 Intended for use by strip and rollback, so there's a common 1349 Intended for use by strip and rollback, so there's a common
1309 place for anything that has to be done after destroying history.''' 1350 place for anything that has to be done after destroying history.
1310 # XXX it might be nice if we could take the list of destroyed 1351
1311 # nodes, but I don't see an easy way for rollback() to do that 1352 If you know the branchheadcache was uptodate before nodes were removed
1353 and you also know the set of candidate new heads that may have resulted
1354 from the destruction, you can set newheadnodes. This will enable the
1355 code to update the branchheads cache, rather than having future code
1356 decide it's invalid and regenrating it from scratch.
1357 '''
1358 # If we have info, newheadnodes, on how to update the branch cache, do
1359 # it, Otherwise, since nodes were destroyed, the cache is stale and this
1360 # will be caught the next time it is read.
1361 if newheadnodes:
1362 tiprev = len(self) - 1
1363 ctxgen = (self[node] for node in newheadnodes
1364 if self.changelog.hasnode(node))
1365 self._updatebranchcache(self._branchcache, ctxgen)
1366 self._writebranchcache(self._branchcache, self.changelog.tip(),
1367 tiprev)
1312 1368
1313 # Ensure the persistent tag cache is updated. Doing it now 1369 # Ensure the persistent tag cache is updated. Doing it now
1314 # means that the tag cache only has to worry about destroyed 1370 # means that the tag cache only has to worry about destroyed
1315 # heads immediately after a strip/rollback. That in turn 1371 # heads immediately after a strip/rollback. That in turn
1316 # guarantees that "cachetip == currenttip" (comparing both rev 1372 # guarantees that "cachetip == currenttip" (comparing both rev