Mercurial > public > mercurial-scm > hg
comparison mercurial/localrepo.py @ 17013:c8eda7bbdcab
strip: incrementally update the branchheads cache after a strip
This function augments strip to incrementally update the branchheads cache
rather than recompute it from scratch. This speeds up the performance of strip
and rebase on repos with long history. The performance optimization only
happens if the revisions stripped are all on the same branch and the parents of
the stripped revisions are also on that same branch.
This adds a few test cases, particularly one that reproduces the extra heads
that mpm observed.
author | Joshua Redstone <joshua.redstone@fb.com> |
---|---|
date | Fri, 18 May 2012 12:45:47 -0700 |
parents | ea97744c4801 |
children | ad0d6c2b3279 |
comparison
equal
deleted
inserted
replaced
17012:ea97744c4801 | 17013:c8eda7bbdcab |
---|---|
548 for l in lines: | 548 for l in lines: |
549 if not l: | 549 if not l: |
550 continue | 550 continue |
551 node, label = l.split(" ", 1) | 551 node, label = l.split(" ", 1) |
552 label = encoding.tolocal(label.strip()) | 552 label = encoding.tolocal(label.strip()) |
553 if not node in self: | |
554 raise ValueError('invalidating branch cache because node '+ | |
555 '%s does not exist' % node) | |
553 partial.setdefault(label, []).append(bin(node)) | 556 partial.setdefault(label, []).append(bin(node)) |
554 except KeyboardInterrupt: | 557 except KeyboardInterrupt: |
555 raise | 558 raise |
556 except Exception, inst: | 559 except Exception, inst: |
557 if self.ui.debugflag: | 560 if self.ui.debugflag: |
569 f.close() | 572 f.close() |
570 except (IOError, OSError): | 573 except (IOError, OSError): |
571 pass | 574 pass |
572 | 575 |
573 def _updatebranchcache(self, partial, ctxgen): | 576 def _updatebranchcache(self, partial, ctxgen): |
577 """Given a branchhead cache, partial, that may have extra nodes or be | |
578 missing heads, and a generator of nodes that are at least a superset of | |
579 heads missing, this function updates partial to be correct. | |
580 """ | |
574 # collect new branch entries | 581 # collect new branch entries |
575 newbranches = {} | 582 newbranches = {} |
576 for c in ctxgen: | 583 for c in ctxgen: |
577 newbranches.setdefault(c.branch(), []).append(c.rev()) | 584 newbranches.setdefault(c.branch(), []).append(c.node()) |
578 # if older branchheads are reachable from new ones, they aren't | 585 # if older branchheads are reachable from new ones, they aren't |
579 # really branchheads. Note checking parents is insufficient: | 586 # really branchheads. Note checking parents is insufficient: |
580 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) | 587 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) |
581 for branch, newrevs in newbranches.iteritems(): | 588 for branch, newnodes in newbranches.iteritems(): |
582 bheadrevs = [self.changelog.rev(node) for node in | 589 bheads = partial.setdefault(branch, []) |
583 partial.setdefault(branch, [])] | 590 # Remove candidate heads that no longer are in the repo (e.g., as |
584 bheadrevs.extend(newrevs) | 591 # the result of a strip that just happened). Avoid using 'node in |
585 bheadrevs.sort() | 592 # self' here because that dives down into branchcache code somewhat |
586 # starting from tip means fewer passes over ancestors | 593 # recrusively. |
587 newrevs.sort() | 594 bheadrevs = [self.changelog.rev(node) for node in bheads |
588 while newrevs: | 595 if self.changelog.hasnode(node)] |
589 latest = newrevs.pop() | 596 newheadrevs = [self.changelog.rev(node) for node in newnodes |
597 if self.changelog.hasnode(node)] | |
598 ctxisnew = bheadrevs and min(newheadrevs) > max(bheadrevs) | |
599 # Remove duplicates - nodes that are in newheadrevs and are already | |
600 # in bheadrevs. This can happen if you strip a node whose parent | |
601 # was already a head (because they're on different branches). | |
602 bheadrevs = sorted(set(bheadrevs).union(newheadrevs)) | |
603 | |
604 # Starting from tip means fewer passes over reachable. If we know | |
605 # the new candidates are not ancestors of existing heads, we don't | |
606 # have to examine ancestors of existing heads | |
607 if ctxisnew: | |
608 iterrevs = sorted(newheadrevs) | |
609 else: | |
610 iterrevs = list(bheadrevs) | |
611 | |
612 # This loop prunes out two kinds of heads - heads that are | |
613 # superceded by a head in newheadrevs, and newheadrevs that are not | |
614 # heads because an existing head is their descendant. | |
615 while iterrevs: | |
616 latest = iterrevs.pop() | |
590 if latest not in bheadrevs: | 617 if latest not in bheadrevs: |
591 continue | 618 continue |
592 ancestors = set(self.changelog.ancestors([latest], | 619 ancestors = set(self.changelog.ancestors([latest], |
593 bheadrevs[0])) | 620 bheadrevs[0])) |
594 if ancestors: | 621 if ancestors: |
595 bheadrevs = [b for b in bheadrevs if b not in ancestors] | 622 bheadrevs = [b for b in bheadrevs if b not in ancestors] |
596 partial[branch] = [self.changelog.node(rev) for rev in bheadrevs] | 623 partial[branch] = [self.changelog.node(rev) for rev in bheadrevs] |
624 | |
625 # There may be branches that cease to exist when the last commit in the | |
626 # branch was stripped. This code filters them out. Note that the | |
627 # branch that ceased to exist may not be in newbranches because | |
628 # newbranches is the set of candidate heads, which when you strip the | |
629 # last commit in a branch will be the parent branch. | |
630 for branch in partial: | |
631 nodes = [head for head in partial[branch] | |
632 if self.changelog.hasnode(head)] | |
633 if not nodes: | |
634 del partial[branch] | |
597 | 635 |
598 def lookup(self, key): | 636 def lookup(self, key): |
599 return self[key].node() | 637 return self[key].node() |
600 | 638 |
601 def lookupbranch(self, key, remote=None): | 639 def lookupbranch(self, key, remote=None): |
855 ui.status(_('working directory now based on ' | 893 ui.status(_('working directory now based on ' |
856 'revisions %d and %d\n') % parents) | 894 'revisions %d and %d\n') % parents) |
857 else: | 895 else: |
858 ui.status(_('working directory now based on ' | 896 ui.status(_('working directory now based on ' |
859 'revision %d\n') % parents) | 897 'revision %d\n') % parents) |
898 # TODO: if we know which new heads may result from this rollback, pass | |
899 # them to destroy(), which will prevent the branchhead cache from being | |
900 # invalidated. | |
860 self.destroyed() | 901 self.destroyed() |
861 return 0 | 902 return 0 |
862 | 903 |
863 def invalidatecaches(self): | 904 def invalidatecaches(self): |
864 def delcache(name): | 905 def delcache(name): |
1301 finally: | 1342 finally: |
1302 if tr: | 1343 if tr: |
1303 tr.release() | 1344 tr.release() |
1304 lock.release() | 1345 lock.release() |
1305 | 1346 |
1306 def destroyed(self): | 1347 def destroyed(self, newheadnodes=None): |
1307 '''Inform the repository that nodes have been destroyed. | 1348 '''Inform the repository that nodes have been destroyed. |
1308 Intended for use by strip and rollback, so there's a common | 1349 Intended for use by strip and rollback, so there's a common |
1309 place for anything that has to be done after destroying history.''' | 1350 place for anything that has to be done after destroying history. |
1310 # XXX it might be nice if we could take the list of destroyed | 1351 |
1311 # nodes, but I don't see an easy way for rollback() to do that | 1352 If you know the branchheadcache was uptodate before nodes were removed |
1353 and you also know the set of candidate new heads that may have resulted | |
1354 from the destruction, you can set newheadnodes. This will enable the | |
1355 code to update the branchheads cache, rather than having future code | |
1356 decide it's invalid and regenrating it from scratch. | |
1357 ''' | |
1358 # If we have info, newheadnodes, on how to update the branch cache, do | |
1359 # it, Otherwise, since nodes were destroyed, the cache is stale and this | |
1360 # will be caught the next time it is read. | |
1361 if newheadnodes: | |
1362 tiprev = len(self) - 1 | |
1363 ctxgen = (self[node] for node in newheadnodes | |
1364 if self.changelog.hasnode(node)) | |
1365 self._updatebranchcache(self._branchcache, ctxgen) | |
1366 self._writebranchcache(self._branchcache, self.changelog.tip(), | |
1367 tiprev) | |
1312 | 1368 |
1313 # Ensure the persistent tag cache is updated. Doing it now | 1369 # Ensure the persistent tag cache is updated. Doing it now |
1314 # means that the tag cache only has to worry about destroyed | 1370 # means that the tag cache only has to worry about destroyed |
1315 # heads immediately after a strip/rollback. That in turn | 1371 # heads immediately after a strip/rollback. That in turn |
1316 # guarantees that "cachetip == currenttip" (comparing both rev | 1372 # guarantees that "cachetip == currenttip" (comparing both rev |