Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/localrepo.py @ 16716:0311a6abd38a
localrepo: strip now incrementally updates the branchheads cache
Destroying history via strip used to invalidate the branchheads cache,
causing it to be regenerated the next time it is read. This is
expensive in large repos. This change converts strip to pass info to
localrepo.destroyed() to enable to it to incrementally update the
cache, improving the performance of strip and other operations that
depend on it (e.g., rebase).
This change also strengthens a bit the integrity checking of the
branchheads cache when it is read, by rejecting the cache if it has
nodes in it that no longer exist.
author | Joshua Redstone <joshua.redstone@fb.com> |
---|---|
date | Fri, 11 May 2012 10:35:54 -0700 |
parents | f8dee1a8f844 |
children | e7bf09acd410 |
comparison
equal
deleted
inserted
replaced
16715:1e24da6fcd67 | 16716:0311a6abd38a |
---|---|
538 for l in lines: | 538 for l in lines: |
539 if not l: | 539 if not l: |
540 continue | 540 continue |
541 node, label = l.split(" ", 1) | 541 node, label = l.split(" ", 1) |
542 label = encoding.tolocal(label.strip()) | 542 label = encoding.tolocal(label.strip()) |
543 if not node in self: | |
544 raise ValueError('invalidating branch cache because node '+ | |
545 '%s does not exist' % node) | |
543 partial.setdefault(label, []).append(bin(node)) | 546 partial.setdefault(label, []).append(bin(node)) |
544 except KeyboardInterrupt: | 547 except KeyboardInterrupt: |
545 raise | 548 raise |
546 except Exception, inst: | 549 except Exception, inst: |
547 if self.ui.debugflag: | 550 if self.ui.debugflag: |
559 f.close() | 562 f.close() |
560 except (IOError, OSError): | 563 except (IOError, OSError): |
561 pass | 564 pass |
562 | 565 |
563 def _updatebranchcache(self, partial, ctxgen): | 566 def _updatebranchcache(self, partial, ctxgen): |
567 """Given a branchhead cache, partial, that may have extra nodes or be | |
568 missing heads, and a generator of nodes that are at least a superset of | |
569 heads missing, this function updates partial to be correct. | |
570 """ | |
564 # collect new branch entries | 571 # collect new branch entries |
565 newbranches = {} | 572 newbranches = {} |
566 for c in ctxgen: | 573 for c in ctxgen: |
567 newbranches.setdefault(c.branch(), []).append(c.node()) | 574 newbranches.setdefault(c.branch(), []).append(c.node()) |
568 # if older branchheads are reachable from new ones, they aren't | 575 # if older branchheads are reachable from new ones, they aren't |
569 # really branchheads. Note checking parents is insufficient: | 576 # really branchheads. Note checking parents is insufficient: |
570 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) | 577 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) |
571 for branch, newnodes in newbranches.iteritems(): | 578 for branch, newnodes in newbranches.iteritems(): |
572 bheads = partial.setdefault(branch, []) | 579 bheads = partial.setdefault(branch, []) |
573 bheads.extend(newnodes) | 580 bheads.extend(newnodes) |
574 if len(bheads) <= 1: | 581 # Remove duplicates - nodes that are in newnodes and are already in |
575 continue | 582 # bheads. This can happen if you strip a node and its parent was |
576 bheads = sorted(bheads, key=lambda x: self[x].rev()) | 583 # already a head (because they're on different branches). |
577 # starting from tip means fewer passes over reachable | 584 bheads = set(bheads) |
578 while newnodes: | 585 |
579 latest = newnodes.pop() | 586 # Remove candidate heads that no longer are in the repo (e.g., as |
580 if latest not in bheads: | 587 # the result of a strip that just happened). |
581 continue | 588 # avoid using 'bhead in self' here because that dives down into |
582 minbhnode = self[bheads[0]].node() | 589 # branchcache code somewhat recrusively. |
583 reachable = self.changelog.reachable(latest, minbhnode) | 590 bheads = [bhead for bhead in bheads \ |
584 reachable.remove(latest) | 591 if self.changelog.hasnode(bhead)] |
585 if reachable: | 592 if len(bheads) > 1: |
586 bheads = [b for b in bheads if b not in reachable] | 593 bheads = sorted(bheads, key=lambda x: self[x].rev()) |
594 # starting from tip means fewer passes over reachable | |
595 while newnodes: | |
596 latest = newnodes.pop() | |
597 if latest not in bheads: | |
598 continue | |
599 minbhnode = self[bheads[0]].node() | |
600 reachable = self.changelog.reachable(latest, minbhnode) | |
601 reachable.remove(latest) | |
602 if reachable: | |
603 bheads = [b for b in bheads if b not in reachable] | |
587 partial[branch] = bheads | 604 partial[branch] = bheads |
605 | |
606 # There may be branches that cease to exist when the last commit in the | |
607 # branch was stripped. This code filters them out. Note that the | |
608 # branch that ceased to exist may not be in newbranches because | |
609 # newbranches is the set of candidate heads, which when you strip the | |
610 # last commit in a branch will be the parent branch. | |
611 for branch in partial.keys(): | |
612 nodes = [head for head in partial[branch] \ | |
613 if self.changelog.hasnode(head)] | |
614 if len(nodes) < 1: | |
615 del partial[branch] | |
588 | 616 |
589 def lookup(self, key): | 617 def lookup(self, key): |
590 return self[key].node() | 618 return self[key].node() |
591 | 619 |
592 def lookupbranch(self, key, remote=None): | 620 def lookupbranch(self, key, remote=None): |
846 ui.status(_('working directory now based on ' | 874 ui.status(_('working directory now based on ' |
847 'revisions %d and %d\n') % parents) | 875 'revisions %d and %d\n') % parents) |
848 else: | 876 else: |
849 ui.status(_('working directory now based on ' | 877 ui.status(_('working directory now based on ' |
850 'revision %d\n') % parents) | 878 'revision %d\n') % parents) |
879 # TODO: if we know which new heads may result from this rollback, pass | |
880 # them to destroy(), which will prevent the branchhead cache from being | |
881 # invalidated. | |
851 self.destroyed() | 882 self.destroyed() |
852 return 0 | 883 return 0 |
853 | 884 |
854 def invalidatecaches(self): | 885 def invalidatecaches(self): |
855 def delcache(name): | 886 def delcache(name): |
1289 finally: | 1320 finally: |
1290 if tr: | 1321 if tr: |
1291 tr.release() | 1322 tr.release() |
1292 lock.release() | 1323 lock.release() |
1293 | 1324 |
1294 def destroyed(self): | 1325 def destroyed(self, newheadrevs=None): |
1295 '''Inform the repository that nodes have been destroyed. | 1326 '''Inform the repository that nodes have been destroyed. |
1296 Intended for use by strip and rollback, so there's a common | 1327 Intended for use by strip and rollback, so there's a common |
1297 place for anything that has to be done after destroying history.''' | 1328 place for anything that has to be done after destroying history. |
1298 # XXX it might be nice if we could take the list of destroyed | 1329 |
1299 # nodes, but I don't see an easy way for rollback() to do that | 1330 If you know the branchheadcache was uptodate before nodes were removed |
1331 and you also know the set of candidate set of new heads that may have | |
1332 resulted from the destruction, you can set newheadrevs. This will | |
1333 enable the code to update the branchheads cache, rather than having | |
1334 future code decide it's invalid and regenrating it. | |
1335 ''' | |
1336 if newheadrevs: | |
1337 tiprev = len(self) - 1 | |
1338 ctxgen = (self[rev] for rev in newheadrevs) | |
1339 self._updatebranchcache(self._branchcache, ctxgen) | |
1340 self._writebranchcache(self._branchcache, self.changelog.tip(), | |
1341 tiprev) | |
1342 else: | |
1343 # No info to update the cache. If nodes were destroyed, the cache | |
1344 # is stale and this will be caught the next time it is read. | |
1345 pass | |
1300 | 1346 |
1301 # Ensure the persistent tag cache is updated. Doing it now | 1347 # Ensure the persistent tag cache is updated. Doing it now |
1302 # means that the tag cache only has to worry about destroyed | 1348 # means that the tag cache only has to worry about destroyed |
1303 # heads immediately after a strip/rollback. That in turn | 1349 # heads immediately after a strip/rollback. That in turn |
1304 # guarantees that "cachetip == currenttip" (comparing both rev | 1350 # guarantees that "cachetip == currenttip" (comparing both rev |