comparison mercurial/localrepo.py @ 16716:0311a6abd38a

localrepo: strip now incrementally updates the branchheads cache Destroying history via strip used to invalidate the branchheads cache, causing it to be regenerated the next time it is read. This is expensive in large repos. This change converts strip to pass info to localrepo.destroyed() to enable to it to incrementally update the cache, improving the performance of strip and other operations that depend on it (e.g., rebase). This change also strengthens a bit the integrity checking of the branchheads cache when it is read, by rejecting the cache if it has nodes in it that no longer exist.
author Joshua Redstone <joshua.redstone@fb.com>
date Fri, 11 May 2012 10:35:54 -0700
parents f8dee1a8f844
children e7bf09acd410
comparison
equal deleted inserted replaced
16715:1e24da6fcd67 16716:0311a6abd38a
538 for l in lines: 538 for l in lines:
539 if not l: 539 if not l:
540 continue 540 continue
541 node, label = l.split(" ", 1) 541 node, label = l.split(" ", 1)
542 label = encoding.tolocal(label.strip()) 542 label = encoding.tolocal(label.strip())
543 if not node in self:
544 raise ValueError('invalidating branch cache because node '+
545 '%s does not exist' % node)
543 partial.setdefault(label, []).append(bin(node)) 546 partial.setdefault(label, []).append(bin(node))
544 except KeyboardInterrupt: 547 except KeyboardInterrupt:
545 raise 548 raise
546 except Exception, inst: 549 except Exception, inst:
547 if self.ui.debugflag: 550 if self.ui.debugflag:
559 f.close() 562 f.close()
560 except (IOError, OSError): 563 except (IOError, OSError):
561 pass 564 pass
562 565
563 def _updatebranchcache(self, partial, ctxgen): 566 def _updatebranchcache(self, partial, ctxgen):
567 """Given a branchhead cache, partial, that may have extra nodes or be
568 missing heads, and a generator of nodes that are at least a superset of
569 heads missing, this function updates partial to be correct.
570 """
564 # collect new branch entries 571 # collect new branch entries
565 newbranches = {} 572 newbranches = {}
566 for c in ctxgen: 573 for c in ctxgen:
567 newbranches.setdefault(c.branch(), []).append(c.node()) 574 newbranches.setdefault(c.branch(), []).append(c.node())
568 # if older branchheads are reachable from new ones, they aren't 575 # if older branchheads are reachable from new ones, they aren't
569 # really branchheads. Note checking parents is insufficient: 576 # really branchheads. Note checking parents is insufficient:
570 # 1 (branch a) -> 2 (branch b) -> 3 (branch a) 577 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
571 for branch, newnodes in newbranches.iteritems(): 578 for branch, newnodes in newbranches.iteritems():
572 bheads = partial.setdefault(branch, []) 579 bheads = partial.setdefault(branch, [])
573 bheads.extend(newnodes) 580 bheads.extend(newnodes)
574 if len(bheads) <= 1: 581 # Remove duplicates - nodes that are in newnodes and are already in
575 continue 582 # bheads. This can happen if you strip a node and its parent was
576 bheads = sorted(bheads, key=lambda x: self[x].rev()) 583 # already a head (because they're on different branches).
577 # starting from tip means fewer passes over reachable 584 bheads = set(bheads)
578 while newnodes: 585
579 latest = newnodes.pop() 586 # Remove candidate heads that no longer are in the repo (e.g., as
580 if latest not in bheads: 587 # the result of a strip that just happened).
581 continue 588 # avoid using 'bhead in self' here because that dives down into
582 minbhnode = self[bheads[0]].node() 589 # branchcache code somewhat recrusively.
583 reachable = self.changelog.reachable(latest, minbhnode) 590 bheads = [bhead for bhead in bheads \
584 reachable.remove(latest) 591 if self.changelog.hasnode(bhead)]
585 if reachable: 592 if len(bheads) > 1:
586 bheads = [b for b in bheads if b not in reachable] 593 bheads = sorted(bheads, key=lambda x: self[x].rev())
594 # starting from tip means fewer passes over reachable
595 while newnodes:
596 latest = newnodes.pop()
597 if latest not in bheads:
598 continue
599 minbhnode = self[bheads[0]].node()
600 reachable = self.changelog.reachable(latest, minbhnode)
601 reachable.remove(latest)
602 if reachable:
603 bheads = [b for b in bheads if b not in reachable]
587 partial[branch] = bheads 604 partial[branch] = bheads
605
606 # There may be branches that cease to exist when the last commit in the
607 # branch was stripped. This code filters them out. Note that the
608 # branch that ceased to exist may not be in newbranches because
609 # newbranches is the set of candidate heads, which when you strip the
610 # last commit in a branch will be the parent branch.
611 for branch in partial.keys():
612 nodes = [head for head in partial[branch] \
613 if self.changelog.hasnode(head)]
614 if len(nodes) < 1:
615 del partial[branch]
588 616
589 def lookup(self, key): 617 def lookup(self, key):
590 return self[key].node() 618 return self[key].node()
591 619
592 def lookupbranch(self, key, remote=None): 620 def lookupbranch(self, key, remote=None):
846 ui.status(_('working directory now based on ' 874 ui.status(_('working directory now based on '
847 'revisions %d and %d\n') % parents) 875 'revisions %d and %d\n') % parents)
848 else: 876 else:
849 ui.status(_('working directory now based on ' 877 ui.status(_('working directory now based on '
850 'revision %d\n') % parents) 878 'revision %d\n') % parents)
879 # TODO: if we know which new heads may result from this rollback, pass
880 # them to destroy(), which will prevent the branchhead cache from being
881 # invalidated.
851 self.destroyed() 882 self.destroyed()
852 return 0 883 return 0
853 884
854 def invalidatecaches(self): 885 def invalidatecaches(self):
855 def delcache(name): 886 def delcache(name):
1289 finally: 1320 finally:
1290 if tr: 1321 if tr:
1291 tr.release() 1322 tr.release()
1292 lock.release() 1323 lock.release()
1293 1324
1294 def destroyed(self): 1325 def destroyed(self, newheadrevs=None):
1295 '''Inform the repository that nodes have been destroyed. 1326 '''Inform the repository that nodes have been destroyed.
1296 Intended for use by strip and rollback, so there's a common 1327 Intended for use by strip and rollback, so there's a common
1297 place for anything that has to be done after destroying history.''' 1328 place for anything that has to be done after destroying history.
1298 # XXX it might be nice if we could take the list of destroyed 1329
1299 # nodes, but I don't see an easy way for rollback() to do that 1330 If you know the branchheadcache was uptodate before nodes were removed
1331 and you also know the set of candidate set of new heads that may have
1332 resulted from the destruction, you can set newheadrevs. This will
1333 enable the code to update the branchheads cache, rather than having
1334 future code decide it's invalid and regenrating it.
1335 '''
1336 if newheadrevs:
1337 tiprev = len(self) - 1
1338 ctxgen = (self[rev] for rev in newheadrevs)
1339 self._updatebranchcache(self._branchcache, ctxgen)
1340 self._writebranchcache(self._branchcache, self.changelog.tip(),
1341 tiprev)
1342 else:
1343 # No info to update the cache. If nodes were destroyed, the cache
1344 # is stale and this will be caught the next time it is read.
1345 pass
1300 1346
1301 # Ensure the persistent tag cache is updated. Doing it now 1347 # Ensure the persistent tag cache is updated. Doing it now
1302 # means that the tag cache only has to worry about destroyed 1348 # means that the tag cache only has to worry about destroyed
1303 # heads immediately after a strip/rollback. That in turn 1349 # heads immediately after a strip/rollback. That in turn
1304 # guarantees that "cachetip == currenttip" (comparing both rev 1350 # guarantees that "cachetip == currenttip" (comparing both rev