Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/branchmap.py @ 24376:203a078da052
revbranchcache: populate cache incrementally
Previously the cache would populate completely the first time it was accessed.
This could take over a minute on larger repos. This patch changes it to update
incrementally. Only values that are read will be written, and it will only
rewrite as much of the file as strictly necessary.
This adds a magic value of '\0\0\0\0' to represent an empty cache entry. The
probability of this matching an actual commit hash prefix is tiny, so it's ok if
that's always considered a cache miss. This is also BC safe since any existing
entries with '\0\0\0\0' will just be considered misses.
Perf numbers:
Mozilla-central: hg --time log -r 'branch(mobile)' -T.
Cold Cache: 14.7s -> 15.1s (3% worse)
Warm Cache: 1.6s -> 2.1s (30% worse)
Mozilla-cental: hg perfbranchmap
2s -> 2.4s (20% worse)
hg: hg log -r 'branch(stable) & branch(default)'
Cold Cache: 3.1s -> 1.9s (40% better - because the old code missed the cache on
both branch() revset iterations, so it did twice the work)
Warm Cache: 0.2 -> 0.26 (30% worse)
internal huge repo: hg --time log -r 'tip & branch(default)'
Cold Cache: 65.4s -> 0.2s (327x better)
While this change introduces minor regressions when iterating over every commit
in a branch, it massively improves the cold cache time for operations which
touch a single commit. I feel the better O() is worth it in this case.
author | Durham Goode <durham@fb.com> |
---|---|
date | Tue, 10 Feb 2015 20:04:47 -0800 |
parents | fe255b2525d5 |
children | 656f93ce66d5 |
comparison
equal
deleted
inserted
replaced
24375:fe255b2525d5 | 24376:203a078da052 |
---|---|
365 changelog = self._repo.changelog | 365 changelog = self._repo.changelog |
366 rbcrevidx = rev * _rbcrecsize | 366 rbcrevidx = rev * _rbcrecsize |
367 | 367 |
368 # if requested rev is missing, add and populate all missing revs | 368 # if requested rev is missing, add and populate all missing revs |
369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize: | 369 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize: |
370 first = len(self._rbcrevs) // _rbcrecsize | |
371 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize - | 370 self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize - |
372 len(self._rbcrevs))) | 371 len(self._rbcrevs))) |
373 for r in xrange(first, len(changelog)): | |
374 self._branchinfo(r) | |
375 | 372 |
376 # fast path: extract data from cache, use it if node is matching | 373 # fast path: extract data from cache, use it if node is matching |
377 reponode = changelog.node(rev)[:_rbcnodelen] | 374 reponode = changelog.node(rev)[:_rbcnodelen] |
378 cachenode, branchidx = unpack( | 375 cachenode, branchidx = unpack( |
379 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize)) | 376 _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize)) |
380 close = bool(branchidx & _rbccloseflag) | 377 close = bool(branchidx & _rbccloseflag) |
381 if close: | 378 if close: |
382 branchidx &= _rbcbranchidxmask | 379 branchidx &= _rbcbranchidxmask |
383 if cachenode == reponode: | 380 if cachenode == '\0\0\0\0': |
381 pass | |
382 elif cachenode == reponode: | |
384 return self._names[branchidx], close | 383 return self._names[branchidx], close |
384 else: | |
385 # rev/node map has changed, invalidate the cache from here up | |
386 truncate = rbcrevidx + _rbcrecsize | |
387 del self._rbcrevs[truncate:] | |
388 self._rbcrevslen = min(self._rbcrevslen, truncate) | |
389 | |
385 # fall back to slow path and make sure it will be written to disk | 390 # fall back to slow path and make sure it will be written to disk |
386 self._rbcrevslen = min(self._rbcrevslen, rev) | |
387 return self._branchinfo(rev) | 391 return self._branchinfo(rev) |
388 | 392 |
389 def _branchinfo(self, rev): | 393 def _branchinfo(self, rev): |
390 """Retrieve branch info from changelog and update _rbcrevs""" | 394 """Retrieve branch info from changelog and update _rbcrevs""" |
391 changelog = self._repo.changelog | 395 changelog = self._repo.changelog |
406 """Writes the node's branch data to the in-memory cache data.""" | 410 """Writes the node's branch data to the in-memory cache data.""" |
407 rbcrevidx = rev * _rbcrecsize | 411 rbcrevidx = rev * _rbcrecsize |
408 rec = array('c') | 412 rec = array('c') |
409 rec.fromstring(pack(_rbcrecfmt, node, branchidx)) | 413 rec.fromstring(pack(_rbcrecfmt, node, branchidx)) |
410 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec | 414 self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec |
415 self._rbcrevslen = min(self._rbcrevslen, rev) | |
411 | 416 |
412 def write(self): | 417 def write(self): |
413 """Save branch cache if it is dirty.""" | 418 """Save branch cache if it is dirty.""" |
414 repo = self._repo | 419 repo = self._repo |
415 if self._rbcnamescount < len(self._names): | 420 if self._rbcnamescount < len(self._names): |