comparison mercurial/revlog.py @ 1598:14d1f1868bf6
cleanup of revlog.group when repository is local
revlog.group cached every chunk from the revlog; that behaviour was needed to
minimize round trips with old-http. We now cache revlog data ~4 MB at a time.
Server-side memory use when pulling drops to at most 35 MB, whereas without the
patch more than 160 MB was used when cloning the Linux kernel repository.
Clone time is slightly higher, mainly because of the integrity check in
revlog.revision.
before:
110.25user 20.90system 2:52.00elapsed 76%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+708707minor)pagefaults 0swaps

after:
117.56user 18.86system 2:50.43elapsed 80%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+748366minor)pagefaults 0swaps
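The caching strategy is plain read-ahead: when a requested chunk falls outside
the cached window, load a ~4 MB window starting at the chunk's offset and serve
subsequent requests from memory until one misses. A minimal standalone sketch of
the pattern (this `ChunkCache` class is illustrative, not the revlog API, and
plain `zlib` stands in for revlog's own decompress):

```python
import zlib

class ChunkCache(object):
    """Read-ahead cache sketch: keep one ~4 MB window of the data file
    in memory so consecutive chunk reads avoid re-reading the file."""

    WINDOW = 4096 * 1024  # ~4 MB, matching the patch

    def __init__(self, opener, datafile):
        self.opener = opener      # callable returning an open file object
        self.datafile = datafile
        self.chunkcache = None    # (window start offset, window bytes)

    def chunk(self, start, length):
        end = start + length

        def loadcache():
            df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(max(self.WINDOW, length)))

        if not self.chunkcache:
            loadcache()
        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            offset = start - cache_start   # hit: slice out of the window
        else:
            loadcache()                    # miss: re-read around `start`
            offset = 0
        return zlib.decompress(self.chunkcache[1][offset:offset + length])
```

Random access can defeat the window (each miss re-reads up to ~4 MB), but
revision() and group() walk revisions in increasing offset order, so in the
pull/clone path hits should dominate.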
| author | Benoit Boissinot <benoit.boissinot@ens-lyon.org> |
|---|---|
| date | Tue, 27 Dec 2005 13:09:49 -0600 |
| parents | 59b3639df0a9 |
| children | bfa90d9a3b77 |
--- mercurial/revlog.py@1597:96b47ef8f740
+++ mercurial/revlog.py@1598:14d1f1868bf6
186 """ | 186 """ |
187 self.indexfile = indexfile | 187 self.indexfile = indexfile |
188 self.datafile = datafile | 188 self.datafile = datafile |
189 self.opener = opener | 189 self.opener = opener |
190 self.cache = None | 190 self.cache = None |
191 self.chunkcache = None | |
191 | 192 |
192 try: | 193 try: |
193 i = self.opener(self.indexfile).read() | 194 i = self.opener(self.indexfile).read() |
194 except IOError, inst: | 195 except IOError, inst: |
195 if inst.errno != errno.ENOENT: | 196 if inst.errno != errno.ENOENT: |
@@ -471,58 +472,73 @@
 
     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)
 
+    def chunk(self, rev):
+        start, length = self.start(rev), self.length(rev)
+        end = start + length
+
+        def loadcache():
+            cache_length = max(4096 * 1024, length) # 4Mo
+            df = self.opener(self.datafile)
+            df.seek(start)
+            self.chunkcache = (start, df.read(cache_length))
+
+        if not self.chunkcache:
+            loadcache()
+
+        cache_start = self.chunkcache[0]
+        cache_end = cache_start + len(self.chunkcache[1])
+        if start >= cache_start and end <= cache_end:
+            # it is cached
+            offset = start - cache_start
+        else:
+            loadcache()
+            offset = 0
+
+        #def checkchunk():
+        #    df = self.opener(self.datafile)
+        #    df.seek(start)
+        #    return df.read(length)
+        #assert s == checkchunk()
+        return decompress(self.chunkcache[1][offset:offset + length])
+
     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
-            f = self.opener(self.datafile)
-            f.seek(self.start(r))
-            data = f.read(self.length(r))
-            return decompress(data)
+            return self.chunk(r)
 
     def revision(self, node):
         """return an uncompressed revision of a given"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]
 
         # look up what we need to read
         text = None
         rev = self.rev(node)
-        start, length, base, link, p1, p2, node = self.index[rev]
-        end = start + length
-        if base != rev: start = self.start(base)
+        base = self.base(rev)
 
         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
-            start = self.start(base + 1)
             text = self.cache[2]
-            last = 0
-
-        f = self.opener(self.datafile)
-        f.seek(start)
-        data = f.read(end - start)
-
-        if text is None:
-            last = self.length(base)
-            text = decompress(data[:last])
+        else:
+            text = self.chunk(base)
 
         bins = []
         for r in xrange(base + 1, rev + 1):
-            s = self.length(r)
-            bins.append(decompress(data[last:last + s]))
-            last = last + s
+            bins.append(self.chunk(r))
 
         text = mdiff.patches(text, bins)
 
+        p1, p2 = self.parents(node)
         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))
 
         self.cache = (node, rev, text)
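The rewritten revision() makes the delta-chain logic explicit: fetch the full
text stored at the chain base with chunk(base), then patch forward one chunk per
revision up to rev. Each non-base chunk is a binary delta in the format
mdiff.patches consumes; as a hedged sketch (assuming the classic bdiff fragment
layout of a 12-byte big-endian start/end/length header followed by the
replacement bytes), applying a single delta looks roughly like this:

```python
import struct

def apply_delta(text, delta):
    # Sketch of applying one bdiff-style binary delta: a sequence of
    # fragments, each a (start, end, length) triple of 32-bit big-endian
    # ints followed by `length` bytes that replace text[start:end].
    out = []
    last = 0
    pos = 0
    while pos < len(delta):
        start, end, l = struct.unpack(">lll", delta[pos:pos + 12])
        pos += 12
        out.append(text[last:start])    # copy the unchanged region
        out.append(delta[pos:pos + l])  # insert the replacement bytes
        pos += l
        last = end
    out.append(text[last:])             # tail after the last fragment
    return b"".join(out)
```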
@@ -648,113 +664,54 @@
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()
 
-    def group(self, nodelist, lookup, infocollect = None):
+    def group(self, nodelist, lookup, infocollect=None):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]
-        needed = dict.fromkeys(revs, 1)
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return
 
         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
-        # for each delta that isn't contiguous in the log, we need to
-        # reconstruct the base, reconstruct the result, and then
-        # calculate the delta. We also need to do this where we've
-        # stored a full version and not a delta
-        for i in xrange(0, len(revs) - 1):
-            a, b = revs[i], revs[i + 1]
-            if a + 1 != b or self.base(b) == b:
-                for j in xrange(self.base(a), a + 1):
-                    needed[j] = 1
-                for j in xrange(self.base(b), b + 1):
-                    needed[j] = 1
-
-        # calculate spans to retrieve from datafile
-        needed = needed.keys()
-        needed.sort()
-        spans = []
-        oo = -1
-        ol = 0
-        for n in needed:
-            if n < 0: continue
-            o = self.start(n)
-            l = self.length(n)
-            if oo + ol == o: # can we merge with the previous?
-                nl = spans[-1][2]
-                nl.append((n, l))
-                ol += l
-                spans[-1] = (oo, ol, nl)
-            else:
-                oo = o
-                ol = l
-                spans.append((oo, ol, [(n, l)]))
-
-        # read spans in, divide up chunks
-        chunks = {}
-        for span in spans:
-            # we reopen the file for each span to make http happy for now
-            f = self.opener(self.datafile)
-            f.seek(span[0])
-            data = f.read(span[1])
-
-            # divide up the span
-            pos = 0
-            for r, l in span[2]:
-                chunks[r] = decompress(data[pos: pos + l])
-                pos += l
-
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
+            bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
-        deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
-            n = self.node(b)
+            na = self.node(a)
+            nb = self.node(b)
 
             if infocollect is not None:
-                infocollect(n)
+                infocollect(nb)
 
             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
-                if a >= 0:
-                    base = self.base(a)
-                    ta = chunks[self.base(a)]
-                    ta = construct(ta, base, a)
-                else:
-                    ta = ""
-
-                base = self.base(b)
-                if a > base:
-                    base = a
-                    tb = ta
-                else:
-                    tb = chunks[self.base(b)]
-                tb = construct(tb, base, b)
+                ta = self.revision(na)
+                tb = self.revision(nb)
                 d = self.diff(ta, tb)
             else:
-                d = chunks[b]
+                d = self.chunk(b)
 
-            p = self.parents(n)
-            meta = n + p[0] + p[1] + lookup(n)
+            p = self.parents(nb)
+            meta = nb + p[0] + p[1] + lookup(nb)
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d
 
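For reference, the stream group() yields is straightforward to parse: each chunk
is prefixed by a 4-byte big-endian length that counts itself, followed by 80
bytes of metadata (node, p1, p2, and the linknode returned by lookup(), 20 bytes
each) and then the delta; an empty group is just struct.pack(">l", 0). A minimal
consumer sketch (the iterchunks name and the file-like stream argument are
assumptions for illustration):

```python
import struct

def iterchunks(stream):
    # Walk the length-prefixed chunks yielded by revlog.group().
    while True:
        header = stream.read(4)
        if len(header) < 4:
            break
        l = struct.unpack(">l", header)[0]
        if l <= 4:
            break  # a zero/empty length ends the stream
        data = stream.read(l - 4)
        node, p1, p2, link = (data[0:20], data[20:40],
                              data[40:60], data[60:80])
        yield node, p1, p2, link, data[80:]  # metadata plus the delta
```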