Mercurial > public > mercurial-scm > hg
comparison mercurial/manifest.py @ 25091:b5052fc73300
treemanifest: store submanifest revlog per directory
With this change, when tree manifests are enabled (in .hg/requires),
commits will be written with one manifest revlog per directory. The
manifest revlogs are stored in
.hg/store/meta/$dir/00manifest.[id] (that is, each directory's revlog
.i index file and .d data file).
Flat manifests can still be read and interacted with as usual (they
are also read into treemanifest instances). The functionality for
writing treemanifest as a flat manifest to disk is still left in the
code; tests still pass with '_treeinmem=True' hardcoded.
Exchange (transferring tree manifests between repositories, e.g. via
push and pull) is not yet implemented.
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Mon, 13 Apr 2015 23:21:02 -0700 |
parents | 48583a1e44f3 |
children | 49c583ca48c4 |
comparison
equal
deleted
inserted
replaced
25090:252412e24551 | 25091:b5052fc73300 |
---|---|
442 return '', f | 442 return '', f |
443 | 443 |
444 class treemanifest(object): | 444 class treemanifest(object): |
445 def __init__(self, dir='', text=''): | 445 def __init__(self, dir='', text=''): |
446 self._dir = dir | 446 self._dir = dir |
447 self._node = revlog.nullid | |
447 self._dirs = {} | 448 self._dirs = {} |
448 # Using _lazymanifest here is a little slower than plain old dicts | 449 # Using _lazymanifest here is a little slower than plain old dicts |
449 self._files = {} | 450 self._files = {} |
450 self._flags = {} | 451 self._flags = {} |
451 self.parse(text) | 452 def readsubtree(subdir, subm): |
453 raise AssertionError('treemanifest constructor only accepts ' | |
454 'flat manifests') | |
455 self.parse(text, readsubtree) | |
452 | 456 |
453 def _subpath(self, path): | 457 def _subpath(self, path): |
454 return self._dir + path | 458 return self._dir + path |
455 | 459 |
456 def __len__(self): | 460 def __len__(self): |
462 def _isempty(self): | 466 def _isempty(self): |
463 return (not self._files and (not self._dirs or | 467 return (not self._files and (not self._dirs or |
464 util.all(m._isempty() for m in self._dirs.values()))) | 468 util.all(m._isempty() for m in self._dirs.values()))) |
465 | 469 |
466 def __str__(self): | 470 def __str__(self): |
467 return '<treemanifest dir=%s>' % self._dir | 471 return ('<treemanifest dir=%s, node=%s>' % |
472 (self._dir, revlog.hex(self._node))) | |
473 | |
474 def dir(self): | |
475 '''The directory that this tree manifest represents, including a | |
476 trailing '/'. Empty string for the repo root directory.''' | |
477 return self._dir | |
478 | |
479 def node(self): | |
480 '''This node of this instance. nullid for unsaved instances. Should | |
481 be updated when the instance is read or written from a revlog. | |
482 ''' | |
483 return self._node | |
484 | |
485 def setnode(self, node): | |
486 self._node = node | |
468 | 487 |
469 def iteritems(self): | 488 def iteritems(self): |
470 for p, n in sorted(self._dirs.items() + self._files.items()): | 489 for p, n in sorted(self._dirs.items() + self._files.items()): |
471 if p in self._files: | 490 if p in self._files: |
472 yield self._subpath(p), n | 491 yield self._subpath(p), n |
555 else: | 574 else: |
556 self._files[f] = n[:21] # to match manifestdict's behavior | 575 self._files[f] = n[:21] # to match manifestdict's behavior |
557 | 576 |
558 def setflag(self, f, flags): | 577 def setflag(self, f, flags): |
559 """Set the flags (symlink, executable) for path f.""" | 578 """Set the flags (symlink, executable) for path f.""" |
579 assert 'd' not in flags | |
560 dir, subpath = _splittopdir(f) | 580 dir, subpath = _splittopdir(f) |
561 if dir: | 581 if dir: |
562 if dir not in self._dirs: | 582 if dir not in self._dirs: |
563 self._dirs[dir] = treemanifest(self._subpath(dir)) | 583 self._dirs[dir] = treemanifest(self._subpath(dir)) |
564 self._dirs[dir].setflag(subpath, flags) | 584 self._dirs[dir].setflag(subpath, flags) |
565 else: | 585 else: |
566 self._flags[f] = flags | 586 self._flags[f] = flags |
567 | 587 |
568 def copy(self): | 588 def copy(self): |
569 copy = treemanifest(self._dir) | 589 copy = treemanifest(self._dir) |
590 copy._node = self._node | |
570 for d in self._dirs: | 591 for d in self._dirs: |
571 copy._dirs[d] = self._dirs[d].copy() | 592 copy._dirs[d] = self._dirs[d].copy() |
572 copy._files = dict.copy(self._files) | 593 copy._files = dict.copy(self._files) |
573 copy._flags = dict.copy(self._flags) | 594 copy._flags = dict.copy(self._flags) |
574 return copy | 595 return copy |
735 result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) | 756 result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) |
736 | 757 |
737 _diff(self, m2) | 758 _diff(self, m2) |
738 return result | 759 return result |
739 | 760 |
740 def parse(self, text): | 761 def parse(self, text, readsubtree): |
741 for f, n, fl in _parse(text): | 762 for f, n, fl in _parse(text): |
742 self[f] = n | 763 if fl == 'd': |
743 if fl: | 764 f = f + '/' |
744 self.setflag(f, fl) | 765 self._dirs[f] = readsubtree(self._subpath(f), n) |
766 else: | |
767 # Use __setitem__ and setflag rather than assigning directly | |
768 # to _files and _flags, thereby letting us parse flat manifests | |
769 # as well as tree manifests. | |
770 self[f] = n | |
771 if fl: | |
772 self.setflag(f, fl) | |
745 | 773 |
746 def text(self, usemanifestv2=False): | 774 def text(self, usemanifestv2=False): |
747 """Get the full data of this manifest as a bytestring.""" | 775 """Get the full data of this manifest as a bytestring.""" |
748 flags = self.flags | 776 flags = self.flags |
749 return _text(((f, self[f], flags(f)) for f in self.keys()), | 777 return _text(((f, self[f], flags(f)) for f in self.keys()), |
750 usemanifestv2) | 778 usemanifestv2) |
751 | 779 |
780 def dirtext(self, usemanifestv2=False): | |
781 """Get the full data of this directory as a bytestring. Make sure that | |
782 any submanifests have been written first, so their nodeids are correct. | |
783 """ | |
784 flags = self.flags | |
785 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs] | |
786 files = [(f, self._files[f], flags(f)) for f in self._files] | |
787 return _text(sorted(dirs + files), usemanifestv2) | |
788 | |
789 def writesubtrees(self, m1, m2, writesubtree): | |
790 emptytree = treemanifest() | |
791 for d, subm in self._dirs.iteritems(): | |
792 subp1 = m1._dirs.get(d, emptytree)._node | |
793 subp2 = m2._dirs.get(d, emptytree)._node | |
794 if subp1 == revlog.nullid: | |
795 subp1, subp2 = subp2, subp1 | |
796 writesubtree(subm, subp1, subp2) | |
797 | |
752 class manifest(revlog.revlog): | 798 class manifest(revlog.revlog): |
753 def __init__(self, opener): | 799 def __init__(self, opener, dir=''): |
754 # During normal operations, we expect to deal with not more than four | 800 # During normal operations, we expect to deal with not more than four |
755 # revs at a time (such as during commit --amend). When rebasing large | 801 # revs at a time (such as during commit --amend). When rebasing large |
756 # stacks of commits, the number can go up, hence the config knob below. | 802 # stacks of commits, the number can go up, hence the config knob below. |
757 cachesize = 4 | 803 cachesize = 4 |
758 usetreemanifest = False | 804 usetreemanifest = False |
761 if opts is not None: | 807 if opts is not None: |
762 cachesize = opts.get('manifestcachesize', cachesize) | 808 cachesize = opts.get('manifestcachesize', cachesize) |
763 usetreemanifest = opts.get('treemanifest', usetreemanifest) | 809 usetreemanifest = opts.get('treemanifest', usetreemanifest) |
764 usemanifestv2 = opts.get('manifestv2', usemanifestv2) | 810 usemanifestv2 = opts.get('manifestv2', usemanifestv2) |
765 self._mancache = util.lrucachedict(cachesize) | 811 self._mancache = util.lrucachedict(cachesize) |
766 revlog.revlog.__init__(self, opener, "00manifest.i") | |
767 self._treeinmem = usetreemanifest | 812 self._treeinmem = usetreemanifest |
768 self._treeondisk = usetreemanifest | 813 self._treeondisk = usetreemanifest |
769 self._usemanifestv2 = usemanifestv2 | 814 self._usemanifestv2 = usemanifestv2 |
815 indexfile = "00manifest.i" | |
816 if dir: | |
817 assert self._treeondisk | |
818 indexfile = "meta/" + dir + "00manifest.i" | |
819 revlog.revlog.__init__(self, opener, indexfile) | |
820 self._dir = dir | |
770 | 821 |
771 def _newmanifest(self, data=''): | 822 def _newmanifest(self, data=''): |
772 if self._treeinmem: | 823 if self._treeinmem: |
773 return treemanifest('', data) | 824 return treemanifest(self._dir, data) |
774 return manifestdict(data) | 825 return manifestdict(data) |
775 | 826 |
776 def _slowreaddelta(self, node): | 827 def _slowreaddelta(self, node): |
777 r0 = self.deltaparent(self.rev(node)) | 828 r0 = self.deltaparent(self.rev(node)) |
778 m0 = self.read(self.node(r0)) | 829 m0 = self.read(self.node(r0)) |
810 if node == revlog.nullid: | 861 if node == revlog.nullid: |
811 return self._newmanifest() # don't upset local cache | 862 return self._newmanifest() # don't upset local cache |
812 if node in self._mancache: | 863 if node in self._mancache: |
813 return self._mancache[node][0] | 864 return self._mancache[node][0] |
814 text = self.revision(node) | 865 text = self.revision(node) |
815 arraytext = array.array('c', text) | 866 if self._treeondisk: |
816 m = self._newmanifest(text) | 867 def readsubtree(dir, subm): |
868 sublog = manifest(self.opener, dir) | |
869 return sublog.read(subm) | |
870 m = self._newmanifest() | |
871 m.parse(text, readsubtree) | |
872 m.setnode(node) | |
873 arraytext = None | |
874 else: | |
875 m = self._newmanifest(text) | |
876 arraytext = array.array('c', text) | |
817 self._mancache[node] = (m, arraytext) | 877 self._mancache[node] = (m, arraytext) |
818 return m | 878 return m |
819 | 879 |
820 def find(self, node, f): | 880 def find(self, node, f): |
821 '''look up entry for a single file efficiently. | 881 '''look up entry for a single file efficiently. |
849 else: | 909 else: |
850 # The first parent manifest isn't already loaded, so we'll | 910 # The first parent manifest isn't already loaded, so we'll |
851 # just encode a fulltext of the manifest and pass that | 911 # just encode a fulltext of the manifest and pass that |
852 # through to the revlog layer, and let it handle the delta | 912 # through to the revlog layer, and let it handle the delta |
853 # process. | 913 # process. |
854 text = m.text(self._usemanifestv2) | 914 if self._treeondisk: |
855 arraytext = array.array('c', text) | 915 m1 = self.read(p1) |
856 n = self.addrevision(text, transaction, link, p1, p2) | 916 m2 = self.read(p2) |
917 n = self._addtree(m, transaction, link, m1, m2) | |
918 arraytext = None | |
919 else: | |
920 text = m.text(self._usemanifestv2) | |
921 n = self.addrevision(text, transaction, link, p1, p2) | |
922 arraytext = array.array('c', text) | |
857 | 923 |
858 self._mancache[n] = (m, arraytext) | 924 self._mancache[n] = (m, arraytext) |
859 | 925 |
860 return n | 926 return n |
927 | |
928 def _addtree(self, m, transaction, link, m1, m2): | |
929 def writesubtree(subm, subp1, subp2): | |
930 sublog = manifest(self.opener, subm.dir()) | |
931 sublog.add(subm, transaction, link, subp1, subp2, None, None) | |
932 m.writesubtrees(m1, m2, writesubtree) | |
933 text = m.dirtext(self._usemanifestv2) | |
934 # If the manifest is unchanged compared to one parent, | |
935 # don't write a new revision | |
936 if text == m1.dirtext(self._usemanifestv2): | |
937 n = m1.node() | |
938 elif text == m2.dirtext(self._usemanifestv2): | |
939 n = m2.node() | |
940 else: | |
941 n = self.addrevision(text, transaction, link, m1.node(), m2.node()) | |
942 # Save nodeid so parent manifest can calculate its nodeid | |
943 m.setnode(n) | |
944 return n |