comparison mercurial/manifest.py @ 25091:b5052fc73300

treemanifest: store submanifest revlog per directory. With this change, when tree manifests are enabled (in .hg/requires), commits will be written with one manifest revlog per directory. The manifest revlogs are stored in .hg/store/meta/$dir/00manifest.[id]. Flat manifests can still be read and interacted with as usual (they are also read into treemanifest instances). The functionality for writing a treemanifest as a flat manifest to disk is still left in the code; tests still pass with '_treeinmem=True' hardcoded. Exchange is not yet implemented.
author Martin von Zweigbergk <martinvonz@google.com>
date Mon, 13 Apr 2015 23:21:02 -0700
parents 48583a1e44f3
children 49c583ca48c4
comparison
equal deleted inserted replaced
25090:252412e24551 25091:b5052fc73300
442 return '', f 442 return '', f
443 443
444 class treemanifest(object): 444 class treemanifest(object):
445 def __init__(self, dir='', text=''): 445 def __init__(self, dir='', text=''):
446 self._dir = dir 446 self._dir = dir
447 self._node = revlog.nullid
447 self._dirs = {} 448 self._dirs = {}
448 # Using _lazymanifest here is a little slower than plain old dicts 449 # Using _lazymanifest here is a little slower than plain old dicts
449 self._files = {} 450 self._files = {}
450 self._flags = {} 451 self._flags = {}
451 self.parse(text) 452 def readsubtree(subdir, subm):
453 raise AssertionError('treemanifest constructor only accepts '
454 'flat manifests')
455 self.parse(text, readsubtree)
452 456
453 def _subpath(self, path): 457 def _subpath(self, path):
454 return self._dir + path 458 return self._dir + path
455 459
456 def __len__(self): 460 def __len__(self):
462 def _isempty(self): 466 def _isempty(self):
463 return (not self._files and (not self._dirs or 467 return (not self._files and (not self._dirs or
464 util.all(m._isempty() for m in self._dirs.values()))) 468 util.all(m._isempty() for m in self._dirs.values())))
465 469
466 def __str__(self): 470 def __str__(self):
467 return '<treemanifest dir=%s>' % self._dir 471 return ('<treemanifest dir=%s, node=%s>' %
472 (self._dir, revlog.hex(self._node)))
473
474 def dir(self):
475 '''The directory that this tree manifest represents, including a
476 trailing '/'. Empty string for the repo root directory.'''
477 return self._dir
478
479 def node(self):
480 '''This node of this instance. nullid for unsaved instances. Should
481 be updated when the instance is read or written from a revlog.
482 '''
483 return self._node
484
485 def setnode(self, node):
486 self._node = node
468 487
469 def iteritems(self): 488 def iteritems(self):
470 for p, n in sorted(self._dirs.items() + self._files.items()): 489 for p, n in sorted(self._dirs.items() + self._files.items()):
471 if p in self._files: 490 if p in self._files:
472 yield self._subpath(p), n 491 yield self._subpath(p), n
555 else: 574 else:
556 self._files[f] = n[:21] # to match manifestdict's behavior 575 self._files[f] = n[:21] # to match manifestdict's behavior
557 576
558 def setflag(self, f, flags): 577 def setflag(self, f, flags):
559 """Set the flags (symlink, executable) for path f.""" 578 """Set the flags (symlink, executable) for path f."""
579 assert 'd' not in flags
560 dir, subpath = _splittopdir(f) 580 dir, subpath = _splittopdir(f)
561 if dir: 581 if dir:
562 if dir not in self._dirs: 582 if dir not in self._dirs:
563 self._dirs[dir] = treemanifest(self._subpath(dir)) 583 self._dirs[dir] = treemanifest(self._subpath(dir))
564 self._dirs[dir].setflag(subpath, flags) 584 self._dirs[dir].setflag(subpath, flags)
565 else: 585 else:
566 self._flags[f] = flags 586 self._flags[f] = flags
567 587
568 def copy(self): 588 def copy(self):
569 copy = treemanifest(self._dir) 589 copy = treemanifest(self._dir)
590 copy._node = self._node
570 for d in self._dirs: 591 for d in self._dirs:
571 copy._dirs[d] = self._dirs[d].copy() 592 copy._dirs[d] = self._dirs[d].copy()
572 copy._files = dict.copy(self._files) 593 copy._files = dict.copy(self._files)
573 copy._flags = dict.copy(self._flags) 594 copy._flags = dict.copy(self._flags)
574 return copy 595 return copy
735 result[t2._subpath(fn)] = ((None, ''), (n2, fl2)) 756 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
736 757
737 _diff(self, m2) 758 _diff(self, m2)
738 return result 759 return result
739 760
740 def parse(self, text): 761 def parse(self, text, readsubtree):
741 for f, n, fl in _parse(text): 762 for f, n, fl in _parse(text):
742 self[f] = n 763 if fl == 'd':
743 if fl: 764 f = f + '/'
744 self.setflag(f, fl) 765 self._dirs[f] = readsubtree(self._subpath(f), n)
766 else:
767 # Use __setitem__ and setflag rather than assigning directly
768 # to _files and _flags, thereby letting us parse flat manifests
769 # as well as tree manifests.
770 self[f] = n
771 if fl:
772 self.setflag(f, fl)
745 773
746 def text(self, usemanifestv2=False): 774 def text(self, usemanifestv2=False):
747 """Get the full data of this manifest as a bytestring.""" 775 """Get the full data of this manifest as a bytestring."""
748 flags = self.flags 776 flags = self.flags
749 return _text(((f, self[f], flags(f)) for f in self.keys()), 777 return _text(((f, self[f], flags(f)) for f in self.keys()),
750 usemanifestv2) 778 usemanifestv2)
751 779
780 def dirtext(self, usemanifestv2=False):
781 """Get the full data of this directory as a bytestring. Make sure that
782 any submanifests have been written first, so their nodeids are correct.
783 """
784 flags = self.flags
785 dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
786 files = [(f, self._files[f], flags(f)) for f in self._files]
787 return _text(sorted(dirs + files), usemanifestv2)
788
789 def writesubtrees(self, m1, m2, writesubtree):
790 emptytree = treemanifest()
791 for d, subm in self._dirs.iteritems():
792 subp1 = m1._dirs.get(d, emptytree)._node
793 subp2 = m2._dirs.get(d, emptytree)._node
794 if subp1 == revlog.nullid:
795 subp1, subp2 = subp2, subp1
796 writesubtree(subm, subp1, subp2)
797
752 class manifest(revlog.revlog): 798 class manifest(revlog.revlog):
753 def __init__(self, opener): 799 def __init__(self, opener, dir=''):
754 # During normal operations, we expect to deal with not more than four 800 # During normal operations, we expect to deal with not more than four
755 # revs at a time (such as during commit --amend). When rebasing large 801 # revs at a time (such as during commit --amend). When rebasing large
756 # stacks of commits, the number can go up, hence the config knob below. 802 # stacks of commits, the number can go up, hence the config knob below.
757 cachesize = 4 803 cachesize = 4
758 usetreemanifest = False 804 usetreemanifest = False
761 if opts is not None: 807 if opts is not None:
762 cachesize = opts.get('manifestcachesize', cachesize) 808 cachesize = opts.get('manifestcachesize', cachesize)
763 usetreemanifest = opts.get('treemanifest', usetreemanifest) 809 usetreemanifest = opts.get('treemanifest', usetreemanifest)
764 usemanifestv2 = opts.get('manifestv2', usemanifestv2) 810 usemanifestv2 = opts.get('manifestv2', usemanifestv2)
765 self._mancache = util.lrucachedict(cachesize) 811 self._mancache = util.lrucachedict(cachesize)
766 revlog.revlog.__init__(self, opener, "00manifest.i")
767 self._treeinmem = usetreemanifest 812 self._treeinmem = usetreemanifest
768 self._treeondisk = usetreemanifest 813 self._treeondisk = usetreemanifest
769 self._usemanifestv2 = usemanifestv2 814 self._usemanifestv2 = usemanifestv2
815 indexfile = "00manifest.i"
816 if dir:
817 assert self._treeondisk
818 indexfile = "meta/" + dir + "00manifest.i"
819 revlog.revlog.__init__(self, opener, indexfile)
820 self._dir = dir
770 821
771 def _newmanifest(self, data=''): 822 def _newmanifest(self, data=''):
772 if self._treeinmem: 823 if self._treeinmem:
773 return treemanifest('', data) 824 return treemanifest(self._dir, data)
774 return manifestdict(data) 825 return manifestdict(data)
775 826
776 def _slowreaddelta(self, node): 827 def _slowreaddelta(self, node):
777 r0 = self.deltaparent(self.rev(node)) 828 r0 = self.deltaparent(self.rev(node))
778 m0 = self.read(self.node(r0)) 829 m0 = self.read(self.node(r0))
810 if node == revlog.nullid: 861 if node == revlog.nullid:
811 return self._newmanifest() # don't upset local cache 862 return self._newmanifest() # don't upset local cache
812 if node in self._mancache: 863 if node in self._mancache:
813 return self._mancache[node][0] 864 return self._mancache[node][0]
814 text = self.revision(node) 865 text = self.revision(node)
815 arraytext = array.array('c', text) 866 if self._treeondisk:
816 m = self._newmanifest(text) 867 def readsubtree(dir, subm):
868 sublog = manifest(self.opener, dir)
869 return sublog.read(subm)
870 m = self._newmanifest()
871 m.parse(text, readsubtree)
872 m.setnode(node)
873 arraytext = None
874 else:
875 m = self._newmanifest(text)
876 arraytext = array.array('c', text)
817 self._mancache[node] = (m, arraytext) 877 self._mancache[node] = (m, arraytext)
818 return m 878 return m
819 879
820 def find(self, node, f): 880 def find(self, node, f):
821 '''look up entry for a single file efficiently. 881 '''look up entry for a single file efficiently.
849 else: 909 else:
850 # The first parent manifest isn't already loaded, so we'll 910 # The first parent manifest isn't already loaded, so we'll
851 # just encode a fulltext of the manifest and pass that 911 # just encode a fulltext of the manifest and pass that
852 # through to the revlog layer, and let it handle the delta 912 # through to the revlog layer, and let it handle the delta
853 # process. 913 # process.
854 text = m.text(self._usemanifestv2) 914 if self._treeondisk:
855 arraytext = array.array('c', text) 915 m1 = self.read(p1)
856 n = self.addrevision(text, transaction, link, p1, p2) 916 m2 = self.read(p2)
917 n = self._addtree(m, transaction, link, m1, m2)
918 arraytext = None
919 else:
920 text = m.text(self._usemanifestv2)
921 n = self.addrevision(text, transaction, link, p1, p2)
922 arraytext = array.array('c', text)
857 923
858 self._mancache[n] = (m, arraytext) 924 self._mancache[n] = (m, arraytext)
859 925
860 return n 926 return n
927
928 def _addtree(self, m, transaction, link, m1, m2):
929 def writesubtree(subm, subp1, subp2):
930 sublog = manifest(self.opener, subm.dir())
931 sublog.add(subm, transaction, link, subp1, subp2, None, None)
932 m.writesubtrees(m1, m2, writesubtree)
933 text = m.dirtext(self._usemanifestv2)
934 # If the manifest is unchanged compared to one parent,
935 # don't write a new revision
936 if text == m1.dirtext(self._usemanifestv2):
937 n = m1.node()
938 elif text == m2.dirtext(self._usemanifestv2):
939 n = m2.node()
940 else:
941 n = self.addrevision(text, transaction, link, m1.node(), m2.node())
942 # Save nodeid so parent manifest can calculate its nodeid
943 m.setnode(n)
944 return n