comparison mercurial/changegroup.py @ 27432:77d25b913f80

changegroup: introduce cg3, which has support for exchanging treemanifests

I'm not entirely happy with using a trailing / on a "file" entry for transferring a treemanifest. We've discussed putting some flags on each file header[0], but I'm unconvinced that's actually any better: if we were going to add another feature to the cg format we'd still be doing a version bump anyway to cg4, so I'm inclined to not spend time coming up with a more sophisticated format until we actually know what the next feature we want to stuff in a changegroup will be.

Test changes outside test-treemanifest.t are only due to the new CG3 bundlecap showing up in the wire protocol.

Many thanks to adgar@google.com and martinvonz@google.com for helping me with various odd corners of the changegroup and treemanifest API.

[0]: It's not hard refactoring, nor is it a lot of work. I'm just disinclined to do speculative work when it's not clear what the customer would actually be.
author Augie Fackler <augie@google.com>
date Fri, 11 Dec 2015 11:23:49 -0500
parents ead8e7069998
children 12f727a5b434
comparison
equal deleted inserted replaced
27431:8f016345e6b0 27432:77d25b913f80
495 495
496 def _deltaheader(self, headertuple, prevnode): 496 def _deltaheader(self, headertuple, prevnode):
497 node, p1, p2, deltabase, cs = headertuple 497 node, p1, p2, deltabase, cs = headertuple
498 return node, p1, p2, deltabase, cs 498 return node, p1, p2, deltabase, cs
499 499
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests. All parsing
    behaviour is inherited from cg2unpacker; the only thing this class
    overrides is the version identifier.
    """
    version = '03'
507
500 class headerlessfixup(object): 508 class headerlessfixup(object):
501 def __init__(self, fh, h): 509 def __init__(self, fh, h):
502 self._h = h 510 self._h = h
503 self._fh = fh 511 self._fh = fh
504 def read(self, n): 512 def read(self, n):
506 d, self._h = self._h[:n], self._h[n:] 514 d, self._h = self._h[:n], self._h[n:]
507 if len(d) < n: 515 if len(d) < n:
508 d += readexactly(self._fh, n - len(d)) 516 d += readexactly(self._fh, n - len(d))
509 return d 517 return d
510 return readexactly(self._fh, n) 518 return readexactly(self._fh, n)
519
520 def _moddirs(files):
521 """Given a set of modified files, find the list of modified directories.
522
523 This returns a list of (path to changed dir, changed dir) tuples,
524 as that's what the one client needs anyway.
525
526 >>> _moddirs(['a/b/c.py', 'a/b/c.txt', 'a/d/e/f/g.txt', 'i.txt', ])
527 [('/', 'a/'), ('a/', 'b/'), ('a/', 'd/'), ('a/d/', 'e/'), ('a/d/e/', 'f/')]
528
529 """
530 alldirs = set()
531 for f in files:
532 path = f.split('/')[:-1]
533 for i in xrange(len(path) - 1, -1, -1):
534 dn = '/'.join(path[:i])
535 current = dn + '/', path[i] + '/'
536 if current in alldirs:
537 break
538 alldirs.add(current)
539 return sorted(alldirs)
511 540
512 class cg1packer(object): 541 class cg1packer(object):
513 deltaheader = _CHANGEGROUPV1_DELTA_HEADER 542 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
514 version = '01' 543 version = '01'
515 def __init__(self, repo, bundlecaps=None): 544 def __init__(self, repo, bundlecaps=None):
# filter any nodes that claim to be part of the known set
def prune(self, revlog, missing, commonrevs):
    """Return the nodes from ``missing`` that are not already common.

    A node is kept when the linkrev of its revision in ``revlog`` is
    not a member of ``commonrevs``. The order of ``missing`` is
    preserved in the returned list.
    """
    torev = revlog.rev
    tolinkrev = revlog.linkrev
    kept = []
    for node in missing:
        if tolinkrev(torev(node)) in commonrevs:
            continue
        kept.append(node)
    return kept
596 625
597 def _packmanifests(self, mfnodes, lookuplinknode): 626 def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode):
598 """Pack flat manifests into a changegroup stream.""" 627 """Pack flat manifests into a changegroup stream."""
599 ml = self._repo.manifest 628 ml = self._repo.manifest
600 size = 0 629 size = 0
601 for chunk in self.group( 630 for chunk in self.group(
602 mfnodes, ml, lookuplinknode, units=_('manifests')): 631 mfnodes, ml, lookuplinknode, units=_('manifests')):
603 size += len(chunk) 632 size += len(chunk)
604 yield chunk 633 yield chunk
605 self._verbosenote(_('%8.i (manifests)\n') % size) 634 self._verbosenote(_('%8.i (manifests)\n') % size)
635 # It looks odd to assert this here, but tmfnodes doesn't get
636 # filled in until after we've called lookuplinknode for
637 # sending root manifests, so the only way to tell the streams
638 # got crossed is to check after we've done all the work.
639 assert not tmfnodes
606 640
607 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): 641 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
608 '''yield a sequence of changegroup chunks (strings)''' 642 '''yield a sequence of changegroup chunks (strings)'''
609 repo = self._repo 643 repo = self._repo
610 cl = repo.changelog 644 cl = repo.changelog
611 ml = repo.manifest 645 ml = repo.manifest
612 646
613 clrevorder = {} 647 clrevorder = {}
614 mfs = {} # needed manifests 648 mfs = {} # needed manifests
649 tmfnodes = {}
615 fnodes = {} # needed file nodes 650 fnodes = {} # needed file nodes
616 # maps manifest node id -> set(changed files) 651 # maps manifest node id -> set(changed files)
617 mfchangedfiles = {} 652 mfchangedfiles = {}
618 653
619 # Callback for the changelog, used to collect changed files and manifest 654 # Callback for the changelog, used to collect changed files and manifest
651 # of the changelog itself. The changelog never uses generaldelta, so 686 # of the changelog itself. The changelog never uses generaldelta, so
652 # it is only reordered when reorder=True. To handle this case, we 687 # it is only reordered when reorder=True. To handle this case, we
653 # simply take the slowpath, which already has the 'clrevorder' logic. 688 # simply take the slowpath, which already has the 'clrevorder' logic.
654 # This was also fixed in cc0ff93d0c0c. 689 # This was also fixed in cc0ff93d0c0c.
655 fastpathlinkrev = fastpathlinkrev and not self._reorder 690 fastpathlinkrev = fastpathlinkrev and not self._reorder
691 # Treemanifests don't work correctly with fastpathlinkrev
692 # either, because we don't discover which directory nodes to
693 # send along with files. This could probably be fixed.
694 fastpathlinkrev = fastpathlinkrev and (
695 'treemanifest' not in repo.requirements)
656 # Callback for the manifest, used to collect linkrevs for filelog 696 # Callback for the manifest, used to collect linkrevs for filelog
657 # revisions. 697 # revisions.
658 # Returns the linkrev node (collected in lookupcl). 698 # Returns the linkrev node (collected in lookupcl).
659 if fastpathlinkrev: 699 if fastpathlinkrev:
660 lookupmflinknode = mfs.__getitem__ 700 lookupmflinknode = mfs.__getitem__
664 704
665 Returns the linkrev node for the specified manifest. 705 Returns the linkrev node for the specified manifest.
666 706
667 SIDE EFFECT: 707 SIDE EFFECT:
668 708
669 fclnodes gets populated with the list of relevant 709 1) fclnodes gets populated with the list of relevant
670 file nodes. 710 file nodes if we're not using fastpathlinkrev
671 711 2) When treemanifests are in use, collects treemanifest nodes
672 Note that this means you can't trust fclnodes until 712 to send
673 after manifests have been sent to the client. 713
714 Note that this means manifests must be completely sent to
715 the client before you can trust the list of files and
716 treemanifests to send.
674 """ 717 """
675 clnode = mfs[x] 718 clnode = mfs[x]
676 mdata = ml.readfast(x) 719 # We no longer actually care about reading deltas of
720 # the manifest here, because we already know the list
721 # of changed files, so for treemanifests (which
722 # lazily-load anyway to *generate* a readdelta) we can
723 # just load them with read() and then we'll actually
724 # be able to correctly load node IDs from the
725 # submanifest entries.
726 if 'treemanifest' in repo.requirements:
727 mdata = ml.read(x)
728 else:
729 mdata = ml.readfast(x)
677 for f in mfchangedfiles[x]: 730 for f in mfchangedfiles[x]:
678 try: 731 try:
679 n = mdata[f] 732 n = mdata[f]
680 except KeyError: 733 except KeyError:
681 continue 734 continue
683 # version 736 # version
684 fclnodes = fnodes.setdefault(f, {}) 737 fclnodes = fnodes.setdefault(f, {})
685 fclnode = fclnodes.setdefault(n, clnode) 738 fclnode = fclnodes.setdefault(n, clnode)
686 if clrevorder[clnode] < clrevorder[fclnode]: 739 if clrevorder[clnode] < clrevorder[fclnode]:
687 fclnodes[n] = clnode 740 fclnodes[n] = clnode
# gather list of changed treemanifest nodes
if 'treemanifest' in repo.requirements:
    # submfs maps a directory path (with trailing '/') to the already
    # loaded (sub)manifest for it; '/' stands for the root manifest.
    submfs = {'/': mdata}
    # _moddirs() returns its pairs sorted, so a parent directory is
    # registered in submfs before any of its children is looked up.
    for dn, bn in _moddirs(mfchangedfiles[x]):
        submf = submfs[dn]
        submf = submf._dirs[bn]
        submfs[submf.dir()] = submf
        tmfclnodes = tmfnodes.setdefault(submf.dir(), {})
        # Remember, per directory node, the changeset that comes
        # first in clrevorder. This must compare against and store
        # under the *directory* node: the previous code reused
        # 'fclnode' and 'n' left over from the file loop above, which
        # compared against an unrelated file's linknode and inserted
        # a file node id into the directory's node map.
        dirnode = submf._node
        tmfclnode = tmfclnodes.setdefault(dirnode, clnode)
        if clrevorder[clnode] < clrevorder[tmfclnode]:
            tmfclnodes[dirnode] = clnode
688 return clnode 752 return clnode
689 753
690 mfnodes = self.prune(ml, mfs, commonrevs) 754 mfnodes = self.prune(ml, mfs, commonrevs)
691 for x in self._packmanifests(mfnodes, lookupmflinknode): 755 for x in self._packmanifests(
756 mfnodes, tmfnodes, lookupmflinknode):
692 yield x 757 yield x
693 758
694 mfs.clear() 759 mfs.clear()
695 clrevs = set(cl.rev(x) for x in clnodes) 760 clrevs = set(cl.rev(x) for x in clnodes)
696 761
807 return dp 872 return dp
808 873
def builddeltaheader(self, node, p1n, p2n, basenode, linknode):
    """Serialize one delta header.

    The five values are packed, in the order given, with the struct
    format stored in self.deltaheader.
    """
    fields = (node, p1n, p2n, basenode, linknode)
    return struct.pack(self.deltaheader, *fields)
811 876
class cg3packer(cg2packer):
    """Packer for version '03' changegroups.

    On top of what cg2packer emits, the manifest section is followed
    by one group per entry of the treemanifest node mapping.
    """
    version = '03'

    def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode):
        """Yield the root manifest chunks, then the directory groups.

        ``tmfnodes`` maps a directory name to a {node: linknode} dict.
        It is iterated only after the superclass generator has been
        exhausted.
        """
        # Note that debug prints are super confusing in this code, as
        # tmfnodes gets populated by the calls to lookuplinknode in
        # the superclass's manifest packer. In the future we should
        # probably see if we can refactor this somehow to be less
        # confusing.
        #
        # The superclass asserts that the mapping it receives is empty
        # at the end, so it is handed a fresh dict instead of tmfnodes.
        rootchunks = super(cg3packer, self)._packmanifests(
            mfnodes, {}, lookuplinknode)
        for chunk in rootchunks:
            yield chunk
        opendirlog = self._repo.manifest.dirlog
        for dirname, dirnodes in tmfnodes.iteritems():
            # For now, directory headers are simply file headers with
            # a trailing '/' on the path.
            yield self.fileheader(dirname + '/')
            dirchunks = self.group(
                dirnodes, opendirlog(dirname), dirnodes.get)
            for chunk in dirchunks:
                yield chunk
896
897
# Maps a changegroup version string to its (packer class, unpacker class)
# pair.
packermap = {'01': (cg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (cg2packer, cg2unpacker),
             # cg3 adds support for exchanging treemanifests
             '03': (cg3packer, cg3unpacker),
}
816 904
817 def _changegroupinfo(repo, nodes, source): 905 def _changegroupinfo(repo, nodes, source):
818 if repo.ui.verbose or source == 'bundle': 906 if repo.ui.verbose or source == 'bundle':
819 repo.ui.status(_("%d changesets found\n") % len(nodes)) 907 repo.ui.status(_("%d changesets found\n") % len(nodes))
936 if not chunkdata: 1024 if not chunkdata:
937 break 1025 break
938 f = chunkdata["filename"] 1026 f = chunkdata["filename"]
939 repo.ui.debug("adding %s revisions\n" % f) 1027 repo.ui.debug("adding %s revisions\n" % f)
940 pr() 1028 pr()
941 fl = repo.file(f) 1029 directory = (f[-1] == '/')
1030 if directory:
1031 # a directory using treemanifests
1032 # TODO fixup repo requirements safely
1033 if 'treemanifest' not in repo.requirements:
1034 if not wasempty:
1035 raise error.Abort(_(
1036 "bundle contains tree manifests, but local repo is "
1037 "non-empty and does not use tree manifests"))
1038 repo.requirements.add('treemanifest')
1039 repo._applyopenerreqs()
1040 repo._writerequirements()
1041 repo.manifest._treeondisk = True
1042 repo.manifest._treeinmem = True
1043 fl = repo.manifest.dirlog(f)
1044 else:
1045 fl = repo.file(f)
942 o = len(fl) 1046 o = len(fl)
943 try: 1047 try:
944 if not fl.addgroup(source, revmap, trp): 1048 if not fl.addgroup(source, revmap, trp):
945 raise error.Abort(_("received file revlog group is empty")) 1049 raise error.Abort(_("received file revlog group is empty"))
946 except error.CensoredBaseError as e: 1050 except error.CensoredBaseError as e:
947 raise error.Abort(_("received delta base is censored: %s") % e) 1051 raise error.Abort(_("received delta base is censored: %s") % e)
948 revisions += len(fl) - o 1052 if not directory:
949 files += 1 1053 revisions += len(fl) - o
1054 files += 1
950 if f in needfiles: 1055 if f in needfiles:
951 needs = needfiles[f] 1056 needs = needfiles[f]
952 for new in xrange(o, len(fl)): 1057 for new in xrange(o, len(fl)):
953 n = fl.node(new) 1058 n = fl.node(new)
954 if n in needs: 1059 if n in needs: