Mercurial > public > mercurial-scm > hg
comparison mercurial/changegroup.py @ 27432:77d25b913f80
changegroup: introduce cg3, which has support for exchanging treemanifests
I'm not entirely happy with using a trailing / on a "file" entry for
transferring a treemanifest. We've discussed putting some flags on
each file header[0], but I'm unconvinced that's actually any better:
if we were going to add another feature to the cg format we'd still be
doing a version bump anyway to cg4, so I'm inclined to not spend time
coming up with a more sophisticated format until we actually know what
the next feature we want to stuff in a changegroup will be.
Test changes outside test-treemanifest.t are only due to the new CG3
bundlecap showing up in the wire protocol.
Many thanks to adgar@google.com and martinvonz@google.com for helping
me with various odd corners of the changegroup and treemanifest API.
[0]: It's not hard refactoring, nor is it a lot of work. I'm just
disinclined to do speculative work when it's not clear what the
customer would actually be.
author | Augie Fackler <augie@google.com> |
---|---|
date | Fri, 11 Dec 2015 11:23:49 -0500 |
parents | ead8e7069998 |
children | 12f727a5b434 |
comparison
equal
deleted
inserted
replaced
27431:8f016345e6b0 | 27432:77d25b913f80 |
---|---|
495 | 495 |
496 def _deltaheader(self, headertuple, prevnode): | 496 def _deltaheader(self, headertuple, prevnode): |
497 node, p1, p2, deltabase, cs = headertuple | 497 node, p1, p2, deltabase, cs = headertuple |
498 return node, p1, p2, deltabase, cs | 498 return node, p1, p2, deltabase, cs |
499 | 499 |
500 class cg3unpacker(cg2unpacker): | |
501 """Unpacker for cg3 streams. | |
502 | |
503 cg3 streams add support for exchanging treemanifests, so the only | |
504 thing that changes is the version number. | |
505 """ | |
506 version = '03' | |
507 | |
500 class headerlessfixup(object): | 508 class headerlessfixup(object): |
501 def __init__(self, fh, h): | 509 def __init__(self, fh, h): |
502 self._h = h | 510 self._h = h |
503 self._fh = fh | 511 self._fh = fh |
504 def read(self, n): | 512 def read(self, n): |
506 d, self._h = self._h[:n], self._h[n:] | 514 d, self._h = self._h[:n], self._h[n:] |
507 if len(d) < n: | 515 if len(d) < n: |
508 d += readexactly(self._fh, n - len(d)) | 516 d += readexactly(self._fh, n - len(d)) |
509 return d | 517 return d |
510 return readexactly(self._fh, n) | 518 return readexactly(self._fh, n) |
519 | |
520 def _moddirs(files): | |
521 """Given a set of modified files, find the list of modified directories. | |
522 | |
523 This returns a list of (path to changed dir, changed dir) tuples, | |
524 as that's what the one client needs anyway. | |
525 | |
526 >>> _moddirs(['a/b/c.py', 'a/b/c.txt', 'a/d/e/f/g.txt', 'i.txt', ]) | |
527 [('/', 'a/'), ('a/', 'b/'), ('a/', 'd/'), ('a/d/', 'e/'), ('a/d/e/', 'f/')] | |
528 | |
529 """ | |
530 alldirs = set() | |
531 for f in files: | |
532 path = f.split('/')[:-1] | |
533 for i in xrange(len(path) - 1, -1, -1): | |
534 dn = '/'.join(path[:i]) | |
535 current = dn + '/', path[i] + '/' | |
536 if current in alldirs: | |
537 break | |
538 alldirs.add(current) | |
539 return sorted(alldirs) | |
511 | 540 |
512 class cg1packer(object): | 541 class cg1packer(object): |
513 deltaheader = _CHANGEGROUPV1_DELTA_HEADER | 542 deltaheader = _CHANGEGROUPV1_DELTA_HEADER |
514 version = '01' | 543 version = '01' |
515 def __init__(self, repo, bundlecaps=None): | 544 def __init__(self, repo, bundlecaps=None): |
592 # filter any nodes that claim to be part of the known set | 621 # filter any nodes that claim to be part of the known set |
593 def prune(self, revlog, missing, commonrevs): | 622 def prune(self, revlog, missing, commonrevs): |
594 rr, rl = revlog.rev, revlog.linkrev | 623 rr, rl = revlog.rev, revlog.linkrev |
595 return [n for n in missing if rl(rr(n)) not in commonrevs] | 624 return [n for n in missing if rl(rr(n)) not in commonrevs] |
596 | 625 |
597 def _packmanifests(self, mfnodes, lookuplinknode): | 626 def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode): |
598 """Pack flat manifests into a changegroup stream.""" | 627 """Pack flat manifests into a changegroup stream.""" |
599 ml = self._repo.manifest | 628 ml = self._repo.manifest |
600 size = 0 | 629 size = 0 |
601 for chunk in self.group( | 630 for chunk in self.group( |
602 mfnodes, ml, lookuplinknode, units=_('manifests')): | 631 mfnodes, ml, lookuplinknode, units=_('manifests')): |
603 size += len(chunk) | 632 size += len(chunk) |
604 yield chunk | 633 yield chunk |
605 self._verbosenote(_('%8.i (manifests)\n') % size) | 634 self._verbosenote(_('%8.i (manifests)\n') % size) |
635 # It looks odd to assert this here, but tmfnodes doesn't get | |
636 # filled in until after we've called lookuplinknode for | |
637 # sending root manifests, so the only way to tell the streams | |
638 # got crossed is to check after we've done all the work. | |
639 assert not tmfnodes | |
606 | 640 |
607 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): | 641 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): |
608 '''yield a sequence of changegroup chunks (strings)''' | 642 '''yield a sequence of changegroup chunks (strings)''' |
609 repo = self._repo | 643 repo = self._repo |
610 cl = repo.changelog | 644 cl = repo.changelog |
611 ml = repo.manifest | 645 ml = repo.manifest |
612 | 646 |
613 clrevorder = {} | 647 clrevorder = {} |
614 mfs = {} # needed manifests | 648 mfs = {} # needed manifests |
649 tmfnodes = {} | |
615 fnodes = {} # needed file nodes | 650 fnodes = {} # needed file nodes |
616 # maps manifest node id -> set(changed files) | 651 # maps manifest node id -> set(changed files) |
617 mfchangedfiles = {} | 652 mfchangedfiles = {} |
618 | 653 |
619 # Callback for the changelog, used to collect changed files and manifest | 654 # Callback for the changelog, used to collect changed files and manifest |
651 # of the changelog itself. The changelog never uses generaldelta, so | 686 # of the changelog itself. The changelog never uses generaldelta, so |
652 # it is only reordered when reorder=True. To handle this case, we | 687 # it is only reordered when reorder=True. To handle this case, we |
653 # simply take the slowpath, which already has the 'clrevorder' logic. | 688 # simply take the slowpath, which already has the 'clrevorder' logic. |
654 # This was also fixed in cc0ff93d0c0c. | 689 # This was also fixed in cc0ff93d0c0c. |
655 fastpathlinkrev = fastpathlinkrev and not self._reorder | 690 fastpathlinkrev = fastpathlinkrev and not self._reorder |
691 # Treemanifests don't work correctly with fastpathlinkrev | |
692 # either, because we don't discover which directory nodes to | |
693 # send along with files. This could probably be fixed. | |
694 fastpathlinkrev = fastpathlinkrev and ( | |
695 'treemanifest' not in repo.requirements) | |
656 # Callback for the manifest, used to collect linkrevs for filelog | 696 # Callback for the manifest, used to collect linkrevs for filelog |
657 # revisions. | 697 # revisions. |
658 # Returns the linkrev node (collected in lookupcl). | 698 # Returns the linkrev node (collected in lookupcl). |
659 if fastpathlinkrev: | 699 if fastpathlinkrev: |
660 lookupmflinknode = mfs.__getitem__ | 700 lookupmflinknode = mfs.__getitem__ |
664 | 704 |
665 Returns the linkrev node for the specified manifest. | 705 Returns the linkrev node for the specified manifest. |
666 | 706 |
667 SIDE EFFECT: | 707 SIDE EFFECT: |
668 | 708 |
669 fclnodes gets populated with the list of relevant | 709 1) fclnodes gets populated with the list of relevant |
670 file nodes. | 710 file nodes if we're not using fastpathlinkrev |
671 | 711 2) When treemanifests are in use, collects treemanifest nodes |
672 Note that this means you can't trust fclnodes until | 712 to send |
673 after manifests have been sent to the client. | 713 |
714 Note that this means manifests must be completely sent to | |
715 the client before you can trust the list of files and | |
716 treemanifests to send. | |
674 """ | 717 """ |
675 clnode = mfs[x] | 718 clnode = mfs[x] |
676 mdata = ml.readfast(x) | 719 # We no longer actually care about reading deltas of |
720 # the manifest here, because we already know the list | |
721 # of changed files, so for treemanifests (which | |
722 # lazily-load anyway to *generate* a readdelta) we can | |
723 # just load them with read() and then we'll actually | |
724 # be able to correctly load node IDs from the | |
725 # submanifest entries. | |
726 if 'treemanifest' in repo.requirements: | |
727 mdata = ml.read(x) | |
728 else: | |
729 mdata = ml.readfast(x) | |
677 for f in mfchangedfiles[x]: | 730 for f in mfchangedfiles[x]: |
678 try: | 731 try: |
679 n = mdata[f] | 732 n = mdata[f] |
680 except KeyError: | 733 except KeyError: |
681 continue | 734 continue |
683 # version | 736 # version |
684 fclnodes = fnodes.setdefault(f, {}) | 737 fclnodes = fnodes.setdefault(f, {}) |
685 fclnode = fclnodes.setdefault(n, clnode) | 738 fclnode = fclnodes.setdefault(n, clnode) |
686 if clrevorder[clnode] < clrevorder[fclnode]: | 739 if clrevorder[clnode] < clrevorder[fclnode]: |
687 fclnodes[n] = clnode | 740 fclnodes[n] = clnode |
741 # gather list of changed treemanifest nodes | |
742 if 'treemanifest' in repo.requirements: | |
743 submfs = {'/': mdata} | |
744 for dn, bn in _moddirs(mfchangedfiles[x]): | |
745 submf = submfs[dn] | |
746 submf = submf._dirs[bn] | |
747 submfs[submf.dir()] = submf | |
748 tmfclnodes = tmfnodes.setdefault(submf.dir(), {}) | |
749 tmfclnodes.setdefault(submf._node, clnode) | |
750 if clrevorder[clnode] < clrevorder[fclnode]: | |
751 tmfclnodes[n] = clnode | |
688 return clnode | 752 return clnode |
689 | 753 |
690 mfnodes = self.prune(ml, mfs, commonrevs) | 754 mfnodes = self.prune(ml, mfs, commonrevs) |
691 for x in self._packmanifests(mfnodes, lookupmflinknode): | 755 for x in self._packmanifests( |
756 mfnodes, tmfnodes, lookupmflinknode): | |
692 yield x | 757 yield x |
693 | 758 |
694 mfs.clear() | 759 mfs.clear() |
695 clrevs = set(cl.rev(x) for x in clnodes) | 760 clrevs = set(cl.rev(x) for x in clnodes) |
696 | 761 |
807 return dp | 872 return dp |
808 | 873 |
809 def builddeltaheader(self, node, p1n, p2n, basenode, linknode): | 874 def builddeltaheader(self, node, p1n, p2n, basenode, linknode): |
810 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode) | 875 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode) |
811 | 876 |
877 class cg3packer(cg2packer): | |
878 version = '03' | |
879 | |
880 def _packmanifests(self, mfnodes, tmfnodes, lookuplinknode): | |
881 # Note that debug prints are super confusing in this code, as | |
882 # tmfnodes gets populated by the calls to lookuplinknode in | |
883 # the superclass's manifest packer. In the future we should | |
884 # probably see if we can refactor this somehow to be less | |
885 # confusing. | |
886 for x in super(cg3packer, self)._packmanifests( | |
887 mfnodes, {}, lookuplinknode): | |
888 yield x | |
889 dirlog = self._repo.manifest.dirlog | |
890 for name, nodes in tmfnodes.iteritems(): | |
891 # For now, directory headers are simply file headers with | |
892 # a trailing '/' on the path. | |
893 yield self.fileheader(name + '/') | |
894 for chunk in self.group(nodes, dirlog(name), nodes.get): | |
895 yield chunk | |
896 | |
897 | |
812 packermap = {'01': (cg1packer, cg1unpacker), | 898 packermap = {'01': (cg1packer, cg1unpacker), |
813 # cg2 adds support for exchanging generaldelta | 899 # cg2 adds support for exchanging generaldelta |
814 '02': (cg2packer, cg2unpacker), | 900 '02': (cg2packer, cg2unpacker), |
901 # cg3 adds support for exchanging treemanifests | |
902 '03': (cg3packer, cg3unpacker), | |
815 } | 903 } |
816 | 904 |
817 def _changegroupinfo(repo, nodes, source): | 905 def _changegroupinfo(repo, nodes, source): |
818 if repo.ui.verbose or source == 'bundle': | 906 if repo.ui.verbose or source == 'bundle': |
819 repo.ui.status(_("%d changesets found\n") % len(nodes)) | 907 repo.ui.status(_("%d changesets found\n") % len(nodes)) |
936 if not chunkdata: | 1024 if not chunkdata: |
937 break | 1025 break |
938 f = chunkdata["filename"] | 1026 f = chunkdata["filename"] |
939 repo.ui.debug("adding %s revisions\n" % f) | 1027 repo.ui.debug("adding %s revisions\n" % f) |
940 pr() | 1028 pr() |
941 fl = repo.file(f) | 1029 directory = (f[-1] == '/') |
1030 if directory: | |
1031 # a directory using treemanifests | |
1032 # TODO fixup repo requirements safely | |
1033 if 'treemanifest' not in repo.requirements: | |
1034 if not wasempty: | |
1035 raise error.Abort(_( | |
1036 "bundle contains tree manifests, but local repo is " | |
1037 "non-empty and does not use tree manifests")) | |
1038 repo.requirements.add('treemanifest') | |
1039 repo._applyopenerreqs() | |
1040 repo._writerequirements() | |
1041 repo.manifest._treeondisk = True | |
1042 repo.manifest._treeinmem = True | |
1043 fl = repo.manifest.dirlog(f) | |
1044 else: | |
1045 fl = repo.file(f) | |
942 o = len(fl) | 1046 o = len(fl) |
943 try: | 1047 try: |
944 if not fl.addgroup(source, revmap, trp): | 1048 if not fl.addgroup(source, revmap, trp): |
945 raise error.Abort(_("received file revlog group is empty")) | 1049 raise error.Abort(_("received file revlog group is empty")) |
946 except error.CensoredBaseError as e: | 1050 except error.CensoredBaseError as e: |
947 raise error.Abort(_("received delta base is censored: %s") % e) | 1051 raise error.Abort(_("received delta base is censored: %s") % e) |
948 revisions += len(fl) - o | 1052 if not directory: |
949 files += 1 | 1053 revisions += len(fl) - o |
1054 files += 1 | |
950 if f in needfiles: | 1055 if f in needfiles: |
951 needs = needfiles[f] | 1056 needs = needfiles[f] |
952 for new in xrange(o, len(fl)): | 1057 for new in xrange(o, len(fl)): |
953 n = fl.node(new) | 1058 n = fl.node(new) |
954 if n in needs: | 1059 if n in needs: |