Mercurial > public > mercurial-scm > hg
comparison mercurial/localrepo.py @ 1466:b6d9ea0bc107
Added a lot of comments to changegroupsubset.
author | Eric Hopper <hopper@omnifarious.org> |
---|---|
date | Tue, 11 Oct 2005 18:56:47 -0700 |
parents | 00117edce2dd |
children | 0847c45ffee6 |
comparison
equal
deleted
inserted
replaced
1465:be6b5ce60b7f | 1466:b6d9ea0bc107 |
---|---|
894 | 894 |
895 cg = self.changegroup(update) | 895 cg = self.changegroup(update) |
896 return remote.addchangegroup(cg) | 896 return remote.addchangegroup(cg) |
897 | 897 |
898 def changegroupsubset(self, bases, heads): | 898 def changegroupsubset(self, bases, heads): |
899 """This function generates a changegroup consisting of all the nodes | |
900 that are descendents of any of the bases, and ancestors of any of | |
901 the heads. | |
902 | |
903 It is fairly complex as determining which filenodes and which | |
904 manifest nodes need to be included for the changeset to be complete | |
905 is non-trivial. | |
906 | |
907 Another wrinkle is doing the reverse, figuring out which changeset in | |
908 the changegroup a particular filenode or manifestnode belongs to.""" | |
909 | |
910 # Set up some initial variables | |
911 # Make it easy to refer to self.changelog | |
899 cl = self.changelog | 912 cl = self.changelog |
900 # msng = missing | 913 # msng is short for missing - compute the list of changesets in this |
914 # changegroup. | |
901 msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads) | 915 msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads) |
902 junk = None | 916 # Some bases may turn out to be superfluous, and some heads may be |
917 # too. nodesbetween will return the minimal set of bases and heads | |
918 # necessary to re-create the changegroup. | |
919 | |
920 # Known heads are the list of heads that it is assumed the recipient | |
921 # of this changegroup will know about. | |
903 knownheads = {} | 922 knownheads = {} |
923 # We assume that all parents of bases are known heads. | |
904 for n in bases: | 924 for n in bases: |
905 for p in cl.parents(n): | 925 for p in cl.parents(n): |
906 if p != nullid: | 926 if p != nullid: |
907 knownheads[p] = 1 | 927 knownheads[p] = 1 |
908 knownheads = knownheads.keys() | 928 knownheads = knownheads.keys() |
909 if knownheads: | 929 if knownheads: |
930 # Now that we know what heads are known, we can compute which | |
931 # changesets are known. The recipient must know about all | |
932 # changesets required to reach the known heads from the null | |
933 # changeset. | |
910 has_cl_set, junk, junk = cl.nodesbetween(None, knownheads) | 934 has_cl_set, junk, junk = cl.nodesbetween(None, knownheads) |
935 junk = None | |
936 # Transform the list into an ersatz set. | |
911 has_cl_set = dict.fromkeys(has_cl_set) | 937 has_cl_set = dict.fromkeys(has_cl_set) |
912 else: | 938 else: |
939 # If there were no known heads, the recipient cannot be assumed to | |
940 # know about any changesets. | |
913 has_cl_set = {} | 941 has_cl_set = {} |
914 | 942 |
943 # Make it easy to refer to self.manifest | |
915 mnfst = self.manifest | 944 mnfst = self.manifest |
945 # We don't know which manifests are missing yet | |
916 msng_mnfst_set = {} | 946 msng_mnfst_set = {} |
947 # Nor do we know which filenodes are missing. | |
917 msng_filenode_set = {} | 948 msng_filenode_set = {} |
918 | 949 |
919 junk = mnfst.index[mnfst.count() - 1] # Get around a bug in lazyindex | 950 junk = mnfst.index[mnfst.count() - 1] # Get around a bug in lazyindex |
920 junk = None | 951 junk = None |
921 | 952 |
953 # A changeset always belongs to itself, so the changenode lookup | |
954 # function for a changenode is identity. | |
922 def identity(x): | 955 def identity(x): |
923 return x | 956 return x |
924 | 957 |
958 # A function generating function. Sets up an environment for the | |
959 # inner function. | |
925 def cmp_by_rev_func(revlog): | 960 def cmp_by_rev_func(revlog): |
926 def cmpfunc(a, b): | 961 # Compare two nodes by their revision number in the environment's |
962 # revision history. Since the revision number both represents the | |
963 # most efficient order to read the nodes in, and represents a | |
964 # topological sorting of the nodes, this function is often useful. | |
965 def cmp_by_rev(a, b): | |
927 return cmp(revlog.rev(a), revlog.rev(b)) | 966 return cmp(revlog.rev(a), revlog.rev(b)) |
928 return cmpfunc | 967 return cmp_by_rev |
929 | 968 |
969 # If we determine that a particular file or manifest node must be a | |
970 # node that the recipient of the changegroup will already have, we can | |
971 # also assume the recipient will have all the parents. This function | |
972 # prunes them from the set of missing nodes. | |
930 def prune_parents(revlog, hasset, msngset): | 973 def prune_parents(revlog, hasset, msngset): |
931 haslst = hasset.keys() | 974 haslst = hasset.keys() |
932 haslst.sort(cmp_by_rev_func(revlog)) | 975 haslst.sort(cmp_by_rev_func(revlog)) |
933 for node in haslst: | 976 for node in haslst: |
934 parentlst = [p for p in revlog.parents(node) if p != nullid] | 977 parentlst = [p for p in revlog.parents(node) if p != nullid] |
939 p = [p for p in revlog.parents(n) if p != nullid] | 982 p = [p for p in revlog.parents(n) if p != nullid] |
940 parentlst.extend(p) | 983 parentlst.extend(p) |
941 for n in hasset: | 984 for n in hasset: |
942 msngset.pop(n, None) | 985 msngset.pop(n, None) |
943 | 986 |
987 # This is a function generating function used to set up an environment | |
988 # for the inner function to execute in. | |
944 def manifest_and_file_collector(changedfileset): | 989 def manifest_and_file_collector(changedfileset): |
990 # This is an information gathering function that gathers | |
991 # information from each changeset node that goes out as part of | |
992 # the changegroup. The information gathered is a list of which | |
993 # manifest nodes are potentially required (the recipient may | |
994 # already have them) and total list of all files which were | |
995 # changed in any changeset in the changegroup. | |
996 # | |
997 # We also remember the first changenode we saw any manifest | |
998 # referenced by so we can later determine which changenode 'owns' | |
999 # the manifest. | |
945 def collect_manifests_and_files(clnode): | 1000 def collect_manifests_and_files(clnode): |
946 c = cl.read(clnode) | 1001 c = cl.read(clnode) |
947 for f in c[3]: | 1002 for f in c[3]: |
948 # This is to make sure we only have one instance of each | 1003 # This is to make sure we only have one instance of each |
949 # filename string for each filename. | 1004 # filename string for each filename. |
950 changedfileset.setdefault(f, f) | 1005 changedfileset.setdefault(f, f) |
951 msng_mnfst_set.setdefault(c[0], clnode) | 1006 msng_mnfst_set.setdefault(c[0], clnode) |
952 return collect_manifests_and_files | 1007 return collect_manifests_and_files |
953 | 1008 |
1009 # Figure out which manifest nodes (of the ones we think might be part | |
1010 # of the changegroup) the recipient must know about and remove them | |
1011 # from the changegroup. | |
954 def prune_manifests(): | 1012 def prune_manifests(): |
955 has_mnfst_set = {} | 1013 has_mnfst_set = {} |
956 for n in msng_mnfst_set: | 1014 for n in msng_mnfst_set: |
1015 # If a 'missing' manifest thinks it belongs to a changenode | |
1016 # the recipient is assumed to have, obviously the recipient | |
1017 # must have that manifest. | |
957 linknode = cl.node(mnfst.linkrev(n)) | 1018 linknode = cl.node(mnfst.linkrev(n)) |
958 if linknode in has_cl_set: | 1019 if linknode in has_cl_set: |
959 has_mnfst_set[n] = 1 | 1020 has_mnfst_set[n] = 1 |
960 prune_parents(mnfst, has_mnfst_set, msng_mnfst_set) | 1021 prune_parents(mnfst, has_mnfst_set, msng_mnfst_set) |
961 | 1022 |
1023 # Use the information collected in collect_manifests_and_files to say | |
1024 # which changenode any manifestnode belongs to. | |
962 def lookup_manifest_link(mnfstnode): | 1025 def lookup_manifest_link(mnfstnode): |
963 return msng_mnfst_set[mnfstnode] | 1026 return msng_mnfst_set[mnfstnode] |
964 | 1027 |
1028 # A function generating function that sets up the initial environment | |
1029 # the inner function. | |
965 def filenode_collector(changedfiles): | 1030 def filenode_collector(changedfiles): |
966 next_rev = [0] | 1031 next_rev = [0] |
1032 # This gathers information from each manifestnode included in the | |
1033 # changegroup about which filenodes the manifest node references | |
1034 # so we can include those in the changegroup too. | |
1035 # | |
1036 # It also remembers which changenode each filenode belongs to. It | |
1037 # does this by assuming the a filenode belongs to the changenode | |
1038 # the first manifest that references it belongs to. | |
967 def collect_msng_filenodes(mnfstnode): | 1039 def collect_msng_filenodes(mnfstnode): |
968 r = mnfst.rev(mnfstnode) | 1040 r = mnfst.rev(mnfstnode) |
969 if r == next_rev[0]: | 1041 if r == next_rev[0]: |
970 # If the last rev we looked at was the one just previous, | 1042 # If the last rev we looked at was the one just previous, |
971 # we only need to see a diff. | 1043 # we only need to see a diff. |
972 delta = mdiff.patchtext(mnfst.delta(mnfstnode)) | 1044 delta = mdiff.patchtext(mnfst.delta(mnfstnode)) |
1045 # For each line in the delta | |
973 for dline in delta.splitlines(): | 1046 for dline in delta.splitlines(): |
1047 # get the filename and filenode for that line | |
974 f, fnode = dline.split('\0') | 1048 f, fnode = dline.split('\0') |
975 fnode = bin(fnode[:40]) | 1049 fnode = bin(fnode[:40]) |
976 f = changedfiles.get(f, None) | 1050 f = changedfiles.get(f, None) |
1051 # And if the file is in the list of files we care | |
1052 # about. | |
977 if f is not None: | 1053 if f is not None: |
1054 # Get the changenode this manifest belongs to | |
1055 clnode = msng_mnfst_set[mnfstnode] | |
1056 # Create the set of filenodes for the file if | |
1057 # there isn't one already. | |
1058 ndset = msng_filenode_set.setdefault(f, {}) | |
1059 # And set the filenode's changelog node to the | |
1060 # manifest's if it hasn't been set already. | |
1061 ndset.setdefault(fnode, clnode) | |
1062 else: | |
1063 # Otherwise we need a full manifest. | |
1064 m = mnfst.read(mnfstnode) | |
1065 # For every file in we care about. | |
1066 for f in changedfiles: | |
1067 fnode = m.get(f, None) | |
1068 # If it's in the manifest | |
1069 if fnode is not None: | |
1070 # See comments above. | |
978 clnode = msng_mnfst_set[mnfstnode] | 1071 clnode = msng_mnfst_set[mnfstnode] |
979 ndset = msng_filenode_set.setdefault(f, {}) | 1072 ndset = msng_filenode_set.setdefault(f, {}) |
980 ndset.setdefault(fnode, clnode) | 1073 ndset.setdefault(fnode, clnode) |
981 else: | 1074 # Remember the revision we hope to see next. |
982 m = mnfst.read(mnfstnode) | |
983 for f in changedfiles: | |
984 fnode = m.get(f, None) | |
985 if fnode is not None: | |
986 clnode = msng_mnfst_set[mnfstnode] | |
987 ndset = msng_filenode_set.setdefault(f, {}) | |
988 ndset.setdefault(fnode, clnode) | |
989 next_rev[0] = r + 1 | 1075 next_rev[0] = r + 1 |
990 return collect_msng_filenodes | 1076 return collect_msng_filenodes |
991 | 1077 |
1078 # We have a list of filenodes we think we need for a file, lets remove | |
1079 # all those we now the recipient must have. | |
992 def prune_filenodes(f, filerevlog): | 1080 def prune_filenodes(f, filerevlog): |
993 msngset = msng_filenode_set[f] | 1081 msngset = msng_filenode_set[f] |
994 hasset = {} | 1082 hasset = {} |
1083 # If a 'missing' filenode thinks it belongs to a changenode we | |
1084 # assume the recipient must have, then the recipient must have | |
1085 # that filenode. | |
995 for n in msngset: | 1086 for n in msngset: |
996 clnode = cl.node(filerevlog.linkrev(n)) | 1087 clnode = cl.node(filerevlog.linkrev(n)) |
997 if clnode in has_cl_set: | 1088 if clnode in has_cl_set: |
998 hasset[n] = 1 | 1089 hasset[n] = 1 |
999 prune_parents(filerevlog, hasset, msngset) | 1090 prune_parents(filerevlog, hasset, msngset) |
1000 | 1091 |
1092 # A function generator function that sets up the a context for the | |
1093 # inner function. | |
1001 def lookup_filenode_link_func(fname): | 1094 def lookup_filenode_link_func(fname): |
1002 msngset = msng_filenode_set[fname] | 1095 msngset = msng_filenode_set[fname] |
1096 # Lookup the changenode the filenode belongs to. | |
1003 def lookup_filenode_link(fnode): | 1097 def lookup_filenode_link(fnode): |
1004 return msngset[fnode] | 1098 return msngset[fnode] |
1005 return lookup_filenode_link | 1099 return lookup_filenode_link |
1006 | 1100 |
1101 # Now that we have all theses utility functions to help out and | |
1102 # logically divide up the task, generate the group. | |
1007 def gengroup(): | 1103 def gengroup(): |
1104 # The set of changed files starts empty. | |
1008 changedfiles = {} | 1105 changedfiles = {} |
1106 # Create a changenode group generator that will call our functions | |
1107 # back to lookup the owning changenode and collect information. | |
1009 group = cl.group(msng_cl_lst, identity, | 1108 group = cl.group(msng_cl_lst, identity, |
1010 manifest_and_file_collector(changedfiles)) | 1109 manifest_and_file_collector(changedfiles)) |
1011 for chnk in group: | 1110 for chnk in group: |
1012 yield chnk | 1111 yield chnk |
1112 | |
1113 # The list of manifests has been collected by the generator | |
1114 # calling our functions back. | |
1013 prune_manifests() | 1115 prune_manifests() |
1014 msng_mnfst_lst = msng_mnfst_set.keys() | 1116 msng_mnfst_lst = msng_mnfst_set.keys() |
1117 # Sort the manifestnodes by revision number. | |
1015 msng_mnfst_lst.sort(cmp_by_rev_func(mnfst)) | 1118 msng_mnfst_lst.sort(cmp_by_rev_func(mnfst)) |
1119 # Create a generator for the manifestnodes that calls our lookup | |
1120 # and data collection functions back. | |
1016 group = mnfst.group(msng_mnfst_lst, lookup_manifest_link, | 1121 group = mnfst.group(msng_mnfst_lst, lookup_manifest_link, |
1017 filenode_collector(changedfiles)) | 1122 filenode_collector(changedfiles)) |
1018 for chnk in group: | 1123 for chnk in group: |
1019 yield chnk | 1124 yield chnk |
1125 | |
1126 # These are no longer needed, dereference and toss the memory for | |
1127 # them. | |
1020 msng_mnfst_lst = None | 1128 msng_mnfst_lst = None |
1021 msng_mnfst_set.clear() | 1129 msng_mnfst_set.clear() |
1130 | |
1022 changedfiles = changedfiles.keys() | 1131 changedfiles = changedfiles.keys() |
1023 changedfiles.sort() | 1132 changedfiles.sort() |
1133 # Go through all our files in order sorted by name. | |
1024 for fname in changedfiles: | 1134 for fname in changedfiles: |
1025 filerevlog = self.file(fname) | 1135 filerevlog = self.file(fname) |
1136 # Toss out the filenodes that the recipient isn't really | |
1137 # missing. | |
1026 prune_filenodes(fname, filerevlog) | 1138 prune_filenodes(fname, filerevlog) |
1027 msng_filenode_lst = msng_filenode_set[fname].keys() | 1139 msng_filenode_lst = msng_filenode_set[fname].keys() |
1140 # If any filenodes are left, generate the group for them, | |
1141 # otherwise don't bother. | |
1028 if len(msng_filenode_lst) > 0: | 1142 if len(msng_filenode_lst) > 0: |
1029 yield struct.pack(">l", len(fname) + 4) + fname | 1143 yield struct.pack(">l", len(fname) + 4) + fname |
1144 # Sort the filenodes by their revision # | |
1030 msng_filenode_lst.sort(cmp_by_rev_func(filerevlog)) | 1145 msng_filenode_lst.sort(cmp_by_rev_func(filerevlog)) |
1146 # Create a group generator and only pass in a changenode | |
1147 # lookup function as we need to collect no information | |
1148 # from filenodes. | |
1031 group = filerevlog.group(msng_filenode_lst, | 1149 group = filerevlog.group(msng_filenode_lst, |
1032 lookup_filenode_link_func(fname)) | 1150 lookup_filenode_link_func(fname)) |
1033 for chnk in group: | 1151 for chnk in group: |
1034 yield chnk | 1152 yield chnk |
1153 # Don't need this anymore, toss it to free memory. | |
1035 del msng_filenode_set[fname] | 1154 del msng_filenode_set[fname] |
1155 # Signal that no more groups are left. | |
1036 yield struct.pack(">l", 0) | 1156 yield struct.pack(">l", 0) |
1037 | 1157 |
1038 return util.chunkbuffer(gengroup()) | 1158 return util.chunkbuffer(gengroup()) |
1039 | 1159 |
1040 def changegroup(self, basenodes): | 1160 def changegroup(self, basenodes): |
1161 """Generate a changegroup of all nodes that we have that a recipient | |
1162 doesn't. | |
1163 | |
1164 This is much easier than the previous function as we can assume that | |
1165 the recipient has any changenode we aren't sending them.""" | |
1041 cl = self.changelog | 1166 cl = self.changelog |
1042 nodes = cl.nodesbetween(basenodes, None)[0] | 1167 nodes = cl.nodesbetween(basenodes, None)[0] |
1043 revset = dict.fromkeys([cl.rev(n) for n in nodes]) | 1168 revset = dict.fromkeys([cl.rev(n) for n in nodes]) |
1044 | 1169 |
1045 def identity(x): | 1170 def identity(x): |