comparison tests/artifacts/scripts/generate-churning-bundle.py @ 52469:9feb175c028d

test-sparse-revlog: build the content directly in memory. We now do our own merge, so that both the graph and the content are fully defined, which will help to speed up the generation soon. The generation jumps through a few hoops to avoid consuming 700MB of memory at run time.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 04 Dec 2024 10:34:17 +0100
parents e26b738430a1
children 83f87912c5e0
comparison
equal deleted inserted replaced
52468:e26b738430a1 52469:9feb175c028d
94 else: 94 else:
95 to_write = oldcontent[idx] 95 to_write = oldcontent[idx]
96 yield to_write 96 yield to_write
97 97
98 98
def merge_content(base, left, right):
    """merge two file content to produce a new one

    <base>, <left> and <right> are iterables of lines, walked in lockstep
    (iteration stops with the shortest one). For each position:

    - if only one side differs from the base, that side's line is used,
    - if neither side differs, the (unchanged) line is used,
    - if both sides differ from the base, a brand new line is produced by
      `nextcontent(left_line + right_line)`.

    Similar to what the manifest would do.

    This is a generator yielding one merged line per position.
    """
    # Distinct loop names: the iterables passed as <left>/<right> must not be
    # rebound while we are iterating over them.
    for old, ours, theirs in zip(base, left, right):
        if old == ours:
            # left side untouched: the right side wins (it may be unchanged
            # too, in which case it is equal to the base line anyway)
            yield theirs
        elif old == theirs:
            # only the left side changed
            yield ours
        else:
            # both sides changed: produce new content derived from both
            yield nextcontent(ours + theirs)
114
115
def ancestors(graph, rev):
    """return the set of ancestors of revision <rev>

    <graph> maps a revision to an iterable of its parents, where `None`
    stands for "no parent". The returned set includes <rev> itself.
    """
    visited = {rev}
    pending = [rev]
    while pending:
        node = pending.pop()
        for parent in graph[node]:
            # skip missing parents and the ones we already walked through
            if parent is None or parent in visited:
                continue
            visited.add(parent)
            pending.append(parent)
    return visited
130
131
def gca(graph, left, right):
    """find the greatest common ancestor of <left> and <right>

    The result is the largest revision (as ordered by `max`) that is an
    ancestor of both <left> and <right>.

    Raise ValueError if the two revisions share no ancestor at all.

    Note that the algorithm is naive and N² when run on all merges, however
    this should not be too much of an issue given the current scale.
    """
    common = ancestors(graph, left) & ancestors(graph, right)
    if not common:
        # be explicit, instead of the opaque "max() arg is an empty sequence"
        msg = "revisions %r and %r have no common ancestor" % (left, right)
        raise ValueError(msg)
    return max(common)
139
140
def make_one_content_fn(idx, base, left, right):
    """build a function that builds the content of revision <idx> on demand

    <base>, <left> and <right> are either None or functions (as returned by
    this very function) producing the content of the merge base, the first
    parent and the second parent:

    - no <left>: the content is created from scratch,
    - <left> but no <base>: the content is derived from <left> only,
    - otherwise: <left> and <right> are merged using <base>, and the content
      is derived from the merge result.

    The dependencies are kept as references to make sure they are not
    garbage-collected until we use them. Once we computed the current content,
    we make sure to drop their references to allow them to be garbage
    collected.

    The returned function takes no argument, computes the content (a list of
    lines) on its first call and returns that same cached list afterward.
    """

    # The dependencies are bound as default values, so they live exactly as
    # long as `content_fn` itself.
    def content_fn(idx=idx, base=base, left=left, right=right):
        if left is None:
            new = filecontent(idx, None)
        elif base is None:
            new = filecontent(idx, left())
        else:
            merged = merge_content(base(), left(), right())
            new = filecontent(idx, list(merged))
        return list(new)

    del idx
    del base
    del left
    del right

    value = None
    # single-slot holder: popping it is what releases `content_fn` (and the
    # dependencies it holds) once the content has been computed.
    cf = [content_fn]
    del content_fn

    def final_fn():
        nonlocal value
        if value is None:
            content_fn = cf.pop()
            # `content_fn` already returns a fresh list, no need to copy it
            value = content_fn()
            del content_fn
        return value

    return final_fn
177
178
def build_content_graph(graph):
    """produce file content for all revisions

    <graph> maps each revision to its `(p1, p2)` parents and must list the
    parents before their children. The result maps each revision to a
    function producing its content.

    The content will be generated on demand and cached. Clean up the
    dictionary as you use it to reduce memory usage.
    """
    content = {}
    for rev, (p1, p2) in graph.items():
        if p1 is None:
            # root revision: nothing to build upon
            base = left = right = None
        elif p2 is None:
            # regular revision: only depends on its single parent
            base, left, right = None, content[p1], None
        else:
            # merge: also need the content of the common ancestor
            left = content[p1]
            right = content[p2]
            base = content[gca(graph, p1, p2)]
        content[rev] = make_one_content_fn(rev, base, left, right)
    return content
196
197
198 CONTENT = build_content_graph(GRAPH)
108 199
109 200
110 def hg(command, *args): 201 def hg(command, *args):
111 """call a mercurial command with appropriate config and argument""" 202 """call a mercurial command with appropriate config and argument"""
112 env = os.environ.copy() 203 env = os.environ.copy()
137 print("generating commit #%d/%d" % (idx, NB_CHANGESET)) 228 print("generating commit #%d/%d" % (idx, NB_CHANGESET))
138 if p1 is not None and p1 != idx - 1: 229 if p1 is not None and p1 != idx - 1:
139 hg('update', "%d" % p1) 230 hg('update', "%d" % p1)
140 if p2 is not None: 231 if p2 is not None:
141 hg('merge', "%d" % p2) 232 hg('merge', "%d" % p2)
142 updatefile(FILENAME, idx) 233 with open(FILENAME, 'wb') as f:
234 # pop the value to let it be garbage collection eventually.
235 for line in CONTENT.pop(idx)():
236 f.write(line)
143 if idx == 0: 237 if idx == 0:
144 hg('add', FILENAME) 238 hg('add', FILENAME)
145 hg('commit', '--addremove', '--message', 'initial commit') 239 hg('commit', '--addremove', '--message', 'initial commit')
146 else: 240 else:
147 hg('commit', '--message', 'commit #%d' % idx) 241 hg('commit', '--message', 'commit #%d' % idx)