Mercurial > public > mercurial-scm > hg
comparison mercurial/manifest.py @ 30042:d24e03da24b5
lazymanifest: write a more efficient, pypy friendly version of lazymanifest
author | Maciej Fijalkowski <fijall@gmail.com> |
---|---|
date | Mon, 12 Sep 2016 13:37:14 +0200 |
parents | 14ad8e2a4abe |
children | abe723002509 |
comparison
equal
deleted
inserted
replaced
30041:1779dde4c9ef | 30042:d24e03da24b5 |
---|---|
102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) | 102 lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) |
103 prevf = f | 103 prevf = f |
104 _checkforbidden(files) | 104 _checkforbidden(files) |
105 return ''.join(lines) | 105 return ''.join(lines) |
106 | 106 |
107 class _lazymanifest(dict): | 107 class lazymanifestiter(object): |
108 """This is the pure implementation of lazymanifest. | 108 def __init__(self, lm): |
109 | 109 self.pos = 0 |
110 It has not been optimized *at all* and is not lazy. | 110 self.lm = lm |
111 """ | |
112 | |
113 def __init__(self, data): | |
114 dict.__init__(self) | |
115 for f, n, fl in _parse(data): | |
116 self[f] = n, fl | |
117 | |
118 def __setitem__(self, k, v): | |
119 node, flag = v | |
120 assert node is not None | |
121 if len(node) > 21: | |
122 node = node[:21] # match c implementation behavior | |
123 dict.__setitem__(self, k, (node, flag)) | |
124 | 111 |
125 def __iter__(self): | 112 def __iter__(self): |
126 return iter(sorted(dict.keys(self))) | 113 return self |
127 | 114 |
128 def iterkeys(self): | 115 def next(self): |
129 return iter(sorted(dict.keys(self))) | 116 try: |
130 | 117 data, pos = self.lm._get(self.pos) |
131 def iterentries(self): | 118 except IndexError: |
132 return ((f, e[0], e[1]) for f, e in sorted(self.iteritems())) | 119 raise StopIteration |
120 if pos == -1: | |
121 self.pos += 1 | |
122 return data[0] | |
123 self.pos += 1 | |
124 zeropos = data.find('\x00', pos) | |
125 return data[pos:zeropos] | |
126 | |
127 class lazymanifestiterentries(object): | |
128 def __init__(self, lm): | |
129 self.lm = lm | |
130 self.pos = 0 | |
131 | |
132 def __iter__(self): | |
133 return self | |
134 | |
135 def next(self): | |
136 try: | |
137 data, pos = self.lm._get(self.pos) | |
138 except IndexError: | |
139 raise StopIteration | |
140 if pos == -1: | |
141 self.pos += 1 | |
142 return data | |
143 zeropos = data.find('\x00', pos) | |
144 hashval = unhexlify(data, self.lm.extrainfo[self.pos], | |
145 zeropos + 1, 40) | |
146 flags = self.lm._getflags(data, self.pos, zeropos) | |
147 self.pos += 1 | |
148 return (data[pos:zeropos], hashval, flags) | |
149 | |
150 def unhexlify(data, extra, pos, length): | |
151 s = data[pos:pos + length].decode('hex') | |
152 if extra: | |
153 s += chr(extra & 0xff) | |
154 return s | |
155 | |
156 def _cmp(a, b): | |
157 return (a > b) - (a < b) | |
158 | |
159 class _lazymanifest(object): | |
160 def __init__(self, data, positions=None, extrainfo=None, extradata=None): | |
161 if positions is None: | |
162 self.positions = self.findlines(data) | |
163 self.extrainfo = [0] * len(self.positions) | |
164 self.data = data | |
165 self.extradata = [] | |
166 else: | |
167 self.positions = positions[:] | |
168 self.extrainfo = extrainfo[:] | |
169 self.extradata = extradata[:] | |
170 self.data = data | |
171 | |
172 def findlines(self, data): | |
173 if not data: | |
174 return [] | |
175 pos = data.find("\n") | |
176 if pos == -1 or data[-1] != '\n': | |
177 raise ValueError("Manifest did not end in a newline.") | |
178 positions = [0] | |
179 prev = data[:data.find('\x00')] | |
180 while pos < len(data) - 1 and pos != -1: | |
181 positions.append(pos + 1) | |
182 nexts = data[pos + 1:data.find('\x00', pos + 1)] | |
183 if nexts < prev: | |
184 raise ValueError("Manifest lines not in sorted order.") | |
185 prev = nexts | |
186 pos = data.find("\n", pos + 1) | |
187 return positions | |
188 | |
189 def _get(self, index): | |
190 # get the position encoded in pos: | |
191 # positive number is an index in 'data' | |
192 # negative number is in extrapieces | |
193 pos = self.positions[index] | |
194 if pos >= 0: | |
195 return self.data, pos | |
196 return self.extradata[-pos - 1], -1 | |
197 | |
198 def _getkey(self, pos): | |
199 if pos >= 0: | |
200 return self.data[pos:self.data.find('\x00', pos + 1)] | |
201 return self.extradata[-pos - 1][0] | |
202 | |
203 def bsearch(self, key): | |
204 first = 0 | |
205 last = len(self.positions) - 1 | |
206 | |
207 while first <= last: | |
208 midpoint = (first + last)//2 | |
209 nextpos = self.positions[midpoint] | |
210 candidate = self._getkey(nextpos) | |
211 r = _cmp(key, candidate) | |
212 if r == 0: | |
213 return midpoint | |
214 else: | |
215 if r < 0: | |
216 last = midpoint - 1 | |
217 else: | |
218 first = midpoint + 1 | |
219 return -1 | |
220 | |
221 def bsearch2(self, key): | |
222 # same as the above, but will always return the position | |
223 # done for performance reasons | |
224 first = 0 | |
225 last = len(self.positions) - 1 | |
226 | |
227 while first <= last: | |
228 midpoint = (first + last)//2 | |
229 nextpos = self.positions[midpoint] | |
230 candidate = self._getkey(nextpos) | |
231 r = _cmp(key, candidate) | |
232 if r == 0: | |
233 return (midpoint, True) | |
234 else: | |
235 if r < 0: | |
236 last = midpoint - 1 | |
237 else: | |
238 first = midpoint + 1 | |
239 return (first, False) | |
240 | |
241 def __contains__(self, key): | |
242 return self.bsearch(key) != -1 | |
243 | |
244 def _getflags(self, data, needle, pos): | |
245 start = pos + 41 | |
246 end = data.find("\n", start) | |
247 if end == -1: | |
248 end = len(data) - 1 | |
249 if start == end: | |
250 return '' | |
251 return self.data[start:end] | |
252 | |
253 def __getitem__(self, key): | |
254 if not isinstance(key, str): | |
255 raise TypeError("getitem: manifest keys must be a string.") | |
256 needle = self.bsearch(key) | |
257 if needle == -1: | |
258 raise KeyError | |
259 data, pos = self._get(needle) | |
260 if pos == -1: | |
261 return (data[1], data[2]) | |
262 zeropos = data.find('\x00', pos) | |
263 assert 0 <= needle <= len(self.positions) | |
264 assert len(self.extrainfo) == len(self.positions) | |
265 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40) | |
266 flags = self._getflags(data, needle, zeropos) | |
267 return (hashval, flags) | |
268 | |
269 def __delitem__(self, key): | |
270 needle, found = self.bsearch2(key) | |
271 if not found: | |
272 raise KeyError | |
273 cur = self.positions[needle] | |
274 self.positions = self.positions[:needle] + self.positions[needle + 1:] | |
275 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] | |
276 if cur >= 0: | |
277 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] | |
278 | |
279 def __setitem__(self, key, value): | |
280 if not isinstance(key, str): | |
281 raise TypeError("setitem: manifest keys must be a string.") | |
282 if not isinstance(value, tuple) or len(value) != 2: | |
283 raise TypeError("Manifest values must be a tuple of (node, flags).") | |
284 hashval = value[0] | |
285 if not isinstance(hashval, str) or not 20 <= len(hashval) <= 22: | |
286 raise TypeError("node must be a 20-byte string") | |
287 flags = value[1] | |
288 if len(hashval) == 22: | |
289 hashval = hashval[:-1] | |
290 if not isinstance(flags, str) or len(flags) > 1: | |
291 raise TypeError("flags must a 0 or 1 byte string, got %r", flags) | |
292 needle, found = self.bsearch2(key) | |
293 if found: | |
294 # put the item | |
295 pos = self.positions[needle] | |
296 if pos < 0: | |
297 self.extradata[-pos - 1] = (key, hashval, value[1]) | |
298 else: | |
299 # just don't bother | |
300 self.extradata.append((key, hashval, value[1])) | |
301 self.positions[needle] = -len(self.extradata) | |
302 else: | |
303 # not found, put it in with extra positions | |
304 self.extradata.append((key, hashval, value[1])) | |
305 self.positions = (self.positions[:needle] + [-len(self.extradata)] | |
306 + self.positions[needle:]) | |
307 self.extrainfo = (self.extrainfo[:needle] + [0] + | |
308 self.extrainfo[needle:]) | |
133 | 309 |
134 def copy(self): | 310 def copy(self): |
135 c = _lazymanifest('') | 311 # XXX call _compact like in C? |
136 c.update(self) | 312 return _lazymanifest(self.data, self.positions, self.extrainfo, |
137 return c | 313 self.extradata) |
314 | |
315 def _compact(self): | |
316 # hopefully not called TOO often | |
317 if len(self.extradata) == 0: | |
318 return | |
319 l = [] | |
320 last_cut = 0 | |
321 i = 0 | |
322 offset = 0 | |
323 self.extrainfo = [0] * len(self.positions) | |
324 while i < len(self.positions): | |
325 if self.positions[i] >= 0: | |
326 cur = self.positions[i] | |
327 last_cut = cur | |
328 while True: | |
329 self.positions[i] = offset | |
330 i += 1 | |
331 if i == len(self.positions) or self.positions[i] < 0: | |
332 break | |
333 offset += self.positions[i] - cur | |
334 cur = self.positions[i] | |
335 end_cut = self.data.find('\n', cur) | |
336 if end_cut != -1: | |
337 end_cut += 1 | |
338 offset += end_cut - cur | |
339 l.append(self.data[last_cut:end_cut]) | |
340 else: | |
341 while i < len(self.positions) and self.positions[i] < 0: | |
342 cur = self.positions[i] | |
343 t = self.extradata[-cur - 1] | |
344 l.append(self._pack(t)) | |
345 self.positions[i] = offset | |
346 if len(t[1]) > 20: | |
347 self.extrainfo[i] = ord(t[1][21]) | |
348 offset += len(l[-1]) | |
349 i += 1 | |
350 self.data = ''.join(l) | |
351 self.extradata = [] | |
352 | |
353 def _pack(self, d): | |
354 return d[0] + '\x00' + d[1][:20].encode('hex') + d[2] + '\n' | |
355 | |
356 def text(self): | |
357 self._compact() | |
358 return self.data | |
138 | 359 |
139 def diff(self, m2, clean=False): | 360 def diff(self, m2, clean=False): |
140 '''Finds changes between the current manifest and m2.''' | 361 '''Finds changes between the current manifest and m2.''' |
362 # XXX think whether efficiency matters here | |
141 diff = {} | 363 diff = {} |
142 | 364 |
143 for fn, e1 in self.iteritems(): | 365 for fn, e1, flags in self.iterentries(): |
144 if fn not in m2: | 366 if fn not in m2: |
145 diff[fn] = e1, (None, '') | 367 diff[fn] = (e1, flags), (None, '') |
146 else: | 368 else: |
147 e2 = m2[fn] | 369 e2 = m2[fn] |
148 if e1 != e2: | 370 if (e1, flags) != e2: |
149 diff[fn] = e1, e2 | 371 diff[fn] = (e1, flags), e2 |
150 elif clean: | 372 elif clean: |
151 diff[fn] = None | 373 diff[fn] = None |
152 | 374 |
153 for fn, e2 in m2.iteritems(): | 375 for fn, e2, flags in m2.iterentries(): |
154 if fn not in self: | 376 if fn not in self: |
155 diff[fn] = (None, ''), e2 | 377 diff[fn] = (None, ''), (e2, flags) |
156 | 378 |
157 return diff | 379 return diff |
158 | 380 |
381 def iterentries(self): | |
382 return lazymanifestiterentries(self) | |
383 | |
384 def iterkeys(self): | |
385 return lazymanifestiter(self) | |
386 | |
387 def __iter__(self): | |
388 return lazymanifestiter(self) | |
389 | |
390 def __len__(self): | |
391 return len(self.positions) | |
392 | |
159 def filtercopy(self, filterfn): | 393 def filtercopy(self, filterfn): |
394 # XXX should be optimized | |
160 c = _lazymanifest('') | 395 c = _lazymanifest('') |
161 for f, n, fl in self.iterentries(): | 396 for f, n, fl in self.iterentries(): |
162 if filterfn(f): | 397 if filterfn(f): |
163 c[f] = n, fl | 398 c[f] = n, fl |
164 return c | 399 return c |
165 | |
166 def text(self): | |
167 """Get the full data of this manifest as a bytestring.""" | |
168 return _textv1(self.iterentries()) | |
169 | 400 |
170 try: | 401 try: |
171 _lazymanifest = parsers.lazymanifest | 402 _lazymanifest = parsers.lazymanifest |
172 except AttributeError: | 403 except AttributeError: |
173 pass | 404 pass |