Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/revlog.py @ 38642:e59e27e52297
revlog: add function to slice chunk down to a given size
It is possible to encounter situations where the slicing based on density did
not achieve chunks smaller than the 4*textlength limit. To avoid extra memory
consumption in those cases, we need to be able to break chunks down to a given
size. The actual callers come in the next changesets.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Wed, 11 Jul 2018 00:35:01 -0700 |
parents | feba6be0941b |
children | 967fee55e8d9 |
comparison
equal
deleted
inserted
replaced
38641:feba6be0941b | 38642:e59e27e52297 |
---|---|
335 """ | 335 """ |
336 for chunk in _slicechunktodensity(revlog, revs, | 336 for chunk in _slicechunktodensity(revlog, revs, |
337 revlog._srdensitythreshold, | 337 revlog._srdensitythreshold, |
338 revlog._srmingapsize): | 338 revlog._srmingapsize): |
339 yield chunk | 339 yield chunk |
340 | |
def _slicechunktosize(revlog, revs, targetsize):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    ``revs`` is the sequence of revisions to slice and ``targetsize`` is the
    maximum on-disk span (from the start of the first revision of a chunk to
    the end of its last revision) that a yielded chunk should cover. When
    ``targetsize`` is None no limit applies and ``revs`` is yielded untouched.

    This is a generator of chunks (slices of ``revs``). A single revision
    larger than ``targetsize`` is still yielded on its own, so a yielded chunk
    may exceed the target in that case. Chunks that end up empty after
    trimming are never yielded.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # Fast path: no limit requested, or the whole segment already fits.
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    # [startrevidx, endrevidx] is the (inclusive) index window of the chunk
    # being accumulated; startdata is the on-disk offset where it begins.
    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # ``r`` still fits: extend the current chunk.
            endrevidx = idx
        else:
            # Adding ``r`` would overflow the target: flush what we have and
            # start a new chunk at ``r`` (even if ``r`` alone is too large).
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    # Flush the last chunk with the same guard as inside the loop, so an
    # empty result from _trimchunk is never handed to the caller.
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
340 | 417 |
341 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0): | 418 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0): |
342 """slice revs to reduce the amount of unrelated data to be read from disk. | 419 """slice revs to reduce the amount of unrelated data to be read from disk. |
343 | 420 |
344 ``revs`` is sliced into groups that should be read in one time. | 421 ``revs`` is sliced into groups that should be read in one time. |