Mercurial > public > mercurial-scm > hg
comparison mercurial/revlog.py @ 38641:feba6be0941b
revlog: extract density based slicing into its own function
We are going to introduce another slicing step. We start by extracting the
existing one into its own function.
author | Boris Feld <boris.feld@octobus.net> |
---|---|
date | Tue, 10 Jul 2018 11:53:36 +0200 |
parents | f62b8fb0a484 |
children | e59e27e52297 |
comparison
equal
deleted
inserted
replaced
38640:f62b8fb0a484 | 38641:feba6be0941b |
---|---|
331 >>> list(_slicechunk(revlog, [0, 11, 13, 15])) | 331 >>> list(_slicechunk(revlog, [0, 11, 13, 15])) |
332 [[0], [11, 13, 15]] | 332 [[0], [11, 13, 15]] |
333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) | 333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) |
334 [[1, 2], [5, 8, 10, 11], [14]] | 334 [[1, 2], [5, 8, 10, 11], [14]] |
335 """ | 335 """ |
336 for chunk in _slicechunktodensity(revlog, revs, | |
337 revlog._srdensitythreshold, | |
338 revlog._srmingapsize): | |
339 yield chunk | |
340 | |
341 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0): | |
342 """slice revs to reduce the amount of unrelated data to be read from disk. | |
343 | |
344 ``revs`` is sliced into groups that should be read in one time. | |
345 Assume that revs are sorted. | |
346 | |
347 The initial chunk is sliced until the overall density (payload/chunks-span | |
348 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is | |
349 skipped. | |
350 | |
351 >>> revlog = _testrevlog([ | |
352 ... 5, #00 (5) | |
353 ... 10, #01 (5) | |
354 ... 12, #02 (2) | |
355 ... 12, #03 (empty) | |
356 ... 27, #04 (15) | |
357 ... 31, #05 (4) | |
358 ... 31, #06 (empty) | |
359 ... 42, #07 (11) | |
360 ... 47, #08 (5) | |
361 ... 47, #09 (empty) | |
362 ... 48, #10 (1) | |
363 ... 51, #11 (3) | |
364 ... 74, #12 (23) | |
365 ... 85, #13 (11) | |
366 ... 86, #14 (1) | |
367 ... 91, #15 (5) | |
368 ... ]) | |
369 | |
370 >>> list(_slicechunktodensity(revlog, range(16))) | |
371 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]] | |
372 >>> list(_slicechunktodensity(revlog, [0, 15])) | |
373 [[0], [15]] | |
374 >>> list(_slicechunktodensity(revlog, [0, 11, 15])) | |
375 [[0], [11], [15]] | |
376 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15])) | |
377 [[0], [11, 13, 15]] | |
378 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) | |
379 [[1, 2], [5, 8, 10, 11], [14]] | |
380 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14], | |
381 ... mingapsize=20)) | |
382 [[1, 2, 3, 5, 8, 10, 11], [14]] | |
383 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14], | |
384 ... targetdensity=0.95)) | |
385 [[1, 2], [5], [8, 10, 11], [14]] | |
386 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14], | |
387 ... targetdensity=0.95, mingapsize=12)) | |
388 [[1, 2], [5, 8, 10, 11], [14]] | |
389 """ | |
336 start = revlog.start | 390 start = revlog.start |
337 length = revlog.length | 391 length = revlog.length |
338 | 392 |
339 if len(revs) <= 1: | 393 if len(revs) <= 1: |
340 yield revs | 394 yield revs |
341 return | 395 return |
342 | 396 |
343 readdata = deltachainspan = _segmentspan(revlog, revs) | 397 readdata = deltachainspan = _segmentspan(revlog, revs) |
344 | 398 |
345 if deltachainspan < revlog._srmingapsize: | 399 if deltachainspan < mingapsize: |
346 yield revs | 400 yield revs |
347 return | 401 return |
348 | 402 |
349 chainpayload = sum(length(r) for r in revs) | 403 chainpayload = sum(length(r) for r in revs) |
350 | 404 |
351 if deltachainspan: | 405 if deltachainspan: |
352 density = chainpayload / float(deltachainspan) | 406 density = chainpayload / float(deltachainspan) |
353 else: | 407 else: |
354 density = 1.0 | 408 density = 1.0 |
355 | 409 |
356 if density >= revlog._srdensitythreshold: | 410 if density >= targetdensity: |
357 yield revs | 411 yield revs |
358 return | 412 return |
359 | 413 |
360 # Store the gaps in a heap to have them sorted by decreasing size | 414 # Store the gaps in a heap to have them sorted by decreasing size |
361 gapsheap = [] | 415 gapsheap = [] |
370 continue | 424 continue |
371 | 425 |
372 if prevend is not None: | 426 if prevend is not None: |
373 gapsize = revstart - prevend | 427 gapsize = revstart - prevend |
374 # only consider holes that are large enough | 428 # only consider holes that are large enough |
375 if gapsize > revlog._srmingapsize: | 429 if gapsize > mingapsize: |
376 heapq.heappush(gapsheap, (-gapsize, i)) | 430 heapq.heappush(gapsheap, (-gapsize, i)) |
377 | 431 |
378 prevend = revstart + revlen | 432 prevend = revstart + revlen |
379 | 433 |
380 # Collect the indices of the largest holes until the density is acceptable | 434 # Collect the indices of the largest holes until the density is acceptable |
381 indicesheap = [] | 435 indicesheap = [] |
382 heapq.heapify(indicesheap) | 436 heapq.heapify(indicesheap) |
383 while gapsheap and density < revlog._srdensitythreshold: | 437 while gapsheap and density < targetdensity: |
384 oppgapsize, gapidx = heapq.heappop(gapsheap) | 438 oppgapsize, gapidx = heapq.heappop(gapsheap) |
385 | 439 |
386 heapq.heappush(indicesheap, gapidx) | 440 heapq.heappush(indicesheap, gapidx) |
387 | 441 |
388 # the gap sizes are stored as negatives to be sorted decreasingly | 442 # the gap sizes are stored as negatives to be sorted decreasingly |