comparison mercurial/revlog.py @ 38641:feba6be0941b

revlog: extract density based slicing into its own function We are going to introduce another slicing step. We start by extracting the existing one into its own function.
author Boris Feld <boris.feld@octobus.net>
date Tue, 10 Jul 2018 11:53:36 +0200
parents f62b8fb0a484
children e59e27e52297
comparison
equal deleted inserted replaced
38640:f62b8fb0a484 38641:feba6be0941b
331 >>> list(_slicechunk(revlog, [0, 11, 13, 15])) 331 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
332 [[0], [11, 13, 15]] 332 [[0], [11, 13, 15]]
333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) 333 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
334 [[1, 2], [5, 8, 10, 11], [14]] 334 [[1, 2], [5, 8, 10, 11], [14]]
335 """ 335 """
336 for chunk in _slicechunktodensity(revlog, revs,
337 revlog._srdensitythreshold,
338 revlog._srmingapsize):
339 yield chunk
340
341 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
342 """slice revs to reduce the amount of unrelated data to be read from disk.
343
344 ``revs`` is sliced into groups that should be read in one time.
345 Assume that revs are sorted.
346
347 The initial chunk is sliced until the overall density (payload/chunks-span
348 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
349 skipped.
350
351 >>> revlog = _testrevlog([
352 ... 5, #00 (5)
353 ... 10, #01 (5)
354 ... 12, #02 (2)
355 ... 12, #03 (empty)
356 ... 27, #04 (15)
357 ... 31, #05 (4)
358 ... 31, #06 (empty)
359 ... 42, #07 (11)
360 ... 47, #08 (5)
361 ... 47, #09 (empty)
362 ... 48, #10 (1)
363 ... 51, #11 (3)
364 ... 74, #12 (23)
365 ... 85, #13 (11)
366 ... 86, #14 (1)
367 ... 91, #15 (5)
368 ... ])
369
370 >>> list(_slicechunktodensity(revlog, range(16)))
371 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
372 >>> list(_slicechunktodensity(revlog, [0, 15]))
373 [[0], [15]]
374 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
375 [[0], [11], [15]]
376 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
377 [[0], [11, 13, 15]]
378 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
379 [[1, 2], [5, 8, 10, 11], [14]]
380 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
381 ... mingapsize=20))
382 [[1, 2, 3, 5, 8, 10, 11], [14]]
383 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
384 ... targetdensity=0.95))
385 [[1, 2], [5], [8, 10, 11], [14]]
386 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
387 ... targetdensity=0.95, mingapsize=12))
388 [[1, 2], [5, 8, 10, 11], [14]]
389 """
336 start = revlog.start 390 start = revlog.start
337 length = revlog.length 391 length = revlog.length
338 392
339 if len(revs) <= 1: 393 if len(revs) <= 1:
340 yield revs 394 yield revs
341 return 395 return
342 396
343 readdata = deltachainspan = _segmentspan(revlog, revs) 397 readdata = deltachainspan = _segmentspan(revlog, revs)
344 398
345 if deltachainspan < revlog._srmingapsize: 399 if deltachainspan < mingapsize:
346 yield revs 400 yield revs
347 return 401 return
348 402
349 chainpayload = sum(length(r) for r in revs) 403 chainpayload = sum(length(r) for r in revs)
350 404
351 if deltachainspan: 405 if deltachainspan:
352 density = chainpayload / float(deltachainspan) 406 density = chainpayload / float(deltachainspan)
353 else: 407 else:
354 density = 1.0 408 density = 1.0
355 409
356 if density >= revlog._srdensitythreshold: 410 if density >= targetdensity:
357 yield revs 411 yield revs
358 return 412 return
359 413
360 # Store the gaps in a heap to have them sorted by decreasing size 414 # Store the gaps in a heap to have them sorted by decreasing size
361 gapsheap = [] 415 gapsheap = []
370 continue 424 continue
371 425
372 if prevend is not None: 426 if prevend is not None:
373 gapsize = revstart - prevend 427 gapsize = revstart - prevend
374 # only consider holes that are large enough 428 # only consider holes that are large enough
375 if gapsize > revlog._srmingapsize: 429 if gapsize > mingapsize:
376 heapq.heappush(gapsheap, (-gapsize, i)) 430 heapq.heappush(gapsheap, (-gapsize, i))
377 431
378 prevend = revstart + revlen 432 prevend = revstart + revlen
379 433
380 # Collect the indices of the largest holes until the density is acceptable 434 # Collect the indices of the largest holes until the density is acceptable
381 indicesheap = [] 435 indicesheap = []
382 heapq.heapify(indicesheap) 436 heapq.heapify(indicesheap)
383 while gapsheap and density < revlog._srdensitythreshold: 437 while gapsheap and density < targetdensity:
384 oppgapsize, gapidx = heapq.heappop(gapsheap) 438 oppgapsize, gapidx = heapq.heappop(gapsheap)
385 439
386 heapq.heappush(indicesheap, gapidx) 440 heapq.heappush(indicesheap, gapidx)
387 441
388 # the gap sizes are stored as negatives to be sorted decreasingly 442 # the gap sizes are stored as negatives to be sorted decreasingly