comparison mercurial/revlog.py @ 39330:655b5b465953

revlog: split functionality related to deltas computation into a new module The revlog module is getting big and this logic is getting more and more advanced. Moving it to `mercurial.revlogutils.deltas` splits a lot off revlog.py and will help this logic become less interleaved with revlog. The code is simply moved without modification (except for namespace changes). Refactoring and improvements will be made in later changesets.
author Boris Feld <boris.feld@octobus.net>
date Thu, 16 Aug 2018 02:53:42 +0200
parents 729082bb9938
children 6f4b8f607a31
comparison
equal deleted inserted replaced
39329:729082bb9938 39330:655b5b465953
15 15
16 import collections 16 import collections
17 import contextlib 17 import contextlib
18 import errno 18 import errno
19 import hashlib 19 import hashlib
20 import heapq
21 import os 20 import os
22 import re 21 import re
23 import struct 22 import struct
24 import zlib 23 import zlib
25 24
37 ) 36 )
38 from .i18n import _ 37 from .i18n import _
39 from .revlogutils.constants import ( 38 from .revlogutils.constants import (
40 FLAG_GENERALDELTA, 39 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA, 40 FLAG_INLINE_DATA,
42 LIMIT_DELTA2TEXT,
43 REVIDX_DEFAULT_FLAGS, 41 REVIDX_DEFAULT_FLAGS,
44 REVIDX_ELLIPSIS, 42 REVIDX_ELLIPSIS,
45 REVIDX_EXTSTORED, 43 REVIDX_EXTSTORED,
46 REVIDX_FLAGS_ORDER, 44 REVIDX_FLAGS_ORDER,
47 REVIDX_ISCENSORED, 45 REVIDX_ISCENSORED,
67 pycompat, 65 pycompat,
68 repository, 66 repository,
69 templatefilters, 67 templatefilters,
70 util, 68 util,
71 ) 69 )
70 from .revlogutils import (
71 deltas as deltautil,
72 )
72 from .utils import ( 73 from .utils import (
73 interfaceutil, 74 interfaceutil,
74 stringutil, 75 stringutil,
75 ) 76 )
76 77
208 b = p1 209 b = p1
209 s = hashlib.sha1(a) 210 s = hashlib.sha1(a)
210 s.update(b) 211 s.update(b)
211 s.update(text) 212 s.update(text)
212 return s.digest() 213 return s.digest()
213
214 class _testrevlog(object):
215 """minimalist fake revlog to use in doctests"""
216
217 def __init__(self, data, density=0.5, mingap=0):
218 """data is an list of revision payload boundaries"""
219 self._data = data
220 self._srdensitythreshold = density
221 self._srmingapsize = mingap
222
223 def start(self, rev):
224 if rev == 0:
225 return 0
226 return self._data[rev - 1]
227
228 def end(self, rev):
229 return self._data[rev]
230
231 def length(self, rev):
232 return self.end(rev) - self.start(rev)
233
234 def __len__(self):
235 return len(self._data)
236
237 def _trimchunk(revlog, revs, startidx, endidx=None):
238 """returns revs[startidx:endidx] without empty trailing revs
239
240 Doctest Setup
241 >>> revlog = _testrevlog([
242 ... 5, #0
243 ... 10, #1
244 ... 12, #2
245 ... 12, #3 (empty)
246 ... 17, #4
247 ... 21, #5
248 ... 21, #6 (empty)
249 ... ])
250
251 Contiguous cases:
252 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
253 [0, 1, 2, 3, 4, 5]
254 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
255 [0, 1, 2, 3, 4]
256 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
257 [0, 1, 2]
258 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
259 [2]
260 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
261 [3, 4, 5]
262 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
263 [3, 4]
264
265 Discontiguous cases:
266 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
267 [1, 3, 5]
268 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
269 [1]
270 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
271 [3, 5]
272 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
273 [3, 5]
274 """
275 length = revlog.length
276
277 if endidx is None:
278 endidx = len(revs)
279
280 # If we have a non-emtpy delta candidate, there are nothing to trim
281 if revs[endidx - 1] < len(revlog):
282 # Trim empty revs at the end, except the very first revision of a chain
283 while (endidx > 1
284 and endidx > startidx
285 and length(revs[endidx - 1]) == 0):
286 endidx -= 1
287
288 return revs[startidx:endidx]
289
290 def _segmentspan(revlog, revs, deltainfo=None):
291 """Get the byte span of a segment of revisions
292
293 revs is a sorted array of revision numbers
294
295 >>> revlog = _testrevlog([
296 ... 5, #0
297 ... 10, #1
298 ... 12, #2
299 ... 12, #3 (empty)
300 ... 17, #4
301 ... ])
302
303 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
304 17
305 >>> _segmentspan(revlog, [0, 4])
306 17
307 >>> _segmentspan(revlog, [3, 4])
308 5
309 >>> _segmentspan(revlog, [1, 2, 3,])
310 7
311 >>> _segmentspan(revlog, [1, 3])
312 7
313 """
314 if not revs:
315 return 0
316 if deltainfo is not None and len(revlog) <= revs[-1]:
317 if len(revs) == 1:
318 return deltainfo.deltalen
319 offset = revlog.end(len(revlog) - 1)
320 end = deltainfo.deltalen + offset
321 else:
322 end = revlog.end(revs[-1])
323 return end - revlog.start(revs[0])
324
def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    # first slice by density, then enforce the size ceiling on each chunk
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
392
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected,
    but that are still too large compared to the read guarantee of revlog.
    This might happen when the "minimal gap size" interrupted the slicing
    or when chains are built in a way that create large blocks next to
    each other.

    >>> revlog = _testrevlog([
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # fast path: whole chunk already fits within the target size
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        # span of the current chunk if ``r`` were included in it
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            # emit the chunk accumulated so far and start a new one at ``r``
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)
469
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision can never be split further
    if len(revs) <= 1:
        yield revs
        return

    # ``nextrev`` is the number a not-yet-stored candidate revision would get
    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = _segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    # spans below the minimal gap size are never worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # account for the pending candidate revision as a virtual trailing rev
    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            # the virtual candidate revision sits at the end of the revlog
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
613
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Result of computing a candidate delta for a revision.

    Instances are built by ``_deltacomputer._builddeltainfo`` and judged by
    ``revlog._isgooddeltainfo``.
    """
    # byte span from the start of the chain base to the end of this delta
    distance = attr.ib()
    # length of the compressed delta (header + data)
    deltalen = attr.ib()
    # the (header, compressed data) pair produced by revlog.compress
    data = attr.ib()
    # revision this delta is stored against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # delta chain length, including this delta
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta is an intermediate snapshot, else None
    snapshotdepth = attr.ib()
624
class _deltacomputer(object):
    """Helper computing and selecting delta candidates for a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog._storedeltachains:
            # ``tested`` tracks revisions already yielded, to avoid yielding
            # the same candidate twice
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                yield (prev,)
                tested.add(prev)

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        # revinfo.btext is a one-element mutable cache shared with callers
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw (uncompressed) delta of ``revinfo`` against ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing a delta of ``revinfo`` against ``base``."""
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # distance covers everything from the chain base to this new delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        if not revinfo.textlen:
            return None # empty files do not need a delta

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullid, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta of this group
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo
817 214
818 @attr.s(slots=True, frozen=True) 215 @attr.s(slots=True, frozen=True)
819 class _revisioninfo(object): 216 class _revisioninfo(object):
820 """Information about a revision that allows building its fulltext 217 """Information about a revision that allows building its fulltext
821 node: expected hash of the revision 218 node: expected hash of the revision
2093 ladd = l.append 1490 ladd = l.append
2094 1491
2095 if not self._withsparseread: 1492 if not self._withsparseread:
2096 slicedchunks = (revs,) 1493 slicedchunks = (revs,)
2097 else: 1494 else:
2098 slicedchunks = _slicechunk(self, revs, targetsize=targetsize) 1495 slicedchunks = deltautil.slicechunk(self, revs,
1496 targetsize=targetsize)
2099 1497
2100 for revschunk in slicedchunks: 1498 for revschunk in slicedchunks:
2101 firstrev = revschunk[0] 1499 firstrev = revschunk[0]
2102 # Skip trailing revisions with empty diff 1500 # Skip trailing revisions with empty diff
2103 for lastrev in revschunk[::-1]: 1501 for lastrev in revschunk[::-1]:
2391 cachedelta - an optional precomputed delta 1789 cachedelta - an optional precomputed delta
2392 node - nodeid of revision; typically node is not specified, and it is 1790 node - nodeid of revision; typically node is not specified, and it is
2393 computed by default as hash(text, p1, p2), however subclasses might 1791 computed by default as hash(text, p1, p2), however subclasses might
2394 use different hashing method (and override checkhash() in such case) 1792 use different hashing method (and override checkhash() in such case)
2395 flags - the known flags to set on the revision 1793 flags - the known flags to set on the revision
2396 deltacomputer - an optional _deltacomputer instance shared between 1794 deltacomputer - an optional deltacomputer instance shared between
2397 multiple calls 1795 multiple calls
2398 """ 1796 """
2399 if link == nullrev: 1797 if link == nullrev:
2400 raise RevlogError(_("attempted to add linkrev -1 to %s") 1798 raise RevlogError(_("attempted to add linkrev -1 to %s")
2401 % self.indexfile) 1799 % self.indexfile)
2514 except KeyError: 1912 except KeyError:
2515 raise RevlogError(_('unknown compression type %r') % t) 1913 raise RevlogError(_('unknown compression type %r') % t)
2516 1914
2517 return compressor.decompress(data) 1915 return compressor.decompress(data)
2518 1916
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            # search for the first non-snapshot revision
            for idx, r in enumerate(deltachain):
                if not self.issnapshot(r):
                    break
            deltachain = deltachain[idx:]
            chunks = _slicechunk(self, deltachain, deltainfo)
            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
            distance = max(all_span)
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance # ensure the conditional pass
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places, we are ignoring irrelevant data ranges below
            # a certain size. We also apply this tradeoff here and relax the
            # span constraint for small enough content.
            maxdist = self._srmingapsize

        # Bad delta from read span:
        #
        #   If the span of data read is larger than the maximum allowed.
        if maxdist < distance:
            return False

        # Bad delta from new delta size:
        #
        #   If the delta size is larger than the target text, storing the
        #   delta will be inefficient.
        if textlen < deltainfo.deltalen:
            return False

        # Bad delta from cumulated payload size:
        #
        #   If the sum of deltas gets larger than K * target text length.
        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        #   If the number of deltas in the chain gets too high.
        if self._maxchainlen and self._maxchainlen < deltainfo.chainlen:
            return False

        # bad delta from intermediate snapshot size limit
        #
        #   If an intermediate snapshot size is higher than the limit. The
        #   limit exists to prevent an endless chain of intermediate deltas
        #   from being created.
        if (deltainfo.snapshotdepth is not None and
                (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
            return False

        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if (deltainfo.snapshotdepth
                and self.length(deltainfo.base) < deltainfo.deltalen):
            return False

        return True
2607
2608 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags, 1917 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2609 cachedelta, ifh, dfh, alwayscache=False, 1918 cachedelta, ifh, dfh, alwayscache=False,
2610 deltacomputer=None): 1919 deltacomputer=None):
2611 """internal function to add revisions to the log 1920 """internal function to add revisions to the log
2612 1921
2650 cachedelta[1]) 1959 cachedelta[1])
2651 else: 1960 else:
2652 textlen = len(rawtext) 1961 textlen = len(rawtext)
2653 1962
2654 if deltacomputer is None: 1963 if deltacomputer is None:
2655 deltacomputer = _deltacomputer(self) 1964 deltacomputer = deltautil.deltacomputer(self)
2656 1965
2657 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags) 1966 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2658 1967
2659 # no delta for flag processor revision (see "candelta" for why) 1968 # no delta for flag processor revision (see "candelta" for why)
2660 # not calling candelta since only one revision needs test, also to 1969 # not calling candelta since only one revision needs test, also to
2752 def flush(): 2061 def flush():
2753 if dfh: 2062 if dfh:
2754 dfh.flush() 2063 dfh.flush()
2755 ifh.flush() 2064 ifh.flush()
2756 try: 2065 try:
2757 deltacomputer = _deltacomputer(self) 2066 deltacomputer = deltautil.deltacomputer(self)
2758 # loop through our set of deltas 2067 # loop through our set of deltas
2759 for data in deltas: 2068 for data in deltas:
2760 node, p1, p2, linknode, deltabase, delta, flags = data 2069 node, p1, p2, linknode, deltabase, delta, flags = data
2761 link = linkmapper(linknode) 2070 link = linkmapper(linknode)
2762 flags = flags or REVIDX_DEFAULT_FLAGS 2071 flags = flags or REVIDX_DEFAULT_FLAGS
3125 destrevlog._deltabothparents = deltabothparents or oldamd 2434 destrevlog._deltabothparents = deltabothparents or oldamd
3126 2435
3127 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS, 2436 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
3128 self.DELTAREUSESAMEREVS) 2437 self.DELTAREUSESAMEREVS)
3129 2438
3130 deltacomputer = _deltacomputer(destrevlog) 2439 deltacomputer = deltautil.deltacomputer(destrevlog)
3131 index = self.index 2440 index = self.index
3132 for rev in self: 2441 for rev in self:
3133 entry = index[rev] 2442 entry = index[rev]
3134 2443
3135 # Some classes override linkrev to take filtered revs into 2444 # Some classes override linkrev to take filtered revs into