Mercurial > public > mercurial-scm > hg
comparison mercurial/revlogutils/deltas.py @ 51328:7455cae67260
delta-find: move is_good_delta_info onto the _DeltaSearch class
There is a lot of format-specific code in `is_good_delta_info`; moving it onto
_DeltaSearch will allow us to split it into subclasses soon.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 22 Dec 2023 01:33:40 +0100 |
parents | 49401b7dec0c |
children | 2e169a222e63 |
comparison
equal
deleted
inserted
replaced
51327:49401b7dec0c | 51328:7455cae67260 |
---|---|
580 delta.chainbase, | 580 delta.chainbase, |
581 delta.chainlen, | 581 delta.chainlen, |
582 delta.compresseddeltalen, | 582 delta.compresseddeltalen, |
583 delta.snapshotdepth, | 583 delta.snapshotdepth, |
584 ) | 584 ) |
585 | |
586 | |
587 def is_good_delta_info(revlog, deltainfo, revinfo): | |
588 """Returns True if the given delta is good. Good means that it is within | |
589 the disk span, disk size, and chain length bounds that we know to be | |
590 performant.""" | |
591 if deltainfo is None: | |
592 return False | |
593 | |
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so | |
595 # we should never end up asking such question. Adding the assert as a | |
596 # safe-guard to detect anything that would be fishy in this regard. | |
597 assert ( | |
598 revinfo.cachedelta is None | |
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE | |
600 or not revlog.delta_config.general_delta | |
601 ) | |
602 | |
603 # - 'deltainfo.distance' is the distance from the base revision -- | |
604 # bounding it limits the amount of I/O we need to do. | |
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of | |
606 # deltas we need to apply -- bounding it limits the amount of CPU | |
607 # we consume. | |
608 | |
609 textlen = revinfo.textlen | |
610 defaultmax = textlen * 4 | |
611 maxdist = revlog.delta_config.max_deltachain_span | |
612 if not maxdist: | |
613 maxdist = deltainfo.distance # ensure the conditional pass | |
614 maxdist = max(maxdist, defaultmax) | |
615 | |
616 # Bad delta from read span: | |
617 # | |
618 # If the span of data read is larger than the maximum allowed. | |
619 # | |
620 # In the sparse-revlog case, we rely on the associated "sparse reading" | |
621 # to avoid issue related to the span of data. In theory, it would be | |
622 # possible to build pathological revlog where delta pattern would lead | |
623 # to too many reads. However, they do not happen in practice at all. So | |
624 # we skip the span check entirely. | |
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance: | |
626 return False | |
627 | |
628 # Bad delta from new delta size: | |
629 # | |
630 # If the delta size is larger than the target text, storing the | |
631 # delta will be inefficient. | |
632 if textlen < deltainfo.deltalen: | |
633 return False | |
634 | |
635 # Bad delta from cumulated payload size: | |
636 # | |
637 # If the sum of delta get larger than K * target text length. | |
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen: | |
639 return False | |
640 | |
641 # Bad delta from chain length: | |
642 # | |
643 # If the number of delta in the chain gets too high. | |
644 if ( | |
645 revlog.delta_config.max_chain_len | |
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen | |
647 ): | |
648 return False | |
649 | |
650 # bad delta from intermediate snapshot size limit | |
651 # | |
652 # If an intermediate snapshot size is higher than the limit. The | |
653 # limit exist to prevent endless chain of intermediate delta to be | |
654 # created. | |
655 if ( | |
656 deltainfo.snapshotdepth is not None | |
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen | |
658 ): | |
659 return False | |
660 | |
661 # bad delta if new intermediate snapshot is larger than the previous | |
662 # snapshot | |
663 if ( | |
664 deltainfo.snapshotdepth | |
665 and revlog.length(deltainfo.base) < deltainfo.deltalen | |
666 ): | |
667 return False | |
668 | |
669 return True | |
670 | 585 |
671 | 586 |
672 # If a revision's full text is that much bigger than a base candidate full | 587 # If a revision's full text is that much bigger than a base candidate full |
673 # text's, it is very unlikely that it will produce a valid delta. We no longer | 588 # text's, it is very unlikely that it will produce a valid delta. We no longer |
674 # consider these candidates. | 589 # consider these candidates. |
1058 | 973 |
1059 if not sparse: | 974 if not sparse: |
1060 # other approach failed try against prev to hopefully save us a | 975 # other approach failed try against prev to hopefully save us a |
1061 # fulltext. | 976 # fulltext. |
1062 yield (prev,) | 977 yield (prev,) |
978 | |
979 def is_good_delta_info(self, deltainfo): | |
980 """Returns True if the given delta is good. Good means that it is | |
981 within the disk span, disk size, and chain length bounds that we know | |
982 to be performant.""" | |
983 if deltainfo is None: | |
984 return False | |
985 | |
986 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner | |
987 # so we should never end up asking such question. Adding the assert as | |
988 # a safe-guard to detect anything that would be fishy in this regard. | |
989 assert ( | |
990 self.revinfo.cachedelta is None | |
991 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE | |
992 or not self.revlog.delta_config.general_delta | |
993 ) | |
994 | |
995 # - 'deltainfo.distance' is the distance from the base revision -- | |
996 # bounding it limits the amount of I/O we need to do. | |
997 # - 'deltainfo.compresseddeltalen' is the sum of the total size of | |
998 # deltas we need to apply -- bounding it limits the amount of CPU | |
999 # we consume. | |
1000 | |
1001 textlen = self.revinfo.textlen | |
1002 defaultmax = textlen * 4 | |
1003 maxdist = self.revlog.delta_config.max_deltachain_span | |
1004 if not maxdist: | |
1005 maxdist = deltainfo.distance # ensure the conditional pass | |
1006 maxdist = max(maxdist, defaultmax) | |
1007 | |
1008 # Bad delta from read span: | |
1009 # | |
1010 # If the span of data read is larger than the maximum allowed. | |
1011 # | |
1012 # In the sparse-revlog case, we rely on the associated "sparse | |
1013 # reading" to avoid issue related to the span of data. In theory, it | |
1014 # would be possible to build pathological revlog where delta pattern | |
1015 # would lead to too many reads. However, they do not happen in | |
1016 # practice at all. So we skip the span check entirely. | |
1017 if ( | |
1018 not self.revlog.delta_config.sparse_revlog | |
1019 and maxdist < deltainfo.distance | |
1020 ): | |
1021 return False | |
1022 | |
1023 # Bad delta from new delta size: | |
1024 # | |
1025 # If the delta size is larger than the target text, storing the delta | |
1026 # will be inefficient. | |
1027 if textlen < deltainfo.deltalen: | |
1028 return False | |
1029 | |
1030 # Bad delta from cumulated payload size: | |
1031 # | |
1032 # If the sum of delta get larger than K * target text length. | |
1033 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen: | |
1034 return False | |
1035 | |
1036 # Bad delta from chain length: | |
1037 # | |
1038 # If the number of delta in the chain gets too high. | |
1039 if ( | |
1040 self.revlog.delta_config.max_chain_len | |
1041 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen | |
1042 ): | |
1043 return False | |
1044 | |
1045 # bad delta from intermediate snapshot size limit | |
1046 # | |
1047 # If an intermediate snapshot size is higher than the limit. The | |
1048 # limit exist to prevent endless chain of intermediate delta to be | |
1049 # created. | |
1050 if ( | |
1051 deltainfo.snapshotdepth is not None | |
1052 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen | |
1053 ): | |
1054 return False | |
1055 | |
1056 # bad delta if new intermediate snapshot is larger than the previous | |
1057 # snapshot | |
1058 if ( | |
1059 deltainfo.snapshotdepth | |
1060 and self.revlog.length(deltainfo.base) < deltainfo.deltalen | |
1061 ): | |
1062 return False | |
1063 | |
1064 return True | |
1063 | 1065 |
1064 | 1066 |
1065 class SnapshotCache: | 1067 class SnapshotCache: |
1066 __slots__ = ('snapshots', '_start_rev', '_end_rev') | 1068 __slots__ = ('snapshots', '_start_rev', '_end_rev') |
1067 | 1069 |
1519 delta_end = util.timer() | 1521 delta_end = util.timer() |
1520 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n" | 1522 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n" |
1521 msg %= delta_end - delta_start | 1523 msg %= delta_end - delta_start |
1522 self._write_debug(msg) | 1524 self._write_debug(msg) |
1523 if candidatedelta is not None: | 1525 if candidatedelta is not None: |
1524 if is_good_delta_info(self.revlog, candidatedelta, revinfo): | 1526 if search.is_good_delta_info(candidatedelta): |
1525 if self._debug_search: | 1527 if self._debug_search: |
1526 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n" | 1528 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n" |
1527 msg %= candidatedelta.deltalen | 1529 msg %= candidatedelta.deltalen |
1528 self._write_debug(msg) | 1530 self._write_debug(msg) |
1529 nominateddeltas.append(candidatedelta) | 1531 nominateddeltas.append(candidatedelta) |