Mercurial > public > mercurial-scm > hg
comparison mercurial/revlogutils/deltas.py @ 51341:f3f35b37f4b2
delta-find: move good delta code earlier in the class
Nothing changes except the code location. This greatly helps the readability of
the next diff.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sun, 07 Jan 2024 00:56:15 +0100 |
parents | 5c0693553cb6 |
children | 866ab9f447d4 |
comparison
equal
deleted
inserted
replaced
51340:5c0693553cb6 | 51341:f3f35b37f4b2 |
---|---|
634 self.tested = {nullrev} | 634 self.tested = {nullrev} |
635 | 635 |
636 self._candidates_iterator = self._candidate_groups() | 636 self._candidates_iterator = self._candidate_groups() |
637 self._last_good = None | 637 self._last_good = None |
638 self.current_group = self._candidates_iterator.send(self._last_good) | 638 self.current_group = self._candidates_iterator.send(self._last_good) |
639 | |
640 def is_good_delta_info(self, deltainfo): | |
641 """Returns True if the given delta is good. | |
642 | |
643 Good means that it is within the disk span, disk size, and chain length | |
644 bounds that we know to be performant. | |
645 """ | |
646 if not self._is_good_delta_info_universal(deltainfo): | |
647 return False | |
648 if not self._is_good_delta_info_chain_quality(deltainfo): | |
649 return False | |
650 if not self._is_good_delta_info_snapshot_constraints(deltainfo): | |
651 return False | |
652 return True | |
653 | |
654 def _is_good_delta_info_universal(self, deltainfo): | |
655 """Returns True if the given delta is good. | |
656 | |
657 This performs generic checks needed by all format variants. | |
658 | |
659 This is used by is_good_delta_info. | |
660 """ | |
661 | |
662 if deltainfo is None: | |
663 return False | |
664 | |
665 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner | |
666 # so we should never end up asking such question. Adding the assert as | |
667 # a safe-guard to detect anything that would be fishy in this regard. | |
668 assert ( | |
669 self.revinfo.cachedelta is None | |
670 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE | |
671 or not self.revlog.delta_config.general_delta | |
672 ) | |
673 | |
674 # Bad delta from new delta size: | |
675 # | |
676 # If the delta size is larger than the target text, storing the delta | |
677 # will be inefficient. | |
678 if self.revinfo.textlen < deltainfo.deltalen: | |
679 return False | |
680 | |
681 return True | |
682 | |
683 def _is_good_delta_info_chain_quality(self, deltainfo): | |
684 """Returns True if the chain associated with the delta is good. | |
685 | |
686 This performs checks for format that use delta chains. | |
687 | |
688 This is used by is_good_delta_info. | |
689 """ | |
690 # - 'deltainfo.distance' is the distance from the base revision -- | |
691 # bounding it limits the amount of I/O we need to do. | |
692 | |
693 defaultmax = self.revinfo.textlen * 4 | |
694 maxdist = self.revlog.delta_config.max_deltachain_span | |
695 if not maxdist: | |
696 maxdist = deltainfo.distance # ensure the conditional pass | |
697 maxdist = max(maxdist, defaultmax) | |
698 | |
699 # Bad delta from read span: | |
700 # | |
701 # If the span of data read is larger than the maximum allowed. | |
702 # | |
703 # In the sparse-revlog case, we rely on the associated "sparse | |
704 # reading" to avoid issue related to the span of data. In theory, it | |
705 # would be possible to build pathological revlog where delta pattern | |
706 # would lead to too many reads. However, they do not happen in | |
707 # practice at all. So we skip the span check entirely. | |
708 if ( | |
709 not self.revlog.delta_config.sparse_revlog | |
710 and maxdist < deltainfo.distance | |
711 ): | |
712 return False | |
713 | |
714 # Bad delta from cumulated payload size: | |
715 # | |
716 # - 'deltainfo.compresseddeltalen' is the sum of the total size of | |
717 # deltas we need to apply -- bounding it limits the amount of CPU | |
718 # we consume. | |
719 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT | |
720 # If the sum of delta get larger than K * target text length. | |
721 if max_chain_data < deltainfo.compresseddeltalen: | |
722 return False | |
723 | |
724 # Bad delta from chain length: | |
725 # | |
726 # If the number of delta in the chain gets too high. | |
727 if ( | |
728 self.revlog.delta_config.max_chain_len | |
729 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen | |
730 ): | |
731 return False | |
732 return True | |
733 | |
734 def _is_good_delta_info_snapshot_constraints(self, deltainfo): | |
735 """Returns True if the chain associated with snapshots | |
736 | |
737 This performs checks for format that use sparse-revlog and intermediate | |
738 snapshots. | |
739 | |
740 This is used by is_good_delta_info. | |
741 """ | |
742 # bad delta from intermediate snapshot size limit | |
743 # | |
744 # If an intermediate snapshot size is higher than the limit. The | |
745 # limit exist to prevent endless chain of intermediate delta to be | |
746 # created. | |
747 if ( | |
748 deltainfo.snapshotdepth is not None | |
749 and (self.revinfo.textlen >> deltainfo.snapshotdepth) | |
750 < deltainfo.deltalen | |
751 ): | |
752 return False | |
753 | |
754 # bad delta if new intermediate snapshot is larger than the previous | |
755 # snapshot | |
756 if ( | |
757 deltainfo.snapshotdepth | |
758 and self.revlog.length(deltainfo.base) < deltainfo.deltalen | |
759 ): | |
760 return False | |
761 | |
762 return True | |
639 | 763 |
640 @property | 764 @property |
641 def done(self): | 765 def done(self): |
642 """True when all possible candidate have been tested""" | 766 """True when all possible candidate have been tested""" |
643 return self.current_group is None | 767 return self.current_group is None |
1038 | 1162 |
1039 if not sparse: | 1163 if not sparse: |
1040 # other approach failed try against prev to hopefully save us a | 1164 # other approach failed try against prev to hopefully save us a |
1041 # fulltext. | 1165 # fulltext. |
1042 yield (prev,) | 1166 yield (prev,) |
1043 | |
1044 def is_good_delta_info(self, deltainfo): | |
1045 """Returns True if the given delta is good. | |
1046 | |
1047 Good means that it is within the disk span, disk size, and chain length | |
1048 bounds that we know to be performant. | |
1049 """ | |
1050 if not self._is_good_delta_info_universal(deltainfo): | |
1051 return False | |
1052 if not self._is_good_delta_info_chain_quality(deltainfo): | |
1053 return False | |
1054 if not self._is_good_delta_info_snapshot_constraints(deltainfo): | |
1055 return False | |
1056 return True | |
1057 | |
1058 def _is_good_delta_info_universal(self, deltainfo): | |
1059 """Returns True if the given delta is good. | |
1060 | |
1061 This performs generic checks needed by all format variants. | |
1062 | |
1063 This is used by is_good_delta_info. | |
1064 """ | |
1065 | |
1066 if deltainfo is None: | |
1067 return False | |
1068 | |
1069 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner | |
1070 # so we should never end up asking such question. Adding the assert as | |
1071 # a safe-guard to detect anything that would be fishy in this regard. | |
1072 assert ( | |
1073 self.revinfo.cachedelta is None | |
1074 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE | |
1075 or not self.revlog.delta_config.general_delta | |
1076 ) | |
1077 | |
1078 # Bad delta from new delta size: | |
1079 # | |
1080 # If the delta size is larger than the target text, storing the delta | |
1081 # will be inefficient. | |
1082 if self.revinfo.textlen < deltainfo.deltalen: | |
1083 return False | |
1084 | |
1085 return True | |
1086 | |
1087 def _is_good_delta_info_chain_quality(self, deltainfo): | |
1088 """Returns True if the chain associated with the delta is good. | |
1089 | |
1090 This performs checks for format that use delta chains. | |
1091 | |
1092 This is used by is_good_delta_info. | |
1093 """ | |
1094 # - 'deltainfo.distance' is the distance from the base revision -- | |
1095 # bounding it limits the amount of I/O we need to do. | |
1096 | |
1097 defaultmax = self.revinfo.textlen * 4 | |
1098 maxdist = self.revlog.delta_config.max_deltachain_span | |
1099 if not maxdist: | |
1100 maxdist = deltainfo.distance # ensure the conditional pass | |
1101 maxdist = max(maxdist, defaultmax) | |
1102 | |
1103 # Bad delta from read span: | |
1104 # | |
1105 # If the span of data read is larger than the maximum allowed. | |
1106 # | |
1107 # In the sparse-revlog case, we rely on the associated "sparse | |
1108 # reading" to avoid issue related to the span of data. In theory, it | |
1109 # would be possible to build pathological revlog where delta pattern | |
1110 # would lead to too many reads. However, they do not happen in | |
1111 # practice at all. So we skip the span check entirely. | |
1112 if ( | |
1113 not self.revlog.delta_config.sparse_revlog | |
1114 and maxdist < deltainfo.distance | |
1115 ): | |
1116 return False | |
1117 | |
1118 # Bad delta from cumulated payload size: | |
1119 # | |
1120 # - 'deltainfo.compresseddeltalen' is the sum of the total size of | |
1121 # deltas we need to apply -- bounding it limits the amount of CPU | |
1122 # we consume. | |
1123 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT | |
1124 # If the sum of delta get larger than K * target text length. | |
1125 if max_chain_data < deltainfo.compresseddeltalen: | |
1126 return False | |
1127 | |
1128 # Bad delta from chain length: | |
1129 # | |
1130 # If the number of delta in the chain gets too high. | |
1131 if ( | |
1132 self.revlog.delta_config.max_chain_len | |
1133 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen | |
1134 ): | |
1135 return False | |
1136 return True | |
1137 | |
1138 def _is_good_delta_info_snapshot_constraints(self, deltainfo): | |
1139 """Returns True if the chain associated with snapshots | |
1140 | |
1141 This performs checks for format that use sparse-revlog and intermediate | |
1142 snapshots. | |
1143 | |
1144 This is used by is_good_delta_info. | |
1145 """ | |
1146 # bad delta from intermediate snapshot size limit | |
1147 # | |
1148 # If an intermediate snapshot size is higher than the limit. The | |
1149 # limit exist to prevent endless chain of intermediate delta to be | |
1150 # created. | |
1151 if ( | |
1152 deltainfo.snapshotdepth is not None | |
1153 and (self.revinfo.textlen >> deltainfo.snapshotdepth) | |
1154 < deltainfo.deltalen | |
1155 ): | |
1156 return False | |
1157 | |
1158 # bad delta if new intermediate snapshot is larger than the previous | |
1159 # snapshot | |
1160 if ( | |
1161 deltainfo.snapshotdepth | |
1162 and self.revlog.length(deltainfo.base) < deltainfo.deltalen | |
1163 ): | |
1164 return False | |
1165 | |
1166 return True | |
1167 | 1167 |
1168 | 1168 |
1169 class SnapshotCache: | 1169 class SnapshotCache: |
1170 __slots__ = ('snapshots', '_start_rev', '_end_rev') | 1170 __slots__ = ('snapshots', '_start_rev', '_end_rev') |
1171 | 1171 |