diff -r f70ce1aedbcb -r 498afb627f78 mercurial/revlog.py --- a/mercurial/revlog.py Tue Oct 10 10:01:57 2023 +0200 +++ b/mercurial/revlog.py Tue Oct 10 10:02:05 2023 +0200 @@ -241,6 +241,92 @@ hexdigits = b'0123456789abcdefABCDEF' +@attr.s() +class FeatureConfig: + """Hold configuration values about the available revlog features""" + + # the default compression engine + compression_engine = attr.ib(default=b'zlib') + # compression engine options + compression_engine_options = attr.ib(default=attr.Factory(dict)) + + # can we use censor on this revlog + censorable = attr.ib(default=False) + # does this revlog use the "side data" feature + has_side_data = attr.ib(default=False) + # might remove rank configuration once the computation has no impact + compute_rank = attr.ib(default=False) + # parent order is supposed to be semantically irrelevant, so we + # normally resort parents to ensure that the first parent is non-null, + # if there is a non-null parent at all. + # filelog abuses the parent order as flag to mark some instances of + # meta-encoded files, so allow it to disable this behavior. + canonical_parent_order = attr.ib(default=False) + # can ellipsis commit be used + enable_ellipsis = attr.ib(default=False) + + +@attr.s() +class DataConfig: + """Hold configuration values about how the revlog data are read""" + + # should we try to open the "pending" version of the revlog + try_pending = attr.ib(default=False) + # should we try to open the "split" version of the revlog + try_split = attr.ib(default=False) + # When True, indexfile should be opened with checkambig=True at writing, + # to avoid file stat ambiguity. + check_ambig = attr.ib(default=False) + + # If true, use mmap instead of reading to deal with large index + mmap_large_index = attr.ib(default=False) + # how much data is large + mmap_index_threshold = attr.ib(default=None) + # How much data to read and cache into the raw revlog data cache. 
+ chunk_cache_size = attr.ib(default=65536) + + # Allow sparse reading of the revlog data + with_sparse_read = attr.ib(default=False) + # minimal density of a sparse read chunk + sr_density_threshold = attr.ib(default=0.50) + # minimal size of data we skip when performing sparse read + sr_min_gap_size = attr.ib(default=262144) + + # are delta encoded against arbitrary bases. + generaldelta = attr.ib(default=False) + + +@attr.s() +class DeltaConfig: + """Hold configuration values about how new deltas are computed + + Some attributes are duplicated from DataConfig to help having each object + self contained. + """ + + # can delta be encoded against arbitrary bases. + general_delta = attr.ib(default=False) + # Allow sparse writing of the revlog data + sparse_revlog = attr.ib(default=False) + # maximum length of a delta chain + max_chain_len = attr.ib(default=None) + # Maximum distance between delta chain base start and end + max_deltachain_span = attr.ib(default=-1) + # If `upper_bound_comp` is not None, this is the expected maximal gain from + # compression for the data content. + upper_bound_comp = attr.ib(default=None) + # Should we try a delta against both parents + delta_both_parents = attr.ib(default=True) + # Test delta base candidate group by chunk of this maximal size. + candidate_group_chunk_size = attr.ib(default=0) + # Should we display debug information about delta computation + debug_delta = attr.ib(default=False) + # trust incoming delta by default + lazy_delta = attr.ib(default=True) + # trust the base of incoming delta by default + lazy_delta_base = attr.ib(default=False) + + class revlog: """ the underlying revision storage object @@ -348,43 +434,31 @@ assert target[0] in ALL_KINDS assert len(target) == 2 self.target = target - # When True, indexfile is opened with checkambig=True at writing, to - # avoid file stat ambiguity. 
- self._checkambig = checkambig - self._mmaplargeindex = mmaplargeindex - self._censorable = censorable + self.feature_config = FeatureConfig( + censorable=censorable, + canonical_parent_order=canonical_parent_order, + ) + self.data_config = DataConfig( + check_ambig=checkambig, + mmap_large_index=mmaplargeindex, + ) + self.delta_config = DeltaConfig() + # 3-tuple of (node, rev, text) for a raw revision. self._revisioncache = None # Maps rev to chain base rev. self._chainbasecache = util.lrucachedict(100) # 2-tuple of (offset, data) of raw data from the revlog at an offset. self._chunkcache = (0, b'') - # How much data to read and cache into the raw revlog data cache. - self._chunkcachesize = 65536 - self._maxchainlen = None - self._deltabothparents = True - self._candidate_group_chunk_size = 0 - self._debug_delta = False + self.index = None self._docket = None self._nodemap_docket = None # Mapping of partial identifiers to full nodes. self._pcache = {} - # Mapping of revision integer to full node. - self._compengine = b'zlib' - self._compengineopts = {} - self._maxdeltachainspan = -1 - self._withsparseread = False - self._sparserevlog = False - self.hassidedata = False - self._srdensitythreshold = 0.50 - self._srmingapsize = 262144 # other optionnals features - # might remove rank configuration once the computation has no impact - self._compute_rank = False - # Make copy of flag processors so each revlog instance can support # custom flags. self._flagprocessors = dict(flagutil.flagprocessors) @@ -398,12 +472,110 @@ self._concurrencychecker = concurrencychecker - # parent order is supposed to be semantically irrelevant, so we - # normally resort parents to ensure that the first parent is non-null, - # if there is a non-null parent at all. - # filelog abuses the parent order as flag to mark some instances of - # meta-encoded files, so allow it to disable this behavior. 
- self.canonical_parent_order = canonical_parent_order + @property + def _generaldelta(self): + """temporary compatibility proxy""" + return self.delta_config.general_delta + + @property + def _checkambig(self): + """temporary compatibility proxy""" + return self.data_config.check_ambig + + @property + def _mmaplargeindex(self): + """temporary compatibility proxy""" + return self.data_config.mmap_large_index + + @property + def _censorable(self): + """temporary compatibility proxy""" + return self.feature_config.censorable + + @property + def _chunkcachesize(self): + """temporary compatibility proxy""" + return self.data_config.chunk_cache_size + + @property + def _maxchainlen(self): + """temporary compatibility proxy""" + return self.delta_config.max_chain_len + + @property + def _deltabothparents(self): + """temporary compatibility proxy""" + return self.delta_config.delta_both_parents + + @property + def _candidate_group_chunk_size(self): + """temporary compatibility proxy""" + return self.delta_config.candidate_group_chunk_size + + @property + def _debug_delta(self): + """temporary compatibility proxy""" + return self.delta_config.debug_delta + + @property + def _compengine(self): + """temporary compatibility proxy""" + return self.feature_config.compression_engine + + @property + def _compengineopts(self): + """temporary compatibility proxy""" + return self.feature_config.compression_engine_options + + @property + def _maxdeltachainspan(self): + """temporary compatibility proxy""" + return self.delta_config.max_deltachain_span + + @property + def _withsparseread(self): + """temporary compatibility proxy""" + return self.data_config.with_sparse_read + + @property + def _sparserevlog(self): + """temporary compatibility proxy""" + return self.delta_config.sparse_revlog + + @property + def hassidedata(self): + """temporary compatibility proxy""" + return self.feature_config.has_side_data + + @property + def _srdensitythreshold(self): + """temporary compatibility 
proxy""" + return self.data_config.sr_density_threshold + + @property + def _srmingapsize(self): + """temporary compatibility proxy""" + return self.data_config.sr_min_gap_size + + @property + def _compute_rank(self): + """temporary compatibility proxy""" + return self.feature_config.compute_rank + + @property + def canonical_parent_order(self): + """temporary compatibility proxy""" + return self.feature_config.canonical_parent_order + + @property + def _lazydelta(self): + """temporary compatibility proxy""" + return self.delta_config.lazy_delta + + @property + def _lazydeltabase(self): + """temporary compatibility proxy""" + return self.delta_config.lazy_delta_base def _init_opts(self): """process options (from above/config) to setup associated default revlog mode @@ -426,7 +598,8 @@ if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG: new_header = CHANGELOGV2 - self._compute_rank = opts.get(b'changelogv2.compute-rank', True) + compute_rank = opts.get(b'changelogv2.compute-rank', True) + self.feature_config.compute_rank = compute_rank elif b'revlogv2' in opts: new_header = REVLOGV2 elif b'revlogv1' in opts: @@ -439,54 +612,63 @@ new_header = REVLOG_DEFAULT_VERSION if b'chunkcachesize' in opts: - self._chunkcachesize = opts[b'chunkcachesize'] + self.data_config.chunk_cache_size = opts[b'chunkcachesize'] if b'maxchainlen' in opts: - self._maxchainlen = opts[b'maxchainlen'] + self.delta_config.max_chain_len = opts[b'maxchainlen'] if b'deltabothparents' in opts: - self._deltabothparents = opts[b'deltabothparents'] + self.delta_config.delta_both_parents = opts[b'deltabothparents'] dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size') if dps_cgds: - self._candidate_group_chunk_size = dps_cgds - self._lazydelta = bool(opts.get(b'lazydelta', True)) - self._lazydeltabase = False - if self._lazydelta: - self._lazydeltabase = bool(opts.get(b'lazydeltabase', False)) + self.delta_config.candidate_group_chunk_size = dps_cgds + if b'lazydelta' in 
opts: + self.delta_config.lazy_delta = bool(opts[b'lazydelta']) + if self._lazydelta and b'lazydeltabase' in opts: + self.delta_config.lazy_delta_base = opts[b'lazydeltabase'] if b'debug-delta' in opts: - self._debug_delta = opts[b'debug-delta'] + self.delta_config.debug_delta = opts[b'debug-delta'] if b'compengine' in opts: - self._compengine = opts[b'compengine'] + self.feature_config.compression_engine = opts[b'compengine'] + comp_engine_opts = self.feature_config.compression_engine_options if b'zlib.level' in opts: - self._compengineopts[b'zlib.level'] = opts[b'zlib.level'] + comp_engine_opts[b'zlib.level'] = opts[b'zlib.level'] if b'zstd.level' in opts: - self._compengineopts[b'zstd.level'] = opts[b'zstd.level'] + comp_engine_opts[b'zstd.level'] = opts[b'zstd.level'] if b'maxdeltachainspan' in opts: - self._maxdeltachainspan = opts[b'maxdeltachainspan'] + self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan'] if self._mmaplargeindex and b'mmapindexthreshold' in opts: mmapindexthreshold = opts[b'mmapindexthreshold'] - self._sparserevlog = bool(opts.get(b'sparse-revlog', False)) - withsparseread = bool(opts.get(b'with-sparse-read', False)) - # sparse-revlog forces sparse-read - self._withsparseread = self._sparserevlog or withsparseread + self.data_config.mmap_index_threshold = mmapindexthreshold + if b'sparse-revlog' in opts: + self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog']) + if self.delta_config.sparse_revlog: + # sparse-revlog forces sparse-read + self.data_config.with_sparse_read = True + elif b'with-sparse-read' in opts: + self.data_config.with_sparse_read = bool(opts[b'with-sparse-read']) if b'sparse-read-density-threshold' in opts: - self._srdensitythreshold = opts[b'sparse-read-density-threshold'] + self.data_config.sr_density_threshold = opts[ + b'sparse-read-density-threshold' + ] if b'sparse-read-min-gap-size' in opts: - self._srmingapsize = opts[b'sparse-read-min-gap-size'] + self.data_config.sr_min_gap_size = 
opts[b'sparse-read-min-gap-size'] if opts.get(b'enableellipsis'): + self.feature_config.enable_ellipsis = True self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor # revlog v0 doesn't have flag processors for flag, processor in opts.get(b'flagprocessors', {}).items(): flagutil.insertflagprocessor(flag, processor, self._flagprocessors) - if self._chunkcachesize <= 0: + chunk_cache_size = self.data_config.chunk_cache_size + if chunk_cache_size <= 0: raise error.RevlogError( _(b'revlog chunk cache size %r is not greater than 0') - % self._chunkcachesize + % chunk_cache_size ) - elif self._chunkcachesize & (self._chunkcachesize - 1): + elif chunk_cache_size & (chunk_cache_size - 1): raise error.RevlogError( _(b'revlog chunk cache size %r is not a power of 2') - % self._chunkcachesize + % chunk_cache_size ) force_nodemap = opts.get(b'devel-force-nodemap', False) return new_header, mmapindexthreshold, force_nodemap @@ -664,8 +846,10 @@ features = FEATURES_BY_VERSION[self._format_version] self._inline = features[b'inline'](self._format_flags) - self._generaldelta = features[b'generaldelta'](self._format_flags) - self.hassidedata = features[b'sidedata'] + self.delta_config.general_delta = features[b'generaldelta']( + self._format_flags + ) + self.feature_config.has_side_data = features[b'sidedata'] if not features[b'docket']: self._indexfile = entry_point @@ -694,7 +878,7 @@ self._inline = False # generaldelta implied by version 2 revlogs. - self._generaldelta = True + self.delta_config.general_delta = True # the logic for persistent nodemap will be dealt with within the # main docket, so disable it for now. 
self._nodemap_file = None @@ -712,7 +896,7 @@ # sparse-revlog can't be on without general-delta (issue6056) if not self._generaldelta: - self._sparserevlog = False + self.delta_config.sparse_revlog = False self._storedeltachains = True @@ -3197,16 +3381,17 @@ try: if deltareuse == self.DELTAREUSEALWAYS: - destrevlog._lazydeltabase = True - destrevlog._lazydelta = True + destrevlog.delta_config.lazy_delta_base = True + destrevlog.delta_config.lazy_delta = True elif deltareuse == self.DELTAREUSESAMEREVS: - destrevlog._lazydeltabase = False - destrevlog._lazydelta = True + destrevlog.delta_config.lazy_delta_base = False + destrevlog.delta_config.lazy_delta = True elif deltareuse == self.DELTAREUSENEVER: - destrevlog._lazydeltabase = False - destrevlog._lazydelta = False - - destrevlog._deltabothparents = forcedeltabothparents or oldamd + destrevlog.delta_config.lazy_delta_base = False + destrevlog.delta_config.lazy_delta = False + + delta_both_parents = forcedeltabothparents or oldamd + destrevlog.delta_config.delta_both_parents = delta_both_parents with self.reading(): self._clone( @@ -3219,9 +3404,9 @@ ) finally: - destrevlog._lazydelta = oldlazydelta - destrevlog._lazydeltabase = oldlazydeltabase - destrevlog._deltabothparents = oldamd + destrevlog.delta_config.lazy_delta = oldlazydelta + destrevlog.delta_config.lazy_delta_base = oldlazydeltabase + destrevlog.delta_config.delta_both_parents = oldamd def _clone( self,