view rust/hg-core/src/revlog/options.rs @ 52160:e01e84e5e426

rust-revlog: add a Rust-only `InnerRevlog` This mirrors the Python `InnerRevlog` and will be used in a future patch to replace said Python implementation. This allows us to start doing more things in pure Rust, in particular reading and writing operations. A lot of changes have to be introduced all at once, it wouldn't be very useful to separate this patch IMO since all of them are either interlocked or only useful with the rest.
author Rapha?l Gom?s <rgomes@octobus.net>
date Thu, 10 Oct 2024 10:34:51 +0200
parents 039b7caeb4d9
children 84b5802ba7d3
line wrap: on
line source

//! Helpers for the revlog config and opening options

use std::collections::HashSet;

use crate::{
    config::{Config, ResourceProfileValue},
    errors::HgError,
    requirements::{
        CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT,
        NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
        SPARSEREVLOG_REQUIREMENT,
    },
};

use super::{compression::CompressionConfig, RevlogType};

const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536;
const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50;
const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144;

/// The known revlog versions and their options
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum RevlogVersionOptions {
    V0,
    V1 { general_delta: bool, inline: bool },
    V2,
    ChangelogV2 { compute_rank: bool },
}

/// Options to govern how a revlog should be opened, usually from the
/// repository configuration or requirements.
#[derive(Debug, Copy, Clone)]
pub struct RevlogOpenOptions {
    /// The revlog version, along with any option specific to this version
    pub version: RevlogVersionOptions,
    /// Whether the revlog uses a persistent nodemap.
    pub use_nodemap: bool,
    pub delta_config: RevlogDeltaConfig,
    pub data_config: RevlogDataConfig,
    pub feature_config: RevlogFeatureConfig,
}

#[cfg(test)]
impl Default for RevlogOpenOptions {
    fn default() -> Self {
        Self {
            version: RevlogVersionOptions::V1 {
                general_delta: true,
                inline: false,
            },
            use_nodemap: true,
            data_config: Default::default(),
            delta_config: Default::default(),
            feature_config: Default::default(),
        }
    }
}

impl RevlogOpenOptions {
    pub fn new(
        inline: bool,
        data_config: RevlogDataConfig,
        delta_config: RevlogDeltaConfig,
        feature_config: RevlogFeatureConfig,
    ) -> Self {
        Self {
            version: RevlogVersionOptions::V1 {
                general_delta: data_config.general_delta,
                inline,
            },
            use_nodemap: false,
            data_config,
            delta_config,
            feature_config,
        }
    }

    pub fn index_header(&self) -> super::index::IndexHeader {
        super::index::IndexHeader {
            header_bytes: match self.version {
                RevlogVersionOptions::V0 => [0, 0, 0, 0],
                RevlogVersionOptions::V1 {
                    general_delta,
                    inline,
                } => [
                    0,
                    if general_delta && inline {
                        3
                    } else if general_delta {
                        2
                    } else {
                        u8::from(inline)
                    },
                    0,
                    1,
                ],
                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
                    0xD34Du32.to_be_bytes()
                }
            },
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
/// Holds configuration values about how the revlog data is read
pub struct RevlogDataConfig {
    /// Should we try to open the "pending" version of the revlog
    pub try_pending: bool,
    /// Should we try to open the "split" version of the revlog
    pub try_split: bool,
    /// When True, `indexfile` should be opened with `checkambig=True` at
    /// writing time, to avoid file stat ambiguity
    pub check_ambig: bool,
    /// If true, use mmap instead of reading to deal with large indexes
    pub mmap_large_index: bool,
    /// How much data is considered large
    pub mmap_index_threshold: Option<u64>,
    /// How much data to read and cache into the raw revlog data cache
    pub chunk_cache_size: u64,
    /// The size of the uncompressed cache compared to the largest revision
    /// seen
    pub uncompressed_cache_factor: Option<f64>,
    /// The number of chunks cached
    pub uncompressed_cache_count: Option<u64>,
    /// Allow sparse reading of the revlog data
    pub with_sparse_read: bool,
    /// Minimal density of a sparse read chunk
    pub sr_density_threshold: f64,
    /// Minimal size of the data we skip when performing sparse reads
    pub sr_min_gap_size: u64,
    /// Whether deltas are encoded against arbitrary bases
    pub general_delta: bool,
}

impl RevlogDataConfig {
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
    ) -> Result<Self, HgError> {
        let mut data_config = Self::default();
        if let Some(chunk_cache_size) =
            config.get_byte_size(b"format", b"chunkcachesize")?
        {
            data_config.chunk_cache_size = chunk_cache_size;
        }

        let memory_profile = config.get_resource_profile(Some("memory"));
        if memory_profile.value >= ResourceProfileValue::Medium {
            data_config.uncompressed_cache_count = Some(10_000);
            data_config.uncompressed_cache_factor = Some(4.0);
            if memory_profile.value >= ResourceProfileValue::High {
                data_config.uncompressed_cache_factor = Some(10.0)
            }
        }

        if let Some(mmap_index_threshold) = config
            .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
        {
            data_config.mmap_index_threshold = Some(mmap_index_threshold);
        }

        let with_sparse_read =
            config.get_bool(b"experimental", b"sparse-read")?;
        if let Some(sr_density_threshold) = config
            .get_f64(b"experimental", b"sparse-read.density-threshold")?
        {
            data_config.sr_density_threshold = sr_density_threshold;
        }
        data_config.with_sparse_read = with_sparse_read;
        if let Some(sr_min_gap_size) = config
            .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
        {
            data_config.sr_min_gap_size = sr_min_gap_size;
        }

        data_config.with_sparse_read =
            requirements.contains(SPARSEREVLOG_REQUIREMENT);

        Ok(data_config)
    }
}

impl Default for RevlogDataConfig {
    fn default() -> Self {
        Self {
            chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE,
            sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD,
            sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE,
            try_pending: Default::default(),
            try_split: Default::default(),
            check_ambig: Default::default(),
            mmap_large_index: Default::default(),
            mmap_index_threshold: Default::default(),
            uncompressed_cache_factor: Default::default(),
            uncompressed_cache_count: Default::default(),
            with_sparse_read: Default::default(),
            general_delta: Default::default(),
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq)]
/// Holds configuration values about how new deltas are computed.
///
/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
/// each object self contained.
pub struct RevlogDeltaConfig {
    /// Whether deltas can be encoded against arbitrary bases
    pub general_delta: bool,
    /// Allow sparse writing of the revlog data
    pub sparse_revlog: bool,
    /// Maximum length of a delta chain
    pub max_chain_len: Option<u64>,
    /// Maximum distance between a delta chain's start and end
    pub max_deltachain_span: Option<u64>,
    /// If `upper_bound_comp` is not None, this is the expected maximal
    /// gain from compression for the data content
    pub upper_bound_comp: Option<f64>,
    /// Should we try a delta against both parents
    pub delta_both_parents: bool,
    /// Test delta base candidate groups by chunks of this maximal size
    pub candidate_group_chunk_size: u64,
    /// Should we display debug information about delta computation
    pub debug_delta: bool,
    /// Trust incoming deltas by default
    pub lazy_delta: bool,
    /// Trust the base of incoming deltas by default
    pub lazy_delta_base: bool,
}

impl RevlogDeltaConfig {
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
        revlog_type: RevlogType,
    ) -> Result<Self, HgError> {
        let mut delta_config = Self {
            delta_both_parents: config
                .get_option_no_default(
                    b"storage",
                    b"revlog.optimize-delta-parent-choice",
                )?
                .unwrap_or(true),
            candidate_group_chunk_size: config
                .get_u64(
                    b"storage",
                    b"revlog.delta-parent-search.candidate-group-chunk-size",
                )?
                .unwrap_or_default(),
            ..Default::default()
        };

        delta_config.debug_delta =
            config.get_bool(b"debug", b"revlog.debug-delta")?;

        delta_config.general_delta =
            requirements.contains(GENERALDELTA_REQUIREMENT);

        let lazy_delta =
            config.get_bool(b"storage", b"revlog.reuse-external-delta")?;

        if revlog_type == RevlogType::Manifestlog {
            // upper bound of what we expect from compression
            // (real life value seems to be 3)
            delta_config.upper_bound_comp = Some(3.0)
        }

        let mut lazy_delta_base = false;
        if lazy_delta {
            lazy_delta_base = match config.get_option_no_default(
                b"storage",
                b"revlog.reuse-external-delta-parent",
            )? {
                Some(base) => base,
                None => config.get_bool(b"format", b"generaldelta")?,
            };
        }
        delta_config.lazy_delta = lazy_delta;
        delta_config.lazy_delta_base = lazy_delta_base;

        delta_config.max_deltachain_span =
            match config.get_i64(b"experimental", b"maxdeltachainspan")? {
                Some(span) => {
                    if span < 0 {
                        None
                    } else {
                        Some(span as u64)
                    }
                }
                None => None,
            };

        delta_config.sparse_revlog =
            requirements.contains(SPARSEREVLOG_REQUIREMENT);

        delta_config.max_chain_len =
            config.get_byte_size_no_default(b"format", b"maxchainlen")?;

        Ok(delta_config)
    }
}

impl Default for RevlogDeltaConfig {
    fn default() -> Self {
        Self {
            delta_both_parents: true,
            lazy_delta: true,
            general_delta: Default::default(),
            sparse_revlog: Default::default(),
            max_chain_len: Default::default(),
            max_deltachain_span: Default::default(),
            upper_bound_comp: Default::default(),
            candidate_group_chunk_size: Default::default(),
            debug_delta: Default::default(),
            lazy_delta_base: Default::default(),
        }
    }
}

#[derive(Debug, Default, Clone, Copy, PartialEq)]
/// Holds configuration values about the available revlog features
pub struct RevlogFeatureConfig {
    /// The compression engine and its options
    pub compression_engine: CompressionConfig,
    /// Can we use censor on this revlog
    pub censorable: bool,
    /// Does this revlog use the "side data" feature
    pub has_side_data: bool,
    /// Might remove this configuration once the rank computation has no
    /// impact
    pub compute_rank: bool,
    /// Parent order is supposed to be semantically irrelevant, so we
    /// normally re-sort parents to ensure that the first parent is non-null,
    /// if there is a non-null parent at all.
    /// filelog abuses the parent order as a flag to mark some instances of
    /// meta-encoded files, so allow it to disable this behavior.
    pub canonical_parent_order: bool,
    /// Can ellipsis commit be used
    pub enable_ellipsis: bool,
}

impl RevlogFeatureConfig {
    pub fn new(
        config: &Config,
        requirements: &HashSet<String>,
    ) -> Result<Self, HgError> {
        Ok(Self {
            compression_engine: CompressionConfig::new(config, requirements)?,
            enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
            ..Default::default()
        })
    }
}

/// Return the default options for a revlog of `revlog_type` according to the
/// current config and requirements.
pub fn default_revlog_options(
    config: &Config,
    requirements: &HashSet<String>,
    revlog_type: RevlogType,
) -> Result<RevlogOpenOptions, HgError> {
    let is_changelog = revlog_type == RevlogType::Changelog;
    let version =
        if is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT) {
            let compute_rank = config
                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
            RevlogVersionOptions::ChangelogV2 { compute_rank }
        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
            RevlogVersionOptions::V2
        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
            RevlogVersionOptions::V1 {
                general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
                inline: !is_changelog,
            }
        } else {
            RevlogVersionOptions::V0
        };
    Ok(RevlogOpenOptions {
        version,
        // We don't need to dance around the slow path like in the Python
        // implementation since we know we have access to the fast code.
        use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
        delta_config: RevlogDeltaConfig::new(
            config,
            requirements,
            revlog_type,
        )?,
        data_config: RevlogDataConfig::new(config, requirements)?,
        feature_config: RevlogFeatureConfig::new(config, requirements)?,
    })
}