rust-revlog: introduce an `options` module
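This groups all the revlog options code in one place and makes
`revlog/mod.rs` more readable.

`Repo::default_revlog_options` becomes the free function
`revlog::options::default_revlog_options`, which takes the config and the
requirements explicitly; all callers are updated accordingly. The hard-coded
default values of `RevlogDataConfig` are now named constants.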
--- a/rust/hg-core/src/operations/debugdata.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/operations/debugdata.rs Wed Sep 25 18:24:15 2024 +0200
@@ -7,6 +7,7 @@
use crate::errors::HgError;
use crate::repo::Repo;
+use crate::revlog::options::default_revlog_options;
use crate::revlog::Revlog;
use crate::{exit_codes, RevlogError, RevlogType};
@@ -31,7 +32,11 @@
&repo.store_vfs(),
index_file,
None,
- repo.default_revlog_options(RevlogType::Changelog)?,
+ default_revlog_options(
+ repo.config(),
+ repo.requirements(),
+ RevlogType::Changelog,
+ )?,
)?;
let rev =
crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
--- a/rust/hg-core/src/repo.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/repo.rs Wed Sep 25 18:24:15 2024 +0200
@@ -10,11 +10,8 @@
use crate::errors::{HgError, IoResultExt};
use crate::lock::{try_with_lock_no_wait, LockError};
use crate::manifest::{Manifest, Manifestlog};
-use crate::requirements::{
- CHANGELOGV2_REQUIREMENT, DIRSTATE_TRACKED_HINT_V1,
- GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT,
- REVLOGV2_REQUIREMENT,
-};
+use crate::options::default_revlog_options;
+use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
use crate::revlog::filelog::Filelog;
use crate::revlog::RevlogError;
use crate::utils::debug::debug_wait_for_file_or_print;
@@ -22,11 +19,10 @@
use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
use crate::vfs::{is_dir, is_file, VfsImpl};
+use crate::DirstateError;
use crate::{
- exit_codes, requirements, NodePrefix, RevlogDataConfig, RevlogDeltaConfig,
- RevlogFeatureConfig, RevlogType, RevlogVersionOptions, UncheckedRevision,
+ exit_codes, requirements, NodePrefix, RevlogType, UncheckedRevision,
};
-use crate::{DirstateError, RevlogOpenOptions};
use std::cell::{Ref, RefCell, RefMut};
use std::collections::HashSet;
use std::io::Seek;
@@ -577,7 +573,11 @@
fn new_changelog(&self) -> Result<Changelog, HgError> {
Changelog::open(
&self.store_vfs(),
- self.default_revlog_options(RevlogType::Changelog)?,
+ default_revlog_options(
+ self.config(),
+ self.requirements(),
+ RevlogType::Changelog,
+ )?,
)
}
@@ -592,7 +592,11 @@
fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
Manifestlog::open(
&self.store_vfs(),
- self.default_revlog_options(RevlogType::Manifestlog)?,
+ default_revlog_options(
+ self.config(),
+ self.requirements(),
+ RevlogType::Manifestlog,
+ )?,
)
}
@@ -642,7 +646,11 @@
Filelog::open(
self,
path,
- self.default_revlog_options(RevlogType::Filelog)?,
+ default_revlog_options(
+ self.config(),
+ self.requirements(),
+ RevlogType::Filelog,
+ )?,
)
}
/// Write to disk any updates that were made through `dirstate_map_mut`.
@@ -792,50 +800,6 @@
Ok(())
}
- pub fn default_revlog_options(
- &self,
- revlog_type: RevlogType,
- ) -> Result<RevlogOpenOptions, HgError> {
- let requirements = self.requirements();
- let is_changelog = revlog_type == RevlogType::Changelog;
- let version = if is_changelog
- && requirements.contains(CHANGELOGV2_REQUIREMENT)
- {
- let compute_rank = self
- .config()
- .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
- RevlogVersionOptions::ChangelogV2 { compute_rank }
- } else if requirements.contains(REVLOGV2_REQUIREMENT) {
- RevlogVersionOptions::V2
- } else if requirements.contains(REVLOGV1_REQUIREMENT) {
- RevlogVersionOptions::V1 {
- general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
- inline: !is_changelog,
- }
- } else {
- RevlogVersionOptions::V0
- };
- Ok(RevlogOpenOptions {
- version,
- // We don't need to dance around the slow path like in the Python
- // implementation since we know we have access to the fast code.
- use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
- delta_config: RevlogDeltaConfig::new(
- self.config(),
- self.requirements(),
- revlog_type,
- )?,
- data_config: RevlogDataConfig::new(
- self.config(),
- self.requirements(),
- )?,
- feature_config: RevlogFeatureConfig::new(
- self.config(),
- requirements,
- )?,
- })
- }
-
pub fn node(&self, rev: UncheckedRevision) -> Option<crate::Node> {
self.changelog()
.ok()
--- a/rust/hg-core/src/revlog/changelog.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs Wed Sep 25 18:24:15 2024 +0200
@@ -14,7 +14,9 @@
use crate::revlog::{Revlog, RevlogEntry, RevlogError};
use crate::utils::hg_path::HgPath;
use crate::vfs::VfsImpl;
-use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
+use crate::{Graph, GraphError, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
/// A specialized `Revlog` to work with changelog data format.
pub struct Changelog {
@@ -504,10 +506,7 @@
mod tests {
use super::*;
use crate::vfs::VfsImpl;
- use crate::{
- RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
- NULL_REVISION,
- };
+ use crate::NULL_REVISION;
use pretty_assertions::assert_eq;
#[test]
@@ -571,18 +570,9 @@
};
std::fs::write(temp.path().join("foo.i"), b"").unwrap();
std::fs::write(temp.path().join("foo.d"), b"").unwrap();
- let revlog = Revlog::open(
- &vfs,
- "foo.i",
- None,
- RevlogOpenOptions::new(
- false,
- RevlogDataConfig::default(),
- RevlogDeltaConfig::default(),
- RevlogFeatureConfig::default(),
- ),
- )
- .unwrap();
+ let revlog =
+ Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::default())
+ .unwrap();
let changelog = Changelog { revlog };
assert_eq!(
--- a/rust/hg-core/src/revlog/filelog.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/filelog.rs Wed Sep 25 18:24:15 2024 +0200
@@ -11,10 +11,11 @@
use crate::utils::SliceExt;
use crate::Graph;
use crate::GraphError;
-use crate::RevlogOpenOptions;
use crate::UncheckedRevision;
use std::path::PathBuf;
+use super::options::RevlogOpenOptions;
+
/// A specialized `Revlog` to work with file data logs.
pub struct Filelog {
/// The generic `revlog` format.
--- a/rust/hg-core/src/revlog/manifest.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/manifest.rs Wed Sep 25 18:24:15 2024 +0200
@@ -6,9 +6,9 @@
use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
use crate::vfs::VfsImpl;
-use crate::{
- Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
-};
+use crate::{Graph, GraphError, Revision, UncheckedRevision};
+
+use super::options::RevlogOpenOptions;
/// A specialized `Revlog` to work with `manifest` data format.
pub struct Manifestlog {
--- a/rust/hg-core/src/revlog/mod.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/revlog/mod.rs Wed Sep 25 18:24:15 2024 +0200
@@ -9,18 +9,19 @@
pub mod nodemap;
mod nodemap_docket;
pub mod path_encode;
-use compression::{uncompressed_zstd_data, CompressionConfig};
+use compression::uncompressed_zstd_data;
pub use node::{FromHexError, Node, NodePrefix};
+use options::RevlogOpenOptions;
pub mod changelog;
pub mod compression;
pub mod file_io;
pub mod filelog;
pub mod index;
pub mod manifest;
+pub mod options;
pub mod patch;
use std::borrow::Cow;
-use std::collections::HashSet;
use std::io::Read;
use std::ops::Deref;
use std::path::Path;
@@ -33,12 +34,8 @@
use super::index::Index;
use super::index::INDEX_ENTRY_SIZE;
use super::nodemap::{NodeMap, NodeMapError};
-use crate::config::{Config, ResourceProfileValue};
use crate::errors::HgError;
use crate::exit_codes;
-use crate::requirements::{
- GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT, SPARSEREVLOG_REQUIREMENT,
-};
use crate::vfs::VfsImpl;
/// As noted in revlog.c, revision numbers are actually encoded in
@@ -259,255 +256,6 @@
}
}
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how the revlog data is read
-pub struct RevlogDataConfig {
- /// Should we try to open the "pending" version of the revlog
- pub try_pending: bool,
- /// Should we try to open the "split" version of the revlog
- pub try_split: bool,
- /// When True, `indexfile` should be opened with `checkambig=True` at
- /// writing time, to avoid file stat ambiguity
- pub check_ambig: bool,
- /// If true, use mmap instead of reading to deal with large indexes
- pub mmap_large_index: bool,
- /// How much data is considered large
- pub mmap_index_threshold: Option<u64>,
- /// How much data to read and cache into the raw revlog data cache
- pub chunk_cache_size: u64,
- /// The size of the uncompressed cache compared to the largest revision
- /// seen
- pub uncompressed_cache_factor: Option<f64>,
- /// The number of chunks cached
- pub uncompressed_cache_count: Option<u64>,
- /// Allow sparse reading of the revlog data
- pub with_sparse_read: bool,
- /// Minimal density of a sparse read chunk
- pub sr_density_threshold: f64,
- /// Minimal size of the data we skip when performing sparse reads
- pub sr_min_gap_size: u64,
- /// Whether deltas are encoded against arbitrary bases
- pub general_delta: bool,
-}
-
-impl RevlogDataConfig {
- pub fn new(
- config: &Config,
- requirements: &HashSet<String>,
- ) -> Result<Self, HgError> {
- let mut data_config = Self::default();
- if let Some(chunk_cache_size) =
- config.get_byte_size(b"format", b"chunkcachesize")?
- {
- data_config.chunk_cache_size = chunk_cache_size;
- }
-
- let memory_profile = config.get_resource_profile(Some("memory"));
- if memory_profile.value >= ResourceProfileValue::Medium {
- data_config.uncompressed_cache_count = Some(10_000);
- data_config.uncompressed_cache_factor = Some(4.0);
- if memory_profile.value >= ResourceProfileValue::High {
- data_config.uncompressed_cache_factor = Some(10.0)
- }
- }
-
- if let Some(mmap_index_threshold) = config
- .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
- {
- data_config.mmap_index_threshold = Some(mmap_index_threshold);
- }
-
- let with_sparse_read =
- config.get_bool(b"experimental", b"sparse-read")?;
- if let Some(sr_density_threshold) = config
- .get_f64(b"experimental", b"sparse-read.density-threshold")?
- {
- data_config.sr_density_threshold = sr_density_threshold;
- }
- data_config.with_sparse_read = with_sparse_read;
- if let Some(sr_min_gap_size) = config
- .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
- {
- data_config.sr_min_gap_size = sr_min_gap_size;
- }
-
- data_config.with_sparse_read =
- requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
- Ok(data_config)
- }
-}
-
-impl Default for RevlogDataConfig {
- fn default() -> Self {
- Self {
- chunk_cache_size: 65536,
- sr_density_threshold: 0.50,
- sr_min_gap_size: 262144,
- try_pending: Default::default(),
- try_split: Default::default(),
- check_ambig: Default::default(),
- mmap_large_index: Default::default(),
- mmap_index_threshold: Default::default(),
- uncompressed_cache_factor: Default::default(),
- uncompressed_cache_count: Default::default(),
- with_sparse_read: Default::default(),
- general_delta: Default::default(),
- }
- }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq)]
-/// Holds configuration values about how new deltas are computed.
-///
-/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
-/// each object self contained.
-pub struct RevlogDeltaConfig {
- /// Whether deltas can be encoded against arbitrary bases
- pub general_delta: bool,
- /// Allow sparse writing of the revlog data
- pub sparse_revlog: bool,
- /// Maximum length of a delta chain
- pub max_chain_len: Option<u64>,
- /// Maximum distance between a delta chain's start and end
- pub max_deltachain_span: Option<u64>,
- /// If `upper_bound_comp` is not None, this is the expected maximal
- /// gain from compression for the data content
- pub upper_bound_comp: Option<f64>,
- /// Should we try a delta against both parents
- pub delta_both_parents: bool,
- /// Test delta base candidate groups by chunks of this maximal size
- pub candidate_group_chunk_size: u64,
- /// Should we display debug information about delta computation
- pub debug_delta: bool,
- /// Trust incoming deltas by default
- pub lazy_delta: bool,
- /// Trust the base of incoming deltas by default
- pub lazy_delta_base: bool,
-}
-impl RevlogDeltaConfig {
- pub fn new(
- config: &Config,
- requirements: &HashSet<String>,
- revlog_type: RevlogType,
- ) -> Result<Self, HgError> {
- let mut delta_config = Self {
- delta_both_parents: config
- .get_option_no_default(
- b"storage",
- b"revlog.optimize-delta-parent-choice",
- )?
- .unwrap_or(true),
- candidate_group_chunk_size: config
- .get_u64(
- b"storage",
- b"revlog.delta-parent-search.candidate-group-chunk-size",
- )?
- .unwrap_or_default(),
- ..Default::default()
- };
-
- delta_config.debug_delta =
- config.get_bool(b"debug", b"revlog.debug-delta")?;
-
- delta_config.general_delta =
- requirements.contains(GENERALDELTA_REQUIREMENT);
-
- let lazy_delta =
- config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
-
- if revlog_type == RevlogType::Manifestlog {
- // upper bound of what we expect from compression
- // (real life value seems to be 3)
- delta_config.upper_bound_comp = Some(3.0)
- }
-
- let mut lazy_delta_base = false;
- if lazy_delta {
- lazy_delta_base = match config.get_option_no_default(
- b"storage",
- b"revlog.reuse-external-delta-parent",
- )? {
- Some(base) => base,
- None => config.get_bool(b"format", b"generaldelta")?,
- };
- }
- delta_config.lazy_delta = lazy_delta;
- delta_config.lazy_delta_base = lazy_delta_base;
-
- delta_config.max_deltachain_span =
- match config.get_i64(b"experimental", b"maxdeltachainspan")? {
- Some(span) => {
- if span < 0 {
- None
- } else {
- Some(span as u64)
- }
- }
- None => None,
- };
-
- delta_config.sparse_revlog =
- requirements.contains(SPARSEREVLOG_REQUIREMENT);
-
- delta_config.max_chain_len =
- config.get_byte_size_no_default(b"format", b"maxchainlen")?;
-
- Ok(delta_config)
- }
-}
-
-impl Default for RevlogDeltaConfig {
- fn default() -> Self {
- Self {
- delta_both_parents: true,
- lazy_delta: true,
- general_delta: Default::default(),
- sparse_revlog: Default::default(),
- max_chain_len: Default::default(),
- max_deltachain_span: Default::default(),
- upper_bound_comp: Default::default(),
- candidate_group_chunk_size: Default::default(),
- debug_delta: Default::default(),
- lazy_delta_base: Default::default(),
- }
- }
-}
-
-#[derive(Debug, Default, Clone, Copy, PartialEq)]
-/// Holds configuration values about the available revlog features
-pub struct RevlogFeatureConfig {
- /// The compression engine and its options
- pub compression_engine: CompressionConfig,
- /// Can we use censor on this revlog
- pub censorable: bool,
- /// Does this revlog use the "side data" feature
- pub has_side_data: bool,
- /// Might remove this configuration once the rank computation has no
- /// impact
- pub compute_rank: bool,
- /// Parent order is supposed to be semantically irrelevant, so we
- /// normally re-sort parents to ensure that the first parent is non-null,
- /// if there is a non-null parent at all.
- /// filelog abuses the parent order as a flag to mark some instances of
- /// meta-encoded files, so allow it to disable this behavior.
- pub canonical_parent_order: bool,
- /// Can ellipsis commit be used
- pub enable_ellipsis: bool,
-}
-impl RevlogFeatureConfig {
- pub fn new(
- config: &Config,
- requirements: &HashSet<String>,
- ) -> Result<Self, HgError> {
- Ok(Self {
- compression_engine: CompressionConfig::new(config, requirements)?,
- enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
- ..Default::default()
- })
- }
-}
-
/// Read only implementation of revlog.
pub struct Revlog {
/// When index and data are not interleaved: bytes of the revlog index.
@@ -526,90 +274,6 @@
}
}
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum RevlogVersionOptions {
- V0,
- V1 { general_delta: bool, inline: bool },
- V2,
- ChangelogV2 { compute_rank: bool },
-}
-
-/// Options to govern how a revlog should be opened, usually from the
-/// repository configuration or requirements.
-#[derive(Debug, Copy, Clone)]
-pub struct RevlogOpenOptions {
- /// The revlog version, along with any option specific to this version
- pub version: RevlogVersionOptions,
- /// Whether the revlog uses a persistent nodemap.
- pub use_nodemap: bool,
- pub delta_config: RevlogDeltaConfig,
- pub data_config: RevlogDataConfig,
- pub feature_config: RevlogFeatureConfig,
-}
-
-#[cfg(test)]
-impl Default for RevlogOpenOptions {
- fn default() -> Self {
- Self {
- version: RevlogVersionOptions::V1 {
- general_delta: true,
- inline: false,
- },
- use_nodemap: true,
- data_config: Default::default(),
- delta_config: Default::default(),
- feature_config: Default::default(),
- }
- }
-}
-
-impl RevlogOpenOptions {
- pub fn new(
- inline: bool,
- data_config: RevlogDataConfig,
- delta_config: RevlogDeltaConfig,
- feature_config: RevlogFeatureConfig,
- ) -> Self {
- Self {
- version: RevlogVersionOptions::V1 {
- general_delta: data_config.general_delta,
- inline,
- },
- use_nodemap: false,
- data_config,
- delta_config,
- feature_config,
- }
- }
-
- pub fn index_header(&self) -> index::IndexHeader {
- index::IndexHeader {
- header_bytes: match self.version {
- RevlogVersionOptions::V0 => [0, 0, 0, 0],
- RevlogVersionOptions::V1 {
- general_delta,
- inline,
- } => [
- 0,
- if general_delta && inline {
- 3
- } else if general_delta {
- 2
- } else {
- u8::from(inline)
- },
- 0,
- 1,
- ],
- RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
- RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
- 0xD34Du32.to_be_bytes()
- }
- },
- }
- }
-}
-
impl Revlog {
/// Open a revlog index file.
///
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/options.rs Wed Sep 25 18:24:15 2024 +0200
@@ -0,0 +1,393 @@
+//! Helpers for the revlog config and opening options
+
+use std::collections::HashSet;
+
+use crate::{
+ config::{Config, ResourceProfileValue},
+ errors::HgError,
+ requirements::{
+ CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NARROW_REQUIREMENT,
+ NODEMAP_REQUIREMENT, REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
+ SPARSEREVLOG_REQUIREMENT,
+ },
+};
+
+use super::{compression::CompressionConfig, RevlogType};
+
+const DEFAULT_CHUNK_CACHE_SIZE: u64 = 65536;
+const DEFAULT_SPARSE_READ_DENSITY_THRESHOLD: f64 = 0.50;
+const DEFAULT_SPARSE_READ_MIN_GAP_SIZE: u64 = 262144;
+
+/// The known revlog versions and their options
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum RevlogVersionOptions {
+ V0,
+ V1 { general_delta: bool, inline: bool },
+ V2,
+ ChangelogV2 { compute_rank: bool },
+}
+
+/// Options to govern how a revlog should be opened, usually from the
+/// repository configuration or requirements.
+#[derive(Debug, Copy, Clone)]
+pub struct RevlogOpenOptions {
+ /// The revlog version, along with any option specific to this version
+ pub version: RevlogVersionOptions,
+ /// Whether the revlog uses a persistent nodemap.
+ pub use_nodemap: bool,
+ pub delta_config: RevlogDeltaConfig,
+ pub data_config: RevlogDataConfig,
+ pub feature_config: RevlogFeatureConfig,
+}
+
+#[cfg(test)]
+impl Default for RevlogOpenOptions {
+ fn default() -> Self {
+ Self {
+ version: RevlogVersionOptions::V1 {
+ general_delta: true,
+ inline: false,
+ },
+ use_nodemap: true,
+ data_config: Default::default(),
+ delta_config: Default::default(),
+ feature_config: Default::default(),
+ }
+ }
+}
+
+impl RevlogOpenOptions {
+ pub fn new(
+ inline: bool,
+ data_config: RevlogDataConfig,
+ delta_config: RevlogDeltaConfig,
+ feature_config: RevlogFeatureConfig,
+ ) -> Self {
+ Self {
+ version: RevlogVersionOptions::V1 {
+ general_delta: data_config.general_delta,
+ inline,
+ },
+ use_nodemap: false,
+ data_config,
+ delta_config,
+ feature_config,
+ }
+ }
+
+ pub fn index_header(&self) -> super::index::IndexHeader {
+ super::index::IndexHeader {
+ header_bytes: match self.version {
+ RevlogVersionOptions::V0 => [0, 0, 0, 0],
+ RevlogVersionOptions::V1 {
+ general_delta,
+ inline,
+ } => [
+ 0,
+ if general_delta && inline {
+ 3
+ } else if general_delta {
+ 2
+ } else {
+ u8::from(inline)
+ },
+ 0,
+ 1,
+ ],
+ RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
+ RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
+ 0xD34Du32.to_be_bytes()
+ }
+ },
+ }
+ }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how the revlog data is read
+pub struct RevlogDataConfig {
+ /// Should we try to open the "pending" version of the revlog
+ pub try_pending: bool,
+ /// Should we try to open the "split" version of the revlog
+ pub try_split: bool,
+ /// When True, `indexfile` should be opened with `checkambig=True` at
+ /// writing time, to avoid file stat ambiguity
+ pub check_ambig: bool,
+ /// If true, use mmap instead of reading to deal with large indexes
+ pub mmap_large_index: bool,
+ /// How much data is considered large
+ pub mmap_index_threshold: Option<u64>,
+ /// How much data to read and cache into the raw revlog data cache
+ pub chunk_cache_size: u64,
+ /// The size of the uncompressed cache compared to the largest revision
+ /// seen
+ pub uncompressed_cache_factor: Option<f64>,
+ /// The number of chunks cached
+ pub uncompressed_cache_count: Option<u64>,
+ /// Allow sparse reading of the revlog data
+ pub with_sparse_read: bool,
+ /// Minimal density of a sparse read chunk
+ pub sr_density_threshold: f64,
+ /// Minimal size of the data we skip when performing sparse reads
+ pub sr_min_gap_size: u64,
+ /// Whether deltas are encoded against arbitrary bases
+ pub general_delta: bool,
+}
+
+impl RevlogDataConfig {
+ pub fn new(
+ config: &Config,
+ requirements: &HashSet<String>,
+ ) -> Result<Self, HgError> {
+ let mut data_config = Self::default();
+ if let Some(chunk_cache_size) =
+ config.get_byte_size(b"format", b"chunkcachesize")?
+ {
+ data_config.chunk_cache_size = chunk_cache_size;
+ }
+
+ let memory_profile = config.get_resource_profile(Some("memory"));
+ if memory_profile.value >= ResourceProfileValue::Medium {
+ data_config.uncompressed_cache_count = Some(10_000);
+ data_config.uncompressed_cache_factor = Some(4.0);
+ if memory_profile.value >= ResourceProfileValue::High {
+ data_config.uncompressed_cache_factor = Some(10.0)
+ }
+ }
+
+ if let Some(mmap_index_threshold) = config
+ .get_byte_size(b"storage", b"revlog.mmap.index:size-threshold")?
+ {
+ data_config.mmap_index_threshold = Some(mmap_index_threshold);
+ }
+
+ let with_sparse_read =
+ config.get_bool(b"experimental", b"sparse-read")?;
+ if let Some(sr_density_threshold) = config
+ .get_f64(b"experimental", b"sparse-read.density-threshold")?
+ {
+ data_config.sr_density_threshold = sr_density_threshold;
+ }
+ data_config.with_sparse_read = with_sparse_read;
+ if let Some(sr_min_gap_size) = config
+ .get_byte_size(b"experimental", b"sparse-read.min-gap-size")?
+ {
+ data_config.sr_min_gap_size = sr_min_gap_size;
+ }
+
+ data_config.with_sparse_read =
+ requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+ Ok(data_config)
+ }
+}
+
+impl Default for RevlogDataConfig {
+ fn default() -> Self {
+ Self {
+ chunk_cache_size: DEFAULT_CHUNK_CACHE_SIZE,
+ sr_density_threshold: DEFAULT_SPARSE_READ_DENSITY_THRESHOLD,
+ sr_min_gap_size: DEFAULT_SPARSE_READ_MIN_GAP_SIZE,
+ try_pending: Default::default(),
+ try_split: Default::default(),
+ check_ambig: Default::default(),
+ mmap_large_index: Default::default(),
+ mmap_index_threshold: Default::default(),
+ uncompressed_cache_factor: Default::default(),
+ uncompressed_cache_count: Default::default(),
+ with_sparse_read: Default::default(),
+ general_delta: Default::default(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+/// Holds configuration values about how new deltas are computed.
+///
+/// Some attributes are duplicated from [`RevlogDataConfig`] to help having
+/// each object self contained.
+pub struct RevlogDeltaConfig {
+ /// Whether deltas can be encoded against arbitrary bases
+ pub general_delta: bool,
+ /// Allow sparse writing of the revlog data
+ pub sparse_revlog: bool,
+ /// Maximum length of a delta chain
+ pub max_chain_len: Option<u64>,
+ /// Maximum distance between a delta chain's start and end
+ pub max_deltachain_span: Option<u64>,
+ /// If `upper_bound_comp` is not None, this is the expected maximal
+ /// gain from compression for the data content
+ pub upper_bound_comp: Option<f64>,
+ /// Should we try a delta against both parents
+ pub delta_both_parents: bool,
+ /// Test delta base candidate groups by chunks of this maximal size
+ pub candidate_group_chunk_size: u64,
+ /// Should we display debug information about delta computation
+ pub debug_delta: bool,
+ /// Trust incoming deltas by default
+ pub lazy_delta: bool,
+ /// Trust the base of incoming deltas by default
+ pub lazy_delta_base: bool,
+}
+
+impl RevlogDeltaConfig {
+ pub fn new(
+ config: &Config,
+ requirements: &HashSet<String>,
+ revlog_type: RevlogType,
+ ) -> Result<Self, HgError> {
+ let mut delta_config = Self {
+ delta_both_parents: config
+ .get_option_no_default(
+ b"storage",
+ b"revlog.optimize-delta-parent-choice",
+ )?
+ .unwrap_or(true),
+ candidate_group_chunk_size: config
+ .get_u64(
+ b"storage",
+ b"revlog.delta-parent-search.candidate-group-chunk-size",
+ )?
+ .unwrap_or_default(),
+ ..Default::default()
+ };
+
+ delta_config.debug_delta =
+ config.get_bool(b"debug", b"revlog.debug-delta")?;
+
+ delta_config.general_delta =
+ requirements.contains(GENERALDELTA_REQUIREMENT);
+
+ let lazy_delta =
+ config.get_bool(b"storage", b"revlog.reuse-external-delta")?;
+
+ if revlog_type == RevlogType::Manifestlog {
+ // upper bound of what we expect from compression
+ // (real life value seems to be 3)
+ delta_config.upper_bound_comp = Some(3.0)
+ }
+
+ let mut lazy_delta_base = false;
+ if lazy_delta {
+ lazy_delta_base = match config.get_option_no_default(
+ b"storage",
+ b"revlog.reuse-external-delta-parent",
+ )? {
+ Some(base) => base,
+ None => config.get_bool(b"format", b"generaldelta")?,
+ };
+ }
+ delta_config.lazy_delta = lazy_delta;
+ delta_config.lazy_delta_base = lazy_delta_base;
+
+ delta_config.max_deltachain_span =
+ match config.get_i64(b"experimental", b"maxdeltachainspan")? {
+ Some(span) => {
+ if span < 0 {
+ None
+ } else {
+ Some(span as u64)
+ }
+ }
+ None => None,
+ };
+
+ delta_config.sparse_revlog =
+ requirements.contains(SPARSEREVLOG_REQUIREMENT);
+
+ delta_config.max_chain_len =
+ config.get_byte_size_no_default(b"format", b"maxchainlen")?;
+
+ Ok(delta_config)
+ }
+}
+
+impl Default for RevlogDeltaConfig {
+ fn default() -> Self {
+ Self {
+ delta_both_parents: true,
+ lazy_delta: true,
+ general_delta: Default::default(),
+ sparse_revlog: Default::default(),
+ max_chain_len: Default::default(),
+ max_deltachain_span: Default::default(),
+ upper_bound_comp: Default::default(),
+ candidate_group_chunk_size: Default::default(),
+ debug_delta: Default::default(),
+ lazy_delta_base: Default::default(),
+ }
+ }
+}
+
+#[derive(Debug, Default, Clone, Copy, PartialEq)]
+/// Holds configuration values about the available revlog features
+pub struct RevlogFeatureConfig {
+ /// The compression engine and its options
+ pub compression_engine: CompressionConfig,
+ /// Can we use censor on this revlog
+ pub censorable: bool,
+ /// Does this revlog use the "side data" feature
+ pub has_side_data: bool,
+ /// Might remove this configuration once the rank computation has no
+ /// impact
+ pub compute_rank: bool,
+ /// Parent order is supposed to be semantically irrelevant, so we
+ /// normally re-sort parents to ensure that the first parent is non-null,
+ /// if there is a non-null parent at all.
+ /// filelog abuses the parent order as a flag to mark some instances of
+ /// meta-encoded files, so allow it to disable this behavior.
+ pub canonical_parent_order: bool,
+ /// Can ellipsis commit be used
+ pub enable_ellipsis: bool,
+}
+
+impl RevlogFeatureConfig {
+ pub fn new(
+ config: &Config,
+ requirements: &HashSet<String>,
+ ) -> Result<Self, HgError> {
+ Ok(Self {
+ compression_engine: CompressionConfig::new(config, requirements)?,
+ enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
+ ..Default::default()
+ })
+ }
+}
+
+/// Return the default options for a revlog of `revlog_type` according to the
+/// current config and requirements.
+pub fn default_revlog_options(
+ config: &Config,
+ requirements: &HashSet<String>,
+ revlog_type: RevlogType,
+) -> Result<RevlogOpenOptions, HgError> {
+ let is_changelog = revlog_type == RevlogType::Changelog;
+ let version =
+ if is_changelog && requirements.contains(CHANGELOGV2_REQUIREMENT) {
+ let compute_rank = config
+ .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
+ RevlogVersionOptions::ChangelogV2 { compute_rank }
+ } else if requirements.contains(REVLOGV2_REQUIREMENT) {
+ RevlogVersionOptions::V2
+ } else if requirements.contains(REVLOGV1_REQUIREMENT) {
+ RevlogVersionOptions::V1 {
+ general_delta: requirements.contains(GENERALDELTA_REQUIREMENT),
+ inline: !is_changelog,
+ }
+ } else {
+ RevlogVersionOptions::V0
+ };
+ Ok(RevlogOpenOptions {
+ version,
+ // We don't need to dance around the slow path like in the Python
+ // implementation since we know we have access to the fast code.
+ use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
+ delta_config: RevlogDeltaConfig::new(
+ config,
+ requirements,
+ revlog_type,
+ )?,
+ data_config: RevlogDataConfig::new(config, requirements)?,
+ feature_config: RevlogFeatureConfig::new(config, requirements)?,
+ })
+}
--- a/rust/hg-core/src/update.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/hg-core/src/update.rs Wed Sep 25 18:24:15 2024 +0200
@@ -19,6 +19,7 @@
narrow,
node::NULL_NODE,
operations::{list_rev_tracked_files, ExpandedManifestEntry},
+ options::{default_revlog_options, RevlogOpenOptions},
progress::Progress,
repo::Repo,
sparse,
@@ -28,7 +29,7 @@
path_auditor::PathAuditor,
},
vfs::{is_on_nfs_mount, VfsImpl},
- DirstateParents, RevlogError, RevlogOpenOptions, UncheckedRevision,
+ DirstateParents, RevlogError, UncheckedRevision,
};
use crossbeam_channel::{Receiver, Sender};
use rayon::prelude::*;
@@ -89,7 +90,11 @@
return Ok(0);
}
let store_vfs = &repo.store_vfs();
- let options = repo.default_revlog_options(crate::RevlogType::Filelog)?;
+ let options = default_revlog_options(
+ repo.config(),
+ repo.requirements(),
+ crate::RevlogType::Filelog,
+ )?;
let (errors_sender, errors_receiver) = crossbeam_channel::unbounded();
let (files_sender, files_receiver) = crossbeam_channel::unbounded();
let working_directory_path = &repo.working_directory_path();
--- a/rust/rhg/src/commands/status.rs Wed Sep 25 18:10:03 2024 +0200
+++ b/rust/rhg/src/commands/status.rs Wed Sep 25 18:24:15 2024 +0200
@@ -23,16 +23,17 @@
use hg::manifest::Manifest;
use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
use hg::repo::Repo;
+use hg::revlog::options::{default_revlog_options, RevlogOpenOptions};
use hg::utils::debug::debug_wait_for_file;
use hg::utils::files::{
get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
};
use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
+use hg::DirstateStatus;
use hg::Revision;
use hg::StatusError;
use hg::StatusOptions;
use hg::{self, narrow, sparse};
-use hg::{DirstateStatus, RevlogOpenOptions};
use hg::{PatternFileWarning, RevlogType};
use log::info;
use rayon::prelude::*;
@@ -383,8 +384,11 @@
})?;
let working_directory_vfs = repo.working_directory_vfs();
let store_vfs = repo.store_vfs();
- let revlog_open_options =
- repo.default_revlog_options(RevlogType::Manifestlog)?;
+ let revlog_open_options = default_revlog_options(
+ repo.config(),
+ repo.requirements(),
+ RevlogType::Manifestlog,
+ )?;
let res: Vec<_> = take(&mut ds_status.unsure)
.into_par_iter()
.map(|to_check| {