diff rust/hg-core/src/revlog/mod.rs @ 52154:0744248cc541

rust-revlog: add compression helpers This will be used in the upcoming `InnerRevlog` when reading/writing data.
author Rapha?l Gom?s <rgomes@octobus.net>
date Wed, 25 Sep 2024 16:42:21 +0200
parents 8b7123c8947b
children 426696af24d3
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/mod.rs	Tue Oct 29 09:38:48 2024 +0100
+++ b/rust/hg-core/src/revlog/mod.rs	Wed Sep 25 16:42:21 2024 +0200
@@ -9,8 +9,10 @@
 pub mod nodemap;
 mod nodemap_docket;
 pub mod path_encode;
+use compression::{uncompressed_zstd_data, CompressionConfig};
 pub use node::{FromHexError, Node, NodePrefix};
 pub mod changelog;
+pub mod compression;
 pub mod filelog;
 pub mod index;
 pub mod manifest;
@@ -24,8 +26,6 @@
 
 use flate2::read::ZlibDecoder;
 use sha1::{Digest, Sha1};
-use std::cell::RefCell;
-use zstd;
 
 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
 use self::nodemap_docket::NodeMapDocket;
@@ -258,75 +258,6 @@
     }
 }
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum CompressionEngine {
-    Zlib {
-        /// Between 0 and 9 included
-        level: u32,
-    },
-    Zstd {
-        /// Between 0 and 22 included
-        level: u32,
-        /// Never used in practice for now
-        threads: u32,
-    },
-    /// No compression is performed
-    None,
-}
-impl CompressionEngine {
-    pub fn set_level(&mut self, new_level: usize) -> Result<(), HgError> {
-        match self {
-            CompressionEngine::Zlib { level } => {
-                if new_level > 9 {
-                    return Err(HgError::abort(
-                        format!(
-                            "invalid compression zlib compression level {}",
-                            new_level
-                        ),
-                        exit_codes::ABORT,
-                        None,
-                    ));
-                }
-                *level = new_level as u32;
-            }
-            CompressionEngine::Zstd { level, .. } => {
-                if new_level > 22 {
-                    return Err(HgError::abort(
-                        format!(
-                            "invalid compression zstd compression level {}",
-                            new_level
-                        ),
-                        exit_codes::ABORT,
-                        None,
-                    ));
-                }
-                *level = new_level as u32;
-            }
-            CompressionEngine::None => {}
-        }
-        Ok(())
-    }
-
-    pub fn zstd(
-        zstd_level: Option<u32>,
-    ) -> Result<CompressionEngine, HgError> {
-        let mut engine = CompressionEngine::Zstd {
-            level: 3,
-            threads: 0,
-        };
-        if let Some(level) = zstd_level {
-            engine.set_level(level as usize)?;
-        }
-        Ok(engine)
-    }
-}
-
-impl Default for CompressionEngine {
-    fn default() -> Self {
-        Self::Zlib { level: 6 }
-    }
-}
-
 #[derive(Debug, Clone, Copy, PartialEq)]
 /// Holds configuration values about how the revlog data is read
 pub struct RevlogDataConfig {
@@ -546,7 +477,7 @@
 /// Holds configuration values about the available revlog features
 pub struct RevlogFeatureConfig {
     /// The compression engine and its options
-    pub compression_engine: CompressionEngine,
+    pub compression_engine: CompressionConfig,
     /// Can we use censor on this revlog
     pub censorable: bool,
     /// Does this revlog use the "side data" feature
@@ -568,46 +499,11 @@
         config: &Config,
         requirements: &HashSet<String>,
     ) -> Result<Self, HgError> {
-        let mut feature_config = Self::default();
-
-        let zlib_level = config.get_u32(b"storage", b"revlog.zlib.level")?;
-        let zstd_level = config.get_u32(b"storage", b"revlog.zstd.level")?;
-
-        feature_config.compression_engine = CompressionEngine::default();
-
-        for requirement in requirements {
-            if requirement.starts_with("revlog-compression-")
-                || requirement.starts_with("exp-compression-")
-            {
-                let split = &mut requirement.splitn(3, '-');
-                split.next();
-                split.next();
-                feature_config.compression_engine = match split.next().unwrap()
-                {
-                    "zstd" => CompressionEngine::zstd(zstd_level)?,
-                    e => {
-                        return Err(HgError::UnsupportedFeature(format!(
-                            "Unsupported compression engine '{e}'"
-                        )))
-                    }
-                };
-            }
-        }
-        if let Some(level) = zlib_level {
-            if matches!(
-                feature_config.compression_engine,
-                CompressionEngine::Zlib { .. }
-            ) {
-                feature_config
-                    .compression_engine
-                    .set_level(level as usize)?;
-            }
-        }
-
-        feature_config.enable_ellipsis =
-            requirements.contains(NARROW_REQUIREMENT);
-
-        Ok(feature_config)
+        Ok(Self {
+            compression_engine: CompressionConfig::new(config, requirements)?,
+            enable_ellipsis: requirements.contains(NARROW_REQUIREMENT),
+            ..Default::default()
+        })
     }
 }
 
@@ -1058,21 +954,6 @@
     hash: Node,
 }
 
-thread_local! {
-  // seems fine to [unwrap] here: this can only fail due to memory allocation
-  // failing, and it's normal for that to cause panic.
-  static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
-      RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
-}
-
-fn zstd_decompress_to_buffer(
-    bytes: &[u8],
-    buf: &mut Vec<u8>,
-) -> Result<usize, std::io::Error> {
-    ZSTD_DECODER
-        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
-}
-
 impl<'revlog> RevlogEntry<'revlog> {
     pub fn revision(&self) -> Revision {
         self.rev
@@ -1218,7 +1099,11 @@
             // zlib (RFC 1950) data.
             b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
             // zstd data.
-            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
+            b'\x28' => Ok(Cow::Owned(uncompressed_zstd_data(
+                self.bytes,
+                self.is_delta(),
+                self.uncompressed_len.max(0),
+            )?)),
             // A proper new format should have had a repo/store requirement.
             format_type => Err(corrupted(format!(
                 "unknown compression header '{}'",
@@ -1245,38 +1130,6 @@
         }
     }
 
-    fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
-        let cap = self.uncompressed_len.max(0) as usize;
-        if self.is_delta() {
-            // [cap] is usually an over-estimate of the space needed because
-            // it's the length of delta-decoded data, but we're interested
-            // in the size of the delta.
-            // This means we have to [shrink_to_fit] to avoid holding on
-            // to a large chunk of memory, but it also means we must have a
-            // fallback branch, for the case when the delta is longer than
-            // the original data (surprisingly, this does happen in practice)
-            let mut buf = Vec::with_capacity(cap);
-            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
-                Ok(_) => buf.shrink_to_fit(),
-                Err(_) => {
-                    buf.clear();
-                    zstd::stream::copy_decode(self.bytes, &mut buf)
-                        .map_err(|e| corrupted(e.to_string()))?;
-                }
-            };
-            Ok(buf)
-        } else {
-            let mut buf = Vec::with_capacity(cap);
-            let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
-            if len != self.uncompressed_len as usize {
-                Err(corrupted("uncompressed length does not match"))
-            } else {
-                Ok(buf)
-            }
-        }
-    }
-
     /// Tell if the entry is a snapshot or a delta
     /// (influences on decompression).
     fn is_delta(&self) -> bool {