changeset 52761:8497cfb0d76c

rust-manifest: add Manifestlog::inexact_data_delta_parents

This is similar to manifestctx.read_delta_parents(exact=False) in manifest.py. It is useful to determine if a file was added in a changeset without delta-resolving the entire manifest. I will use it for rhg annotate.
author Mitchell Kember <mkember@janestreet.com>
date Tue, 14 Jan 2025 17:44:02 -0500
parents 94e2547e6f3d
children 169ccd142ef8
files rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/revlog/patch.rs
diffstat 3 files changed, 77 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/manifest.rs	Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/manifest.rs	Tue Jan 14 17:44:02 2025 -0500
@@ -1,14 +1,12 @@
 use std::num::NonZeroU8;
 
 use crate::errors::HgError;
-use crate::revlog::{Node, NodePrefix};
-use crate::revlog::{Revlog, RevlogError};
+use crate::revlog::options::RevlogOpenOptions;
+use crate::revlog::{Node, NodePrefix, Revlog, RevlogError};
 use crate::utils::hg_path::HgPath;
 use crate::utils::strings::SliceExt;
 use crate::vfs::VfsImpl;
-use crate::{Graph, GraphError, Revision, UncheckedRevision};
-
-use super::options::RevlogOpenOptions;
+use crate::{Graph, GraphError, Revision, UncheckedRevision, NULL_REVISION};
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifestlog {
@@ -66,6 +64,29 @@
         let bytes = self.revlog.get_data(rev)?.into_owned();
         Ok(Manifest { bytes })
     }
+
+    /// Returns a manifest containing entries for `rev` that are not in its
+    /// parents. It is inexact because it might return a superset of this.
+    /// Equivalent to `manifestctx.read_delta_parents(exact=False)` in Python.
+    pub fn inexact_data_delta_parents(
+        &self,
+        rev: Revision,
+    ) -> Result<Manifest, RevlogError> {
+        let delta_parent = self.revlog.delta_parent(rev);
+        let parents = self.parents(rev).map_err(|err| match err {
+            GraphError::ParentOutOfRange(parent) => RevlogError::corrupted(
+                format!("rev {rev} has corrupted parent ({parent})"),
+            ),
+        })?;
+        if delta_parent == NULL_REVISION || !parents.contains(&delta_parent) {
+            return self.data(rev);
+        }
+        let mut bytes = vec![];
+        for chunk in self.revlog.get_data_incr(rev)?.as_patch_list()?.chunks {
+            bytes.extend_from_slice(chunk.data);
+        }
+        Ok(Manifest { bytes })
+    }
 }
 
 /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
--- a/rust/hg-core/src/revlog/mod.rs	Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/mod.rs	Tue Jan 14 17:44:02 2025 -0500
@@ -439,6 +439,26 @@
         self.get_entry(rev)?.data()
     }
 
+    /// Gets the raw uncompressed data stored for a revision, which is either
+    /// the full text or a delta. Panics if `rev` is null.
+    pub fn get_data_incr(
+        &self,
+        rev: Revision,
+    ) -> Result<RawdataBuf, RevlogError> {
+        let index = self.index();
+        let entry = index.get_entry(rev).expect("rev should not be null");
+        let delta_base = entry.base_revision_or_base_of_delta_chain();
+        let base = if UncheckedRevision::from(rev) == delta_base {
+            None
+        } else if index.uses_generaldelta() {
+            Some(delta_base)
+        } else {
+            Some(UncheckedRevision(rev.0 - 1))
+        };
+        let data = self.inner.chunk_for_rev(rev)?;
+        Ok(RawdataBuf { base, data })
+    }
+
     /// Check the hash of some given data against the recorded hash.
     pub fn check_hash(
         &self,
@@ -471,6 +491,21 @@
     }
 }
 
+pub struct RawdataBuf {
+    // If `Some`, data is a delta.
+    base: Option<UncheckedRevision>,
+    data: std::sync::Arc<[u8]>,
+}
+
+impl RawdataBuf {
+    fn as_patch_list(&self) -> Result<patch::PatchList, RevlogError> {
+        match self.base {
+            None => Ok(patch::PatchList::full_snapshot(&self.data)),
+            Some(_) => patch::PatchList::new(&self.data),
+        }
+    }
+}
+
 type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>;
 
 /// TODO We should check for version 5.14+ at runtime, but we either should
--- a/rust/hg-core/src/revlog/patch.rs	Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/patch.rs	Tue Jan 14 17:44:02 2025 -0500
@@ -12,13 +12,13 @@
 /// - a replacement when `!data.is_empty() && start < end`
 /// - not doing anything when `data.is_empty() && start == end`
 #[derive(Debug, Clone)]
-struct Chunk<'a> {
+pub(crate) struct Chunk<'a> {
     /// The start position of the chunk of data to replace
-    start: u32,
+    pub(crate) start: u32,
     /// The end position of the chunk of data to replace (open end interval)
-    end: u32,
+    pub(crate) end: u32,
     /// The data replacing the chunk
-    data: &'a [u8],
+    pub(crate) data: &'a [u8],
 }
 
 impl Chunk<'_> {
@@ -60,7 +60,7 @@
     /// - ordered from the left-most replacement to the right-most replacement
     /// - non-overlapping, meaning that two chucks can not change the same
     ///   chunk of the patched data
-    chunks: Vec<Chunk<'a>>,
+    pub(crate) chunks: Vec<Chunk<'a>>,
 }
 
 impl<'a> PatchList<'a> {
@@ -85,6 +85,17 @@
         Ok(PatchList { chunks })
     }
 
+    /// Creates a patch for a full snapshot, going from nothing to `data`.
+    pub fn full_snapshot(data: &'a [u8]) -> Self {
+        Self {
+            chunks: vec![Chunk {
+                start: 0,
+                end: 0,
+                data,
+            }],
+        }
+    }
+
     /// Apply the patch to some data.
     pub fn apply<T>(
         &self,