rust-manifest: add Manifestlog::inexact_data_delta_parents
This is similar to manifestctx.read_delta_parents(exact=False) in manifest.py.
It is useful to determine if a file was added in a changeset without
delta-resolving the entire manifest. I will use it for rhg annotate.
--- a/rust/hg-core/src/revlog/manifest.rs Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/manifest.rs Tue Jan 14 17:44:02 2025 -0500
@@ -1,14 +1,12 @@
use std::num::NonZeroU8;
use crate::errors::HgError;
-use crate::revlog::{Node, NodePrefix};
-use crate::revlog::{Revlog, RevlogError};
+use crate::revlog::options::RevlogOpenOptions;
+use crate::revlog::{Node, NodePrefix, Revlog, RevlogError};
use crate::utils::hg_path::HgPath;
use crate::utils::strings::SliceExt;
use crate::vfs::VfsImpl;
-use crate::{Graph, GraphError, Revision, UncheckedRevision};
-
-use super::options::RevlogOpenOptions;
+use crate::{Graph, GraphError, Revision, UncheckedRevision, NULL_REVISION};
/// A specialized `Revlog` to work with `manifest` data format.
pub struct Manifestlog {
@@ -66,6 +64,29 @@
let bytes = self.revlog.get_data(rev)?.into_owned();
Ok(Manifest { bytes })
}
+
+ /// Returns a manifest containing the entries of `rev` that are not in its
+ /// parents. It is inexact because it may return a superset of those entries.
+ /// Equivalent to `manifestctx.read_delta_parents(exact=False)` in Python.
+ pub fn inexact_data_delta_parents(
+ &self,
+ rev: Revision,
+ ) -> Result<Manifest, RevlogError> {
+ let delta_parent = self.revlog.delta_parent(rev);
+ let parents = self.parents(rev).map_err(|err| match err {
+ GraphError::ParentOutOfRange(parent) => RevlogError::corrupted(
+ format!("rev {rev} has corrupted parent ({parent})"),
+ ),
+ })?; // an out-of-range parent is reported as revlog corruption
+ if delta_parent == NULL_REVISION || !parents.contains(&delta_parent) {
+ return self.data(rev); // delta parent is null or not a parent of `rev`: fall back to `data(rev)`
+ }
+ let mut bytes = vec![];
+ for chunk in self.revlog.get_data_incr(rev)?.as_patch_list()?.chunks {
+ bytes.extend_from_slice(chunk.data); // keep only each chunk's replacement bytes; start/end offsets are ignored
+ }
+ Ok(Manifest { bytes })
+ }
}
/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
--- a/rust/hg-core/src/revlog/mod.rs Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/mod.rs Tue Jan 14 17:44:02 2025 -0500
@@ -439,6 +439,26 @@
self.get_entry(rev)?.data()
}
+ /// Gets the raw uncompressed data stored for a revision, which is either
+ /// the full text or a delta (see `RawdataBuf`). Panics if `rev` is null.
+ pub fn get_data_incr(
+ &self,
+ rev: Revision,
+ ) -> Result<RawdataBuf, RevlogError> {
+ let index = self.index();
+ let entry = index.get_entry(rev).expect("rev should not be null");
+ let delta_base = entry.base_revision_or_base_of_delta_chain();
+ let base = if UncheckedRevision::from(rev) == delta_base {
+ None // the entry's base is `rev` itself: the stored data is not a delta
+ } else if index.uses_generaldelta() {
+ Some(delta_base) // generaldelta: the delta is against the base recorded in the entry
+ } else {
+ Some(UncheckedRevision(rev.0 - 1)) // otherwise the delta is against the previous revision
+ };
+ let data = self.inner.chunk_for_rev(rev)?;
+ Ok(RawdataBuf { base, data })
+ }
+
/// Check the hash of some given data against the recorded hash.
pub fn check_hash(
&self,
@@ -471,6 +491,21 @@
}
}
+pub struct RawdataBuf { // raw stored data of one revision, as built by `get_data_incr`
+ // Revision the delta applies to. If `Some`, `data` is a delta; if `None`, it is a full snapshot.
+ base: Option<UncheckedRevision>,
+ data: std::sync::Arc<[u8]>, // bytes of the full text or of the delta
+}
+
+impl RawdataBuf {
+ fn as_patch_list(&self) -> Result<patch::PatchList, RevlogError> { // views `data` as a patch list, borrowing from `self`
+ match self.base {
+ None => Ok(patch::PatchList::full_snapshot(&self.data)), // full text: wrap it as a single-chunk patch
+ Some(_) => patch::PatchList::new(&self.data), // delta: build the patch list from the delta bytes
+ }
+ }
+}
+
type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>;
/// TODO We should check for version 5.14+ at runtime, but we either should
--- a/rust/hg-core/src/revlog/patch.rs Thu Jan 16 13:15:02 2025 -0500
+++ b/rust/hg-core/src/revlog/patch.rs Tue Jan 14 17:44:02 2025 -0500
@@ -12,13 +12,13 @@
/// - a replacement when `!data.is_empty() && start < end`
/// - not doing anything when `data.is_empty() && start == end`
#[derive(Debug, Clone)]
-struct Chunk<'a> {
+pub(crate) struct Chunk<'a> {
/// The start position of the chunk of data to replace
- start: u32,
+ pub(crate) start: u32,
/// The end position of the chunk of data to replace (open end interval)
- end: u32,
+ pub(crate) end: u32,
/// The data replacing the chunk
- data: &'a [u8],
+ pub(crate) data: &'a [u8],
}
impl Chunk<'_> {
@@ -60,7 +60,7 @@
/// - ordered from the left-most replacement to the right-most replacement
/// - non-overlapping, meaning that two chucks can not change the same
/// chunk of the patched data
- chunks: Vec<Chunk<'a>>,
+ pub(crate) chunks: Vec<Chunk<'a>>,
}
impl<'a> PatchList<'a> {
@@ -85,6 +85,17 @@
Ok(PatchList { chunks })
}
+ /// Creates a single-chunk patch for a full snapshot, going from nothing to `data`.
+ pub fn full_snapshot(data: &'a [u8]) -> Self {
+ Self {
+ chunks: vec![Chunk {
+ start: 0,
+ end: 0, // empty range `[0, 0)`: nothing is replaced, `data` is inserted at position 0
+ data,
+ }],
+ }
+ }
+
/// Apply the patch to some data.
pub fn apply<T>(
&self,