Mercurial > public > mercurial-scm > hg
changeset 52761:8497cfb0d76c
rust-manifest: add Manifestlog::inexact_data_delta_parents
This is similar to manifestctx.read_delta_parents(exact=False) in manifest.py.
It is useful for determining whether a file was added in a changeset without
delta-resolving the entire manifest. I will use it for rhg annotate.
author | Mitchell Kember <mkember@janestreet.com> |
---|---|
date | Tue, 14 Jan 2025 17:44:02 -0500 |
parents | 94e2547e6f3d |
children | 169ccd142ef8 |
files | rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/revlog/patch.rs |
diffstat | 3 files changed, 77 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/manifest.rs Thu Jan 16 13:15:02 2025 -0500 +++ b/rust/hg-core/src/revlog/manifest.rs Tue Jan 14 17:44:02 2025 -0500 @@ -1,14 +1,12 @@ use std::num::NonZeroU8; use crate::errors::HgError; -use crate::revlog::{Node, NodePrefix}; -use crate::revlog::{Revlog, RevlogError}; +use crate::revlog::options::RevlogOpenOptions; +use crate::revlog::{Node, NodePrefix, Revlog, RevlogError}; use crate::utils::hg_path::HgPath; use crate::utils::strings::SliceExt; use crate::vfs::VfsImpl; -use crate::{Graph, GraphError, Revision, UncheckedRevision}; - -use super::options::RevlogOpenOptions; +use crate::{Graph, GraphError, Revision, UncheckedRevision, NULL_REVISION}; /// A specialized `Revlog` to work with `manifest` data format. pub struct Manifestlog { @@ -66,6 +64,29 @@ let bytes = self.revlog.get_data(rev)?.into_owned(); Ok(Manifest { bytes }) } + + /// Returns a manifest containing entries for `rev` that are not in its + /// parents. It is inexact because it might return a superset of this. + /// Equivalent to `manifestctx.read_delta_parents(exact=False)` in Python. + pub fn inexact_data_delta_parents( + &self, + rev: Revision, + ) -> Result<Manifest, RevlogError> { + let delta_parent = self.revlog.delta_parent(rev); + let parents = self.parents(rev).map_err(|err| match err { + GraphError::ParentOutOfRange(parent) => RevlogError::corrupted( + format!("rev {rev} has corrupted parent ({parent})"), + ), + })?; + if delta_parent == NULL_REVISION || !parents.contains(&delta_parent) { + return self.data(rev); + } + let mut bytes = vec![]; + for chunk in self.revlog.get_data_incr(rev)?.as_patch_list()?.chunks { + bytes.extend_from_slice(chunk.data); + } + Ok(Manifest { bytes }) + } } /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
--- a/rust/hg-core/src/revlog/mod.rs Thu Jan 16 13:15:02 2025 -0500 +++ b/rust/hg-core/src/revlog/mod.rs Tue Jan 14 17:44:02 2025 -0500 @@ -439,6 +439,26 @@ self.get_entry(rev)?.data() } + /// Gets the raw uncompressed data stored for a revision, which is either + /// the full text or a delta. Panics if `rev` is null. + pub fn get_data_incr( + &self, + rev: Revision, + ) -> Result<RawdataBuf, RevlogError> { + let index = self.index(); + let entry = index.get_entry(rev).expect("rev should not be null"); + let delta_base = entry.base_revision_or_base_of_delta_chain(); + let base = if UncheckedRevision::from(rev) == delta_base { + None + } else if index.uses_generaldelta() { + Some(delta_base) + } else { + Some(UncheckedRevision(rev.0 - 1)) + }; + let data = self.inner.chunk_for_rev(rev)?; + Ok(RawdataBuf { base, data }) + } + /// Check the hash of some given data against the recorded hash. pub fn check_hash( &self, @@ -471,6 +491,21 @@ } } +pub struct RawdataBuf { + // If `Some`, data is a delta. + base: Option<UncheckedRevision>, + data: std::sync::Arc<[u8]>, +} + +impl RawdataBuf { + fn as_patch_list(&self) -> Result<patch::PatchList, RevlogError> { + match self.base { + None => Ok(patch::PatchList::full_snapshot(&self.data)), + Some(_) => patch::PatchList::new(&self.data), + } + } +} + type IndexData = Box<dyn Deref<Target = [u8]> + Send + Sync>; /// TODO We should check for version 5.14+ at runtime, but we either should
--- a/rust/hg-core/src/revlog/patch.rs Thu Jan 16 13:15:02 2025 -0500 +++ b/rust/hg-core/src/revlog/patch.rs Tue Jan 14 17:44:02 2025 -0500 @@ -12,13 +12,13 @@ /// - a replacement when `!data.is_empty() && start < end` /// - not doing anything when `data.is_empty() && start == end` #[derive(Debug, Clone)] -struct Chunk<'a> { +pub(crate) struct Chunk<'a> { /// The start position of the chunk of data to replace - start: u32, + pub(crate) start: u32, /// The end position of the chunk of data to replace (open end interval) - end: u32, + pub(crate) end: u32, /// The data replacing the chunk - data: &'a [u8], + pub(crate) data: &'a [u8], } impl Chunk<'_> { @@ -60,7 +60,7 @@ /// - ordered from the left-most replacement to the right-most replacement /// - non-overlapping, meaning that two chucks can not change the same /// chunk of the patched data - chunks: Vec<Chunk<'a>>, + pub(crate) chunks: Vec<Chunk<'a>>, } impl<'a> PatchList<'a> { @@ -85,6 +85,17 @@ Ok(PatchList { chunks }) } + /// Creates a patch for a full snapshot, going from nothing to `data`. + pub fn full_snapshot(data: &'a [u8]) -> Self { + Self { + chunks: vec![Chunk { + start: 0, + end: 0, + data, + }], + } + } + /// Apply the patch to some data. pub fn apply<T>( &self,