Mercurial > public > mercurial-scm > hg
view rust/hg-core/src/revlog/revlog.rs @ 45526:26c53ee51c68
hg-core: Add a limited read only `revlog` implementation
Only covers the needs of the upcoming `rhg debugdata` command.
Differential Revision: https://phab.mercurial-scm.org/D8958
author | Antoine Cezar <antoine.cezar@octobus.net> |
---|---|
date | Fri, 04 Sep 2020 11:55:07 +0200 |
parents | |
children | b0d6309ff50c |
line wrap: on
line source
use std::borrow::Cow; use std::fs::File; use std::io::Read; use std::ops::Deref; use std::path::Path; use byteorder::{BigEndian, ByteOrder}; use flate2::read::ZlibDecoder; use memmap::{Mmap, MmapOptions}; use micro_timer::timed; use zstd; use super::index::Index; use super::patch; use crate::revlog::Revision; pub enum RevlogError { IoError(std::io::Error), UnsuportedVersion(u16), InvalidRevision, Corrupted, UnknowDataFormat(u8), } fn mmap_open(path: &Path) -> Result<Mmap, std::io::Error> { let file = File::open(path)?; let mmap = unsafe { MmapOptions::new().map(&file) }?; Ok(mmap) } /// Read only implementation of revlog. pub struct Revlog { /// When index and data are not interleaved: bytes of the revlog index. /// When index and data are interleaved: bytes of the revlog index and /// data. index_bytes: Box<dyn Deref<Target = [u8]> + Send>, /// When index and data are not interleaved: bytes of the revlog data data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>, } impl Revlog { /// Open a revlog index file. /// /// It will also open the associated data file if index and data are not /// interleaved. #[timed] pub fn open(index_path: &Path) -> Result<Self, RevlogError> { let index_mmap = mmap_open(&index_path).map_err(RevlogError::IoError)?; let version = get_version(&index_mmap); if version != 1 { return Err(RevlogError::UnsuportedVersion(version)); } let is_inline = is_inline(&index_mmap); let index_bytes = Box::new(index_mmap); // TODO load data only when needed // // type annotation required // won't recognize Mmap as Deref<Target = [u8]> let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> = if is_inline { None } else { let data_path = index_path.with_extension("d"); let data_mmap = mmap_open(&data_path).map_err(RevlogError::IoError)?; Some(Box::new(data_mmap)) }; Ok(Revlog { index_bytes, data_bytes, }) } /// Return the full data associated to a revision. /// /// All entries required to build the final data out of deltas will be /// retrieved as needed, and the deltas will be applied to the inital /// snapshot to rebuild the final data. #[timed] pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> { // Todo return -> Cow let mut entry = self.get_entry(rev)?; let mut delta_chain = vec![]; while let Some(base_rev) = entry.base_rev { delta_chain.push(entry); entry = self .get_entry(base_rev) .map_err(|_| RevlogError::Corrupted)?; } if delta_chain.is_empty() { Ok(entry.data()?.into()) } else { Revlog::build_data_from_deltas(entry, &delta_chain) } } /// Build the full data of a revision out its snapshot /// and its deltas. #[timed] fn build_data_from_deltas( snapshot: RevlogEntry, deltas: &[RevlogEntry], ) -> Result<Vec<u8>, RevlogError> { let snapshot = snapshot.data()?; let deltas = deltas .iter() .rev() .map(RevlogEntry::data) .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?; let patches: Vec<_> = deltas.iter().map(|d| patch::PatchList::new(d)).collect(); let patch = patch::fold_patch_lists(&patches); Ok(patch.apply(&snapshot)) } /// Return the revlog index. fn index(&self) -> Index { let is_inline = self.data_bytes.is_none(); Index::new(&self.index_bytes, is_inline) } /// Return the revlog data. fn data(&self) -> &[u8] { match self.data_bytes { Some(ref data_bytes) => &data_bytes, None => &self.index_bytes, } } /// Get an entry of the revlog. fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { let index = self.index(); let index_entry = index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?; let start = index_entry.offset(); let end = start + index_entry.compressed_len(); let entry = RevlogEntry { rev, bytes: &self.data()[start..end], compressed_len: index_entry.compressed_len(), uncompressed_len: index_entry.uncompressed_len(), base_rev: if index_entry.base_revision() == rev { None } else { Some(index_entry.base_revision()) }, }; Ok(entry) } } /// The revlog entry's bytes and the necessary informations to extract /// the entry's data. #[derive(Debug)] pub struct RevlogEntry<'a> { rev: Revision, bytes: &'a [u8], compressed_len: usize, uncompressed_len: usize, base_rev: Option<Revision>, } impl<'a> RevlogEntry<'a> { /// Extract the data contained in the entry. pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> { if self.bytes.is_empty() { return Ok(Cow::Borrowed(&[])); } match self.bytes[0] { // Revision data is the entirety of the entry, including this // header. b'\0' => Ok(Cow::Borrowed(self.bytes)), // Raw revision data follows. b'u' => Ok(Cow::Borrowed(&self.bytes[1..])), // zlib (RFC 1950) data. b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data())), // zstd data. b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data())), format_type => Err(RevlogError::UnknowDataFormat(format_type)), } } fn uncompressed_zlib_data(&self) -> Vec<u8> { let mut decoder = ZlibDecoder::new(self.bytes); if self.is_delta() { let mut buf = Vec::with_capacity(self.compressed_len); decoder.read_to_end(&mut buf).expect("corrupted zlib data"); buf } else { let mut buf = vec![0; self.uncompressed_len]; decoder.read_exact(&mut buf).expect("corrupted zlib data"); buf } } fn uncompressed_zstd_data(&self) -> Vec<u8> { if self.is_delta() { let mut buf = Vec::with_capacity(self.compressed_len); zstd::stream::copy_decode(self.bytes, &mut buf) .expect("corrupted zstd data"); buf } else { let mut buf = vec![0; self.uncompressed_len]; let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) .expect("corrupted zstd data"); assert_eq!(len, self.uncompressed_len, "corrupted zstd data"); buf } } /// Tell if the entry is a snapshot or a delta /// (influences on decompression). fn is_delta(&self) -> bool { self.base_rev.is_some() } } /// Value of the inline flag. pub fn is_inline(index_bytes: &[u8]) -> bool { match &index_bytes[0..=1] { [0, 0] | [0, 2] => false, _ => true, } } /// Format version of the revlog. pub fn get_version(index_bytes: &[u8]) -> u16 { BigEndian::read_u16(&index_bytes[2..=3]) } #[cfg(test)] mod tests { use super::*; use super::super::index::IndexEntryBuilder; #[cfg(test)] pub struct RevlogBuilder { version: u16, is_general_delta: bool, is_inline: bool, offset: usize, index: Vec<Vec<u8>>, data: Vec<Vec<u8>>, } #[cfg(test)] impl RevlogBuilder { pub fn new() -> Self { Self { version: 2, is_inline: false, is_general_delta: true, offset: 0, index: vec![], data: vec![], } } pub fn with_inline(&mut self, value: bool) -> &mut Self { self.is_inline = value; self } pub fn with_general_delta(&mut self, value: bool) -> &mut Self { self.is_general_delta = value; self } pub fn with_version(&mut self, value: u16) -> &mut Self { self.version = value; self } pub fn push( &mut self, mut index: IndexEntryBuilder, data: Vec<u8>, ) -> &mut Self { if self.index.is_empty() { index.is_first(true); index.with_general_delta(self.is_general_delta); index.with_inline(self.is_inline); index.with_version(self.version); } else { index.with_offset(self.offset); } self.index.push(index.build()); self.offset += data.len(); self.data.push(data); self } pub fn build_inline(&self) -> Vec<u8> { let mut bytes = Vec::with_capacity(self.index.len() + self.data.len()); for (index, data) in self.index.iter().zip(self.data.iter()) { bytes.extend(index); bytes.extend(data); } bytes } } #[test] fn is_not_inline_when_no_inline_flag_test() { let bytes = RevlogBuilder::new() .with_general_delta(false) .with_inline(false) .push(IndexEntryBuilder::new(), vec![]) .build_inline(); assert_eq!(is_inline(&bytes), false) } #[test] fn is_inline_when_inline_flag_test() { let bytes = RevlogBuilder::new() .with_general_delta(false) .with_inline(true) .push(IndexEntryBuilder::new(), vec![]) .build_inline(); assert_eq!(is_inline(&bytes), true) } #[test] fn is_inline_when_inline_and_generaldelta_flags_test() { let bytes = RevlogBuilder::new() .with_general_delta(true) .with_inline(true) .push(IndexEntryBuilder::new(), vec![]) .build_inline(); assert_eq!(is_inline(&bytes), true) } #[test] fn version_test() { let bytes = RevlogBuilder::new() .with_version(1) .push(IndexEntryBuilder::new(), vec![]) .build_inline(); assert_eq!(get_version(&bytes), 1) } }