Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/revlog/index.rs @ 45526:26c53ee51c68
hg-core: Add a limited read only `revlog` implementation
Only covers the needs of the upcoming `rhg debugdata` command.
Differential Revision: https://phab.mercurial-scm.org/D8958
author | Antoine Cezar <antoine.cezar@octobus.net> |
---|---|
date | Fri, 04 Sep 2020 11:55:07 +0200 |
parents | |
children | b0d6309ff50c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/revlog/index.rs Fri Sep 04 11:55:07 2020 +0200 @@ -0,0 +1,299 @@ +use crate::revlog::{Revision, NULL_REVISION}; +use byteorder::{BigEndian, ByteOrder}; + +pub const INDEX_ENTRY_SIZE: usize = 64; + +/// A Revlog index +#[derive(Debug)] +pub struct Index<'a> { + bytes: &'a [u8], + /// Offsets of starts of index blocks. + /// Only needed when the index is interleaved with data. + offsets: Option<Vec<usize>>, +} + +impl<'a> Index<'a> { + /// Create an index from bytes. + /// Calculate the start of each entry when is_inline is true. + pub fn new(bytes: &'a [u8], is_inline: bool) -> Self { + if is_inline { + let mut offset: usize = 0; + let mut offsets = Vec::new(); + + while (bytes.len() - offset) >= INDEX_ENTRY_SIZE { + offsets.push(offset); + let end = offset + INDEX_ENTRY_SIZE; + let entry = IndexEntry { + bytes: &bytes[offset..end], + offset_override: None, + }; + + offset += INDEX_ENTRY_SIZE + entry.compressed_len(); + } + + Self { + bytes, + offsets: Some(offsets), + } + } else { + Self { + bytes, + offsets: None, + } + } + } + + /// Return the index entry corresponding to the given revision if it + /// exists. + pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { + if rev == NULL_REVISION { + return None; + } + if let Some(offsets) = &self.offsets { + self.get_entry_inline(rev, offsets) + } else { + self.get_entry_separated(rev) + } + } + + fn get_entry_inline( + &self, + rev: Revision, + offsets: &[usize], + ) -> Option<IndexEntry> { + let start = *offsets.get(rev as usize)?; + let end = start.checked_add(INDEX_ENTRY_SIZE)?; + let bytes = &self.bytes[start..end]; + + // See IndexEntry for an explanation of this override. + let offset_override = Some(end); + + Some(IndexEntry { + bytes, + offset_override, + }) + } + + fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> { + let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; + if rev as usize >= max_rev { + return None; + } + let start = rev as usize * INDEX_ENTRY_SIZE; + let end = start + INDEX_ENTRY_SIZE; + let bytes = &self.bytes[start..end]; + + // See IndexEntry for an explanation of this override. + let offset_override = match rev { + 0 => Some(0), + _ => None, + }; + + Some(IndexEntry { + bytes, + offset_override, + }) + } +} + +#[derive(Debug)] +pub struct IndexEntry<'a> { + bytes: &'a [u8], + /// Allows to override the offset value of the entry. + /// + /// For interleaved index and data, the offset stored in the index + /// corresponds to the separated data offset. + /// It has to be overridden with the actual offset in the interleaved + /// index which is just after the index block. + /// + /// For separated index and data, the offset stored in the first index + /// entry is mixed with the index headers. + /// It has to be overridden with 0. + offset_override: Option<usize>, +} + +impl<'a> IndexEntry<'a> { + /// Return the offset of the data if not overridden by offset_override. + pub fn offset(&self) -> usize { + if let Some(offset_override) = self.offset_override { + offset_override + } else { + let mut bytes = [0; 8]; + bytes[2..8].copy_from_slice(&self.bytes[0..=5]); + BigEndian::read_u64(&bytes[..]) as usize + } + } + + /// Return the compressed length of the data. + pub fn compressed_len(&self) -> usize { + BigEndian::read_u32(&self.bytes[8..=11]) as usize + } + + /// Return the uncompressed length of the data. + pub fn uncompressed_len(&self) -> usize { + BigEndian::read_u32(&self.bytes[12..=15]) as usize + } + + /// Return the revision upon which the data has been derived. + pub fn base_revision(&self) -> Revision { + // TODO Maybe return an Option when base_revision == rev? + // Requires to add rev to IndexEntry + + BigEndian::read_i32(&self.bytes[16..]) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[cfg(test)] + #[derive(Debug, Copy, Clone)] + pub struct IndexEntryBuilder { + is_first: bool, + is_inline: bool, + is_general_delta: bool, + version: u16, + offset: usize, + compressed_len: usize, + uncompressed_len: usize, + base_revision: Revision, + } + + #[cfg(test)] + impl IndexEntryBuilder { + pub fn new() -> Self { + Self { + is_first: false, + is_inline: false, + is_general_delta: true, + version: 2, + offset: 0, + compressed_len: 0, + uncompressed_len: 0, + base_revision: 0, + } + } + + pub fn is_first(&mut self, value: bool) -> &mut Self { + self.is_first = value; + self + } + + pub fn with_inline(&mut self, value: bool) -> &mut Self { + self.is_inline = value; + self + } + + pub fn with_general_delta(&mut self, value: bool) -> &mut Self { + self.is_general_delta = value; + self + } + + pub fn with_version(&mut self, value: u16) -> &mut Self { + self.version = value; + self + } + + pub fn with_offset(&mut self, value: usize) -> &mut Self { + self.offset = value; + self + } + + pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { + self.compressed_len = value; + self + } + + pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { + self.uncompressed_len = value; + self + } + + pub fn with_base_revision(&mut self, value: Revision) -> &mut Self { + self.base_revision = value; + self + } + + pub fn build(&self) -> Vec<u8> { + let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); + if self.is_first { + bytes.extend(&match (self.is_general_delta, self.is_inline) { + (false, false) => [0u8, 0], + (false, true) => [0u8, 1], + (true, false) => [0u8, 2], + (true, true) => [0u8, 3], + }); + bytes.extend(&self.version.to_be_bytes()); + // Remaining offset bytes. + bytes.extend(&[0u8; 2]); + } else { + // Offset is only 6 bytes will usize is 8. + bytes.extend(&self.offset.to_be_bytes()[2..]); + } + bytes.extend(&[0u8; 2]); // Revision flags. + bytes.extend(&self.compressed_len.to_be_bytes()[4..]); + bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]); + bytes.extend(&self.base_revision.to_be_bytes()); + bytes + } + } + + #[test] + fn test_offset() { + let bytes = IndexEntryBuilder::new().with_offset(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.offset(), 1) + } + + #[test] + fn test_with_overridden_offset() { + let bytes = IndexEntryBuilder::new().with_offset(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: Some(2), + }; + + assert_eq!(entry.offset(), 2) + } + + #[test] + fn test_compressed_len() { + let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.compressed_len(), 1) + } + + #[test] + fn test_uncompressed_len() { + let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.uncompressed_len(), 1) + } + + #[test] + fn test_base_revision() { + let bytes = IndexEntryBuilder::new().with_base_revision(1).build(); + let entry = IndexEntry { + bytes: &bytes, + offset_override: None, + }; + + assert_eq!(entry.base_revision(), 1) + } +} + +#[cfg(test)] +pub use tests::IndexEntryBuilder;