diff rust/hg-core/src/revlog/patch.rs @ 45532:26c53ee51c68

hg-core: Add a limited read only `revlog` implementation Only covers the needs of the upcoming `rhg debugdata` command. Differential Revision: https://phab.mercurial-scm.org/D8958
author Antoine Cezar <antoine.cezar@octobus.net>
date Fri, 04 Sep 2020 11:55:07 +0200
parents
children 48438e8968a2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/patch.rs	Fri Sep 04 11:55:07 2020 +0200
@@ -0,0 +1,367 @@
+use byteorder::{BigEndian, ByteOrder};
+
+/// A chunk of data to insert, delete or replace in a patch
+///
+/// A chunk is:
+/// - an insertion when `!data.is_empty() && start == end`
+/// - an deletion when `data.is_empty() && start < end`
+/// - a replacement when `!data.is_empty() && start < end`
+/// - not doing anything when `data.is_empty() && start == end`
+#[derive(Debug, Clone)]
+struct PatchFrag<'a> {
+    /// The start position of the chunk of data to replace
+    start: i32,
+    /// The end position of the chunk of data to replace (open end interval)
+    end: i32,
+    /// The data replacing the chunk
+    data: &'a [u8],
+}
+
+impl<'a> PatchFrag<'a> {
+    /// Adjusted start of the chunk to replace.
+    ///
+    /// Offset allow to take into account the growth/shrinkage of data
+    /// induced by previously applied chunks.
+    fn start_offseted_by(&self, offset: i32) -> i32 {
+        self.start + offset
+    }
+
+    /// Adjusted end of the chunk to replace.
+    ///
+    /// Offset allow to take into account the growth/shrinkage of data
+    /// induced by previously applied chunks.
+    fn end_offseted_by(&self, offset: i32) -> i32 {
+        self.start_offseted_by(offset) + (self.data.len() as i32)
+    }
+
+    /// Length of the replaced chunk.
+    fn replaced_len(&self) -> i32 {
+        self.end - self.start
+    }
+
+    /// Length difference between the replacing data and the replaced data.
+    fn len_diff(&self) -> i32 {
+        (self.data.len() as i32) - self.replaced_len()
+    }
+}
+
+/// The delta between two revisions data.
+#[derive(Debug, Clone)]
+pub struct PatchList<'a> {
+    /// A collection of chunks to apply.
+    ///
+    /// Those chunks are:
+    /// - ordered from the left-most replacement to the right-most replacement
+    /// - non-overlapping, meaning that two chucks can not change the same
+    ///   chunk of the patched data
+    frags: Vec<PatchFrag<'a>>,
+}
+
+impl<'a> PatchList<'a> {
+    /// Create a `PatchList` from bytes.
+    pub fn new(data: &'a [u8]) -> Self {
+        let mut frags = vec![];
+        let mut data = data;
+        while !data.is_empty() {
+            let start = BigEndian::read_i32(&data[0..]);
+            let end = BigEndian::read_i32(&data[4..]);
+            let len = BigEndian::read_i32(&data[8..]);
+            assert!(0 <= start && start <= end && len >= 0);
+            frags.push(PatchFrag {
+                start,
+                end,
+                data: &data[12..12 + (len as usize)],
+            });
+            data = &data[12 + (len as usize)..];
+        }
+        PatchList { frags }
+    }
+
+    /// Return the final length of data after patching
+    /// given its initial length .
+    fn size(&self, initial_size: i32) -> i32 {
+        self.frags
+            .iter()
+            .fold(initial_size, |acc, frag| acc + frag.len_diff())
+    }
+
+    /// Apply the patch to some data.
+    pub fn apply(&self, initial: &[u8]) -> Vec<u8> {
+        let mut last: usize = 0;
+        let mut vec =
+            Vec::with_capacity(self.size(initial.len() as i32) as usize);
+        for PatchFrag { start, end, data } in self.frags.iter() {
+            vec.extend(&initial[last..(*start as usize)]);
+            vec.extend(data.iter());
+            last = *end as usize;
+        }
+        vec.extend(&initial[last..]);
+        vec
+    }
+
+    /// Combine two patch lists into a single patch list.
+    ///
+    /// Applying consecutive patches can lead to waste of time and memory
+    /// as the changes introduced by one patch can be overridden by the next.
+    /// Combining patches optimizes the whole patching sequence.
+    fn combine(&mut self, other: &mut Self) -> Self {
+        let mut frags = vec![];
+
+        // Keep track of each growth/shrinkage resulting from applying a chunk
+        // in order to adjust the start/end of subsequent chunks.
+        let mut offset = 0i32;
+
+        // Keep track of the chunk of self.chunks to process.
+        let mut pos = 0;
+
+        // For each chunk of `other`, chunks of `self` are processed
+        // until they start after the end of the current chunk.
+        for PatchFrag { start, end, data } in other.frags.iter() {
+            // Add chunks of `self` that start before this chunk of `other`
+            // without overlap.
+            while pos < self.frags.len()
+                && self.frags[pos].end_offseted_by(offset) <= *start
+            {
+                let first = self.frags[pos].clone();
+                offset += first.len_diff();
+                frags.push(first);
+                pos += 1;
+            }
+
+            // The current chunk of `self` starts before this chunk of `other`
+            // with overlap.
+            // The left-most part of data is added as an insertion chunk.
+            // The right-most part data is kept in the chunk.
+            if pos < self.frags.len()
+                && self.frags[pos].start_offseted_by(offset) < *start
+            {
+                let first = &mut self.frags[pos];
+
+                let (data_left, data_right) = first.data.split_at(
+                    (*start - first.start_offseted_by(offset)) as usize,
+                );
+                let left = PatchFrag {
+                    start: first.start,
+                    end: first.start,
+                    data: data_left,
+                };
+
+                first.data = data_right;
+
+                offset += left.len_diff();
+
+                frags.push(left);
+
+                // There is no index incrementation because the right-most part
+                // needs further examination.
+            }
+
+            // At this point remaining chunks of `self` starts after
+            // the current chunk of `other`.
+
+            // `start_offset` will be used to adjust the start of the current
+            // chunk of `other`.
+            // Offset tracking continues with `end_offset` to adjust the end
+            // of the current chunk of `other`.
+            let mut next_offset = offset;
+
+            // Discard the chunks of `self` that are totally overridden
+            // by the current chunk of `other`
+            while pos < self.frags.len()
+                && self.frags[pos].end_offseted_by(next_offset) <= *end
+            {
+                let first = &self.frags[pos];
+                next_offset += first.len_diff();
+                pos += 1;
+            }
+
+            // Truncate the left-most part of chunk of `self` that overlaps
+            // the current chunk of `other`.
+            if pos < self.frags.len()
+                && self.frags[pos].start_offseted_by(next_offset) < *end
+            {
+                let first = &mut self.frags[pos];
+
+                let how_much_to_discard =
+                    *end - first.start_offseted_by(next_offset);
+
+                first.data = &first.data[(how_much_to_discard as usize)..];
+
+                next_offset += how_much_to_discard;
+            }
+
+            // Add the chunk of `other` with adjusted position.
+            frags.push(PatchFrag {
+                start: *start - offset,
+                end: *end - next_offset,
+                data,
+            });
+
+            // Go back to normal offset tracking for the next `o` chunk
+            offset = next_offset;
+        }
+
+        // Add remaining chunks of `self`.
+        for elt in &self.frags[pos..] {
+            frags.push(elt.clone());
+        }
+        PatchList { frags }
+    }
+}
+
+/// Combine a list of patch list into a single patch optimized patch list.
+pub fn fold_patch_lists<'a>(lists: &[PatchList<'a>]) -> PatchList<'a> {
+    if lists.len() <= 1 {
+        if lists.is_empty() {
+            PatchList { frags: vec![] }
+        } else {
+            lists[0].clone()
+        }
+    } else {
+        let (left, right) = lists.split_at(lists.len() / 2);
+        let mut left_res = fold_patch_lists(left);
+        let mut right_res = fold_patch_lists(right);
+        left_res.combine(&mut right_res)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct PatchDataBuilder {
+        data: Vec<u8>,
+    }
+
+    impl PatchDataBuilder {
+        pub fn new() -> Self {
+            Self { data: vec![] }
+        }
+
+        pub fn replace(
+            &mut self,
+            start: usize,
+            end: usize,
+            data: &[u8],
+        ) -> &mut Self {
+            assert!(start <= end);
+            self.data.extend(&(start as i32).to_be_bytes());
+            self.data.extend(&(end as i32).to_be_bytes());
+            self.data.extend(&(data.len() as i32).to_be_bytes());
+            self.data.extend(data.iter());
+            self
+        }
+
+        pub fn get(&mut self) -> &[u8] {
+            &self.data[..]
+        }
+    }
+
+    #[test]
+    fn test_ends_before() {
+        let data = vec![0u8, 0u8, 0u8];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(0, 1, &[1, 2]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(2, 4, &[3, 4]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![1u8, 2, 3, 4]);
+    }
+
+    #[test]
+    fn test_starts_after() {
+        let data = vec![0u8, 0u8, 0u8];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(2, 3, &[3]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(1, 2, &[1, 2]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![0u8, 1, 2, 3]);
+    }
+
+    #[test]
+    fn test_overridden() {
+        let data = vec![0u8, 0, 0];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(1, 2, &[3, 4]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(1, 4, &[1, 2, 3]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![0u8, 1, 2, 3]);
+    }
+
+    #[test]
+    fn test_right_most_part_is_overridden() {
+        let data = vec![0u8, 0, 0];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(0, 1, &[1, 3]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(1, 4, &[2, 3, 4]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![1u8, 2, 3, 4]);
+    }
+
+    #[test]
+    fn test_left_most_part_is_overridden() {
+        let data = vec![0u8, 0, 0];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(1, 3, &[1, 3, 4]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(0, 2, &[1, 2]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![1u8, 2, 3, 4]);
+    }
+
+    #[test]
+    fn test_mid_is_overridden() {
+        let data = vec![0u8, 0, 0];
+        let mut patch1_data = PatchDataBuilder::new();
+        patch1_data.replace(0, 3, &[1, 3, 3, 4]);
+        let mut patch1 = PatchList::new(patch1_data.get());
+
+        let mut patch2_data = PatchDataBuilder::new();
+        patch2_data.replace(1, 3, &[2, 3]);
+        let mut patch2 = PatchList::new(patch2_data.get());
+
+        let patch = patch1.combine(&mut patch2);
+
+        let result = patch.apply(&data);
+
+        assert_eq!(result, vec![1u8, 2, 3, 4]);
+    }
+}