Mercurial > public > mercurial-scm > hg-stable
view rust/hg-core/src/revlog/index.rs @ 46155:fce2f20a54ce
copies-rust: start recording overwrite as they happens
If a revision has information overwriting data from another revision, the
overwriting revision is a descendant of the overwritten one. So we could warm
the Oracle cache with such information to avoid potential future `is_ancestors`
call.
This provide us with a large speedup in the most expensive cases:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev
Full comparison below:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, ? 1.0000, 42 ?s/rev
mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, ? 0.9561, 18 ?s/rev
mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, ? 1.0039, 4 ?s/rev
pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, ? 1.2154, 26 ?s/rev
pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev
pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ? 1.0000, 16 ?s/rev
pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.6f1f4a s, 0.6f1f4a s, +0.000000 s, ? 1.0000, 322 ?s/rev
pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, ? 0.9920, 1783 ?s/rev
pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, ? 0.9926, 10 ?s/rev
pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, ? 0.9804, 11 ?s/rev
pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, ? 0.9784, 11 ?s/rev
pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, ? 0.9166, 12 ?s/rev
pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, ? 1.0051, 6434 ?s/rev
pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, ? 0.9934, 9 ?s/rev
netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, ? 0.9882, 42 ?s/rev
netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, ? 1.0000, 53 ?s/rev
netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, ? 0.9943, 58 ?s/rev
netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, ? 1.0069, 80 ?s/rev
netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, ? 0.9978, 7 ?s/rev
netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, ? 1.0085, 10 ?s/rev
netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ? 0.9840, 10 ?s/rev
netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, ? 0.9210, 7 ?s/rev
mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, ? 1.0337, 46 ?s/rev
mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, ? 1.0000, 34 ?s/rev
mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, ? 1.0109, 20 ?s/rev
mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, ? 0.9985, 94 ?s/rev
mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, ? 0.9985, 1124 ?s/rev
mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, ? 0.9970, 11715 ?s/rev
mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, ? 0.9953, 4 ?s/rev
mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, ? 1.0393, 121 ?s/rev
mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, ? 0.9765, 8 ?s/rev
mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, ? 1.0005, 42 ?s/rev
mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, ? 0.9760, 6 ?s/rev
mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, ? 0.7242, 8 ?s/rev
mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ? 0.6509, 8 ?s/rev
mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, ? 0.9941, 421 ?s/rev
mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, ? 0.9977, 432 ?s/rev
mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, ? 0.9938, 40 ?s/rev
mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, ? 0.9920, 561 ?s/rev
mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, ? 0.9888, 32743 ?s/rev
mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, ? 1.0061, 11921 ?s/rev
mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, ? 1.0060, 4 ?s/rev
mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, ? 1.0293, 129 ?s/rev
mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, ? 0.9693, 9 ?s/rev
mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, ? 1.0513, 7 ?s/rev
mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, ? 0.9968, 7 ?s/rev
mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, ? 0.9711, 9 ?s/rev
mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, ? 0.9917, 43 ?s/rev
mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ? 0.8617, 13 ?s/rev
mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, ? 0.9255, 13 ?s/rev
mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, ? 0.9197, 12 ?s/rev
mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, ? 0.9597, 16 ?s/rev
mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, ? 0.9551, 12 ?s/rev
mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, ? 0.9214, 12 ?s/rev
mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, ? 0.6540, 8 ?s/rev
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev
Differential Revision: https://phab.mercurial-scm.org/D9497
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sat, 21 Nov 2020 17:00:32 +0100 |
parents | 9eb07ab3f2d4 |
children | 43d63979a75e |
line wrap: on
line source
use std::convert::TryInto; use std::ops::Deref; use byteorder::{BigEndian, ByteOrder}; use crate::revlog::node::Node; use crate::revlog::revlog::RevlogError; use crate::revlog::{Revision, NULL_REVISION}; pub const INDEX_ENTRY_SIZE: usize = 64; /// A Revlog index pub struct Index { bytes: Box<dyn Deref<Target = [u8]> + Send>, /// Offsets of starts of index blocks. /// Only needed when the index is interleaved with data. offsets: Option<Vec<usize>>, } impl Index { /// Create an index from bytes. /// Calculate the start of each entry when is_inline is true. pub fn new( bytes: Box<dyn Deref<Target = [u8]> + Send>, ) -> Result<Self, RevlogError> { if is_inline(&bytes) { let mut offset: usize = 0; let mut offsets = Vec::new(); while offset + INDEX_ENTRY_SIZE <= bytes.len() { offsets.push(offset); let end = offset + INDEX_ENTRY_SIZE; let entry = IndexEntry { bytes: &bytes[offset..end], offset_override: None, }; offset += INDEX_ENTRY_SIZE + entry.compressed_len(); } if offset == bytes.len() { Ok(Self { bytes, offsets: Some(offsets), }) } else { Err(RevlogError::Corrupted) } } else { Ok(Self { bytes, offsets: None, }) } } /// Value of the inline flag. pub fn is_inline(&self) -> bool { is_inline(&self.bytes) } /// Return a slice of bytes if `revlog` is inline. Panic if not. pub fn data(&self, start: usize, end: usize) -> &[u8] { if !self.is_inline() { panic!("tried to access data in the index of a revlog that is not inline"); } &self.bytes[start..end] } /// Return number of entries of the revlog index. pub fn len(&self) -> usize { if let Some(offsets) = &self.offsets { offsets.len() } else { self.bytes.len() / INDEX_ENTRY_SIZE } } /// Returns `true` if the `Index` has zero `entries`. pub fn is_empty(&self) -> bool { self.len() == 0 } /// Return the index entry corresponding to the given revision if it /// exists. pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { if rev == NULL_REVISION { return None; } if let Some(offsets) = &self.offsets { self.get_entry_inline(rev, offsets) } else { self.get_entry_separated(rev) } } fn get_entry_inline( &self, rev: Revision, offsets: &[usize], ) -> Option<IndexEntry> { let start = *offsets.get(rev as usize)?; let end = start.checked_add(INDEX_ENTRY_SIZE)?; let bytes = &self.bytes[start..end]; // See IndexEntry for an explanation of this override. let offset_override = Some(end); Some(IndexEntry { bytes, offset_override, }) } fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> { let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; if rev as usize >= max_rev { return None; } let start = rev as usize * INDEX_ENTRY_SIZE; let end = start + INDEX_ENTRY_SIZE; let bytes = &self.bytes[start..end]; // Override the offset of the first revision as its bytes are used // for the index's metadata (saving space because it is always 0) let offset_override = if rev == 0 { Some(0) } else { None }; Some(IndexEntry { bytes, offset_override, }) } } impl super::RevlogIndex for Index { fn len(&self) -> usize { self.len() } fn node(&self, rev: Revision) -> Option<&Node> { self.get_entry(rev).map(|entry| entry.hash()) } } #[derive(Debug)] pub struct IndexEntry<'a> { bytes: &'a [u8], /// Allows to override the offset value of the entry. /// /// For interleaved index and data, the offset stored in the index /// corresponds to the separated data offset. /// It has to be overridden with the actual offset in the interleaved /// index which is just after the index block. /// /// For separated index and data, the offset stored in the first index /// entry is mixed with the index headers. /// It has to be overridden with 0. offset_override: Option<usize>, } impl<'a> IndexEntry<'a> { /// Return the offset of the data. pub fn offset(&self) -> usize { if let Some(offset_override) = self.offset_override { offset_override } else { let mut bytes = [0; 8]; bytes[2..8].copy_from_slice(&self.bytes[0..=5]); BigEndian::read_u64(&bytes[..]) as usize } } /// Return the compressed length of the data. pub fn compressed_len(&self) -> usize { BigEndian::read_u32(&self.bytes[8..=11]) as usize } /// Return the uncompressed length of the data. pub fn uncompressed_len(&self) -> usize { BigEndian::read_u32(&self.bytes[12..=15]) as usize } /// Return the revision upon which the data has been derived. pub fn base_revision(&self) -> Revision { // TODO Maybe return an Option when base_revision == rev? // Requires to add rev to IndexEntry BigEndian::read_i32(&self.bytes[16..]) } pub fn p1(&self) -> Revision { BigEndian::read_i32(&self.bytes[24..]) } pub fn p2(&self) -> Revision { BigEndian::read_i32(&self.bytes[28..]) } /// Return the hash of revision's full text. /// /// Currently, SHA-1 is used and only the first 20 bytes of this field /// are used. pub fn hash(&self) -> &'a Node { (&self.bytes[32..52]).try_into().unwrap() } } /// Value of the inline flag. pub fn is_inline(index_bytes: &[u8]) -> bool { match &index_bytes[0..=1] { [0, 0] | [0, 2] => false, _ => true, } } #[cfg(test)] mod tests { use super::*; #[cfg(test)] #[derive(Debug, Copy, Clone)] pub struct IndexEntryBuilder { is_first: bool, is_inline: bool, is_general_delta: bool, version: u16, offset: usize, compressed_len: usize, uncompressed_len: usize, base_revision: Revision, } #[cfg(test)] impl IndexEntryBuilder { pub fn new() -> Self { Self { is_first: false, is_inline: false, is_general_delta: true, version: 2, offset: 0, compressed_len: 0, uncompressed_len: 0, base_revision: 0, } } pub fn is_first(&mut self, value: bool) -> &mut Self { self.is_first = value; self } pub fn with_inline(&mut self, value: bool) -> &mut Self { self.is_inline = value; self } pub fn with_general_delta(&mut self, value: bool) -> &mut Self { self.is_general_delta = value; self } pub fn with_version(&mut self, value: u16) -> &mut Self { self.version = value; self } pub fn with_offset(&mut self, value: usize) -> &mut Self { self.offset = value; self } pub fn with_compressed_len(&mut self, value: usize) -> &mut Self { self.compressed_len = value; self } pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self { self.uncompressed_len = value; self } pub fn with_base_revision(&mut self, value: Revision) -> &mut Self { self.base_revision = value; self } pub fn build(&self) -> Vec<u8> { let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); if self.is_first { bytes.extend(&match (self.is_general_delta, self.is_inline) { (false, false) => [0u8, 0], (false, true) => [0u8, 1], (true, false) => [0u8, 2], (true, true) => [0u8, 3], }); bytes.extend(&self.version.to_be_bytes()); // Remaining offset bytes. bytes.extend(&[0u8; 2]); } else { // Offset is only 6 bytes will usize is 8. bytes.extend(&self.offset.to_be_bytes()[2..]); } bytes.extend(&[0u8; 2]); // Revision flags. bytes.extend(&self.compressed_len.to_be_bytes()[4..]); bytes.extend(&self.uncompressed_len.to_be_bytes()[4..]); bytes.extend(&self.base_revision.to_be_bytes()); bytes } } #[test] fn is_not_inline_when_no_inline_flag_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(false) .with_inline(false) .build(); assert_eq!(is_inline(&bytes), false) } #[test] fn is_inline_when_inline_flag_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(false) .with_inline(true) .build(); assert_eq!(is_inline(&bytes), true) } #[test] fn is_inline_when_inline_and_generaldelta_flags_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(true) .with_inline(true) .build(); assert_eq!(is_inline(&bytes), true) } #[test] fn test_offset() { let bytes = IndexEntryBuilder::new().with_offset(1).build(); let entry = IndexEntry { bytes: &bytes, offset_override: None, }; assert_eq!(entry.offset(), 1) } #[test] fn test_with_overridden_offset() { let bytes = IndexEntryBuilder::new().with_offset(1).build(); let entry = IndexEntry { bytes: &bytes, offset_override: Some(2), }; assert_eq!(entry.offset(), 2) } #[test] fn test_compressed_len() { let bytes = IndexEntryBuilder::new().with_compressed_len(1).build(); let entry = IndexEntry { bytes: &bytes, offset_override: None, }; assert_eq!(entry.compressed_len(), 1) } #[test] fn test_uncompressed_len() { let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build(); let entry = IndexEntry { bytes: &bytes, offset_override: None, }; assert_eq!(entry.uncompressed_len(), 1) } #[test] fn test_base_revision() { let bytes = IndexEntryBuilder::new().with_base_revision(1).build(); let entry = IndexEntry { bytes: &bytes, offset_override: None, }; assert_eq!(entry.base_revision(), 1) } } #[cfg(test)] pub use tests::IndexEntryBuilder;