Mercurial > public > mercurial-scm > hg-stable
view rust/hg-core/src/copy_tracing.rs @ 46621:b0a3ca02d17a
copies-rust: implement PartialEqual manually
Now that we know that each (dest, rev) pair has at most a unique CopySource, we
can simplify comparison a lot.
This "simple" step buy a good share of the previous slowdown back in some case:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 43.304637 s, 34.443661 s, -8.860976 s, ? 0.7954, 90 ?s/rev
Full benchmark:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000043 s, 0.000043 s, +0.000000 s, ? 1.0000, 43 ?s/rev
mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000117 s, +0.000003 s, ? 1.0263, 19 ?s/rev
mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004937 s, 0.004892 s, -0.000045 s, ? 0.9909, 4 ?s/rev
pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000339 s, 0.000196 s, -0.000143 s, ? 0.5782, 21 ?s/rev
pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000049 s, 0.000050 s, +0.000001 s, ? 1.0204, 50 ?s/rev
pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000202 s, 0.000117 s, -0.000085 s, ? 0.5792, 16 ?s/rev
pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000409 s, 0.6f1f4a s, -0.000087 s, ? 0.7873, 322 ?s/rev
pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.011984 s, 0.011949 s, -0.000035 s, ? 0.9971, 1991 ?s/rev
pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050820 s, 0.050802 s, -0.000018 s, ? 0.9996, 10 ?s/rev
pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.087953 s, 0.088090 s, +0.000137 s, ? 1.0016, 12 ?s/rev
pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.062902 s, 0.062079 s, -0.000823 s, ? 0.9869, 11 ?s/rev
pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.679234 s, 0.635337 s, -0.043897 s, ? 0.9354, 14 ?s/rev
pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.013095 s, 0.013262 s, +0.000167 s, ? 1.0128, 6631 ?s/rev
pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.120910 s, 0.120085 s, -0.000825 s, ? 0.9932, 10 ?s/rev
netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000087 s, 0.000085 s, -0.000002 s, ? 0.9770, 42 ?s/rev
netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000107 s, 0.000110 s, +0.000003 s, ? 1.0280, 55 ?s/rev
netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000186 s, 0.000177 s, -0.000009 s, ? 0.9516, 59 ?s/rev
netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000754 s, 0.000743 s, -0.000011 s, ? 0.9854, 82 ?s/rev
netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010443 s, 0.010168 s, -0.000275 s, ? 0.9737, 7 ?s/rev
netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015697 s, 0.015946 s, +0.000249 s, ? 1.0159, 10 ?s/rev
netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.063528 s, 0.062712 s, -0.000816 s, ? 0.9872, 10 ?s/rev
netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.545515 s, 0.523832 s, -0.021683 s, ? 0.9603, 7 ?s/rev
mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000090 s, +0.000001 s, ? 1.0112, 45 ?s/rev
mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000265 s, 0.000264 s, -0.000001 s, ? 0.9962, 33 ?s/rev
mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000381 s, 0.000187 s, -0.000194 s, ? 0.4908, 20 ?s/rev
mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000672 s, 0.000665 s, -0.000007 s, ? 0.9896, 95 ?s/rev
mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003497 s, 0.003556 s, +0.000059 s, ? 1.0169, 1185 ?s/rev
mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.073204 s, 0.071345 s, -0.001859 s, ? 0.9746, 11890 ?s/rev
mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006482 s, 0.006551 s, +0.000069 s, ? 1.0106, 4 ?s/rev
mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005066 s, 0.005078 s, +0.000012 s, ? 1.0024, 123 ?s/rev
mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.065707 s, 0.065823 s, +0.000116 s, ? 1.0018, 8 ?s/rev
mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026800 s, 0.027050 s, +0.000250 s, ? 1.0093, 43 ?s/rev
mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203856 s, 0.202443 s, -0.001413 s, ? 0.9931, 6 ?s/rev
mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.293394 s, 1.261583 s, -0.031811 s, ? 0.9754, 8 ?s/rev
mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 1.698239 s, 1.643869 s, -0.054370 s, ? 0.9680, 8 ?s/rev
mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000875 s, 0.000868 s, -0.000007 s, ? 0.9920, 434 ?s/rev
mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000891 s, 0.000887 s, -0.000004 s, ? 0.9955, 443 ?s/rev
mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000292 s, 0.000168 s, -0.000124 s, ? 0.5753, 42 ?s/rev
mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.003939 s, 0.001160 s, -0.002779 s, ? 0.2945, 580 ?s/rev
mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033027 s, 0.033016 s, -0.000011 s, ? 0.9997, 33016 ?s/rev
mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.073703 s, 0.073312 s, -0.39ae31 s, ? 0.9947, 12218 ?s/rev
mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006469 s, 0.006485 s, +0.000016 s, ? 1.0025, 4 ?s/rev
mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005278 s, 0.005494 s, +0.000216 s, ? 1.0409, 134 ?s/rev
mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.064995 s, 0.064879 s, -0.000116 s, ? 0.9982, 9 ?s/rev
mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.301041 s, 0.301469 s, +0.000428 s, ? 1.0014, 7 ?s/rev
mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.285575 s, 0.297113 s, +0.011538 s, ? 1.0404, 7 ?s/rev
mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.085597 s, 0.085890 s, +0.000293 s, ? 1.0034, 9 ?s/rev
mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.027118 s, 0.027718 s, +0.000600 s, ? 1.0221, 45 ?s/rev
mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 2.119204 s, 2.048949 s, -0.070255 s, ? 0.9668, 21 ?s/rev
mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.701479 s, 0.685924 s, -0.015555 s, ? 0.9778, 13 ?s/rev
mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.482399 s, 4.482891 s, +0.000492 s, ? 1.0001, 12 ?s/rev
mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.574082 s, 0.577633 s, +0.003551 s, ? 1.0062, 16 ?s/rev
mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.480366 s, 4.397816 s, -0.082550 s, ? 0.9816, 12 ?s/rev
mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.369070 s, 4.370538 s, +0.001468 s, ? 1.0003, 12 ?s/rev
mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 1.592506 s, 1.570439 s, -0.022067 s, ? 0.9861, 8 ?s/rev
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 87.824489 s, 88.388512 s, +0.564023 s, ? 1.0064, 386 ?s/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 43.304637 s, 34.443661 s, -8.860976 s, ? 0.7954, 90 ?s/rev
private : 459513 revs, 33.853687 s, 27.370148 s, -6.483539 s, ? 0.8085, 59 ?s/rev
Differential Revision: https://phab.mercurial-scm.org/D9653
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 16 Dec 2020 11:11:05 +0100 |
parents | d6d57bfc1a1b |
children | 8fcf07e6bbb4 |
line wrap: on
line source
use crate::utils::hg_path::HgPath; use crate::utils::hg_path::HgPathBuf; use crate::Revision; use crate::NULL_REVISION; use im_rc::ordmap::DiffItem; use im_rc::ordmap::Entry; use im_rc::ordmap::OrdMap; use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; use std::convert::TryInto; pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>; type PathToken = usize; #[derive(Clone, Debug)] struct CopySource { /// revision at which the copy information was added rev: Revision, /// the copy source, (Set to None in case of deletion of the associated /// key) path: Option<PathToken>, /// a set of previous `CopySource.rev` value directly or indirectly /// overwritten by this one. overwritten: HashSet<Revision>, } impl CopySource { /// create a new CopySource /// /// Use this when no previous copy source existed. fn new(rev: Revision, path: Option<PathToken>) -> Self { Self { rev, path, overwritten: HashSet::new(), } } /// create a new CopySource from merging two others /// /// Use this when merging two InternalPathCopies requires active merging of /// some entries. fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self { let mut overwritten = HashSet::new(); overwritten.extend(winner.overwritten.iter().copied()); overwritten.extend(loser.overwritten.iter().copied()); overwritten.insert(winner.rev); overwritten.insert(loser.rev); Self { rev, path: winner.path, overwritten: overwritten, } } /// Update the value of a pre-existing CopySource /// /// Use this when recording copy information from parent → child edges fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) { self.overwritten.insert(self.rev); self.rev = rev; self.path = path; } /// Mark pre-existing copy information as "dropped" by a file deletion /// /// Use this when recording copy information from parent → child edges fn mark_delete(&mut self, rev: Revision) { self.overwritten.insert(self.rev); self.rev = rev; self.path = None; } /// Mark pre-existing copy information as "dropped" by a file deletion /// /// Use this when recording copy information from parent → child edges fn mark_delete_with_pair(&mut self, rev: Revision, other: &Self) { self.overwritten.insert(self.rev); if other.rev != rev { self.overwritten.insert(other.rev); } self.overwritten.extend(other.overwritten.iter().copied()); self.rev = rev; self.path = None; } fn is_overwritten_by(&self, other: &Self) -> bool { other.overwritten.contains(&self.rev) } } // For the same "dest", content generated for a given revision will always be // the same. impl PartialEq for CopySource { fn eq(&self, other: &Self) -> bool { #[cfg(debug_assertions)] { if self.rev == other.rev { debug_assert!(self.path == other.path); debug_assert!(self.overwritten == other.overwritten); } } self.rev == other.rev } } /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation) type InternalPathCopies = OrdMap<PathToken, CopySource>; /// hold parent 1, parent 2 and relevant files actions. pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>); /// represent the files affected by a changesets /// /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need /// all the data categories tracked by it. /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need /// all the data categories tracked by it. pub struct ChangedFiles<'a> { nb_items: u32, index: &'a [u8], data: &'a [u8], } /// Represent active changes that affect the copy tracing. enum Action<'a> { /// The parent ? children edge is removing a file /// /// (actually, this could be the edge from the other parent, but it does /// not matters) Removed(&'a HgPath), /// The parent ? children edge introduce copy information between (dest, /// source) CopiedFromP1(&'a HgPath, &'a HgPath), CopiedFromP2(&'a HgPath, &'a HgPath), } /// This express the possible "special" case we can get in a merge /// /// See mercurial/metadata.py for details on these values. #[derive(PartialEq)] enum MergeCase { /// Merged: file had history on both side that needed to be merged Merged, /// Salvaged: file was candidate for deletion, but survived the merge Salvaged, /// Normal: Not one of the two cases above Normal, } type FileChange<'a> = (u8, &'a HgPath, &'a HgPath); const EMPTY: &[u8] = b""; const COPY_MASK: u8 = 3; const P1_COPY: u8 = 2; const P2_COPY: u8 = 3; const ACTION_MASK: u8 = 28; const REMOVED: u8 = 12; const MERGED: u8 = 8; const SALVAGED: u8 = 16; impl<'a> ChangedFiles<'a> { const INDEX_START: usize = 4; const ENTRY_SIZE: u32 = 9; const FILENAME_START: u32 = 1; const COPY_SOURCE_START: u32 = 5; pub fn new(data: &'a [u8]) -> Self { assert!( data.len() >= 4, "data size ({}) is too small to contain the header (4)", data.len() ); let nb_items_raw: [u8; 4] = (&data[0..=3]) .try_into() .expect("failed to turn 4 bytes into 4 bytes"); let nb_items = u32::from_be_bytes(nb_items_raw); let index_size = (nb_items * Self::ENTRY_SIZE) as usize; let index_end = Self::INDEX_START + index_size; assert!( data.len() >= index_end, "data size ({}) is too small to fit the index_data ({})", data.len(), index_end ); let ret = ChangedFiles { nb_items, index: &data[Self::INDEX_START..index_end], data: &data[index_end..], }; let max_data = ret.filename_end(nb_items - 1) as usize; assert!( ret.data.len() >= max_data, "data size ({}) is too small to fit all data ({})", data.len(), index_end + max_data ); ret } pub fn new_empty() -> Self { ChangedFiles { nb_items: 0, index: EMPTY, data: EMPTY, } } /// internal function to return an individual entry at a given index fn entry(&'a self, idx: u32) -> FileChange<'a> { if idx >= self.nb_items { panic!( "index for entry is higher that the number of file {} >= {}", idx, self.nb_items ) } let flags = self.flags(idx); let filename = self.filename(idx); let copy_idx = self.copy_idx(idx); let copy_source = self.filename(copy_idx); (flags, filename, copy_source) } /// internal function to return the filename of the entry at a given index fn filename(&self, idx: u32) -> &HgPath { let filename_start; if idx == 0 { filename_start = 0; } else { filename_start = self.filename_end(idx - 1) } let filename_end = self.filename_end(idx); let filename_start = filename_start as usize; let filename_end = filename_end as usize; HgPath::new(&self.data[filename_start..filename_end]) } /// internal function to return the flag field of the entry at a given /// index fn flags(&self, idx: u32) -> u8 { let idx = idx as usize; self.index[idx * (Self::ENTRY_SIZE as usize)] } /// internal function to return the end of a filename part at a given index fn filename_end(&self, idx: u32) -> u32 { let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START; let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START; let start = start as usize; let end = end as usize; let raw = (&self.index[start..end]) .try_into() .expect("failed to turn 4 bytes into 4 bytes"); u32::from_be_bytes(raw) } /// internal function to return index of the copy source of the entry at a /// given index fn copy_idx(&self, idx: u32) -> u32 { let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START; let end = (idx + 1) * Self::ENTRY_SIZE; let start = start as usize; let end = end as usize; let raw = (&self.index[start..end]) .try_into() .expect("failed to turn 4 bytes into 4 bytes"); u32::from_be_bytes(raw) } /// Return an iterator over all the `Action` in this instance. fn iter_actions(&self) -> ActionsIterator { ActionsIterator { changes: &self, current: 0, } } /// return the MergeCase value associated with a filename fn get_merge_case(&self, path: &HgPath) -> MergeCase { if self.nb_items == 0 { return MergeCase::Normal; } let mut low_part = 0; let mut high_part = self.nb_items; while low_part < high_part { let cursor = (low_part + high_part - 1) / 2; let (flags, filename, _source) = self.entry(cursor); match path.cmp(filename) { Ordering::Less => low_part = cursor + 1, Ordering::Greater => high_part = cursor, Ordering::Equal => { return match flags & ACTION_MASK { MERGED => MergeCase::Merged, SALVAGED => MergeCase::Salvaged, _ => MergeCase::Normal, }; } } } MergeCase::Normal } } struct ActionsIterator<'a> { changes: &'a ChangedFiles<'a>, current: u32, } impl<'a> Iterator for ActionsIterator<'a> { type Item = Action<'a>; fn next(&mut self) -> Option<Action<'a>> { while self.current < self.changes.nb_items { let (flags, file, source) = self.changes.entry(self.current); self.current += 1; if (flags & ACTION_MASK) == REMOVED { return Some(Action::Removed(file)); } let copy = flags & COPY_MASK; if copy == P1_COPY { return Some(Action::CopiedFromP1(file, source)); } else if copy == P2_COPY { return Some(Action::CopiedFromP2(file, source)); } } return None; } } /// A small struct whose purpose is to ensure lifetime of bytes referenced in /// ChangedFiles /// /// It is passed to the RevInfoMaker callback who can assign any necessary /// content to the `data` attribute. The copy tracing code is responsible for /// keeping the DataHolder alive at least as long as the ChangedFiles object. pub struct DataHolder<D> { /// RevInfoMaker callback should assign data referenced by the /// ChangedFiles struct it return to this attribute. The DataHolder /// lifetime will be at least as long as the ChangedFiles one. pub data: Option<D>, } pub type RevInfoMaker<'a, D> = Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>; /// A small "tokenizer" responsible of turning full HgPath into lighter /// PathToken /// /// Dealing with small object, like integer is much faster, so HgPath input are /// turned into integer "PathToken" and converted back in the end. #[derive(Clone, Debug, Default)] struct TwoWayPathMap { token: HashMap<HgPathBuf, PathToken>, path: Vec<HgPathBuf>, } impl TwoWayPathMap { fn tokenize(&mut self, path: &HgPath) -> PathToken { match self.token.get(path) { Some(a) => *a, None => { let a = self.token.len(); let buf = path.to_owned(); self.path.push(buf.clone()); self.token.insert(buf, a); a } } } fn untokenize(&self, token: PathToken) -> &HgPathBuf { assert!(token < self.path.len(), format!("Unknown token: {}", token)); &self.path[token] } } /// Same as mercurial.copies._combine_changeset_copies, but in Rust. /// /// Arguments are: /// /// revs: all revisions to be considered /// children: a {parent ? [childrens]} mapping /// target_rev: the final revision we are combining copies to /// rev_info(rev): callback to get revision information: /// * first parent /// * second parent /// * ChangedFiles /// isancestors(low_rev, high_rev): callback to check if a revision is an /// ancestor of another pub fn combine_changeset_copies<D>( revs: Vec<Revision>, mut children_count: HashMap<Revision, usize>, target_rev: Revision, rev_info: RevInfoMaker<D>, ) -> PathCopies { let mut all_copies = HashMap::new(); let mut path_map = TwoWayPathMap::default(); for rev in revs { let mut d: DataHolder<D> = DataHolder { data: None }; let (p1, p2, changes) = rev_info(rev, &mut d); // We will chain the copies information accumulated for the parent with // the individual copies information the curent revision. Creating a // new TimeStampedPath for each `rev` → `children` vertex. // Retrieve data computed in a previous iteration let p1_copies = match p1 { NULL_REVISION => None, _ => get_and_clean_parent_copies( &mut all_copies, &mut children_count, p1, ), // will be None if the vertex is not to be traversed }; let p2_copies = match p2 { NULL_REVISION => None, _ => get_and_clean_parent_copies( &mut all_copies, &mut children_count, p2, ), // will be None if the vertex is not to be traversed }; // combine it with data for that revision let (p1_copies, p2_copies) = chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev); let copies = match (p1_copies, p2_copies) { (None, None) => None, (c, None) => c, (None, c) => c, (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict( &path_map, rev, p2_copies, p1_copies, &changes, )), }; if let Some(c) = copies { all_copies.insert(rev, c); } } // Drop internal information (like the timestamp) and return the final // mapping. let tt_result = all_copies .remove(&target_rev) .expect("target revision was not processed"); let mut result = PathCopies::default(); for (dest, tt_source) in tt_result { if let Some(path) = tt_source.path { let path_dest = path_map.untokenize(dest).to_owned(); let path_path = path_map.untokenize(path).to_owned(); result.insert(path_dest, path_path); } } result } /// fetch previous computed information /// /// If no other children are expected to need this information, we drop it from /// the cache. /// /// If parent is not part of the set we are expected to walk, return None. fn get_and_clean_parent_copies( all_copies: &mut HashMap<Revision, InternalPathCopies>, children_count: &mut HashMap<Revision, usize>, parent_rev: Revision, ) -> Option<InternalPathCopies> { let count = children_count.get_mut(&parent_rev)?; *count -= 1; if *count == 0 { match all_copies.remove(&parent_rev) { Some(c) => Some(c), None => Some(InternalPathCopies::default()), } } else { match all_copies.get(&parent_rev) { Some(c) => Some(c.clone()), None => Some(InternalPathCopies::default()), } } } /// Combine ChangedFiles with some existing PathCopies information and return /// the result fn chain_changes( path_map: &mut TwoWayPathMap, base_p1_copies: Option<InternalPathCopies>, base_p2_copies: Option<InternalPathCopies>, changes: &ChangedFiles, current_rev: Revision, ) -> (Option<InternalPathCopies>, Option<InternalPathCopies>) { // Fast path the "nothing to do" case. if let (None, None) = (&base_p1_copies, &base_p2_copies) { return (None, None); } let mut p1_copies = base_p1_copies.clone(); let mut p2_copies = base_p2_copies.clone(); for action in changes.iter_actions() { match action { Action::CopiedFromP1(path_dest, path_source) => { match &mut p1_copies { None => (), // This is not a vertex we should proceed. Some(copies) => add_one_copy( current_rev, path_map, copies, base_p1_copies.as_ref().unwrap(), path_dest, path_source, ), } } Action::CopiedFromP2(path_dest, path_source) => { match &mut p2_copies { None => (), // This is not a vertex we should proceed. Some(copies) => add_one_copy( current_rev, path_map, copies, base_p2_copies.as_ref().unwrap(), path_dest, path_source, ), } } Action::Removed(deleted_path) => { // We must drop copy information for removed file. // // We need to explicitly record them as dropped to // propagate this information when merging two // InternalPathCopies object. let deleted = path_map.tokenize(deleted_path); let p1_entry = match &mut p1_copies { None => None, Some(copies) => match copies.entry(deleted) { Entry::Occupied(e) => Some(e), Entry::Vacant(_) => None, }, }; let p2_entry = match &mut p2_copies { None => None, Some(copies) => match copies.entry(deleted) { Entry::Occupied(e) => Some(e), Entry::Vacant(_) => None, }, }; match (p1_entry, p2_entry) { (None, None) => (), (Some(mut e), None) => { e.get_mut().mark_delete(current_rev) } (None, Some(mut e)) => { e.get_mut().mark_delete(current_rev) } (Some(mut e1), Some(mut e2)) => { let cs1 = e1.get_mut(); let cs2 = e2.get(); cs1.mark_delete_with_pair(current_rev, &cs2); e2.insert(cs1.clone()); } } } } } (p1_copies, p2_copies) } // insert one new copy information in an InternalPathCopies // // This deal with chaining and overwrite. fn add_one_copy( current_rev: Revision, path_map: &mut TwoWayPathMap, copies: &mut InternalPathCopies, base_copies: &InternalPathCopies, path_dest: &HgPath, path_source: &HgPath, ) { let dest = path_map.tokenize(path_dest); let source = path_map.tokenize(path_source); let entry; if let Some(v) = base_copies.get(&source) { entry = match &v.path { Some(path) => Some((*(path)).to_owned()), None => Some(source.to_owned()), } } else { entry = Some(source.to_owned()); } // Each new entry is introduced by the children, we // record this information as we will need it to take // the right decision when merging conflicting copy // information. See merge_copies_dict for details. match copies.entry(dest) { Entry::Vacant(slot) => { let ttpc = CopySource::new(current_rev, entry); slot.insert(ttpc); } Entry::Occupied(mut slot) => { let ttpc = slot.get_mut(); ttpc.overwrite(current_rev, entry); } } } /// merge two copies-mapping together, minor and major /// /// In case of conflict, value from "major" will be picked, unless in some /// cases. See inline documentation for details. fn merge_copies_dict( path_map: &TwoWayPathMap, current_merge: Revision, mut minor: InternalPathCopies, mut major: InternalPathCopies, changes: &ChangedFiles, ) -> InternalPathCopies { // This closure exist as temporary help while multiple developper are // actively working on this code. Feel free to re-inline it once this // code is more settled. let cmp_value = |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| { compare_value( path_map, current_merge, changes, dest, src_minor, src_major, ) }; if minor.is_empty() { major } else if major.is_empty() { minor } else if minor.len() * 2 < major.len() { // Lets says we are merging two InternalPathCopies instance A and B. // // If A contains N items, the merge result will never contains more // than N values differents than the one in A // // If B contains M items, with M > N, the merge result will always // result in a minimum of M - N value differents than the on in // A // // As a result, if N < (M-N), we know that simply iterating over A will // yield less difference than iterating over the difference // between A and B. // // This help performance a lot in case were a tiny // InternalPathCopies is merged with a much larger one. for (dest, src_minor) in minor { let src_major = major.get(&dest); match src_major { None => { major.insert(dest, src_minor); } Some(src_major) => { let (pick, overwrite) = cmp_value(&dest, &src_minor, src_major); if overwrite { let src = match pick { MergePick::Major => CopySource::new_from_merge( current_merge, src_major, &src_minor, ), MergePick::Minor => CopySource::new_from_merge( current_merge, &src_minor, src_major, ), MergePick::Any => CopySource::new_from_merge( current_merge, src_major, &src_minor, ), }; major.insert(dest, src); } else { match pick { MergePick::Any | MergePick::Major => None, MergePick::Minor => major.insert(dest, src_minor), }; } } }; } major } else if major.len() * 2 < minor.len() { // This use the same rational than the previous block. // (Check previous block documentation for details.) for (dest, src_major) in major { let src_minor = minor.get(&dest); match src_minor { None => { minor.insert(dest, src_major); } Some(src_minor) => { let (pick, overwrite) = cmp_value(&dest, src_minor, &src_major); if overwrite { let src = match pick { MergePick::Major => CopySource::new_from_merge( current_merge, &src_major, src_minor, ), MergePick::Minor => CopySource::new_from_merge( current_merge, src_minor, &src_major, ), MergePick::Any => CopySource::new_from_merge( current_merge, &src_major, src_minor, ), }; minor.insert(dest, src); } else { match pick { MergePick::Any | MergePick::Minor => None, MergePick::Major => minor.insert(dest, src_major), }; } } }; } minor } else { let mut override_minor = Vec::new(); let mut override_major = Vec::new(); let mut to_major = |k: &PathToken, v: &CopySource| { override_major.push((k.clone(), v.clone())) }; let mut to_minor = |k: &PathToken, v: &CopySource| { override_minor.push((k.clone(), v.clone())) }; // The diff function leverage detection of the identical subpart if // minor and major has some common ancestors. This make it very // fast is most case. // // In case where the two map are vastly different in size, the current // approach is still slowish because the iteration will iterate over // all the "exclusive" content of the larger on. This situation can be // frequent when the subgraph of revision we are processing has a lot // of roots. Each roots adding they own fully new map to the mix (and // likely a small map, if the path from the root to the "main path" is // small. // // We could do better by detecting such situation and processing them // differently. for d in minor.diff(&major) { match d { DiffItem::Add(k, v) => to_minor(k, v), DiffItem::Remove(k, v) => to_major(k, v), DiffItem::Update { old, new } => { let (dest, src_major) = new; let (_, src_minor) = old; let (pick, overwrite) = cmp_value(dest, src_minor, src_major); if overwrite { let src = match pick { MergePick::Major => CopySource::new_from_merge( current_merge, src_major, src_minor, ), MergePick::Minor => CopySource::new_from_merge( current_merge, src_minor, src_major, ), MergePick::Any => CopySource::new_from_merge( current_merge, src_major, src_minor, ), }; to_minor(dest, &src); to_major(dest, &src); } else { match pick { MergePick::Major => to_minor(dest, src_major), MergePick::Minor => to_major(dest, src_minor), // If the two entry are identical, no need to do // anything (but diff should not have yield them) MergePick::Any => unreachable!(), } } } }; } let updates; let mut result; if override_major.is_empty() { result = major } else if override_minor.is_empty() { result = minor } else { if override_minor.len() < override_major.len() { updates = override_minor; result = minor; } else { updates = override_major; result = major; } for (k, v) in updates { result.insert(k, v); } } result } } /// represent the side that should prevail when merging two /// InternalPathCopies enum MergePick { /// The "major" (p1) side prevails Major, /// The "minor" (p2) side prevails Minor, /// Any side could be used (because they are the same) Any, } /// decide which side prevails in case of conflicting values #[allow(clippy::if_same_then_else)] fn compare_value( path_map: &TwoWayPathMap, current_merge: Revision, changes: &ChangedFiles, dest: &PathToken, src_minor: &CopySource, src_major: &CopySource, ) -> (MergePick, bool) { if src_major.rev == current_merge { if src_minor.rev == current_merge { if src_major.path.is_none() { // We cannot get different copy information for both p1 and p2 // from the same revision. Unless this was a // deletion. // // However the deletion might come over different data on each // branch. let need_over = src_major.overwritten != src_minor.overwritten; (MergePick::Any, need_over) } else { unreachable!(); } } else { // The last value comes the current merge, this value -will- win // eventually. (MergePick::Major, true) } } else if src_minor.rev == current_merge { // The last value comes the current merge, this value -will- win // eventually. (MergePick::Minor, true) } else if src_major.path == src_minor.path { // we have the same value, but from other source; if src_major.rev == src_minor.rev { // If the two entry are identical, they are both valid debug_assert!(src_minor.overwritten == src_minor.overwritten); (MergePick::Any, false) } else if src_major.is_overwritten_by(src_minor) { (MergePick::Minor, false) } else if src_minor.is_overwritten_by(src_major) { (MergePick::Major, false) } else { (MergePick::Any, true) } } else if src_major.rev == src_minor.rev { // We cannot get copy information for both p1 and p2 in the // same rev. So this is the same value. unreachable!( "conflicting information from p1 and p2 in the same revision" ); } else { let dest_path = path_map.untokenize(*dest); let action = changes.get_merge_case(dest_path); if src_minor.path.is_some() && src_major.path.is_none() && action == MergeCase::Salvaged { // If the file is "deleted" in the major side but was // salvaged by the merge, we keep the minor side alive (MergePick::Minor, true) } else if src_major.path.is_some() && src_minor.path.is_none() && action == MergeCase::Salvaged { // If the file is "deleted" in the minor side but was // salvaged by the merge, unconditionnaly preserve the // major side. (MergePick::Major, true) } else if src_minor.is_overwritten_by(src_major) { // The information from the minor version are strictly older than // the major version if action == MergeCase::Merged { // If the file was actively merged, its means some non-copy // activity happened on the other branch. It // mean the older copy information are still relevant. // // The major side wins such conflict. (MergePick::Major, true) } else { // No activity on the minor branch, pick the newer one. (MergePick::Major, false) } } else if src_major.is_overwritten_by(src_minor) { if action == MergeCase::Merged { // If the file was actively merged, its means some non-copy // activity happened on the other branch. It // mean the older copy information are still relevant. // // The major side wins such conflict. (MergePick::Major, true) } else { // No activity on the minor branch, pick the newer one. (MergePick::Minor, false) } } else if src_minor.path.is_none() { // the minor side has no relevant information, pick the alive one (MergePick::Major, true) } else if src_major.path.is_none() { // the major side has no relevant information, pick the alive one (MergePick::Minor, true) } else { // by default the major side wins (MergePick::Major, true) } } }