Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/utils.rs @ 46586:435d9fc72646
copies-rust: extract generic map merge logic from merge_copies_dict
This deduplicates the copy-tracing-specific logic
Differential Revision: https://phab.mercurial-scm.org/D9682
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 23 Dec 2020 11:48:16 +0100 |
parents | a25033eb43b5 |
children | 755c31a1caf9 |
line wrap: on
line diff
--- a/rust/hg-core/src/utils.rs Mon Dec 21 12:34:59 2020 +0100 +++ b/rust/hg-core/src/utils.rs Wed Dec 23 11:48:16 2020 +0100 @@ -9,6 +9,8 @@ use crate::errors::{HgError, IoErrorContext}; use crate::utils::hg_path::HgPath; +use im_rc::ordmap::DiffItem; +use im_rc::ordmap::OrdMap; use std::{io::Write, ops::Deref}; pub mod files; @@ -199,3 +201,151 @@ context: IoErrorContext::CurrentExe, }) } + +pub(crate) enum MergeResult<V> { + UseLeftValue, + UseRightValue, + UseNewValue(V), +} + +/// Return the union of the two given maps, +/// calling `merge(key, left_value, right_value)` to resolve keys that exist in +/// both. +/// +/// CC https://github.com/bodil/im-rs/issues/166 +pub(crate) fn ordmap_union_with_merge<K, V>( + left: OrdMap<K, V>, + right: OrdMap<K, V>, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, +) -> OrdMap<K, V> +where + K: Clone + Ord, + V: Clone + PartialEq, +{ + if left.ptr_eq(&right) { + // One of the two maps is an unmodified clone of the other + left + } else if left.len() / 2 > right.len() { + // When two maps have different sizes, + // their size difference is a lower bound on + // how many keys of the larger map are not also in the smaller map. + // This in turn is a lower bound on the number of differences in + // `OrdMap::diff` and the "amount of work" that would be done + // by `ordmap_union_with_merge_by_diff`. + // + // Here `left` is more than twice the size of `right`, + // so the number of differences is more than the total size of + // `right`. Therefore an algorithm based on iterating `right` + // is more efficient. + // + // This helps a lot when a tiny (or empty) map is merged + // with a large one. + ordmap_union_with_merge_by_iter(left, right, merge) + } else if left.len() < right.len() / 2 { + // Same as above but with `left` and `right` swapped + ordmap_union_with_merge_by_iter(right, left, |key, a, b| { + // Also swapped in `merge` arguments: + match merge(key, b, a) { + MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), + // … and swap back in `merge` result: + MergeResult::UseLeftValue => MergeResult::UseRightValue, + MergeResult::UseRightValue => MergeResult::UseLeftValue, + } + }) + } else { + // For maps of similar size, use the algorithm based on `OrdMap::diff` + ordmap_union_with_merge_by_diff(left, right, merge) + } +} + +/// Efficient if `right` is much smaller than `left` +fn ordmap_union_with_merge_by_iter<K, V>( + mut left: OrdMap<K, V>, + right: OrdMap<K, V>, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, +) -> OrdMap<K, V> +where + K: Clone + Ord, + V: Clone, +{ + for (key, right_value) in right { + match left.get(&key) { + None => { + left.insert(key, right_value); + } + Some(left_value) => match merge(&key, left_value, &right_value) { + MergeResult::UseLeftValue => {} + MergeResult::UseRightValue => { + left.insert(key, right_value); + } + MergeResult::UseNewValue(new_value) => { + left.insert(key, new_value); + } + }, + } + } + left +} + +/// Fallback when both maps are of similar size +fn ordmap_union_with_merge_by_diff<K, V>( + mut left: OrdMap<K, V>, + mut right: OrdMap<K, V>, + mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, +) -> OrdMap<K, V> +where + K: Clone + Ord, + V: Clone + PartialEq, +{ + // (key, value) pairs that would need to be inserted in either map + // in order to turn it into the union. + // + // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, + // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` + // with `left_updates` only borrowing from `right` and `right_updates` from + // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. + // + // This would allow moving all `.clone()` calls to after we’ve decided + // which of `right_updates` or `left_updates` to use + // (value ones becoming `Cow::into_owned`), + // and avoid making clones we don’t end up using. + let mut left_updates = Vec::new(); + let mut right_updates = Vec::new(); + + for difference in left.diff(&right) { + match difference { + DiffItem::Add(key, value) => { + left_updates.push((key.clone(), value.clone())) + } + DiffItem::Remove(key, value) => { + right_updates.push((key.clone(), value.clone())) + } + DiffItem::Update { + old: (key, left_value), + new: (_, right_value), + } => match merge(key, left_value, right_value) { + MergeResult::UseLeftValue => { + right_updates.push((key.clone(), left_value.clone())) + } + MergeResult::UseRightValue => { + left_updates.push((key.clone(), right_value.clone())) + } + MergeResult::UseNewValue(new_value) => { + left_updates.push((key.clone(), new_value.clone())); + right_updates.push((key.clone(), new_value)) + } + }, + } + } + if left_updates.len() < right_updates.len() { + for (key, value) in left_updates { + left.insert(key, value); + } + left + } else { + for (key, value) in right_updates { + right.insert(key, value); + } + right + } +}