view rust/hg-core/src/dagops.rs @ 46155:fce2f20a54ce

copies-rust: start recording overwrites as they happen If a revision has information overwriting data from another revision, the overwriting revision is a descendant of the overwritten one. So we could warm the Oracle cache with such information to avoid potential future `is_ancestors` calls. This provides us with a large speedup in the most expensive cases: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, × 0.8757, 157 µs/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, × 0.5142, 37 µs/rev Full comparison below: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, × 1.0000, 42 µs/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, × 0.9561, 18 µs/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, × 1.0039, 4 µs/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, × 1.2154, 26 µs/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, × 1.0000, 50 µs/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ×
1.0000, 16 µs/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000322 s, 0.000322 s, +0.000000 s, × 1.0000, 322 µs/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, × 0.9920, 1783 µs/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, × 0.9926, 10 µs/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, × 0.9804, 11 µs/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, × 0.9784, 11 µs/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, × 0.9166, 12 µs/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, × 1.0051, 6434 µs/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, × 0.9934, 9 µs/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, × 0.9882, 42 µs/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, × 1.0000, 53 µs/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, × 0.9943, 58 µs/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, × 1.0069, 80 µs/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, × 0.9978, 7 µs/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, × 1.0085, 10 µs/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ×
0.9840, 10 µs/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, × 0.9210, 7 µs/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, × 1.0337, 46 µs/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, × 1.0000, 34 µs/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, × 1.0109, 20 µs/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, × 0.9985, 94 µs/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, × 0.9985, 1124 µs/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, × 0.9970, 11715 µs/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, × 0.9953, 4 µs/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, × 1.0393, 121 µs/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, × 0.9765, 8 µs/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, × 1.0005, 42 µs/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, × 0.9760, 6 µs/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, × 0.7242, 8 µs/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ×
0.6509, 8 µs/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, × 0.9941, 421 µs/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, × 0.9977, 432 µs/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, × 0.9938, 40 µs/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, × 0.9920, 561 µs/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, × 0.9888, 32743 µs/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, × 1.0061, 11921 µs/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, × 1.0060, 4 µs/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, × 1.0293, 129 µs/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, × 0.9693, 9 µs/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, × 1.0513, 7 µs/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, × 0.9968, 7 µs/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, × 0.9711, 9 µs/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, × 0.9917, 43 µs/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ×
0.8617, 13 µs/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, × 0.9255, 13 µs/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, × 0.9197, 12 µs/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, × 0.9597, 16 µs/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, × 0.9551, 12 µs/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, × 0.9214, 12 µs/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, × 0.6540, 8 µs/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, × 0.8757, 157 µs/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, × 0.5142, 37 µs/rev Differential Revision: https://phab.mercurial-scm.org/D9497
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 21 Nov 2020 17:00:32 +0100
parents 26114bd6ec60
children e98fd81bb151
line wrap: on
line source

// dagops.rs
//
// Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Miscellaneous DAG operations
//!
//! # Terminology
//! - By *relative heads* of a collection of revision numbers (`Revision`), we
//!   mean those revisions that have no children among the collection.
//! - Similarly, by *relative roots* of a collection of `Revision`, we mean
//!   those whose parents, if any, don't belong to the collection.
use super::{Graph, GraphError, Revision, NULL_REVISION};
use crate::ancestors::AncestorsIterator;
use std::collections::{BTreeSet, HashSet};

/// Remove from `set` every non-null parent of `rev`.
///
/// Parents that are not members of `set` are simply skipped, and
/// `NULL_REVISION` entries reported by the graph are never removed.
///
/// # Errors
/// Any `GraphError` raised by `graph.parents(rev)` is returned as is, in
/// which case `set` is left untouched.
fn remove_parents<S: std::hash::BuildHasher>(
    graph: &impl Graph,
    rev: Revision,
    set: &mut HashSet<Revision, S>,
) -> Result<(), GraphError> {
    let parents = graph.parents(rev)?;
    parents
        .iter()
        .filter(|&&p| p != NULL_REVISION)
        .for_each(|p| {
            set.remove(p);
        });
    Ok(())
}

/// Compute the relative heads of the revisions emitted by an iterator.
///
/// A revision is a relative head if none of the other emitted revisions
/// has it as a parent. `NULL_REVISION` is never part of the result, and
/// its parents are never looked up.
///
/// # Errors
/// The first `GraphError` encountered while looking up parents is returned.
///
/// # Performance notes
/// The iterator is cloned: one pass fills a `HashSet` with the candidates,
/// the other one discards the parents of each revision from it.
///
/// The parameter is an `Iterator` rather than an `impl IntoIterator`, so
/// that cloning it is guaranteed not to clone the whole underlying
/// collection.
pub fn heads<'a>(
    graph: &impl Graph,
    iter_revs: impl Clone + Iterator<Item = &'a Revision>,
) -> Result<HashSet<Revision>, GraphError> {
    let mut candidates: HashSet<Revision> = iter_revs
        .clone()
        .copied()
        .filter(|&rev| rev != NULL_REVISION)
        .collect();
    for &rev in iter_revs.filter(|&&rev| rev != NULL_REVISION) {
        remove_parents(graph, rev, &mut candidates)?;
    }
    Ok(candidates)
}

/// Shrink `revs` in place so that only its relative heads remain.
///
/// `NULL_REVISION` is always discarded from `revs`.
///
/// Working in place leaves the handling of the incoming set to the caller:
/// - a direct Python binding would probably have to build its own `HashSet`
///   from an incoming iterable anyway, even if it only wants the heads;
/// - a Rust caller is free to clone beforehand if the original set is still
///   needed.
///
/// # Errors
/// The first `GraphError` encountered while looking up parents is returned;
/// `revs` may have been partially pruned by then.
///
/// # Performance notes
/// Internally, this function will store a full copy of `revs` in a `Vec`.
pub fn retain_heads<S: std::hash::BuildHasher>(
    graph: &impl Graph,
    revs: &mut HashSet<Revision, S>,
) -> Result<(), GraphError> {
    revs.remove(&NULL_REVISION);
    // The set cannot be iterated over while it is being mutated, hence the
    // snapshot. It is taken after the removal above, so it never holds
    // `NULL_REVISION`.
    let snapshot: Vec<Revision> = revs.iter().copied().collect();
    snapshot
        .into_iter()
        .try_for_each(|rev| remove_parents(graph, rev, revs))
}

/// Relative roots of `revs`, passed as a `HashSet`
///
/// A revision of `revs` is a root if none of its non-null parents belongs
/// to `revs`.
///
/// The roots are returned in arbitrary order, since they are found by
/// iterating over the `HashSet`.
///
/// # Errors
/// The first `GraphError` encountered while looking up parents is returned.
pub fn roots<G: Graph, S: std::hash::BuildHasher>(
    graph: &G,
    revs: &HashSet<Revision, S>,
) -> Result<Vec<Revision>, GraphError> {
    let mut found: Vec<Revision> = Vec::new();
    for &candidate in revs {
        let parents = graph.parents(candidate)?;
        let has_parent_in_revs = parents
            .iter()
            .any(|p| *p != NULL_REVISION && revs.contains(p));
        if !has_parent_in_revs {
            found.push(candidate);
        }
    }
    Ok(found)
}

/// Compute the topological range between two collections of revisions
///
/// This is equivalent to the revset `<roots>::<heads>`. If `roots` is
/// empty, so is the result.
///
/// Currently, the given `Graph` has to implement `Clone`, which means
/// actually cloning just a reference-counted Python pointer if
/// it's passed over through `rust-cpython`. This is due to the internal
/// use of `AncestorsIterator`
///
/// # Errors
///
/// Any `GraphError` raised while creating or consuming the
/// `AncestorsIterator`, or while looking up parents, is returned.
///
/// # Algorithmic details
///
/// This is a two-pass sweep inspired from what `reachableroots2` from
/// `mercurial.cext.parsers` does to obtain the same results.
///
/// - First, we climb up the DAG from `heads` in topological order, without
///   going below the smallest of the `roots`, recording each visited
///   revision in a vector and adding any element of `roots` we find among
///   them to the resulting range.
/// - Then, we iterate on that recorded vector backwards, so that a revision
///   is always emitted after its parents, and add to the results all
///   revisions having at least one parent already in the range.
///
/// # Performance notes
///
/// The main difference with the C implementation is that
/// the latter uses a flat array with bit flags, instead of complex structures
/// like `HashSet`, making it faster in most scenarios. In theory, it's
/// possible that the present implementation could be more memory efficient
/// for very large repositories with many branches.
pub fn range(
    graph: &(impl Graph + Clone),
    roots: impl IntoIterator<Item = Revision>,
    heads: impl IntoIterator<Item = Revision>,
) -> Result<BTreeSet<Revision>, GraphError> {
    let root_set: HashSet<Revision> = roots.into_iter().collect();
    let mut result = BTreeSet::new();
    let lowest_root: Revision = match root_set.iter().min() {
        Some(&r) => r,
        None => return Ok(result),
    };

    // Internally, AncestorsIterator currently maintains a `HashSet`
    // of all seen revisions, which is also what we record here, albeit in an
    // ordered way. There's room for improvement on this duplication.
    let ancestors =
        AncestorsIterator::new(graph.clone(), heads, lowest_root, true)?;
    let mut visited: Vec<Revision> = Vec::new();
    for item in ancestors {
        let rev = item?;
        if root_set.contains(&rev) {
            result.insert(rev);
        }
        visited.push(rev);
    }

    // Walk back down: parents are handled before their children
    for rev in visited.into_iter().rev() {
        let parents = graph.parents(rev)?;
        let reachable = parents
            .iter()
            .any(|p| *p != NULL_REVISION && result.contains(p));
        if reachable {
            result.insert(rev);
        }
    }
    Ok(result)
}

#[cfg(test)]
mod tests {

    use super::*;
    use crate::testing::SampleGraph;

    /// Gather revisions from any iterable into an ascending `Vec`, so that
    /// results coming out of unordered collections can be compared.
    fn sorted(revs: impl IntoIterator<Item = Revision>) -> Vec<Revision> {
        let mut ordered: Vec<Revision> = revs.into_iter().collect();
        ordered.sort_unstable();
        ordered
    }

    /// Run `retain_heads()` on a set built from the given slice and return
    /// what is left as a sorted `Vec`
    fn retain_heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let mut set: HashSet<Revision> = revs.iter().copied().collect();
        retain_heads(graph, &mut set)?;
        Ok(sorted(set))
    }

    #[test]
    fn test_retain_heads() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(retain_heads_sorted(&graph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(
            retain_heads_sorted(&graph, &[4, 1, 6, 12, 0])?,
            vec![1, 6, 12]
        );
        assert_eq!(
            retain_heads_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Run `heads()` on the given slice and return the result as a sorted
    /// `Vec`
    fn heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let found = heads(graph, revs.iter())?;
        Ok(sorted(found))
    }

    #[test]
    fn test_heads() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(heads_sorted(&graph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(heads_sorted(&graph, &[4, 1, 6, 12, 0])?, vec![1, 6, 12]);
        assert_eq!(
            heads_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Run `roots()` on a set built from the given slice and return the
    /// result as a sorted `Vec`
    fn roots_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let set: HashSet<Revision> = revs.iter().copied().collect();
        let found = roots(graph, &set)?;
        Ok(sorted(found))
    }

    #[test]
    fn test_roots() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(roots_sorted(&graph, &[4, 5, 6])?, vec![4]);
        assert_eq!(roots_sorted(&graph, &[4, 1, 6, 12, 0])?, vec![0, 4, 12]);
        assert_eq!(
            roots_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![1, 8]
        );
        Ok(())
    }

    /// Run `range()` on the given slices and return the result as a `Vec`,
    /// which is already sorted since it comes from a `BTreeSet`
    fn range_vec(
        graph: impl Graph + Clone,
        roots: &[Revision],
        heads: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let found =
            range(&graph, roots.iter().copied(), heads.iter().copied())?;
        Ok(found.into_iter().collect())
    }

    #[test]
    fn test_range() -> Result<(), GraphError> {
        assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
        assert!(range_vec(SampleGraph, &[0], &[8])?.is_empty());
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
            vec![5, 10]
        );
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 12])?,
            vec![5, 6, 9, 10, 12]
        );
        Ok(())
    }
}