view rust/hg-core/src/discovery.rs @ 46155:fce2f20a54ce

copies-rust: start recording overwrite as they happens If a revision has information overwriting data from another revision, the overwriting revision is a descendant of the overwritten one. So we could warm the Oracle cache with such information to avoid potential future `is_ancestors` call. This provide us with a large speedup in the most expensive cases: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Full comparison below: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, ? 1.0000, 42 ?s/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, ? 0.9561, 18 ?s/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, ? 1.0039, 4 ?s/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, ? 1.2154, 26 ?s/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ? 1.0000, 16 ?s/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.6f1f4a s, 0.6f1f4a s, +0.000000 s, ? 1.0000, 322 ?s/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, ? 0.9920, 1783 ?s/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, ? 0.9926, 10 ?s/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, ? 0.9804, 11 ?s/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, ? 0.9784, 11 ?s/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, ? 0.9166, 12 ?s/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, ? 1.0051, 6434 ?s/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, ? 0.9934, 9 ?s/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, ? 0.9882, 42 ?s/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, ? 1.0000, 53 ?s/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, ? 0.9943, 58 ?s/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, ? 1.0069, 80 ?s/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, ? 0.9978, 7 ?s/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, ? 1.0085, 10 ?s/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ? 0.9840, 10 ?s/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, ? 0.9210, 7 ?s/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, ? 1.0337, 46 ?s/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, ? 1.0000, 34 ?s/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, ? 1.0109, 20 ?s/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, ? 0.9985, 94 ?s/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, ? 0.9985, 1124 ?s/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, ? 0.9970, 11715 ?s/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, ? 0.9953, 4 ?s/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, ? 1.0393, 121 ?s/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, ? 0.9765, 8 ?s/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, ? 1.0005, 42 ?s/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, ? 0.9760, 6 ?s/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, ? 0.7242, 8 ?s/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ? 0.6509, 8 ?s/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, ? 0.9941, 421 ?s/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, ? 0.9977, 432 ?s/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, ? 0.9938, 40 ?s/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, ? 0.9920, 561 ?s/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, ? 0.9888, 32743 ?s/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, ? 1.0061, 11921 ?s/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, ? 1.0060, 4 ?s/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, ? 1.0293, 129 ?s/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, ? 0.9693, 9 ?s/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, ? 1.0513, 7 ?s/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, ? 0.9968, 7 ?s/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, ? 0.9711, 9 ?s/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, ? 0.9917, 43 ?s/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ? 0.8617, 13 ?s/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, ? 0.9255, 13 ?s/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, ? 0.9197, 12 ?s/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, ? 0.9597, 16 ?s/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, ? 0.9551, 12 ?s/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, ? 0.9214, 12 ?s/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, ? 0.6540, 8 ?s/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Differential Revision: https://phab.mercurial-scm.org/D9497
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 21 Nov 2020 17:00:32 +0100
parents 26114bd6ec60
children e98fd81bb151
line wrap: on
line source

// discovery.rs
//
// Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Discovery operations
//!
//! This is a Rust counterpart to the `partialdiscovery` class of
//! `mercurial.setdiscovery`

use super::{Graph, GraphError, Revision, NULL_REVISION};
use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
use rand::seq::SliceRandom;
use rand::{thread_rng, RngCore, SeedableRng};
use std::cmp::{max, min};
use std::collections::{HashSet, VecDeque};

type Rng = rand_pcg::Pcg32;
type Seed = [u8; 16];

pub struct PartialDiscovery<G: Graph + Clone> {
    target_heads: Option<Vec<Revision>>,
    graph: G, // plays the role of self._repo
    common: MissingAncestors<G>,
    undecided: Option<HashSet<Revision>>,
    children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
    missing: HashSet<Revision>,
    rng: Rng,
    respect_size: bool,
    randomize: bool,
}

pub struct DiscoveryStats {
    pub undecided: Option<usize>,
}

/// Update an existing sample to match the expected size
///
/// The sample is updated with revisions exponentially distant from each
/// element of `heads`.
///
/// If a target size is specified, the sampling will stop once this size is
/// reached. Otherwise sampling will happen until roots of the <revs> set are
/// reached.
///
/// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
///   represented by `parentfn`
/// - `heads`: set of DAG head revs
/// - `sample`: a sample to update
/// - `parentfn`: a callable to resolve parents for a revision
/// - `quicksamplesize`: optional target size of the sample
fn update_sample<I>(
    revs: Option<&HashSet<Revision>>,
    heads: impl IntoIterator<Item = Revision>,
    sample: &mut HashSet<Revision>,
    parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
    quicksamplesize: Option<usize>,
) -> Result<(), GraphError>
where
    I: Iterator<Item = Revision>,
{
    let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
    let mut visit: VecDeque<Revision> = heads.into_iter().collect();
    let mut factor: u32 = 1;
    let mut seen: HashSet<Revision> = HashSet::new();
    while let Some(current) = visit.pop_front() {
        if !seen.insert(current) {
            continue;
        }

        let d = *distances.entry(current).or_insert(1);
        if d > factor {
            factor *= 2;
        }
        if d == factor {
            sample.insert(current);
            if let Some(sz) = quicksamplesize {
                if sample.len() >= sz {
                    return Ok(());
                }
            }
        }
        for p in parentsfn(current)? {
            if let Some(revs) = revs {
                if !revs.contains(&p) {
                    continue;
                }
            }
            distances.entry(p).or_insert(d + 1);
            visit.push_back(p);
        }
    }
    Ok(())
}

struct ParentsIterator {
    parents: [Revision; 2],
    cur: usize,
}

impl ParentsIterator {
    fn graph_parents(
        graph: &impl Graph,
        r: Revision,
    ) -> Result<ParentsIterator, GraphError> {
        Ok(ParentsIterator {
            parents: graph.parents(r)?,
            cur: 0,
        })
    }
}

impl Iterator for ParentsIterator {
    type Item = Revision;

    fn next(&mut self) -> Option<Revision> {
        if self.cur > 1 {
            return None;
        }
        let rev = self.parents[self.cur];
        self.cur += 1;
        if rev == NULL_REVISION {
            return self.next();
        }
        Some(rev)
    }
}

impl<G: Graph + Clone> PartialDiscovery<G> {
    /// Create a PartialDiscovery object, with the intent
    /// of comparing our `::<target_heads>` revset to the contents of another
    /// repo.
    ///
    /// For now `target_heads` is passed as a vector, and will be used
    /// at the first call to `ensure_undecided()`.
    ///
    /// If we want to make the signature more flexible,
    /// we'll have to make it a type argument of `PartialDiscovery` or a trait
    /// object since we'll keep it in the meanwhile
    ///
    /// The `respect_size` boolean controls how the sampling methods
    /// will interpret the size argument requested by the caller. If it's
    /// `false`, they are allowed to produce a sample whose size is more
    /// appropriate to the situation (typically bigger).
    ///
    /// The `randomize` boolean affects sampling, and specifically how
    /// limiting or last-minute expanding is been done:
    ///
    /// If `true`, both will perform random picking from `self.undecided`.
    /// This is currently the best for actual discoveries.
    ///
    /// If `false`, a reproductible picking strategy is performed. This is
    /// useful for integration tests.
    pub fn new(
        graph: G,
        target_heads: Vec<Revision>,
        respect_size: bool,
        randomize: bool,
    ) -> Self {
        let mut seed = [0; 16];
        if randomize {
            thread_rng().fill_bytes(&mut seed);
        }
        Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
    }

    pub fn new_with_seed(
        graph: G,
        target_heads: Vec<Revision>,
        seed: Seed,
        respect_size: bool,
        randomize: bool,
    ) -> Self {
        PartialDiscovery {
            undecided: None,
            children_cache: None,
            target_heads: Some(target_heads),
            graph: graph.clone(),
            common: MissingAncestors::new(graph, vec![]),
            missing: HashSet::new(),
            rng: Rng::from_seed(seed),
            respect_size,
            randomize,
        }
    }

    /// Extract at most `size` random elements from sample and return them
    /// as a vector
    fn limit_sample(
        &mut self,
        mut sample: Vec<Revision>,
        size: usize,
    ) -> Vec<Revision> {
        if !self.randomize {
            sample.sort();
            sample.truncate(size);
            return sample;
        }
        let sample_len = sample.len();
        if sample_len <= size {
            return sample;
        }
        let rng = &mut self.rng;
        let dropped_size = sample_len - size;
        let limited_slice = if size < dropped_size {
            sample.partial_shuffle(rng, size).0
        } else {
            sample.partial_shuffle(rng, dropped_size).1
        };
        limited_slice.to_owned()
    }

    /// Register revisions known as being common
    pub fn add_common_revisions(
        &mut self,
        common: impl IntoIterator<Item = Revision>,
    ) -> Result<(), GraphError> {
        let before_len = self.common.get_bases().len();
        self.common.add_bases(common);
        if self.common.get_bases().len() == before_len {
            return Ok(());
        }
        if let Some(ref mut undecided) = self.undecided {
            self.common.remove_ancestors_from(undecided)?;
        }
        Ok(())
    }

    /// Register revisions known as being missing
    ///
    /// # Performance note
    ///
    /// Except in the most trivial case, the first call of this method has
    /// the side effect of computing `self.undecided` set for the first time,
    /// and the related caches it might need for efficiency of its internal
    /// computation. This is typically faster if more information is
    /// available in `self.common`. Therefore, for good performance, the
    /// caller should avoid calling this too early.
    pub fn add_missing_revisions(
        &mut self,
        missing: impl IntoIterator<Item = Revision>,
    ) -> Result<(), GraphError> {
        let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
        if tovisit.is_empty() {
            return Ok(());
        }
        self.ensure_children_cache()?;
        self.ensure_undecided()?; // for safety of possible future refactors
        let children = self.children_cache.as_ref().unwrap();
        let mut seen: HashSet<Revision> = HashSet::new();
        let undecided_mut = self.undecided.as_mut().unwrap();
        while let Some(rev) = tovisit.pop_front() {
            if !self.missing.insert(rev) {
                // either it's known to be missing from a previous
                // invocation, and there's no need to iterate on its
                // children (we now they are all missing)
                // or it's from a previous iteration of this loop
                // and its children have already been queued
                continue;
            }
            undecided_mut.remove(&rev);
            match children.get(&rev) {
                None => {
                    continue;
                }
                Some(this_children) => {
                    for child in this_children.iter().cloned() {
                        if seen.insert(child) {
                            tovisit.push_back(child);
                        }
                    }
                }
            }
        }
        Ok(())
    }

    /// Do we have any information about the peer?
    pub fn has_info(&self) -> bool {
        self.common.has_bases()
    }

    /// Did we acquire full knowledge of our Revisions that the peer has?
    pub fn is_complete(&self) -> bool {
        self.undecided.as_ref().map_or(false, HashSet::is_empty)
    }

    /// Return the heads of the currently known common set of revisions.
    ///
    /// If the discovery process is not complete (see `is_complete()`), the
    /// caller must be aware that this is an intermediate state.
    ///
    /// On the other hand, if it is complete, then this is currently
    /// the only way to retrieve the end results of the discovery process.
    ///
    /// We may introduce in the future an `into_common_heads` call that
    /// would be more appropriate for normal Rust callers, dropping `self`
    /// if it is complete.
    pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
        self.common.bases_heads()
    }

    /// Force first computation of `self.undecided`
    ///
    /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
    /// unwrapped to get workable immutable or mutable references without
    /// any panic.
    ///
    /// This is an imperative call instead of an access with added lazyness
    /// to reduce easily the scope of mutable borrow for the caller,
    /// compared to undecided(&'a mut self) -> &'a… that would keep it
    /// as long as the resulting immutable one.
    fn ensure_undecided(&mut self) -> Result<(), GraphError> {
        if self.undecided.is_some() {
            return Ok(());
        }
        let tgt = self.target_heads.take().unwrap();
        self.undecided =
            Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
        Ok(())
    }

    fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
        if self.children_cache.is_some() {
            return Ok(());
        }
        self.ensure_undecided()?;

        let mut children: FastHashMap<Revision, Vec<Revision>> =
            FastHashMap::default();
        for &rev in self.undecided.as_ref().unwrap() {
            for p in ParentsIterator::graph_parents(&self.graph, rev)? {
                children.entry(p).or_insert_with(Vec::new).push(rev);
            }
        }
        self.children_cache = Some(children);
        Ok(())
    }

    /// Provide statistics about the current state of the discovery process
    pub fn stats(&self) -> DiscoveryStats {
        DiscoveryStats {
            undecided: self.undecided.as_ref().map(HashSet::len),
        }
    }

    pub fn take_quick_sample(
        &mut self,
        headrevs: impl IntoIterator<Item = Revision>,
        size: usize,
    ) -> Result<Vec<Revision>, GraphError> {
        self.ensure_undecided()?;
        let mut sample = {
            let undecided = self.undecided.as_ref().unwrap();
            if undecided.len() <= size {
                return Ok(undecided.iter().cloned().collect());
            }
            dagops::heads(&self.graph, undecided.iter())?
        };
        if sample.len() >= size {
            return Ok(self.limit_sample(sample.into_iter().collect(), size));
        }
        update_sample(
            None,
            headrevs,
            &mut sample,
            |r| ParentsIterator::graph_parents(&self.graph, r),
            Some(size),
        )?;
        Ok(sample.into_iter().collect())
    }

    /// Extract a sample from `self.undecided`, going from its heads and roots.
    ///
    /// The `size` parameter is used to avoid useless computations if
    /// it turns out to be bigger than the whole set of undecided Revisions.
    ///
    /// The sample is taken by using `update_sample` from the heads, then
    /// from the roots, working on the reverse DAG,
    /// expressed by `self.children_cache`.
    ///
    /// No effort is being made to complete or limit the sample to `size`
    /// but this method returns another interesting size that it derives
    /// from its knowledge of the structure of the various sets, leaving
    /// to the caller the decision to use it or not.
    fn bidirectional_sample(
        &mut self,
        size: usize,
    ) -> Result<(HashSet<Revision>, usize), GraphError> {
        self.ensure_undecided()?;
        {
            // we don't want to compute children_cache before this
            // but doing it after extracting self.undecided takes a mutable
            // ref to self while a shareable one is still active.
            let undecided = self.undecided.as_ref().unwrap();
            if undecided.len() <= size {
                return Ok((undecided.clone(), size));
            }
        }

        self.ensure_children_cache()?;
        let revs = self.undecided.as_ref().unwrap();
        let mut sample: HashSet<Revision> = revs.clone();

        // it's possible that leveraging the children cache would be more
        // efficient here
        dagops::retain_heads(&self.graph, &mut sample)?;
        let revsheads = sample.clone(); // was again heads(revs) in python

        // update from heads
        update_sample(
            Some(revs),
            revsheads.iter().cloned(),
            &mut sample,
            |r| ParentsIterator::graph_parents(&self.graph, r),
            None,
        )?;

        // update from roots
        let revroots: HashSet<Revision> =
            dagops::roots(&self.graph, revs)?.into_iter().collect();
        let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

        let children = self.children_cache.as_ref().unwrap();
        let empty_vec: Vec<Revision> = Vec::new();
        update_sample(
            Some(revs),
            revroots,
            &mut sample,
            |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
            None,
        )?;
        Ok((sample, prescribed_size))
    }

    /// Fill up sample up to the wished size with random undecided Revisions.
    ///
    /// This is intended to be used as a last resort completion if the
    /// regular sampling algorithm returns too few elements.
    fn random_complete_sample(
        &mut self,
        sample: &mut Vec<Revision>,
        size: usize,
    ) {
        let sample_len = sample.len();
        if size <= sample_len {
            return;
        }
        let take_from: Vec<Revision> = self
            .undecided
            .as_ref()
            .unwrap()
            .iter()
            .filter(|&r| !sample.contains(r))
            .cloned()
            .collect();
        sample.extend(self.limit_sample(take_from, size - sample_len));
    }

    pub fn take_full_sample(
        &mut self,
        size: usize,
    ) -> Result<Vec<Revision>, GraphError> {
        let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
        let size = if self.respect_size {
            size
        } else {
            prescribed_size
        };
        let mut sample =
            self.limit_sample(sample_set.into_iter().collect(), size);
        self.random_complete_sample(&mut sample, size);
        Ok(sample)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::SampleGraph;

    /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
    ///
    /// To avoid actual randomness in these tests, we give it a fixed
    /// random seed, but by default we'll test the random version.
    fn full_disco() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![10, 11, 12, 13],
            [0; 16],
            true,
            true,
        )
    }

    /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
    ///
    /// To avoid actual randomness in tests, we give it a fixed random seed.
    fn disco12() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![12],
            [0; 16],
            true,
            true,
        )
    }

    fn sorted_undecided(
        disco: &PartialDiscovery<SampleGraph>,
    ) -> Vec<Revision> {
        let mut as_vec: Vec<Revision> =
            disco.undecided.as_ref().unwrap().iter().cloned().collect();
        as_vec.sort();
        as_vec
    }

    fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
        let mut as_vec: Vec<Revision> =
            disco.missing.iter().cloned().collect();
        as_vec.sort();
        as_vec
    }

    fn sorted_common_heads(
        disco: &PartialDiscovery<SampleGraph>,
    ) -> Result<Vec<Revision>, GraphError> {
        let mut as_vec: Vec<Revision> =
            disco.common_heads()?.iter().cloned().collect();
        as_vec.sort();
        Ok(as_vec)
    }

    #[test]
    fn test_add_common_get_undecided() -> Result<(), GraphError> {
        let mut disco = full_disco();
        assert_eq!(disco.undecided, None);
        assert!(!disco.has_info());
        assert_eq!(disco.stats().undecided, None);

        disco.add_common_revisions(vec![11, 12])?;
        assert!(disco.has_info());
        assert!(!disco.is_complete());
        assert!(disco.missing.is_empty());

        // add_common_revisions did not trigger a premature computation
        // of `undecided`, let's check that and ask for them
        assert_eq!(disco.undecided, None);
        disco.ensure_undecided()?;
        assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
        assert_eq!(disco.stats().undecided, Some(4));
        Ok(())
    }

    /// in this test, we pretend that our peer misses exactly (8+10)::
    /// and we're comparing all our repo to it (as in a bare push)
    #[test]
    fn test_discovery() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.add_common_revisions(vec![11, 12])?;
        disco.add_missing_revisions(vec![8, 10])?;
        assert_eq!(sorted_undecided(&disco), vec![5]);
        assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
        assert!(!disco.is_complete());

        disco.add_common_revisions(vec![5])?;
        assert_eq!(sorted_undecided(&disco), vec![]);
        assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
        assert!(disco.is_complete());
        assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
        Ok(())
    }

    #[test]
    fn test_add_missing_early_continue() -> Result<(), GraphError> {
        eprintln!("test_add_missing_early_stop");
        let mut disco = full_disco();
        disco.add_common_revisions(vec![13, 3, 4])?;
        disco.ensure_children_cache()?;
        // 12 is grand-child of 6 through 9
        // passing them in this order maximizes the chances of the
        // early continue to do the wrong thing
        disco.add_missing_revisions(vec![6, 9, 12])?;
        assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
        assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
        assert!(!disco.is_complete());
        Ok(())
    }

    #[test]
    fn test_limit_sample_no_need_to() {
        let sample = vec![1, 2, 3, 4];
        assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
    }

    #[test]
    fn test_limit_sample_less_than_half() {
        assert_eq!(full_disco().limit_sample((1..6).collect(), 2), vec![2, 5]);
    }

    #[test]
    fn test_limit_sample_more_than_half() {
        assert_eq!(full_disco().limit_sample((1..4).collect(), 2), vec![1, 2]);
    }

    #[test]
    fn test_limit_sample_no_random() {
        let mut disco = full_disco();
        disco.randomize = false;
        assert_eq!(
            disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
            vec![1, 3, 5, 7]
        );
    }

    #[test]
    fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.undecided = Some((1..=13).collect());

        let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
        sample_vec.sort();
        assert_eq!(sample_vec, vec![10, 11, 12, 13]);
        Ok(())
    }

    #[test]
    fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
        let mut disco = disco12();
        disco.ensure_undecided()?;

        let mut sample_vec = disco.take_quick_sample(vec![12], 4)?;
        sample_vec.sort();
        // r12's only parent is r9, whose unique grand-parent through the
        // diamond shape is r4. This ends there because the distance from r4
        // to the root is only 3.
        assert_eq!(sample_vec, vec![4, 9, 12]);
        Ok(())
    }

    #[test]
    fn test_children_cache() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.ensure_children_cache()?;

        let cache = disco.children_cache.unwrap();
        assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
        assert_eq!(cache.get(&10).cloned(), None);

        let mut children_4 = cache.get(&4).cloned().unwrap();
        children_4.sort();
        assert_eq!(children_4, vec![5, 6, 7]);

        let mut children_7 = cache.get(&7).cloned().unwrap();
        children_7.sort();
        assert_eq!(children_7, vec![9, 11]);

        Ok(())
    }

    #[test]
    fn test_complete_sample() {
        let mut disco = full_disco();
        let undecided: HashSet<Revision> =
            [4, 7, 9, 2, 3].iter().cloned().collect();
        disco.undecided = Some(undecided);

        let mut sample = vec![0];
        disco.random_complete_sample(&mut sample, 3);
        assert_eq!(sample.len(), 3);

        let mut sample = vec![2, 4, 7];
        disco.random_complete_sample(&mut sample, 1);
        assert_eq!(sample.len(), 3);
    }

    #[test]
    fn test_bidirectional_sample() -> Result<(), GraphError> {
        let mut disco = full_disco();
        disco.undecided = Some((0..=13).into_iter().collect());

        let (sample_set, size) = disco.bidirectional_sample(7)?;
        assert_eq!(size, 7);
        let mut sample: Vec<Revision> = sample_set.into_iter().collect();
        sample.sort();
        // our DAG is a bit too small for the results to be really interesting
        // at least it shows that
        // - we went both ways
        // - we didn't take all Revisions (6 is not in the sample)
        assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
        Ok(())
    }
}