Mercurial > public > mercurial-scm > hg
view rust/hg-core/src/lib.rs @ 52056:8b7123c8947b
update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap hanlding, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
author | Rapha?l Gom?s <rgomes@octobus.net> |
---|---|
date | Tue, 01 Oct 2024 13:49:11 +0200 |
parents | 3ae7c43ad8aa |
children | e01e84e5e426 96b113d22b34 |
line wrap: on
line source
// Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net> // and Mercurial contributors // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. mod ancestors; pub mod dagops; pub mod errors; pub mod narrow; pub mod sparse; pub use ancestors::{AncestorsIterator, MissingAncestors}; pub mod dirstate; pub mod dirstate_tree; pub mod discovery; pub mod exit_codes; pub mod requirements; pub mod testing; // unconditionally built, for use from integration tests pub use dirstate::{ dirs_multiset::{DirsMultiset, DirsMultisetIter}, status::{ BadMatch, BadType, DirstateStatus, HgPathCow, StatusError, StatusOptions, }, DirstateEntry, DirstateParents, EntryState, }; pub mod copy_tracing; pub mod filepatterns; pub mod matchers; pub mod repo; pub mod revlog; pub use revlog::*; pub mod checkexec; pub mod config; pub mod lock; pub mod logging; pub mod operations; pub mod progress; pub mod revset; pub mod update; pub mod utils; pub mod vfs; use crate::utils::hg_path::{HgPathBuf, HgPathError}; pub use filepatterns::{ parse_pattern_syntax_kind, read_pattern_file, IgnorePattern, PatternFileWarning, PatternSyntax, }; use std::collections::HashMap; use std::fmt; use twox_hash::RandomXxHashBuilder64; pub type LineNumber = usize; /// Rust's default hasher is too slow because it tries to prevent collision /// attacks. We are not concerned about those: if an ill-minded person has /// write access to your repository, you have other issues. pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>; // TODO: should this be the default `FastHashMap` for all of hg-core, not just // dirstate_tree? How does XxHash compare with AHash, hashbrown’s default? pub type FastHashbrownMap<K, V> = hashbrown::HashMap<K, V, RandomXxHashBuilder64>; #[derive(Debug, PartialEq)] pub enum DirstateMapError { PathNotFound(HgPathBuf), InvalidPath(HgPathError), } impl From<HgPathError> for DirstateMapError { fn from(error: HgPathError) -> Self { Self::InvalidPath(error) } } impl fmt::Display for DirstateMapError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DirstateMapError::PathNotFound(_) => { f.write_str("expected a value, found none") } DirstateMapError::InvalidPath(path_error) => path_error.fmt(f), } } } #[derive(Debug, derive_more::From)] pub enum DirstateError { Map(DirstateMapError), Common(errors::HgError), } impl From<HgPathError> for DirstateError { fn from(error: HgPathError) -> Self { Self::Map(DirstateMapError::InvalidPath(error)) } } impl fmt::Display for DirstateError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DirstateError::Map(error) => error.fmt(f), DirstateError::Common(error) => error.fmt(f), } } } #[derive(Debug, derive_more::From)] pub enum PatternError { #[from] Path(HgPathError), UnsupportedSyntax(String), UnsupportedSyntaxInFile(String, String, usize), TooLong(usize), #[from] IO(std::io::Error), /// Needed a pattern that can be turned into a regex but got one that /// can't. This should only happen through programmer error. NonRegexPattern(IgnorePattern), } impl fmt::Display for PatternError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { PatternError::UnsupportedSyntax(syntax) => { write!(f, "Unsupported syntax {}", syntax) } PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => { write!( f, "{}:{}: unsupported syntax {}", file_path, line, syntax ) } PatternError::TooLong(size) => { write!(f, "matcher pattern is too long ({} bytes)", size) } PatternError::IO(error) => error.fmt(f), PatternError::Path(error) => error.fmt(f), PatternError::NonRegexPattern(pattern) => { write!(f, "'{:?}' cannot be turned into a regex", pattern) } } } }