update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap hanlding, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
// debug.rs
//
// Copyright 2024 Mercurial developers
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Module for updating a repository.
use cpython::{PyDict, PyModule, PyObject, PyResult, Python};
use hg::{
progress::{HgProgressBar, Progress},
update::update_from_null,
BaseRevision,
};
use crate::{
exceptions::FallbackError,
utils::{hgerror_to_pyerr, repo_from_path},
};
pub fn update_from_null_fast_path(
py: Python,
repo_path: PyObject,
to: BaseRevision,
) -> PyResult<usize> {
log::trace!("Using update from null fastpath");
let repo = repo_from_path(py, repo_path)?;
let progress: &dyn Progress = &HgProgressBar::new("updating");
hgerror_to_pyerr(py, update_from_null(&repo, to.into(), progress))
}
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.update", package);
let m = PyModule::new(py, dotted_name)?;
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "Rust module for updating a repository")?;
m.add(py, "FallbackError", py.get_type::<FallbackError>())?;
m.add(
py,
"update_from_null",
py_fn!(
py,
update_from_null_fast_path(repo_path: PyObject, to: BaseRevision,)
),
)?;
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
Ok(m)
}