update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap handling, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
//! The revset query language
//!
//! <https://www.mercurial-scm.org/repo/hg/help/revsets>
use crate::errors::HgError;
use crate::repo::Repo;
use crate::revlog::NodePrefix;
use crate::revlog::{Revision, NULL_REVISION, WORKING_DIRECTORY_HEX};
use crate::revlog::{Revlog, RevlogError};
use crate::Node;
/// Turn a revset expression into exactly one changelog revision.
///
/// Only a small part of the revset language is handled so far:
///
/// * `.` resolves to the first parent recorded in the dirstate,
/// * `null` resolves to [`NULL_REVISION`],
/// * anything else is handed to [`resolve_rev_number_or_hex_prefix`] with the
///   changelog's revlog.
///
/// # Errors
///
/// Input that the helper rejects as `RevlogError::InvalidRevision` is
/// reported as an "unsupported" [`HgError`], since it may well be valid
/// revset syntax that is simply not implemented here. Every other error is
/// passed through unchanged.
pub fn resolve_single(
    input: &str,
    repo: &Repo,
) -> Result<Revision, RevlogError> {
    // The changelog is loaded up front, before any special-casing of `input`.
    let changelog = repo.changelog()?;

    if input == "." {
        let first_parent = repo.dirstate_parents()?.p1;
        return changelog.revlog.rev_from_node(first_parent.into());
    }
    if input == "null" {
        return Ok(NULL_REVISION);
    }

    let resolved = resolve_rev_number_or_hex_prefix(input, &changelog.revlog);
    if let Err(RevlogError::InvalidRevision(revision)) = &resolved {
        // TODO: support for the rest of the language here.
        let msg = format!("cannot parse revset '{}'", revision);
        return Err(HgError::unsupported(msg).into());
    }
    resolved
}
/// Resolve the minimal revision syntax that makes sense for any revlog, not
/// just the changelog (for example the `hg debugdata --manifest` CLI
/// argument).
///
/// Two forms are accepted, tried in this order:
///
/// * a non-negative decimal revision number, written in canonical form (the
///   text must be identical to the number printed back) and present in
///   `revlog`;
/// * a hexadecimal string, taken as a prefix of a node ID and looked up in
///   `revlog`.
///
/// A number that does not pass the checks above is not an error by itself:
/// the input is then tried as a hexadecimal prefix.
///
/// # Errors
///
/// * `RevlogError::WDirUnsupported` if the hexadecimal input is a prefix of
///   the working directory pseudo-node.
/// * `RevlogError::InvalidRevision` if the input is neither form.
/// * Whatever `Revlog::rev_from_node` returns for the prefix lookup.
pub fn resolve_rev_number_or_hex_prefix(
    input: &str,
    revlog: &Revlog,
) -> Result<Revision, RevlogError> {
    // The Python equivalent of this is part of `revsymbol` in
    // `mercurial/scmutil.py`
    match input.parse::<i32>() {
        Ok(number)
            if number.to_string() == input
                && number >= 0
                && revlog.has_rev(number.into()) =>
        {
            // Building a `Revision` directly is fine: the guard has just
            // checked that the number is valid for this revlog.
            return Ok(Revision(number));
        }
        _ => {}
    }

    let prefix = match NodePrefix::from_hex(input) {
        Ok(prefix) => prefix,
        Err(_) => {
            return Err(RevlogError::InvalidRevision(input.to_string()));
        }
    };

    let working_directory = Node::from_hex(WORKING_DIRECTORY_HEX).unwrap();
    if prefix.is_prefix_of(&working_directory) {
        return Err(RevlogError::WDirUnsupported);
    }
    revlog.rev_from_node(prefix)
}