Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/filepatterns.rs @ 44802:e0414fcd35e0
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex
This optimization yields some very interesting results in `rootglob`-heavy
repositories.
I built a test repository with the following structure:
```
root
/<uuid>/build/empty_file
... repeat for 4000 entries
```
and a `.hgignore` containing the corresponding 4000 `rootglob` entries pointing
to all `build/` folders.
Rust+c `hg status` goes from 350ms down to 110ms.
Differential Revision: https://phab.mercurial-scm.org/D8491
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 06 May 2020 11:17:27 +0200 |
parents | 496868f1030c |
children | ad1ec40975aa |
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs Wed Apr 15 16:43:05 2020 -0400 +++ b/rust/hg-core/src/filepatterns.rs Wed May 06 11:17:27 2020 +0200 @@ -271,7 +271,7 @@ /// that don't need to be transformed into a regex. pub fn build_single_regex( entry: &IgnorePattern, -) -> Result<Vec<u8>, PatternError> { +) -> Result<Option<Vec<u8>>, PatternError> { let IgnorePattern { pattern, syntax, .. } = entry; @@ -288,16 +288,11 @@ if *syntax == PatternSyntax::RootGlob && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) { - // The `regex` crate adds `.*` to the start and end of expressions - // if there are no anchors, so add the start anchor. - let mut escaped = vec![b'^']; - escaped.extend(escape_pattern(&pattern)); - escaped.extend(GLOB_SUFFIX); - Ok(escaped) + Ok(None) } else { let mut entry = entry.clone(); entry.pattern = pattern; - Ok(_build_single_regex(&entry)) + Ok(Some(_build_single_regex(&entry))) } } @@ -628,7 +623,7 @@ Path::new("") )) .unwrap(), - br"(?:.*/)?rust/target(?:/|$)".to_vec(), + Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), ); } @@ -641,7 +636,7 @@ Path::new("") )) .unwrap(), - br"^\.(?:/|$)".to_vec(), + None, ); assert_eq!( build_single_regex(&IgnorePattern::new( @@ -650,7 +645,7 @@ Path::new("") )) .unwrap(), - br"^whatever(?:/|$)".to_vec(), + None, ); assert_eq!( build_single_regex(&IgnorePattern::new( @@ -659,7 +654,7 @@ Path::new("") )) .unwrap(), - br"^[^/]*\.o(?:/|$)".to_vec(), + Some(br"^[^/]*\.o(?:/|$)".to_vec()), ); } }