Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/filepatterns.rs @ 44802:e0414fcd35e0
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex
This optimization yields some very interesting results in `rootglob`-heavy
repositories.
I built a test repository with the following structure:
```
root
/<uuid>/build/empty_file
... repeat for 4000 entries
```
and a `.hgignore` containing the corresponding 4000 `rootglob` entries pointing
to all `build/` folders.
Rust+C `hg status` goes from 350ms down to 110ms.
Differential Revision: https://phab.mercurial-scm.org/D8491
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 06 May 2020 11:17:27 +0200 |
parents | 496868f1030c |
children | ad1ec40975aa |
comparison
equal
deleted
inserted
replaced
44801:373dd22ae60e | 44802:e0414fcd35e0 |
---|---|
269 | 269 |
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs | 270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs |
271 /// that don't need to be transformed into a regex. | 271 /// that don't need to be transformed into a regex. |
272 pub fn build_single_regex( | 272 pub fn build_single_regex( |
273 entry: &IgnorePattern, | 273 entry: &IgnorePattern, |
274 ) -> Result<Vec<u8>, PatternError> { | 274 ) -> Result<Option<Vec<u8>>, PatternError> { |
275 let IgnorePattern { | 275 let IgnorePattern { |
276 pattern, syntax, .. | 276 pattern, syntax, .. |
277 } = entry; | 277 } = entry; |
278 let pattern = match syntax { | 278 let pattern = match syntax { |
279 PatternSyntax::RootGlob | 279 PatternSyntax::RootGlob |
286 _ => pattern.to_owned(), | 286 _ => pattern.to_owned(), |
287 }; | 287 }; |
288 if *syntax == PatternSyntax::RootGlob | 288 if *syntax == PatternSyntax::RootGlob |
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) | 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
290 { | 290 { |
291 // The `regex` crate adds `.*` to the start and end of expressions | 291 Ok(None) |
292 // if there are no anchors, so add the start anchor. | |
293 let mut escaped = vec![b'^']; | |
294 escaped.extend(escape_pattern(&pattern)); | |
295 escaped.extend(GLOB_SUFFIX); | |
296 Ok(escaped) | |
297 } else { | 292 } else { |
298 let mut entry = entry.clone(); | 293 let mut entry = entry.clone(); |
299 entry.pattern = pattern; | 294 entry.pattern = pattern; |
300 Ok(_build_single_regex(&entry)) | 295 Ok(Some(_build_single_regex(&entry))) |
301 } | 296 } |
302 } | 297 } |
303 | 298 |
304 lazy_static! { | 299 lazy_static! { |
305 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { | 300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { |
626 PatternSyntax::RelGlob, | 621 PatternSyntax::RelGlob, |
627 b"rust/target/", | 622 b"rust/target/", |
628 Path::new("") | 623 Path::new("") |
629 )) | 624 )) |
630 .unwrap(), | 625 .unwrap(), |
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(), | 626 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), |
632 ); | 627 ); |
633 } | 628 } |
634 | 629 |
635 #[test] | 630 #[test] |
636 fn test_build_single_regex_shortcut() { | 631 fn test_build_single_regex_shortcut() { |
639 PatternSyntax::RootGlob, | 634 PatternSyntax::RootGlob, |
640 b"", | 635 b"", |
641 Path::new("") | 636 Path::new("") |
642 )) | 637 )) |
643 .unwrap(), | 638 .unwrap(), |
644 br"^\.(?:/|$)".to_vec(), | 639 None, |
645 ); | 640 ); |
646 assert_eq!( | 641 assert_eq!( |
647 build_single_regex(&IgnorePattern::new( | 642 build_single_regex(&IgnorePattern::new( |
648 PatternSyntax::RootGlob, | 643 PatternSyntax::RootGlob, |
649 b"whatever", | 644 b"whatever", |
650 Path::new("") | 645 Path::new("") |
651 )) | 646 )) |
652 .unwrap(), | 647 .unwrap(), |
653 br"^whatever(?:/|$)".to_vec(), | 648 None, |
654 ); | 649 ); |
655 assert_eq!( | 650 assert_eq!( |
656 build_single_regex(&IgnorePattern::new( | 651 build_single_regex(&IgnorePattern::new( |
657 PatternSyntax::RootGlob, | 652 PatternSyntax::RootGlob, |
658 b"*.o", | 653 b"*.o", |
659 Path::new("") | 654 Path::new("") |
660 )) | 655 )) |
661 .unwrap(), | 656 .unwrap(), |
662 br"^[^/]*\.o(?:/|$)".to_vec(), | 657 Some(br"^[^/]*\.o(?:/|$)".to_vec()), |
663 ); | 658 ); |
664 } | 659 } |
665 } | 660 } |