comparison rust/hg-core/src/filepatterns.rs @ 44802:e0414fcd35e0

rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex This optimization yields some very interesting results in `rootglob`-heavy repositories. I built a test repository of the following structure: ``` root /<uuid>/build/empty_file ... repeat for 4000 entries ``` and a `.hgignore` containing the corresponding 4000 `rootglob` entries pointing to all `build/` folders. Rust+C `hg status` goes from 350ms down to 110ms. Differential Revision: https://phab.mercurial-scm.org/D8491
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 06 May 2020 11:17:27 +0200
parents 496868f1030c
children ad1ec40975aa
comparison
equal deleted inserted replaced
44801:373dd22ae60e 44802:e0414fcd35e0
269 269
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs 270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex. 271 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex( 272 pub fn build_single_regex(
273 entry: &IgnorePattern, 273 entry: &IgnorePattern,
274 ) -> Result<Vec<u8>, PatternError> { 274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern { 275 let IgnorePattern {
276 pattern, syntax, .. 276 pattern, syntax, ..
277 } = entry; 277 } = entry;
278 let pattern = match syntax { 278 let pattern = match syntax {
279 PatternSyntax::RootGlob 279 PatternSyntax::RootGlob
286 _ => pattern.to_owned(), 286 _ => pattern.to_owned(),
287 }; 287 };
288 if *syntax == PatternSyntax::RootGlob 288 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 { 290 {
291 // The `regex` crate adds `.*` to the start and end of expressions 291 Ok(None)
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
295 escaped.extend(GLOB_SUFFIX);
296 Ok(escaped)
297 } else { 292 } else {
298 let mut entry = entry.clone(); 293 let mut entry = entry.clone();
299 entry.pattern = pattern; 294 entry.pattern = pattern;
300 Ok(_build_single_regex(&entry)) 295 Ok(Some(_build_single_regex(&entry)))
301 } 296 }
302 } 297 }
303 298
304 lazy_static! { 299 lazy_static! {
305 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { 300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
626 PatternSyntax::RelGlob, 621 PatternSyntax::RelGlob,
627 b"rust/target/", 622 b"rust/target/",
628 Path::new("") 623 Path::new("")
629 )) 624 ))
630 .unwrap(), 625 .unwrap(),
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(), 626 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
632 ); 627 );
633 } 628 }
634 629
635 #[test] 630 #[test]
636 fn test_build_single_regex_shortcut() { 631 fn test_build_single_regex_shortcut() {
639 PatternSyntax::RootGlob, 634 PatternSyntax::RootGlob,
640 b"", 635 b"",
641 Path::new("") 636 Path::new("")
642 )) 637 ))
643 .unwrap(), 638 .unwrap(),
644 br"^\.(?:/|$)".to_vec(), 639 None,
645 ); 640 );
646 assert_eq!( 641 assert_eq!(
647 build_single_regex(&IgnorePattern::new( 642 build_single_regex(&IgnorePattern::new(
648 PatternSyntax::RootGlob, 643 PatternSyntax::RootGlob,
649 b"whatever", 644 b"whatever",
650 Path::new("") 645 Path::new("")
651 )) 646 ))
652 .unwrap(), 647 .unwrap(),
653 br"^whatever(?:/|$)".to_vec(), 648 None,
654 ); 649 );
655 assert_eq!( 650 assert_eq!(
656 build_single_regex(&IgnorePattern::new( 651 build_single_regex(&IgnorePattern::new(
657 PatternSyntax::RootGlob, 652 PatternSyntax::RootGlob,
658 b"*.o", 653 b"*.o",
659 Path::new("") 654 Path::new("")
660 )) 655 ))
661 .unwrap(), 656 .unwrap(),
662 br"^[^/]*\.o(?:/|$)".to_vec(), 657 Some(br"^[^/]*\.o(?:/|$)".to_vec()),
663 ); 658 );
664 } 659 }
665 } 660 }