diff rust/hg-core/src/matchers.rs @ 44802:e0414fcd35e0

rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex This optimization yields some very interesting results in `rootglob`-heavy repositories. I build a test repository of the following structure: ``` root /<uuid>/build/empty_file ... repeat for 4000 entries ``` and a `.hgignore` containing the corresponding 4000 `rootglob` entries pointing to all `build/` folders. Rust+c `hg status` goes from 350ms down to 110ms. Differential Revision: https://phab.mercurial-scm.org/D8491
author Rapha?l Gom?s <rgomes@octobus.net>
date Wed, 06 May 2020 11:17:27 +0200
parents 83c97c0bd319
children de0fb4463a3d
line wrap: on
line diff
--- a/rust/hg-core/src/matchers.rs	Wed Apr 15 16:43:05 2020 -0400
+++ b/rust/hg-core/src/matchers.rs	Wed May 06 11:17:27 2020 +0200
@@ -24,6 +24,7 @@
     PatternSyntax,
 };
 
+use crate::filepatterns::normalize_path_bytes;
 use std::borrow::ToOwned;
 use std::collections::HashSet;
 use std::fmt::{Display, Error, Formatter};
@@ -373,15 +374,32 @@
 fn build_regex_match<'a>(
     ignore_patterns: &'a [&'a IgnorePattern],
 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
-    let regexps: Result<Vec<_>, PatternError> = ignore_patterns
-        .into_iter()
-        .map(|k| build_single_regex(*k))
-        .collect();
-    let regexps = regexps?;
+    let mut regexps = vec![];
+    let mut exact_set = HashSet::new();
+
+    for pattern in ignore_patterns {
+        if let Some(re) = build_single_regex(pattern)? {
+            regexps.push(re);
+        } else {
+            let exact = normalize_path_bytes(&pattern.pattern);
+            exact_set.insert(HgPathBuf::from_bytes(&exact));
+        }
+    }
+
     let full_regex = regexps.join(&b'|');
 
-    let matcher = re_matcher(&full_regex)?;
-    let func = Box::new(move |filename: &HgPath| matcher(filename));
+    // An empty pattern would cause the regex engine to incorrectly match the
+    // (empty) root directory
+    let func = if !(regexps.is_empty()) {
+        let matcher = re_matcher(&full_regex)?;
+        let func = move |filename: &HgPath| {
+            exact_set.contains(filename) || matcher(filename)
+        };
+        Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
+    } else {
+        let func = move |filename: &HgPath| exact_set.contains(filename);
+        Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
+    };
 
     Ok((full_regex, func))
 }