diff rust/hg-core/src/filepatterns.rs @ 44891:ad1ec40975aa

rust-regex: fix issues with regex anchoring and performance

It turns out that the way I tried to work around `regex`'s behavior difference with `re2` and Python's `re` was 1) buggy and 2) much more complicated than needed.

In a few words: `regex` adds `.*` on either side of patterns when no start or end anchor is present. My previous workaround put `^` or `$` for every pattern, which is wrong even without the other 2 bugs on top of it. Using `^(?:<patterns>)` right at the end of the `regex` path fixes the issue.

I've opened an issue to get a build option instead: https://github.com/rust-lang/regex/issues/675

Differential Revision: https://phab.mercurial-scm.org/D8506
author Raphaël Gomès <rgomes@octobus.net>
date Thu, 07 May 2020 23:52:08 +0200
parents e0414fcd35e0
children 1e9bfeaec9ba
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs	Thu May 07 16:56:03 2020 -0400
+++ b/rust/hg-core/src/filepatterns.rs	Thu May 07 23:52:08 2020 +0200
@@ -176,9 +176,7 @@
         return vec![];
     }
     match syntax {
-        // The `regex` crate adds `.*` to the start and end of expressions
-        // if there are no anchors, so add them.
-        PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
+        PatternSyntax::Regexp => pattern.to_owned(),
         PatternSyntax::RelRegexp => {
             // The `regex` crate accepts `**` while `re2` and Python's `re`
             // do not. Checking for `*` correctly triggers the same error all
@@ -196,15 +194,14 @@
         }
         PatternSyntax::RootFiles => {
             let mut res = if pattern == b"." {
-                vec![b'^']
+                vec![]
             } else {
                 // Pattern is a directory name.
-                [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
+                [escape_pattern(pattern).as_slice(), b"/"].concat()
             };
 
             // Anything after the pattern must be a non-directory.
             res.extend(b"[^/]+$");
-            res.push(b'$');
             res
         }
         PatternSyntax::RelGlob => {
@@ -216,7 +213,7 @@
             }
         }
         PatternSyntax::Glob | PatternSyntax::RootGlob => {
-            [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
+            [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
         }
         PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
     }
@@ -654,7 +651,7 @@
                 Path::new("")
             ))
             .unwrap(),
-            Some(br"^[^/]*\.o(?:/|$)".to_vec()),
+            Some(br"[^/]*\.o(?:/|$)".to_vec()),
         );
     }
 }