comparison rust/hg-core/src/filepatterns.rs @ 44891:ad1ec40975aa

rust-regex: fix issues with regex anchoring and performance

It turns out that the way I tried to work around `regex`'s behavior difference with `re2` and Python's `re` was 1) buggy and 2) much more complicated than needed.

In a few words: `regex` adds `.*` on either side of patterns when no start or end anchor is present. My previous workaround put `^` or `$` for every pattern, which is wrong even without the other 2 bugs on top of it.

Using `^(?:<patterns>)` right at the end of the `regex` path fixes the issue.

I've opened an issue to get a build option instead: https://github.com/rust-lang/regex/issues/675

Differential Revision: https://phab.mercurial-scm.org/D8506
author Raphaël Gomès <rgomes@octobus.net>
date Thu, 07 May 2020 23:52:08 +0200
parents e0414fcd35e0
children 1e9bfeaec9ba
comparison
equal deleted inserted replaced
44890:6d3768b11241 44891:ad1ec40975aa
174 } = entry; 174 } = entry;
175 if pattern.is_empty() { 175 if pattern.is_empty() {
176 return vec![]; 176 return vec![];
177 } 177 }
178 match syntax { 178 match syntax {
179 // The `regex` crate adds `.*` to the start and end of expressions 179 PatternSyntax::Regexp => pattern.to_owned(),
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
182 PatternSyntax::RelRegexp => { 180 PatternSyntax::RelRegexp => {
183 // The `regex` crate accepts `**` while `re2` and Python's `re` 181 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error all 182 // do not. Checking for `*` correctly triggers the same error all
185 // engines. 183 // engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' { 184 if pattern[0] == b'^' || pattern[0] == b'*' {
194 } 192 }
195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() 193 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 } 194 }
197 PatternSyntax::RootFiles => { 195 PatternSyntax::RootFiles => {
198 let mut res = if pattern == b"." { 196 let mut res = if pattern == b"." {
199 vec![b'^'] 197 vec![]
200 } else { 198 } else {
201 // Pattern is a directory name. 199 // Pattern is a directory name.
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat() 200 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 }; 201 };
204 202
205 // Anything after the pattern must be a non-directory. 203 // Anything after the pattern must be a non-directory.
206 res.extend(b"[^/]+$"); 204 res.extend(b"[^/]+$");
207 res.push(b'$');
208 res 205 res
209 } 206 }
210 PatternSyntax::RelGlob => { 207 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern); 208 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { 209 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
214 } else { 211 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() 212 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 } 213 }
217 } 214 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => { 215 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() 216 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 } 217 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), 218 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 } 219 }
223 } 220 }
224 221
652 PatternSyntax::RootGlob, 649 PatternSyntax::RootGlob,
653 b"*.o", 650 b"*.o",
654 Path::new("") 651 Path::new("")
655 )) 652 ))
656 .unwrap(), 653 .unwrap(),
657 Some(br"^[^/]*\.o(?:/|$)".to_vec()), 654 Some(br"[^/]*\.o(?:/|$)".to_vec()),
658 ); 655 );
659 } 656 }
660 } 657 }