Mercurial > public > mercurial-scm > hg-stable
comparison rust/hg-core/src/filepatterns.rs @ 44891:ad1ec40975aa
rust-regex: fix issues with regex anchoring and performance
It turns out that the way I tried to work around `regex`'s behavior difference
with `re2` and Python's `re` was 1) buggy and 2) much more complicated than
needed.
In a few words:
`regex` adds `.*` on either side of patterns when no start or end anchor is
present. My previous workaround put `^` or `$` for every pattern, which is
wrong even without the other 2 bugs on top of it.
Using `^(?:<patterns>)` right at the end of the `regex` path fixes the issue.
I've opened an issue to get a build option instead:
https://github.com/rust-lang/regex/issues/675
Differential Revision: https://phab.mercurial-scm.org/D8506
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 07 May 2020 23:52:08 +0200 |
parents | e0414fcd35e0 |
children | 1e9bfeaec9ba |
comparison
equal
deleted
inserted
replaced
44890:6d3768b11241 | 44891:ad1ec40975aa |
---|---|
174 } = entry; | 174 } = entry; |
175 if pattern.is_empty() { | 175 if pattern.is_empty() { |
176 return vec![]; | 176 return vec![]; |
177 } | 177 } |
178 match syntax { | 178 match syntax { |
179 // The `regex` crate adds `.*` to the start and end of expressions | 179 PatternSyntax::Regexp => pattern.to_owned(), |
180 // if there are no anchors, so add them. | |
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(), | |
182 PatternSyntax::RelRegexp => { | 180 PatternSyntax::RelRegexp => { |
183 // The `regex` crate accepts `**` while `re2` and Python's `re` | 181 // The `regex` crate accepts `**` while `re2` and Python's `re` |
184 // do not. Checking for `*` correctly triggers the same error all | 182 // do not. Checking for `*` correctly triggers the same error all |
185 // engines. | 183 // engines. |
186 if pattern[0] == b'^' || pattern[0] == b'*' { | 184 if pattern[0] == b'^' || pattern[0] == b'*' { |
194 } | 192 } |
195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() | 193 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() |
196 } | 194 } |
197 PatternSyntax::RootFiles => { | 195 PatternSyntax::RootFiles => { |
198 let mut res = if pattern == b"." { | 196 let mut res = if pattern == b"." { |
199 vec![b'^'] | 197 vec![] |
200 } else { | 198 } else { |
201 // Pattern is a directory name. | 199 // Pattern is a directory name. |
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat() | 200 [escape_pattern(pattern).as_slice(), b"/"].concat() |
203 }; | 201 }; |
204 | 202 |
205 // Anything after the pattern must be a non-directory. | 203 // Anything after the pattern must be a non-directory. |
206 res.extend(b"[^/]+$"); | 204 res.extend(b"[^/]+$"); |
207 res.push(b'$'); | |
208 res | 205 res |
209 } | 206 } |
210 PatternSyntax::RelGlob => { | 207 PatternSyntax::RelGlob => { |
211 let glob_re = glob_to_re(pattern); | 208 let glob_re = glob_to_re(pattern); |
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { | 209 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
214 } else { | 211 } else { |
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() | 212 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() |
216 } | 213 } |
217 } | 214 } |
218 PatternSyntax::Glob | PatternSyntax::RootGlob => { | 215 PatternSyntax::Glob | PatternSyntax::RootGlob => { |
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | 216 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() |
220 } | 217 } |
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), | 218 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), |
222 } | 219 } |
223 } | 220 } |
224 | 221 |
652 PatternSyntax::RootGlob, | 649 PatternSyntax::RootGlob, |
653 b"*.o", | 650 b"*.o", |
654 Path::new("") | 651 Path::new("") |
655 )) | 652 )) |
656 .unwrap(), | 653 .unwrap(), |
657 Some(br"^[^/]*\.o(?:/|$)".to_vec()), | 654 Some(br"[^/]*\.o(?:/|$)".to_vec()), |
658 ); | 655 ); |
659 } | 656 } |
660 } | 657 } |