5 // This software may be used and distributed according to the terms of the |
5 // This software may be used and distributed according to the terms of the |
6 // GNU General Public License version 2 or any later version. |
6 // GNU General Public License version 2 or any later version. |
7 |
7 |
8 //! Handling of Mercurial-specific patterns. |
8 //! Handling of Mercurial-specific patterns. |
9 |
9 |
10 use crate::{ |
10 use crate::{utils::SliceExt, FastHashMap, PatternError}; |
11 utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError, |
|
12 }; |
|
13 use lazy_static::lazy_static; |
11 use lazy_static::lazy_static; |
14 use regex::bytes::{NoExpand, Regex}; |
12 use regex::bytes::{NoExpand, Regex}; |
15 use std::fs::File; |
13 use std::fs::File; |
16 use std::io::Read; |
14 use std::io::Read; |
17 use std::path::{Path, PathBuf}; |
15 use std::path::{Path, PathBuf}; |
30 |
28 |
/// These are matched in order
// NOTE(review): each entry looks like a (glob token, regex fragment) pair
// consumed by `glob_to_re`; confirm against that function's body.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
34 |
32 |
|
/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
|
35 |
/// The syntax a pattern is written in.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// A path relative to repository root, which is matched recursively
    Path,
    /// A path relative to cwd
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
}
49 |
57 |
50 /// Transforms a glob pattern into a regex |
58 /// Transforms a glob pattern into a regex |
51 fn glob_to_re(pat: &[u8]) -> Vec<u8> { |
59 fn glob_to_re(pat: &[u8]) -> Vec<u8> { |
123 .iter() |
131 .iter() |
124 .flat_map(|c| RE_ESCAPE[*c as usize].clone()) |
132 .flat_map(|c| RE_ESCAPE[*c as usize].clone()) |
125 .collect() |
133 .collect() |
126 } |
134 } |
127 |
135 |
128 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> { |
136 pub fn parse_pattern_syntax( |
|
137 kind: &[u8], |
|
138 ) -> Result<PatternSyntax, PatternError> { |
129 match kind { |
139 match kind { |
130 b"re" => Ok(PatternSyntax::Regexp), |
140 b"re:" => Ok(PatternSyntax::Regexp), |
131 b"path" => Ok(PatternSyntax::Path), |
141 b"path:" => Ok(PatternSyntax::Path), |
132 b"relpath" => Ok(PatternSyntax::RelPath), |
142 b"relpath:" => Ok(PatternSyntax::RelPath), |
133 b"rootfilesin" => Ok(PatternSyntax::RootFiles), |
143 b"rootfilesin:" => Ok(PatternSyntax::RootFiles), |
134 b"relglob" => Ok(PatternSyntax::RelGlob), |
144 b"relglob:" => Ok(PatternSyntax::RelGlob), |
135 b"relre" => Ok(PatternSyntax::RelRegexp), |
145 b"relre:" => Ok(PatternSyntax::RelRegexp), |
136 b"glob" => Ok(PatternSyntax::Glob), |
146 b"glob:" => Ok(PatternSyntax::Glob), |
137 b"rootglob" => Ok(PatternSyntax::RootGlob), |
147 b"rootglob:" => Ok(PatternSyntax::RootGlob), |
138 _ => Err(PatternError::UnsupportedSyntax( |
148 _ => Err(PatternError::UnsupportedSyntax( |
139 String::from_utf8_lossy(kind).to_string(), |
149 String::from_utf8_lossy(kind).to_string(), |
140 )), |
150 )), |
141 } |
151 } |
142 } |
152 } |
143 |
153 |
144 /// Builds the regex that corresponds to the given pattern. |
154 /// Builds the regex that corresponds to the given pattern. |
145 /// If within a `syntax: regexp` context, returns the pattern, |
155 /// If within a `syntax: regexp` context, returns the pattern, |
146 /// otherwise, returns the corresponding regex. |
156 /// otherwise, returns the corresponding regex. |
147 fn _build_single_regex( |
157 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { |
148 syntax: PatternSyntax, |
158 let IgnorePattern { |
149 pattern: &[u8], |
159 syntax, pattern, .. |
150 globsuffix: &[u8], |
160 } = entry; |
151 ) -> Vec<u8> { |
|
152 if pattern.is_empty() { |
161 if pattern.is_empty() { |
153 return vec![]; |
162 return vec![]; |
154 } |
163 } |
155 match syntax { |
164 match syntax { |
156 PatternSyntax::Regexp => pattern.to_owned(), |
165 PatternSyntax::Regexp => pattern.to_owned(), |
157 PatternSyntax::RelRegexp => { |
166 PatternSyntax::RelRegexp => { |
158 if pattern[0] == b'^' { |
167 if pattern[0] == b'^' { |
159 return pattern.to_owned(); |
168 return pattern.to_owned(); |
160 } |
169 } |
161 [b".*", pattern].concat() |
170 [&b".*"[..], pattern].concat() |
162 } |
171 } |
163 PatternSyntax::Path | PatternSyntax::RelPath => { |
172 PatternSyntax::Path | PatternSyntax::RelPath => { |
164 if pattern == b"." { |
173 if pattern == b"." { |
165 return vec![]; |
174 return vec![]; |
166 } |
175 } |
179 res |
188 res |
180 } |
189 } |
181 PatternSyntax::RelGlob => { |
190 PatternSyntax::RelGlob => { |
182 let glob_re = glob_to_re(pattern); |
191 let glob_re = glob_to_re(pattern); |
183 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
192 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
184 [b".*", rest, globsuffix].concat() |
193 [b".*", rest, GLOB_SUFFIX].concat() |
185 } else { |
194 } else { |
186 [b"(?:|.*/)", glob_re.as_slice(), globsuffix].concat() |
195 [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat() |
187 } |
196 } |
188 } |
197 } |
189 PatternSyntax::Glob | PatternSyntax::RootGlob => { |
198 PatternSyntax::Glob | PatternSyntax::RootGlob => { |
190 [glob_to_re(pattern).as_slice(), globsuffix].concat() |
199 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() |
191 } |
200 } |
192 } |
201 } |
193 } |
202 } |
194 |
203 |
/// Bytes that have a special meaning inside a glob; a pattern containing
/// none of them is treated as an exact match by `build_single_regex`.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
|
206 |
|
/// Normalizes a `/`-separated path given as raw bytes.
///
/// - An empty input, or a path that collapses to nothing, yields `b"."`.
/// - Empty components (repeated slashes) and `.` components are dropped.
/// - A `..` component removes the preceding component, except when there is
///   nothing to remove in a relative path or the preceding component is
///   itself `..`; in those cases it is kept.
/// - One or two leading slashes are preserved; three or more become one.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const PARENT: &[u8] = b"..";
    const CURRENT: &[u8] = b".";

    if bytes.is_empty() {
        return CURRENT.to_vec();
    }
    let sep = b'/';

    let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
    if initial_slashes > 2 {
        // POSIX allows one or two initial slashes, but treats three or more
        // as single slash.
        initial_slashes = 1;
    }

    let mut components: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == sep) {
        if component.is_empty() || component == CURRENT {
            continue;
        }
        // A `..` is kept only when it cannot cancel anything: a relative
        // path with nothing before it, or a preceding `..`.
        let keep = component != PARENT
            || (initial_slashes == 0 && components.is_empty())
            || components.last().map_or(false, |last| *last == PARENT);
        if keep {
            components.push(component);
        } else {
            // No-op when already at the root of an absolute path.
            components.pop();
        }
    }

    let mut normalized = vec![sep; initial_slashes];
    normalized.extend(components.join(&sep));

    if normalized.is_empty() {
        CURRENT.to_vec()
    } else {
        normalized
    }
}
197 |
248 |
198 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs |
249 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs |
199 /// that don't need to be transformed into a regex. |
250 /// that don't need to be transformed into a regex. |
200 pub fn build_single_regex( |
251 pub fn build_single_regex( |
201 kind: &[u8], |
252 entry: &IgnorePattern, |
202 pat: &[u8], |
|
203 globsuffix: &[u8], |
|
204 ) -> Result<Vec<u8>, PatternError> { |
253 ) -> Result<Vec<u8>, PatternError> { |
205 let enum_kind = parse_pattern_syntax(kind)?; |
254 let IgnorePattern { |
206 if enum_kind == PatternSyntax::RootGlob |
255 pattern, syntax, .. |
207 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
256 } = entry; |
|
257 let pattern = match syntax { |
|
258 PatternSyntax::RootGlob |
|
259 | PatternSyntax::Path |
|
260 | PatternSyntax::RelGlob |
|
261 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern), |
|
262 _ => pattern.to_owned(), |
|
263 }; |
|
264 if *syntax == PatternSyntax::RootGlob |
|
265 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
208 { |
266 { |
209 let mut escaped = escape_pattern(pat); |
267 let mut escaped = escape_pattern(&pattern); |
210 escaped.extend(b"(?:/|$)"); |
268 escaped.extend(GLOB_SUFFIX); |
211 Ok(escaped) |
269 Ok(escaped) |
212 } else { |
270 } else { |
213 Ok(_build_single_regex(enum_kind, pat, globsuffix)) |
271 let mut entry = entry.clone(); |
|
272 entry.pattern = pattern; |
|
273 Ok(_build_single_regex(&entry)) |
214 } |
274 } |
215 } |
275 } |
216 |
276 |
217 lazy_static! { |
277 lazy_static! { |
218 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { |
278 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { |
220 |
280 |
221 m.insert(b"re".as_ref(), b"relre:".as_ref()); |
281 m.insert(b"re".as_ref(), b"relre:".as_ref()); |
222 m.insert(b"regexp".as_ref(), b"relre:".as_ref()); |
282 m.insert(b"regexp".as_ref(), b"relre:".as_ref()); |
223 m.insert(b"glob".as_ref(), b"relglob:".as_ref()); |
283 m.insert(b"glob".as_ref(), b"relglob:".as_ref()); |
224 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); |
284 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); |
225 m.insert(b"include".as_ref(), b"include".as_ref()); |
285 m.insert(b"include".as_ref(), b"include:".as_ref()); |
226 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref()); |
286 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); |
227 m |
287 m |
228 }; |
288 }; |
229 } |
289 } |
230 |
290 |
/// Non-fatal problems reported while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
233 |
298 |
234 pub fn parse_pattern_file_contents<P: AsRef<Path>>( |
299 pub fn parse_pattern_file_contents<P: AsRef<Path>>( |
235 lines: &[u8], |
300 lines: &[u8], |
236 file_path: P, |
301 file_path: P, |
237 warn: bool, |
302 warn: bool, |
238 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) { |
303 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
239 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); |
304 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); |
240 let comment_escape_regex = Regex::new(r"\\#").unwrap(); |
305 let comment_escape_regex = Regex::new(r"\\#").unwrap(); |
241 let mut inputs: Vec<PatternTuple> = vec![]; |
306 let mut inputs: Vec<IgnorePattern> = vec![]; |
242 let mut warnings: Vec<WarningTuple> = vec![]; |
307 let mut warnings: Vec<PatternFileWarning> = vec![]; |
243 |
308 |
244 let mut current_syntax = b"relre:".as_ref(); |
309 let mut current_syntax = b"relre:".as_ref(); |
245 |
310 |
246 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { |
311 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { |
247 let line_number = line_number + 1; |
312 let line_number = line_number + 1; |
286 line = rest; |
353 line = rest; |
287 break; |
354 break; |
288 } |
355 } |
289 } |
356 } |
290 |
357 |
291 inputs.push(( |
358 inputs.push(IgnorePattern::new( |
292 [line_syntax, line].concat(), |
359 parse_pattern_syntax(&line_syntax).map_err(|e| match e { |
293 line_number, |
360 PatternError::UnsupportedSyntax(syntax) => { |
294 line.to_owned(), |
361 PatternError::UnsupportedSyntaxInFile( |
|
362 syntax, |
|
363 file_path.as_ref().to_string_lossy().into(), |
|
364 line_number, |
|
365 ) |
|
366 } |
|
367 _ => e, |
|
368 })?, |
|
369 &line, |
|
370 &file_path, |
295 )); |
371 )); |
296 } |
372 } |
297 (inputs, warnings) |
373 Ok((inputs, warnings)) |
298 } |
374 } |
299 |
375 |
300 pub fn read_pattern_file<P: AsRef<Path>>( |
376 pub fn read_pattern_file<P: AsRef<Path>>( |
301 file_path: P, |
377 file_path: P, |
302 warn: bool, |
378 warn: bool, |
303 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> { |
379 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
304 let mut f = File::open(file_path.as_ref())?; |
380 let mut f = match File::open(file_path.as_ref()) { |
|
381 Ok(f) => Ok(f), |
|
382 Err(e) => match e.kind() { |
|
383 std::io::ErrorKind::NotFound => { |
|
384 return Ok(( |
|
385 vec![], |
|
386 vec![PatternFileWarning::NoSuchFile( |
|
387 file_path.as_ref().to_owned(), |
|
388 )], |
|
389 )) |
|
390 } |
|
391 _ => Err(e), |
|
392 }, |
|
393 }?; |
305 let mut contents = Vec::new(); |
394 let mut contents = Vec::new(); |
306 |
395 |
307 f.read_to_end(&mut contents)?; |
396 f.read_to_end(&mut contents)?; |
308 |
397 |
309 Ok(parse_pattern_file_contents(&contents, file_path, warn)) |
398 Ok(parse_pattern_file_contents(&contents, file_path, warn)?) |
310 } |
399 } |
|
400 |
|
401 /// Represents an entry in an "ignore" file. |
|
402 #[derive(Debug, Eq, PartialEq, Clone)] |
|
403 pub struct IgnorePattern { |
|
404 pub syntax: PatternSyntax, |
|
405 pub pattern: Vec<u8>, |
|
406 pub source: PathBuf, |
|
407 } |
|
408 |
|
409 impl IgnorePattern { |
|
410 pub fn new( |
|
411 syntax: PatternSyntax, |
|
412 pattern: &[u8], |
|
413 source: impl AsRef<Path>, |
|
414 ) -> Self { |
|
415 Self { |
|
416 syntax, |
|
417 pattern: pattern.to_owned(), |
|
418 source: source.as_ref().to_owned(), |
|
419 } |
|
420 } |
|
421 } |
|
422 |
|
423 pub type PatternResult<T> = Result<T, PatternError>; |
311 |
424 |
312 #[cfg(test)] |
425 #[cfg(test)] |
313 mod tests { |
426 mod tests { |
314 use super::*; |
427 use super::*; |
|
428 use pretty_assertions::assert_eq; |
315 |
429 |
316 #[test] |
430 #[test] |
317 fn escape_pattern_test() { |
431 fn escape_pattern_test() { |
318 let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#; |
432 let untouched = |
|
433 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#; |
319 assert_eq!(escape_pattern(untouched), untouched.to_vec()); |
434 assert_eq!(escape_pattern(untouched), untouched.to_vec()); |
320 // All escape codes |
435 // All escape codes |
321 assert_eq!( |
436 assert_eq!( |
322 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#), |
437 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#), |
323 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"# |
438 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"# |
340 #[test] |
455 #[test] |
341 fn test_parse_pattern_file_contents() { |
456 fn test_parse_pattern_file_contents() { |
342 let lines = b"syntax: glob\n*.elc"; |
457 let lines = b"syntax: glob\n*.elc"; |
343 |
458 |
344 assert_eq!( |
459 assert_eq!( |
345 vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())], |
|
346 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
460 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
461 .unwrap() |
347 .0, |
462 .0, |
|
463 vec![IgnorePattern::new( |
|
464 PatternSyntax::RelGlob, |
|
465 b"*.elc", |
|
466 Path::new("file_path") |
|
467 )], |
348 ); |
468 ); |
349 |
469 |
350 let lines = b"syntax: include\nsyntax: glob"; |
470 let lines = b"syntax: include\nsyntax: glob"; |
351 |
471 |
352 assert_eq!( |
472 assert_eq!( |
353 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
473 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
474 .unwrap() |
354 .0, |
475 .0, |
355 vec![] |
476 vec![] |
356 ); |
477 ); |
357 let lines = b"glob:**.o"; |
478 let lines = b"glob:**.o"; |
358 assert_eq!( |
479 assert_eq!( |
359 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
480 parse_pattern_file_contents(lines, Path::new("file_path"), false) |
|
481 .unwrap() |
360 .0, |
482 .0, |
361 vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())] |
483 vec![IgnorePattern::new( |
|
484 PatternSyntax::RelGlob, |
|
485 b"**.o", |
|
486 Path::new("file_path") |
|
487 )] |
|
488 ); |
|
489 } |
|
490 |
|
491 #[test] |
|
492 fn test_build_single_regex() { |
|
493 assert_eq!( |
|
494 build_single_regex(&IgnorePattern::new( |
|
495 PatternSyntax::RelGlob, |
|
496 b"rust/target/", |
|
497 Path::new("") |
|
498 )) |
|
499 .unwrap(), |
|
500 br"(?:|.*/)rust/target(?:/|$)".to_vec(), |
362 ); |
501 ); |
363 } |
502 } |
364 |
503 |
365 #[test] |
504 #[test] |
366 fn test_build_single_regex_shortcut() { |
505 fn test_build_single_regex_shortcut() { |
367 assert_eq!( |
506 assert_eq!( |
368 br"(?:/|$)".to_vec(), |
507 build_single_regex(&IgnorePattern::new( |
369 build_single_regex(b"rootglob", b"", b"").unwrap() |
508 PatternSyntax::RootGlob, |
370 ); |
509 b"", |
371 assert_eq!( |
510 Path::new("") |
|
511 )) |
|
512 .unwrap(), |
|
513 br"\.(?:/|$)".to_vec(), |
|
514 ); |
|
515 assert_eq!( |
|
516 build_single_regex(&IgnorePattern::new( |
|
517 PatternSyntax::RootGlob, |
|
518 b"whatever", |
|
519 Path::new("") |
|
520 )) |
|
521 .unwrap(), |
372 br"whatever(?:/|$)".to_vec(), |
522 br"whatever(?:/|$)".to_vec(), |
373 build_single_regex(b"rootglob", b"whatever", b"").unwrap() |
523 ); |
374 ); |
524 assert_eq!( |
375 assert_eq!( |
525 build_single_regex(&IgnorePattern::new( |
376 br"[^/]*\.o".to_vec(), |
526 PatternSyntax::RootGlob, |
377 build_single_regex(b"rootglob", b"*.o", b"").unwrap() |
527 b"*.o", |
378 ); |
528 Path::new("") |
379 } |
529 )) |
380 } |
530 .unwrap(), |
|
531 br"[^/]*\.o(?:/|$)".to_vec(), |
|
532 ); |
|
533 } |
|
534 } |