Mercurial > public > mercurial-scm > hg-stable
comparison rust/hg-core/src/filepatterns.rs @ 47385:f6bb181c75f8
rust: Parse "subinclude"d files along the way, not later
When parsing a `.hgignore` file and encountering an `include:` line,
the included file is parsed recursively right then in a depth-first fashion.
With `subinclude:`, however, included files were parsed (recursively) much later.
This changes it to be expanded during parsing, like `.hgignore`.
The motivation for this is an upcoming changeset that needs to detect changes
in which files are ignored or not. The plan is to hash all ignore files while
they are being read, and store that hash in the dirstate (in v2 format).
In order to allow potential alternative implementations to read that format,
the algorithm to compute that hash must be documented. Having a well-defined
depth-first ordering for the tree of (sub-)included files makes that easier.
Differential Revision: https://phab.mercurial-scm.org/D10834
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 02 Jun 2021 18:03:43 +0200 |
parents | 777c3d231913 |
children | 0ef8231e413f |
comparison
equal
deleted
inserted
replaced
47384:777c3d231913 | 47385:f6bb181c75f8 |
---|---|
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; | 39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; |
40 | 40 |
41 /// Appended to the regexp of globs | 41 /// Appended to the regexp of globs |
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; | 42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; |
43 | 43 |
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)] | 44 #[derive(Debug, Clone, PartialEq, Eq)] |
45 pub enum PatternSyntax { | 45 pub enum PatternSyntax { |
46 /// A regular expression | 46 /// A regular expression |
47 Regexp, | 47 Regexp, |
48 /// Glob that matches at the front of the path | 48 /// Glob that matches at the front of the path |
49 RootGlob, | 49 RootGlob, |
63 RootFiles, | 63 RootFiles, |
64 /// A file of patterns to read and include | 64 /// A file of patterns to read and include |
65 Include, | 65 Include, |
66 /// A file of patterns to match against files under the same directory | 66 /// A file of patterns to match against files under the same directory |
67 SubInclude, | 67 SubInclude, |
68 /// SubInclude with the result of parsing the included file | |
69 /// | |
70 /// Note: there is no ExpandedInclude because that expansion can be done | |
71 /// in place by replacing the Include pattern by the included patterns. | |
72 /// SubInclude requires more handling. | |
73 /// | |
74 /// Note: `Box` is used to minimize size impact on other enum variants | |
75 ExpandedSubInclude(Box<SubInclude>), | |
68 } | 76 } |
69 | 77 |
70 /// Transforms a glob pattern into a regex | 78 /// Transforms a glob pattern into a regex |
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> { | 79 fn glob_to_re(pat: &[u8]) -> Vec<u8> { |
72 let mut input = pat; | 80 let mut input = pat; |
216 } | 224 } |
217 } | 225 } |
218 PatternSyntax::Glob | PatternSyntax::RootGlob => { | 226 PatternSyntax::Glob | PatternSyntax::RootGlob => { |
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | 227 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() |
220 } | 228 } |
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), | 229 PatternSyntax::Include |
230 | PatternSyntax::SubInclude | |
231 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(), | |
222 } | 232 } |
223 } | 233 } |
224 | 234 |
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] = | 235 const GLOB_SPECIAL_CHARACTERS: [u8; 7] = |
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\']; | 236 [b'*', b'?', b'[', b']', b'{', b'}', b'\\']; |
439 } | 449 } |
440 | 450 |
441 pub type PatternResult<T> = Result<T, PatternError>; | 451 pub type PatternResult<T> = Result<T, PatternError>; |
442 | 452 |
443 /// Wrapper for `read_pattern_file` that also recursively expands `include:` | 453 /// Wrapper for `read_pattern_file` that also recursively expands `include:` |
444 /// patterns. | 454 /// and `subinclude:` patterns. |
445 /// | 455 /// |
446 /// `subinclude:` is not treated as a special pattern here: unraveling them | 456 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude` |
447 /// needs to occur in the "ignore" phase. | 457 /// is used for the latter to form a tree of patterns. |
448 pub fn get_patterns_from_file( | 458 pub fn get_patterns_from_file( |
449 pattern_file: &Path, | 459 pattern_file: &Path, |
450 root_dir: &Path, | 460 root_dir: &Path, |
451 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { | 461 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { |
452 let (patterns, mut warnings) = read_pattern_file(pattern_file, true)?; | 462 let (patterns, mut warnings) = read_pattern_file(pattern_file, true)?; |
453 let patterns = patterns | 463 let patterns = patterns |
454 .into_iter() | 464 .into_iter() |
455 .flat_map(|entry| -> PatternResult<_> { | 465 .flat_map(|entry| -> PatternResult<_> { |
456 let IgnorePattern { | 466 Ok(match &entry.syntax { |
457 syntax, pattern, .. | |
458 } = &entry; | |
459 Ok(match syntax { | |
460 PatternSyntax::Include => { | 467 PatternSyntax::Include => { |
461 let inner_include = | 468 let inner_include = |
462 root_dir.join(get_path_from_bytes(&pattern)); | 469 root_dir.join(get_path_from_bytes(&entry.pattern)); |
463 let (inner_pats, inner_warnings) = | 470 let (inner_pats, inner_warnings) = |
464 get_patterns_from_file(&inner_include, root_dir)?; | 471 get_patterns_from_file(&inner_include, root_dir)?; |
465 warnings.extend(inner_warnings); | 472 warnings.extend(inner_warnings); |
466 inner_pats | 473 inner_pats |
474 } | |
475 PatternSyntax::SubInclude => { | |
476 let mut sub_include = SubInclude::new( | |
477 &root_dir, | |
478 &entry.pattern, | |
479 &entry.source, | |
480 )?; | |
481 let (inner_patterns, inner_warnings) = | |
482 get_patterns_from_file( | |
483 &sub_include.path, | |
484 &sub_include.root, | |
485 )?; | |
486 sub_include.included_patterns = inner_patterns; | |
487 warnings.extend(inner_warnings); | |
488 vec![IgnorePattern { | |
489 syntax: PatternSyntax::ExpandedSubInclude(Box::new( | |
490 sub_include, | |
491 )), | |
492 ..entry | |
493 }] | |
467 } | 494 } |
468 _ => vec![entry], | 495 _ => vec![entry], |
469 }) | 496 }) |
470 }) | 497 }) |
471 .flatten() | 498 .flatten() |
473 | 500 |
474 Ok((patterns, warnings)) | 501 Ok((patterns, warnings)) |
475 } | 502 } |
476 | 503 |
477 /// Holds all the information needed to handle a `subinclude:` pattern. | 504 /// Holds all the information needed to handle a `subinclude:` pattern. |
505 #[derive(Debug, PartialEq, Eq, Clone)] | |
478 pub struct SubInclude { | 506 pub struct SubInclude { |
479 /// Will be used for repository (hg) paths that start with this prefix. | 507 /// Will be used for repository (hg) paths that start with this prefix. |
480 /// It is relative to the current working directory, so comparing against | 508 /// It is relative to the current working directory, so comparing against |
481 /// repository paths is painless. | 509 /// repository paths is painless. |
482 pub prefix: HgPathBuf, | 510 pub prefix: HgPathBuf, |
483 /// The file itself, containing the patterns | 511 /// The file itself, containing the patterns |
484 pub path: PathBuf, | 512 pub path: PathBuf, |
485 /// Folder in the filesystem where this it applies | 513 /// Folder in the filesystem where this it applies |
486 pub root: PathBuf, | 514 pub root: PathBuf, |
515 | |
516 pub included_patterns: Vec<IgnorePattern>, | |
487 } | 517 } |
488 | 518 |
489 impl SubInclude { | 519 impl SubInclude { |
490 pub fn new( | 520 pub fn new( |
491 root_dir: &Path, | 521 root_dir: &Path, |
511 } | 541 } |
512 Ok(p) | 542 Ok(p) |
513 })?, | 543 })?, |
514 path: path.to_owned(), | 544 path: path.to_owned(), |
515 root: new_root.to_owned(), | 545 root: new_root.to_owned(), |
546 included_patterns: Vec::new(), | |
516 }) | 547 }) |
517 } | 548 } |
518 } | 549 } |
519 | 550 |
520 /// Separate and pre-process subincludes from other patterns for the "ignore" | 551 /// Separate and pre-process subincludes from other patterns for the "ignore" |
521 /// phase. | 552 /// phase. |
522 pub fn filter_subincludes<'a>( | 553 pub fn filter_subincludes( |
523 ignore_patterns: &'a [IgnorePattern], | 554 ignore_patterns: Vec<IgnorePattern>, |
524 root_dir: &Path, | 555 ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> { |
525 ) -> Result<(Vec<SubInclude>, Vec<&'a IgnorePattern>), HgPathError> { | |
526 let mut subincludes = vec![]; | 556 let mut subincludes = vec![]; |
527 let mut others = vec![]; | 557 let mut others = vec![]; |
528 | 558 |
529 for ignore_pattern in ignore_patterns.iter() { | 559 for pattern in ignore_patterns { |
530 let IgnorePattern { | 560 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax |
531 syntax, | 561 { |
532 pattern, | 562 subincludes.push(sub_include); |
533 source, | |
534 } = ignore_pattern; | |
535 if *syntax == PatternSyntax::SubInclude { | |
536 subincludes.push(SubInclude::new(root_dir, pattern, &source)?); | |
537 } else { | 563 } else { |
538 others.push(ignore_pattern) | 564 others.push(pattern) |
539 } | 565 } |
540 } | 566 } |
541 Ok((subincludes, others)) | 567 Ok((subincludes, others)) |
542 } | 568 } |
543 | 569 |