comparison rust/hg-core/src/filepatterns.rs @ 47385:f6bb181c75f8

rust: Parse "subinclude"d files along the way, not later When parsing a `.hgignore` file and encountering an `include:` line, the included file is parsed recursively right then in a depth-first fashion. With `subinclude:`, however, included files were parsed (recursively) much later. This changes it to be expanded during parsing, like `.hgignore`. The motivation for this is an upcoming changeset that needs to detect changes in which files are ignored or not. The plan is to hash all ignore files while they are being read, and store that hash in the dirstate (in v2 format). In order to allow potential alternative implementations to read that format, the algorithm to compute that hash must be documented. Having a well-defined depth-first ordering for the tree of (sub-)included files makes that easier. Differential Revision: https://phab.mercurial-scm.org/D10834
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 02 Jun 2021 18:03:43 +0200
parents 777c3d231913
children 0ef8231e413f
comparison
equal deleted inserted replaced
47384:777c3d231913 47385:f6bb181c75f8
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; 39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40 40
41 /// Appended to the regexp of globs 41 /// Appended to the regexp of globs
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; 42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43 43
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)] 44 #[derive(Debug, Clone, PartialEq, Eq)]
45 pub enum PatternSyntax { 45 pub enum PatternSyntax {
46 /// A regular expression 46 /// A regular expression
47 Regexp, 47 Regexp,
48 /// Glob that matches at the front of the path 48 /// Glob that matches at the front of the path
49 RootGlob, 49 RootGlob,
63 RootFiles, 63 RootFiles,
64 /// A file of patterns to read and include 64 /// A file of patterns to read and include
65 Include, 65 Include,
66 /// A file of patterns to match against files under the same directory 66 /// A file of patterns to match against files under the same directory
67 SubInclude, 67 SubInclude,
68 /// SubInclude with the result of parsing the included file
69 ///
70 /// Note: there is no ExpandedInclude because that expansion can be done
71 /// in place by replacing the Include pattern by the included patterns.
72 /// SubInclude requires more handling.
73 ///
74 /// Note: `Box` is used to minimize size impact on other enum variants
75 ExpandedSubInclude(Box<SubInclude>),
68 } 76 }
69 77
70 /// Transforms a glob pattern into a regex 78 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> { 79 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat; 80 let mut input = pat;
216 } 224 }
217 } 225 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => { 226 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() 227 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 } 228 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), 229 PatternSyntax::Include
230 | PatternSyntax::SubInclude
231 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
222 } 232 }
223 } 233 }
224 234
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] = 235 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\']; 236 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
439 } 449 }
440 450
441 pub type PatternResult<T> = Result<T, PatternError>; 451 pub type PatternResult<T> = Result<T, PatternError>;
442 452
443 /// Wrapper for `read_pattern_file` that also recursively expands `include:` 453 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
444 /// patterns. 454 /// and `subinclude:` patterns.
445 /// 455 ///
446 /// `subinclude:` is not treated as a special pattern here: unraveling them 456 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
447 /// needs to occur in the "ignore" phase. 457 /// is used for the latter to form a tree of patterns.
448 pub fn get_patterns_from_file( 458 pub fn get_patterns_from_file(
449 pattern_file: &Path, 459 pattern_file: &Path,
450 root_dir: &Path, 460 root_dir: &Path,
451 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { 461 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
452 let (patterns, mut warnings) = read_pattern_file(pattern_file, true)?; 462 let (patterns, mut warnings) = read_pattern_file(pattern_file, true)?;
453 let patterns = patterns 463 let patterns = patterns
454 .into_iter() 464 .into_iter()
455 .flat_map(|entry| -> PatternResult<_> { 465 .flat_map(|entry| -> PatternResult<_> {
456 let IgnorePattern { 466 Ok(match &entry.syntax {
457 syntax, pattern, ..
458 } = &entry;
459 Ok(match syntax {
460 PatternSyntax::Include => { 467 PatternSyntax::Include => {
461 let inner_include = 468 let inner_include =
462 root_dir.join(get_path_from_bytes(&pattern)); 469 root_dir.join(get_path_from_bytes(&entry.pattern));
463 let (inner_pats, inner_warnings) = 470 let (inner_pats, inner_warnings) =
464 get_patterns_from_file(&inner_include, root_dir)?; 471 get_patterns_from_file(&inner_include, root_dir)?;
465 warnings.extend(inner_warnings); 472 warnings.extend(inner_warnings);
466 inner_pats 473 inner_pats
474 }
475 PatternSyntax::SubInclude => {
476 let mut sub_include = SubInclude::new(
477 &root_dir,
478 &entry.pattern,
479 &entry.source,
480 )?;
481 let (inner_patterns, inner_warnings) =
482 get_patterns_from_file(
483 &sub_include.path,
484 &sub_include.root,
485 )?;
486 sub_include.included_patterns = inner_patterns;
487 warnings.extend(inner_warnings);
488 vec![IgnorePattern {
489 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
490 sub_include,
491 )),
492 ..entry
493 }]
467 } 494 }
468 _ => vec![entry], 495 _ => vec![entry],
469 }) 496 })
470 }) 497 })
471 .flatten() 498 .flatten()
473 500
474 Ok((patterns, warnings)) 501 Ok((patterns, warnings))
475 } 502 }
476 503
477 /// Holds all the information needed to handle a `subinclude:` pattern. 504 /// Holds all the information needed to handle a `subinclude:` pattern.
505 #[derive(Debug, PartialEq, Eq, Clone)]
478 pub struct SubInclude { 506 pub struct SubInclude {
479 /// Will be used for repository (hg) paths that start with this prefix. 507 /// Will be used for repository (hg) paths that start with this prefix.
480 /// It is relative to the current working directory, so comparing against 508 /// It is relative to the current working directory, so comparing against
481 /// repository paths is painless. 509 /// repository paths is painless.
482 pub prefix: HgPathBuf, 510 pub prefix: HgPathBuf,
483 /// The file itself, containing the patterns 511 /// The file itself, containing the patterns
484 pub path: PathBuf, 512 pub path: PathBuf,
485 /// Folder in the filesystem where this it applies 513 /// Folder in the filesystem where this it applies
486 pub root: PathBuf, 514 pub root: PathBuf,
515
516 pub included_patterns: Vec<IgnorePattern>,
487 } 517 }
488 518
489 impl SubInclude { 519 impl SubInclude {
490 pub fn new( 520 pub fn new(
491 root_dir: &Path, 521 root_dir: &Path,
511 } 541 }
512 Ok(p) 542 Ok(p)
513 })?, 543 })?,
514 path: path.to_owned(), 544 path: path.to_owned(),
515 root: new_root.to_owned(), 545 root: new_root.to_owned(),
546 included_patterns: Vec::new(),
516 }) 547 })
517 } 548 }
518 } 549 }
519 550
520 /// Separate and pre-process subincludes from other patterns for the "ignore" 551 /// Separate and pre-process subincludes from other patterns for the "ignore"
521 /// phase. 552 /// phase.
522 pub fn filter_subincludes<'a>( 553 pub fn filter_subincludes(
523 ignore_patterns: &'a [IgnorePattern], 554 ignore_patterns: Vec<IgnorePattern>,
524 root_dir: &Path, 555 ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
525 ) -> Result<(Vec<SubInclude>, Vec<&'a IgnorePattern>), HgPathError> {
526 let mut subincludes = vec![]; 556 let mut subincludes = vec![];
527 let mut others = vec![]; 557 let mut others = vec![];
528 558
529 for ignore_pattern in ignore_patterns.iter() { 559 for pattern in ignore_patterns {
530 let IgnorePattern { 560 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
531 syntax, 561 {
532 pattern, 562 subincludes.push(sub_include);
533 source,
534 } = ignore_pattern;
535 if *syntax == PatternSyntax::SubInclude {
536 subincludes.push(SubInclude::new(root_dir, pattern, &source)?);
537 } else { 563 } else {
538 others.push(ignore_pattern) 564 others.push(pattern)
539 } 565 }
540 } 566 }
541 Ok((subincludes, others)) 567 Ok((subincludes, others))
542 } 568 }
543 569