Mercurial > public > mercurial-scm > hg
changeset 52353:e2e49069eeb6
rust-ignore: make `debugignorerhg` command show a full regex, with exact files
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Tue, 03 Dec 2024 13:51:51 +0000 |
parents | 2ff004fb491c |
children | ff19ddb256b3 |
files | rust/hg-core/src/filepatterns.rs rust/hg-core/src/matchers.rs rust/rhg/src/commands/debugignorerhg.rs |
diffstat | 3 files changed, 113 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs Mon Dec 02 11:25:26 2024 +0000 +++ b/rust/hg-core/src/filepatterns.rs Tue Dec 03 13:51:51 2024 +0000 @@ -368,11 +368,35 @@ } } +/// Controls whether we want the emitted regex to cover all cases +/// or just the cases that are not covered by optimized code paths. +#[derive(Debug, Clone, Copy)] +pub enum RegexCompleteness { + /// `Complete` emits a regex that handles all files, including the ones + /// that are typically handled by a different code path. + /// This is used in `hg debugignorerhg -a` to avoid missing some rules. + Complete, + /// `ExcludeExactFiles` excludes the patterns that correspond to exact + /// file matches. This is the normal behavior, and gives a potentially + /// much smaller regex. + ExcludeExactFiles, +} + +impl RegexCompleteness { + fn may_exclude_exact_files(self) -> bool { + match self { + Self::Complete => false, + Self::ExcludeExactFiles => true, + } + } +} + /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs /// that don't need to be transformed into a regex. pub fn build_single_regex( entry: &IgnorePattern, glob_suffix: GlobSuffix, + regex_config: RegexCompleteness, ) -> Result<Option<Vec<u8>>, PatternError> { let IgnorePattern { pattern, syntax, .. @@ -390,7 +414,9 @@ }; let is_simple_rootglob = *syntax == PatternSyntax::RootGlob && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)); - if is_simple_rootglob || syntax == &PatternSyntax::FilePath { + if regex_config.may_exclude_exact_files() + && (is_simple_rootglob || syntax == &PatternSyntax::FilePath) + { Ok(None) } else { let mut entry = entry.clone(); @@ -818,6 +844,17 @@ ); } + pub fn build_single_regex( + entry: &IgnorePattern, + glob_suffix: GlobSuffix, + ) -> Result<Option<Vec<u8>>, PatternError> { + super::build_single_regex( + entry, + glob_suffix, + RegexCompleteness::ExcludeExactFiles, + ) + } + #[test] fn test_build_single_regex() { assert_eq!(
--- a/rust/hg-core/src/matchers.rs Mon Dec 02 11:25:26 2024 +0000 +++ b/rust/hg-core/src/matchers.rs Tue Dec 03 13:51:51 2024 +0000 @@ -15,7 +15,7 @@ filepatterns::{ build_single_regex, filter_subincludes, get_patterns_from_file, GlobSuffix, IgnorePattern, PatternError, PatternFileWarning, - PatternResult, PatternSyntax, + PatternResult, PatternSyntax, RegexCompleteness, }, utils::{ files::{dir_ancestors, find_dirs}, @@ -328,8 +328,11 @@ let prefix = ignore_patterns.iter().all(|k| { matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath) }); - let (patterns, match_fn) = - build_match(ignore_patterns, GlobSuffix::Empty)?; + let (patterns, match_fn) = build_match( + ignore_patterns, + GlobSuffix::Empty, + RegexCompleteness::ExcludeExactFiles, + )?; Ok(Self { patterns, @@ -384,6 +387,29 @@ } } +/// A collection of patterns sufficient to construct an `IncludeMatcher`. +pub struct IncludeMatcherPre { + patterns: Vec<IgnorePattern>, +} + +impl IncludeMatcherPre { + pub fn build_matcher(self) -> PatternResult<IncludeMatcher<'static>> { + IncludeMatcher::new(self.patterns) + } + + /// Used to print the full hgignore regex in `hg debugignorerhg`. + pub fn build_debug_matcher( + self, + regex_config: RegexCompleteness, + ) -> PatternResult<IncludeMatcher<'static>> { + IncludeMatcher::new_gen(self.patterns, regex_config) + } + + fn new(patterns: Vec<IgnorePattern>) -> Self { + Self { patterns } + } +} + /// Matches files that are included in the ignore rules. /// ``` /// use hg::{ @@ -809,12 +835,15 @@ fn build_regex_match<'a>( ignore_patterns: &[IgnorePattern], glob_suffix: GlobSuffix, + regex_config: RegexCompleteness, ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { let mut regexps = vec![]; let mut exact_set = HashSet::new(); for pattern in ignore_patterns { - if let Some(re) = build_single_regex(pattern, glob_suffix)? { + if let Some(re) = + build_single_regex(pattern, glob_suffix, regex_config)? 
+ { regexps.push(re); } else { let exact = normalize_path_bytes(&pattern.pattern); @@ -929,6 +958,7 @@ fn build_match<'a>( ignore_patterns: Vec<IgnorePattern>, glob_suffix: GlobSuffix, + regex_config: RegexCompleteness, ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![]; // For debugging and printing @@ -988,8 +1018,11 @@ dirs_vec.sort(); patterns.extend(dirs_vec.escaped_bytes()); } else { - let (new_re, match_func) = - build_regex_match(&ignore_patterns, glob_suffix)?; + let (new_re, match_func) = build_regex_match( + &ignore_patterns, + glob_suffix, + regex_config, + )?; patterns = new_re; match_funcs.push(match_func) } @@ -1010,11 +1043,11 @@ /// Parses all "ignore" files with their recursive includes and returns a /// function that checks whether a given file (in the general sense) should be /// ignored. -pub fn get_ignore_matcher<'a>( +pub fn get_ignore_matcher_pre( mut all_pattern_files: Vec<PathBuf>, root_dir: &Path, inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), -) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> { +) -> PatternResult<(IncludeMatcherPre, Vec<PatternFileWarning>)> { let mut all_patterns = vec![]; let mut all_warnings = vec![]; @@ -1036,10 +1069,23 @@ all_patterns.extend(patterns.to_owned()); all_warnings.extend(warnings); } - let matcher = IncludeMatcher::new(all_patterns)?; + let matcher = IncludeMatcherPre::new(all_patterns); Ok((matcher, all_warnings)) } +pub fn get_ignore_matcher<'a>( + all_pattern_files: Vec<PathBuf>, + root_dir: &Path, + inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), +) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> { + let (pre_matcher, warnings) = get_ignore_matcher_pre( + all_pattern_files, + root_dir, + inspect_pattern_bytes, + )?; + Ok((pre_matcher.build_matcher()?, warnings)) +} + /// Parses all "ignore" files with their recursive includes and returns a /// function that checks whether a given file (in the general 
sense) should be /// ignored. @@ -1059,7 +1105,10 @@ } impl<'a> IncludeMatcher<'a> { - pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> { + fn new_gen( + ignore_patterns: Vec<IgnorePattern>, + regex_config: RegexCompleteness, + ) -> PatternResult<Self> { let RootsDirsAndParents { roots, dirs, @@ -1068,8 +1117,11 @@ let prefix = ignore_patterns.iter().all(|k| { matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath) }); - let (patterns, match_fn) = - build_match(ignore_patterns, GlobSuffix::MoreComponents)?; + let (patterns, match_fn) = build_match( + ignore_patterns, + GlobSuffix::MoreComponents, + regex_config, + )?; Ok(Self { patterns, @@ -1081,6 +1133,10 @@ }) } + pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> { + Self::new_gen(ignore_patterns, RegexCompleteness::ExcludeExactFiles) + } + fn get_all_parents_children(&self) -> DirsChildrenMultiset { // TODO cache let thing = self
--- a/rust/rhg/src/commands/debugignorerhg.rs Mon Dec 02 11:25:26 2024 +0000 +++ b/rust/rhg/src/commands/debugignorerhg.rs Tue Dec 03 13:51:51 2024 +0000 @@ -1,6 +1,7 @@ use crate::error::CommandError; use hg::dirstate::status::StatusError; -use hg::matchers::get_ignore_matcher; +use hg::filepatterns::RegexCompleteness; +use hg::matchers::get_ignore_matcher_pre; use log::warn; pub const HELP_TEXT: &str = " @@ -20,13 +21,17 @@ let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded - let (ignore_matcher, warnings) = get_ignore_matcher( + let (ignore_matcher, warnings) = get_ignore_matcher_pre( vec![ignore_file], repo.working_directory_path(), &mut |_source, _pattern_bytes| (), ) .map_err(StatusError::from)?; + let ignore_matcher = ignore_matcher + .build_debug_matcher(RegexCompleteness::Complete) + .map_err(StatusError::from)?; + if !warnings.is_empty() { warn!("Pattern warnings: {:?}", &warnings); }