changeset 52353:e2e49069eeb6

rust-ignore: make `debugignorerhg` command show a full regex, with exact files
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Tue, 03 Dec 2024 13:51:51 +0000
parents 2ff004fb491c
children ff19ddb256b3
files rust/hg-core/src/filepatterns.rs rust/hg-core/src/matchers.rs rust/rhg/src/commands/debugignorerhg.rs
diffstat 3 files changed, 113 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs	Mon Dec 02 11:25:26 2024 +0000
+++ b/rust/hg-core/src/filepatterns.rs	Tue Dec 03 13:51:51 2024 +0000
@@ -368,11 +368,35 @@
     }
 }
 
+/// Controls whether we want the emitted regex to cover all cases
+/// or just the cases that are not covered by optimized code paths.
+#[derive(Debug, Clone, Copy)]
+pub enum RegexCompleteness {
+    /// `Complete` emits a regex that handles all files, including the ones
+    /// that are typically handled by a different code path.
+    /// This is used in `hg debugignorerhg -a` to avoid missing some rules.
+    Complete,
+    /// `ExcludeExactFiles` excludes the patterns that correspond to exact
+    /// file matches. This is the normal behavior, and gives a potentially
+    /// much smaller regex.
+    ExcludeExactFiles,
+}
+
+impl RegexCompleteness {
+    fn may_exclude_exact_files(self) -> bool {
+        match self {
+            Self::Complete => false,
+            Self::ExcludeExactFiles => true,
+        }
+    }
+}
+
 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
 /// that don't need to be transformed into a regex.
 pub fn build_single_regex(
     entry: &IgnorePattern,
     glob_suffix: GlobSuffix,
+    regex_config: RegexCompleteness,
 ) -> Result<Option<Vec<u8>>, PatternError> {
     let IgnorePattern {
         pattern, syntax, ..
@@ -390,7 +414,9 @@
     };
     let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
         && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
-    if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
+    if regex_config.may_exclude_exact_files()
+        && (is_simple_rootglob || syntax == &PatternSyntax::FilePath)
+    {
         Ok(None)
     } else {
         let mut entry = entry.clone();
@@ -818,6 +844,17 @@
         );
     }
 
+    pub fn build_single_regex(
+        entry: &IgnorePattern,
+        glob_suffix: GlobSuffix,
+    ) -> Result<Option<Vec<u8>>, PatternError> {
+        super::build_single_regex(
+            entry,
+            glob_suffix,
+            RegexCompleteness::ExcludeExactFiles,
+        )
+    }
+
     #[test]
     fn test_build_single_regex() {
         assert_eq!(
--- a/rust/hg-core/src/matchers.rs	Mon Dec 02 11:25:26 2024 +0000
+++ b/rust/hg-core/src/matchers.rs	Tue Dec 03 13:51:51 2024 +0000
@@ -15,7 +15,7 @@
     filepatterns::{
         build_single_regex, filter_subincludes, get_patterns_from_file,
         GlobSuffix, IgnorePattern, PatternError, PatternFileWarning,
-        PatternResult, PatternSyntax,
+        PatternResult, PatternSyntax, RegexCompleteness,
     },
     utils::{
         files::{dir_ancestors, find_dirs},
@@ -328,8 +328,11 @@
         let prefix = ignore_patterns.iter().all(|k| {
             matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
         });
-        let (patterns, match_fn) =
-            build_match(ignore_patterns, GlobSuffix::Empty)?;
+        let (patterns, match_fn) = build_match(
+            ignore_patterns,
+            GlobSuffix::Empty,
+            RegexCompleteness::ExcludeExactFiles,
+        )?;
 
         Ok(Self {
             patterns,
@@ -384,6 +387,29 @@
     }
 }
 
+/// A collection of patterns sufficient to construct an `IncludeMatcher`.
+pub struct IncludeMatcherPre {
+    patterns: Vec<IgnorePattern>,
+}
+
+impl IncludeMatcherPre {
+    pub fn build_matcher(self) -> PatternResult<IncludeMatcher<'static>> {
+        IncludeMatcher::new(self.patterns)
+    }
+
+    /// Used to print the full hgignore regex in `hg debugignorerhg`.
+    pub fn build_debug_matcher(
+        self,
+        regex_config: RegexCompleteness,
+    ) -> PatternResult<IncludeMatcher<'static>> {
+        IncludeMatcher::new_gen(self.patterns, regex_config)
+    }
+
+    fn new(patterns: Vec<IgnorePattern>) -> Self {
+        Self { patterns }
+    }
+}
+
 /// Matches files that are included in the ignore rules.
 /// ```
 /// use hg::{
@@ -809,12 +835,15 @@
 fn build_regex_match<'a>(
     ignore_patterns: &[IgnorePattern],
     glob_suffix: GlobSuffix,
+    regex_config: RegexCompleteness,
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
     let mut regexps = vec![];
     let mut exact_set = HashSet::new();
 
     for pattern in ignore_patterns {
-        if let Some(re) = build_single_regex(pattern, glob_suffix)? {
+        if let Some(re) =
+            build_single_regex(pattern, glob_suffix, regex_config)?
+        {
             regexps.push(re);
         } else {
             let exact = normalize_path_bytes(&pattern.pattern);
@@ -929,6 +958,7 @@
 fn build_match<'a>(
     ignore_patterns: Vec<IgnorePattern>,
     glob_suffix: GlobSuffix,
+    regex_config: RegexCompleteness,
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
     let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
     // For debugging and printing
@@ -988,8 +1018,11 @@
             dirs_vec.sort();
             patterns.extend(dirs_vec.escaped_bytes());
         } else {
-            let (new_re, match_func) =
-                build_regex_match(&ignore_patterns, glob_suffix)?;
+            let (new_re, match_func) = build_regex_match(
+                &ignore_patterns,
+                glob_suffix,
+                regex_config,
+            )?;
             patterns = new_re;
             match_funcs.push(match_func)
         }
@@ -1010,11 +1043,11 @@
 /// Parses all "ignore" files with their recursive includes and returns a
 /// function that checks whether a given file (in the general sense) should be
 /// ignored.
-pub fn get_ignore_matcher<'a>(
+pub fn get_ignore_matcher_pre(
     mut all_pattern_files: Vec<PathBuf>,
     root_dir: &Path,
     inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
-) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
+) -> PatternResult<(IncludeMatcherPre, Vec<PatternFileWarning>)> {
     let mut all_patterns = vec![];
     let mut all_warnings = vec![];
 
@@ -1036,10 +1069,23 @@
         all_patterns.extend(patterns.to_owned());
         all_warnings.extend(warnings);
     }
-    let matcher = IncludeMatcher::new(all_patterns)?;
+    let matcher = IncludeMatcherPre::new(all_patterns);
     Ok((matcher, all_warnings))
 }
 
+pub fn get_ignore_matcher<'a>(
+    all_pattern_files: Vec<PathBuf>,
+    root_dir: &Path,
+    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
+) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
+    let (pre_matcher, warnings) = get_ignore_matcher_pre(
+        all_pattern_files,
+        root_dir,
+        inspect_pattern_bytes,
+    )?;
+    Ok((pre_matcher.build_matcher()?, warnings))
+}
+
 /// Parses all "ignore" files with their recursive includes and returns a
 /// function that checks whether a given file (in the general sense) should be
 /// ignored.
@@ -1059,7 +1105,10 @@
 }
 
 impl<'a> IncludeMatcher<'a> {
-    pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
+    fn new_gen(
+        ignore_patterns: Vec<IgnorePattern>,
+        regex_config: RegexCompleteness,
+    ) -> PatternResult<Self> {
         let RootsDirsAndParents {
             roots,
             dirs,
@@ -1068,8 +1117,11 @@
         let prefix = ignore_patterns.iter().all(|k| {
             matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
         });
-        let (patterns, match_fn) =
-            build_match(ignore_patterns, GlobSuffix::MoreComponents)?;
+        let (patterns, match_fn) = build_match(
+            ignore_patterns,
+            GlobSuffix::MoreComponents,
+            regex_config,
+        )?;
 
         Ok(Self {
             patterns,
@@ -1081,6 +1133,10 @@
         })
     }
 
+    pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
+        Self::new_gen(ignore_patterns, RegexCompleteness::ExcludeExactFiles)
+    }
+
     fn get_all_parents_children(&self) -> DirsChildrenMultiset {
         // TODO cache
         let thing = self
--- a/rust/rhg/src/commands/debugignorerhg.rs	Mon Dec 02 11:25:26 2024 +0000
+++ b/rust/rhg/src/commands/debugignorerhg.rs	Tue Dec 03 13:51:51 2024 +0000
@@ -1,6 +1,7 @@
 use crate::error::CommandError;
 use hg::dirstate::status::StatusError;
-use hg::matchers::get_ignore_matcher;
+use hg::filepatterns::RegexCompleteness;
+use hg::matchers::get_ignore_matcher_pre;
 use log::warn;
 
 pub const HELP_TEXT: &str = "
@@ -20,13 +21,17 @@
 
     let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded
 
-    let (ignore_matcher, warnings) = get_ignore_matcher(
+    let (ignore_matcher, warnings) = get_ignore_matcher_pre(
         vec![ignore_file],
         repo.working_directory_path(),
         &mut |_source, _pattern_bytes| (),
     )
     .map_err(StatusError::from)?;
 
+    let ignore_matcher = ignore_matcher
+        .build_debug_matcher(RegexCompleteness::Complete)
+        .map_err(StatusError::from)?;
+
     if !warnings.is_empty() {
         warn!("Pattern warnings: {:?}", &warnings);
     }