changeset 52352:2ff004fb491c

hgignore: add a GlobSuffix type, instead of passing byte arrays. I think this makes it easier to understand the purpose of this extra argument.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 02 Dec 2024 11:25:26 +0000
parents 9042ffea4edd
children e2e49069eeb6
files rust/hg-core/src/filepatterns.rs rust/hg-core/src/matchers.rs
diffstat 2 files changed, 50 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs	Fri Nov 29 19:43:39 2024 -0500
+++ b/rust/hg-core/src/filepatterns.rs	Mon Dec 02 11:25:26 2024 +0000
@@ -214,10 +214,33 @@
     static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
 }
 
+/// Extra path components to match at the end of the pattern.
+#[derive(Clone, Copy)]
+pub enum GlobSuffix {
+    /// `Empty` means the pattern only matches files, not directories,
+    /// so the path needs to match exactly.
+    Empty,
+    /// `MoreComponents` means the pattern matches directories as well,
+    /// so any path that has the pattern as a prefix should match.
+    MoreComponents,
+}
+
+impl GlobSuffix {
+    fn to_re(self) -> &'static [u8] {
+        match self {
+            Self::Empty => b"$", // end of input: nothing may follow
+            Self::MoreComponents => b"(?:/|$)", // then `/` or end of input
+        }
+    }
+}
+
 /// Builds the regex that corresponds to the given pattern.
 /// If within a `syntax: regexp` context, returns the pattern,
 /// otherwise, returns the corresponding regex.
-fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
+fn _build_single_regex(
+    entry: &IgnorePattern,
+    glob_suffix: GlobSuffix,
+) -> Vec<u8> {
     let IgnorePattern {
         syntax, pattern, ..
     } = entry;
@@ -264,7 +287,11 @@
             if pattern == b"." {
                 return vec![];
             }
-            [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
+            [
+                escape_pattern(pattern).as_slice(),
+                GlobSuffix::MoreComponents.to_re(),
+            ]
+            .concat()
         }
         PatternSyntax::RootFilesIn => {
             let mut res = if pattern == b"." {
@@ -281,13 +308,13 @@
         PatternSyntax::RelGlob => {
             let glob_re = glob_to_re(pattern);
             if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
-                [b".*", rest, glob_suffix].concat()
+                [b".*", rest, glob_suffix.to_re()].concat()
             } else {
-                [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
+                [b"(?:.*/)?", glob_re.as_slice(), glob_suffix.to_re()].concat()
             }
         }
         PatternSyntax::Glob | PatternSyntax::RootGlob => {
-            [glob_to_re(pattern).as_slice(), glob_suffix].concat()
+            [glob_to_re(pattern).as_slice(), glob_suffix.to_re()].concat()
         }
         PatternSyntax::Include
         | PatternSyntax::SubInclude
@@ -345,7 +372,7 @@
 /// that don't need to be transformed into a regex.
 pub fn build_single_regex(
     entry: &IgnorePattern,
-    glob_suffix: &[u8],
+    glob_suffix: GlobSuffix,
 ) -> Result<Option<Vec<u8>>, PatternError> {
     let IgnorePattern {
         pattern, syntax, ..
@@ -800,7 +827,7 @@
                     b"rust/target/",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
@@ -812,7 +839,7 @@
                     br"rust/target/\d+",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(br"rust/target/\d+".to_vec()),
@@ -828,7 +855,7 @@
                     b"",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             None,
@@ -840,7 +867,7 @@
                     b"whatever",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             None,
@@ -852,7 +879,7 @@
                     b"*.o",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(br"[^/]*\.o(?:/|$)".to_vec()),
@@ -868,7 +895,7 @@
                     b"^ba{2}r",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(b"^ba{2}r".to_vec()),
@@ -880,7 +907,7 @@
                     b"ba{2}r",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(b".*ba{2}r".to_vec()),
@@ -892,7 +919,7 @@
                     b"(?ia)ba{2}r",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(b"(?ia:.*ba{2}r)".to_vec()),
@@ -904,7 +931,7 @@
                     b"(?ia)^ba{2}r",
                     Path::new("")
                 ),
-                b"(?:/|$)"
+                GlobSuffix::MoreComponents
             )
             .unwrap(),
             Some(b"(?ia:^ba{2}r)".to_vec()),
--- a/rust/hg-core/src/matchers.rs	Fri Nov 29 19:43:39 2024 -0500
+++ b/rust/hg-core/src/matchers.rs	Mon Dec 02 11:25:26 2024 +0000
@@ -14,8 +14,8 @@
     dirstate::dirs_multiset::{DirsChildrenMultiset, DirsMultiset},
     filepatterns::{
         build_single_regex, filter_subincludes, get_patterns_from_file,
-        IgnorePattern, PatternError, PatternFileWarning, PatternResult,
-        PatternSyntax,
+        GlobSuffix, IgnorePattern, PatternError, PatternFileWarning,
+        PatternResult, PatternSyntax,
     },
     utils::{
         files::{dir_ancestors, find_dirs},
@@ -328,7 +328,8 @@
         let prefix = ignore_patterns.iter().all(|k| {
             matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
         });
-        let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
+        let (patterns, match_fn) =
+            build_match(ignore_patterns, GlobSuffix::Empty)?;
 
         Ok(Self {
             patterns,
@@ -807,7 +808,7 @@
 /// said regex formed by the given ignore patterns.
 fn build_regex_match<'a>(
     ignore_patterns: &[IgnorePattern],
-    glob_suffix: &[u8],
+    glob_suffix: GlobSuffix,
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
     let mut regexps = vec![];
     let mut exact_set = HashSet::new();
@@ -927,7 +928,7 @@
 /// should be matched.
 fn build_match<'a>(
     ignore_patterns: Vec<IgnorePattern>,
-    glob_suffix: &[u8],
+    glob_suffix: GlobSuffix,
 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
     let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
     // For debugging and printing
@@ -1067,7 +1068,8 @@
         let prefix = ignore_patterns.iter().all(|k| {
             matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
         });
-        let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
+        let (patterns, match_fn) =
+            build_match(ignore_patterns, GlobSuffix::MoreComponents)?;
 
         Ok(Self {
             patterns,