Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/matchers.rs @ 52557:b89c934e6269
rust-hgignore: add a scripting command to print the hgignore regexp
Add a command `script::hgignore --print-re` to print the
hgignore regexp.
One complication is that the `rootfilesin`-only matcher doesn't use a
regular expression, and the existing code converts it to something that's
not a regular expression.
We add code to handle that case.
Since this command is now sufficient to generate a tidy-looking
regexp for scripting, this frees up the "debug" command to report
the internal regexp used by the regex engine, so we make that
change too.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Fri, 13 Dec 2024 15:05:37 +0000 |
parents | 1866119cbad7 |
children | 94e2547e6f3d |
line wrap: on
line diff
--- a/rust/hg-core/src/matchers.rs Fri Dec 06 20:27:59 2024 +0000 +++ b/rust/hg-core/src/matchers.rs Fri Dec 13 15:05:37 2024 +0000 @@ -8,6 +8,7 @@ //! Structs and types for matching files and directories. use format_bytes::format_bytes; +use itertools::Itertools; use once_cell::sync::OnceCell; use regex_automata::meta::Regex; use regex_syntax::hir::Hir; @@ -30,10 +31,10 @@ use crate::dirstate::status::IgnoreFnType; use crate::filepatterns::normalize_path_bytes; -use std::collections::HashSet; use std::fmt::{Display, Error, Formatter}; use std::path::{Path, PathBuf}; use std::{borrow::ToOwned, collections::BTreeSet}; +use std::{collections::HashSet, str::FromStr}; #[derive(Debug, PartialEq)] pub enum VisitChildrenSet { @@ -297,7 +298,7 @@ /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds /// ``` pub struct PatternMatcher<'a> { - patterns: Vec<u8>, + patterns: PatternsDesc, match_fn: IgnoreFnType<'a>, /// Whether all the patterns match a prefix (i.e. 
recursively) prefix: bool, @@ -306,10 +307,63 @@ dirs: DirsMultiset, } +enum PatternsDesc { + Re(PreRegex), + RootFilesIn(Vec<Vec<u8>>, GlobSuffix), +} + +pub enum ReSyntax { + Tidy, + Internal, +} + +impl PatternsDesc { + fn to_re(&self) -> PreRegex { + match self { + Self::Re(re) => re.clone(), + Self::RootFilesIn(patterns, glob_suffix) => { + let patterns = patterns + .clone() + .into_iter() + .map(|pattern: Vec<u8>| IgnorePattern { + syntax: PatternSyntax::RootFilesIn, + source: PathBuf::from_str("<rootfilesin-matcher>") + .unwrap(), + pattern, + }) + .collect_vec(); + build_regex_match_for_debug(&patterns, *glob_suffix).unwrap() + } + } + } + + fn to_pattern_bytes(&self, syntax: ReSyntax) -> Vec<u8> { + match syntax { + ReSyntax::Tidy => self.to_re().to_bytes(), + ReSyntax::Internal => match self { + PatternsDesc::Re(re) => re.to_hir().to_string().into_bytes(), + PatternsDesc::RootFilesIn(dirs, _) => { + let mut patterns = vec![]; + patterns.extend(b"rootfilesin: "); + let mut dirs_vec = dirs.clone(); + dirs_vec.sort(); + patterns.extend(dirs_vec.escaped_bytes()); + patterns + } + }, + } + } +} + impl core::fmt::Debug for PatternMatcher<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("PatternMatcher") - .field("patterns", &String::from_utf8_lossy(&self.patterns)) + .field( + "patterns", + &String::from_utf8_lossy( + &self.patterns.to_pattern_bytes(ReSyntax::Internal), + ), + ) .field("prefix", &self.prefix) .field("files", &self.files) .field("dirs", &self.dirs) @@ -441,7 +495,7 @@ /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file"))); /// ``` pub struct IncludeMatcher<'a> { - patterns: Vec<u8>, + patterns: PatternsDesc, match_fn: IgnoreFnType<'a>, /// Whether all the patterns match a prefix (i.e. 
recursively) prefix: bool, @@ -453,7 +507,10 @@ impl core::fmt::Debug for IncludeMatcher<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("IncludeMatcher") - .field("patterns", &String::from_utf8_lossy(&self.patterns)) + .field( + "patterns", + &String::from_utf8_lossy(&self.patterns.to_re().to_bytes()), + ) .field("prefix", &self.prefix) .field("roots", &self.roots) .field("dirs", &self.dirs) @@ -815,7 +872,7 @@ ignore_patterns: &[IgnorePattern], glob_suffix: GlobSuffix, regex_config: RegexCompleteness, -) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { +) -> PatternResult<(PreRegex, IgnoreFnType<'a>)> { let mut regexps = vec![]; let mut exact_set = HashSet::new(); @@ -850,7 +907,32 @@ Box::new(func) as IgnoreFnType }; - Ok((full_regex.to_bytes(), func)) + Ok((full_regex, func)) +} + +#[logging_timer::time("trace")] +fn build_regex_match_for_debug<'a>( + ignore_patterns: &[IgnorePattern], + glob_suffix: GlobSuffix, +) -> PatternResult<PreRegex> { + let mut regexps = vec![]; + + for pattern in ignore_patterns { + if let Some(re) = build_single_regex( + pattern, + glob_suffix, + RegexCompleteness::Complete, + )? { + regexps.push(re); + } else { + panic!("RegexCompleteness::Complete should prevent this branch"); + } + } + + Ok(PreRegex::Sequence(vec![ + PreRegex::parse(&b"^"[..])?, + PreRegex::Alternation(regexps), + ])) } /// Returns roots and directories corresponding to each pattern. 
@@ -943,10 +1025,10 @@ ignore_patterns: Vec<IgnorePattern>, glob_suffix: GlobSuffix, regex_config: RegexCompleteness, -) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { +) -> PatternResult<(PatternsDesc, IgnoreFnType<'a>)> { let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![]; // For debugging and printing - let mut patterns = vec![]; + let patterns; let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?; @@ -998,18 +1080,19 @@ }; match_funcs.push(Box::new(match_func)); - patterns.extend(b"rootfilesin: "); dirs_vec.sort(); - patterns.extend(dirs_vec.escaped_bytes()); + patterns = PatternsDesc::RootFilesIn(dirs_vec, glob_suffix); } else { let (new_re, match_func) = build_regex_match( &ignore_patterns, glob_suffix, regex_config, )?; - patterns = new_re; + patterns = PatternsDesc::Re(new_re); match_funcs.push(match_func) } + } else { + patterns = PatternsDesc::Re(PreRegex::Empty) } Ok(if match_funcs.len() == 1 { @@ -1131,8 +1214,8 @@ DirsChildrenMultiset::new(thing, Some(self.parents.iter())) } - pub fn debug_get_patterns(&self) -> &[u8] { - self.patterns.as_ref() + pub fn debug_get_patterns(&self, syntax: ReSyntax) -> Vec<u8> { + self.patterns.to_pattern_bytes(syntax) } } @@ -1147,7 +1230,9 @@ write!( f, "IncludeMatcher(includes='{}')", - String::from_utf8_lossy(&self.patterns.escaped_bytes()) + &String::from_utf8_lossy( + &self.patterns.to_pattern_bytes(ReSyntax::Internal) + ) ) } }