rust-hgignore: add a scripting command to print the hgignore regexp
Add a command `script::hgignore --print-re` to print the
hgignore regexp.
One complication is that the `rootfilesin`-only matcher doesn't use a
regular expression, and the existing code converts it to something
that's not a regular expression.
We add code to handle that case.
Since this command is now sufficient to generate a tidy-looking
regexp for scripting, this frees up the "debug" command to report
the internal regexp used by the regex engine, so we make that
change too.
--- a/rust/hg-core/src/matchers.rs Fri Dec 06 20:27:59 2024 +0000
+++ b/rust/hg-core/src/matchers.rs Fri Dec 13 15:05:37 2024 +0000
@@ -8,6 +8,7 @@
//! Structs and types for matching files and directories.
use format_bytes::format_bytes;
+use itertools::Itertools;
use once_cell::sync::OnceCell;
use regex_automata::meta::Regex;
use regex_syntax::hir::Hir;
@@ -30,10 +31,10 @@
use crate::dirstate::status::IgnoreFnType;
use crate::filepatterns::normalize_path_bytes;
-use std::collections::HashSet;
use std::fmt::{Display, Error, Formatter};
use std::path::{Path, PathBuf};
use std::{borrow::ToOwned, collections::BTreeSet};
+use std::{collections::HashSet, str::FromStr};
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet {
@@ -297,7 +298,7 @@
/// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
/// ```
pub struct PatternMatcher<'a> {
- patterns: Vec<u8>,
+ patterns: PatternsDesc,
match_fn: IgnoreFnType<'a>,
/// Whether all the patterns match a prefix (i.e. recursively)
prefix: bool,
@@ -306,10 +307,63 @@
dirs: DirsMultiset,
}
+enum PatternsDesc {
+ Re(PreRegex),
+ RootFilesIn(Vec<Vec<u8>>, GlobSuffix),
+}
+
+pub enum ReSyntax {
+ Tidy,
+ Internal,
+}
+
+impl PatternsDesc {
+ fn to_re(&self) -> PreRegex {
+ match self {
+ Self::Re(re) => re.clone(),
+ Self::RootFilesIn(patterns, glob_suffix) => {
+ let patterns = patterns
+ .clone()
+ .into_iter()
+ .map(|pattern: Vec<u8>| IgnorePattern {
+ syntax: PatternSyntax::RootFilesIn,
+ source: PathBuf::from_str("<rootfilesin-matcher>")
+ .unwrap(),
+ pattern,
+ })
+ .collect_vec();
+ build_regex_match_for_debug(&patterns, *glob_suffix).unwrap()
+ }
+ }
+ }
+
+ fn to_pattern_bytes(&self, syntax: ReSyntax) -> Vec<u8> {
+ match syntax {
+ ReSyntax::Tidy => self.to_re().to_bytes(),
+ ReSyntax::Internal => match self {
+ PatternsDesc::Re(re) => re.to_hir().to_string().into_bytes(),
+ PatternsDesc::RootFilesIn(dirs, _) => {
+ let mut patterns = vec![];
+ patterns.extend(b"rootfilesin: ");
+ let mut dirs_vec = dirs.clone();
+ dirs_vec.sort();
+ patterns.extend(dirs_vec.escaped_bytes());
+ patterns
+ }
+ },
+ }
+ }
+}
+
impl core::fmt::Debug for PatternMatcher<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PatternMatcher")
- .field("patterns", &String::from_utf8_lossy(&self.patterns))
+ .field(
+ "patterns",
+ &String::from_utf8_lossy(
+ &self.patterns.to_pattern_bytes(ReSyntax::Internal),
+ ),
+ )
.field("prefix", &self.prefix)
.field("files", &self.files)
.field("dirs", &self.dirs)
@@ -441,7 +495,7 @@
/// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
/// ```
pub struct IncludeMatcher<'a> {
- patterns: Vec<u8>,
+ patterns: PatternsDesc,
match_fn: IgnoreFnType<'a>,
/// Whether all the patterns match a prefix (i.e. recursively)
prefix: bool,
@@ -453,7 +507,10 @@
impl core::fmt::Debug for IncludeMatcher<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("IncludeMatcher")
- .field("patterns", &String::from_utf8_lossy(&self.patterns))
+ .field(
+ "patterns",
+ &String::from_utf8_lossy(&self.patterns.to_re().to_bytes()),
+ )
.field("prefix", &self.prefix)
.field("roots", &self.roots)
.field("dirs", &self.dirs)
@@ -815,7 +872,7 @@
ignore_patterns: &[IgnorePattern],
glob_suffix: GlobSuffix,
regex_config: RegexCompleteness,
-) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
+) -> PatternResult<(PreRegex, IgnoreFnType<'a>)> {
let mut regexps = vec![];
let mut exact_set = HashSet::new();
@@ -850,7 +907,32 @@
Box::new(func) as IgnoreFnType
};
- Ok((full_regex.to_bytes(), func))
+ Ok((full_regex, func))
+}
+
+#[logging_timer::time("trace")]
+fn build_regex_match_for_debug<'a>(
+ ignore_patterns: &[IgnorePattern],
+ glob_suffix: GlobSuffix,
+) -> PatternResult<PreRegex> {
+ let mut regexps = vec![];
+
+ for pattern in ignore_patterns {
+ if let Some(re) = build_single_regex(
+ pattern,
+ glob_suffix,
+ RegexCompleteness::Complete,
+ )? {
+ regexps.push(re);
+ } else {
+ panic!("RegexCompleteness::Complete should prevent this branch");
+ }
+ }
+
+ Ok(PreRegex::Sequence(vec![
+ PreRegex::parse(&b"^"[..])?,
+ PreRegex::Alternation(regexps),
+ ]))
}
/// Returns roots and directories corresponding to each pattern.
@@ -943,10 +1025,10 @@
ignore_patterns: Vec<IgnorePattern>,
glob_suffix: GlobSuffix,
regex_config: RegexCompleteness,
-) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
+) -> PatternResult<(PatternsDesc, IgnoreFnType<'a>)> {
let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
// For debugging and printing
- let mut patterns = vec![];
+ let patterns;
let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
@@ -998,18 +1080,19 @@
};
match_funcs.push(Box::new(match_func));
- patterns.extend(b"rootfilesin: ");
dirs_vec.sort();
- patterns.extend(dirs_vec.escaped_bytes());
+ patterns = PatternsDesc::RootFilesIn(dirs_vec, glob_suffix);
} else {
let (new_re, match_func) = build_regex_match(
&ignore_patterns,
glob_suffix,
regex_config,
)?;
- patterns = new_re;
+ patterns = PatternsDesc::Re(new_re);
match_funcs.push(match_func)
}
+ } else {
+ patterns = PatternsDesc::Re(PreRegex::Empty)
}
Ok(if match_funcs.len() == 1 {
@@ -1131,8 +1214,8 @@
DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
}
- pub fn debug_get_patterns(&self) -> &[u8] {
- self.patterns.as_ref()
+ pub fn debug_get_patterns(&self, syntax: ReSyntax) -> Vec<u8> {
+ self.patterns.to_pattern_bytes(syntax)
}
}
@@ -1147,7 +1230,9 @@
write!(
f,
"IncludeMatcher(includes='{}')",
- String::from_utf8_lossy(&self.patterns.escaped_bytes())
+ &String::from_utf8_lossy(
+ &self.patterns.to_pattern_bytes(ReSyntax::Internal)
+ )
)
}
}
--- a/rust/hg-core/src/pre_regex.rs Fri Dec 06 20:27:59 2024 +0000
+++ b/rust/hg-core/src/pre_regex.rs Fri Dec 13 15:05:37 2024 +0000
@@ -19,6 +19,9 @@
&RE_ESCAPE[c as usize]
}
+/// An intermediate regular expression representation that can either be
+/// compiled down to a `Regex` for matching or be converted directly to
+/// a string for diagnostics.
#[derive(Debug, Clone)]
pub enum PreRegex {
Empty,
--- a/rust/rhg/src/commands/debugignorerhg.rs Fri Dec 06 20:27:59 2024 +0000
+++ b/rust/rhg/src/commands/debugignorerhg.rs Fri Dec 13 15:05:37 2024 +0000
@@ -1,8 +1,10 @@
use crate::error::CommandError;
+use crate::ui::Ui;
use clap::Arg;
use hg::dirstate::status::StatusError;
use hg::filepatterns::RegexCompleteness;
-use hg::matchers::get_ignore_matcher_pre;
+use hg::matchers::{get_ignore_matcher_pre, ReSyntax};
+use hg::repo::Repo;
use log::warn;
pub const HELP_TEXT: &str = "
@@ -24,12 +26,19 @@
).about(HELP_TEXT)
}
-pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
- let repo = invocation.repo?;
- let args = invocation.subcommand_args;
+pub enum WhichPatterns {
+ All,
+ SlowPathOnly,
+}
+pub fn work(
+ repo: &Repo,
+ ui: &Ui,
+ which: WhichPatterns,
+ syntax: ReSyntax,
+) -> Result<(), CommandError> {
let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded
- let all_patterns = args.get_flag("all-patterns");
+ let all_patterns = matches!(which, WhichPatterns::All);
let (ignore_matcher, warnings) = get_ignore_matcher_pre(
vec![ignore_file],
@@ -51,8 +60,22 @@
warn!("Pattern warnings: {:?}", &warnings);
}
- let patterns = ignore_matcher.debug_get_patterns();
- invocation.ui.write_stdout(patterns)?;
- invocation.ui.write_stdout(b"\n")?;
+ let patterns = ignore_matcher.debug_get_patterns(syntax);
+ ui.write_stdout(&patterns)?;
+ ui.write_stdout(b"\n")?;
Ok(())
}
+
+pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
+ let repo = invocation.repo?;
+ let args = invocation.subcommand_args;
+
+ let all_patterns = args.get_flag("all-patterns");
+ let patterns = if all_patterns {
+ WhichPatterns::All
+ } else {
+ WhichPatterns::SlowPathOnly
+ };
+
+ work(repo, invocation.ui, patterns, ReSyntax::Internal)
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/rhg/src/commands/script_hgignore.rs Fri Dec 13 15:05:37 2024 +0000
@@ -0,0 +1,42 @@
+use clap::Arg;
+use hg::matchers::ReSyntax;
+
+use crate::error::CommandError;
+
+use super::debugignorerhg::WhichPatterns;
+
+pub const HELP_TEXT: &str = "
+Show effective hgignore patterns used by rhg.
+
+This is a pure Rust version of `hg debugignore`.
+
+Some options might be missing, check the list below.
+";
+
+pub fn args() -> clap::Command {
+ clap::command!("script::hgignore")
+ .arg(
+ Arg::new("print-re")
+ .help("Print the regular expression that matches all ignored files.")
+ .action(clap::ArgAction::SetTrue)
+ .long("print-re"),
+ ).about(HELP_TEXT)
+}
+
+pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
+ let repo = invocation.repo?;
+ let args = invocation.subcommand_args;
+ let print_re = args.get_flag("print-re");
+ if !print_re {
+ return Err(CommandError::unsupported(
+ "Unsupported invocation: flag --print-re is required",
+ ));
+ }
+
+ crate::commands::debugignorerhg::work(
+ repo,
+ invocation.ui,
+ WhichPatterns::All,
+ ReSyntax::Tidy,
+ )
+}
--- a/rust/rhg/src/main.rs Fri Dec 06 20:27:59 2024 +0000
+++ b/rust/rhg/src/main.rs Fri Dec 13 15:05:37 2024 +0000
@@ -534,6 +534,7 @@
pub mod debugrhgsparse;
pub mod files;
pub mod root;
+ pub mod script_hgignore;
pub mod status;
}
@@ -616,6 +617,7 @@
subcommand!(root),
subcommand!(config),
subcommand!(status),
+ subcommand!(script_hgignore),
];
let mut commands = Subcommands::new();
for cmd in subcommands {
--- a/tests/test-hgignore.t Fri Dec 06 20:27:59 2024 +0000
+++ b/tests/test-hgignore.t Fri Dec 13 15:05:37 2024 +0000
@@ -17,7 +17,16 @@
<nevermatcher>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore() {
+ > echo debugignorerhg:
+ > hg debugignorerhg
+ > echo script::hgignore --print-re:
+ > hg script::hgignore --print-re
+ > }
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:)
+ script::hgignore --print-re:
#endif
@@ -83,7 +92,10 @@
<includematcher includes='(?i:.*\\.O$)|.*.hgignore'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A[\x00- \x0b-\xf4\x8f\xbf\xbf]*(?:(?:\\.[Oo]\\z)|(?:[\x00- \x0b-\xf4\x8f\xbf\xbf](?:hgignore)))) (esc)
+ script::hgignore --print-re:
^(?:(?i:.*\.O$)|.*.hgignore)
#endif
@@ -99,7 +111,10 @@
<includematcher includes='.*.hgignore|(?i:.*\\.O$)'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A[\x00- \x0b-\xf4\x8f\xbf\xbf]*(?:(?:[\x00- \x0b-\xf4\x8f\xbf\xbf](?:hgignore))|(?:\\.[Oo]\\z))) (esc)
+ script::hgignore --print-re:
^(?:.*.hgignore|(?i:.*\.O$))
#endif
@@ -116,7 +131,10 @@
<includematcher includes='(?i:.*\\.O$)|.*.HGIGNORE'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A[\x00- \x0b-\xf4\x8f\xbf\xbf]*(?:(?:\\.[Oo]\\z)|(?:[\x00- \x0b-\xf4\x8f\xbf\xbf](?:HGIGNORE)))) (esc)
+ script::hgignore --print-re:
^(?:(?i:.*\.O$)|.*.HGIGNORE)
#endif
@@ -131,7 +149,10 @@
<includematcher includes='.*.HGIGNORE|(?i:.*\\.O$)'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A[\x00- \x0b-\xf4\x8f\xbf\xbf]*(?:(?:[\x00- \x0b-\xf4\x8f\xbf\xbf](?:HGIGNORE))|(?:\\.[Oo]\\z))) (esc)
+ script::hgignore --print-re:
^(?:.*.HGIGNORE|(?i:.*\.O$))
#endif
@@ -149,7 +170,10 @@
<includematcher includes='(?i:^[^a].*\\.O$)|.*.HGIGNORE'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A(?:(?:\\A[\x00-@B-`b-\xf4\x8f\xbf\xbf][\x00- \x0b-\xf4\x8f\xbf\xbf]*\\.[Oo]\\z)|(?:[\x00- \x0b-\xf4\x8f\xbf\xbf]*[\x00- \x0b-\xf4\x8f\xbf\xbf](?:HGIGNORE)))) (esc)
+ script::hgignore --print-re:
^(?:(?i:^[^a].*\.O$)|.*.HGIGNORE)
#endif
@@ -165,7 +189,10 @@
<includematcher includes='.*.HGIGNORE|(?i:^[^a].*\\.O$)'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\\A(?:(?:[\x00- \x0b-\xf4\x8f\xbf\xbf]*[\x00- \x0b-\xf4\x8f\xbf\xbf](?:HGIGNORE))|(?:\\A[\x00-@B-`b-\xf4\x8f\xbf\xbf][\x00- \x0b-\xf4\x8f\xbf\xbf]*\\.[Oo]\\z))) (esc)
+ script::hgignore --print-re:
^(?:.*.HGIGNORE|(?i:^[^a].*\.O$))
#endif
@@ -360,11 +387,11 @@
? dir/c.o
? syntax
#if rhg
- $ hg debugignorerhg -a
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\A[a&&b])
+ script::hgignore --print-re:
^(?:dir/b\.o(?:/|$))
-
- $ hg debugignorerhg
- ^ ^
#endif
$ echo "relglob:*" > .hgignore
@@ -379,7 +406,10 @@
<includematcher includes='.*(?:/|$)'>
#if rhg
- $ hg debugignorerhg
+ $ rhg_debugignore
+ debugignorerhg:
+ (?:\A(?-u:[\x00-\xFF])*?(?:/|\z))
+ script::hgignore --print-re:
^(?:.*(?:/|$))
#endif