Mercurial > public > mercurial-scm > hg
changeset 52969:874c64e041b5
rhg-annotate: support whitespace options
This adds support to rhg annotate for all the whitespace options:
-w, --ignore-all-space
-b, --ignore-space-change
-B, --ignore-blank-lines
-Z, --ignore-space-at-eol
Note that --ignore-blank-lines has no effect on annotate so it is ignored. You
can see this in dagop.py _annotatepair which only checks if blocks are '=' or not,
whereas the effect of --ignore-blank-lines is to change some '!' into '~'.
When the other 3 are combined, we use the strongest option since -w implies -b
and -b implies -Z. This is not explicit in the Python implementation, but I have
verified that's how it behaves.
author | Mitchell Kember <mkember@janestreet.com> |
---|---|
date | Fri, 07 Feb 2025 17:42:43 -0500 |
parents | 515196315b82 |
children | e4ff37b5317c |
files | rust/hg-core/src/operations/annotate.rs rust/hg-core/src/utils/strings.rs rust/rhg/src/commands/annotate.rs tests/test-rhg.t |
diffstat | 4 files changed, 133 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/operations/annotate.rs Wed Feb 12 11:37:07 2025 -0500 +++ b/rust/hg-core/src/operations/annotate.rs Fri Feb 07 17:42:43 2025 -0500 @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use crate::{ bdiff::{self, Lines}, errors::HgError, @@ -10,6 +12,7 @@ utils::{ self, hg_path::{HgPath, HgPathBuf}, + strings::{clean_whitespace, CleanWhitespace}, }, AncestorsIterator, FastHashMap, Graph, GraphError, Node, Revision, NULL_REVISION, @@ -23,6 +26,7 @@ pub struct AnnotateOptions { pub treat_binary_as_text: bool, pub follow_copies: bool, + pub whitespace: CleanWhitespace, } /// The final result of annotating a file. @@ -55,7 +59,8 @@ } self_cell!( - /// A wrapper around [`Lines`] that owns the file text. + /// A wrapper around [`Lines`] that owns the buffer the lines point into. + /// The buffer contains the file text processed by [`clean_whitespace`]. struct OwnedLines { owner: Vec<u8>, #[covariant] @@ -64,7 +69,15 @@ ); impl OwnedLines { - fn split(data: Vec<u8>) -> Result<Self, HgError> { + /// Cleans `data` based on `whitespace` and then splits into lines. + fn split( + data: Vec<u8>, + whitespace: CleanWhitespace, + ) -> Result<Self, HgError> { + let data = match clean_whitespace(&data, whitespace) { + Cow::Borrowed(_) => data, + Cow::Owned(data) => data, + }; Self::try_new(data, |data| bdiff::split_lines(data)) } @@ -293,7 +306,10 @@ fls.parents(repo, id, options.follow_copies)?; info.parents = Some(parents.clone()); if let Some(data) = file_data { - info.file = AnnotatedFileState::Read(OwnedLines::split(data)?); + info.file = AnnotatedFileState::Read(OwnedLines::split( + data, + options.whitespace, + )?); } for id in parents { let info = graph.get_or_insert_default(id); @@ -304,15 +320,26 @@ } } - // Step 3: Read files and split lines in parallel. - graph[base_id].file = - AnnotatedFileState::Read(OwnedLines::split(base_file_data)?); + // Step 3: Read files and split lines. Do the base file with and without + // whitespace cleaning. 
Do the rest of the files in parallel with rayon. + let base_file_original_lines = match options.whitespace { + CleanWhitespace::None => None, + _ => Some(OwnedLines::split( + base_file_data.clone(), + CleanWhitespace::None, + )?), + }; + graph[base_id].file = AnnotatedFileState::Read(OwnedLines::split( + base_file_data, + options.whitespace, + )?); graph.0.par_iter_mut().try_for_each( |(&id, info)| -> Result<(), HgError> { if let AnnotatedFileState::None = info.file { - let lines = - OwnedLines::split(fls.read(id)?.into_file_data()?)?; - info.file = AnnotatedFileState::Read(lines); + info.file = AnnotatedFileState::Read(OwnedLines::split( + fls.read(id)?.into_file_data()?, + options.whitespace, + )?); } Ok(()) }, @@ -376,6 +403,8 @@ else { panic!("the base file should have been annotated in step 4") }; + // Don't use the lines from the graph if they had whitespace cleaned. + let lines = base_file_original_lines.unwrap_or(lines); // Only convert revisions that actually appear in the final output. for &Annotation { id, .. } in &annotations { graph[id].revision = ChangelogRevisionState::Needed;
--- a/rust/hg-core/src/utils/strings.rs Wed Feb 12 11:37:07 2025 -0500 +++ b/rust/hg-core/src/utils/strings.rs Fri Feb 07 17:42:43 2025 -0500 @@ -1,7 +1,9 @@ //! Contains string-related utilities. use crate::utils::hg_path::HgPath; -use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _}; +use lazy_static::lazy_static; +use regex::bytes::Regex; +use std::{borrow::Cow, cell::Cell, fmt, io::Write as _, ops::Deref as _}; /// Useful until rust/issues/56345 is stable /// @@ -299,6 +301,54 @@ str } +/// Options for [`clean_whitespace`]. +#[derive(Copy, Clone)] +pub enum CleanWhitespace { + /// Do nothing. + None, + /// Remove whitespace at ends of lines. + AtEol, + /// Collapse consecutive whitespace characters into a single space. + Collapse, + /// Remove all whitespace characters. + All, +} + +/// Normalizes whitespace in text so that it won't appear in diffs. +/// Returns `Cow::Borrowed(text)` if the result is unchanged. +pub fn clean_whitespace(text: &[u8], how: CleanWhitespace) -> Cow<[u8]> { + lazy_static! { + // To match wsclean in mdiff.py, this includes "\f". + static ref AT_EOL: Regex = + Regex::new(r"(?m)[ \t\r\f]+$").expect("valid regex"); + // To match fixws in cext/bdiff.c, this does *not* include "\f". + static ref MULTIPLE: Regex = + Regex::new(r"[ \t\r]+").expect("valid regex"); + } + let replacement: &[u8] = match how { + CleanWhitespace::None => return Cow::Borrowed(text), + CleanWhitespace::AtEol => return AT_EOL.replace_all(text, b""), + CleanWhitespace::Collapse => b" ", + CleanWhitespace::All => b"", + }; + let text = MULTIPLE.replace_all(text, replacement); + replace_all_cow(&AT_EOL, text, b"") +} + +/// Helper to call [`Regex::replace_all`] with `Cow` as input and output. 
+fn replace_all_cow<'a>( + regex: &Regex, + haystack: Cow<'a, [u8]>, + replacement: &[u8], +) -> Cow<'a, [u8]> { + match haystack { + Cow::Borrowed(haystack) => regex.replace_all(haystack, replacement), + Cow::Owned(haystack) => { + Cow::Owned(regex.replace_all(&haystack, replacement).into_owned()) + } + } +} + #[cfg(test)] mod tests { use super::*;
--- a/rust/rhg/src/commands/annotate.rs Wed Feb 12 11:37:07 2025 -0500 +++ b/rust/rhg/src/commands/annotate.rs Fri Feb 07 17:42:43 2025 -0500 @@ -8,6 +8,7 @@ annotate, AnnotateOptions, AnnotateOutput, ChangesetAnnotation, }, revlog::changelog::Changelog, + utils::strings::CleanWhitespace, FastHashMap, Revision, }; @@ -106,6 +107,34 @@ .action(clap::ArgAction::SetTrue) .conflicts_with("quiet"), ) + .arg( + clap::Arg::new("ignore-all-space") + .help("ignore white space when comparing lines") + .short('w') + .long("ignore-all-space") + .action(clap::ArgAction::SetTrue), + ) + .arg( + clap::Arg::new("ignore-space-change") + .help("ignore changes in the amount of white space") + .short('b') + .long("ignore-space-change") + .action(clap::ArgAction::SetTrue), + ) + .arg( + clap::Arg::new("ignore-blank-lines") + .help("ignore changes whose lines are all blank") + .short('B') + .long("ignore-blank-lines") + .action(clap::ArgAction::SetTrue), + ) + .arg( + clap::Arg::new("ignore-space-at-eol") + .help("ignore changes in whitespace at EOL") + .short('Z') + .long("ignore-space-at-eol") + .action(clap::ArgAction::SetTrue), + ) .about(HELP_TEXT) } @@ -131,6 +160,17 @@ let options = AnnotateOptions { treat_binary_as_text: args.get_flag("text"), follow_copies: !args.get_flag("no-follow"), + whitespace: if args.get_flag("ignore-all-space") { + CleanWhitespace::All + } else if args.get_flag("ignore-space-change") { + CleanWhitespace::Collapse + } else if args.get_flag("ignore-space-at-eol") { + CleanWhitespace::AtEol + } else { + // We ignore the --ignore-blank-lines flag (present for consistency + // with other commands) since it has no effect on annotate. + CleanWhitespace::None + }, }; let mut include = Include {
--- a/tests/test-rhg.t Wed Feb 12 11:37:07 2025 -0500 +++ b/tests/test-rhg.t Fri Feb 07 17:42:43 2025 -0500 @@ -196,12 +196,13 @@ Annotate files $ $NO_FALLBACK rhg annotate original 0: original content - $ $NO_FALLBACK rhg annotate --rev . --user --file --date --number --changeset --line-number --text --no-follow original + $ $NO_FALLBACK rhg annotate --rev . --user --file --date --number --changeset \ + > --line-number --text --no-follow --ignore-all-space --ignore-space-change \ + > --ignore-blank-lines --ignore-space-at-eol original test 0 1c9e69808da7 Thu Jan 01 00:00:00 1970 +0000 original:1: original content - $ $NO_FALLBACK rhg blame -r . -ufdncla --no-follow original + $ $NO_FALLBACK rhg blame -r . -ufdnclawbBZ --no-follow original test 0 1c9e69808da7 Thu Jan 01 00:00:00 1970 +0000 original:1: original content - Fallback to Python $ $NO_FALLBACK rhg cat original --exclude="*.rs" unsupported feature: error: unexpected argument '--exclude' found