changeset 52969:874c64e041b5

rhg-annotate: support whitespace options. This adds support to rhg annotate for all the whitespace options: -w/--ignore-all-space, -b/--ignore-space-change, -B/--ignore-blank-lines, and -Z/--ignore-space-at-eol. Note that --ignore-blank-lines has no effect on annotate, so it is ignored. You can see this in dagop.py's _annotatepair, which only checks whether blocks are '=' or not, whereas the effect of --ignore-blank-lines is to change some '!' into '~'. When the other 3 are combined, we use the strongest option, since -w implies -b and -b implies -Z. This is not explicit in the Python implementation, but I have verified that's how it behaves.
author Mitchell Kember <mkember@janestreet.com>
date Fri, 07 Feb 2025 17:42:43 -0500
parents 515196315b82
children e4ff37b5317c
files rust/hg-core/src/operations/annotate.rs rust/hg-core/src/utils/strings.rs rust/rhg/src/commands/annotate.rs tests/test-rhg.t
diffstat 4 files changed, 133 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/operations/annotate.rs	Wed Feb 12 11:37:07 2025 -0500
+++ b/rust/hg-core/src/operations/annotate.rs	Fri Feb 07 17:42:43 2025 -0500
@@ -1,3 +1,5 @@
+use std::borrow::Cow;
+
 use crate::{
     bdiff::{self, Lines},
     errors::HgError,
@@ -10,6 +12,7 @@
     utils::{
         self,
         hg_path::{HgPath, HgPathBuf},
+        strings::{clean_whitespace, CleanWhitespace},
     },
     AncestorsIterator, FastHashMap, Graph, GraphError, Node, Revision,
     NULL_REVISION,
@@ -23,6 +26,7 @@
 pub struct AnnotateOptions {
     pub treat_binary_as_text: bool,
     pub follow_copies: bool,
+    pub whitespace: CleanWhitespace,
 }
 
 /// The final result of annotating a file.
@@ -55,7 +59,8 @@
 }
 
 self_cell!(
-    /// A wrapper around [`Lines`] that owns the file text.
+    /// A wrapper around [`Lines`] that owns the buffer the lines point into.
+    /// The buffer contains the file text processed by [`clean_whitespace`].
     struct OwnedLines {
         owner: Vec<u8>,
         #[covariant]
@@ -64,7 +69,15 @@
 );
 
 impl OwnedLines {
-    fn split(data: Vec<u8>) -> Result<Self, HgError> {
+    /// Cleans `data` based on `whitespace` and then splits into lines.
+    fn split(
+        data: Vec<u8>,
+        whitespace: CleanWhitespace,
+    ) -> Result<Self, HgError> {
+        let data = match clean_whitespace(&data, whitespace) {
+            Cow::Borrowed(_) => data,
+            Cow::Owned(data) => data,
+        };
         Self::try_new(data, |data| bdiff::split_lines(data))
     }
 
@@ -293,7 +306,10 @@
             fls.parents(repo, id, options.follow_copies)?;
         info.parents = Some(parents.clone());
         if let Some(data) = file_data {
-            info.file = AnnotatedFileState::Read(OwnedLines::split(data)?);
+            info.file = AnnotatedFileState::Read(OwnedLines::split(
+                data,
+                options.whitespace,
+            )?);
         }
         for id in parents {
             let info = graph.get_or_insert_default(id);
@@ -304,15 +320,26 @@
         }
     }
 
-    // Step 3: Read files and split lines in parallel.
-    graph[base_id].file =
-        AnnotatedFileState::Read(OwnedLines::split(base_file_data)?);
+    // Step 3: Read files and split lines. Do the base file with and without
+    // whitespace cleaning. Do the rest of the files in parallel with rayon.
+    let base_file_original_lines = match options.whitespace {
+        CleanWhitespace::None => None,
+        _ => Some(OwnedLines::split(
+            base_file_data.clone(),
+            CleanWhitespace::None,
+        )?),
+    };
+    graph[base_id].file = AnnotatedFileState::Read(OwnedLines::split(
+        base_file_data,
+        options.whitespace,
+    )?);
     graph.0.par_iter_mut().try_for_each(
         |(&id, info)| -> Result<(), HgError> {
             if let AnnotatedFileState::None = info.file {
-                let lines =
-                    OwnedLines::split(fls.read(id)?.into_file_data()?)?;
-                info.file = AnnotatedFileState::Read(lines);
+                info.file = AnnotatedFileState::Read(OwnedLines::split(
+                    fls.read(id)?.into_file_data()?,
+                    options.whitespace,
+                )?);
             }
             Ok(())
         },
@@ -376,6 +403,8 @@
     else {
         panic!("the base file should have been annotated in step 4")
     };
+    // Don't use the lines from the graph if they had whitespace cleaned.
+    let lines = base_file_original_lines.unwrap_or(lines);
     // Only convert revisions that actually appear in the final output.
     for &Annotation { id, .. } in &annotations {
         graph[id].revision = ChangelogRevisionState::Needed;
--- a/rust/hg-core/src/utils/strings.rs	Wed Feb 12 11:37:07 2025 -0500
+++ b/rust/hg-core/src/utils/strings.rs	Fri Feb 07 17:42:43 2025 -0500
@@ -1,7 +1,9 @@
 //! Contains string-related utilities.
 
 use crate::utils::hg_path::HgPath;
-use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _};
+use lazy_static::lazy_static;
+use regex::bytes::Regex;
+use std::{borrow::Cow, cell::Cell, fmt, io::Write as _, ops::Deref as _};
 
 /// Useful until rust/issues/56345 is stable
 ///
@@ -299,6 +301,54 @@
     str
 }
 
+/// Options for [`clean_whitespace`].
+#[derive(Copy, Clone)]
+pub enum CleanWhitespace {
+    /// Do nothing.
+    None,
+    /// Remove whitespace at ends of lines.
+    AtEol,
+    /// Collapse consecutive whitespace characters into a single space.
+    Collapse,
+    /// Remove all whitespace characters.
+    All,
+}
+
+/// Normalizes whitespace in text so that it won't appear in diffs.
+/// Returns `Cow::Borrowed(text)` if the result is unchanged.
+pub fn clean_whitespace(text: &[u8], how: CleanWhitespace) -> Cow<[u8]> {
+    lazy_static! {
+        // To match wsclean in mdiff.py, this includes "\f".
+        static ref AT_EOL: Regex =
+            Regex::new(r"(?m)[ \t\r\f]+$").expect("valid regex");
+        // To match fixws in cext/bdiff.c, this does *not* include "\f".
+        static ref MULTIPLE: Regex =
+            Regex::new(r"[ \t\r]+").expect("valid regex");
+    }
+    let replacement: &[u8] = match how {
+        CleanWhitespace::None => return Cow::Borrowed(text),
+        CleanWhitespace::AtEol => return AT_EOL.replace_all(text, b""),
+        CleanWhitespace::Collapse => b" ",
+        CleanWhitespace::All => b"",
+    };
+    let text = MULTIPLE.replace_all(text, replacement);
+    replace_all_cow(&AT_EOL, text, b"")
+}
+
+/// Helper to call [`Regex::replace_all`] with `Cow` as input and output.
+fn replace_all_cow<'a>(
+    regex: &Regex,
+    haystack: Cow<'a, [u8]>,
+    replacement: &[u8],
+) -> Cow<'a, [u8]> {
+    match haystack {
+        Cow::Borrowed(haystack) => regex.replace_all(haystack, replacement),
+        Cow::Owned(haystack) => {
+            Cow::Owned(regex.replace_all(&haystack, replacement).into_owned())
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
--- a/rust/rhg/src/commands/annotate.rs	Wed Feb 12 11:37:07 2025 -0500
+++ b/rust/rhg/src/commands/annotate.rs	Fri Feb 07 17:42:43 2025 -0500
@@ -8,6 +8,7 @@
         annotate, AnnotateOptions, AnnotateOutput, ChangesetAnnotation,
     },
     revlog::changelog::Changelog,
+    utils::strings::CleanWhitespace,
     FastHashMap, Revision,
 };
 
@@ -106,6 +107,34 @@
                 .action(clap::ArgAction::SetTrue)
                 .conflicts_with("quiet"),
         )
+        .arg(
+            clap::Arg::new("ignore-all-space")
+                .help("ignore white space when comparing lines")
+                .short('w')
+                .long("ignore-all-space")
+                .action(clap::ArgAction::SetTrue),
+        )
+        .arg(
+            clap::Arg::new("ignore-space-change")
+                .help("ignore changes in the amount of white space")
+                .short('b')
+                .long("ignore-space-change")
+                .action(clap::ArgAction::SetTrue),
+        )
+        .arg(
+            clap::Arg::new("ignore-blank-lines")
+                .help("ignore changes whose lines are all blank")
+                .short('B')
+                .long("ignore-blank-lines")
+                .action(clap::ArgAction::SetTrue),
+        )
+        .arg(
+            clap::Arg::new("ignore-space-at-eol")
+                .help("ignore changes in whitespace at EOL")
+                .short('Z')
+                .long("ignore-space-at-eol")
+                .action(clap::ArgAction::SetTrue),
+        )
         .about(HELP_TEXT)
 }
 
@@ -131,6 +160,17 @@
     let options = AnnotateOptions {
         treat_binary_as_text: args.get_flag("text"),
         follow_copies: !args.get_flag("no-follow"),
+        whitespace: if args.get_flag("ignore-all-space") {
+            CleanWhitespace::All
+        } else if args.get_flag("ignore-space-change") {
+            CleanWhitespace::Collapse
+        } else if args.get_flag("ignore-space-at-eol") {
+            CleanWhitespace::AtEol
+        } else {
+            // We ignore the --ignore-blank-lines flag (present for consistency
+            // with other commands) since it has no effect on annotate.
+            CleanWhitespace::None
+        },
     };
 
     let mut include = Include {
--- a/tests/test-rhg.t	Wed Feb 12 11:37:07 2025 -0500
+++ b/tests/test-rhg.t	Fri Feb 07 17:42:43 2025 -0500
@@ -196,12 +196,13 @@
 Annotate files
   $ $NO_FALLBACK rhg annotate original
   0: original content
-  $ $NO_FALLBACK rhg annotate --rev . --user --file --date --number --changeset --line-number --text --no-follow original
+  $ $NO_FALLBACK rhg annotate --rev . --user --file --date --number --changeset \
+  > --line-number --text --no-follow --ignore-all-space --ignore-space-change \
+  > --ignore-blank-lines --ignore-space-at-eol original
   test 0 1c9e69808da7 Thu Jan 01 00:00:00 1970 +0000 original:1: original content
-  $ $NO_FALLBACK rhg blame -r . -ufdncla --no-follow original
+  $ $NO_FALLBACK rhg blame -r . -ufdnclawbBZ --no-follow original
   test 0 1c9e69808da7 Thu Jan 01 00:00:00 1970 +0000 original:1: original content
 
-
 Fallback to Python
   $ $NO_FALLBACK rhg cat original --exclude="*.rs"
   unsupported feature: error: unexpected argument '--exclude' found