diff rust/hg-core/src/utils/strings.rs @ 52969:874c64e041b5

rhg-annotate: support whitespace options This adds support to rhg annotate for all the whitespace options: -w, --ignore-all-space -b, --ignore-space-change -B, --ignore-blank-lines -Z, --ignore-space-at-eol Note that --ignore-blank-lines has no effect on annotate so it is ignored. You can see this in dagop.py _annotepair which only checks if blocks are '=' or not, whereas the effect of --ignore-blank-lines is to change some '!' into '~'. When the other 3 are combined, we use the strongest option since -w implies -b and -b implies -Z. This is not explicit in the Python implementation, but I have verified that's how it behaves.
author Mitchell Kember <mkember@janestreet.com>
date Fri, 07 Feb 2025 17:42:43 -0500
parents 94e2547e6f3d
children
line wrap: on
line diff
--- a/rust/hg-core/src/utils/strings.rs	Wed Feb 12 11:37:07 2025 -0500
+++ b/rust/hg-core/src/utils/strings.rs	Fri Feb 07 17:42:43 2025 -0500
@@ -1,7 +1,9 @@
 //! Contains string-related utilities.
 
 use crate::utils::hg_path::HgPath;
-use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _};
+use lazy_static::lazy_static;
+use regex::bytes::Regex;
+use std::{borrow::Cow, cell::Cell, fmt, io::Write as _, ops::Deref as _};
 
 /// Useful until rust/issues/56345 is stable
 ///
@@ -299,6 +301,54 @@
     str
 }
 
+/// Options for [`clean_whitespace`].
+#[derive(Copy, Clone)]
+pub enum CleanWhitespace {
+    /// Do nothing.
+    None,
+    /// Remove whitespace at ends of lines.
+    AtEol,
+    /// Collapse consecutive whitespace characters into a single space.
+    Collapse,
+    /// Remove all whitespace characters.
+    All,
+}
+
+/// Normalizes whitespace in text so that it won't apppear in diffs.
+/// Returns `Cow::Borrowed(text)` if the result is unchanged.
+pub fn clean_whitespace(text: &[u8], how: CleanWhitespace) -> Cow<[u8]> {
+    lazy_static! {
+        // To match wsclean in mdiff.py, this includes "\f".
+        static ref AT_EOL: Regex =
+            Regex::new(r"(?m)[ \t\r\f]+$").expect("valid regex");
+        // To match fixws in cext/bdiff.c, this does *not* include "\f".
+        static ref MULTIPLE: Regex =
+            Regex::new(r"[ \t\r]+").expect("valid regex");
+    }
+    let replacement: &[u8] = match how {
+        CleanWhitespace::None => return Cow::Borrowed(text),
+        CleanWhitespace::AtEol => return AT_EOL.replace_all(text, b""),
+        CleanWhitespace::Collapse => b" ",
+        CleanWhitespace::All => b"",
+    };
+    let text = MULTIPLE.replace_all(text, replacement);
+    replace_all_cow(&AT_EOL, text, b"")
+}
+
+/// Helper to call [`Regex::replace_all`] with `Cow` as input and output.
+fn replace_all_cow<'a>(
+    regex: &Regex,
+    haystack: Cow<'a, [u8]>,
+    replacement: &[u8],
+) -> Cow<'a, [u8]> {
+    match haystack {
+        Cow::Borrowed(haystack) => regex.replace_all(haystack, replacement),
+        Cow::Owned(haystack) => {
+            Cow::Owned(regex.replace_all(&haystack, replacement).into_owned())
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;