comparison rust/hg-core/src/utils/strings.rs @ 52969:874c64e041b5

rhg-annotate: support whitespace options. This adds support to rhg annotate for all the whitespace options: -w, --ignore-all-space; -b, --ignore-space-change; -B, --ignore-blank-lines; -Z, --ignore-space-at-eol. Note that --ignore-blank-lines has no effect on annotate, so it is ignored. You can see this in dagop.py _annotatepair, which only checks whether blocks are '=' or not, whereas the effect of --ignore-blank-lines is to change some '!' into '~'. When the other 3 are combined, we use the strongest option, since -w implies -b and -b implies -Z. This is not explicit in the Python implementation, but I have verified that's how it behaves.
author Mitchell Kember <mkember@janestreet.com>
date Fri, 07 Feb 2025 17:42:43 -0500
parents 94e2547e6f3d
children
comparison
equal deleted inserted replaced
52968:515196315b82 52969:874c64e041b5
1 //! Contains string-related utilities. 1 //! Contains string-related utilities.
2 2
3 use crate::utils::hg_path::HgPath; 3 use crate::utils::hg_path::HgPath;
4 use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _}; 4 use lazy_static::lazy_static;
5 use regex::bytes::Regex;
6 use std::{borrow::Cow, cell::Cell, fmt, io::Write as _, ops::Deref as _};
5 7
6 /// Useful until rust/issues/56345 is stable 8 /// Useful until rust/issues/56345 is stable
7 /// 9 ///
8 /// # Examples 10 /// # Examples
9 /// 11 ///
297 str = &str[..i]; 299 str = &str[..i];
298 } 300 }
299 str 301 str
300 } 302 }
301 303
/// Options for [`clean_whitespace`].
///
/// Each variant selects how much whitespace is normalized away before
/// text is compared.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CleanWhitespace {
    /// Do nothing.
    None,
    /// Remove whitespace at ends of lines.
    AtEol,
    /// Collapse consecutive whitespace characters into a single space.
    Collapse,
    /// Remove all whitespace characters.
    All,
}
316
317 /// Normalizes whitespace in text so that it won't apppear in diffs.
318 /// Returns `Cow::Borrowed(text)` if the result is unchanged.
319 pub fn clean_whitespace(text: &[u8], how: CleanWhitespace) -> Cow<[u8]> {
320 lazy_static! {
321 // To match wsclean in mdiff.py, this includes "\f".
322 static ref AT_EOL: Regex =
323 Regex::new(r"(?m)[ \t\r\f]+$").expect("valid regex");
324 // To match fixws in cext/bdiff.c, this does *not* include "\f".
325 static ref MULTIPLE: Regex =
326 Regex::new(r"[ \t\r]+").expect("valid regex");
327 }
328 let replacement: &[u8] = match how {
329 CleanWhitespace::None => return Cow::Borrowed(text),
330 CleanWhitespace::AtEol => return AT_EOL.replace_all(text, b""),
331 CleanWhitespace::Collapse => b" ",
332 CleanWhitespace::All => b"",
333 };
334 let text = MULTIPLE.replace_all(text, replacement);
335 replace_all_cow(&AT_EOL, text, b"")
336 }
337
338 /// Helper to call [`Regex::replace_all`] with `Cow` as input and output.
339 fn replace_all_cow<'a>(
340 regex: &Regex,
341 haystack: Cow<'a, [u8]>,
342 replacement: &[u8],
343 ) -> Cow<'a, [u8]> {
344 match haystack {
345 Cow::Borrowed(haystack) => regex.replace_all(haystack, replacement),
346 Cow::Owned(haystack) => {
347 Cow::Owned(regex.replace_all(&haystack, replacement).into_owned())
348 }
349 }
350 }
351
302 #[cfg(test)] 352 #[cfg(test)]
303 mod tests { 353 mod tests {
304 use super::*; 354 use super::*;
305 355
306 #[test] 356 #[test]