diff rust/hg-core/src/utils.rs @ 52760:94e2547e6f3d

rust: move code from utils to utils::strings. This moves string-related functions in hg::utils into the recently added hg::utils::strings module.
author Mitchell Kember <mkember@janestreet.com>
date Thu, 16 Jan 2025 13:15:02 -0500
parents 36d39726c0af
children
line wrap: on
line diff
--- a/rust/hg-core/src/utils.rs	Fri Jan 03 10:50:17 2025 -0500
+++ b/rust/hg-core/src/utils.rs	Thu Jan 16 13:15:02 2025 -0500
@@ -8,15 +8,11 @@
 //! Contains useful functions, traits, structs, etc. for use in core.
 
 use crate::errors::{HgError, IoErrorContext};
-use crate::utils::hg_path::HgPath;
 use im_rc::ordmap::DiffItem;
 use im_rc::ordmap::OrdMap;
 use itertools::EitherOrBoth;
 use itertools::Itertools;
-use std::cell::Cell;
 use std::cmp::Ordering;
-use std::fmt;
-use std::{io::Write, ops::Deref};
 
 pub mod debug;
 pub mod files;
@@ -24,208 +20,6 @@
 pub mod path_auditor;
 pub mod strings;
 
-/// Useful until rust/issues/56345 is stable
-///
-/// # Examples
-///
-/// ```
-/// use crate::hg::utils::find_slice_in_slice;
-///
-/// let haystack = b"This is the haystack".to_vec();
-/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
-/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
-/// ```
-pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
-where
-    for<'a> &'a [T]: PartialEq,
-{
-    slice
-        .windows(needle.len())
-        .position(|window| window == needle)
-}
-
-/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
-///
-/// # Examples
-///
-/// ```
-/// use crate::hg::utils::replace_slice;
-/// let mut line = b"I hate writing tests!".to_vec();
-/// replace_slice(&mut line, b"hate", b"love");
-/// assert_eq!(
-///     line,
-///     b"I love writing tests!".to_vec()
-/// );
-/// ```
-pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
-where
-    T: Clone + PartialEq,
-{
-    if buf.len() < from.len() || from.len() != to.len() {
-        return;
-    }
-    for i in 0..=buf.len() - from.len() {
-        if buf[i..].starts_with(from) {
-            buf[i..(i + from.len())].clone_from_slice(to);
-        }
-    }
-}
-
-pub trait SliceExt {
-    fn trim_end(&self) -> &Self;
-    fn trim_start(&self) -> &Self;
-    fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
-    fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
-    fn trim(&self) -> &Self;
-    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
-    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
-    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
-}
-
-impl SliceExt for [u8] {
-    fn trim_end(&self) -> &[u8] {
-        self.trim_end_matches(|byte| byte.is_ascii_whitespace())
-    }
-
-    fn trim_start(&self) -> &[u8] {
-        self.trim_start_matches(|byte| byte.is_ascii_whitespace())
-    }
-
-    fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
-        if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
-            &self[..=last]
-        } else {
-            &[]
-        }
-    }
-
-    fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
-        if let Some(first) = self.iter().position(|&byte| !f(byte)) {
-            &self[first..]
-        } else {
-            &[]
-        }
-    }
-
-    /// ```
-    /// use hg::utils::SliceExt;
-    /// assert_eq!(
-    ///     b"  to trim  ".trim(),
-    ///     b"to trim"
-    /// );
-    /// assert_eq!(
-    ///     b"to trim  ".trim(),
-    ///     b"to trim"
-    /// );
-    /// assert_eq!(
-    ///     b"  to trim".trim(),
-    ///     b"to trim"
-    /// );
-    /// ```
-    fn trim(&self) -> &[u8] {
-        self.trim_start().trim_end()
-    }
-
-    fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
-        if self.starts_with(needle) {
-            Some(&self[needle.len()..])
-        } else {
-            None
-        }
-    }
-
-    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
-        let pos = memchr::memchr(separator, self)?;
-        Some((&self[..pos], &self[pos + 1..]))
-    }
-
-    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
-        find_slice_in_slice(self, separator)
-            .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
-    }
-}
-
-pub trait Escaped {
-    /// Return bytes escaped for display to the user
-    fn escaped_bytes(&self) -> Vec<u8>;
-}
-
-impl Escaped for u8 {
-    fn escaped_bytes(&self) -> Vec<u8> {
-        let mut acc = vec![];
-        match self {
-            c @ b'\'' | c @ b'\\' => {
-                acc.push(b'\\');
-                acc.push(*c);
-            }
-            b'\t' => {
-                acc.extend(br"\\t");
-            }
-            b'\n' => {
-                acc.extend(br"\\n");
-            }
-            b'\r' => {
-                acc.extend(br"\\r");
-            }
-            c if (*c < b' ' || *c >= 127) => {
-                write!(acc, "\\x{:x}", self).unwrap();
-            }
-            c => {
-                acc.push(*c);
-            }
-        }
-        acc
-    }
-}
-
-impl<'a, T: Escaped> Escaped for &'a [T] {
-    fn escaped_bytes(&self) -> Vec<u8> {
-        self.iter().flat_map(Escaped::escaped_bytes).collect()
-    }
-}
-
-impl<T: Escaped> Escaped for Vec<T> {
-    fn escaped_bytes(&self) -> Vec<u8> {
-        self.deref().escaped_bytes()
-    }
-}
-
-impl<'a> Escaped for &'a HgPath {
-    fn escaped_bytes(&self) -> Vec<u8> {
-        self.as_bytes().escaped_bytes()
-    }
-}
-
-#[cfg(unix)]
-pub fn shell_quote(value: &[u8]) -> Vec<u8> {
-    if value.iter().all(|&byte| {
-        matches!(
-            byte,
-            b'a'..=b'z'
-            | b'A'..=b'Z'
-            | b'0'..=b'9'
-            | b'.'
-            | b'_'
-            | b'/'
-            | b'+'
-            | b'-'
-        )
-    }) {
-        value.to_owned()
-    } else {
-        let mut quoted = Vec::with_capacity(value.len() + 2);
-        quoted.push(b'\'');
-        for &byte in value {
-            if byte == b'\'' {
-                quoted.push(b'\\');
-            }
-            quoted.push(byte);
-        }
-        quoted.push(b'\'');
-        quoted
-    }
-}
-
 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
     std::env::current_dir().map_err(|error| HgError::IoError {
         error,
@@ -240,59 +34,6 @@
     })
 }
 
-/// Expand `$FOO` and `${FOO}` environment variables in the given byte string
-pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
-    lazy_static::lazy_static! {
-        /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
-        /// The `x` makes whitespace ignored.
-        /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
-        static ref VAR_RE: regex::bytes::Regex =
-            regex::bytes::Regex::new(r"(?x-u)
-                \$
-                (?:
-                    (\w+)
-                    |
-                    \{
-                        ([^}]*)
-                    \}
-                )
-            ").unwrap();
-    }
-    VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
-        let var_name = files::get_os_str_from_bytes(
-            captures
-                .get(1)
-                .or_else(|| captures.get(2))
-                .expect("either side of `|` must participate in match")
-                .as_bytes(),
-        );
-        std::env::var_os(var_name)
-            .map(files::get_bytes_from_os_str)
-            .unwrap_or_else(|| {
-                // Referencing an environment variable that does not exist.
-                // Leave the $FOO reference as-is.
-                captures[0].to_owned()
-            })
-    })
-}
-
-#[test]
-fn test_expand_vars() {
-    // Modifying process-global state in a test isn’t great,
-    // but hopefully this won’t collide with anything.
-    std::env::set_var("TEST_EXPAND_VAR", "1");
-    assert_eq!(
-        expand_vars(b"before/$TEST_EXPAND_VAR/after"),
-        &b"before/1/after"[..]
-    );
-    assert_eq!(
-        expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
-        &b"before111after"[..]
-    );
-    let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
-    assert_eq!(expand_vars(s), &s[..]);
-}
-
 pub(crate) enum MergeResult<V> {
     Left,
     Right,
@@ -441,46 +182,6 @@
     }
 }
 
-/// Join items of the iterable with the given separator, similar to Python’s
-/// `separator.join(iter)`.
-///
-/// Formatting the return value consumes the iterator.
-/// Formatting it again will produce an empty string.
-pub fn join_display(
-    iter: impl IntoIterator<Item = impl fmt::Display>,
-    separator: impl fmt::Display,
-) -> impl fmt::Display {
-    JoinDisplay {
-        iter: Cell::new(Some(iter.into_iter())),
-        separator,
-    }
-}
-
-struct JoinDisplay<I, S> {
-    iter: Cell<Option<I>>,
-    separator: S,
-}
-
-impl<I, T, S> fmt::Display for JoinDisplay<I, S>
-where
-    I: Iterator<Item = T>,
-    T: fmt::Display,
-    S: fmt::Display,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if let Some(mut iter) = self.iter.take() {
-            if let Some(first) = iter.next() {
-                first.fmt(f)?;
-            }
-            for value in iter {
-                self.separator.fmt(f)?;
-                value.fmt(f)?;
-            }
-        }
-        Ok(())
-    }
-}
-
 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
 ///
 /// The callback is only called for incoming `Ok` values. Errors are passed