Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/utils.rs @ 52760:94e2547e6f3d
rust: move code from utils to utils::strings
This moves string-related functions in hg::utils into the recently added
hg::utils::strings module.
author | Mitchell Kember <mkember@janestreet.com> |
---|---|
date | Thu, 16 Jan 2025 13:15:02 -0500 |
parents | 36d39726c0af |
children |
line wrap: on
line diff
--- a/rust/hg-core/src/utils.rs Fri Jan 03 10:50:17 2025 -0500 +++ b/rust/hg-core/src/utils.rs Thu Jan 16 13:15:02 2025 -0500 @@ -8,15 +8,11 @@ //! Contains useful functions, traits, structs, etc. for use in core. use crate::errors::{HgError, IoErrorContext}; -use crate::utils::hg_path::HgPath; use im_rc::ordmap::DiffItem; use im_rc::ordmap::OrdMap; use itertools::EitherOrBoth; use itertools::Itertools; -use std::cell::Cell; use std::cmp::Ordering; -use std::fmt; -use std::{io::Write, ops::Deref}; pub mod debug; pub mod files; @@ -24,208 +20,6 @@ pub mod path_auditor; pub mod strings; -/// Useful until rust/issues/56345 is stable -/// -/// # Examples -/// -/// ``` -/// use crate::hg::utils::find_slice_in_slice; -/// -/// let haystack = b"This is the haystack".to_vec(); -/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8)); -/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None); -/// ``` -pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize> -where - for<'a> &'a [T]: PartialEq, -{ - slice - .windows(needle.len()) - .position(|window| window == needle) -} - -/// Replaces the `from` slice with the `to` slice inside the `buf` slice. 
-/// -/// # Examples -/// -/// ``` -/// use crate::hg::utils::replace_slice; -/// let mut line = b"I hate writing tests!".to_vec(); -/// replace_slice(&mut line, b"hate", b"love"); -/// assert_eq!( -/// line, -/// b"I love writing tests!".to_vec() -/// ); -/// ``` -pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T]) -where - T: Clone + PartialEq, -{ - if buf.len() < from.len() || from.len() != to.len() { - return; - } - for i in 0..=buf.len() - from.len() { - if buf[i..].starts_with(from) { - buf[i..(i + from.len())].clone_from_slice(to); - } - } -} - -pub trait SliceExt { - fn trim_end(&self) -> &Self; - fn trim_start(&self) -> &Self; - fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; - fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self; - fn trim(&self) -> &Self; - fn drop_prefix(&self, needle: &Self) -> Option<&Self>; - fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>; - fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>; -} - -impl SliceExt for [u8] { - fn trim_end(&self) -> &[u8] { - self.trim_end_matches(|byte| byte.is_ascii_whitespace()) - } - - fn trim_start(&self) -> &[u8] { - self.trim_start_matches(|byte| byte.is_ascii_whitespace()) - } - - fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { - if let Some(last) = self.iter().rposition(|&byte| !f(byte)) { - &self[..=last] - } else { - &[] - } - } - - fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { - if let Some(first) = self.iter().position(|&byte| !f(byte)) { - &self[first..] 
- } else { - &[] - } - } - - /// ``` - /// use hg::utils::SliceExt; - /// assert_eq!( - /// b" to trim ".trim(), - /// b"to trim" - /// ); - /// assert_eq!( - /// b"to trim ".trim(), - /// b"to trim" - /// ); - /// assert_eq!( - /// b" to trim".trim(), - /// b"to trim" - /// ); - /// ``` - fn trim(&self) -> &[u8] { - self.trim_start().trim_end() - } - - fn drop_prefix(&self, needle: &Self) -> Option<&Self> { - if self.starts_with(needle) { - Some(&self[needle.len()..]) - } else { - None - } - } - - fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { - let pos = memchr::memchr(separator, self)?; - Some((&self[..pos], &self[pos + 1..])) - } - - fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { - find_slice_in_slice(self, separator) - .map(|pos| (&self[..pos], &self[pos + separator.len()..])) - } -} - -pub trait Escaped { - /// Return bytes escaped for display to the user - fn escaped_bytes(&self) -> Vec<u8>; -} - -impl Escaped for u8 { - fn escaped_bytes(&self) -> Vec<u8> { - let mut acc = vec![]; - match self { - c @ b'\'' | c @ b'\\' => { - acc.push(b'\\'); - acc.push(*c); - } - b'\t' => { - acc.extend(br"\\t"); - } - b'\n' => { - acc.extend(br"\\n"); - } - b'\r' => { - acc.extend(br"\\r"); - } - c if (*c < b' ' || *c >= 127) => { - write!(acc, "\\x{:x}", self).unwrap(); - } - c => { - acc.push(*c); - } - } - acc - } -} - -impl<'a, T: Escaped> Escaped for &'a [T] { - fn escaped_bytes(&self) -> Vec<u8> { - self.iter().flat_map(Escaped::escaped_bytes).collect() - } -} - -impl<T: Escaped> Escaped for Vec<T> { - fn escaped_bytes(&self) -> Vec<u8> { - self.deref().escaped_bytes() - } -} - -impl<'a> Escaped for &'a HgPath { - fn escaped_bytes(&self) -> Vec<u8> { - self.as_bytes().escaped_bytes() - } -} - -#[cfg(unix)] -pub fn shell_quote(value: &[u8]) -> Vec<u8> { - if value.iter().all(|&byte| { - matches!( - byte, - b'a'..=b'z' - | b'A'..=b'Z' - | b'0'..=b'9' - | b'.' 
- | b'_' - | b'/' - | b'+' - | b'-' - ) - }) { - value.to_owned() - } else { - let mut quoted = Vec::with_capacity(value.len() + 2); - quoted.push(b'\''); - for &byte in value { - if byte == b'\'' { - quoted.push(b'\\'); - } - quoted.push(byte); - } - quoted.push(b'\''); - quoted - } -} - pub fn current_dir() -> Result<std::path::PathBuf, HgError> { std::env::current_dir().map_err(|error| HgError::IoError { error, @@ -240,59 +34,6 @@ }) } -/// Expand `$FOO` and `${FOO}` environment variables in the given byte string -pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> { - lazy_static::lazy_static! { - /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301 - /// The `x` makes whitespace ignored. - /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag. - static ref VAR_RE: regex::bytes::Regex = - regex::bytes::Regex::new(r"(?x-u) - \$ - (?: - (\w+) - | - \{ - ([^}]*) - \} - ) - ").unwrap(); - } - VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| { - let var_name = files::get_os_str_from_bytes( - captures - .get(1) - .or_else(|| captures.get(2)) - .expect("either side of `|` must participate in match") - .as_bytes(), - ); - std::env::var_os(var_name) - .map(files::get_bytes_from_os_str) - .unwrap_or_else(|| { - // Referencing an environment variable that does not exist. - // Leave the $FOO reference as-is. - captures[0].to_owned() - }) - }) -} - -#[test] -fn test_expand_vars() { - // Modifying process-global state in a test isn’t great, - // but hopefully this won’t collide with anything. - std::env::set_var("TEST_EXPAND_VAR", "1"); - assert_eq!( - expand_vars(b"before/$TEST_EXPAND_VAR/after"), - &b"before/1/after"[..] - ); - assert_eq!( - expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"), - &b"before111after"[..] 
- ); - let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after"; - assert_eq!(expand_vars(s), &s[..]); -} - pub(crate) enum MergeResult<V> { Left, Right, @@ -441,46 +182,6 @@ } } -/// Join items of the iterable with the given separator, similar to Python’s -/// `separator.join(iter)`. -/// -/// Formatting the return value consumes the iterator. -/// Formatting it again will produce an empty string. -pub fn join_display( - iter: impl IntoIterator<Item = impl fmt::Display>, - separator: impl fmt::Display, -) -> impl fmt::Display { - JoinDisplay { - iter: Cell::new(Some(iter.into_iter())), - separator, - } -} - -struct JoinDisplay<I, S> { - iter: Cell<Option<I>>, - separator: S, -} - -impl<I, T, S> fmt::Display for JoinDisplay<I, S> -where - I: Iterator<Item = T>, - T: fmt::Display, - S: fmt::Display, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(mut iter) = self.iter.take() { - if let Some(first) = iter.next() { - first.fmt(f)?; - } - for value in iter { - self.separator.fmt(f)?; - value.fmt(f)?; - } - } - Ok(()) - } -} - /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. /// /// The callback is only called for incoming `Ok` values. Errors are passed