rust/hg-core/src/utils.rs
changeset 52760 94e2547e6f3d
parent 52759 36d39726c0af
equal deleted inserted replaced
52759:36d39726c0af 52760:94e2547e6f3d
     6 // GNU General Public License version 2 or any later version.
     6 // GNU General Public License version 2 or any later version.
     7 
     7 
     8 //! Contains useful functions, traits, structs, etc. for use in core.
     8 //! Contains useful functions, traits, structs, etc. for use in core.
     9 
     9 
    10 use crate::errors::{HgError, IoErrorContext};
    10 use crate::errors::{HgError, IoErrorContext};
    11 use crate::utils::hg_path::HgPath;
       
    12 use im_rc::ordmap::DiffItem;
    11 use im_rc::ordmap::DiffItem;
    13 use im_rc::ordmap::OrdMap;
    12 use im_rc::ordmap::OrdMap;
    14 use itertools::EitherOrBoth;
    13 use itertools::EitherOrBoth;
    15 use itertools::Itertools;
    14 use itertools::Itertools;
    16 use std::cell::Cell;
       
    17 use std::cmp::Ordering;
    15 use std::cmp::Ordering;
    18 use std::fmt;
       
    19 use std::{io::Write, ops::Deref};
       
    20 
    16 
    21 pub mod debug;
    17 pub mod debug;
    22 pub mod files;
    18 pub mod files;
    23 pub mod hg_path;
    19 pub mod hg_path;
    24 pub mod path_auditor;
    20 pub mod path_auditor;
    25 pub mod strings;
    21 pub mod strings;
    26 
    22 
    27 /// Useful until rust/issues/56345 is stable
       
    28 ///
       
    29 /// # Examples
       
    30 ///
       
    31 /// ```
       
    32 /// use crate::hg::utils::find_slice_in_slice;
       
    33 ///
       
    34 /// let haystack = b"This is the haystack".to_vec();
       
    35 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
       
    36 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
       
    37 /// ```
       
    38 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
       
    39 where
       
    40     for<'a> &'a [T]: PartialEq,
       
    41 {
       
    42     slice
       
    43         .windows(needle.len())
       
    44         .position(|window| window == needle)
       
    45 }
       
    46 
       
    47 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
       
    48 ///
       
    49 /// # Examples
       
    50 ///
       
    51 /// ```
       
    52 /// use crate::hg::utils::replace_slice;
       
    53 /// let mut line = b"I hate writing tests!".to_vec();
       
    54 /// replace_slice(&mut line, b"hate", b"love");
       
    55 /// assert_eq!(
       
    56 ///     line,
       
    57 ///     b"I love writing tests!".to_vec()
       
    58 /// );
       
    59 /// ```
       
    60 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
       
    61 where
       
    62     T: Clone + PartialEq,
       
    63 {
       
    64     if buf.len() < from.len() || from.len() != to.len() {
       
    65         return;
       
    66     }
       
    67     for i in 0..=buf.len() - from.len() {
       
    68         if buf[i..].starts_with(from) {
       
    69             buf[i..(i + from.len())].clone_from_slice(to);
       
    70         }
       
    71     }
       
    72 }
       
    73 
       
/// Trimming, prefix-removal and splitting helpers for byte-slice-like types.
///
/// The descriptions below match the `[u8]` implementation in this file.
pub trait SliceExt {
    /// Returns `self` without its trailing ASCII whitespace.
    fn trim_end(&self) -> &Self;
    /// Returns `self` without its leading ASCII whitespace.
    fn trim_start(&self) -> &Self;
    /// Returns `self` without the trailing run of bytes for which `f`
    /// returns `true`.
    fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Returns `self` without the leading run of bytes for which `f`
    /// returns `true`.
    fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Returns `self` without leading and trailing ASCII whitespace.
    fn trim(&self) -> &Self;
    /// Returns what follows `needle` when `self` starts with it, `None`
    /// otherwise.
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
    /// Splits around the first `separator` byte, which is not part of either
    /// half. Returns `None` when the separator is absent.
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
    /// Same as `split_2`, with a multi-byte `separator`.
    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
       
    84 
       
    85 impl SliceExt for [u8] {
       
    86     fn trim_end(&self) -> &[u8] {
       
    87         self.trim_end_matches(|byte| byte.is_ascii_whitespace())
       
    88     }
       
    89 
       
    90     fn trim_start(&self) -> &[u8] {
       
    91         self.trim_start_matches(|byte| byte.is_ascii_whitespace())
       
    92     }
       
    93 
       
    94     fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
       
    95         if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
       
    96             &self[..=last]
       
    97         } else {
       
    98             &[]
       
    99         }
       
   100     }
       
   101 
       
   102     fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
       
   103         if let Some(first) = self.iter().position(|&byte| !f(byte)) {
       
   104             &self[first..]
       
   105         } else {
       
   106             &[]
       
   107         }
       
   108     }
       
   109 
       
   110     /// ```
       
   111     /// use hg::utils::SliceExt;
       
   112     /// assert_eq!(
       
   113     ///     b"  to trim  ".trim(),
       
   114     ///     b"to trim"
       
   115     /// );
       
   116     /// assert_eq!(
       
   117     ///     b"to trim  ".trim(),
       
   118     ///     b"to trim"
       
   119     /// );
       
   120     /// assert_eq!(
       
   121     ///     b"  to trim".trim(),
       
   122     ///     b"to trim"
       
   123     /// );
       
   124     /// ```
       
   125     fn trim(&self) -> &[u8] {
       
   126         self.trim_start().trim_end()
       
   127     }
       
   128 
       
   129     fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
       
   130         if self.starts_with(needle) {
       
   131             Some(&self[needle.len()..])
       
   132         } else {
       
   133             None
       
   134         }
       
   135     }
       
   136 
       
   137     fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
       
   138         let pos = memchr::memchr(separator, self)?;
       
   139         Some((&self[..pos], &self[pos + 1..]))
       
   140     }
       
   141 
       
   142     fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
       
   143         find_slice_in_slice(self, separator)
       
   144             .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
       
   145     }
       
   146 }
       
   147 
       
   148 pub trait Escaped {
       
   149     /// Return bytes escaped for display to the user
       
   150     fn escaped_bytes(&self) -> Vec<u8>;
       
   151 }
       
   152 
       
   153 impl Escaped for u8 {
       
   154     fn escaped_bytes(&self) -> Vec<u8> {
       
   155         let mut acc = vec![];
       
   156         match self {
       
   157             c @ b'\'' | c @ b'\\' => {
       
   158                 acc.push(b'\\');
       
   159                 acc.push(*c);
       
   160             }
       
   161             b'\t' => {
       
   162                 acc.extend(br"\\t");
       
   163             }
       
   164             b'\n' => {
       
   165                 acc.extend(br"\\n");
       
   166             }
       
   167             b'\r' => {
       
   168                 acc.extend(br"\\r");
       
   169             }
       
   170             c if (*c < b' ' || *c >= 127) => {
       
   171                 write!(acc, "\\x{:x}", self).unwrap();
       
   172             }
       
   173             c => {
       
   174                 acc.push(*c);
       
   175             }
       
   176         }
       
   177         acc
       
   178     }
       
   179 }
       
   180 
       
   181 impl<'a, T: Escaped> Escaped for &'a [T] {
       
   182     fn escaped_bytes(&self) -> Vec<u8> {
       
   183         self.iter().flat_map(Escaped::escaped_bytes).collect()
       
   184     }
       
   185 }
       
   186 
       
   187 impl<T: Escaped> Escaped for Vec<T> {
       
   188     fn escaped_bytes(&self) -> Vec<u8> {
       
   189         self.deref().escaped_bytes()
       
   190     }
       
   191 }
       
   192 
       
   193 impl<'a> Escaped for &'a HgPath {
       
   194     fn escaped_bytes(&self) -> Vec<u8> {
       
   195         self.as_bytes().escaped_bytes()
       
   196     }
       
   197 }
       
   198 
       
   199 #[cfg(unix)]
       
   200 pub fn shell_quote(value: &[u8]) -> Vec<u8> {
       
   201     if value.iter().all(|&byte| {
       
   202         matches!(
       
   203             byte,
       
   204             b'a'..=b'z'
       
   205             | b'A'..=b'Z'
       
   206             | b'0'..=b'9'
       
   207             | b'.'
       
   208             | b'_'
       
   209             | b'/'
       
   210             | b'+'
       
   211             | b'-'
       
   212         )
       
   213     }) {
       
   214         value.to_owned()
       
   215     } else {
       
   216         let mut quoted = Vec::with_capacity(value.len() + 2);
       
   217         quoted.push(b'\'');
       
   218         for &byte in value {
       
   219             if byte == b'\'' {
       
   220                 quoted.push(b'\\');
       
   221             }
       
   222             quoted.push(byte);
       
   223         }
       
   224         quoted.push(b'\'');
       
   225         quoted
       
   226     }
       
   227 }
       
   228 
       
   229 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
    23 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
   230     std::env::current_dir().map_err(|error| HgError::IoError {
    24     std::env::current_dir().map_err(|error| HgError::IoError {
   231         error,
    25         error,
   232         context: IoErrorContext::CurrentDir,
    26         context: IoErrorContext::CurrentDir,
   233     })
    27     })
   236 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
    30 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
   237     std::env::current_exe().map_err(|error| HgError::IoError {
    31     std::env::current_exe().map_err(|error| HgError::IoError {
   238         error,
    32         error,
   239         context: IoErrorContext::CurrentExe,
    33         context: IoErrorContext::CurrentExe,
   240     })
    34     })
   241 }
       
   242 
       
   243 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
       
   244 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
       
   245     lazy_static::lazy_static! {
       
   246         /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
       
   247         /// The `x` makes whitespace ignored.
       
   248         /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
       
   249         static ref VAR_RE: regex::bytes::Regex =
       
   250             regex::bytes::Regex::new(r"(?x-u)
       
   251                 \$
       
   252                 (?:
       
   253                     (\w+)
       
   254                     |
       
   255                     \{
       
   256                         ([^}]*)
       
   257                     \}
       
   258                 )
       
   259             ").unwrap();
       
   260     }
       
   261     VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
       
   262         let var_name = files::get_os_str_from_bytes(
       
   263             captures
       
   264                 .get(1)
       
   265                 .or_else(|| captures.get(2))
       
   266                 .expect("either side of `|` must participate in match")
       
   267                 .as_bytes(),
       
   268         );
       
   269         std::env::var_os(var_name)
       
   270             .map(files::get_bytes_from_os_str)
       
   271             .unwrap_or_else(|| {
       
   272                 // Referencing an environment variable that does not exist.
       
   273                 // Leave the $FOO reference as-is.
       
   274                 captures[0].to_owned()
       
   275             })
       
   276     })
       
   277 }
       
   278 
       
   279 #[test]
       
   280 fn test_expand_vars() {
       
   281     // Modifying process-global state in a test isn’t great,
       
   282     // but hopefully this won’t collide with anything.
       
   283     std::env::set_var("TEST_EXPAND_VAR", "1");
       
   284     assert_eq!(
       
   285         expand_vars(b"before/$TEST_EXPAND_VAR/after"),
       
   286         &b"before/1/after"[..]
       
   287     );
       
   288     assert_eq!(
       
   289         expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
       
   290         &b"before111after"[..]
       
   291     );
       
   292     let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
       
   293     assert_eq!(expand_vars(s), &s[..]);
       
   294 }
    35 }
   295 
    36 
   296 pub(crate) enum MergeResult<V> {
    37 pub(crate) enum MergeResult<V> {
   297     Left,
    38     Left,
   298     Right,
    39     Right,
   439         }
   180         }
   440         right
   181         right
   441     }
   182     }
   442 }
   183 }
   443 
   184 
   444 /// Join items of the iterable with the given separator, similar to Python’s
       
   445 /// `separator.join(iter)`.
       
   446 ///
       
   447 /// Formatting the return value consumes the iterator.
       
   448 /// Formatting it again will produce an empty string.
       
   449 pub fn join_display(
       
   450     iter: impl IntoIterator<Item = impl fmt::Display>,
       
   451     separator: impl fmt::Display,
       
   452 ) -> impl fmt::Display {
       
   453     JoinDisplay {
       
   454         iter: Cell::new(Some(iter.into_iter())),
       
   455         separator,
       
   456     }
       
   457 }
       
   458 
       
   459 struct JoinDisplay<I, S> {
       
   460     iter: Cell<Option<I>>,
       
   461     separator: S,
       
   462 }
       
   463 
       
   464 impl<I, T, S> fmt::Display for JoinDisplay<I, S>
       
   465 where
       
   466     I: Iterator<Item = T>,
       
   467     T: fmt::Display,
       
   468     S: fmt::Display,
       
   469 {
       
   470     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
       
   471         if let Some(mut iter) = self.iter.take() {
       
   472             if let Some(first) = iter.next() {
       
   473                 first.fmt(f)?;
       
   474             }
       
   475             for value in iter {
       
   476                 self.separator.fmt(f)?;
       
   477                 value.fmt(f)?;
       
   478             }
       
   479         }
       
   480         Ok(())
       
   481     }
       
   482 }
       
   483 
       
   484 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
   185 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
   485 ///
   186 ///
   486 /// The callback is only called for incoming `Ok` values. Errors are passed
   187 /// The callback is only called for incoming `Ok` values. Errors are passed
   487 /// through as-is. In order to let it use the `?` operator the callback is
   188 /// through as-is. In order to let it use the `?` operator the callback is
   488 /// expected to return a `Result` of `Option`, instead of an `Option` of
   189 /// expected to return a `Result` of `Option`, instead of an `Option` of