comparison rust/hg-core/src/utils/strings.rs @ 52760:94e2547e6f3d

rust: move code from utils to utils::strings This moves string-related functions in hg::utils into the recently added hg::utils::strings module.
author Mitchell Kember <mkember@janestreet.com>
date Thu, 16 Jan 2025 13:15:02 -0500
parents 36d39726c0af
children 874c64e041b5
comparison
equal deleted inserted replaced
52759:36d39726c0af 52760:94e2547e6f3d
1 //! Contains string-related utilities.
2
3 use crate::utils::hg_path::HgPath;
4 use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _};
5
/// Returns the index of the first occurrence of `needle` inside `slice`,
/// or `None` when `needle` does not occur.
///
/// An empty `needle` matches at index 0 (like `str::find("")`).
///
/// Useful until rust/issues/56345 is stable
///
/// # Examples
///
/// ```
/// use hg::utils::strings::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `windows(0)` panics, so the empty needle is handled up front.
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
25
/// Overwrites, in place, every occurrence of `from` found in `buf` with `to`.
///
/// Nothing happens unless `from` and `to` have the same length and `from`
/// fits inside `buf`. Positions are visited left to right one element at a
/// time, so a later position is compared against whatever an earlier
/// replacement has already written.
///
/// # Examples
///
/// ```
/// use hg::utils::strings::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    let width = from.len();
    if width != to.len() || width > buf.len() {
        return;
    }
    // Last index at which a window of `width` elements still fits.
    let last_start = buf.len() - width;
    for start in 0..=last_start {
        let candidate = &mut buf[start..start + width];
        if *candidate == *from {
            candidate.clone_from_slice(to);
        }
    }
}
52
/// Extension methods for trimming, prefix removal and splitting.
///
/// This file implements the trait for `[u8]`.
pub trait SliceExt {
    /// Returns `self` with trailing ASCII whitespace removed.
    fn trim_end(&self) -> &Self;

    /// Returns `self` with leading ASCII whitespace removed.
    fn trim_start(&self) -> &Self;

    /// Returns `self` without the trailing run of bytes for which
    /// `predicate` returns `true`.
    fn trim_end_matches(&self, predicate: impl FnMut(u8) -> bool) -> &Self;

    /// Returns `self` without the leading run of bytes for which
    /// `predicate` returns `true`.
    fn trim_start_matches(&self, predicate: impl FnMut(u8) -> bool) -> &Self;

    /// Returns `self` with ASCII whitespace removed from both ends.
    fn trim(&self) -> &Self;

    /// Returns what follows `needle` if `self` starts with it, `None`
    /// otherwise.
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;

    /// Splits around the first `separator` byte, which is not included in
    /// either half. Returns `None` when the byte is absent.
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;

    /// Splits around the first occurrence of the `separator` sequence, which
    /// is not included in either half. Returns `None` when it is absent.
    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
63
64 impl SliceExt for [u8] {
65 fn trim_end(&self) -> &[u8] {
66 self.trim_end_matches(|byte| byte.is_ascii_whitespace())
67 }
68
69 fn trim_start(&self) -> &[u8] {
70 self.trim_start_matches(|byte| byte.is_ascii_whitespace())
71 }
72
73 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
74 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) {
75 &self[..=last]
76 } else {
77 &[]
78 }
79 }
80
81 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self {
82 if let Some(first) = self.iter().position(|&byte| !f(byte)) {
83 &self[first..]
84 } else {
85 &[]
86 }
87 }
88
89 /// ```
90 /// use hg::utils::strings::SliceExt;
91 /// assert_eq!(
92 /// b" to trim ".trim(),
93 /// b"to trim"
94 /// );
95 /// assert_eq!(
96 /// b"to trim ".trim(),
97 /// b"to trim"
98 /// );
99 /// assert_eq!(
100 /// b" to trim".trim(),
101 /// b"to trim"
102 /// );
103 /// ```
104 fn trim(&self) -> &[u8] {
105 self.trim_start().trim_end()
106 }
107
108 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
109 if self.starts_with(needle) {
110 Some(&self[needle.len()..])
111 } else {
112 None
113 }
114 }
115
116 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
117 let pos = memchr::memchr(separator, self)?;
118 Some((&self[..pos], &self[pos + 1..]))
119 }
120
121 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {
122 find_slice_in_slice(self, separator)
123 .map(|pos| (&self[..pos], &self[pos + separator.len()..]))
124 }
125 }
126
/// Conversion of raw bytes into a printable, backslash-escaped form.
pub trait Escaped {
    /// Return bytes escaped for display to the user
    fn escaped_bytes(&self) -> Vec<u8>;
}

impl Escaped for u8 {
    /// Escapes a single byte:
    ///
    /// - `'` and `\` are prefixed with a backslash,
    /// - tab, newline and carriage return become `\t`, `\n` and `\r`,
    /// - other bytes below 0x20 or at/above 0x7f become `\xNN` with exactly
    ///   two lowercase hex digits,
    /// - everything else is returned unchanged.
    fn escaped_bytes(&self) -> Vec<u8> {
        let mut acc = vec![];
        match *self {
            c @ b'\'' | c @ b'\\' => {
                acc.extend_from_slice(&[b'\\', c]);
            }
            // Raw literals: each is exactly one backslash plus a letter.
            b'\t' => acc.extend_from_slice(br"\t"),
            b'\n' => acc.extend_from_slice(br"\n"),
            b'\r' => acc.extend_from_slice(br"\r"),
            c if c < b' ' || c >= 127 => {
                // Zero-pad so that the escape always has a fixed width;
                // writing into a `Vec<u8>` cannot fail.
                write!(acc, "\\x{:02x}", c).unwrap();
            }
            c => acc.push(c),
        }
        acc
    }
}

impl<'a, T: Escaped> Escaped for &'a [T] {
    /// Concatenates the escaped form of every element, in order.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.iter().flat_map(Escaped::escaped_bytes).collect()
    }
}

impl<T: Escaped> Escaped for Vec<T> {
    /// Escapes the vector's contents exactly like the equivalent slice.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.deref().escaped_bytes()
    }
}
171
172 impl<'a> Escaped for &'a HgPath {
173 fn escaped_bytes(&self) -> Vec<u8> {
174 self.as_bytes().escaped_bytes()
175 }
176 }
177
/// Quotes `value` so that a POSIX shell reads it back as a single word.
///
/// A non-empty value made only of ASCII letters, digits and `.`, `_`, `/`,
/// `+`, `-` is returned unchanged. Anything else, including the empty
/// value, is wrapped in single quotes. Each embedded single quote is
/// written as `'\''` (close the quotes, an escaped quote, reopen), because
/// a backslash has no special meaning inside single quotes.
#[cfg(unix)]
pub fn shell_quote(value: &[u8]) -> Vec<u8> {
    let is_plain = |byte: &u8| {
        matches!(
            *byte,
            b'a'..=b'z'
                | b'A'..=b'Z'
                | b'0'..=b'9'
                | b'.'
                | b'_'
                | b'/'
                | b'+'
                | b'-'
        )
    };
    // An empty value must still be quoted, otherwise the argument would
    // disappear from the command line.
    if !value.is_empty() && value.iter().all(is_plain) {
        return value.to_owned();
    }
    let mut quoted = Vec::with_capacity(value.len() + 2);
    quoted.push(b'\'');
    for &byte in value {
        if byte == b'\'' {
            quoted.extend_from_slice(br"'\''");
        } else {
            quoted.push(byte);
        }
    }
    quoted.push(b'\'');
    quoted
}
207
208 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
209 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
210 lazy_static::lazy_static! {
211 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
212 /// The `x` makes whitespace ignored.
213 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
214 static ref VAR_RE: regex::bytes::Regex =
215 regex::bytes::Regex::new(r"(?x-u)
216 \$
217 (?:
218 (\w+)
219 |
220 \{
221 ([^}]*)
222 \}
223 )
224 ").unwrap();
225 }
226 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
227 let var_name = crate::utils::files::get_os_str_from_bytes(
228 captures
229 .get(1)
230 .or_else(|| captures.get(2))
231 .expect("either side of `|` must participate in match")
232 .as_bytes(),
233 );
234 std::env::var_os(var_name)
235 .map(crate::utils::files::get_bytes_from_os_str)
236 .unwrap_or_else(|| {
237 // Referencing an environment variable that does not exist.
238 // Leave the $FOO reference as-is.
239 captures[0].to_owned()
240 })
241 })
242 }
243
/// Builds a value whose `Display` output is the items of `iter` written one
/// after another with `separator` between them, similar to Python’s
/// `separator.join(iter)`.
///
/// The iterator is consumed the first time the returned value is formatted;
/// formatting it a second time writes nothing.
pub fn join_display(
    iter: impl IntoIterator<Item = impl fmt::Display>,
    separator: impl fmt::Display,
) -> impl fmt::Display {
    let iter = Cell::new(Some(iter.into_iter()));
    JoinDisplay { iter, separator }
}

/// Backing type for [`join_display`].
struct JoinDisplay<I, S> {
    /// Pending items; `take`n (leaving `None`) by the first call to `fmt`.
    iter: Cell<Option<I>>,
    /// Written between consecutive items.
    separator: S,
}

impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
    I: Iterator<Item = T>,
    T: fmt::Display,
    S: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let items = match self.iter.take() {
            Some(items) => items,
            // Already formatted once: nothing left to write.
            None => return Ok(()),
        };
        let mut needs_separator = false;
        for item in items {
            if needs_separator {
                fmt::Display::fmt(&self.separator, f)?;
            }
            fmt::Display::fmt(&item, f)?;
            needs_separator = true;
        }
        Ok(())
    }
}
283
1 /// Returns a short representation of a user name or email address. 284 /// Returns a short representation of a user name or email address.
2 pub fn short_user(user: &[u8]) -> &[u8] { 285 pub fn short_user(user: &[u8]) -> &[u8] {
3 let mut str = user; 286 let mut str = user;
4 if let Some(i) = memchr::memchr(b'@', str) { 287 if let Some(i) = memchr::memchr(b'@', str) {
5 str = &str[..i]; 288 str = &str[..i];
17 } 300 }
18 301
19 #[cfg(test)] 302 #[cfg(test)]
20 mod tests { 303 mod tests {
21 use super::*; 304 use super::*;
305
306 #[test]
307 fn test_expand_vars() {
308 // Modifying process-global state in a test isn’t great,
309 // but hopefully this won’t collide with anything.
310 std::env::set_var("TEST_EXPAND_VAR", "1");
311 assert_eq!(
312 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
313 &b"before/1/after"[..]
314 );
315 assert_eq!(
316 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
317 &b"before111after"[..]
318 );
319 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
320 assert_eq!(expand_vars(s), &s[..]);
321 }
22 322
23 #[test] 323 #[test]
24 fn test_short_user() { 324 fn test_short_user() {
25 assert_eq!(short_user(b""), b""); 325 assert_eq!(short_user(b""), b"");
26 assert_eq!(short_user(b"Name"), b"Name"); 326 assert_eq!(short_user(b"Name"), b"Name");