Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/utils/strings.rs @ 52760:94e2547e6f3d
rust: move code from utils to utils::strings
This moves string-related functions in hg::utils into the recently added
hg::utils::strings module.
author | Mitchell Kember <mkember@janestreet.com> |
---|---|
date | Thu, 16 Jan 2025 13:15:02 -0500 |
parents | 36d39726c0af |
children | 874c64e041b5 |
comparison
equal
deleted
inserted
replaced
52759:36d39726c0af | 52760:94e2547e6f3d |
---|---|
1 //! Contains string-related utilities. | |
2 | |
3 use crate::utils::hg_path::HgPath; | |
4 use std::{cell::Cell, fmt, io::Write as _, ops::Deref as _}; | |
5 | |
/// Returns the index of the first occurrence of `needle` inside `slice`,
/// or `None` when `needle` does not occur.
///
/// An empty `needle` matches at the very start of any slice (including an
/// empty one), so `Some(0)` is returned in that case.
///
/// Useful until rust/issues/56345 is stable
///
/// # Examples
///
/// ```
/// use hg::utils::strings::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `windows(0)` panics, so the empty needle has to be handled before
        // building the window iterator.
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
25 | |
/// Overwrites, in place, every occurrence of `from` found in `buf` with `to`.
///
/// The buffer is scanned from left to right one position at a time, and the
/// scan continues over data that was just written. `from` and `to` must have
/// the same length because the buffer cannot grow or shrink; when they do
/// not, or when `from` is longer than `buf`, the buffer is left untouched.
///
/// # Examples
///
/// ```
/// use hg::utils::strings::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    let width = from.len();
    if to.len() != width || width > buf.len() {
        return;
    }
    // Highest index at which a full `width`-sized window still fits.
    let last_start = buf.len() - width;
    let mut start = 0;
    while start <= last_start {
        let window = &mut buf[start..start + width];
        if *window == *from {
            window.clone_from_slice(to);
        }
        start += 1;
    }
}
52 | |
/// Trimming and splitting helpers for byte-string-like slices.
pub trait SliceExt {
    /// Returns `self` without its trailing ASCII whitespace.
    fn trim_end(&self) -> &Self;
    /// Returns `self` without its leading ASCII whitespace.
    fn trim_start(&self) -> &Self;
    /// Returns `self` without the trailing bytes for which `f` returns
    /// `true`.
    fn trim_end_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Returns `self` without the leading bytes for which `f` returns
    /// `true`.
    fn trim_start_matches(&self, f: impl FnMut(u8) -> bool) -> &Self;
    /// Returns `self` without leading and trailing ASCII whitespace.
    fn trim(&self) -> &Self;
    /// Returns what follows `needle` when `self` starts with it, `None`
    /// otherwise.
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
    /// Splits around the first `separator` byte, which is not part of
    /// either half. Returns `None` when the byte is absent.
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
    /// Splits around the first occurrence of the `separator` sequence,
    /// which is not part of either half. Returns `None` when it is absent.
    fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])>;
}
63 | |
64 impl SliceExt for [u8] { | |
65 fn trim_end(&self) -> &[u8] { | |
66 self.trim_end_matches(|byte| byte.is_ascii_whitespace()) | |
67 } | |
68 | |
69 fn trim_start(&self) -> &[u8] { | |
70 self.trim_start_matches(|byte| byte.is_ascii_whitespace()) | |
71 } | |
72 | |
73 fn trim_end_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { | |
74 if let Some(last) = self.iter().rposition(|&byte| !f(byte)) { | |
75 &self[..=last] | |
76 } else { | |
77 &[] | |
78 } | |
79 } | |
80 | |
81 fn trim_start_matches(&self, mut f: impl FnMut(u8) -> bool) -> &Self { | |
82 if let Some(first) = self.iter().position(|&byte| !f(byte)) { | |
83 &self[first..] | |
84 } else { | |
85 &[] | |
86 } | |
87 } | |
88 | |
89 /// ``` | |
90 /// use hg::utils::strings::SliceExt; | |
91 /// assert_eq!( | |
92 /// b" to trim ".trim(), | |
93 /// b"to trim" | |
94 /// ); | |
95 /// assert_eq!( | |
96 /// b"to trim ".trim(), | |
97 /// b"to trim" | |
98 /// ); | |
99 /// assert_eq!( | |
100 /// b" to trim".trim(), | |
101 /// b"to trim" | |
102 /// ); | |
103 /// ``` | |
104 fn trim(&self) -> &[u8] { | |
105 self.trim_start().trim_end() | |
106 } | |
107 | |
108 fn drop_prefix(&self, needle: &Self) -> Option<&Self> { | |
109 if self.starts_with(needle) { | |
110 Some(&self[needle.len()..]) | |
111 } else { | |
112 None | |
113 } | |
114 } | |
115 | |
116 fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { | |
117 let pos = memchr::memchr(separator, self)?; | |
118 Some((&self[..pos], &self[pos + 1..])) | |
119 } | |
120 | |
121 fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> { | |
122 find_slice_in_slice(self, separator) | |
123 .map(|pos| (&self[..pos], &self[pos + separator.len()..])) | |
124 } | |
125 } | |
126 | |
/// Values that can be rendered as escaped bytes suitable for showing to a
/// user.
pub trait Escaped {
    /// Returns the escaped form of `self` as a freshly allocated byte
    /// vector, meant for display to the user.
    fn escaped_bytes(&self) -> Vec<u8>;
}
131 | |
132 impl Escaped for u8 { | |
133 fn escaped_bytes(&self) -> Vec<u8> { | |
134 let mut acc = vec![]; | |
135 match self { | |
136 c @ b'\'' | c @ b'\\' => { | |
137 acc.push(b'\\'); | |
138 acc.push(*c); | |
139 } | |
140 b'\t' => { | |
141 acc.extend(br"\\t"); | |
142 } | |
143 b'\n' => { | |
144 acc.extend(br"\\n"); | |
145 } | |
146 b'\r' => { | |
147 acc.extend(br"\\r"); | |
148 } | |
149 c if (*c < b' ' || *c >= 127) => { | |
150 write!(acc, "\\x{:x}", self).unwrap(); | |
151 } | |
152 c => { | |
153 acc.push(*c); | |
154 } | |
155 } | |
156 acc | |
157 } | |
158 } | |
159 | |
160 impl<'a, T: Escaped> Escaped for &'a [T] { | |
161 fn escaped_bytes(&self) -> Vec<u8> { | |
162 self.iter().flat_map(Escaped::escaped_bytes).collect() | |
163 } | |
164 } | |
165 | |
166 impl<T: Escaped> Escaped for Vec<T> { | |
167 fn escaped_bytes(&self) -> Vec<u8> { | |
168 self.deref().escaped_bytes() | |
169 } | |
170 } | |
171 | |
172 impl<'a> Escaped for &'a HgPath { | |
173 fn escaped_bytes(&self) -> Vec<u8> { | |
174 self.as_bytes().escaped_bytes() | |
175 } | |
176 } | |
177 | |
/// Quotes `value` so that a POSIX shell reads it back as exactly one word
/// with the same bytes.
///
/// A non-empty value made only of ASCII letters, digits and `.`, `_`, `/`,
/// `+`, `-` is returned unchanged. Anything else, including the empty
/// value, is wrapped in single quotes. Inside single quotes the shell treats
/// every byte literally, backslash included, so an embedded `'` is written
/// as `'\''`: close the quoted string, emit an escaped quote, reopen it.
#[cfg(unix)]
pub fn shell_quote(value: &[u8]) -> Vec<u8> {
    let is_plain = value.iter().all(|&byte| {
        matches!(
            byte,
            b'a'..=b'z'
                | b'A'..=b'Z'
                | b'0'..=b'9'
                | b'.'
                | b'_'
                | b'/'
                | b'+'
                | b'-'
        )
    });
    // An empty value must still be quoted, otherwise the argument would
    // disappear from the command line.
    if is_plain && !value.is_empty() {
        return value.to_owned();
    }

    let mut quoted = Vec::with_capacity(value.len() + 2);
    quoted.push(b'\'');
    for &byte in value {
        if byte == b'\'' {
            quoted.extend_from_slice(br"'\''");
        } else {
            quoted.push(byte);
        }
    }
    quoted.push(b'\'');
    quoted
}
207 | |
208 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string | |
209 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> { | |
210 lazy_static::lazy_static! { | |
211 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301 | |
212 /// The `x` makes whitespace ignored. | |
213 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag. | |
214 static ref VAR_RE: regex::bytes::Regex = | |
215 regex::bytes::Regex::new(r"(?x-u) | |
216 \$ | |
217 (?: | |
218 (\w+) | |
219 | | |
220 \{ | |
221 ([^}]*) | |
222 \} | |
223 ) | |
224 ").unwrap(); | |
225 } | |
226 VAR_RE.replace_all(s, |captures: ®ex::bytes::Captures| { | |
227 let var_name = crate::utils::files::get_os_str_from_bytes( | |
228 captures | |
229 .get(1) | |
230 .or_else(|| captures.get(2)) | |
231 .expect("either side of `|` must participate in match") | |
232 .as_bytes(), | |
233 ); | |
234 std::env::var_os(var_name) | |
235 .map(crate::utils::files::get_bytes_from_os_str) | |
236 .unwrap_or_else(|| { | |
237 // Referencing an environment variable that does not exist. | |
238 // Leave the $FOO reference as-is. | |
239 captures[0].to_owned() | |
240 }) | |
241 }) | |
242 } | |
243 | |
/// Builds a value that, when formatted, writes every item of `iter`
/// separated by `separator`, much like Python's `separator.join(iter)`.
///
/// The iterator is consumed by the first formatting of the returned value;
/// formatting it a second time writes nothing.
pub fn join_display(
    iter: impl IntoIterator<Item = impl fmt::Display>,
    separator: impl fmt::Display,
) -> impl fmt::Display {
    let pending = Cell::new(Some(iter.into_iter()));
    JoinDisplay {
        iter: pending,
        separator,
    }
}

/// Single-use adapter behind [`join_display`].
struct JoinDisplay<I, S> {
    /// Items still to be written. `Display::fmt` only receives `&self`, so
    /// the iterator sits in a `Cell` and is taken out on first use.
    iter: Cell<Option<I>>,
    /// Written between consecutive items.
    separator: S,
}

impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
    I: Iterator<Item = T>,
    T: fmt::Display,
    S: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut items = match self.iter.take() {
            Some(items) => items,
            // Already formatted once: nothing is left to write.
            None => return Ok(()),
        };
        // The head item gets no separator in front of it.
        if let Some(head) = items.next() {
            head.fmt(f)?;
        }
        items.try_for_each(|item| {
            self.separator.fmt(f)?;
            item.fmt(f)
        })
    }
}
283 | |
1 /// Returns a short representation of a user name or email address. | 284 /// Returns a short representation of a user name or email address. |
2 pub fn short_user(user: &[u8]) -> &[u8] { | 285 pub fn short_user(user: &[u8]) -> &[u8] { |
3 let mut str = user; | 286 let mut str = user; |
4 if let Some(i) = memchr::memchr(b'@', str) { | 287 if let Some(i) = memchr::memchr(b'@', str) { |
5 str = &str[..i]; | 288 str = &str[..i]; |
17 } | 300 } |
18 | 301 |
19 #[cfg(test)] | 302 #[cfg(test)] |
20 mod tests { | 303 mod tests { |
21 use super::*; | 304 use super::*; |
305 | |
#[test]
fn test_expand_vars() {
    // Modifying process-global state in a test isn’t great,
    // but hopefully this won’t collide with anything.
    std::env::set_var("TEST_EXPAND_VAR", "1");

    // Bare `$NAME` form.
    let bare = expand_vars(b"before/$TEST_EXPAND_VAR/after");
    assert_eq!(bare, &b"before/1/after"[..]);

    // Braced `${NAME}` form, repeated back to back.
    let braced = expand_vars(
        b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after",
    );
    assert_eq!(braced, &b"before111after"[..]);

    // A variable that is not set is left as written.
    let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
    let unset = expand_vars(s);
    assert_eq!(unset, &s[..]);
}
22 | 322 |
23 #[test] | 323 #[test] |
24 fn test_short_user() { | 324 fn test_short_user() { |
25 assert_eq!(short_user(b""), b""); | 325 assert_eq!(short_user(b""), b""); |
26 assert_eq!(short_user(b"Name"), b"Name"); | 326 assert_eq!(short_user(b"Name"), b"Name"); |