Mercurial > public > mercurial-scm > hg
comparison rust/hg-core/src/utils/hg_path.rs @ 42956:3fe40dd6355d
rust-hgpath: add HgPath and HgPathBuf structs to encapsulate handling of paths
This change is a direct consequence of this discussion on the mailing list:
https://www.mercurial-scm.org/pipermail/mercurial-devel/2019-August/133574.html
The implementations of `HgPath` and `HgPathBuf` are, for the most part, taken
directly from `OsStr` and `OsString` respectively from the standard library.
What this change does *not* yet do is implement the Windows MBCS to WTF8
conversion, but it lays the basis for a very flexible interface for paths.
Differential Revision: https://phab.mercurial-scm.org/D6773
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Sun, 01 Sep 2019 20:53:14 +0200 |
parents | |
children | 98d996a138de |
comparison
equal
deleted
inserted
replaced
42955:9668744c9122 | 42956:3fe40dd6355d |
---|---|
1 // hg_path.rs | |
2 // | |
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
4 // | |
5 // This software may be used and distributed according to the terms of the | |
6 // GNU General Public License version 2 or any later version. | |
7 | |
8 use std::borrow::Borrow; | |
9 use std::ffi::{OsStr, OsString}; | |
10 use std::ops::Deref; | |
11 use std::path::{Path, PathBuf}; | |
12 | |
/// Reasons why a byte sequence is not a well-formed `HgPath`.
///
/// Every variant carries a copy of the offending path's bytes so that the
/// error can be reported without access to the original path.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Bytes and index of the second slash
    ConsecutiveSlashes(Vec<u8>, usize),
    /// Bytes and index of the null byte
    ContainsNullByte(Vec<u8>, usize),
    /// Bytes that could not be decoded
    DecodeError(Vec<u8>),
}

// `Display` is implemented instead of `ToString`: the standard blanket impl
// still provides `to_string()`, and the error additionally becomes usable
// with `{}` formatting and as a `std::error::Error`.
impl std::fmt::Display for HgPathError {
    /// Writes a one-line, human-readable description of the error; the path
    /// bytes are rendered with their `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}

impl std::error::Error for HgPathError {}

impl From<HgPathError> for std::io::Error {
    /// Wraps the error message in an `InvalidData` I/O error.
    fn from(e: HgPathError) -> Self {
        std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
    }
}
51 | |
52 /// This is a repository-relative path (or canonical path): | |
53 /// - no null characters | |
54 /// - `/` separates directories | |
55 /// - no consecutive slashes | |
56 /// - no leading slash, | |
57 /// - no `.` nor `..` of special meaning | |
58 /// - stored in repository and shared across platforms | |
59 /// | |
60 /// Note: there is no guarantee of any `HgPath` being well-formed at any point | |
61 /// in its lifetime for performance reasons and to ease ergonomics. It is | |
62 /// however checked using the `check_state` method before any file-system | |
63 /// operation. | |
64 /// | |
65 /// This allows us to be encoding-transparent as much as possible, until really | |
66 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr` | |
67 /// or `Path`) whenever more complex operations are needed: | |
68 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be | |
69 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source | |
70 /// character encoding will be determined on a per-repository basis. | |
71 // | |
72 // FIXME: (adapted from a comment in the stdlib) | |
73 // `HgPath::new()` current implementation relies on `Slice` being | |
74 // layout-compatible with `[u8]`. | |
75 // When attribute privacy is implemented, `Slice` should be annotated as | |
76 // `#[repr(transparent)]`. | |
77 // Anyway, `Slice` representation and layout are considered implementation | |
78 // detail, are not documented and must not be relied upon. | |
79 #[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)] | |
80 pub struct HgPath { | |
81 inner: [u8], | |
82 } | |
83 | |
84 impl HgPath { | |
85 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self { | |
86 unsafe { &*(s.as_ref() as *const [u8] as *const Self) } | |
87 } | |
88 pub fn is_empty(&self) -> bool { | |
89 self.inner.is_empty() | |
90 } | |
91 pub fn len(&self) -> usize { | |
92 self.inner.len() | |
93 } | |
94 fn to_hg_path_buf(&self) -> HgPathBuf { | |
95 HgPathBuf { | |
96 inner: self.inner.to_owned(), | |
97 } | |
98 } | |
99 pub fn bytes(&self) -> std::slice::Iter<u8> { | |
100 self.inner.iter() | |
101 } | |
102 pub fn to_ascii_uppercase(&self) -> HgPathBuf { | |
103 HgPathBuf::from(self.inner.to_ascii_uppercase()) | |
104 } | |
105 pub fn to_ascii_lowercase(&self) -> HgPathBuf { | |
106 HgPathBuf::from(self.inner.to_ascii_lowercase()) | |
107 } | |
108 pub fn as_bytes(&self) -> &[u8] { | |
109 &self.inner | |
110 } | |
111 pub fn contains(&self, other: u8) -> bool { | |
112 self.inner.contains(&other) | |
113 } | |
114 pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf { | |
115 let mut inner = self.inner.to_owned(); | |
116 if inner.len() != 0 && inner.last() != Some(&b'/') { | |
117 inner.push(b'/'); | |
118 } | |
119 inner.extend(other.as_ref().bytes()); | |
120 HgPathBuf::from_bytes(&inner) | |
121 } | |
122 /// Checks for errors in the path, short-circuiting at the first one. | |
123 /// This generates fine-grained errors useful for debugging. | |
124 /// To simply check if the path is valid during tests, use `is_valid`. | |
125 pub fn check_state(&self) -> Result<(), HgPathError> { | |
126 if self.len() == 0 { | |
127 return Ok(()); | |
128 } | |
129 let bytes = self.as_bytes(); | |
130 let mut previous_byte = None; | |
131 | |
132 if bytes[0] == b'/' { | |
133 return Err(HgPathError::LeadingSlash(bytes.to_vec())); | |
134 } | |
135 for (index, byte) in bytes.iter().enumerate() { | |
136 match byte { | |
137 0 => { | |
138 return Err(HgPathError::ContainsNullByte( | |
139 bytes.to_vec(), | |
140 index, | |
141 )) | |
142 } | |
143 b'/' => { | |
144 if previous_byte.is_some() && previous_byte == Some(b'/') { | |
145 return Err(HgPathError::ConsecutiveSlashes( | |
146 bytes.to_vec(), | |
147 index, | |
148 )); | |
149 } | |
150 } | |
151 _ => (), | |
152 }; | |
153 previous_byte = Some(*byte); | |
154 } | |
155 Ok(()) | |
156 } | |
157 | |
158 #[cfg(test)] | |
159 /// Only usable during tests to force developers to handle invalid states | |
160 fn is_valid(&self) -> bool { | |
161 self.check_state().is_ok() | |
162 } | |
163 } | |
164 | |
/// Owned, growable path buffer backed by a `Vec<u8>`.
///
/// Like `HgPath`, the content is not validated on construction or mutation.
#[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash, Default)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}

impl HgPathBuf {
    /// Creates an empty path buffer (equivalent to `HgPathBuf::default()`).
    pub fn new() -> Self {
        Self::default()
    }
    /// Appends a single byte to the end of the path.
    pub fn push(&mut self, byte: u8) {
        self.inner.push(byte);
    }
    /// Creates a buffer holding a copy of `s`.
    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
        Self { inner: s.to_vec() }
    }
    /// Consumes the buffer and returns the underlying bytes.
    pub fn into_vec(self) -> Vec<u8> {
        self.inner
    }
    /// Returns the content of the buffer as a byte slice.
    pub fn as_ref(&self) -> &[u8] {
        self.inner.as_slice()
    }
}
187 | |
188 impl Deref for HgPathBuf { | |
189 type Target = HgPath; | |
190 | |
191 #[inline] | |
192 fn deref(&self) -> &HgPath { | |
193 &HgPath::new(&self.inner) | |
194 } | |
195 } | |
196 | |
197 impl From<Vec<u8>> for HgPathBuf { | |
198 fn from(vec: Vec<u8>) -> Self { | |
199 Self { inner: vec } | |
200 } | |
201 } | |
202 | |
203 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf { | |
204 fn from(s: &T) -> HgPathBuf { | |
205 s.as_ref().to_owned() | |
206 } | |
207 } | |
208 | |
209 impl Into<Vec<u8>> for HgPathBuf { | |
210 fn into(self) -> Vec<u8> { | |
211 self.inner | |
212 } | |
213 } | |
214 | |
215 impl Borrow<HgPath> for HgPathBuf { | |
216 fn borrow(&self) -> &HgPath { | |
217 &HgPath::new(self.as_bytes()) | |
218 } | |
219 } | |
220 | |
221 impl ToOwned for HgPath { | |
222 type Owned = HgPathBuf; | |
223 | |
224 fn to_owned(&self) -> HgPathBuf { | |
225 self.to_hg_path_buf() | |
226 } | |
227 } | |
228 | |
229 impl AsRef<HgPath> for HgPath { | |
230 fn as_ref(&self) -> &HgPath { | |
231 self | |
232 } | |
233 } | |
234 | |
235 impl AsRef<HgPath> for HgPathBuf { | |
236 fn as_ref(&self) -> &HgPath { | |
237 self | |
238 } | |
239 } | |
240 | |
241 impl Extend<u8> for HgPathBuf { | |
242 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) { | |
243 self.inner.extend(iter); | |
244 } | |
245 } | |
246 | |
247 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is | |
248 /// implemented, these conversion utils will have to work differently depending | |
249 /// on the repository encoding: either `UTF-8` or `MBCS`. | |
250 | |
251 pub fn hg_path_to_os_string<P: AsRef<HgPath>>( | |
252 hg_path: P, | |
253 ) -> Result<OsString, HgPathError> { | |
254 hg_path.as_ref().check_state()?; | |
255 let os_str; | |
256 #[cfg(unix)] | |
257 { | |
258 use std::os::unix::ffi::OsStrExt; | |
259 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes()); | |
260 } | |
261 #[cfg(windows)] | |
262 { | |
263 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8. | |
264 unimplemented!(); | |
265 } | |
266 Ok(os_str.to_os_string()) | |
267 } | |
268 | |
269 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>( | |
270 hg_path: P, | |
271 ) -> Result<PathBuf, HgPathError> { | |
272 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf()) | |
273 } | |
274 | |
275 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>( | |
276 os_string: S, | |
277 ) -> Result<HgPathBuf, HgPathError> { | |
278 let buf; | |
279 #[cfg(unix)] | |
280 { | |
281 use std::os::unix::ffi::OsStrExt; | |
282 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes()); | |
283 } | |
284 #[cfg(windows)] | |
285 { | |
286 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |
287 unimplemented!(); | |
288 } | |
289 buf.check_state()?; | |
290 Ok(buf) | |
291 } | |
292 | |
293 pub fn path_to_hg_path_buf<P: AsRef<Path>>( | |
294 path: P, | |
295 ) -> Result<HgPathBuf, HgPathError> { | |
296 let buf; | |
297 let os_str = path.as_ref().as_os_str(); | |
298 #[cfg(unix)] | |
299 { | |
300 use std::os::unix::ffi::OsStrExt; | |
301 buf = HgPathBuf::from_bytes(&os_str.as_bytes()); | |
302 } | |
303 #[cfg(windows)] | |
304 { | |
305 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |
306 unimplemented!(); | |
307 } | |
308 buf.check_state()?; | |
309 Ok(buf) | |
310 } | |
311 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports each kind of malformed path and accepts
    /// well-formed ones.
    #[test]
    fn test_path_states() {
        let leading = HgPath::new(b"/").check_state();
        assert_eq!(Err(HgPathError::LeadingSlash(b"/".to_vec())), leading);

        let doubled = HgPath::new(b"a/b//c").check_state();
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
            doubled
        );

        let with_null = HgPath::new(b"a/b/\0c").check_state();
        assert_eq!(
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
            with_null
        );

        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// `bytes` behaves like a double-ended slice iterator.
    #[test]
    fn test_iter() {
        // One byte, taken from the front.
        let single = HgPath::new(b"a");
        let mut it = single.bytes();
        assert_eq!(it.next(), Some(&b'a'));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);

        // One byte, taken from the back.
        let mut it = single.bytes();
        assert_eq!(it.next_back(), Some(&b'a'));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);

        // Several bytes, taken from both ends.
        let abc = HgPath::new(b"abc");
        let mut it = abc.bytes();
        assert_eq!(it.next(), Some(&b'a'));
        assert_eq!(it.next_back(), Some(&b'c'));
        assert_eq!(it.next_back(), Some(&b'b'));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);

        // Several bytes, taken from the front only.
        let mut it = abc.bytes();
        assert_eq!(it.next(), Some(&b'a'));
        assert_eq!(it.next(), Some(&b'b'));
        assert_eq!(it.next(), Some(&b'c'));
        assert_eq!(it.next_back(), None);
        assert_eq!(it.next(), None);

        // The iterator can feed `Extend`.
        let mut collected = Vec::new();
        collected.extend(abc.bytes());
        assert_eq!(collected, vec![b'a', b'b', b'c']);

        // Searching from the back.
        assert_eq!(abc.bytes().rposition(|c| *c == b'c'), Some(2));
        assert_eq!(abc.bytes().rposition(|c| *c == b'd'), None);
    }

    /// `join` inserts a separator only when one is needed.
    #[test]
    fn test_join() {
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(joined.as_bytes(), b"a/b");

        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(joined.as_bytes(), b"a/b/c");

        // No leading slash if empty before join
        let joined = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(joined.as_bytes(), b"b/c");

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(joined.as_bytes(), b"a//b");
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(joined.as_bytes(), b"a//b");
    }
}