Mercurial > public > mercurial-scm > hg-stable
diff rust/hg-core/src/utils/files.rs @ 44312:c18dd48cea4a
rust-pathauditor: add Rust implementation of the `pathauditor`
It does not offer the same flexibility as the Python implementation, but
should check incoming paths just as well.
Differential Revision: https://phab.mercurial-scm.org/D7866
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 05 Feb 2020 17:05:37 +0100 |
parents | cf065c6a0197 |
children | 0e9ac3968b56 |
line wrap: on
line diff
/// UTF-8 encodings of the code points that `hfs_ignore_clean` strips from
/// a path.
///
/// Every entry is three bytes long and starts with either `0xE2` or `0xEF`,
/// which is what the fast-path check in `hfs_ignore_clean` relies on.
static IGNORED_CHARS: [&[u8]; 16] = [
    b"\xe2\x80\x8c", // U+200C
    b"\xe2\x80\x8d", // U+200D
    b"\xe2\x80\x8e", // U+200E
    b"\xe2\x80\x8f", // U+200F
    b"\xe2\x80\xaa", // U+202A
    b"\xe2\x80\xab", // U+202B
    b"\xe2\x80\xac", // U+202C
    b"\xe2\x80\xad", // U+202D
    b"\xe2\x80\xae", // U+202E
    b"\xe2\x81\xaa", // U+206A
    b"\xe2\x81\xab", // U+206B
    b"\xe2\x81\xac", // U+206C
    b"\xe2\x81\xad", // U+206D
    b"\xe2\x81\xae", // U+206E
    b"\xe2\x81\xaf", // U+206F
    b"\xef\xbb\xbf", // U+FEFF
];

/// Removes every non-overlapping occurrence of `pattern` from `buf`,
/// scanning left to right and compacting the buffer in place.
///
/// An empty `pattern` leaves `buf` untouched.
fn remove_subslice(buf: &mut Vec<u8>, pattern: &[u8]) {
    if pattern.is_empty() || buf.len() < pattern.len() {
        return;
    }
    // `write` never overtakes `read`, so the unread tail is always intact.
    let mut read = 0;
    let mut write = 0;
    while read < buf.len() {
        if buf[read..].starts_with(pattern) {
            read += pattern.len();
        } else {
            buf[write] = buf[read];
            write += 1;
            read += 1;
        }
    }
    buf.truncate(write);
}

/// Returns a copy of `bytes` with every sequence listed in `IGNORED_CHARS`
/// removed.
///
/// The sequences are removed one table entry at a time, in table order.
/// Removing bytes changes the length of the buffer, so this is done with a
/// dedicated compaction helper rather than an in-place slice replacement.
fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
    let mut buf = bytes.to_owned();
    // All ignored sequences start with 0xE2 or 0xEF: when neither byte is
    // present there is nothing to strip.
    let needs_escaping = buf.iter().any(|&b| b == 0xe2 || b == 0xef);
    if needs_escaping {
        for forbidden in IGNORED_CHARS.iter() {
            remove_subslice(&mut buf, forbidden);
        }
    }
    buf
}

/// Returns `bytes` with ASCII letters lowercased and the `IGNORED_CHARS`
/// sequences removed.
///
/// Non-ASCII bytes other than the ignored sequences are left unchanged.
pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
    // ASCII lowercasing only rewrites bytes below 0x80 while every ignored
    // sequence is made of bytes >= 0x80, so the two steps commute. Cleaning
    // first lets the lowercasing happen in place on the single allocation.
    let mut cleaned = hfs_ignore_clean(bytes);
    cleaned.make_ascii_lowercase();
    cleaned
}