Mercurial > public > mercurial-scm > hg
diff rust/hg-core/src/filepatterns.rs @ 42437:9609430d3625
rust-filepatterns: use bytes instead of String
In my initial patch, I introduced an unnecessary hard constraint on UTF-8
filenames and patterns which I forgot to remove. Although the performance
penalty for using String might be negligible, we don't want to break
compatibility with non-UTF-8 encodings for no reason.
Moreover, this change allows for a cleaner Rust core API.
This patch introduces a new utils module that is used with this fix.
Finally, PatternError was not put inside the Python module generated by
Rust, which would have raised a NameError.
Differential Revision: https://phab.mercurial-scm.org/D6485
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 06 Jun 2019 15:30:56 +0200 |
parents | e8f3740cc067 |
children | 48f1f864d928 |
line wrap: on
line diff
--- a/rust/hg-core/src/filepatterns.rs Sat Jun 01 01:24:49 2019 +0200 +++ b/rust/hg-core/src/filepatterns.rs Thu Jun 06 15:30:56 2019 +0200 @@ -1,9 +1,11 @@ use crate::{LineNumber, PatternError, PatternFileError}; -use regex::Regex; +use regex::bytes::Regex; use std::collections::HashMap; use std::fs::File; use std::io::Read; use std::vec::Vec; +use utils::files::get_path_from_bytes; +use utils::{replace_slice, SliceExt}; lazy_static! { static ref reescape: Vec<Vec<u8>> = { @@ -192,11 +194,11 @@ /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs /// that don't need to be transformed into a regex. pub fn build_single_regex( - kind: &str, + kind: &[u8], pat: &[u8], globsuffix: &[u8], ) -> Result<Vec<u8>, PatternError> { - let enum_kind = parse_pattern_syntax(kind.as_bytes())?; + let enum_kind = parse_pattern_syntax(kind)?; if enum_kind == PatternSyntax::RootGlob && pat.iter().all(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) { @@ -207,43 +209,42 @@ } lazy_static! { - static ref SYNTAXES: HashMap<&'static str, &'static str> = { + static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = { let mut m = HashMap::new(); - m.insert("re", "relre:"); - m.insert("regexp", "relre:"); - m.insert("glob", "relglob:"); - m.insert("rootglob", "rootglob:"); - m.insert("include", "include"); - m.insert("subinclude", "subinclude"); + m.insert(b"re".as_ref(), b"relre:".as_ref()); + m.insert(b"regexp".as_ref(), b"relre:".as_ref()); + m.insert(b"glob".as_ref(), b"relglob:".as_ref()); + m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); + m.insert(b"include".as_ref(), b"include".as_ref()); + m.insert(b"subinclude".as_ref(), b"subinclude".as_ref()); m }; } -pub type PatternTuple = (String, LineNumber, String); +pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>); type WarningTuple = (String, String); pub fn parse_pattern_file_contents( - lines: &str, - file_path: &str, + lines: &[u8], + file_path: &[u8], warn: bool, ) -> (Vec<PatternTuple>, 
Vec<WarningTuple>) { let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); let mut inputs: Vec<PatternTuple> = vec![]; let mut warnings: Vec<WarningTuple> = vec![]; - let mut current_syntax = "relre:"; + let mut current_syntax = b"relre:".as_ref(); - let mut line = String::new(); - for (line_number, line_str) in lines.split('\n').enumerate() { + for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { let line_number = line_number + 1; - line.replace_range(.., line_str); - if line.contains('#') { - if let Some(cap) = comment_regex.captures(line.clone().as_ref()) { - line = line[..cap.get(1).unwrap().end()].to_string() + if line.contains(&('#' as u8)) { + if let Some(cap) = comment_regex.captures(line) { + line = &line[..cap.get(1).unwrap().end()] } - line = line.replace(r"\#", "#"); + let mut line = line.to_owned(); + replace_slice(&mut line, br"\#", b"#"); } let mut line = line.trim_end(); @@ -252,25 +253,28 @@ continue; } - if line.starts_with("syntax:") { - let syntax = line["syntax:".len()..].trim(); + if line.starts_with(b"syntax:") { + let syntax = line[b"syntax:".len()..].trim(); if let Some(rel_syntax) = SYNTAXES.get(syntax) { current_syntax = rel_syntax; } else if warn { - warnings.push((file_path.to_string(), syntax.to_string())); + warnings.push(( + String::from_utf8_lossy(file_path).to_string(), + String::from_utf8_lossy(syntax).to_string(), + )); } continue; } - let mut line_syntax: &str = &current_syntax; + let mut line_syntax: &[u8] = &current_syntax; for (s, rels) in SYNTAXES.iter() { if line.starts_with(rels) { line_syntax = rels; line = &line[rels.len()..]; break; - } else if line.starts_with(&format!("{}:", s)) { + } else if line.starts_with(&[s, b":".as_ref()].concat()) { line_syntax = rels; line = &line[s.len() + 1..]; break; @@ -278,24 +282,24 @@ } inputs.push(( - format!("{}{}", line_syntax, line), + [line_syntax, line].concat(), line_number, - line.to_string(), + line.to_owned(), )); } (inputs, warnings) } pub fn 
read_pattern_file( - file_path: String, + file_path: &[u8], warn: bool, ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> { - let mut f = File::open(&file_path)?; - let mut contents = String::new(); + let mut f = File::open(get_path_from_bytes(file_path))?; + let mut contents = Vec::new(); - f.read_to_string(&mut contents)?; + f.read_to_end(&mut contents)?; - Ok(parse_pattern_file_contents(&contents, &file_path, warn)) + Ok(parse_pattern_file_contents(&contents, file_path, warn)) } #[cfg(test)] @@ -328,18 +332,23 @@ #[test] fn test_parse_pattern_file_contents() { - let lines = "syntax: glob\n*.elc"; + let lines = b"syntax: glob\n*.elc"; assert_eq!( - vec![("relglob:*.elc".to_string(), 2, "*.elc".to_string())], - parse_pattern_file_contents(lines, "file_path", false).0, + vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())], + parse_pattern_file_contents(lines, b"file_path", false).0, ); - let lines = "syntax: include\nsyntax: glob"; + let lines = b"syntax: include\nsyntax: glob"; assert_eq!( - parse_pattern_file_contents(lines, "file_path", false).0, + parse_pattern_file_contents(lines, b"file_path", false).0, vec![] ); + let lines = b"glob:**.o"; + assert_eq!( + parse_pattern_file_contents(lines, b"file_path", false).0, + vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())] + ); } }