Mercurial > public > mercurial-scm > hg-stable
view rust/hg-core/src/filepatterns.rs @ 46155:fce2f20a54ce
copies-rust: start recording overwrite as they happens
If a revision has information overwriting data from another revision, the
overwriting revision is a descendant of the overwritten one. So we could warm
the Oracle cache with such information to avoid potential future `is_ancestors`
call.
This provide us with a large speedup in the most expensive cases:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev
Full comparison below:
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev
---------------------------------------------------------------------------------------------------------------------------------------------------------------
mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, ? 1.0000, 42 ?s/rev
mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, ? 0.9561, 18 ?s/rev
mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, ? 1.0039, 4 ?s/rev
pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, ? 1.2154, 26 ?s/rev
pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev
pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ? 1.0000, 16 ?s/rev
pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.6f1f4a s, 0.6f1f4a s, +0.000000 s, ? 1.0000, 322 ?s/rev
pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, ? 0.9920, 1783 ?s/rev
pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, ? 0.9926, 10 ?s/rev
pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, ? 0.9804, 11 ?s/rev
pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, ? 0.9784, 11 ?s/rev
pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, ? 0.9166, 12 ?s/rev
pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, ? 1.0051, 6434 ?s/rev
pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, ? 0.9934, 9 ?s/rev
netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, ? 0.9882, 42 ?s/rev
netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, ? 1.0000, 53 ?s/rev
netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, ? 0.9943, 58 ?s/rev
netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, ? 1.0069, 80 ?s/rev
netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, ? 0.9978, 7 ?s/rev
netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, ? 1.0085, 10 ?s/rev
netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ? 0.9840, 10 ?s/rev
netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, ? 0.9210, 7 ?s/rev
mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, ? 1.0337, 46 ?s/rev
mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, ? 1.0000, 34 ?s/rev
mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, ? 1.0109, 20 ?s/rev
mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, ? 0.9985, 94 ?s/rev
mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, ? 0.9985, 1124 ?s/rev
mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, ? 0.9970, 11715 ?s/rev
mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, ? 0.9953, 4 ?s/rev
mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, ? 1.0393, 121 ?s/rev
mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, ? 0.9765, 8 ?s/rev
mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, ? 1.0005, 42 ?s/rev
mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, ? 0.9760, 6 ?s/rev
mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, ? 0.7242, 8 ?s/rev
mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ? 0.6509, 8 ?s/rev
mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, ? 0.9941, 421 ?s/rev
mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, ? 0.9977, 432 ?s/rev
mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, ? 0.9938, 40 ?s/rev
mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, ? 0.9920, 561 ?s/rev
mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, ? 0.9888, 32743 ?s/rev
mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, ? 1.0061, 11921 ?s/rev
mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, ? 1.0060, 4 ?s/rev
mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, ? 1.0293, 129 ?s/rev
mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, ? 0.9693, 9 ?s/rev
mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, ? 1.0513, 7 ?s/rev
mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, ? 0.9968, 7 ?s/rev
mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, ? 0.9711, 9 ?s/rev
mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, ? 0.9917, 43 ?s/rev
mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ? 0.8617, 13 ?s/rev
mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, ? 0.9255, 13 ?s/rev
mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, ? 0.9197, 12 ?s/rev
mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, ? 0.9597, 16 ?s/rev
mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, ? 0.9551, 12 ?s/rev
mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, ? 0.9214, 12 ?s/rev
mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, ? 0.6540, 8 ?s/rev
mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev
mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev
Differential Revision: https://phab.mercurial-scm.org/D9497
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Sat, 21 Nov 2020 17:00:32 +0100 |
parents | 26114bd6ec60 |
children | 777c3d231913 |
line wrap: on
line source
// filepatterns.rs // // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. //! Handling of Mercurial-specific patterns. use crate::{ utils::{ files::{canonical_path, get_bytes_from_path, get_path_from_bytes}, hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError}, SliceExt, }, FastHashMap, PatternError, }; use lazy_static::lazy_static; use regex::bytes::{NoExpand, Regex}; use std::fs::File; use std::io::Read; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::vec::Vec; lazy_static! { static ref RE_ESCAPE: Vec<Vec<u8>> = { let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect(); let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c"; for byte in to_escape { v[*byte as usize].insert(0, b'\\'); } v }; } /// These are matched in order const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")]; /// Appended to the regexp of globs const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)"; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum PatternSyntax { /// A regular expression Regexp, /// Glob that matches at the front of the path RootGlob, /// Glob that matches at any suffix of the path (still anchored at /// slashes) Glob, /// a path relative to repository root, which is matched recursively Path, /// A path relative to cwd RelPath, /// an unrooted glob (*.rs matches Rust files in all dirs) RelGlob, /// A regexp that needn't match the start of a name RelRegexp, /// A path relative to repository root, which is matched non-recursively /// (will not match subdirectories) RootFiles, /// A file of patterns to read and include Include, /// A file of patterns to match against files under the same directory SubInclude, } /// Transforms a glob pattern into a regex fn glob_to_re(pat: &[u8]) -> Vec<u8> { let mut input = pat; let mut res: Vec<u8> = vec![]; let mut group_depth = 0; while let Some((c, rest)) = input.split_first() { input = rest; match c { b'*' => { for (source, repl) in GLOB_REPLACEMENTS { if let Some(rest) = input.drop_prefix(source) { input = rest; res.extend(*repl); break; } } } b'?' => res.extend(b"."), b'[' => { match input.iter().skip(1).position(|b| *b == b']') { None => res.extend(b"\\["), Some(end) => { // Account for the one we skipped let end = end + 1; res.extend(b"["); for (i, b) in input[..end].iter().enumerate() { if *b == b'!' && i == 0 { res.extend(b"^") } else if *b == b'^' && i == 0 { res.extend(b"\\^") } else if *b == b'\\' { res.extend(b"\\\\") } else { res.push(*b) } } res.extend(b"]"); input = &input[end + 1..]; } } } b'{' => { group_depth += 1; res.extend(b"(?:") } b'}' if group_depth > 0 => { group_depth -= 1; res.extend(b")"); } b',' if group_depth > 0 => res.extend(b"|"), b'\\' => { let c = { if let Some((c, rest)) = input.split_first() { input = rest; c } else { c } }; res.extend(&RE_ESCAPE[*c as usize]) } _ => res.extend(&RE_ESCAPE[*c as usize]), } } res } fn escape_pattern(pattern: &[u8]) -> Vec<u8> { pattern .iter() .flat_map(|c| RE_ESCAPE[*c as usize].clone()) .collect() } pub fn parse_pattern_syntax( kind: &[u8], ) -> Result<PatternSyntax, PatternError> { match kind { b"re:" => Ok(PatternSyntax::Regexp), b"path:" => Ok(PatternSyntax::Path), b"relpath:" => Ok(PatternSyntax::RelPath), b"rootfilesin:" => Ok(PatternSyntax::RootFiles), b"relglob:" => Ok(PatternSyntax::RelGlob), b"relre:" => Ok(PatternSyntax::RelRegexp), b"glob:" => Ok(PatternSyntax::Glob), b"rootglob:" => Ok(PatternSyntax::RootGlob), b"include:" => Ok(PatternSyntax::Include), b"subinclude:" => Ok(PatternSyntax::SubInclude), _ => Err(PatternError::UnsupportedSyntax( String::from_utf8_lossy(kind).to_string(), )), } } /// Builds the regex that corresponds to the given pattern. /// If within a `syntax: regexp` context, returns the pattern, /// otherwise, returns the corresponding regex. fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> { let IgnorePattern { syntax, pattern, .. } = entry; if pattern.is_empty() { return vec![]; } match syntax { PatternSyntax::Regexp => pattern.to_owned(), PatternSyntax::RelRegexp => { // The `regex` crate accepts `**` while `re2` and Python's `re` // do not. Checking for `*` correctly triggers the same error all // engines. if pattern[0] == b'^' || pattern[0] == b'*' || pattern.starts_with(b".*") { return pattern.to_owned(); } [&b".*"[..], pattern].concat() } PatternSyntax::Path | PatternSyntax::RelPath => { if pattern == b"." { return vec![]; } [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() } PatternSyntax::RootFiles => { let mut res = if pattern == b"." { vec![] } else { // Pattern is a directory name. [escape_pattern(pattern).as_slice(), b"/"].concat() }; // Anything after the pattern must be a non-directory. res.extend(b"[^/]+$"); res } PatternSyntax::RelGlob => { let glob_re = glob_to_re(pattern); if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { [b".*", rest, GLOB_SUFFIX].concat() } else { [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() } } PatternSyntax::Glob | PatternSyntax::RootGlob => { [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() } PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), } } const GLOB_SPECIAL_CHARACTERS: [u8; 7] = [b'*', b'?', b'[', b']', b'{', b'}', b'\\']; /// TODO support other platforms #[cfg(unix)] pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> { if bytes.is_empty() { return b".".to_vec(); } let sep = b'/'; let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count(); if initial_slashes > 2 { // POSIX allows one or two initial slashes, but treats three or more // as single slash. initial_slashes = 1; } let components = bytes .split(|b| *b == sep) .filter(|c| !(c.is_empty() || c == b".")) .fold(vec![], |mut acc, component| { if component != b".." || (initial_slashes == 0 && acc.is_empty()) || (!acc.is_empty() && acc[acc.len() - 1] == b"..") { acc.push(component) } else if !acc.is_empty() { acc.pop(); } acc }); let mut new_bytes = components.join(&sep); if initial_slashes > 0 { let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect(); buf.extend(new_bytes); new_bytes = buf; } if new_bytes.is_empty() { b".".to_vec() } else { new_bytes } } /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs /// that don't need to be transformed into a regex. pub fn build_single_regex( entry: &IgnorePattern, ) -> Result<Option<Vec<u8>>, PatternError> { let IgnorePattern { pattern, syntax, .. } = entry; let pattern = match syntax { PatternSyntax::RootGlob | PatternSyntax::Path | PatternSyntax::RelGlob | PatternSyntax::RootFiles => normalize_path_bytes(&pattern), PatternSyntax::Include | PatternSyntax::SubInclude => { return Err(PatternError::NonRegexPattern(entry.clone())) } _ => pattern.to_owned(), }; if *syntax == PatternSyntax::RootGlob && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) { Ok(None) } else { let mut entry = entry.clone(); entry.pattern = pattern; Ok(Some(_build_single_regex(&entry))) } } lazy_static! { static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { let mut m = FastHashMap::default(); m.insert(b"re".as_ref(), b"relre:".as_ref()); m.insert(b"regexp".as_ref(), b"relre:".as_ref()); m.insert(b"glob".as_ref(), b"relglob:".as_ref()); m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); m.insert(b"include".as_ref(), b"include:".as_ref()); m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); m }; } #[derive(Debug)] pub enum PatternFileWarning { /// (file path, syntax bytes) InvalidSyntax(PathBuf, Vec<u8>), /// File path NoSuchFile(PathBuf), } pub fn parse_pattern_file_contents<P: AsRef<Path>>( lines: &[u8], file_path: P, warn: bool, ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); #[allow(clippy::trivial_regex)] let comment_escape_regex = Regex::new(r"\\#").unwrap(); let mut inputs: Vec<IgnorePattern> = vec![]; let mut warnings: Vec<PatternFileWarning> = vec![]; let mut current_syntax = b"relre:".as_ref(); for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { let line_number = line_number + 1; let line_buf; if line.contains(&b'#') { if let Some(cap) = comment_regex.captures(line) { line = &line[..cap.get(1).unwrap().end()] } line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); line = &line_buf; } let mut line = line.trim_end(); if line.is_empty() { continue; } if let Some(syntax) = line.drop_prefix(b"syntax:") { let syntax = syntax.trim(); if let Some(rel_syntax) = SYNTAXES.get(syntax) { current_syntax = rel_syntax; } else if warn { warnings.push(PatternFileWarning::InvalidSyntax( file_path.as_ref().to_owned(), syntax.to_owned(), )); } continue; } let mut line_syntax: &[u8] = ¤t_syntax; for (s, rels) in SYNTAXES.iter() { if let Some(rest) = line.drop_prefix(rels) { line_syntax = rels; line = rest; break; } if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) { line_syntax = rels; line = rest; break; } } inputs.push(IgnorePattern::new( parse_pattern_syntax(&line_syntax).map_err(|e| match e { PatternError::UnsupportedSyntax(syntax) => { PatternError::UnsupportedSyntaxInFile( syntax, file_path.as_ref().to_string_lossy().into(), line_number, ) } _ => e, })?, &line, &file_path, )); } Ok((inputs, warnings)) } pub fn read_pattern_file<P: AsRef<Path>>( file_path: P, warn: bool, ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { let mut f = match File::open(file_path.as_ref()) { Ok(f) => Ok(f), Err(e) => match e.kind() { std::io::ErrorKind::NotFound => { return Ok(( vec![], vec![PatternFileWarning::NoSuchFile( file_path.as_ref().to_owned(), )], )) } _ => Err(e), }, }?; let mut contents = Vec::new(); f.read_to_end(&mut contents)?; Ok(parse_pattern_file_contents(&contents, file_path, warn)?) } /// Represents an entry in an "ignore" file. #[derive(Debug, Eq, PartialEq, Clone)] pub struct IgnorePattern { pub syntax: PatternSyntax, pub pattern: Vec<u8>, pub source: PathBuf, } impl IgnorePattern { pub fn new( syntax: PatternSyntax, pattern: &[u8], source: impl AsRef<Path>, ) -> Self { Self { syntax, pattern: pattern.to_owned(), source: source.as_ref().to_owned(), } } } pub type PatternResult<T> = Result<T, PatternError>; /// Wrapper for `read_pattern_file` that also recursively expands `include:` /// patterns. /// /// `subinclude:` is not treated as a special pattern here: unraveling them /// needs to occur in the "ignore" phase. pub fn get_patterns_from_file( pattern_file: impl AsRef<Path>, root_dir: impl AsRef<Path>, ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?; let patterns = patterns .into_iter() .flat_map(|entry| -> PatternResult<_> { let IgnorePattern { syntax, pattern, .. } = &entry; Ok(match syntax { PatternSyntax::Include => { let inner_include = root_dir.as_ref().join(get_path_from_bytes(&pattern)); let (inner_pats, inner_warnings) = get_patterns_from_file( &inner_include, root_dir.as_ref(), )?; warnings.extend(inner_warnings); inner_pats } _ => vec![entry], }) }) .flatten() .collect(); Ok((patterns, warnings)) } /// Holds all the information needed to handle a `subinclude:` pattern. pub struct SubInclude { /// Will be used for repository (hg) paths that start with this prefix. /// It is relative to the current working directory, so comparing against /// repository paths is painless. pub prefix: HgPathBuf, /// The file itself, containing the patterns pub path: PathBuf, /// Folder in the filesystem where this it applies pub root: PathBuf, } impl SubInclude { pub fn new( root_dir: impl AsRef<Path>, pattern: &[u8], source: impl AsRef<Path>, ) -> Result<SubInclude, HgPathError> { let normalized_source = normalize_path_bytes(&get_bytes_from_path(source)); let source_root = get_path_from_bytes(&normalized_source); let source_root = source_root.parent().unwrap_or_else(|| source_root.deref()); let path = source_root.join(get_path_from_bytes(pattern)); let new_root = path.parent().unwrap_or_else(|| path.deref()); let prefix = canonical_path(&root_dir, &root_dir, new_root)?; Ok(Self { prefix: path_to_hg_path_buf(prefix).and_then(|mut p| { if !p.is_empty() { p.push(b'/'); } Ok(p) })?, path: path.to_owned(), root: new_root.to_owned(), }) } } /// Separate and pre-process subincludes from other patterns for the "ignore" /// phase. pub fn filter_subincludes( ignore_patterns: &[IgnorePattern], root_dir: impl AsRef<Path>, ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> { let mut subincludes = vec![]; let mut others = vec![]; for ignore_pattern in ignore_patterns.iter() { let IgnorePattern { syntax, pattern, source, } = ignore_pattern; if *syntax == PatternSyntax::SubInclude { subincludes.push(SubInclude::new(&root_dir, pattern, &source)?); } else { others.push(ignore_pattern) } } Ok((subincludes, others)) } #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; #[test] fn escape_pattern_test() { let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#; assert_eq!(escape_pattern(untouched), untouched.to_vec()); // All escape codes assert_eq!( escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#), br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"# .to_vec() ); } #[test] fn glob_test() { assert_eq!(glob_to_re(br#"?"#), br#"."#); assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#); assert_eq!(glob_to_re(br#"**"#), br#".*"#); assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#); assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#); assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#); assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#); assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#); } #[test] fn test_parse_pattern_file_contents() { let lines = b"syntax: glob\n*.elc"; assert_eq!( parse_pattern_file_contents(lines, Path::new("file_path"), false) .unwrap() .0, vec![IgnorePattern::new( PatternSyntax::RelGlob, b"*.elc", Path::new("file_path") )], ); let lines = b"syntax: include\nsyntax: glob"; assert_eq!( parse_pattern_file_contents(lines, Path::new("file_path"), false) .unwrap() .0, vec![] ); let lines = b"glob:**.o"; assert_eq!( parse_pattern_file_contents(lines, Path::new("file_path"), false) .unwrap() .0, vec![IgnorePattern::new( PatternSyntax::RelGlob, b"**.o", Path::new("file_path") )] ); } #[test] fn test_build_single_regex() { assert_eq!( build_single_regex(&IgnorePattern::new( PatternSyntax::RelGlob, b"rust/target/", Path::new("") )) .unwrap(), Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()), ); assert_eq!( build_single_regex(&IgnorePattern::new( PatternSyntax::Regexp, br"rust/target/\d+", Path::new("") )) .unwrap(), Some(br"rust/target/\d+".to_vec()), ); } #[test] fn test_build_single_regex_shortcut() { assert_eq!( build_single_regex(&IgnorePattern::new( PatternSyntax::RootGlob, b"", Path::new("") )) .unwrap(), None, ); assert_eq!( build_single_regex(&IgnorePattern::new( PatternSyntax::RootGlob, b"whatever", Path::new("") )) .unwrap(), None, ); assert_eq!( build_single_regex(&IgnorePattern::new( PatternSyntax::RootGlob, b"*.o", Path::new("") )) .unwrap(), Some(br"[^/]*\.o(?:/|$)".to_vec()), ); } }