view rust/hg-core/src/filepatterns.rs @ 46155:fce2f20a54ce

copies-rust: start recording overwrite as they happens If a revision has information overwriting data from another revision, the overwriting revision is a descendant of the overwritten one. So we could warm the Oracle cache with such information to avoid potential future `is_ancestors` call. This provide us with a large speedup in the most expensive cases: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Full comparison below: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, ? 1.0000, 42 ?s/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, ? 0.9561, 18 ?s/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, ? 1.0039, 4 ?s/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, ? 1.2154, 26 ?s/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ? 
1.0000, 16 ?s/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000322 s, 0.000322 s, +0.000000 s, ? 1.0000, 322 ?s/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, ? 0.9920, 1783 ?s/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, ? 0.9926, 10 ?s/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, ? 0.9804, 11 ?s/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, ? 0.9784, 11 ?s/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, ? 0.9166, 12 ?s/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, ? 1.0051, 6434 ?s/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, ? 0.9934, 9 ?s/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, ? 0.9882, 42 ?s/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, ? 1.0000, 53 ?s/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, ? 0.9943, 58 ?s/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, ? 1.0069, 80 ?s/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, ? 0.9978, 7 ?s/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, ? 1.0085, 10 ?s/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ? 
0.9840, 10 ?s/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, ? 0.9210, 7 ?s/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, ? 1.0337, 46 ?s/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, ? 1.0000, 34 ?s/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, ? 1.0109, 20 ?s/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, ? 0.9985, 94 ?s/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, ? 0.9985, 1124 ?s/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, ? 0.9970, 11715 ?s/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, ? 0.9953, 4 ?s/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, ? 1.0393, 121 ?s/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, ? 0.9765, 8 ?s/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, ? 1.0005, 42 ?s/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, ? 0.9760, 6 ?s/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, ? 0.7242, 8 ?s/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ? 
0.6509, 8 ?s/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, ? 0.9941, 421 ?s/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, ? 0.9977, 432 ?s/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, ? 0.9938, 40 ?s/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, ? 0.9920, 561 ?s/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, ? 0.9888, 32743 ?s/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, ? 1.0061, 11921 ?s/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, ? 1.0060, 4 ?s/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, ? 1.0293, 129 ?s/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, ? 0.9693, 9 ?s/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, ? 1.0513, 7 ?s/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, ? 0.9968, 7 ?s/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, ? 0.9711, 9 ?s/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, ? 0.9917, 43 ?s/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ? 
0.8617, 13 ?s/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, ? 0.9255, 13 ?s/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, ? 0.9197, 12 ?s/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, ? 0.9597, 16 ?s/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, ? 0.9551, 12 ?s/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, ? 0.9214, 12 ?s/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, ? 0.6540, 8 ?s/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Differential Revision: https://phab.mercurial-scm.org/D9497
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 21 Nov 2020 17:00:32 +0100
parents 26114bd6ec60
children 777c3d231913
line wrap: on
line source

// filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Handling of Mercurial-specific patterns.

use crate::{
    utils::{
        files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
        hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
        SliceExt,
    },
    FastHashMap, PatternError,
};
use lazy_static::lazy_static;
use regex::bytes::{NoExpand, Regex};
use std::fs::File;
use std::io::Read;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::vec::Vec;

lazy_static! {
    /// Regex-escaping lookup table indexed by byte value: each entry holds
    /// the byte itself, preceded by a backslash when the byte is one of the
    /// characters that must be escaped.
    static ref RE_ESCAPE: Vec<Vec<u8>> = {
        let special = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
        (0..=255u8)
            .map(|byte| {
                if special.contains(&byte) {
                    vec![b'\\', byte]
                } else {
                    vec![byte]
                }
            })
            .collect()
    };
}

/// Translations applied by `glob_to_re` once it has consumed a `*`.
///
/// Each entry is `(bytes that follow the `*`, regex to emit)`. These are
/// matched in order and the first match wins, so `**/` is tried before `**`;
/// the final entry has an empty prefix and handles a lone `*`.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];

/// Appended to the regexp of globs: the translated glob must be followed by
/// either a `/` or the end of the input.
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";

/// The kinds of pattern handled by this module.
///
/// The prefix quoted on each variant is the byte string that
/// `parse_pattern_syntax` maps to it.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression (`re:`)
    Regexp,
    /// Glob that matches at the front of the path (`rootglob:`)
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes) (`glob:`)
    Glob,
    /// A path relative to repository root, which is matched recursively
    /// (`path:`)
    Path,
    /// A path relative to cwd (`relpath:`)
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs) (`relglob:`)
    RelGlob,
    /// A regexp that needn't match the start of a name (`relre:`)
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories) (`rootfilesin:`)
    RootFiles,
    /// A file of patterns to read and include (`include:`)
    Include,
    /// A file of patterns to match against files under the same directory
    /// (`subinclude:`)
    SubInclude,
}

/// Transforms a glob pattern into a regex
///
/// The pattern is consumed byte by byte:
/// - `*` is translated according to `GLOB_REPLACEMENTS`, depending on what
///   follows it,
/// - `?` becomes `.`,
/// - `[...]` becomes a regex character class (or a literal `[` when there
///   is no closing bracket),
/// - `{a,b}` becomes the non-capturing alternation `(?:a|b)`,
/// - `\x` yields the regex-escaped form of `x`,
/// - any other byte is emitted through the `RE_ESCAPE` table.
///
/// No anchoring or suffix is added here; callers append what they need.
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
    // Remaining, not yet translated, part of the pattern.
    let mut input = pat;
    let mut res: Vec<u8> = vec![];
    // Number of `{` groups currently open; `}` and `,` are only special
    // while this is positive.
    let mut group_depth = 0;

    while let Some((c, rest)) = input.split_first() {
        input = rest;

        match c {
            b'*' => {
                // The last replacement has an empty prefix, so one entry
                // always matches and the loop always ends through `break`.
                for (source, repl) in GLOB_REPLACEMENTS {
                    if let Some(rest) = input.drop_prefix(source) {
                        input = rest;
                        res.extend(*repl);
                        break;
                    }
                }
            }
            b'?' => res.extend(b"."),
            b'[' => {
                // Look for the closing bracket, ignoring the very first byte
                // of the class: a `]` in that position is a class member,
                // not the terminator.
                match input.iter().skip(1).position(|b| *b == b']') {
                    // Unterminated class: the `[` is a literal bracket.
                    None => res.extend(b"\\["),
                    Some(end) => {
                        // Account for the one we skipped
                        let end = end + 1;

                        res.extend(b"[");

                        for (i, b) in input[..end].iter().enumerate() {
                            if *b == b'!' && i == 0 {
                                // Leading `!` is glob negation.
                                res.extend(b"^")
                            } else if *b == b'^' && i == 0 {
                                // Leading `^` is a literal in a glob class,
                                // so it must not negate the regex class.
                                res.extend(b"\\^")
                            } else if *b == b'\\' {
                                res.extend(b"\\\\")
                            } else {
                                res.push(*b)
                            }
                        }
                        res.extend(b"]");
                        // Resume after the closing bracket.
                        input = &input[end + 1..];
                    }
                }
            }
            b'{' => {
                group_depth += 1;
                res.extend(b"(?:")
            }
            b'}' if group_depth > 0 => {
                group_depth -= 1;
                res.extend(b")");
            }
            b',' if group_depth > 0 => res.extend(b"|"),
            b'\\' => {
                // Escape the byte following the backslash; a trailing
                // backslash with nothing after it is escaped itself.
                let c = {
                    if let Some((c, rest)) = input.split_first() {
                        input = rest;
                        c
                    } else {
                        c
                    }
                };
                res.extend(&RE_ESCAPE[*c as usize])
            }
            // Everything else, including `}` and `,` outside of a group.
            _ => res.extend(&RE_ESCAPE[*c as usize]),
        }
    }
    res
}

/// Returns `pattern` with every byte replaced by its `RE_ESCAPE` entry, so
/// that the result matches `pattern` literally when used as a regex.
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
    let mut escaped = Vec::with_capacity(pattern.len());
    for byte in pattern {
        escaped.extend_from_slice(&RE_ESCAPE[*byte as usize]);
    }
    escaped
}

/// Maps a pattern-kind prefix, trailing colon included (e.g. `b"glob:"`),
/// to the corresponding `PatternSyntax`.
///
/// # Errors
///
/// Returns `PatternError::UnsupportedSyntax`, carrying the lossily
/// UTF-8-decoded `kind`, when the prefix is not one of the known kinds.
pub fn parse_pattern_syntax(
    kind: &[u8],
) -> Result<PatternSyntax, PatternError> {
    let syntax = match kind {
        b"re:" => PatternSyntax::Regexp,
        b"path:" => PatternSyntax::Path,
        b"relpath:" => PatternSyntax::RelPath,
        b"rootfilesin:" => PatternSyntax::RootFiles,
        b"relglob:" => PatternSyntax::RelGlob,
        b"relre:" => PatternSyntax::RelRegexp,
        b"glob:" => PatternSyntax::Glob,
        b"rootglob:" => PatternSyntax::RootGlob,
        b"include:" => PatternSyntax::Include,
        b"subinclude:" => PatternSyntax::SubInclude,
        unknown => {
            return Err(PatternError::UnsupportedSyntax(
                String::from_utf8_lossy(unknown).into_owned(),
            ))
        }
    };
    Ok(syntax)
}

/// Translates a single pattern into the bytes of the regex implementing it.
///
/// Regexp patterns are returned as they are, and `relre` ones gain a leading
/// `.*` unless they already start with `^`, `*` or `.*`. Every other syntax
/// is converted to an equivalent regex. An empty pattern gives an empty
/// regex, whatever its syntax.
///
/// # Panics
///
/// Panics (through `unreachable!()`) on a non-empty `Include` or
/// `SubInclude` entry: those are not regex patterns and are expected to be
/// rejected by the caller.
fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
    let pattern = entry.pattern.as_slice();
    if pattern.is_empty() {
        return Vec::new();
    }
    match entry.syntax {
        PatternSyntax::Regexp => pattern.to_vec(),
        PatternSyntax::RelRegexp => {
            // The `regex` crate accepts `**` while `re2` and Python's `re`
            // do not. Leaving a pattern that starts with `*` untouched
            // triggers the same error in all engines.
            let keep_as_is = pattern[0] == b'^'
                || pattern[0] == b'*'
                || pattern.starts_with(b".*");
            if keep_as_is {
                pattern.to_vec()
            } else {
                let mut re = b".*".to_vec();
                re.extend_from_slice(pattern);
                re
            }
        }
        PatternSyntax::Path | PatternSyntax::RelPath => {
            if pattern == b"." {
                Vec::new()
            } else {
                let mut re = escape_pattern(pattern);
                re.extend_from_slice(b"(?:/|$)");
                re
            }
        }
        PatternSyntax::RootFiles => {
            let mut re = if pattern == b"." {
                Vec::new()
            } else {
                // Pattern is a directory name.
                let mut dir = escape_pattern(pattern);
                dir.push(b'/');
                dir
            };

            // Anything after the pattern must be a non-directory.
            re.extend_from_slice(b"[^/]+$");
            re
        }
        PatternSyntax::RelGlob => {
            let glob_re = glob_to_re(pattern);
            // A glob starting with a lone `*` may match at any depth, so its
            // `[^/]*` translation is widened to `.*`; otherwise an optional
            // directory prefix is accepted in front of the glob.
            let (prefix, body): (&[u8], &[u8]) =
                match glob_re.drop_prefix(b"[^/]*") {
                    Some(rest) => (b".*", rest),
                    None => (b"(?:.*/)?", glob_re.as_slice()),
                };
            let mut re = prefix.to_vec();
            re.extend_from_slice(body);
            re.extend_from_slice(GLOB_SUFFIX);
            re
        }
        PatternSyntax::Glob | PatternSyntax::RootGlob => {
            let mut re = glob_to_re(pattern);
            re.extend_from_slice(GLOB_SUFFIX);
            re
        }
        PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
    }
}

/// Bytes whose presence means a glob is more than a literal path.
/// `build_single_regex` only skips regex generation for a `RootGlob` that
/// contains none of them.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];

/// Lexically normalizes a `/`-separated path given as bytes.
///
/// Empty and `.` components are dropped, and a `..` component removes the
/// component before it. Leading `..` components of a relative path are
/// kept, while `..` at the root of an absolute path is discarded. One or two
/// leading slashes are preserved and three or more collapse into one. The
/// result is `.` when nothing is left.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return b".".to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading_slashes =
        match bytes.iter().take_while(|b| **b == SEP).count() {
            count if count > 2 => 1,
            count => count,
        };

    let mut kept: Vec<&[u8]> = Vec::new();
    for part in bytes.split(|b| *b == SEP) {
        if part.is_empty() || part == b"." {
            continue;
        }
        // A `..` is kept only when there is nothing it could cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = part != b".."
            || (leading_slashes == 0 && kept.is_empty())
            || kept.last().map_or(false, |previous| *previous == b"..");
        if keep {
            kept.push(part);
        } else {
            // Cancels the previous component; does nothing when `..` sits
            // directly at the root.
            kept.pop();
        }
    }

    let mut normalized = vec![SEP; leading_slashes];
    normalized.extend(kept.join(&SEP));

    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}

/// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
///
/// The pattern of `RootGlob`, `Path`, `RelGlob` and `RootFiles` entries is
/// first normalized with `normalize_path_bytes`. `Ok(None)` is returned for
/// a `RootGlob` whose normalized pattern has no glob special character,
/// otherwise `Ok(Some(regex))`.
///
/// # Errors
///
/// Returns `PatternError::NonRegexPattern` for `Include` and `SubInclude`
/// entries, which cannot be expressed as a regex.
pub fn build_single_regex(
    entry: &IgnorePattern,
) -> Result<Option<Vec<u8>>, PatternError> {
    let normalized = match entry.syntax {
        PatternSyntax::Include | PatternSyntax::SubInclude => {
            return Err(PatternError::NonRegexPattern(entry.clone()));
        }
        PatternSyntax::RootGlob
        | PatternSyntax::Path
        | PatternSyntax::RelGlob
        | PatternSyntax::RootFiles => normalize_path_bytes(&entry.pattern),
        PatternSyntax::Regexp
        | PatternSyntax::Glob
        | PatternSyntax::RelPath
        | PatternSyntax::RelRegexp => entry.pattern.clone(),
    };

    let is_exact_root_glob = entry.syntax == PatternSyntax::RootGlob
        && !normalized
            .iter()
            .any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
    if is_exact_root_glob {
        return Ok(None);
    }

    let normalized_entry = IgnorePattern {
        syntax: entry.syntax,
        pattern: normalized,
        source: entry.source.clone(),
    };
    Ok(Some(_build_single_regex(&normalized_entry)))
}

lazy_static! {
    /// Maps a syntax name, as written after `syntax:` or used as a `name:`
    /// line prefix in a pattern file, to the pattern-kind prefix it selects.
    static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
        let table: [(&'static [u8], &'static [u8]); 6] = [
            (b"re", b"relre:"),
            (b"regexp", b"relre:"),
            (b"glob", b"relglob:"),
            (b"rootglob", b"rootglob:"),
            (b"include", b"include:"),
            (b"subinclude", b"subinclude:"),
        ];

        let mut syntaxes = FastHashMap::default();
        for (name, prefix) in table.iter() {
            syntaxes.insert(*name, *prefix);
        }
        syntaxes
    };
}

/// Non-fatal problems found while reading a pattern file. They are returned
/// next to the parsed patterns instead of failing the whole operation.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// A `syntax:` line named an unknown syntax: (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file does not exist: file path
    NoSuchFile(PathBuf),
}

/// Parses the contents of a pattern file into a list of `IgnorePattern`.
///
/// `lines` is the raw content of the file and is split on `\n`. For each
/// line:
/// - a `#` comment is removed, unless the `#` is escaped as `\#`, in which
///   case the escape is replaced by a plain `#`,
/// - the end of the line is trimmed and lines left empty are skipped,
/// - a `syntax: NAME` line changes the syntax used by the following lines
///   (the initial one is `relre:`); an unknown `NAME` leaves it unchanged
///   and, when `warn` is true, records an `InvalidSyntax` warning,
/// - any other line becomes a pattern, using either the current syntax or
///   the one given by a prefix of the line found in `SYNTAXES`.
///
/// `file_path` is recorded as the source of every pattern and is used in
/// warnings and errors.
///
/// # Errors
///
/// Returns `PatternError::UnsupportedSyntaxInFile` (with the file path and
/// the 1-based line number) when `parse_pattern_syntax` rejects the syntax
/// selected for a line.
pub fn parse_pattern_file_contents<P: AsRef<Path>>(
    lines: &[u8],
    file_path: P,
    warn: bool,
) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
    // Matches a `#` preceded by an even number of backslashes (i.e. not
    // escaped) and everything after it; group 1 is the text to keep.
    let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();

    #[allow(clippy::trivial_regex)]
    let comment_escape_regex = Regex::new(r"\\#").unwrap();
    let mut inputs: Vec<IgnorePattern> = vec![];
    let mut warnings: Vec<PatternFileWarning> = vec![];

    let mut current_syntax = b"relre:".as_ref();

    for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
        // Line numbers reported in errors are 1-based.
        let line_number = line_number + 1;

        // Declared outside of the `if` so that the unescaped copy of the
        // line outlives it.
        let line_buf;
        if line.contains(&b'#') {
            // Remove comments prefixed by an even number of escapes.
            if let Some(cap) = comment_regex.captures(line) {
                line = &line[..cap.get(1).unwrap().end()]
            }
            // Turn the escaped `\#` that survived the above into `#`.
            line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
            line = &line_buf;
        }

        // `trim_end` comes from `SliceExt`; presumably it strips trailing
        // whitespace (including a `\r` left by CRLF line endings).
        let mut line = line.trim_end();

        if line.is_empty() {
            continue;
        }

        if let Some(syntax) = line.drop_prefix(b"syntax:") {
            let syntax = syntax.trim();

            if let Some(rel_syntax) = SYNTAXES.get(syntax) {
                current_syntax = rel_syntax;
            } else if warn {
                warnings.push(PatternFileWarning::InvalidSyntax(
                    file_path.as_ref().to_owned(),
                    syntax.to_owned(),
                ));
            }
            continue;
        }

        let mut line_syntax: &[u8] = &current_syntax;

        // A line may override the current syntax with an explicit prefix,
        // either in its full form (e.g. `relglob:`) or through the short
        // name followed by a colon (e.g. `glob:`). The prefix is removed
        // from the pattern.
        for (s, rels) in SYNTAXES.iter() {
            if let Some(rest) = line.drop_prefix(rels) {
                line_syntax = rels;
                line = rest;
                break;
            }
            if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
                line_syntax = rels;
                line = rest;
                break;
            }
        }

        inputs.push(IgnorePattern::new(
            // Add the file and line to "unsupported syntax" errors so that
            // the user can locate the offending pattern.
            parse_pattern_syntax(&line_syntax).map_err(|e| match e {
                PatternError::UnsupportedSyntax(syntax) => {
                    PatternError::UnsupportedSyntaxInFile(
                        syntax,
                        file_path.as_ref().to_string_lossy().into(),
                        line_number,
                    )
                }
                _ => e,
            })?,
            &line,
            &file_path,
        ));
    }
    Ok((inputs, warnings))
}

/// Reads the pattern file at `file_path` and parses it with
/// `parse_pattern_file_contents`, to which `warn` is forwarded.
///
/// A missing file is not an error: no pattern is returned, along with a
/// single `PatternFileWarning::NoSuchFile` warning (whatever `warn` is).
///
/// # Errors
///
/// Returns any other I/O error hit while opening or reading the file, and
/// any error from parsing its contents.
pub fn read_pattern_file<P: AsRef<Path>>(
    file_path: P,
    warn: bool,
) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
    let mut file = match File::open(file_path.as_ref()) {
        Ok(file) => file,
        Err(err) => {
            if err.kind() != std::io::ErrorKind::NotFound {
                return Err(err.into());
            }
            let missing = PatternFileWarning::NoSuchFile(
                file_path.as_ref().to_owned(),
            );
            return Ok((vec![], vec![missing]));
        }
    };

    let mut contents = Vec::new();
    file.read_to_end(&mut contents)?;

    parse_pattern_file_contents(&contents, file_path, warn)
}

/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` must be interpreted
    pub syntax: PatternSyntax,
    /// The pattern itself, without its syntax prefix
    pub pattern: Vec<u8>,
    /// Path of the file this entry comes from
    pub source: PathBuf,
}

impl IgnorePattern {
    /// Creates an entry that owns copies of `pattern` and of the `source`
    /// path.
    pub fn new(
        syntax: PatternSyntax,
        pattern: &[u8],
        source: impl AsRef<Path>,
    ) -> Self {
        let pattern = pattern.to_vec();
        let source = source.as_ref().to_path_buf();
        Self {
            syntax,
            pattern,
            source,
        }
    }
}

/// Shorthand for a `Result` whose error type is `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;

/// Wrapper for `read_pattern_file` that also recursively expands `include:`
/// patterns.
///
/// Each `include:` entry is replaced, in place, by the patterns of the file
/// it names (its pattern joined onto `root_dir`). Warnings coming from
/// included files are appended to the returned warnings.
///
/// `subinclude:` is not treated as a special pattern here: unraveling them
/// needs to occur in the "ignore" phase.
///
/// # Errors
///
/// Returns the first error hit while reading `pattern_file` or any file it
/// includes, directly or transitively. A missing file is only a warning,
/// as in `read_pattern_file`.
pub fn get_patterns_from_file(
    pattern_file: impl AsRef<Path>,
    root_dir: impl AsRef<Path>,
) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
    let (entries, mut warnings) = read_pattern_file(&pattern_file, true)?;
    let root_dir = root_dir.as_ref();

    // A plain loop is used on purpose: an iterator adaptor that flattens
    // `Result` values would silently drop the errors of included files
    // instead of reporting them to the caller.
    let mut patterns = Vec::with_capacity(entries.len());
    for entry in entries {
        if entry.syntax == PatternSyntax::Include {
            let inner_include =
                root_dir.join(get_path_from_bytes(&entry.pattern));
            let (inner_pats, inner_warnings) =
                get_patterns_from_file(&inner_include, root_dir)?;
            patterns.extend(inner_pats);
            warnings.extend(inner_warnings);
        } else {
            patterns.push(entry);
        }
    }

    Ok((patterns, warnings))
}

/// Holds all the information needed to handle a `subinclude:` pattern.
pub struct SubInclude {
    /// Will be used for repository (hg) paths that start with this prefix.
    /// It is relative to the current working directory, so comparing against
    /// repository paths is painless.
    pub prefix: HgPathBuf,
    /// The file itself, containing the patterns
    pub path: PathBuf,
    /// Folder in the filesystem where this pattern file applies
    pub root: PathBuf,
}

impl SubInclude {
    /// Builds the `SubInclude` for a `subinclude:` pattern found in the
    /// pattern file `source`.
    ///
    /// `pattern` is joined onto the directory of the normalized `source`
    /// path to get the included file (`path`). The directory of that file
    /// is the `root`, and `prefix` is that directory passed through
    /// `canonical_path` with `root_dir`, with a trailing `/` unless it is
    /// empty.
    ///
    /// # Errors
    ///
    /// Returns the `HgPathError` raised by `canonical_path` or by the
    /// conversion of its result to an hg path.
    pub fn new(
        root_dir: impl AsRef<Path>,
        pattern: &[u8],
        source: impl AsRef<Path>,
    ) -> Result<SubInclude, HgPathError> {
        let source_bytes = normalize_path_bytes(&get_bytes_from_path(source));
        let source_file = get_path_from_bytes(&source_bytes);
        // Fall back on the path itself when it has no parent.
        let source_dir = source_file
            .parent()
            .unwrap_or_else(|| source_file.deref());

        let path = source_dir.join(get_path_from_bytes(pattern));
        let new_root = path.parent().unwrap_or_else(|| path.deref());

        let canonical = canonical_path(&root_dir, &root_dir, new_root)?;
        let mut prefix = path_to_hg_path_buf(canonical)?;
        if !prefix.is_empty() {
            prefix.push(b'/');
        }

        let root = new_root.to_owned();
        Ok(Self { prefix, path, root })
    }
}

/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
///
/// Every `SubInclude` entry is turned into a `SubInclude` value, and all
/// other entries are returned by reference. Both lists keep the input order.
///
/// # Errors
///
/// Returns the first `HgPathError` raised by `SubInclude::new`.
pub fn filter_subincludes(
    ignore_patterns: &[IgnorePattern],
    root_dir: impl AsRef<Path>,
) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
    let mut subincludes = Vec::new();
    let mut others = Vec::new();

    for entry in ignore_patterns {
        match entry.syntax {
            PatternSyntax::SubInclude => {
                let subinclude =
                    SubInclude::new(&root_dir, &entry.pattern, &entry.source)?;
                subincludes.push(subinclude);
            }
            _ => others.push(entry),
        }
    }
    Ok((subincludes, others))
}

/// Unit tests for the glob/regex translation and the pattern file parser.
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Bytes outside of the escape list are untouched, the others gain a
    /// backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        // NOTE: these are raw literals, so `\t`, `\n`... below are a
        // backslash followed by a letter, not control characters.
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    /// Translation of each glob construct into its regex counterpart.
    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    /// `syntax:` lines and inline prefixes select the syntax of patterns.
    #[test]
    fn test_parse_pattern_file_contents() {
        // `syntax: glob` makes the following lines `relglob` patterns.
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax:` lines alone do not produce any pattern.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        // An inline `glob:` prefix also selects `relglob` and is removed
        // from the pattern.
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    /// Patterns that do need a regex.
    #[test]
    fn test_build_single_regex() {
        // The trailing slash is removed by the path normalization.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        // Regexps are returned unchanged.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                Path::new("")
            ))
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    /// Root globs without any special character give no regex at all.
    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        // A special character disables the shortcut.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}