view rust/hg-core/src/utils/hg_path.rs @ 46155:fce2f20a54ce

copies-rust: start recording overwrite as they happens If a revision has information overwriting data from another revision, the overwriting revision is a descendant of the overwritten one. So we could warm the Oracle cache with such information to avoid potential future `is_ancestors` call. This provide us with a large speedup in the most expensive cases: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Full comparison below: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, ? 1.0000, 42 ?s/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000114 s, 0.000109 s, -0.000005 s, ? 0.9561, 18 ?s/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004934 s, 0.004953 s, +0.000019 s, ? 1.0039, 4 ?s/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000195 s, 0.000237 s, +0.000042 s, ? 1.2154, 26 ?s/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000113 s, 0.000113 s, +0.000000 s, ? 
1.0000, 16 ?s/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000322 s, 0.000322 s, +0.000000 s, ? 1.0000, 322 ?s/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010788 s, 0.010702 s, -0.000086 s, ? 0.9920, 1783 ?s/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.050880 s, 0.050504 s, -0.000376 s, ? 0.9926, 10 ?s/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.081760 s, 0.080159 s, -0.001601 s, ? 0.9804, 11 ?s/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.061382 s, 0.060058 s, -0.001324 s, ? 0.9784, 11 ?s/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.585802 s, 0.536950 s, -0.048852 s, ? 0.9166, 12 ?s/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012803 s, 0.012868 s, +0.000065 s, ? 1.0051, 6434 ?s/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.113558 s, 0.112806 s, -0.000752 s, ? 0.9934, 9 ?s/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000085 s, 0.000084 s, -0.000001 s, ? 0.9882, 42 ?s/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000106 s, 0.000106 s, +0.000000 s, ? 1.0000, 53 ?s/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000174 s, -0.000001 s, ? 0.9943, 58 ?s/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000721 s, 0.000726 s, +0.000005 s, ? 1.0069, 80 ?s/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010127 s, 0.010105 s, -0.000022 s, ? 0.9978, 7 ?s/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015616 s, 0.015748 s, +0.000132 s, ? 1.0085, 10 ?s/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.061341 s, 0.060357 s, -0.000984 s, ? 
0.9840, 10 ?s/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.542214 s, 0.499356 s, -0.042858 s, ? 0.9210, 7 ?s/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000092 s, +0.000003 s, ? 1.0337, 46 ?s/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000279 s, 0.000279 s, +0.000000 s, ? 1.0000, 34 ?s/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000184 s, 0.000186 s, +0.000002 s, ? 1.0109, 20 ?s/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000661 s, 0.000660 s, -0.000001 s, ? 0.9985, 94 ?s/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003377 s, 0.003372 s, -0.000005 s, ? 0.9985, 1124 ?s/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.070508 s, 0.070294 s, -0.000214 s, ? 0.9970, 11715 ?s/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006576 s, 0.006545 s, -0.000031 s, ? 0.9953, 4 ?s/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004809 s, 0.004998 s, +0.000189 s, ? 1.0393, 121 ?s/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.064872 s, 0.063348 s, -0.001524 s, ? 0.9765, 8 ?s/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026142 s, 0.026154 s, +0.000012 s, ? 1.0005, 42 ?s/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203956 s, 0.199063 s, -0.004893 s, ? 0.9760, 6 ?s/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.763853 s, 1.277320 s, -0.486533 s, ? 0.7242, 8 ?s/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.609761 s, 1.698794 s, -0.910967 s, ? 
0.6509, 8 ?s/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000847 s, 0.000842 s, -0.000005 s, ? 0.9941, 421 ?s/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000867 s, 0.000865 s, -0.000002 s, ? 0.9977, 432 ?s/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000160 s, -0.000001 s, ? 0.9938, 40 ?s/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001131 s, 0.001122 s, -0.000009 s, ? 0.9920, 561 ?s/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.033114 s, 0.032743 s, -0.000371 s, ? 0.9888, 32743 ?s/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.071092 s, 0.071529 s, +0.000437 s, ? 1.0061, 11921 ?s/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006554 s, 0.006593 s, +0.000039 s, ? 1.0060, 4 ?s/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005160 s, 0.005311 s, +0.000151 s, ? 1.0293, 129 ?s/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.065063 s, 0.063063 s, -0.002000 s, ? 0.9693, 9 ?s/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.297118 s, 0.312363 s, +0.015245 s, ? 1.0513, 7 ?s/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.284002 s, 0.283106 s, -0.000896 s, ? 0.9968, 7 ?s/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.086311 s, 0.083817 s, -0.002494 s, ? 0.9711, 9 ?s/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026738 s, 0.026516 s, -0.000222 s, ? 0.9917, 43 ?s/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 1.514270 s, 1.304865 s, -0.209405 s, ? 
0.8617, 13 ?s/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.735875 s, 0.681088 s, -0.054787 s, ? 0.9255, 13 ?s/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 4.843329 s, 4.454320 s, -0.389009 s, ? 0.9197, 12 ?s/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.591752 s, 0.567913 s, -0.023839 s, ? 0.9597, 16 ?s/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 4.760563 s, 4.547043 s, -0.213520 s, ? 0.9551, 12 ?s/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 4.751942 s, 4.378579 s, -0.373363 s, ? 0.9214, 12 ?s/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.605014 s, 1.703622 s, -0.901392 s, ? 0.6540, 8 ?s/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 41.113063 s, 36.001255 s, -5.111808 s, ? 0.8757, 157 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 27.891612 s, 14.340641 s, -13.550971 s, ? 0.5142, 37 ?s/rev Differential Revision: https://phab.mercurial-scm.org/D9497
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 21 Nov 2020 17:00:32 +0100
parents 2d5dfc8fed55
children 2e2033081274
line wrap: on
line source

// hg_path.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use std::borrow::Borrow;
use std::convert::TryFrom;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::ops::Deref;
use std::path::{Path, PathBuf};

/// Errors describing why a byte sequence is not a well-formed `HgPath`, or
/// why a path failed an audit.
///
/// The first three variants are the ones returned by `HgPath::check_state`.
/// NOTE(review): the remaining variants are not constructed in this file;
/// presumably they are produced by the path auditing and conversion code
/// living elsewhere in the crate.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// The path starts with a `/`. Holds the bytes of the invalid `HgPath`.
    LeadingSlash(Vec<u8>),
    /// The path contains two `/` in a row. `bytes` holds the bytes of the
    /// invalid `HgPath` and `second_slash_index` the position of the second
    /// slash of the offending pair.
    ConsecutiveSlashes {
        bytes: Vec<u8>,
        second_slash_index: usize,
    },
    /// The path contains a null byte. `bytes` holds the bytes of the invalid
    /// `HgPath` and `null_byte_index` the position of the null byte.
    ContainsNullByte {
        bytes: Vec<u8>,
        null_byte_index: usize,
    },
    /// The bytes could not be decoded. Holds the offending bytes.
    DecodeError(Vec<u8>),
    /// The rest come from audit errors.
    ///
    /// The audited path ends with a slash.
    EndsWithSlash(HgPathBuf),
    /// The audited path contains an illegal component.
    ContainsIllegalComponent(HgPathBuf),
    /// Path is inside the `.hg` folder
    InsideDotHg(HgPathBuf),
    /// `path` is inside the nested repository `nested_repo`.
    IsInsideNestedRepo {
        path: HgPathBuf,
        nested_repo: HgPathBuf,
    },
    /// `path` traverses the symbolic link `symlink`.
    TraversesSymbolicLink {
        path: HgPathBuf,
        symlink: HgPathBuf,
    },
    /// The path cannot be turned into a filesystem path.
    NotFsCompliant(HgPathBuf),
    /// `path` is the smallest invalid path
    NotUnderRoot {
        path: PathBuf,
        root: PathBuf,
    },
}

impl ToString for HgPathError {
    fn to_string(&self) -> String {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes {
                bytes,
                second_slash_index: pos,
            } => format!(
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte {
                bytes,
                null_byte_index: pos,
            } => format!(
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
            HgPathError::EndsWithSlash(path) => {
                format!("Audit failed for '{}': ends with a slash.", path)
            }
            HgPathError::ContainsIllegalComponent(path) => format!(
                "Audit failed for '{}': contains an illegal component.",
                path
            ),
            HgPathError::InsideDotHg(path) => format!(
                "Audit failed for '{}': is inside the '.hg' folder.",
                path
            ),
            HgPathError::IsInsideNestedRepo {
                path,
                nested_repo: nested,
            } => format!(
                "Audit failed for '{}': is inside a nested repository '{}'.",
                path, nested
            ),
            HgPathError::TraversesSymbolicLink { path, symlink } => format!(
                "Audit failed for '{}': traverses symbolic link '{}'.",
                path, symlink
            ),
            HgPathError::NotFsCompliant(path) => format!(
                "Audit failed for '{}': cannot be turned into a \
                 filesystem path.",
                path
            ),
            HgPathError::NotUnderRoot { path, root } => format!(
                "Audit failed for '{}': not under root {}.",
                path.display(),
                root.display()
            ),
        }
    }
}

/// Lets an `HgPathError` be propagated where an `std::io::Error` is expected.
impl From<HgPathError> for std::io::Error {
    fn from(e: HgPathError) -> Self {
        // Reported as invalid data, with the textual description of the path
        // error as payload.
        let message = e.to_string();
        Self::new(std::io::ErrorKind::InvalidData, message)
    }
}

/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` casts a `*const [u8]` into a `*const HgPath`, which is only
// sound if `HgPath` is guaranteed to be layout-compatible with `[u8]`.
// `#[repr(transparent)]` provides that guarantee as long as `inner` stays the
// only field of the struct.
#[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}

impl HgPath {
    /// Borrows anything that can be viewed as bytes as an `HgPath`, without
    /// copying and without validating the content (see `check_state`).
    pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
        // SAFETY: this relies on `HgPath`, whose only field is a `[u8]`,
        // being layout-compatible with `[u8]`, so that the pointer cast
        // preserves both the address and the length of the slice. The
        // returned reference borrows from `s` and thus cannot outlive the
        // bytes it points to.
        unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
    }
    /// Returns `true` if the path contains no bytes.
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
    /// Returns the length of the path in bytes.
    pub fn len(&self) -> usize {
        self.inner.len()
    }
    /// Copies the bytes of this path into an owned `HgPathBuf`.
    fn to_hg_path_buf(&self) -> HgPathBuf {
        HgPathBuf {
            inner: self.inner.to_owned(),
        }
    }
    /// Returns an iterator over the bytes of the path.
    pub fn bytes(&self) -> std::slice::Iter<'_, u8> {
        self.inner.iter()
    }
    /// Returns an owned copy of the path with ASCII letters uppercased.
    pub fn to_ascii_uppercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_uppercase())
    }
    /// Returns an owned copy of the path with ASCII letters lowercased.
    pub fn to_ascii_lowercase(&self) -> HgPathBuf {
        HgPathBuf::from(self.inner.to_ascii_lowercase())
    }
    /// Returns the raw bytes of the path.
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }
    /// Returns `true` if the byte `other` appears anywhere in the path.
    pub fn contains(&self, other: u8) -> bool {
        self.inner.contains(&other)
    }
    /// Returns `true` if the bytes of `needle` are a prefix of this path.
    /// This is a plain byte comparison, not a component-wise one.
    pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
        self.inner.starts_with(needle.as_ref().as_bytes())
    }
    /// Returns the path without its final `/`, if it has one. At most one
    /// slash is removed.
    pub fn trim_trailing_slash(&self) -> &Self {
        match self.inner.split_last() {
            Some((&b'/', rest)) => Self::new(rest),
            _ => self,
        }
    }
    /// Returns a tuple of slices `(base, filename)` resulting from the split
    /// at the rightmost `/`, if any.
    ///
    /// # Examples:
    ///
    /// ```
    /// use hg::utils::hg_path::HgPath;
    ///
    /// let path = HgPath::new(b"cool/hg/path").split_filename();
    /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
    ///
    /// let path = HgPath::new(b"pathwithoutsep").split_filename();
    /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
    /// ```
    pub fn split_filename(&self) -> (&Self, &Self) {
        match self.inner.iter().rposition(|c| *c == b'/') {
            None => (HgPath::new(""), self),
            Some(slash) => (
                HgPath::new(&self.inner[..slash]),
                HgPath::new(&self.inner[slash + 1..]),
            ),
        }
    }
    /// Returns a new path made of `self` followed by `other`, separated by a
    /// `/` unless `self` is empty or already ends with one.
    ///
    /// Nothing is validated: if `other` starts with a `/`, the result
    /// contains consecutive slashes.
    pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
        let other: &Self = other.as_ref();
        // Build the result in a single allocation and hand the buffer over
        // as-is instead of copying it a second time.
        let mut inner = Vec::with_capacity(self.len() + 1 + other.len());
        inner.extend_from_slice(&self.inner);
        if !inner.is_empty() && inner.last() != Some(&b'/') {
            inner.push(b'/');
        }
        inner.extend_from_slice(other.as_bytes());
        HgPathBuf::from(inner)
    }
    /// Returns everything before the rightmost `/`, or an empty path if
    /// there is no `/`.
    pub fn parent(&self) -> &Self {
        let inner = self.as_bytes();
        HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
            Some(pos) => &inner[..pos],
            None => &[],
        })
    }
    /// Given a base directory, returns the slice of `self` relative to the
    /// base directory.
    ///
    /// If `base` is empty, `self` is returned unchanged. Otherwise, if `base`
    /// is not a directory (does not end with a `b'/'`) or is not a prefix of
    /// `self`, returns `None`.
    pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
        let base = base.as_ref();
        if base.is_empty() {
            return Some(self);
        }
        let is_dir = base.as_bytes().ends_with(b"/");
        if is_dir && self.starts_with(base) {
            Some(Self::new(&self.inner[base.len()..]))
        } else {
            None
        }
    }

    /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
    ///
    /// Split a pathname into drive/UNC sharepoint and relative path
    /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
    /// be empty.
    ///
    /// If you assign
    ///  result = split_drive(p)
    /// It is always true that:
    ///  result[0] + result[1] == p
    ///
    /// If the path contained a drive letter, drive_or_unc will contain
    /// everything up to and including the colon.
    /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
    ///
    /// If the path contained a UNC path, the drive_or_unc will contain the
    /// host name and share up to but not including the fourth directory
    /// separator character.
    /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
    /// "/dir")
    ///
    /// Paths cannot contain both a drive letter and a UNC path.
    #[cfg(windows)]
    pub fn split_drive(&self) -> (&HgPath, &HgPath) {
        let bytes = self.as_bytes();
        let is_sep = |b| std::path::is_separator(b as char);

        if self.len() < 2 {
            (HgPath::new(b""), self)
        } else if is_sep(bytes[0])
            && is_sep(bytes[1])
            && (self.len() == 2 || !is_sep(bytes[2]))
        {
            // Is a UNC path:
            // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
            // \\machine\mountpoint\directory\etc\...
            //           directory ^^^^^^^^^^^^^^^

            let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
            // Index, within `bytes`, of the separator following the machine
            // name.
            let mountpoint_start_index = if let Some(i) = machine_end_index {
                i + 2
            } else {
                return (HgPath::new(b""), self);
            };

            match bytes[mountpoint_start_index + 1..]
                .iter()
                .position(|b| is_sep(*b))
            {
                // A UNC path can't have two slashes in a row
                // (after the initial two)
                Some(0) => (HgPath::new(b""), self),
                Some(i) => {
                    let (a, b) =
                        bytes.split_at(mountpoint_start_index + 1 + i);
                    (HgPath::new(a), HgPath::new(b))
                }
                // No separator after the mountpoint: everything is the drive
                None => (self, HgPath::new(b"")),
            }
        } else if bytes[1] == b':' {
            // Drive path c:\directory
            let (a, b) = bytes.split_at(2);
            (HgPath::new(a), HgPath::new(b))
        } else {
            (HgPath::new(b""), self)
        }
    }

    /// Split a pathname into drive and path. On Posix, drive is always empty.
    #[cfg(unix)]
    pub fn split_drive(&self) -> (&HgPath, &HgPath) {
        (HgPath::new(b""), self)
    }

    /// Checks for errors in the path, short-circuiting at the first one.
    /// This generates fine-grained errors useful for debugging.
    /// To simply check if the path is valid during tests, use `is_valid`.
    ///
    /// An empty path is valid. A leading slash is reported first; null bytes
    /// and consecutive slashes are then reported in order of appearance.
    pub fn check_state(&self) -> Result<(), HgPathError> {
        let bytes = self.as_bytes();

        if bytes.first() == Some(&b'/') {
            return Err(HgPathError::LeadingSlash(bytes.to_vec()));
        }
        let mut previous_byte = None;
        for (index, &byte) in bytes.iter().enumerate() {
            match byte {
                0 => {
                    return Err(HgPathError::ContainsNullByte {
                        bytes: bytes.to_vec(),
                        null_byte_index: index,
                    })
                }
                b'/' if previous_byte == Some(b'/') => {
                    return Err(HgPathError::ConsecutiveSlashes {
                        bytes: bytes.to_vec(),
                        second_slash_index: index,
                    })
                }
                _ => (),
            }
            previous_byte = Some(byte);
        }
        Ok(())
    }

    /// Only usable during tests to force developers to handle invalid states
    #[cfg(test)]
    fn is_valid(&self) -> bool {
        self.check_state().is_ok()
    }
}

/// Debug output shows the bytes as (lossily decoded) quoted text wrapped in
/// `HgPath(...)`.
impl fmt::Debug for HgPath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Invalid UTF-8 sequences are replaced rather than reported.
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_fmt(format_args!("HgPath({:?})", lossy))
    }
}

/// Displays the path as text, lossily decoding its bytes as UTF-8.
impl fmt::Display for HgPath {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Invalid UTF-8 sequences are replaced rather than reported.
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_str(&lossy)
    }
}

/// Owned, growable counterpart of `HgPath`, backed by a `Vec<u8>`.
///
/// It dereferences to `HgPath`, so all the borrowed path methods are
/// available on it. Like `HgPath`, it is not validated on construction or
/// modification; use `check_state` to validate it.
#[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
pub struct HgPathBuf {
    // Raw bytes of the path
    inner: Vec<u8>,
}

impl HgPathBuf {
    pub fn new() -> Self {
        Default::default()
    }
    pub fn push(&mut self, byte: u8) {
        self.inner.push(byte);
    }
    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
        HgPath::new(s).to_owned()
    }
    pub fn into_vec(self) -> Vec<u8> {
        self.inner
    }
}

/// Debug output shows the bytes as (lossily decoded) quoted text wrapped in
/// `HgPathBuf(...)`.
impl fmt::Debug for HgPathBuf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Invalid UTF-8 sequences are replaced rather than reported.
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_fmt(format_args!("HgPathBuf({:?})", lossy))
    }
}

/// Displays the path as text, lossily decoding its bytes as UTF-8.
impl fmt::Display for HgPathBuf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Invalid UTF-8 sequences are replaced rather than reported.
        let lossy = String::from_utf8_lossy(&self.inner);
        f.write_str(&lossy)
    }
}

/// Gives an `HgPathBuf` access to all the methods of the borrowed `HgPath`.
impl Deref for HgPathBuf {
    type Target = HgPath;

    #[inline]
    fn deref(&self) -> &HgPath {
        // Zero-copy view over the owned bytes
        HgPath::new(self.inner.as_slice())
    }
}

impl From<Vec<u8>> for HgPathBuf {
    fn from(vec: Vec<u8>) -> Self {
        Self { inner: vec }
    }
}

/// Creates an owned copy of anything that can be borrowed as an `HgPath`.
impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
    fn from(s: &T) -> HgPathBuf {
        let borrowed: &HgPath = s.as_ref();
        borrowed.to_owned()
    }
}

impl Into<Vec<u8>> for HgPathBuf {
    fn into(self) -> Vec<u8> {
        self.inner
    }
}

/// Borrows the owned bytes as an `HgPath`.
impl Borrow<HgPath> for HgPathBuf {
    fn borrow(&self) -> &HgPath {
        HgPath::new(self.inner.as_slice())
    }
}

impl ToOwned for HgPath {
    type Owned = HgPathBuf;

    fn to_owned(&self) -> HgPathBuf {
        self.to_hg_path_buf()
    }
}

/// Identity conversion, so that an `HgPath` can be passed wherever an
/// `AsRef<HgPath>` is expected.
impl AsRef<HgPath> for HgPath {
    fn as_ref(&self) -> &HgPath {
        self
    }
}

impl AsRef<HgPath> for HgPathBuf {
    fn as_ref(&self) -> &HgPath {
        self
    }
}

/// Appends every byte yielded by an iterator at the end of the path, without
/// any validation.
impl Extend<u8> for HgPathBuf {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        let buffer = &mut self.inner;
        buffer.extend(iter);
    }
}

/// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
/// implemented, these conversion utils will have to work differently depending
/// on the repository encoding: either `UTF-8` or `MBCS`.

pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
    hg_path: P,
) -> Result<OsString, HgPathError> {
    hg_path.as_ref().check_state()?;
    let os_str;
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    Ok(os_str.to_os_string())
}

pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
    hg_path: P,
) -> Result<PathBuf, HgPathError> {
    Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
}

/// Converts a platform string to an `HgPathBuf`, returning an error if the
/// result does not pass `check_state`.
///
/// On Unix, the bytes are used as-is. Other platforms are not handled yet.
pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
    os_string: S,
) -> Result<HgPathBuf, HgPathError> {
    let os_str: &OsStr = os_string.as_ref();
    let buf;
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        buf = HgPathBuf::from_bytes(os_str.as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    // Only hand out paths that are well-formed
    buf.check_state()?;
    Ok(buf)
}

pub fn path_to_hg_path_buf<P: AsRef<Path>>(
    path: P,
) -> Result<HgPathBuf, HgPathError> {
    let buf;
    let os_str = path.as_ref().as_os_str();
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        buf = HgPathBuf::from_bytes(&os_str.as_bytes());
    }
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).

    buf.check_state()?;
    Ok(buf)
}

/// Fallible conversion from a platform path, see `path_to_hg_path_buf`.
impl TryFrom<PathBuf> for HgPathBuf {
    type Error = HgPathError;
    fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
        path_to_hg_path_buf(path.as_path())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// `check_state` reports malformed paths precisely and accepts paths that
    /// are merely unusual.
    #[test]
    fn test_path_states() {
        let leading_slash = HgPath::new(b"/");
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            leading_slash.check_state()
        );

        let double_slash = HgPath::new(b"a/b//c");
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes {
                bytes: b"a/b//c".to_vec(),
                second_slash_index: 4
            }),
            double_slash.check_state()
        );

        let null_byte = HgPath::new(b"a/b/\0c");
        assert_eq!(
            Err(HgPathError::ContainsNullByte {
                bytes: b"a/b/\0c".to_vec(),
                null_byte_index: 4
            }),
            null_byte.check_state()
        );

        // TODO test HgPathError::DecodeError for the Windows implementation.
        let accepted: [&[u8]; 5] = [
            b"",
            b"a/b/c",
            // Backslashes in paths are not significant, but allowed
            br"a\b/c",
            // Dots in paths are not significant, but allowed
            b"a/b/../c/",
            b"./a/b/../c/",
        ];
        for &bytes in accepted.iter() {
            assert!(HgPath::new(bytes).is_valid());
        }
    }

    /// `bytes` behaves as a regular double-ended slice iterator.
    #[test]
    fn test_iter() {
        // Single byte, consumed from the front
        let mut bytes = HgPath::new(b"a").bytes();
        assert_eq!(Some(&b'a'), bytes.next());
        assert_eq!(None, bytes.next_back());
        assert_eq!(None, bytes.next());

        // Single byte, consumed from the back
        let mut bytes = HgPath::new(b"a").bytes();
        assert_eq!(Some(&b'a'), bytes.next_back());
        assert_eq!(None, bytes.next_back());
        assert_eq!(None, bytes.next());

        // Several bytes, consumed from both ends
        let mut bytes = HgPath::new(b"abc").bytes();
        assert_eq!(Some(&b'a'), bytes.next());
        assert_eq!(Some(&b'c'), bytes.next_back());
        assert_eq!(Some(&b'b'), bytes.next_back());
        assert_eq!(None, bytes.next_back());
        assert_eq!(None, bytes.next());

        // Several bytes, consumed from the front only
        let mut bytes = HgPath::new(b"abc").bytes();
        assert_eq!(Some(&b'a'), bytes.next());
        assert_eq!(Some(&b'b'), bytes.next());
        assert_eq!(Some(&b'c'), bytes.next());
        assert_eq!(None, bytes.next_back());
        assert_eq!(None, bytes.next());

        // Collecting yields the bytes in order
        let collected: Vec<u8> =
            HgPath::new(b"abc").bytes().cloned().collect();
        assert_eq!(vec![b'a', b'b', b'c'], collected);

        // Searching from the back
        let found = HgPath::new(b"abc").bytes().rposition(|c| *c == b'c');
        assert_eq!(Some(2), found);

        let missing = HgPath::new(b"abc").bytes().rposition(|c| *c == b'd');
        assert_eq!(None, missing);
    }

    /// `join` inserts a separator only when one is needed.
    #[test]
    fn test_join() {
        fn joined(base: &[u8], other: &[u8]) -> HgPathBuf {
            HgPathBuf::from_bytes(base).join(HgPath::new(other))
        }

        assert_eq!(b"a/b", joined(b"a", b"b").as_bytes());
        assert_eq!(b"a/b/c", joined(b"a/", b"b/c").as_bytes());

        // No leading slash if empty before join
        let from_empty = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", from_empty.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive bytes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        assert_eq!(b"a//b", joined(b"a/", b"/b").as_bytes());
        assert_eq!(b"a//b", joined(b"a", b"/b").as_bytes());
    }

    /// `relative_to` only strips bases that are empty or end with a slash.
    #[test]
    fn test_relative_to() {
        fn relative<'a>(path: &'a [u8], base: &[u8]) -> Option<&'a HgPath> {
            HgPath::new(path).relative_to(HgPath::new(base))
        }

        // An empty base leaves the path untouched
        assert_eq!(Some(HgPath::new(b"")), relative(b"", b""));
        assert_eq!(Some(HgPath::new(b"path")), relative(b"path", b""));

        // Unrelated base
        assert_eq!(None, relative(b"a", b"b"));

        // The base is a prefix, but not a directory
        assert_eq!(None, relative(b"a/b", b"a"));

        // Directory bases
        assert_eq!(Some(HgPath::new(b"b")), relative(b"a/b", b"a/"));
        assert_eq!(
            Some(HgPath::new(b"to/b")),
            relative(b"nested/path/to/b", b"nested/path/")
        );
        assert_eq!(
            Some(HgPath::new(b"with/dir/")),
            relative(b"ends/with/dir/", b"ends/")
        );
    }

    /// On Unix, `split_drive` never finds a drive.
    #[test]
    #[cfg(unix)]
    fn test_split_drive() {
        let inputs: [&[u8]; 6] = [
            // Taken from the Python stdlib's tests
            br"/foo/bar",
            br"foo:bar",
            br":foo:bar",
            // Also try NT paths; should not split them
            br"c:\foo\bar",
            b"c:/foo/bar",
            br"\\conky\mountpoint\foo\bar",
        ];
        for &input in inputs.iter() {
            assert_eq!(
                HgPath::new(input).split_drive(),
                (HgPath::new(b""), HgPath::new(input))
            );
        }
    }

    /// On Windows, `split_drive` recognizes drive letters and UNC paths.
    #[test]
    #[cfg(windows)]
    fn test_split_drive() {
        fn check(input: &[u8], drive: &[u8], rest: &[u8]) {
            assert_eq!(
                HgPath::new(input).split_drive(),
                (HgPath::new(drive), HgPath::new(rest))
            );
        }

        // Drive letters
        check(br"c:\foo\bar", br"c:", br"\foo\bar");
        check(b"c:/foo/bar", br"c:", br"/foo/bar");

        // UNC paths
        check(
            br"\\conky\mountpoint\foo\bar",
            br"\\conky\mountpoint",
            br"\foo\bar",
        );
        check(
            br"//conky/mountpoint/foo/bar",
            br"//conky/mountpoint",
            br"/foo/bar",
        );

        // Three leading separators: not a UNC path
        check(
            br"\\\conky\mountpoint\foo\bar",
            br"",
            br"\\\conky\mountpoint\foo\bar",
        );
        check(
            br"///conky/mountpoint/foo/bar",
            br"",
            br"///conky/mountpoint/foo/bar",
        );

        // Doubled separator after the machine name: not a UNC path
        check(
            br"\\conky\\mountpoint\foo\bar",
            br"",
            br"\\conky\\mountpoint\foo\bar",
        );
        check(
            br"//conky//mountpoint/foo/bar",
            br"",
            br"//conky//mountpoint/foo/bar",
        );

        // UNC part containing U+0130
        check(
            b"//conky/MOUNTPO\xc4\xb0NT/foo/bar",
            b"//conky/MOUNTPO\xc4\xb0NT",
            br"/foo/bar",
        );
    }

    /// `parent` strips the last component, if any.
    #[test]
    fn test_parent() {
        fn check(path: &[u8], parent: &[u8]) {
            assert_eq!(HgPath::new(path).parent(), HgPath::new(parent));
        }

        check(b"", b"");
        check(b"a", b"");
        check(b"a/b", b"a");
        check(b"a/other/b", b"a/other");
    }
}