view rust/hg-core/src/revlog/path_encode.rs @ 46128:c94d013e2299

copies-rust: add smarter approach for merging small mapping with large mapping The current approach (finding the smaller updated set) works great when the mappings have similar sizes, but does a lot of unnecessary work when one side is much smaller than the other. So we do better in these cases. See inline documentation for details. It gives a sizeable boost to many of our slower cases: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 18.123103 s, 5.693818 s, -12.429285 s, ? 0.3142, 15 ?s/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 17.907312 s, 5.677655 s, -12.229657 s, ? 0.3171, 15 ?s/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 17.684797 s, 5.563370 s, -12.121427 s, ? 0.3146, 15 ?s/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.881471 s, 2.864099 s, -0.017372 s, ? 0.9940, 14 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, ? 0.9422, 155 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, ? 0.9422, 155 ?s/rev Ideally, the im-rs object would have a `merge` method, but it does not (yet). Full timing comparison below (there is one pathological case that becomes even worse, for unclear reasons). 
Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000043 s, 0.000042 s, -0.000001 s, ? 0.9767, 42 ?s/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000105 s, 0.000104 s, -0.000001 s, ? 0.9905, 17 ?s/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004895 s, 0.004913 s, +0.000018 s, ? 1.0037, 4 ?s/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000194 s, 0.000191 s, -0.000003 s, ? 0.9845, 21 ?s/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000050 s, +0.000000 s, ? 1.0000, 50 ?s/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000115 s, 0.000112 s, -0.000003 s, ? 0.9739, 16 ?s/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000289 s, 0.000288 s, -0.000001 s, ? 0.9965, 288 ?s/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010513 s, 0.010411 s, -0.000102 s, ? 0.9903, 1735 ?s/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.051474 s, 0.052852 s, +0.001378 s, ? 1.0268, 11 ?s/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.088086 s, 0.092828 s, +0.004742 s, ? 1.0538, 13 ?s/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.062176 s, 0.063269 s, +0.001093 s, ? 1.0176, 11 ?s/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.720950 s, 0.711975 s, -0.008975 s, ? 0.9876, 16 ?s/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012897 s, 0.012771 s, -0.000126 s, ? 
0.9902, 6385 ?s/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.121524 s, 0.124505 s, +0.002981 s, ? 1.0245, 11 ?s/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000082 s, 0.000082 s, +0.000000 s, ? 1.0000, 41 ?s/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000109 s, 0.000111 s, +0.000002 s, ? 1.0183, 55 ?s/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000175 s, 0.000171 s, -0.000004 s, ? 0.9771, 57 ?s/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000719 s, 0.000708 s, -0.000011 s, ? 0.9847, 78 ?s/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010426 s, 0.010608 s, +0.000182 s, ? 1.0175, 7 ?s/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015712 s, 0.015635 s, -0.000077 s, ? 0.9951, 10 ?s/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.077353 s, 0.072072 s, -0.005281 s, ? 0.9317, 12 ?s/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.673930 s, 0.682732 s, +0.008802 s, ? 1.0131, 10 ?s/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000089 s, 0.000090 s, +0.000001 s, ? 1.0112, 45 ?s/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000212 s, 0.000210 s, -0.000002 s, ? 0.9906, 26 ?s/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000183 s, 0.000182 s, -0.000001 s, ? 0.9945, 20 ?s/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000595 s, 0.000594 s, -0.000001 s, ? 0.9983, 84 ?s/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003117 s, 0.003102 s, -0.000015 s, ? 
0.9952, 1034 ?s/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.060197 s, 0.060234 s, +0.000037 s, ? 1.0006, 10039 ?s/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006379 s, 0.006300 s, -0.000079 s, ? 0.9876, 3 ?s/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005008 s, 0.004817 s, -0.000191 s, ? 0.9619, 117 ?s/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.065123 s, 0.065451 s, +0.000328 s, ? 1.0050, 8 ?s/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026404 s, 0.026282 s, -0.000122 s, ? 0.9954, 42 ?s/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.203456 s, 0.206873 s, +0.003417 s, ? 1.0168, 6 ?s/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.929809 s, 1.935918 s, +0.006109 s, ? 1.0032, 12 ?s/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.825064 s, 2.827320 s, +0.002256 s, ? 1.0008, 13 ?s/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000857 s, 0.000842 s, -0.000015 s, ? 0.9825, 421 ?s/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000870 s, 0.000870 s, +0.000000 s, ? 1.0000, 435 ?s/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000161 s, 0.000165 s, +0.000004 s, ? 1.0248, 41 ?s/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001147 s, 0.001145 s, -0.000002 s, ? 0.9983, 572 ?s/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.026640 s, 0.026500 s, -0.000140 s, ? 0.9947, 26500 ?s/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.059849 s, 0.059407 s, -0.000442 s, ? 
0.9926, 9901 ?s/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006326 s, 0.006325 s, -0.000001 s, ? 0.9998, 3 ?s/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005188 s, 0.005171 s, -0.000017 s, ? 0.9967, 126 ?s/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.067633 s, 0.066837 s, -0.000796 s, ? 0.9882, 10 ?s/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.306969 s, 0.314252 s, +0.007283 s, ? 1.0237, 7 ?s/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.293370 s, 0.304160 s, +0.010790 s, ? 1.0368, 7 ?s/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.087159 s, 0.089223 s, +0.002064 s, ? 1.0237, 10 ?s/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.027251 s, 0.026711 s, -0.000540 s, ? 0.9802, 43 ?s/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 3.010011 s, 3.243010 s, +0.232999 s, ? 1.0774, 33 ?s/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.753434 s, 0.756500 s, +0.003066 s, ? 1.0041, 14 ?s/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 18.123103 s, 5.693818 s, -12.429285 s, ? 0.3142, 15 ?s/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.583206 s, 0.590904 s, +0.007698 s, ? 1.0132, 17 ?s/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 17.907312 s, 5.677655 s, -12.229657 s, ? 0.3171, 15 ?s/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 17.684797 s, 5.563370 s, -12.121427 s, ? 0.3146, 15 ?s/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.881471 s, 2.864099 s, -0.017372 s, ? 
0.9940, 14 ?s/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 101.062002 s, 113.297287 s, +12.235285 s, ? 1.1211, 494 ?s/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 63.148971 s, 59.498652 s, -3.650319 s, ? 0.9422, 155 ?s/rev Differential Revision: https://phab.mercurial-scm.org/D9491
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 21 Nov 2020 09:40:52 +0100
parents aebc976fd7d5
children fad504cfc94b
line wrap: on
line source

use crypto::digest::Digest;
use crypto::sha1::Sha1;

/// States of the byte-at-a-time state machine run by `_encode`.
///
/// Most states record how much of a special path-component prefix has been
/// matched so far. `_encode` compares against lowercase bytes (`b'a'`,
/// `b'u'`, `b'x'`, ...); the uppercase spellings in the comments below
/// ("AUX", "CON", ...) name the sequences being tracked, presumably the
/// Windows reserved device names (TODO confirm).
#[derive(PartialEq, Debug)]
#[allow(non_camel_case_types)]
enum path_state {
    START, /* first byte of a path component */
    A,     /* "AUX" */
    // "au" matched; an 'x' moves on to THIRD
    AU,
    THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
    C,     /* "CON" or "COMn" */
    // "co" matched; 'm' moves on to COMLPT, 'n' to THIRD
    CO,
    COMLPT, /* "COM" or "LPT" */
    // "com" or "lpt" followed by a digit in '1'..='9'
    COMLPTn,
    // 'l' matched at the start of a component; 'p' moves on to LP
    L,
    // "lp" matched; 't' moves on to COMLPT
    LP,
    // 'n' matched at the start of a component; 'u' moves on to NU
    N,
    // "nu" matched; 'l' moves on to THIRD
    NU,
    P, /* "PRN" */
    // "pr" matched; 'n' moves on to THIRD
    PR,
    LDOT, /* leading '.' */
    DOT,  /* '.' in a non-leading position */
    H,    /* ".h" */
    HGDI, /* ".hg", ".d", or ".i" */
    // ' ' in a non-leading position (entered from DEFAULT)
    SPACE,
    DEFAULT, /* byte of a path component after the first */
}

/* state machine for dir-encoding */
/// States used by `encode_dir` to recognise components ending in ".hg",
/// ".d" or ".i" that are directly followed by a '/'.
#[allow(non_camel_case_types)]
enum dir_state {
    // a '.' has just been copied
    DDOT,
    // ".h" has just been copied; a 'g' completes ".hg"
    DH,
    // ".hg", ".d" or ".i" has just been copied; a '/' triggers the ".hg"
    // insertion
    DHGDI,
    // anywhere else; this is also the initial state
    DDEFAULT,
}

/// Tells whether byte `c` belongs to the 256-bit set `bitset`.
///
/// Byte `c` is represented by bit `c % 32` of word `c / 32`.
fn inset(bitset: &[u32; 8], c: u8) -> bool {
    let word = bitset[usize::from(c / 32)];
    let mask = 1u32 << (c % 32);
    (word & mask) != 0
}

/// Emits the single byte `c` at offset `*destlen`, then advances `*destlen`.
///
/// With `dest == None` nothing is stored and only the length is counted,
/// which lets callers measure an encoding before allocating for it.
fn charcopy(dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
    let pos = *destlen;
    if let Some(buf) = dest {
        buf[pos] = c;
    }
    *destlen = pos + 1;
}

/// Emits all of `src` starting at offset `*destlen`, then advances
/// `*destlen` by `src.len()`.
///
/// With `dest == None` nothing is stored and only the length is counted.
fn memcopy(dest: Option<&mut [u8]>, destlen: &mut usize, src: &[u8]) {
    let start = *destlen;
    let end = start + src.len();
    if let Some(buf) = dest {
        buf[start..end].copy_from_slice(src);
    }
    *destlen = end;
}

/// Reborrows an optional output buffer for a shorter lifetime.
///
/// This lets the same `Option<&mut [u8]>` be handed to several helper
/// calls in a row without being moved out by the first one.
fn rewrap_option<'a, 'b: 'a>(
    x: &'a mut Option<&'b mut [u8]>,
) -> Option<&'a mut [u8]> {
    x.as_deref_mut()
}

/// Emits `c` as two lowercase hexadecimal digits, high nibble first.
fn hexencode<'a>(mut dest: Option<&'a mut [u8]>, destlen: &mut usize, c: u8) {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let high = DIGITS[usize::from(c >> 4)];
    let low = DIGITS[usize::from(c & 0x0f)];
    charcopy(rewrap_option(&mut dest), destlen, high);
    charcopy(dest, destlen, low);
}

/// Emits the 3-byte escape for `c`: a tilde followed by the two lowercase
/// hex digits of `c`.
fn escape3(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
    {
        let tilde_slot = rewrap_option(&mut dest);
        charcopy(tilde_slot, destlen, b'~');
    }
    hexencode(dest, destlen, c);
}

/// Copies `src` to `dest`, inserting ".hg" in front of every '/' that
/// directly follows ".hg", ".d" or ".i".
///
/// With `dest == None` nothing is written. In both cases the number of
/// bytes the encoded form occupies is returned.
fn encode_dir(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
    let mut state = dir_state::DDEFAULT;
    let mut pos = 0;
    let mut written = 0;

    while let Some(&byte) = src.get(pos) {
        // Each arm yields the next state and whether `byte` is consumed
        // (copied to the output) in this step. A byte that is not consumed
        // is looked at again in the next iteration, from DDEFAULT.
        let (next, consume) = match (state, byte) {
            (dir_state::DDOT, b'd') | (dir_state::DDOT, b'i') => {
                (dir_state::DHGDI, true)
            }
            (dir_state::DDOT, b'h') => (dir_state::DH, true),
            (dir_state::DH, b'g') => (dir_state::DHGDI, true),
            (dir_state::DHGDI, b'/') => {
                // the marker goes in front of the separator
                memcopy(rewrap_option(&mut dest), &mut written, b".hg");
                (dir_state::DDEFAULT, true)
            }
            (dir_state::DDEFAULT, b'.') => (dir_state::DDOT, true),
            (dir_state::DDEFAULT, _) => (dir_state::DDEFAULT, true),
            (dir_state::DDOT, _)
            | (dir_state::DH, _)
            | (dir_state::DHGDI, _) => (dir_state::DDEFAULT, false),
        };
        if consume {
            charcopy(rewrap_option(&mut dest), &mut written, byte);
            pos += 1;
        }
        state = next;
    }
    written
}

/// Core path encoder shared by `basic_encode` and `aux_encode`.
///
/// Walks `src` one byte at a time through the `path_state` machine and
/// emits the encoded form into `dest`. With `dest == None` nothing is
/// written and only the encoded length is computed.
///
/// * `twobytes`: set of bytes emitted as `_` followed by the byte plus 32
///   (`_` itself is emitted as `__`).
/// * `onebyte`: set of bytes copied through unchanged.
/// * any other byte is emitted as a `~xx` hex escape (see `escape3`).
/// * `encodedir`: when true, ".hg" is inserted in front of a '/' that
///   directly follows ".hg", ".d" or ".i".
///
/// Independently of the two sets, the machine escapes a '.' or ' ' that
/// starts or ends a path component, and escapes one letter of the
/// components "aux", "con", "prn", "nul", "com1".."com9" and
/// "lpt1".."lpt9" when they are followed by '.', '/', a NUL byte or the
/// end of `src`.
///
/// Returns the number of bytes of the encoded form.
fn _encode(
    twobytes: &[u32; 8],
    onebyte: &[u32; 8],
    mut dest: Option<&mut [u8]>,
    src: &[u8],
    encodedir: bool,
) -> usize {
    let mut state = path_state::START;
    let mut i = 0;
    let mut destlen = 0;
    let len = src.len();

    // Arms that do not advance `i` only switch state; the same byte is then
    // examined again by the new state on the next iteration.
    while i < len {
        match state {
            // First byte of a component: escape a leading '.' or ' ', and
            // start tracking the special names on their first letter.
            path_state::START => match src[i] {
                b'/' => {
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'.' => {
                    state = path_state::LDOT;
                    escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b' ' => {
                    state = path_state::DEFAULT;
                    escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'a' => {
                    state = path_state::A;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'c' => {
                    state = path_state::C;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'l' => {
                    state = path_state::L;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'n' => {
                    state = path_state::N;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'p' => {
                    state = path_state::P;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                _ => {
                    state = path_state::DEFAULT;
                }
            },
            path_state::A => {
                if src[i] == b'u' {
                    state = path_state::AU;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::AU => {
                if src[i] == b'x' {
                    // The third letter is consumed but not emitted yet:
                    // THIRD decides whether it must be escaped.
                    state = path_state::THIRD;
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            // src[i - 1] is the pending third letter of "aux", "con", "nul"
            // or "prn".
            path_state::THIRD => {
                state = path_state::DEFAULT;
                match src[i] {
                    // The name makes up the whole component (or everything
                    // before its first '.'): emit the third letter escaped.
                    b'.' | b'/' | b'\0' => escape3(
                        rewrap_option(&mut dest),
                        &mut destlen,
                        src[i - 1],
                    ),
                    // Otherwise step back so that DEFAULT processes the
                    // pending letter like any other byte.
                    _ => i -= 1,
                }
            }
            path_state::C => {
                if src[i] == b'o' {
                    state = path_state::CO;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::CO => {
                // Both continuations consume their letter without emitting
                // it; it is emitted later, plain or escaped.
                if src[i] == b'm' {
                    state = path_state::COMLPT;
                    i += 1;
                } else if src[i] == b'n' {
                    state = path_state::THIRD;
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            // src[i - 1] is the pending 'm' of "com" or 't' of "lpt".
            path_state::COMLPT => {
                if src[i] >= b'1' && src[i] <= b'9' {
                    state = path_state::COMLPTn;
                    i += 1;
                } else {
                    // No digit follows: emit the pending letter unchanged.
                    state = path_state::DEFAULT;
                    charcopy(
                        rewrap_option(&mut dest),
                        &mut destlen,
                        src[i - 1],
                    );
                }
            }
            // src[i - 2] is the pending letter, src[i - 1] the pending digit.
            path_state::COMLPTn => {
                state = path_state::DEFAULT;
                match src[i] {
                    b'.' | b'/' | b'\0' => {
                        // Escape the letter, keep the digit as it is.
                        escape3(
                            rewrap_option(&mut dest),
                            &mut destlen,
                            src[i - 2],
                        );
                        charcopy(
                            rewrap_option(&mut dest),
                            &mut destlen,
                            src[i - 1],
                        );
                    }
                    _ => {
                        // Emit both pending bytes unchanged.
                        memcopy(
                            rewrap_option(&mut dest),
                            &mut destlen,
                            &src[i - 2..i],
                        );
                    }
                }
            }
            path_state::L => {
                if src[i] == b'p' {
                    state = path_state::LP;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::LP => {
                if src[i] == b't' {
                    // 't' stays pending, see COMLPT
                    state = path_state::COMLPT;
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::N => {
                if src[i] == b'u' {
                    state = path_state::NU;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::NU => {
                if src[i] == b'l' {
                    // 'l' stays pending, see THIRD
                    state = path_state::THIRD;
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::P => {
                if src[i] == b'r' {
                    state = path_state::PR;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            path_state::PR => {
                if src[i] == b'n' {
                    // 'n' stays pending, see THIRD
                    state = path_state::THIRD;
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            // Right after a leading '.', which START already emitted in
            // escaped form.
            path_state::LDOT => match src[i] {
                b'd' | b'i' => {
                    state = path_state::HGDI;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'h' => {
                    state = path_state::H;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                _ => {
                    state = path_state::DEFAULT;
                }
            },
            // Right after a non-leading '.', which DEFAULT consumed without
            // emitting it.
            path_state::DOT => match src[i] {
                b'/' | b'\0' => {
                    // The '.' ended its component: emit it escaped.
                    state = path_state::START;
                    memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'd' | b'i' => {
                    state = path_state::HGDI;
                    charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                b'h' => {
                    state = path_state::H;
                    memcopy(rewrap_option(&mut dest), &mut destlen, b".h");
                    i += 1;
                }
                _ => {
                    // Plain '.' inside a component; src[i] is left for
                    // DEFAULT.
                    state = path_state::DEFAULT;
                    charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
                }
            },
            path_state::H => {
                if src[i] == b'g' {
                    state = path_state::HGDI;
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                } else {
                    state = path_state::DEFAULT;
                }
            }
            // ".hg", ".d" or ".i" has just been emitted.
            path_state::HGDI => {
                if src[i] == b'/' {
                    state = path_state::START;
                    if encodedir {
                        memcopy(
                            rewrap_option(&mut dest),
                            &mut destlen,
                            b".hg",
                        );
                    }
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1
                } else {
                    state = path_state::DEFAULT;
                }
            }
            // Right after a non-leading ' ', which DEFAULT consumed without
            // emitting it.
            path_state::SPACE => match src[i] {
                b'/' | b'\0' => {
                    // The ' ' ended its component: emit it escaped.
                    state = path_state::START;
                    memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                _ => {
                    state = path_state::DEFAULT;
                    charcopy(rewrap_option(&mut dest), &mut destlen, b' ');
                }
            },
            path_state::DEFAULT => {
                // Copy the whole run of pass-through bytes at once.
                while i != len && inset(onebyte, src[i]) {
                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
                    i += 1;
                }
                if i == len {
                    break;
                }
                match src[i] {
                    // '.' and ' ' are held back until the next byte shows
                    // whether they end the component.
                    b'.' => {
                        state = path_state::DOT;
                        i += 1
                    }
                    b' ' => {
                        state = path_state::SPACE;
                        i += 1
                    }
                    b'/' => {
                        state = path_state::START;
                        charcopy(rewrap_option(&mut dest), &mut destlen, b'/');
                        i += 1;
                    }
                    _ => {
                        // NOTE(review): this first branch looks unreachable,
                        // since the loop above only stops on a byte that is
                        // not in `onebyte`.
                        if inset(onebyte, src[i]) {
                            loop {
                                charcopy(
                                    rewrap_option(&mut dest),
                                    &mut destlen,
                                    src[i],
                                );
                                i += 1;
                                if !(i < len && inset(onebyte, src[i])) {
                                    break;
                                }
                            }
                        } else if inset(twobytes, src[i]) {
                            // Two-byte form: '_' then the byte plus 32
                            // ('_' itself is doubled).
                            let c = src[i];
                            i += 1;
                            charcopy(
                                rewrap_option(&mut dest),
                                &mut destlen,
                                b'_',
                            );
                            charcopy(
                                rewrap_option(&mut dest),
                                &mut destlen,
                                if c == b'_' { b'_' } else { c + 32 },
                            );
                        } else {
                            escape3(
                                rewrap_option(&mut dest),
                                &mut destlen,
                                src[i],
                            );
                            i += 1;
                        }
                    }
                }
            }
        }
    }
    // End of input: flush whatever the final state was still holding back.
    // The end of `src` is treated like the end of a component.
    match state {
        path_state::START => (),
        path_state::A => (),
        path_state::AU => (),
        path_state::THIRD => {
            escape3(rewrap_option(&mut dest), &mut destlen, src[i - 1])
        }
        path_state::C => (),
        path_state::CO => (),
        path_state::COMLPT => {
            charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1])
        }
        path_state::COMLPTn => {
            escape3(rewrap_option(&mut dest), &mut destlen, src[i - 2]);
            charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1]);
        }
        path_state::L => (),
        path_state::LP => (),
        path_state::N => (),
        path_state::NU => (),
        path_state::P => (),
        path_state::PR => (),
        path_state::LDOT => (),
        path_state::DOT => {
            memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
        }
        path_state::H => (),
        path_state::HGDI => (),
        path_state::SPACE => {
            memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
        }
        path_state::DEFAULT => (),
    };
    destlen
}

/// Runs `_encode` with the default byte sets and directory encoding
/// enabled.
///
/// The bytes `A`..=`Z` and `_` use the two-byte form; the bytes of
/// `ONEBYTE` pass through unchanged; every other byte becomes a `~xx`
/// escape. Returns the encoded length; `dest == None` only measures.
fn basic_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
    const TWOBYTES: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0];
    const ONEBYTE: [u32; 8] =
        [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0];
    _encode(&TWOBYTES, &ONEBYTE, dest, src, true)
}

/// Longest encoded path, in bytes, that `path_encode` returns in plain
/// encoded form; anything longer goes through `hash_encode`. It is also the
/// size of the output buffer that `hash_mangle` allocates.
const MAXSTOREPATHLEN: usize = 120;

/// Lowercasing encoder used on the hashed-path route.
///
/// Bytes of `PLAIN` are copied unchanged, `A`..=`Z` are emitted as their
/// lowercase counterpart (byte plus 32), and every other byte becomes a
/// `~xx` escape. Returns the encoded length; `dest == None` only measures.
fn lower_encode(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
    const PLAIN: [u32; 8] =
        [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0];
    const UPPER: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0];

    let mut written = 0;
    for &byte in src.iter() {
        let out = rewrap_option(&mut dest);
        if inset(&PLAIN, byte) {
            charcopy(out, &mut written, byte);
        } else if inset(&UPPER, byte) {
            charcopy(out, &mut written, byte + 32);
        } else {
            escape3(out, &mut written, byte);
        }
    }
    written
}

/// Runs `_encode` with only its state-machine rules active.
///
/// The two-byte set is empty and every byte except ' ', '.' and '/' passes
/// through unchanged, so only the special-name and leading/trailing
/// dot/space handling of `_encode` applies. Directory encoding is off.
/// Returns the encoded length; `dest == None` only measures.
fn aux_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
    const NO_TWOBYTES: [u32; 8] = [0; 8];
    const ONEBYTE: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0];
    _encode(&NO_TWOBYTES, &ONEBYTE, dest, src, false)
}

/// Builds the hashed store name for `src`.
///
/// The result is laid out as
/// `dh/` + shortened directories + basename prefix + hex(`sha`) + extension
/// where
///
/// * each directory component of `src` is cut to its first 8 bytes, a
///   trailing '.' or ' ' of the cut component is replaced by '_', and
///   components are added only while the directory part stays within 68
///   bytes;
/// * the extension is the part of the basename starting at its last '.'
///   (empty when the basename has no '.');
/// * the basename prefix is as much of the basename as fits once
///   everything else is accounted for, so that the result does not exceed
///   `MAXSTOREPATHLEN`.
///
/// A `src` without any '/' has no directory part. If the digest and the
/// extension alone already exceed `MAXSTOREPATHLEN`, no basename prefix is
/// added and the result is longer than the limit rather than being cut.
fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> {
    let dirprefixlen = 8;
    let maxshortdirslen = 68;
    let mut destlen = 0;

    let last_slash = src.iter().rposition(|b| *b == b'/');
    let basename = &src[last_slash.map_or(0, |slash| slash + 1)..];
    let ext: &[u8] = match basename.iter().rposition(|b| *b == b'.') {
        Some(dot) => &basename[dot..],
        None => &[],
    };

    let mut dest = vec![0; MAXSTOREPATHLEN];
    memcopy(Some(&mut dest[..]), &mut destlen, b"dh/");

    // Only what precedes the last '/' is a directory; without any '/' the
    // whole of `src` is the basename and must not be walked here.
    if let Some(slash) = last_slash {
        let mut first = true;
        for component in src[..slash].split(|b| *b == b'/') {
            let prefix = &component[..component.len().min(dirprefixlen)];
            let separator = if first { 0 } else { 1 };
            // `+ 3` accounts for the "dh/" already in `dest`.
            if destlen + prefix.len() + separator > maxshortdirslen + 3 {
                break;
            }
            if !first {
                charcopy(Some(&mut dest[..]), &mut destlen, b'/');
            }
            memcopy(Some(&mut dest[..]), &mut destlen, prefix);
            if dest[destlen - 1] == b'.' || dest[destlen - 1] == b' ' {
                dest[destlen - 1] = b'_';
            }
            first = false;
        }
        if !first {
            charcopy(Some(&mut dest[..]), &mut destlen, b'/');
        }
    }

    // Bytes that are needed no matter how much of the basename is kept.
    let used = destlen + 2 * sha.len() + ext.len();
    if used > dest.len() {
        // A very long extension does not fit the usual limit; grow the
        // buffer instead of indexing past its end.
        dest.resize(used, 0);
    }

    let slop = MAXSTOREPATHLEN.saturating_sub(used);
    let filler = &basename[..basename.len().min(slop)];
    memcopy(Some(&mut dest[..]), &mut destlen, filler);

    for c in sha {
        hexencode(Some(&mut dest[..]), &mut destlen, *c);
    }
    memcopy(Some(&mut dest[..]), &mut destlen, ext);

    // sometimes the path is shorter than MAXSTOREPATHLEN
    dest.truncate(destlen);
    dest
}

/// Size of each scratch buffer used by `hash_encode`.
const MAXENCODE: usize = 4096 * 4;

/// Computes the hashed form of `src`, for paths whose plain encoding is too
/// long.
///
/// The steps are: dir-encode the whole of `src` (`encode_dir`), take the
/// SHA-1 of that, lower-encode everything after the first 5 bytes of the
/// dir-encoded path (`lower_encode`), aux-encode the result (`aux_encode`)
/// and finally assemble the name with `hash_mangle`.
///
/// # Panics
///
/// Panics if `src` is shorter than the 5 bytes that get skipped, or if
/// `(src.len() - 5) * 3` reaches `MAXENCODE`.
fn hash_encode(src: &[u8]) -> Vec<u8> {
    // The first 5 bytes are skipped below (presumably a store prefix such
    // as "data/"; confirm with callers), so they have to exist. Checking
    // up front also keeps the subtraction from underflowing.
    assert!(
        src.len() >= 5,
        "path_encode::hash_encode: string too short: {}",
        src.len()
    );
    let baselen = (src.len() - 5) * 3;
    if baselen >= MAXENCODE {
        panic!("path_encode::hash_encode: string too long: {}", baselen)
    };
    let dired = &mut [0; MAXENCODE];
    let lowered = &mut [0; MAXENCODE];
    let auxed = &mut [0; MAXENCODE];
    let dirlen = encode_dir(Some(&mut dired[..]), src);
    // The digest covers the complete dir-encoded path, prefix included.
    let sha = {
        let mut hasher = Sha1::new();
        hasher.input(&dired[..dirlen]);
        let mut hash = vec![0; 20];
        hasher.result(&mut hash);
        hash
    };
    let lowerlen = lower_encode(Some(&mut lowered[..]), &dired[..dirlen][5..]);
    let auxlen = aux_encode(Some(&mut auxed[..]), &lowered[..lowerlen]);
    hash_mangle(&auxed[..auxlen], &sha)
}

/// Encodes `path` for use as a file name in the store.
///
/// The path is run through `basic_encode`. If `path` itself, or its
/// encoded form, is longer than `MAXSTOREPATHLEN`, the hashed form from
/// `hash_encode` is returned instead.
pub fn path_encode(path: &[u8]) -> Vec<u8> {
    // Over-long input goes straight to the hashed form, without measuring.
    if path.len() > MAXSTOREPATHLEN {
        return hash_encode(path);
    }

    // Dry run: only the encoded length is computed.
    let encoded_len = basic_encode(None, path);
    if encoded_len > MAXSTOREPATHLEN {
        return hash_encode(path);
    }
    if encoded_len == path.len() {
        // Same length is taken to mean that nothing had to be rewritten.
        return path.to_vec();
    }

    let mut encoded = vec![0; encoded_len];
    basic_encode(Some(&mut encoded), path);
    encoded
}