view rust/hg-cpython/src/parsers.rs @ 43826:5ac243a92e37

rust-performance: introduce FastHashMap type alias for HashMap. Rust's default hashing is slow, because it is meant for preventing collision attacks. For all of the current Rust code, we don't care about those attacks, because if a person with bad intentions has write access to your repo, you have other issues. I've chosen to use the TwoXHash crate because it was made by a reputable member of the Rust community and has very good benchmarks. For now it does not seem to improve performance by much for the current code, but it's something else to not worry about when benchmarking code: in a previous experiment with copytracing in Rust, it accounted for more than 10% of the time of the entire script. Differential Revision: https://phab.mercurial-scm.org/D7116
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 14 Oct 2019 13:57:30 +0200
parents ce088b38f92b
children 26114bd6ec60
line wrap: on
line source

// parsers.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Bindings for the `hg::dirstate::parsers` module provided by the
//! `hg-core` package.
//!
//! From Python, this will be seen as `mercurial.rustext.parsers`
use cpython::{
    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
    PythonObject, ToPyObject,
};
use hg::{
    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
    DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
    PARENT_SIZE,
};
use std::convert::TryInto;

use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
use std::time::Duration;

/// Python-facing wrapper around `parse_dirstate`.
///
/// Runs `parse_dirstate` on the bytes held by `st`, then copies every parsed
/// entry into `dmap` (a dirstate tuple keyed by the file name bytes) and
/// every copy record into `copymap` (bytes mapped to bytes).
///
/// On success, returns a tuple holding the two parents as `PyBytes`. A
/// `DirstateParseError` is turned into a Python `ValueError`; errors raised
/// while filling the dicts are propagated as they are.
fn parse_dirstate_wrapper(
    py: Python,
    dmap: PyDict,
    copymap: PyDict,
    st: PyBytes,
) -> PyResult<PyTuple> {
    let mut entries = FastHashMap::default();
    let mut copy_records = FastHashMap::default();

    let parsed =
        parse_dirstate(&mut entries, &mut copy_records, st.data(py));

    // Bail out early on a parse failure so that the happy path below is not
    // nested inside a `match` arm.
    let parents = match parsed {
        Ok(parents) => parents,
        Err(parse_error) => {
            let message = match parse_error {
                DirstateParseError::TooLittleData => {
                    "too little data for parents".to_string()
                }
                DirstateParseError::Overflow => {
                    "overflow in dirstate".to_string()
                }
                // This variant already carries its own message.
                DirstateParseError::CorruptedEntry(details) => details,
                DirstateParseError::Damaged => {
                    "dirstate appears to be damaged".to_string()
                }
            };
            return Err(PyErr::new::<exc::ValueError, _>(py, message));
        }
    };

    // Mirror the parsed entries into the dict supplied by the caller.
    for (filename, entry) in entries.iter() {
        let key = PyBytes::new(py, filename.as_ref());
        let value = make_dirstate_tuple(py, entry)?;
        dmap.set_item(py, key, value)?;
    }

    // Same for the copy records.
    for (path, copy_path) in copy_records {
        let key = PyBytes::new(py, path.as_ref());
        let value = PyBytes::new(py, copy_path.as_ref());
        copymap.set_item(py, key, value)?;
    }

    let p1 = PyBytes::new(py, &parents.p1);
    let p2 = PyBytes::new(py, &parents.p2);
    Ok((p1, p2).to_py_object(py))
}

/// Python-facing wrapper around `pack_dirstate`.
///
/// * `dmap`: Python dict converted with `extract_dirstate`; it is refreshed
///   from the Rust-side map once packing has succeeded.
/// * `copymap`: Python dict whose keys and values must be `bytes`.
/// * `pl`: tuple whose first two items are the parents, each expected to be
///   `PARENT_SIZE` bytes long.
/// * `now`: integer extracted as a `u64` and passed on as a number of
///   seconds.
///
/// Returns the packed dirstate as `PyBytes`. A parent of the wrong length or
/// a `DirstatePackError` is reported as a Python `ValueError`; conversion
/// errors on the arguments are propagated as they are.
fn pack_dirstate_wrapper(
    py: Python,
    dmap: PyDict,
    copymap: PyDict,
    pl: PyTuple,
    now: PyInt,
) -> PyResult<PyBytes> {
    let p1_bytes = pl.get_item(py, 0).extract::<PyBytes>(py)?;
    let p1: &[u8] = p1_bytes.data(py);
    let p2_bytes = pl.get_item(py, 1).extract::<PyBytes>(py)?;
    let p2: &[u8] = p2_bytes.data(py);

    let mut dirstate_map = extract_dirstate(py, &dmap)?;

    // The conversion result is deliberately kept as a `Result` here: a
    // failure is only surfaced after the parents have been validated.
    let copies = copymap
        .items(py)
        .iter()
        .map(|(key, value)| -> PyResult<(HgPathBuf, HgPathBuf)> {
            let source = HgPathBuf::from_bytes(
                key.extract::<PyBytes>(py)?.data(py),
            );
            let dest = HgPathBuf::from_bytes(
                value.extract::<PyBytes>(py)?.data(py),
            );
            Ok((source, dest))
        })
        .collect::<Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr>>();

    let parents_have_valid_size =
        p1.len() == PARENT_SIZE && p2.len() == PARENT_SIZE;
    if !parents_have_valid_size {
        return Err(PyErr::new::<exc::ValueError, _>(
            py,
            "expected a 20-byte hash".to_string(),
        ));
    }

    let copies = copies?;
    // The length check above guards these two conversions.
    let parents = DirstateParents {
        p1: p1.try_into().unwrap(),
        p2: p2.try_into().unwrap(),
    };
    let timestamp = Duration::from_secs(now.as_object().extract::<u64>(py)?);

    let packed =
        match pack_dirstate(&mut dirstate_map, &copies, parents, timestamp) {
            Ok(packed) => packed,
            Err(pack_error) => {
                let message = match pack_error {
                    DirstatePackError::CorruptedParent => {
                        "expected a 20-byte hash".to_string()
                    }
                    // This variant already carries its own message.
                    DirstatePackError::CorruptedEntry(details) => details,
                    DirstatePackError::BadSize(expected, actual) => {
                        format!(
                            "bad dirstate size: {} != {}",
                            actual, expected
                        )
                    }
                };
                return Err(PyErr::new::<exc::ValueError, _>(py, message));
            }
        };

    // `pack_dirstate` received the map mutably, so write every entry back to
    // the Python dict (presumably because entries may have been updated
    // while packing).
    for (filename, entry) in dirstate_map.iter() {
        let key = PyBytes::new(py, filename.as_ref());
        let value = make_dirstate_tuple(py, entry)?;
        dmap.set_item(py, key, value)?;
    }

    Ok(PyBytes::new(py, &packed))
}

/// Build the `<package>.parsers` Python module.
///
/// The module gets its `__package__` and `__doc__` attributes, exposes
/// `parse_dirstate` and `pack_dirstate` as Python callables, and is then
/// registered in `sys.modules` under its dotted name before being returned.
pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = format!("{}.parsers", package);
    let module = PyModule::new(py, &dotted_name)?;

    module.add(py, "__package__", package)?;
    module.add(py, "__doc__", "Parsers - Rust implementation")?;

    let parse_fn = py_fn!(
        py,
        parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
    );
    module.add(py, "parse_dirstate", parse_fn)?;

    let pack_fn = py_fn!(
        py,
        pack_dirstate_wrapper(
            dmap: PyDict,
            copymap: PyDict,
            pl: PyTuple,
            now: PyInt
        )
    );
    module.add(py, "pack_dirstate", pack_fn)?;

    // Register the new module in `sys.modules` under its dotted name.
    let sys = PyModule::import(py, "sys")?;
    let loaded_modules = sys.get(py, "modules")?.extract::<PyDict>(py)?;
    loaded_modules.set_item(py, &dotted_name, &module)?;

    Ok(module)
}