view rust/hg-cpython/src/pybytes_deref.rs @ 52411:c2480ac4c5e2

rust-pyo3: retrieving the InnerRevlog of hg-cpython This allows PyO3-based code to use the InnerRevlog and access its shared data (core InnerRevlog), which will then allow, e.g., retrieving references on the core Index. On the `hg-cpython` side (`rusthg` crate, `rustext` Python extension module), we also had to build as a Rust library, and open up some accesses (see notably the public accessor for `inner`, the core `InnerRevlog`). Retrieving the Rust struct underlying a Python object defined by another extension module written in Rust is tricky because the Python type objects are duplicated in the extension modules, leading to failure of the normal type checking. See the doc-comment of `convert_cpython::extract_inner_revlog` for a complete explanation. To solve this, we import the Python type object of `rustext` (defined by `hg-cpython`) and perform a manual check. Checking the Python type is necessary, as the PyO3 documentation clearly states that downcasting an object that does not have the proper type is Undefined Behaviour. At this point, we do not have conversion facilities for exceptions (`PyErr` on both sides), hence the remaining unwraps.
author Georges Racinet <georges.racinet@cloudcrane.io>
date Sat, 30 Nov 2024 20:57:02 +0100
parents 7346f93be7a4
children d9d6ae9b9722
line wrap: on
line source

use crate::cpython::buffer::Element;
use cpython::{
    buffer::PyBuffer, exc::ValueError, PyBytes, PyErr, PyResult, Python,
};
use stable_deref_trait::StableDeref;

/// Safe abstraction over a `PyBytes` together with the `&[u8]` slice
/// that borrows it. Implements `Deref<Target = [u8]>`.
///
/// Calling `PyBytes::data` requires a GIL marker but we want to access the
/// data in a thread that (ideally) does not need to acquire the GIL.
/// This type allows separating the call and the use: the call happens once,
/// in `new`, and the resulting slice is then available through `Deref`
/// without any GIL marker.
///
/// It also enables using a (wrapped) `PyBytes` in GIL-unaware generic code.
pub struct PyBytesDeref {
    /// Owned Python object that `data` points into. It is stored so that
    /// the pointed-to bytes stay alive for as long as this wrapper does.
    #[allow(unused)]
    keep_alive: PyBytes,

    /// Borrows the buffer inside `self.keep_alive`,
    /// but the borrow-checker cannot express self-referential structs,
    /// hence the raw pointer instead of a reference.
    data: *const [u8],
}

impl PyBytesDeref {
    /// Wraps `bytes`, capturing a pointer to its data while the GIL marker
    /// `py` is available.
    pub fn new(py: Python, bytes: PyBytes) -> Self {
        // Turn the borrowed slice into a raw pointer right away, so that
        // `bytes` is no longer borrowed when it is moved into the struct.
        let data: *const [u8] = bytes.data(py);
        Self {
            keep_alive: bytes,
            data,
        }
    }

    /// Consumes the wrapper and gives back the underlying `PyBytes`.
    pub fn unwrap(self) -> PyBytes {
        let Self { keep_alive, .. } = self;
        keep_alive
    }
}

impl std::ops::Deref for PyBytesDeref {
    type Target = [u8];

    /// Gives access to the bytes whose location was captured in `new`.
    fn deref(&self) -> &Self::Target {
        let raw: *const [u8] = self.data;
        // Safety: `raw` points into the buffer of `self.keep_alive`, so it
        // is valid for as long as that `PyBytes` is alive, and the slice
        // handed out cannot outlive the borrow of `self`.
        unsafe { &*raw }
    }
}

// Safety: `deref` returns the slice behind the `data` pointer captured in
// `new`. That pointer is stored by value and never reassigned, so moving a
// `PyBytesDeref` does not change the address it dereferences to.
unsafe impl StableDeref for PyBytesDeref {}

/// Compile-time helper: can only be instantiated with a `T` that is `Send`.
/// Does nothing at run time.
fn require_send<T>()
where
    T: Send,
{
}

/// Never called: exists only so that compilation fails if `PyBytes` stops
/// being `Send`.
#[allow(unused)]
fn static_assert_pybytes_is_send() {
    // Naming the instantiation is enough for the `Send` bound to be checked.
    let _: fn() = require_send::<PyBytes>;
}

// Safety: PyBytes is Send (checked at compile time by
// `static_assert_pybytes_is_send`). Raw pointers are not by default,
// but here sending one to another thread is fine since we ensure it stays
// valid: the `PyBytes` it points into is stored in `keep_alive` and so
// travels together with the pointer.
unsafe impl Send for PyBytesDeref {}

/// Safe abstraction over a `PyBuffer` together with the `&[u8]` slice
/// that borrows it. Implements `Deref<Target = [u8]>`.
///
/// The slice is obtained once, in `new`, which needs a GIL marker; it can
/// then be accessed through `Deref` without one.
///
/// It also enables using a (wrapped) `PyBuffer` in GIL-unaware generic code.
pub struct PyBufferDeref {
    /// Owned buffer that `data` points into. It is stored so that the
    /// pointed-to memory stays alive for as long as this wrapper does.
    #[allow(unused)]
    keep_alive: PyBuffer,

    /// Borrows the buffer inside `self.keep_alive`,
    /// but the borrow-checker cannot express self-referential structs,
    /// hence the raw pointer instead of a reference.
    data: *const [u8],
}

/// Borrows the memory exported by `buf` as a byte slice.
///
/// The buffer is accepted only if its items are one byte wide, it is
/// C-contiguous, its format is compatible with `u8` and it is read-only.
///
/// # Errors
///
/// Returns a `ValueError` if any of these conditions is not met, or if the
/// buffer reports a non-zero item count together with a null data pointer.
fn get_buffer<'a>(py: Python, buf: &'a PyBuffer) -> PyResult<&'a [u8]> {
    let len = buf.item_count();
    let cbuf = buf.buf_ptr();

    let has_correct_item_size = std::mem::size_of::<u8>() == buf.item_size();
    let is_valid_buffer = has_correct_item_size
        && buf.is_c_contiguous()
        && u8::is_compatible_format(buf.format())
        && buf.readonly()
        // A null pointer can only be tolerated for an empty buffer.
        && (len == 0 || !cbuf.is_null());

    if !is_valid_buffer {
        return Err(PyErr::new::<ValueError, _>(
            py,
            "Buffer has an invalid memory representation",
        ));
    }

    if cbuf.is_null() {
        // `slice::from_raw_parts` demands a non-null pointer even for an
        // empty slice, and nothing checked above guarantees that for an
        // empty buffer: hand out a regular empty slice instead.
        return Ok(&[]);
    }

    // Safety: `cbuf` is non-null and, per the checks above, designates `len`
    // contiguous one-byte items of a read-only buffer. `u8` has no alignment
    // requirement. The returned lifetime is tied to the borrow of `buf`; the
    // exporter is expected to keep the memory valid until the buffer is
    // released.
    Ok(unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) })
}

impl PyBufferDeref {
    /// Wraps `buf`, capturing a pointer to its bytes while the GIL marker
    /// `py` is available.
    ///
    /// Fails with whatever error `get_buffer` returns for `buf`.
    pub fn new(py: Python, buf: PyBuffer) -> PyResult<Self> {
        // Turn the borrowed slice into a raw pointer right away, so that
        // `buf` is no longer borrowed when it is moved into the struct.
        let data: *const [u8] = get_buffer(py, &buf)?;
        Ok(Self {
            keep_alive: buf,
            data,
        })
    }
}

impl std::ops::Deref for PyBufferDeref {
    type Target = [u8];

    /// Gives access to the bytes whose location was captured in `new`.
    fn deref(&self) -> &Self::Target {
        let raw: *const [u8] = self.data;
        // Safety: `raw` points into the memory of `self.keep_alive`, so it
        // is valid for as long as that `PyBuffer` is alive, and the slice
        // handed out cannot outlive the borrow of `self`.
        unsafe { &*raw }
    }
}

// Safety: `deref` returns the slice behind the `data` pointer captured in
// `new`. That pointer is stored by value and never reassigned, so moving a
// `PyBufferDeref` does not change the address it dereferences to.
unsafe impl StableDeref for PyBufferDeref {}

/// Never called: exists only so that compilation fails if `PyBuffer` stops
/// being `Send`.
#[allow(unused)]
fn static_assert_pybuffer_is_send() {
    // Naming the instantiation is enough for the `Send` bound to be checked.
    let _: fn() = require_send::<PyBuffer>;
}

// Safety: PyBuffer is Send (checked at compile time by
// `static_assert_pybuffer_is_send`). Raw pointers are not by default,
// but here sending one to another thread is fine since we ensure it stays
// valid: the `PyBuffer` it points into is stored in `keep_alive` and so
// travels together with the pointer.
unsafe impl Send for PyBufferDeref {}