diff rust/hg-pyo3/src/util.rs @ 52793:42b219a1404a

rust-pyo3-revlog: InnerRevlog definition and constructor We felt that the `mmap_keeparound` naming was after all inappropriate: - it does nothing to keep the data alive. Instead, it requires the caller to keep it alive. - using it for mmap'ed data is certainly the main use case, but more generally, it is about extracting any Python buffer for direct usage as a bytes slice. Everything else is rather straightforward. This object has several layers of inner mutability, like its progenitor in `hg-cpython`.
author Georges Racinet <georges.racinet@cloudcrane.io>
date Wed, 25 Dec 2024 13:29:56 +0100
parents c5128c541021
children 4f41a8acf350
line wrap: on
line diff
--- a/rust/hg-pyo3/src/util.rs	Wed Dec 25 12:43:45 2024 +0100
+++ b/rust/hg-pyo3/src/util.rs	Wed Dec 25 13:29:56 2024 +0100
@@ -1,3 +1,5 @@
+use pyo3::buffer::{Element, PyBuffer};
+use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::types::PyDict;
 /// Create the module, with `__package__` given from parent
@@ -26,3 +28,52 @@
     // Rust PyObject is dropped.
     Ok(m)
 }
+
+/// Type shortcut for boxed `Send + Sync` trait objects dereferencing to a
+/// bytes slice, as used in particular for mmap data
+type BoxedBytesSlice =
+    Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>;
+
+/// Take a Python object backed by a Python buffer, and return the underlying
+/// [`PyBuffer`] along with the Rust slice into said buffer.
+///
+/// The caller needs to make sure that the Python buffer is not freed before
+/// the slice, otherwise we'd get a dangling pointer once the incoming
+/// object is freed from Python side. This can be achieved by storing it in a
+/// Python object.
+///
+/// The typical use case is to extract mmap data to make it useable in the
+/// constructs from the `hg` crate.
+///
+/// # Safety
+///
+/// The caller must make sure that the incoming Python object is kept around
+/// for at least as long as the returned [`BoxedBytesSlice`].
+// TODO in PyO3, we already get a reference with two lifetimes, and we
+// could even take a `Borrowed<'a, 'py, T>`.
+// So perhaps we could tie everything together with a lifetime so that it
+// is, after all, safe, and this could be called something like `share_buffer`.
+#[deny(unsafe_op_in_unsafe_fn)]
+pub unsafe fn take_buffer_with_slice(
+    data: &Bound<'_, PyAny>,
+) -> PyResult<(PyBuffer<u8>, BoxedBytesSlice)> {
+    let buf = PyBuffer::<u8>::get(data)?;
+    let len = buf.item_count();
+
+    // Build a slice, but only from a read-only, C-contiguous, 1-D `u8` buffer
+    let cbuf = buf.buf_ptr();
+    let bytes = if std::mem::size_of::<u8>() == buf.item_size()
+        && buf.is_c_contiguous()
+        && u8::is_compatible_format(buf.format())
+        && buf.dimensions() == 1
+        && buf.readonly()
+    {
+        unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
+    } else {
+        return Err(PyValueError::new_err(
+            "buffer has an invalid memory representation",
+        ));
+    };
+
+    Ok((buf, Box::new(bytes)))
+}