diff rust/hg-core/src/dirstate_tree/owning.rs @ 47954:4afd6cc447b9

rust: Make OwningDirstateMap generic and move it into hg-core This will enable using it in rhg too. The `OwningDirstateMap::new_empty` constructor is generic and accepts a value of any type that gives acces to a bytes buffer. That buffer must stay valid as long as the value hasn?t been dropped, and must keep its memory address even if the value is moved. The `StableDeref` marker trait encodes those constraints. Previously no trait was needed because the value was always of type `PyBytes` which we know satisfies those constraints. The buffer type is ereased in the struct itself through boxing and dynamic dispatch, in order to simplify other signatures that mention `OwningDirstateMap`. Differential Revision: https://phab.mercurial-scm.org/D11396
author Simon Sapin <simon.sapin@octobus.net>
date Thu, 09 Sep 2021 18:07:40 +0200
parents rust/hg-cpython/src/dirstate/owning.rs@78f7f0d490ee
children 37a41267d000
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate_tree/owning.rs	Thu Sep 09 18:07:40 2021 +0200
@@ -0,0 +1,105 @@
+use super::dirstate_map::DirstateMap;
+use stable_deref_trait::StableDeref;
+use std::ops::Deref;
+
+/// Keep a `DirstateMap<'on_disk>` next to the `on_disk` buffer that it
+/// borrows.
+///
+/// This is similar to [`OwningRef`] which is more limited because it
+/// represents exactly one `&T` reference next to the value it borrows, as
+/// opposed to a struct that may contain an arbitrary number of references in
+/// arbitrarily-nested data structures.
+///
+/// [`OwningRef`]: https://docs.rs/owning_ref/0.4.1/owning_ref/struct.OwningRef.html
+pub struct OwningDirstateMap {
+    /// Owned handle to a bytes buffer with a stable address.
+    ///
+    /// See <https://docs.rs/owning_ref/0.4.1/owning_ref/trait.StableAddress.html>.
+    on_disk: Box<dyn Deref<Target = [u8]> + Send>,
+
+    /// Pointer for `Box<DirstateMap<'on_disk>>`, typed-erased because the
+    /// language cannot represent a lifetime referencing a sibling field.
+    /// This is not quite a self-referencial struct (moving this struct is not
+    /// a problem as it doesn’t change the address of the bytes buffer owned
+    /// by `PyBytes`) but touches similar borrow-checker limitations.
+    ptr: *mut (),
+}
+
+impl OwningDirstateMap {
+    pub fn new_empty<OnDisk>(on_disk: OnDisk) -> Self
+    where
+        OnDisk: Deref<Target = [u8]> + StableDeref + Send + 'static,
+    {
+        let on_disk = Box::new(on_disk);
+        let bytes: &'_ [u8] = &on_disk;
+        let map = DirstateMap::empty(bytes);
+
+        // Like in `bytes` above, this `'_` lifetime parameter borrows from
+        // the bytes buffer owned by `on_disk`.
+        let ptr: *mut DirstateMap<'_> = Box::into_raw(Box::new(map));
+
+        // Erase the pointed type entirely in order to erase the lifetime.
+        let ptr: *mut () = ptr.cast();
+
+        Self { on_disk, ptr }
+    }
+
+    pub fn get_mut_pair<'a>(
+        &'a mut self,
+    ) -> (&'a [u8], &'a mut DirstateMap<'a>) {
+        // SAFETY: We cast the type-erased pointer back to the same type it had
+        // in `new`, except with a different lifetime parameter. This time we
+        // connect the lifetime to that of `self`. This cast is valid because
+        // `self` owns the same `PyBytes` whose buffer `DirstateMap`
+        // references. That buffer has a stable memory address because the byte
+        // string value of a `PyBytes` is immutable.
+        let ptr: *mut DirstateMap<'a> = self.ptr.cast();
+        // SAFETY: we dereference that pointer, connecting the lifetime of the
+        // new   `&mut` to that of `self`. This is valid because the
+        // raw pointer is   to a boxed value, and `self` owns that box.
+        (&self.on_disk, unsafe { &mut *ptr })
+    }
+
+    pub fn get_mut<'a>(&'a mut self) -> &'a mut DirstateMap<'a> {
+        self.get_mut_pair().1
+    }
+
+    pub fn get<'a>(&'a self) -> &'a DirstateMap<'a> {
+        // SAFETY: same reasoning as in `get_mut` above.
+        let ptr: *mut DirstateMap<'a> = self.ptr.cast();
+        unsafe { &*ptr }
+    }
+
+    pub fn on_disk<'a>(&'a self) -> &'a [u8] {
+        &self.on_disk
+    }
+}
+
+impl Drop for OwningDirstateMap {
+    fn drop(&mut self) {
+        // Silence a "field is never read" warning, and demonstrate that this
+        // value is still alive.
+        let _ = &self.on_disk;
+        // SAFETY: this cast is the same as in `get_mut`, and is valid for the
+        // same reason. `self.on_disk` still exists at this point, drop glue
+        // will drop it implicitly after this `drop` method returns.
+        let ptr: *mut DirstateMap<'_> = self.ptr.cast();
+        // SAFETY: `Box::from_raw` takes ownership of the box away from `self`.
+        // This is fine because drop glue does nothig for `*mut ()` and we’re
+        // in `drop`, so `get` and `get_mut` cannot be called again.
+        unsafe { drop(Box::from_raw(ptr)) }
+    }
+}
+
+fn _static_assert_is_send<T: Send>() {}
+
+fn _static_assert_fields_are_send() {
+    _static_assert_is_send::<Box<DirstateMap<'_>>>();
+}
+
+// SAFETY: we don’t get this impl implicitly because `*mut (): !Send` because
+// thread-safety of raw pointers is unknown in the general case. However this
+// particular raw pointer represents a `Box<DirstateMap<'on_disk>>` that we
+// own. Since that `Box` is `Send` as shown in above, it is sound to mark
+// this struct as `Send` too.
+unsafe impl Send for OwningDirstateMap {}