changeset 52560:6b694bdf752a

rust-pyo3: implementation of LazyAncestors

There is a difference in the implementation of `__contains__` between PyO3
and rust-cpython: if the signature specified in the Rust code is for a
precise type (e.g., `PyRevision`), rust-cpython automatically converts the
potential resulting `TypeError` into `Ok(false)`, whereas PyO3 lets it
bubble up. Hence we handle that case manually and add it to the common test.

In Mercurial Python code, `None in` for a `LazyAncestors` object can really
happen, namely in this lambda from `discovery._postprocessobsolete`:

```
ispushed = lambda n: torev(n) in futurecommon
```

This lambda can get called with `n` such that `torev(n)` is `None` (seen in
`test-bookmarks-push-pull.t`).
author Georges Racinet <georges.racinet@cloudcrane.io>
date Sat, 07 Dec 2024 18:54:31 +0100
parents 3ffcdbf0b432
children 9af0330788a5
files rust/hg-pyo3/src/ancestors.rs rust/hg-pyo3/src/convert_cpython.rs tests/test-rust-ancestor.py
diffstat 3 files changed, 131 insertions(+), 35 deletions(-)
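To make the `__contains__` difference concrete, here is a minimal, self-contained sketch (not part of the changeset) of the pattern the diff uses on the PyO3 side: a failed extraction of the candidate object is mapped to `Ok(false)` instead of surfacing as a `TypeError`. The function name `contains_like` and the use of plain `i32` revisions are illustrative assumptions.

```
use pyo3::prelude::*;

/// Illustrative only: extracting a precise Rust type from an arbitrary
/// Python object in PyO3 returns a `PyResult`, so a `None` argument would
/// raise `TypeError` unless the failure is handled explicitly.
#[pyfunction]
fn contains_like(candidates: Vec<i32>, obj: &Bound<'_, PyAny>) -> PyResult<bool> {
    // Same shape as `LazyAncestors::__contains__` below: map a failed
    // extraction (e.g. `None`) to `Ok(false)` rather than letting the
    // TypeError bubble up, matching rust-cpython's implicit behaviour.
    obj.extract::<i32>()
        .map_or(Ok(false), |rev| Ok(candidates.contains(&rev)))
}
```

In the changeset itself, the same pattern appears as `PyRevision::extract_bound(obj).map_or(Ok(false), ...)` in `LazyAncestors::__contains__`.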
--- a/rust/hg-pyo3/src/ancestors.rs	Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/ancestors.rs	Sat Dec 07 18:54:31 2024 +0100
@@ -13,11 +13,14 @@
 
 use std::sync::RwLock;
 
-use vcsgraph::lazy_ancestors::AncestorsIterator as VCGAncestorsIterator;
+use vcsgraph::lazy_ancestors::{
+    AncestorsIterator as VCGAncestorsIterator,
+    LazyAncestors as VCGLazyAncestors,
+};
 
 use crate::convert_cpython::{
-    proxy_index_extract, proxy_index_py_leak, py_leaked_borrow_mut,
-    py_leaked_or_map_err,
+    proxy_index_extract, proxy_index_py_leak, py_leaked_borrow,
+    py_leaked_borrow_mut, py_leaked_or_map_err,
 };
 use crate::exceptions::{map_lock_error, GraphError};
 use crate::revision::{rev_pyiter_collect, PyRevision};
@@ -77,11 +80,93 @@
     }
 }
 
+#[pyclass(sequence)]
+struct LazyAncestors {
+    inner: RwLock<UnsafePyLeaked<VCGLazyAncestors<PySharedIndex>>>,
+    proxy_index: PyObject,
+    initrevs: PyObject,
+    stoprev: PyRevision,
+    inclusive: bool,
+}
+
+#[pymethods]
+impl LazyAncestors {
+    #[new]
+    fn new(
+        index_proxy: &Bound<'_, PyAny>,
+        initrevs: &Bound<'_, PyAny>,
+        stoprev: PyRevision,
+        inclusive: bool,
+    ) -> PyResult<Self> {
+        let cloned_proxy = index_proxy.clone().unbind();
+        let initvec: Vec<_> = {
+            // Safety: we don't leak the "faked" reference out of
+            // `UnsafePyLeaked`
+            let borrowed_idx = unsafe { proxy_index_extract(index_proxy)? };
+            rev_pyiter_collect(initrevs, borrowed_idx)?
+        };
+        let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
+        // Safety: we don't leak the "faked" reference out of
+        // `UnsafePyLeaked`
+        let res_lazy = unsafe {
+            leaked_idx.map(py, |idx| {
+                VCGLazyAncestors::new(
+                    idx,
+                    initvec.into_iter().map(|r| r.0),
+                    stoprev.0,
+                    inclusive,
+                )
+            })
+        };
+        let lazy =
+            py_leaked_or_map_err(py, res_lazy, GraphError::from_vcsgraph)?;
+        Ok(Self {
+            inner: lazy.into(),
+            proxy_index: cloned_proxy,
+            initrevs: initrevs.clone().unbind(),
+            stoprev,
+            inclusive,
+        })
+    }
+
+    fn __bool__(slf: PyRef<'_, Self>) -> PyResult<bool> {
+        let leaked = slf.inner.read().map_err(map_lock_error)?;
+        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+        let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
+        Ok(!inner.is_empty())
+    }
+
+    fn __contains__(
+        slf: PyRefMut<'_, Self>,
+        obj: &Bound<'_, PyAny>,
+    ) -> PyResult<bool> {
+        PyRevision::extract_bound(obj).map_or(Ok(false), |rev| {
+            let mut leaked = slf.inner.write().map_err(map_lock_error)?;
+            // Safety: we don't leak the "faked" reference out of
+            // `UnsafePyLeaked`
+            let mut inner =
+                unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
+            inner.contains(rev.0).map_err(GraphError::from_vcsgraph)
+        })
+    }
+
+    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<AncestorsIterator> {
+        let py = slf.py();
+        AncestorsIterator::new(
+            slf.proxy_index.clone_ref(py).bind(py),
+            slf.initrevs.clone_ref(py).bind(py),
+            slf.stoprev,
+            slf.inclusive,
+        )
+    }
+}
+
 pub fn init_module<'py>(
     py: Python<'py>,
     package: &str,
 ) -> PyResult<Bound<'py, PyModule>> {
     let m = new_submodule(py, package, "ancestor")?;
     m.add_class::<AncestorsIterator>()?;
+    m.add_class::<LazyAncestors>()?;
     Ok(m)
 }
--- a/rust/hg-pyo3/src/convert_cpython.rs	Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/convert_cpython.rs	Sat Dec 07 18:54:31 2024 +0100
@@ -222,7 +222,6 @@
 /// static reference. It is possible, depending on `T` that such a leak cannot
 /// occur in practice. We may later on define a marker trait for this,
 /// which will allow us to declare this function to be safe.
-#[allow(dead_code)]
 pub(crate) unsafe fn py_leaked_borrow<'a, 'py: 'a, T>(
     py: &impl WithGIL<'py>,
     leaked: &'a cpython::UnsafePyLeaked<T>,
--- a/tests/test-rust-ancestor.py	Sat Dec 07 18:42:06 2024 +0100
+++ b/tests/test-rust-ancestor.py	Sat Dec 07 18:54:31 2024 +0100
@@ -68,6 +68,49 @@
         ait = AncestorsIterator(idx, [3], 0, False)
         self.assertEqual([r for r in ait], [2, 1, 0])
 
+        ait = AncestorsIterator(idx, [3], 0, False)
+        # tainting the index with a mutation, let's see what happens
+        # (should be more critical with AncestorsIterator)
+        del idx[0:2]
+        try:
+            next(ait)
+        except RuntimeError as exc:
+            assert "leaked reference after mutation" in exc.args[0]
+        else:
+            raise AssertionError("Expected an exception")
+
+    def testlazyancestors(self):
+        LazyAncestors = self.ancestors_mod().LazyAncestors
+
+        idx = self.parserustindex()
+        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
+        self.assertEqual(
+            {i: (r[5], r[6]) for i, r in enumerate(idx)},
+            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
+        )
+        lazy = LazyAncestors(idx, [3], 0, True)
+        # the LazyAncestors instance holds just one reference to the
+        # inner revlog. TODO check that this is normal
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+
+        self.assertTrue(2 in lazy)
+        self.assertTrue(bool(lazy))
+        self.assertFalse(None in lazy)
+        self.assertEqual(list(lazy), [3, 2, 1, 0])
+        # a second time to validate that we spawn new iterators
+        self.assertEqual(list(lazy), [3, 2, 1, 0])
+
+        # now let's watch the refcounts closer
+        ait = iter(lazy)
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
+        del ait
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+        del lazy
+        self.assertEqual(sys.getrefcount(idx.inner), start_count)
+
+        # let's check bool for an empty one
+        self.assertFalse(LazyAncestors(idx, [0], 0, False))
+
     def testrefcount(self):
         AncestorsIterator = self.ancestors_mod().AncestorsIterator
 
@@ -135,37 +178,6 @@
 ):
     rustext_pkg = rustext
 
-    def testlazyancestors(self):
-        LazyAncestors = self.ancestors_mod().LazyAncestors
-
-        idx = self.parserustindex()
-        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
-        self.assertEqual(
-            {i: (r[5], r[6]) for i, r in enumerate(idx)},
-            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
-        )
-        lazy = LazyAncestors(idx, [3], 0, True)
-        # the LazyAncestors instance holds just one reference to the
-        # inner revlog.
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
-
-        self.assertTrue(2 in lazy)
-        self.assertTrue(bool(lazy))
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-        # a second time to validate that we spawn new iterators
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-
-        # now let's watch the refcounts closer
-        ait = iter(lazy)
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
-        del ait
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
-        del lazy
-        self.assertEqual(sys.getrefcount(idx.inner), start_count)
-
-        # let's check bool for an empty one
-        self.assertFalse(LazyAncestors(idx, [0], 0, False))
-
     def testmissingancestors(self):
         MissingAncestors = self.ancestors_mod().MissingAncestors