rust-pyo3: implementation of LazyAncestors
There is a difference in the implementaion of `__contains__`
between PyO3 and rust-cpython: if the specified signature in
Rust code is for a precise type (e.g., `PyRevision`) rust-cpython
would automatically convert the potential resulting `TypeError` into
`Ok(false)`, whereas PyO3 let it bubble up.
Hence we treat the case manually and add it to the common test.
In Mercurial Python code, `None in` for a `LazyAncestors` object can
really happens, namely in this lambda from `discover._postprocessobsolete`:
```
ispushed = lambda n: torev(n) in futurecommon
```
This lambda can get called with `n` such that `torev(n)` is `False`
(seen in `test-bookmarks-push-pull.t`).
--- a/rust/hg-pyo3/src/ancestors.rs Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/ancestors.rs Sat Dec 07 18:54:31 2024 +0100
@@ -13,11 +13,14 @@
use std::sync::RwLock;
-use vcsgraph::lazy_ancestors::AncestorsIterator as VCGAncestorsIterator;
+use vcsgraph::lazy_ancestors::{
+ AncestorsIterator as VCGAncestorsIterator,
+ LazyAncestors as VCGLazyAncestors,
+};
use crate::convert_cpython::{
- proxy_index_extract, proxy_index_py_leak, py_leaked_borrow_mut,
- py_leaked_or_map_err,
+ proxy_index_extract, proxy_index_py_leak, py_leaked_borrow,
+ py_leaked_borrow_mut, py_leaked_or_map_err,
};
use crate::exceptions::{map_lock_error, GraphError};
use crate::revision::{rev_pyiter_collect, PyRevision};
@@ -77,11 +80,93 @@
}
}
+#[pyclass(sequence)]
+struct LazyAncestors {
+ inner: RwLock<UnsafePyLeaked<VCGLazyAncestors<PySharedIndex>>>,
+ proxy_index: PyObject,
+ initrevs: PyObject,
+ stoprev: PyRevision,
+ inclusive: bool,
+}
+
+#[pymethods]
+impl LazyAncestors {
+ #[new]
+ fn new(
+ index_proxy: &Bound<'_, PyAny>,
+ initrevs: &Bound<'_, PyAny>,
+ stoprev: PyRevision,
+ inclusive: bool,
+ ) -> PyResult<Self> {
+ let cloned_proxy = index_proxy.clone().unbind();
+ let initvec: Vec<_> = {
+ // Safety: we don't leak the "faked" reference out of
+ // `UnsafePyLeaked`
+ let borrowed_idx = unsafe { proxy_index_extract(index_proxy)? };
+ rev_pyiter_collect(initrevs, borrowed_idx)?
+ };
+ let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
+ // Safety: we don't leak the "faked" reference out of
+ // `UnsafePyLeaked`
+ let res_lazy = unsafe {
+ leaked_idx.map(py, |idx| {
+ VCGLazyAncestors::new(
+ idx,
+ initvec.into_iter().map(|r| r.0),
+ stoprev.0,
+ inclusive,
+ )
+ })
+ };
+ let lazy =
+ py_leaked_or_map_err(py, res_lazy, GraphError::from_vcsgraph)?;
+ Ok(Self {
+ inner: lazy.into(),
+ proxy_index: cloned_proxy,
+ initrevs: initrevs.clone().unbind(),
+ stoprev,
+ inclusive,
+ })
+ }
+
+ fn __bool__(slf: PyRef<'_, Self>) -> PyResult<bool> {
+ let leaked = slf.inner.read().map_err(map_lock_error)?;
+ // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+ let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
+ Ok(!inner.is_empty())
+ }
+
+ fn __contains__(
+ slf: PyRefMut<'_, Self>,
+ obj: &Bound<'_, PyAny>,
+ ) -> PyResult<bool> {
+ PyRevision::extract_bound(obj).map_or(Ok(false), |rev| {
+ let mut leaked = slf.inner.write().map_err(map_lock_error)?;
+ // Safety: we don't leak the "faked" reference out of
+ // `UnsafePyLeaked`
+ let mut inner =
+ unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
+ inner.contains(rev.0).map_err(GraphError::from_vcsgraph)
+ })
+ }
+
+ fn __iter__(slf: PyRef<'_, Self>) -> PyResult<AncestorsIterator> {
+ let py = slf.py();
+ AncestorsIterator::new(
+ slf.proxy_index.clone_ref(py).bind(py),
+ slf.initrevs.clone_ref(py).bind(py),
+ slf.stoprev,
+ slf.inclusive,
+ )
+ }
+}
+
pub fn init_module<'py>(
py: Python<'py>,
package: &str,
) -> PyResult<Bound<'py, PyModule>> {
let m = new_submodule(py, package, "ancestor")?;
m.add_class::<AncestorsIterator>()?;
+ m.add_class::<LazyAncestors>()?;
Ok(m)
}
--- a/rust/hg-pyo3/src/convert_cpython.rs Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/convert_cpython.rs Sat Dec 07 18:54:31 2024 +0100
@@ -222,7 +222,6 @@
/// static reference. It is possible, depending on `T` that such a leak cannot
/// occur in practice. We may later on define a marker trait for this,
/// which will allow us to make declare this function to be safe.
-#[allow(dead_code)]
pub(crate) unsafe fn py_leaked_borrow<'a, 'py: 'a, T>(
py: &impl WithGIL<'py>,
leaked: &'a cpython::UnsafePyLeaked<T>,
--- a/tests/test-rust-ancestor.py Sat Dec 07 18:42:06 2024 +0100
+++ b/tests/test-rust-ancestor.py Sat Dec 07 18:54:31 2024 +0100
@@ -68,6 +68,49 @@
ait = AncestorsIterator(idx, [3], 0, False)
self.assertEqual([r for r in ait], [2, 1, 0])
+ ait = AncestorsIterator(idx, [3], 0, False)
+ # tainting the index with a mutation, let's see what happens
+ # (should be more critical with AncestorsIterator)
+ del idx[0:2]
+ try:
+ next(ait)
+ except RuntimeError as exc:
+ assert "leaked reference after mutation" in exc.args[0]
+ else:
+ raise AssertionError("Expected an exception")
+
+ def testlazyancestors(self):
+ LazyAncestors = self.ancestors_mod().LazyAncestors
+
+ idx = self.parserustindex()
+ start_count = sys.getrefcount(idx.inner) # should be 2 (see Python doc)
+ self.assertEqual(
+ {i: (r[5], r[6]) for i, r in enumerate(idx)},
+ {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
+ )
+ lazy = LazyAncestors(idx, [3], 0, True)
+ # the LazyAncestors instance holds just one reference to the
+ # inner revlog. TODO check that this is normal
+ self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+
+ self.assertTrue(2 in lazy)
+ self.assertTrue(bool(lazy))
+ self.assertFalse(None in lazy)
+ self.assertEqual(list(lazy), [3, 2, 1, 0])
+ # a second time to validate that we spawn new iterators
+ self.assertEqual(list(lazy), [3, 2, 1, 0])
+
+ # now let's watch the refcounts closer
+ ait = iter(lazy)
+ self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
+ del ait
+ self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+ del lazy
+ self.assertEqual(sys.getrefcount(idx.inner), start_count)
+
+ # let's check bool for an empty one
+ self.assertFalse(LazyAncestors(idx, [0], 0, False))
+
def testrefcount(self):
AncestorsIterator = self.ancestors_mod().AncestorsIterator
@@ -135,37 +178,6 @@
):
rustext_pkg = rustext
- def testlazyancestors(self):
- LazyAncestors = self.ancestors_mod().LazyAncestors
-
- idx = self.parserustindex()
- start_count = sys.getrefcount(idx.inner) # should be 2 (see Python doc)
- self.assertEqual(
- {i: (r[5], r[6]) for i, r in enumerate(idx)},
- {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
- )
- lazy = LazyAncestors(idx, [3], 0, True)
- # the LazyAncestors instance holds just one reference to the
- # inner revlog.
- self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
-
- self.assertTrue(2 in lazy)
- self.assertTrue(bool(lazy))
- self.assertEqual(list(lazy), [3, 2, 1, 0])
- # a second time to validate that we spawn new iterators
- self.assertEqual(list(lazy), [3, 2, 1, 0])
-
- # now let's watch the refcounts closer
- ait = iter(lazy)
- self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
- del ait
- self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
- del lazy
- self.assertEqual(sys.getrefcount(idx.inner), start_count)
-
- # let's check bool for an empty one
- self.assertFalse(LazyAncestors(idx, [0], 0, False))
-
def testmissingancestors(self):
MissingAncestors = self.ancestors_mod().MissingAncestors