changeset 52560:6b694bdf752a
rust-pyo3: implementation of LazyAncestors
There is a difference in the implementation of `__contains__`
between PyO3 and rust-cpython: when the signature in the Rust code
takes a precise type (e.g., `PyRevision`), rust-cpython would
automatically convert a conversion failure (`TypeError`) into
`Ok(false)`, whereas PyO3 lets it bubble up.
Hence we treat the case manually and add it to the common test.
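
As an illustration of that manual treatment, here is a minimal, hypothetical
PyO3 class (the `RevSet` name and its integer `revs` field are not part of
this changeset): `__contains__` accepts any Python object, attempts the
conversion itself, and maps a conversion failure to `false` instead of
letting the error propagate.
```
use pyo3::prelude::*;

#[pyclass(sequence)]
struct RevSet {
    revs: Vec<i32>,
}

#[pymethods]
impl RevSet {
    /// `in` support: take any Python object rather than a typed
    /// argument, so that a failed conversion (e.g. for `None`)
    /// yields `False` on the Python side instead of a `TypeError`.
    fn __contains__(&self, obj: &Bound<'_, PyAny>) -> PyResult<bool> {
        Ok(obj
            .extract::<i32>()
            .map_or(false, |rev| self.revs.contains(&rev)))
    }
}
```
The real `LazyAncestors.__contains__` in the ancestors.rs diff below applies
the same idea with `PyRevision::extract_bound(obj).map_or(Ok(false), …)`.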
In Mercurial Python code, `None in` for a `LazyAncestors` object can
really happen, namely in this lambda from `discovery._postprocessobsolete`:
```
ispushed = lambda n: torev(n) in futurecommon
```
This lambda can get called with `n` such that `torev(n)` is `None`
(seen in `test-bookmarks-push-pull.t`).
| author | Georges Racinet <georges.racinet@cloudcrane.io> |
| --- | --- |
| date | Sat, 07 Dec 2024 18:54:31 +0100 |
| parents | 3ffcdbf0b432 |
| children | 9af0330788a5 |
| files | rust/hg-pyo3/src/ancestors.rs rust/hg-pyo3/src/convert_cpython.rs tests/test-rust-ancestor.py |
| diffstat | 3 files changed, 131 insertions(+), 35 deletions(-) |
```
--- a/rust/hg-pyo3/src/ancestors.rs	Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/ancestors.rs	Sat Dec 07 18:54:31 2024 +0100
@@ -13,11 +13,14 @@
 use std::sync::RwLock;

-use vcsgraph::lazy_ancestors::AncestorsIterator as VCGAncestorsIterator;
+use vcsgraph::lazy_ancestors::{
+    AncestorsIterator as VCGAncestorsIterator,
+    LazyAncestors as VCGLazyAncestors,
+};

 use crate::convert_cpython::{
-    proxy_index_extract, proxy_index_py_leak, py_leaked_borrow_mut,
-    py_leaked_or_map_err,
+    proxy_index_extract, proxy_index_py_leak, py_leaked_borrow,
+    py_leaked_borrow_mut, py_leaked_or_map_err,
 };
 use crate::exceptions::{map_lock_error, GraphError};
 use crate::revision::{rev_pyiter_collect, PyRevision};

@@ -77,11 +80,93 @@
     }
 }

+#[pyclass(sequence)]
+struct LazyAncestors {
+    inner: RwLock<UnsafePyLeaked<VCGLazyAncestors<PySharedIndex>>>,
+    proxy_index: PyObject,
+    initrevs: PyObject,
+    stoprev: PyRevision,
+    inclusive: bool,
+}
+
+#[pymethods]
+impl LazyAncestors {
+    #[new]
+    fn new(
+        index_proxy: &Bound<'_, PyAny>,
+        initrevs: &Bound<'_, PyAny>,
+        stoprev: PyRevision,
+        inclusive: bool,
+    ) -> PyResult<Self> {
+        let cloned_proxy = index_proxy.clone().unbind();
+        let initvec: Vec<_> = {
+            // Safety: we don't leak the "faked" reference out of
+            // `UnsafePyLeaked`
+            let borrowed_idx = unsafe { proxy_index_extract(index_proxy)? };
+            rev_pyiter_collect(initrevs, borrowed_idx)?
+        };
+        let (py, leaked_idx) = proxy_index_py_leak(index_proxy)?;
+        // Safety: we don't leak the "faked" reference out of
+        // `UnsafePyLeaked`
+        let res_lazy = unsafe {
+            leaked_idx.map(py, |idx| {
+                VCGLazyAncestors::new(
+                    idx,
+                    initvec.into_iter().map(|r| r.0),
+                    stoprev.0,
+                    inclusive,
+                )
+            })
+        };
+        let lazy =
+            py_leaked_or_map_err(py, res_lazy, GraphError::from_vcsgraph)?;
+        Ok(Self {
+            inner: lazy.into(),
+            proxy_index: cloned_proxy,
+            initrevs: initrevs.clone().unbind(),
+            stoprev,
+            inclusive,
+        })
+    }
+
+    fn __bool__(slf: PyRef<'_, Self>) -> PyResult<bool> {
+        let leaked = slf.inner.read().map_err(map_lock_error)?;
+        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+        let inner = unsafe { py_leaked_borrow(&slf, &leaked) }?;
+        Ok(!inner.is_empty())
+    }
+
+    fn __contains__(
+        slf: PyRefMut<'_, Self>,
+        obj: &Bound<'_, PyAny>,
+    ) -> PyResult<bool> {
+        PyRevision::extract_bound(obj).map_or(Ok(false), |rev| {
+            let mut leaked = slf.inner.write().map_err(map_lock_error)?;
+            // Safety: we don't leak the "faked" reference out of
+            // `UnsafePyLeaked`
+            let mut inner =
+                unsafe { py_leaked_borrow_mut(&slf, &mut leaked) }?;
+            inner.contains(rev.0).map_err(GraphError::from_vcsgraph)
+        })
+    }
+
+    fn __iter__(slf: PyRef<'_, Self>) -> PyResult<AncestorsIterator> {
+        let py = slf.py();
+        AncestorsIterator::new(
+            slf.proxy_index.clone_ref(py).bind(py),
+            slf.initrevs.clone_ref(py).bind(py),
+            slf.stoprev,
+            slf.inclusive,
+        )
+    }
+}
+
 pub fn init_module<'py>(
     py: Python<'py>,
     package: &str,
 ) -> PyResult<Bound<'py, PyModule>> {
     let m = new_submodule(py, package, "ancestor")?;
     m.add_class::<AncestorsIterator>()?;
+    m.add_class::<LazyAncestors>()?;
     Ok(m)
 }
```
```
--- a/rust/hg-pyo3/src/convert_cpython.rs	Sat Dec 07 18:42:06 2024 +0100
+++ b/rust/hg-pyo3/src/convert_cpython.rs	Sat Dec 07 18:54:31 2024 +0100
@@ -222,7 +222,6 @@
 /// static reference. It is possible, depending on `T` that such a leak cannot
 /// occur in practice. We may later on define a marker trait for this,
 /// which will allow us to make declare this function to be safe.
-#[allow(dead_code)]
 pub(crate) unsafe fn py_leaked_borrow<'a, 'py: 'a, T>(
     py: &impl WithGIL<'py>,
     leaked: &'a cpython::UnsafePyLeaked<T>,
```
```
--- a/tests/test-rust-ancestor.py	Sat Dec 07 18:42:06 2024 +0100
+++ b/tests/test-rust-ancestor.py	Sat Dec 07 18:54:31 2024 +0100
@@ -68,6 +68,49 @@
         ait = AncestorsIterator(idx, [3], 0, False)
         self.assertEqual([r for r in ait], [2, 1, 0])

+        ait = AncestorsIterator(idx, [3], 0, False)
+        # tainting the index with a mutation, let's see what happens
+        # (should be more critical with AncestorsIterator)
+        del idx[0:2]
+        try:
+            next(ait)
+        except RuntimeError as exc:
+            assert "leaked reference after mutation" in exc.args[0]
+        else:
+            raise AssertionError("Expected an exception")
+
+    def testlazyancestors(self):
+        LazyAncestors = self.ancestors_mod().LazyAncestors
+
+        idx = self.parserustindex()
+        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
+        self.assertEqual(
+            {i: (r[5], r[6]) for i, r in enumerate(idx)},
+            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
+        )
+        lazy = LazyAncestors(idx, [3], 0, True)
+        # the LazyAncestors instance holds just one reference to the
+        # inner revlog. TODO check that this is normal
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+
+        self.assertTrue(2 in lazy)
+        self.assertTrue(bool(lazy))
+        self.assertFalse(None in lazy)
+        self.assertEqual(list(lazy), [3, 2, 1, 0])
+        # a second time to validate that we spawn new iterators
+        self.assertEqual(list(lazy), [3, 2, 1, 0])
+
+        # now let's watch the refcounts closer
+        ait = iter(lazy)
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
+        del ait
+        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
+        del lazy
+        self.assertEqual(sys.getrefcount(idx.inner), start_count)
+
+        # let's check bool for an empty one
+        self.assertFalse(LazyAncestors(idx, [0], 0, False))
+
     def testrefcount(self):
         AncestorsIterator = self.ancestors_mod().AncestorsIterator

@@ -135,37 +178,6 @@
     ):
         rustext_pkg = rustext

-    def testlazyancestors(self):
-        LazyAncestors = self.ancestors_mod().LazyAncestors
-
-        idx = self.parserustindex()
-        start_count = sys.getrefcount(idx.inner)  # should be 2 (see Python doc)
-        self.assertEqual(
-            {i: (r[5], r[6]) for i, r in enumerate(idx)},
-            {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)},
-        )
-        lazy = LazyAncestors(idx, [3], 0, True)
-        # the LazyAncestors instance holds just one reference to the
-        # inner revlog.
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
-
-        self.assertTrue(2 in lazy)
-        self.assertTrue(bool(lazy))
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-        # a second time to validate that we spawn new iterators
-        self.assertEqual(list(lazy), [3, 2, 1, 0])
-
-        # now let's watch the refcounts closer
-        ait = iter(lazy)
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 2)
-        del ait
-        self.assertEqual(sys.getrefcount(idx.inner), start_count + 1)
-        del lazy
-        self.assertEqual(sys.getrefcount(idx.inner), start_count)
-
-        # let's check bool for an empty one
-        self.assertFalse(LazyAncestors(idx, [0], 0, False))
-
     def testmissingancestors(self):
         MissingAncestors = self.ancestors_mod().MissingAncestors
```