Mercurial > public > mercurial-scm > hg
changeset 52789:34f44aa5e844
rust-pyo3-index: first mutating methods
This brings in `append()` and `__delitem__`, and is for us the
first validation of our inner mutability.
In the case of `__delitem__`, there was a tricky case where we
were tempted to use the higher level `PySlice::indices` API, with
a possible dangerous change of behaviour. We test it carefully from
the Python side.
author | Georges Racinet <georges.racinet@cloudcrane.io> |
---|---|
date | Wed, 25 Dec 2024 17:17:47 +0100 |
parents | e29e75e8328c |
children | 1b9907575768 |
files | rust/hg-pyo3/src/revlog/index.rs rust/hg-pyo3/src/revlog/mod.rs tests/test-rust-revlog.py |
diffstat | 3 files changed, 177 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-pyo3/src/revlog/index.rs Wed Dec 25 17:17:47 2024 +0100 @@ -0,0 +1,41 @@ +// revlog/index.rs +// +// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> +// 2020-2024 Raphaël Gomès <raphael.gomes@octobus.net> +// 2024 Georges Racinet <georges.racinet@cloudcrane.io> +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. +//! Utilities for dealing with the index at the Python boundary +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyTuple}; + +use hg::revlog::index::RevisionDataParams; + +pub fn py_tuple_to_revision_data_params( + tuple: &Bound<'_, PyTuple>, +) -> PyResult<RevisionDataParams> { + // no need to check length: in PyO3 tup.get_item() does return + // proper errors + let offset_or_flags: u64 = tuple.get_item(0)?.extract()?; + let node_id = tuple + .get_item(7)? + .downcast::<PyBytes>()? + .as_bytes() + .try_into() + .expect("nodeid should be set"); + let flags = (offset_or_flags & 0xFFFF) as u16; + let data_offset = offset_or_flags >> 16; + Ok(RevisionDataParams { + flags, + data_offset, + data_compressed_length: tuple.get_item(1)?.extract()?, + data_uncompressed_length: tuple.get_item(2)?.extract()?, + data_delta_base: tuple.get_item(3)?.extract()?, + link_rev: tuple.get_item(4)?.extract()?, + parent_rev_1: tuple.get_item(5)?.extract()?, + parent_rev_2: tuple.get_item(6)?.extract()?, + node_id, + ..Default::default() + }) +}
--- a/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 17:17:40 2024 +0100 +++ b/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 17:17:47 2024 +0100 @@ -9,7 +9,7 @@ #![allow(non_snake_case)] use pyo3::buffer::PyBuffer; use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyBytesMethods, PyList}; +use pyo3::types::{PyBytes, PyBytesMethods, PyList, PyTuple}; use pyo3_sharedref::PyShareable; use std::sync::{ @@ -21,13 +21,13 @@ revlog::{ index::Index, inner_revlog::InnerRevlog as CoreInnerRevlog, - nodemap::{NodeMap, NodeTree as CoreNodeTree}, + nodemap::{NodeMap, NodeMapError, NodeTree as CoreNodeTree}, options::RevlogOpenOptions, RevlogIndex, RevlogType, }, utils::files::get_path_from_bytes, vfs::FnCacheVfs, - BaseRevision, Revision, + BaseRevision, Revision, UncheckedRevision, }; use crate::{ @@ -43,6 +43,8 @@ mod config; use config::*; +mod index; +use index::py_tuple_to_revision_data_params; #[pyclass] #[allow(dead_code)] @@ -225,6 +227,66 @@ }) } + /// append an index entry + fn _index_append( + slf: &Bound<'_, Self>, + tup: &Bound<'_, PyTuple>, + ) -> PyResult<()> { + // no need to check length: in PyO3 tup.get_item() does return + // proper errors + let node_bytes = tup.get_item(7)?.extract()?; + let node = node_from_py_bytes(&node_bytes)?; + + Self::with_index_nt_write(slf, |idx, nt| { + let rev = idx.len() as BaseRevision; + // This is ok since we will immediately add the revision to the + // index + let rev = Revision(rev); + idx.append(py_tuple_to_revision_data_params(tup)?) + .map_err(revlog_error_from_msg)?; + + nt.insert(idx, &node, rev).map_err(nodemap_error)?; + Ok(()) + }) + } + + /// Removes one or several entries from the index. + /// + /// Historically, on the Mercurial revlog index, `__delitem__` has always + /// been both for `del idx[r1]` and `del idx[r1:r2]`. In both cases, + /// all entries starting from `r1` are removed anyway. + fn _index___delitem__( + slf: &Bound<'_, Self>, + arg: &Bound<'_, PyAny>, + ) -> PyResult<()> { + let start = if let Ok(rev) = arg.extract() { + UncheckedRevision(rev) + } else { + // here we could downcast to `PySlice` and use `indices()`, *but* + // the rust-cpython based version could not do that, and + // `indices()` does some resolving that makes it not equivalent, + // e.g., `idx[-1::]` has `start=0`. As we are currently in + // transition, we keep it the old way (hoping it was consistent + // with the C index). + let start = arg.getattr("start")?; + UncheckedRevision(start.extract()?) + }; + + Self::with_index_nt_write(slf, |idx, nt| { + // In the case of a slice, the check is possibly already done by + // `slice.indices`, which is itself an FFI wrapper for CPython's + // `PySlice_GetIndicesEx` + // (Python integration tests will tell us) + let start = idx.check_revision(start).ok_or_else(|| { + nodemap_error(NodeMapError::RevisionNotInIndex(start)) + })?; + idx.remove(start).map_err(revlog_error_from_msg)?; + nt.invalidate_all(); + Self::fill_nodemap(idx, nt)?; + Ok(()) + }) + } + fn _index___len__(slf: &Bound<'_, Self>) -> PyResult<usize> { Self::with_index_read(slf, |idx| Ok(idx.len())) } @@ -258,7 +320,6 @@ f(&self_ref, guard) } - #[allow(dead_code)] /// Take the lock on `slf.irl` for writing and call a closure. /// /// See [`Self::with_core_read`] for more explanations. @@ -310,7 +371,6 @@ }) } - #[allow(dead_code)] fn with_index_nt_write<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&mut Index, &mut CoreNodeTree) -> PyResult<T>,
--- a/tests/test-rust-revlog.py Wed Dec 25 17:17:40 2024 +0100 +++ b/tests/test-rust-revlog.py Wed Dec 25 17:17:47 2024 +0100 @@ -28,6 +28,8 @@ node0 = node_bin(node_hex0) bogus_node_hex = b'cafe' * 10 bogus_node = node_bin(bogus_node_hex) + node_hex2 = b"020a0ec626a192ae360b0269fe2de5ba6f05d1e7" + node2 = node_bin(node_hex2) def test_index_nodemap(self): idx = self.parserustindex() @@ -50,6 +52,75 @@ idx = self.parserustindex() self.assertEqual(len(idx), 4) + def test_index_append(self): + idx = self.parserustindex(data=b'') + self.assertEqual(len(idx), 0) + self.assertIsNone(idx.get_rev(self.node0)) + + # this is the same first entry as in data provided by base test class + # (we do not have __getitem__ in the PyO3 version yet) + idx.append((0, 82969, 484626, 0, 0, -1, -1, self.node0, 0, 0, 2, 2, -1)) + self.assertEqual(len(idx), 1) + self.assertEqual(idx.get_rev(self.node0), 0) + + def test_index_delitem_single(self): + idx = self.parserustindex() + del idx[2] + self.assertEqual(len(idx), 2) + + # the nodetree is consistent + self.assertEqual(idx.get_rev(self.node0), 0) + self.assertIsNone(idx.get_rev(self.node2)) + + # not an error and does nothing + del idx[-1] + self.assertEqual(len(idx), 2) + + for bogus in (-2, 17): + try: + del idx[bogus] + except ValueError as exc: + # this underlines that we should do better with this message + assert exc.args[0] == ( + f"Inconsistency: Revision {bogus} found in nodemap " + "is not in revlog index" + ) + else: + raise AssertionError( + f"an exception was expected for `del idx[{bogus}]`" + ) + + def test_index_delitem_slice(self): + idx = self.parserustindex() + del idx[2:3] + self.assertEqual(len(idx), 2) + + # not an error and not equivalent to `del idx[0::]` but to + # `del idx[-1]` instead and thus does nothing. + del idx[-1::] + self.assertEqual(len(idx), 2) + + for start, stop in ( + (-2, None), + (17, None), + ): + try: + del idx[start:stop] + except ValueError as exc: + # this underlines that we should do better with this message + assert exc.args[0] == ( + f"Inconsistency: Revision {start} found in nodemap " + "is not in revlog index" + ) + else: + raise AssertionError( + f"an exception was expected for `del idx[{start}:{stop}]`" + ) + + # although the upper bound is way too big, this is not an error: + del idx[0::17] + self.assertEqual(len(idx), 0) + # Conditional skipping done by the base class class RustInnerRevlogTest(