rust-pyo3-index: first mutating methods
This brings in `append()` and `__delitem__`, and is for us the
first validation of our inner mutability.
In the case of `__delitem__`, there was a tricky case where we
were tempted to use the higher level `PySlice::indices` API, with
a possible dangerous change of behaviour. We test it carefully from
the Python side.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-pyo3/src/revlog/index.rs Wed Dec 25 17:17:47 2024 +0100
@@ -0,0 +1,41 @@
+// revlog/index.rs
+//
+// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
+// 2020-2024 Raphaël Gomès <raphael.gomes@octobus.net>
+// 2024 Georges Racinet <georges.racinet@cloudcrane.io>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+//! Utilities for dealing with the index at the Python boundary
+use pyo3::prelude::*;
+use pyo3::types::{PyBytes, PyTuple};
+
+use hg::revlog::index::RevisionDataParams;
+
+pub fn py_tuple_to_revision_data_params(
+ tuple: &Bound<'_, PyTuple>,
+) -> PyResult<RevisionDataParams> {
+ // no need to check length: in PyO3 tup.get_item() does return
+ // proper errors
+ let offset_or_flags: u64 = tuple.get_item(0)?.extract()?;
+ let node_id = tuple
+ .get_item(7)?
+ .downcast::<PyBytes>()?
+ .as_bytes()
+ .try_into()
+ .expect("nodeid should be set");
+ let flags = (offset_or_flags & 0xFFFF) as u16;
+ let data_offset = offset_or_flags >> 16;
+ Ok(RevisionDataParams {
+ flags,
+ data_offset,
+ data_compressed_length: tuple.get_item(1)?.extract()?,
+ data_uncompressed_length: tuple.get_item(2)?.extract()?,
+ data_delta_base: tuple.get_item(3)?.extract()?,
+ link_rev: tuple.get_item(4)?.extract()?,
+ parent_rev_1: tuple.get_item(5)?.extract()?,
+ parent_rev_2: tuple.get_item(6)?.extract()?,
+ node_id,
+ ..Default::default()
+ })
+}
--- a/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 17:17:40 2024 +0100
+++ b/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 17:17:47 2024 +0100
@@ -9,7 +9,7 @@
#![allow(non_snake_case)]
use pyo3::buffer::PyBuffer;
use pyo3::prelude::*;
-use pyo3::types::{PyBytes, PyBytesMethods, PyList};
+use pyo3::types::{PyBytes, PyBytesMethods, PyList, PyTuple};
use pyo3_sharedref::PyShareable;
use std::sync::{
@@ -21,13 +21,13 @@
revlog::{
index::Index,
inner_revlog::InnerRevlog as CoreInnerRevlog,
- nodemap::{NodeMap, NodeTree as CoreNodeTree},
+ nodemap::{NodeMap, NodeMapError, NodeTree as CoreNodeTree},
options::RevlogOpenOptions,
RevlogIndex, RevlogType,
},
utils::files::get_path_from_bytes,
vfs::FnCacheVfs,
- BaseRevision, Revision,
+ BaseRevision, Revision, UncheckedRevision,
};
use crate::{
@@ -43,6 +43,8 @@
mod config;
use config::*;
+mod index;
+use index::py_tuple_to_revision_data_params;
#[pyclass]
#[allow(dead_code)]
@@ -225,6 +227,66 @@
})
}
+ /// append an index entry
+ fn _index_append(
+ slf: &Bound<'_, Self>,
+ tup: &Bound<'_, PyTuple>,
+ ) -> PyResult<()> {
+ // no need to check length: in PyO3 tup.get_item() does return
+ // proper errors
+ let node_bytes = tup.get_item(7)?.extract()?;
+ let node = node_from_py_bytes(&node_bytes)?;
+
+ Self::with_index_nt_write(slf, |idx, nt| {
+ let rev = idx.len() as BaseRevision;
+ // This is ok since we will immediately add the revision to the
+ // index
+ let rev = Revision(rev);
+ idx.append(py_tuple_to_revision_data_params(tup)?)
+ .map_err(revlog_error_from_msg)?;
+
+ nt.insert(idx, &node, rev).map_err(nodemap_error)?;
+ Ok(())
+ })
+ }
+
+ /// Removes one or several entries from the index.
+ ///
+ /// Historically, on the Mercurial revlog index, `__delitem__` has always
+ /// been both for `del idx[r1]` and `del idx[r1:r2]`. In both cases,
+ /// all entries starting from `r1` are removed anyway.
+ fn _index___delitem__(
+ slf: &Bound<'_, Self>,
+ arg: &Bound<'_, PyAny>,
+ ) -> PyResult<()> {
+ let start = if let Ok(rev) = arg.extract() {
+ UncheckedRevision(rev)
+ } else {
+ // here we could downcast to `PySlice` and use `indices()`, *but*
+ // the rust-cpython based version could not do that, and
+ // `indices()` does some resolving that makes it not equivalent,
+ // e.g., `idx[-1::]` has `start=0`. As we are currently in
+ // transition, we keep it the old way (hoping it was consistent
+ // with the C index).
+ let start = arg.getattr("start")?;
+ UncheckedRevision(start.extract()?)
+ };
+
+ Self::with_index_nt_write(slf, |idx, nt| {
+ // In the case of a slice, the check is possibly already done by
+ // `slice.indices`, which is itself an FFI wrapper for CPython's
+ // `PySlice_GetIndicesEx`
+ // (Python integration tests will tell us)
+ let start = idx.check_revision(start).ok_or_else(|| {
+ nodemap_error(NodeMapError::RevisionNotInIndex(start))
+ })?;
+ idx.remove(start).map_err(revlog_error_from_msg)?;
+ nt.invalidate_all();
+ Self::fill_nodemap(idx, nt)?;
+ Ok(())
+ })
+ }
+
fn _index___len__(slf: &Bound<'_, Self>) -> PyResult<usize> {
Self::with_index_read(slf, |idx| Ok(idx.len()))
}
@@ -258,7 +320,6 @@
f(&self_ref, guard)
}
- #[allow(dead_code)]
/// Take the lock on `slf.irl` for writing and call a closure.
///
/// See [`Self::with_core_read`] for more explanations.
@@ -310,7 +371,6 @@
})
}
- #[allow(dead_code)]
fn with_index_nt_write<T>(
slf: &Bound<'_, Self>,
f: impl FnOnce(&mut Index, &mut CoreNodeTree) -> PyResult<T>,
--- a/tests/test-rust-revlog.py Wed Dec 25 17:17:40 2024 +0100
+++ b/tests/test-rust-revlog.py Wed Dec 25 17:17:47 2024 +0100
@@ -28,6 +28,8 @@
node0 = node_bin(node_hex0)
bogus_node_hex = b'cafe' * 10
bogus_node = node_bin(bogus_node_hex)
+ node_hex2 = b"020a0ec626a192ae360b0269fe2de5ba6f05d1e7"
+ node2 = node_bin(node_hex2)
def test_index_nodemap(self):
idx = self.parserustindex()
@@ -50,6 +52,75 @@
idx = self.parserustindex()
self.assertEqual(len(idx), 4)
+ def test_index_append(self):
+ idx = self.parserustindex(data=b'')
+ self.assertEqual(len(idx), 0)
+ self.assertIsNone(idx.get_rev(self.node0))
+
+ # this is the same first entry as in data provided by base test class
+ # (we do not have __getitem__ in the PyO3 version yet)
+ idx.append((0, 82969, 484626, 0, 0, -1, -1, self.node0, 0, 0, 2, 2, -1))
+ self.assertEqual(len(idx), 1)
+ self.assertEqual(idx.get_rev(self.node0), 0)
+
+ def test_index_delitem_single(self):
+ idx = self.parserustindex()
+ del idx[2]
+ self.assertEqual(len(idx), 2)
+
+ # the nodetree is consistent
+ self.assertEqual(idx.get_rev(self.node0), 0)
+ self.assertIsNone(idx.get_rev(self.node2))
+
+ # not an error and does nothing
+ del idx[-1]
+ self.assertEqual(len(idx), 2)
+
+ for bogus in (-2, 17):
+ try:
+ del idx[bogus]
+ except ValueError as exc:
+ # this underlines that we should do better with this message
+ assert exc.args[0] == (
+ f"Inconsistency: Revision {bogus} found in nodemap "
+ "is not in revlog index"
+ )
+ else:
+ raise AssertionError(
+ f"an exception was expected for `del idx[{bogus}]`"
+ )
+
+ def test_index_delitem_slice(self):
+ idx = self.parserustindex()
+ del idx[2:3]
+ self.assertEqual(len(idx), 2)
+
+ # not an error and not equivalent to `del idx[0::]` but to
+ # `del idx[-1]` instead and thus does nothing.
+ del idx[-1::]
+ self.assertEqual(len(idx), 2)
+
+ for start, stop in (
+ (-2, None),
+ (17, None),
+ ):
+ try:
+ del idx[start:stop]
+ except ValueError as exc:
+ # this underlines that we should do better with this message
+ assert exc.args[0] == (
+ f"Inconsistency: Revision {start} found in nodemap "
+ "is not in revlog index"
+ )
+ else:
+ raise AssertionError(
+ f"an exception was expected for `del idx[{start}:{stop}]`"
+ )
+
+ # although the upper bound is way too big, this is not an error:
+ del idx[0::17]
+ self.assertEqual(len(idx), 0)
+
# Conditional skipping done by the base class
class RustInnerRevlogTest(