changeset 52789:34f44aa5e844

rust-pyo3-index: first mutating methods This brings in `append()` and `__delitem__`, and is for us the first validation of our inner mutability. In the case of `__delitem__`, there was a tricky case where we were tempted to use the higher level `PySlice::indices` API, with a possible dangerous change of behaviour. We test it carefully from the Python side.
author Georges Racinet <georges.racinet@cloudcrane.io>
date Wed, 25 Dec 2024 17:17:47 +0100
parents e29e75e8328c
children 1b9907575768
files rust/hg-pyo3/src/revlog/index.rs rust/hg-pyo3/src/revlog/mod.rs tests/test-rust-revlog.py
diffstat 3 files changed, 177 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-pyo3/src/revlog/index.rs	Wed Dec 25 17:17:47 2024 +0100
@@ -0,0 +1,41 @@
+// revlog/index.rs
+//
+// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
+//           2020-2024 Raphaël Gomès <raphael.gomes@octobus.net>
+//           2024 Georges Racinet <georges.racinet@cloudcrane.io>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+//! Utilities for dealing with the index at the Python boundary
+use pyo3::prelude::*;
+use pyo3::types::{PyBytes, PyTuple};
+
+use hg::revlog::index::RevisionDataParams;
+
+pub fn py_tuple_to_revision_data_params(
+    tuple: &Bound<'_, PyTuple>,
+) -> PyResult<RevisionDataParams> {
+    // no need to check length: in PyO3 tup.get_item() does return
+    // proper errors
+    let offset_or_flags: u64 = tuple.get_item(0)?.extract()?;
+    let node_id = tuple
+        .get_item(7)?
+        .downcast::<PyBytes>()?
+        .as_bytes()
+        .try_into()
+        .expect("nodeid should be set");
+    let flags = (offset_or_flags & 0xFFFF) as u16;
+    let data_offset = offset_or_flags >> 16;
+    Ok(RevisionDataParams {
+        flags,
+        data_offset,
+        data_compressed_length: tuple.get_item(1)?.extract()?,
+        data_uncompressed_length: tuple.get_item(2)?.extract()?,
+        data_delta_base: tuple.get_item(3)?.extract()?,
+        link_rev: tuple.get_item(4)?.extract()?,
+        parent_rev_1: tuple.get_item(5)?.extract()?,
+        parent_rev_2: tuple.get_item(6)?.extract()?,
+        node_id,
+        ..Default::default()
+    })
+}
--- a/rust/hg-pyo3/src/revlog/mod.rs	Wed Dec 25 17:17:40 2024 +0100
+++ b/rust/hg-pyo3/src/revlog/mod.rs	Wed Dec 25 17:17:47 2024 +0100
@@ -9,7 +9,7 @@
 #![allow(non_snake_case)]
 use pyo3::buffer::PyBuffer;
 use pyo3::prelude::*;
-use pyo3::types::{PyBytes, PyBytesMethods, PyList};
+use pyo3::types::{PyBytes, PyBytesMethods, PyList, PyTuple};
 use pyo3_sharedref::PyShareable;
 
 use std::sync::{
@@ -21,13 +21,13 @@
     revlog::{
         index::Index,
         inner_revlog::InnerRevlog as CoreInnerRevlog,
-        nodemap::{NodeMap, NodeTree as CoreNodeTree},
+        nodemap::{NodeMap, NodeMapError, NodeTree as CoreNodeTree},
         options::RevlogOpenOptions,
         RevlogIndex, RevlogType,
     },
     utils::files::get_path_from_bytes,
     vfs::FnCacheVfs,
-    BaseRevision, Revision,
+    BaseRevision, Revision, UncheckedRevision,
 };
 
 use crate::{
@@ -43,6 +43,8 @@
 
 mod config;
 use config::*;
+mod index;
+use index::py_tuple_to_revision_data_params;
 
 #[pyclass]
 #[allow(dead_code)]
@@ -225,6 +227,66 @@
         })
     }
 
+    /// append an index entry
+    fn _index_append(
+        slf: &Bound<'_, Self>,
+        tup: &Bound<'_, PyTuple>,
+    ) -> PyResult<()> {
+        // no need to check length: in PyO3 tup.get_item() does return
+        // proper errors
+        let node_bytes = tup.get_item(7)?.extract()?;
+        let node = node_from_py_bytes(&node_bytes)?;
+
+        Self::with_index_nt_write(slf, |idx, nt| {
+            let rev = idx.len() as BaseRevision;
+            // This is ok since we will immediately add the revision to the
+            // index
+            let rev = Revision(rev);
+            idx.append(py_tuple_to_revision_data_params(tup)?)
+                .map_err(revlog_error_from_msg)?;
+
+            nt.insert(idx, &node, rev).map_err(nodemap_error)?;
+            Ok(())
+        })
+    }
+
+    /// Removes one or several entries from the index.
+    ///
+    /// Historically, on the Mercurial revlog index, `__delitem__` has always
+    /// been both for `del idx[r1]` and `del idx[r1:r2]`. In both cases,
+    /// all entries starting from `r1` are removed anyway.
+    fn _index___delitem__(
+        slf: &Bound<'_, Self>,
+        arg: &Bound<'_, PyAny>,
+    ) -> PyResult<()> {
+        let start = if let Ok(rev) = arg.extract() {
+            UncheckedRevision(rev)
+        } else {
+            // here we could downcast to `PySlice` and use `indices()`, *but*
+            // the rust-cpython based version could not do that, and
+            // `indices()` does some resolving that makes it not equivalent,
+            // e.g., `idx[-1::]` has `start=0`. As we are currently in
+            // transition, we keep it the old way (hoping it was consistent
+            // with the C index).
+            let start = arg.getattr("start")?;
+            UncheckedRevision(start.extract()?)
+        };
+
+        Self::with_index_nt_write(slf, |idx, nt| {
+            // In the case of a slice, the check is possibly already done by
+            // `slice.indices`, which is itself an FFI wrapper for CPython's
+            // `PySlice_GetIndicesEx`
+            // (Python integration tests will tell us)
+            let start = idx.check_revision(start).ok_or_else(|| {
+                nodemap_error(NodeMapError::RevisionNotInIndex(start))
+            })?;
+            idx.remove(start).map_err(revlog_error_from_msg)?;
+            nt.invalidate_all();
+            Self::fill_nodemap(idx, nt)?;
+            Ok(())
+        })
+    }
+
     fn _index___len__(slf: &Bound<'_, Self>) -> PyResult<usize> {
         Self::with_index_read(slf, |idx| Ok(idx.len()))
     }
@@ -258,7 +320,6 @@
         f(&self_ref, guard)
     }
 
-    #[allow(dead_code)]
     /// Take the lock on `slf.irl` for writing and call a closure.
     ///
     /// See [`Self::with_core_read`] for more explanations.
@@ -310,7 +371,6 @@
         })
     }
 
-    #[allow(dead_code)]
     fn with_index_nt_write<T>(
         slf: &Bound<'_, Self>,
         f: impl FnOnce(&mut Index, &mut CoreNodeTree) -> PyResult<T>,
--- a/tests/test-rust-revlog.py	Wed Dec 25 17:17:40 2024 +0100
+++ b/tests/test-rust-revlog.py	Wed Dec 25 17:17:47 2024 +0100
@@ -28,6 +28,8 @@
     node0 = node_bin(node_hex0)
     bogus_node_hex = b'cafe' * 10
     bogus_node = node_bin(bogus_node_hex)
+    node_hex2 = b"020a0ec626a192ae360b0269fe2de5ba6f05d1e7"
+    node2 = node_bin(node_hex2)
 
     def test_index_nodemap(self):
         idx = self.parserustindex()
@@ -50,6 +52,75 @@
         idx = self.parserustindex()
         self.assertEqual(len(idx), 4)
 
+    def test_index_append(self):
+        idx = self.parserustindex(data=b'')
+        self.assertEqual(len(idx), 0)
+        self.assertIsNone(idx.get_rev(self.node0))
+
+        # this is the same first entry as in data provided by base test class
+        # (we do not have __getitem__ in the PyO3 version yet)
+        idx.append((0, 82969, 484626, 0, 0, -1, -1, self.node0, 0, 0, 2, 2, -1))
+        self.assertEqual(len(idx), 1)
+        self.assertEqual(idx.get_rev(self.node0), 0)
+
+    def test_index_delitem_single(self):
+        idx = self.parserustindex()
+        del idx[2]
+        self.assertEqual(len(idx), 2)
+
+        # the nodetree is consistent
+        self.assertEqual(idx.get_rev(self.node0), 0)
+        self.assertIsNone(idx.get_rev(self.node2))
+
+        # not an error and does nothing
+        del idx[-1]
+        self.assertEqual(len(idx), 2)
+
+        for bogus in (-2, 17):
+            try:
+                del idx[bogus]
+            except ValueError as exc:
+                # this underlines that we should do better with this message
+                assert exc.args[0] == (
+                    f"Inconsistency: Revision {bogus} found in nodemap "
+                    "is not in revlog index"
+                )
+            else:
+                raise AssertionError(
+                    f"an exception was expected for `del idx[{bogus}]`"
+                )
+
+    def test_index_delitem_slice(self):
+        idx = self.parserustindex()
+        del idx[2:3]
+        self.assertEqual(len(idx), 2)
+
+        # not an error and not equivalent to `del idx[0::]` but to
+        # `del idx[-1]` instead and thus does nothing.
+        del idx[-1::]
+        self.assertEqual(len(idx), 2)
+
+        for start, stop in (
+            (-2, None),
+            (17, None),
+        ):
+            try:
+                del idx[start:stop]
+            except ValueError as exc:
+                # this underlines that we should do better with this message
+                assert exc.args[0] == (
+                    f"Inconsistency: Revision {start} found in nodemap "
+                    "is not in revlog index"
+                )
+            else:
+                raise AssertionError(
+                    f"an exception was expected for `del idx[{start}:{stop}]`"
+                )
+
+        # although the upper bound is way too big, this is not an error:
+        del idx[0::17]
+        self.assertEqual(len(idx), 0)
+
 
 # Conditional skipping done by the base class
 class RustInnerRevlogTest(