changeset 52786:4e34e8fd46d4

rust-pyo3-revlog: nodemap based index methods They are rather straightforward, except for `_index_get_rev` that takes care of not initializing a nodetree on some conditions. In `_index_partialmatch`, we solve the todo that was in the `hg-cpython`, since we introduced the `py_node_for_rev` helper earlier. The new test method in `test-rust-revlog.py` provides comparison with the `hg-cpython` implementation of `InnerRevlog`.
author Georges Racinet <georges.racinet@cloudcrane.io>
date Wed, 25 Dec 2024 16:16:22 +0100
parents 71ebe880f24a
children e5f89bd1a5ee
files rust/hg-pyo3/src/exceptions.rs rust/hg-pyo3/src/node.rs rust/hg-pyo3/src/revlog/mod.rs tests/test-rust-revlog.py
diffstat 4 files changed, 122 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-pyo3/src/exceptions.rs	Wed Dec 25 11:58:56 2024 +0100
+++ b/rust/hg-pyo3/src/exceptions.rs	Wed Dec 25 16:16:22 2024 +0100
@@ -54,6 +54,10 @@
     mercurial_py_errors::RevlogError::new_err(e.to_string().into_bytes())
 }
 
+pub fn revlog_error_bare() -> PyErr {
+    mercurial_py_errors::RevlogError::new_err((None::<String>,))
+}
+
 pub fn nodemap_error(err: NodeMapError) -> PyErr {
     match err {
         NodeMapError::MultipleResults => {
--- a/rust/hg-pyo3/src/node.rs	Wed Dec 25 11:58:56 2024 +0100
+++ b/rust/hg-pyo3/src/node.rs	Wed Dec 25 16:16:22 2024 +0100
@@ -1,4 +1,3 @@
-#![allow(dead_code)]
 use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::types::PyBytes;
--- a/rust/hg-pyo3/src/revlog/mod.rs	Wed Dec 25 11:58:56 2024 +0100
+++ b/rust/hg-pyo3/src/revlog/mod.rs	Wed Dec 25 16:16:22 2024 +0100
@@ -11,12 +11,17 @@
 use pyo3::types::{PyBytes, PyBytesMethods, PyList};
 use pyo3_sharedref::PyShareable;
 
-use std::sync::{atomic::AtomicUsize, RwLock, RwLockReadGuard};
+use std::sync::{
+    atomic::{AtomicUsize, Ordering},
+    RwLock, RwLockReadGuard,
+};
 
 use hg::{
     revlog::{
-        index::Index, inner_revlog::InnerRevlog as CoreInnerRevlog,
-        nodemap::NodeTree as CoreNodeTree, options::RevlogOpenOptions,
+        index::Index,
+        inner_revlog::InnerRevlog as CoreInnerRevlog,
+        nodemap::{NodeMap, NodeTree as CoreNodeTree},
+        options::RevlogOpenOptions,
         RevlogIndex, RevlogType,
     },
     utils::files::get_path_from_bytes,
@@ -26,9 +31,11 @@
 
 use crate::{
     exceptions::{
-        map_lock_error, map_try_lock_error, nodemap_error,
+        map_lock_error, map_try_lock_error, nodemap_error, revlog_error_bare,
         revlog_error_from_msg,
     },
+    node::{node_from_py_bytes, node_prefix_from_py_bytes, py_node_for_rev},
+    revision::PyRevision,
     store::PyFnCache,
     util::{new_submodule, take_buffer_with_slice},
 };
@@ -129,6 +136,93 @@
             nodemap_queries: AtomicUsize::new(0),
         })
     }
+
+    //
+    // -- forwarded index methods --
+    //
+
+    fn _index_get_rev(
+        slf: &Bound<'_, Self>,
+        node: &Bound<'_, PyBytes>,
+    ) -> PyResult<Option<PyRevision>> {
+        let node = node_from_py_bytes(node)?;
+
+        // Do not rewrite this with `Self::with_index_nt_read`: it makes
+        // inconditionally a volatile nodetree, and that is not the intent
+        // here: the code below specifically avoids that.
+        Self::with_core_read(slf, |self_ref, irl| {
+            let idx = &irl.index;
+
+            let prev_queries =
+                self_ref.nodemap_queries.fetch_add(1, Ordering::Relaxed);
+            // Filelogs have no persistent nodemaps and are often small,
+            // use a brute force lookup from the end
+            // backwards. If there is a very large filelog
+            // (automation file that changes every
+            // commit etc.), it also seems to work quite well for
+            // all measured purposes so far.
+            if !self_ref.use_persistent_nodemap && prev_queries <= 3 {
+                return Ok(idx
+                    .rev_from_node_no_persistent_nodemap(node.into())
+                    .ok()
+                    .map(Into::into));
+            }
+
+            let opt =
+                self_ref.get_nodetree(idx)?.read().map_err(map_lock_error)?;
+            let nt = opt.as_ref().expect("nodetree should be set");
+
+            let rust_rev =
+                nt.find_bin(idx, node.into()).map_err(nodemap_error)?;
+            Ok(rust_rev.map(Into::into))
+        })
+    }
+
+    /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if
+    /// node is not found.
+    ///
+    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
+    /// will catch and rewrap with it
+    fn _index_rev(
+        slf: &Bound<'_, Self>,
+        node: &Bound<'_, PyBytes>,
+    ) -> PyResult<PyRevision> {
+        Self::_index_get_rev(slf, node)?.ok_or_else(revlog_error_bare)
+    }
+
+    /// return True if the node exist in the index
+    fn _index_has_node(
+        slf: &Bound<'_, Self>,
+        node: &Bound<'_, PyBytes>,
+    ) -> PyResult<bool> {
+        Self::_index_get_rev(slf, node).map(|opt| opt.is_some())
+    }
+
+    /// find length of shortest hex nodeid of a binary ID
+    fn _index_shortest(
+        slf: &Bound<'_, Self>,
+        node: &Bound<'_, PyBytes>,
+    ) -> PyResult<usize> {
+        Self::with_index_nt_read(slf, |idx, nt| {
+            match nt.unique_prefix_len_node(idx, &node_from_py_bytes(node)?) {
+                Ok(Some(l)) => Ok(l),
+                Ok(None) => Err(revlog_error_bare()),
+                Err(e) => Err(nodemap_error(e)),
+            }
+        })
+    }
+
+    fn _index_partialmatch<'py>(
+        slf: &Bound<'py, Self>,
+        node: &Bound<'py, PyBytes>,
+    ) -> PyResult<Option<Bound<'py, PyBytes>>> {
+        Self::with_index_nt_read(slf, |idx, nt| {
+            Ok(nt
+                .find_bin(idx, node_prefix_from_py_bytes(node)?)
+                .map_err(nodemap_error)?
+                .map(|rev| py_node_for_rev(slf.py(), idx, rev)))
+        })
+    }
 }
 
 impl InnerRevlog {
@@ -171,7 +265,6 @@
     /// [`NodeTree`]
     ///
     /// The [`NodeTree`] is initialized an filled before hand if needed.
-    #[allow(dead_code)]
     fn with_index_nt_read<T>(
         slf: &Bound<'_, Self>,
         f: impl FnOnce(&Index, &CoreNodeTree) -> PyResult<T>,
@@ -214,7 +307,6 @@
     /// # Python exceptions
     /// The case mentioned in [`Self::fill_nodemap()`] cannot happen, as the
     /// NodeTree is empty when it is called.
-    #[allow(dead_code)]
     fn get_nodetree(
         &self,
         idx: &Index,
--- a/tests/test-rust-revlog.py	Wed Dec 25 11:58:56 2024 +0100
+++ b/tests/test-rust-revlog.py	Wed Dec 25 16:16:22 2024 +0100
@@ -4,6 +4,7 @@
     bin as node_bin,
     hex,
 )
+from mercurial import error
 
 try:
     from mercurial import rustext
@@ -25,6 +26,25 @@
 
     node_hex0 = b'd1f4bbb0befc13bd8cd39d0fcdd93b8c078c4a2f'
     node0 = node_bin(node_hex0)
+    bogus_node_hex = b'cafe' * 10
+    bogus_node = node_bin(bogus_node_hex)
+
+    def test_index_nodemap(self):
+        idx = self.parserustindex()
+        self.assertTrue(idx.has_node(self.node0))
+        self.assertFalse(idx.has_node(self.bogus_node))
+
+        self.assertEqual(idx.get_rev(self.node0), 0)
+        self.assertEqual(idx.get_rev(self.node0), 0)
+
+        self.assertEqual(idx.rev(self.node0), 0)
+        with self.assertRaises(error.RevlogError) as exc_info:
+            idx.rev(self.bogus_node)
+        self.assertEqual(exc_info.exception.args, (None,))
+
+        self.assertEqual(idx.partialmatch(self.node_hex0[:3]), self.node0)
+        self.assertIsNone(idx.partialmatch(self.bogus_node_hex[:3]))
+        self.assertEqual(idx.shortest(self.node0), 1)
 
 
 # Conditional skipping done by the base class