Mercurial > public > mercurial-scm > hg
changeset 52786:4e34e8fd46d4
rust-pyo3-revlog: nodemap based index methods
They are rather straightforward, except for `_index_get_rev`
that takes care of not initializing a nodetree on some conditions.
In `_index_partialmatch`, we solve the todo that was in the
`hg-cpython`, since we introduced the `py_node_for_rev` helper earlier.
The new test method in `test-rust-revlog.py` provides comparison
with the `hg-cpython` implementation of `InnerRevlog`.
author | Georges Racinet <georges.racinet@cloudcrane.io> |
---|---|
date | Wed, 25 Dec 2024 16:16:22 +0100 |
parents | 71ebe880f24a |
children | e5f89bd1a5ee |
files | rust/hg-pyo3/src/exceptions.rs rust/hg-pyo3/src/node.rs rust/hg-pyo3/src/revlog/mod.rs tests/test-rust-revlog.py |
diffstat | 4 files changed, 122 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-pyo3/src/exceptions.rs Wed Dec 25 11:58:56 2024 +0100 +++ b/rust/hg-pyo3/src/exceptions.rs Wed Dec 25 16:16:22 2024 +0100 @@ -54,6 +54,10 @@ mercurial_py_errors::RevlogError::new_err(e.to_string().into_bytes()) } +pub fn revlog_error_bare() -> PyErr { + mercurial_py_errors::RevlogError::new_err((None::<String>,)) +} + pub fn nodemap_error(err: NodeMapError) -> PyErr { match err { NodeMapError::MultipleResults => {
--- a/rust/hg-pyo3/src/node.rs Wed Dec 25 11:58:56 2024 +0100 +++ b/rust/hg-pyo3/src/node.rs Wed Dec 25 16:16:22 2024 +0100 @@ -1,4 +1,3 @@ -#![allow(dead_code)] use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyBytes;
--- a/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 11:58:56 2024 +0100 +++ b/rust/hg-pyo3/src/revlog/mod.rs Wed Dec 25 16:16:22 2024 +0100 @@ -11,12 +11,17 @@ use pyo3::types::{PyBytes, PyBytesMethods, PyList}; use pyo3_sharedref::PyShareable; -use std::sync::{atomic::AtomicUsize, RwLock, RwLockReadGuard}; +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + RwLock, RwLockReadGuard, +}; use hg::{ revlog::{ - index::Index, inner_revlog::InnerRevlog as CoreInnerRevlog, - nodemap::NodeTree as CoreNodeTree, options::RevlogOpenOptions, + index::Index, + inner_revlog::InnerRevlog as CoreInnerRevlog, + nodemap::{NodeMap, NodeTree as CoreNodeTree}, + options::RevlogOpenOptions, RevlogIndex, RevlogType, }, utils::files::get_path_from_bytes, @@ -26,9 +31,11 @@ use crate::{ exceptions::{ - map_lock_error, map_try_lock_error, nodemap_error, + map_lock_error, map_try_lock_error, nodemap_error, revlog_error_bare, revlog_error_from_msg, }, + node::{node_from_py_bytes, node_prefix_from_py_bytes, py_node_for_rev}, + revision::PyRevision, store::PyFnCache, util::{new_submodule, take_buffer_with_slice}, }; @@ -129,6 +136,93 @@ nodemap_queries: AtomicUsize::new(0), }) } + + // + // -- forwarded index methods -- + // + + fn _index_get_rev( + slf: &Bound<'_, Self>, + node: &Bound<'_, PyBytes>, + ) -> PyResult<Option<PyRevision>> { + let node = node_from_py_bytes(node)?; + + // Do not rewrite this with `Self::with_index_nt_read`: it makes + // inconditionally a volatile nodetree, and that is not the intent + // here: the code below specifically avoids that. + Self::with_core_read(slf, |self_ref, irl| { + let idx = &irl.index; + + let prev_queries = + self_ref.nodemap_queries.fetch_add(1, Ordering::Relaxed); + // Filelogs have no persistent nodemaps and are often small, + // use a brute force lookup from the end + // backwards. If there is a very large filelog + // (automation file that changes every + // commit etc.), it also seems to work quite well for + // all measured purposes so far. + if !self_ref.use_persistent_nodemap && prev_queries <= 3 { + return Ok(idx + .rev_from_node_no_persistent_nodemap(node.into()) + .ok() + .map(Into::into)); + } + + let opt = + self_ref.get_nodetree(idx)?.read().map_err(map_lock_error)?; + let nt = opt.as_ref().expect("nodetree should be set"); + + let rust_rev = + nt.find_bin(idx, node.into()).map_err(nodemap_error)?; + Ok(rust_rev.map(Into::into)) + }) + } + + /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if + /// node is not found. + /// + /// No need to repeat `node` in the exception, `mercurial/revlog.py` + /// will catch and rewrap with it + fn _index_rev( + slf: &Bound<'_, Self>, + node: &Bound<'_, PyBytes>, + ) -> PyResult<PyRevision> { + Self::_index_get_rev(slf, node)?.ok_or_else(revlog_error_bare) + } + + /// return True if the node exist in the index + fn _index_has_node( + slf: &Bound<'_, Self>, + node: &Bound<'_, PyBytes>, + ) -> PyResult<bool> { + Self::_index_get_rev(slf, node).map(|opt| opt.is_some()) + } + + /// find length of shortest hex nodeid of a binary ID + fn _index_shortest( + slf: &Bound<'_, Self>, + node: &Bound<'_, PyBytes>, + ) -> PyResult<usize> { + Self::with_index_nt_read(slf, |idx, nt| { + match nt.unique_prefix_len_node(idx, &node_from_py_bytes(node)?) { + Ok(Some(l)) => Ok(l), + Ok(None) => Err(revlog_error_bare()), + Err(e) => Err(nodemap_error(e)), + } + }) + } + + fn _index_partialmatch<'py>( + slf: &Bound<'py, Self>, + node: &Bound<'py, PyBytes>, + ) -> PyResult<Option<Bound<'py, PyBytes>>> { + Self::with_index_nt_read(slf, |idx, nt| { + Ok(nt + .find_bin(idx, node_prefix_from_py_bytes(node)?) + .map_err(nodemap_error)? + .map(|rev| py_node_for_rev(slf.py(), idx, rev))) + }) + } } impl InnerRevlog { @@ -171,7 +265,6 @@ /// [`NodeTree`] /// /// The [`NodeTree`] is initialized an filled before hand if needed. - #[allow(dead_code)] fn with_index_nt_read<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&Index, &CoreNodeTree) -> PyResult<T>, @@ -214,7 +307,6 @@ /// # Python exceptions /// The case mentioned in [`Self::fill_nodemap()`] cannot happen, as the /// NodeTree is empty when it is called. - #[allow(dead_code)] fn get_nodetree( &self, idx: &Index,
--- a/tests/test-rust-revlog.py Wed Dec 25 11:58:56 2024 +0100 +++ b/tests/test-rust-revlog.py Wed Dec 25 16:16:22 2024 +0100 @@ -4,6 +4,7 @@ bin as node_bin, hex, ) +from mercurial import error try: from mercurial import rustext @@ -25,6 +26,25 @@ node_hex0 = b'd1f4bbb0befc13bd8cd39d0fcdd93b8c078c4a2f' node0 = node_bin(node_hex0) + bogus_node_hex = b'cafe' * 10 + bogus_node = node_bin(bogus_node_hex) + + def test_index_nodemap(self): + idx = self.parserustindex() + self.assertTrue(idx.has_node(self.node0)) + self.assertFalse(idx.has_node(self.bogus_node)) + + self.assertEqual(idx.get_rev(self.node0), 0) + self.assertEqual(idx.get_rev(self.node0), 0) + + self.assertEqual(idx.rev(self.node0), 0) + with self.assertRaises(error.RevlogError) as exc_info: + idx.rev(self.bogus_node) + self.assertEqual(exc_info.exception.args, (None,)) + + self.assertEqual(idx.partialmatch(self.node_hex0[:3]), self.node0) + self.assertIsNone(idx.partialmatch(self.bogus_node_hex[:3])) + self.assertEqual(idx.shortest(self.node0), 1) # Conditional skipping done by the base class