Mercurial > public > mercurial-scm > hg
view rust/hg-pyo3/src/revlog/mod.rs @ 52788:e29e75e8328c
rust-pyo3-revlog: index and notetree writing accessors
It works in the same way as their reading counterparts, given that
we are leveraging the inner mutability of the `PyShareable` for the index
and of the main object for the nodetree.
author | Georges Racinet <georges.racinet@cloudcrane.io> |
---|---|
date | Wed, 25 Dec 2024 17:17:40 +0100 |
parents | e5f89bd1a5ee |
children | 34f44aa5e844 |
line wrap: on
line source
// revlog.rs // // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> // 2020-2024 Raphaël Gomès <raphael.gomes@octobus.net> // 2024 Georges Racinet <georges.racinet@cloudcrane.io> // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. #![allow(non_snake_case)] use pyo3::buffer::PyBuffer; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyBytesMethods, PyList}; use pyo3_sharedref::PyShareable; use std::sync::{ atomic::{AtomicUsize, Ordering}, RwLock, RwLockReadGuard, RwLockWriteGuard, }; use hg::{ revlog::{ index::Index, inner_revlog::InnerRevlog as CoreInnerRevlog, nodemap::{NodeMap, NodeTree as CoreNodeTree}, options::RevlogOpenOptions, RevlogIndex, RevlogType, }, utils::files::get_path_from_bytes, vfs::FnCacheVfs, BaseRevision, Revision, }; use crate::{ exceptions::{ map_lock_error, map_try_lock_error, nodemap_error, revlog_error_bare, revlog_error_from_msg, }, node::{node_from_py_bytes, node_prefix_from_py_bytes, py_node_for_rev}, revision::PyRevision, store::PyFnCache, util::{new_submodule, take_buffer_with_slice}, }; mod config; use config::*; #[pyclass] #[allow(dead_code)] struct InnerRevlog { irl: PyShareable<CoreInnerRevlog>, nt: RwLock<Option<CoreNodeTree>>, docket: Option<PyObject>, // Holds a reference to the mmap'ed persistent nodemap data nodemap_mmap: Option<PyBuffer<u8>>, // Holds a reference to the mmap'ed persistent index data index_mmap: Option<PyBuffer<u8>>, revision_cache: Option<PyObject>, head_revs_py_list: Option<Py<PyList>>, head_node_ids_py_list: Option<Py<PyList>>, use_persistent_nodemap: bool, nodemap_queries: AtomicUsize, } #[pymethods] impl InnerRevlog { #[new] // The Python side has authority on this signature. #[allow(clippy::too_many_arguments)] fn new( vfs_base: &Bound<'_, PyBytes>, fncache: &Bound<'_, PyAny>, vfs_is_readonly: bool, index_data: &Bound<'_, PyAny>, index_file: &Bound<'_, PyBytes>, data_file: &Bound<'_, PyBytes>, sidedata_file: &Bound<'_, PyAny>, inline: bool, data_config: &Bound<'_, PyAny>, delta_config: &Bound<'_, PyAny>, feature_config: &Bound<'_, PyAny>, chunk_cache: &Bound<'_, PyAny>, default_compression_header: &Bound<'_, PyAny>, revlog_type: usize, use_persistent_nodemap: bool, ) -> PyResult<Self> { // Let clippy accept the unused arguments. This is a bit better than // a blank `allow` directive let _ = sidedata_file; let _ = chunk_cache; let _ = default_compression_header; let index_file = get_path_from_bytes(index_file.as_bytes()).to_owned(); let data_file = get_path_from_bytes(data_file.as_bytes()).to_owned(); let revlog_type = RevlogType::try_from(revlog_type) .map_err(revlog_error_from_msg)?; let data_config = extract_data_config(data_config, revlog_type)?; let delta_config = extract_delta_config(delta_config, revlog_type)?; let feature_config = extract_feature_config(feature_config, revlog_type)?; let options = RevlogOpenOptions::new( inline, data_config, delta_config, feature_config, ); // Safety: we keep the buffer around inside the returned instance as // `index_mmap` let (buf, bytes) = unsafe { take_buffer_with_slice(index_data)? }; let index = Index::new(bytes, options.index_header()) .map_err(revlog_error_from_msg)?; let base = get_path_from_bytes(vfs_base.as_bytes()).to_owned(); let core = CoreInnerRevlog::new( Box::new(FnCacheVfs::new( base, vfs_is_readonly, Box::new(PyFnCache::new(fncache.clone().unbind())), )), index, index_file, data_file, data_config, delta_config, feature_config, ); Ok(Self { irl: core.into(), nt: None.into(), docket: None, nodemap_mmap: None, index_mmap: buf.into(), head_revs_py_list: None, head_node_ids_py_list: None, revision_cache: None, use_persistent_nodemap, nodemap_queries: AtomicUsize::new(0), }) } // // -- forwarded index methods -- // fn _index_get_rev( slf: &Bound<'_, Self>, node: &Bound<'_, PyBytes>, ) -> PyResult<Option<PyRevision>> { let node = node_from_py_bytes(node)?; // Do not rewrite this with `Self::with_index_nt_read`: it makes // inconditionally a volatile nodetree, and that is not the intent // here: the code below specifically avoids that. Self::with_core_read(slf, |self_ref, irl| { let idx = &irl.index; let prev_queries = self_ref.nodemap_queries.fetch_add(1, Ordering::Relaxed); // Filelogs have no persistent nodemaps and are often small, // use a brute force lookup from the end // backwards. If there is a very large filelog // (automation file that changes every // commit etc.), it also seems to work quite well for // all measured purposes so far. if !self_ref.use_persistent_nodemap && prev_queries <= 3 { return Ok(idx .rev_from_node_no_persistent_nodemap(node.into()) .ok() .map(Into::into)); } let opt = self_ref.get_nodetree(idx)?.read().map_err(map_lock_error)?; let nt = opt.as_ref().expect("nodetree should be set"); let rust_rev = nt.find_bin(idx, node.into()).map_err(nodemap_error)?; Ok(rust_rev.map(Into::into)) }) } /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if /// node is not found. /// /// No need to repeat `node` in the exception, `mercurial/revlog.py` /// will catch and rewrap with it fn _index_rev( slf: &Bound<'_, Self>, node: &Bound<'_, PyBytes>, ) -> PyResult<PyRevision> { Self::_index_get_rev(slf, node)?.ok_or_else(revlog_error_bare) } /// return True if the node exist in the index fn _index_has_node( slf: &Bound<'_, Self>, node: &Bound<'_, PyBytes>, ) -> PyResult<bool> { Self::_index_get_rev(slf, node).map(|opt| opt.is_some()) } /// find length of shortest hex nodeid of a binary ID fn _index_shortest( slf: &Bound<'_, Self>, node: &Bound<'_, PyBytes>, ) -> PyResult<usize> { Self::with_index_nt_read(slf, |idx, nt| { match nt.unique_prefix_len_node(idx, &node_from_py_bytes(node)?) { Ok(Some(l)) => Ok(l), Ok(None) => Err(revlog_error_bare()), Err(e) => Err(nodemap_error(e)), } }) } fn _index_partialmatch<'py>( slf: &Bound<'py, Self>, node: &Bound<'py, PyBytes>, ) -> PyResult<Option<Bound<'py, PyBytes>>> { Self::with_index_nt_read(slf, |idx, nt| { Ok(nt .find_bin(idx, node_prefix_from_py_bytes(node)?) .map_err(nodemap_error)? .map(|rev| py_node_for_rev(slf.py(), idx, rev))) }) } fn _index___len__(slf: &Bound<'_, Self>) -> PyResult<usize> { Self::with_index_read(slf, |idx| Ok(idx.len())) } } impl InnerRevlog { /// Take the lock on `slf.irl` for reading and call a closure. /// /// This serves the purpose to keep the needed intermediate [`PyRef`] /// that must be obtained to access the data from the [`Bound`] reference /// and of which the locked [`CoreInnerRevlog`] depends. /// This also provides releasing of the [`PyRef`] as soon as the closure /// is done, which is crucial if the caller needs to obtain a [`PyRefMut`] /// later on. /// /// In the closure, we hand back the intermediate [`PyRef`] that /// has been generated so that the closure can access more attributes. fn with_core_read<'py, T>( slf: &Bound<'py, Self>, f: impl FnOnce( &PyRef<'py, Self>, RwLockReadGuard<CoreInnerRevlog>, ) -> PyResult<T>, ) -> PyResult<T> { let self_ref = slf.borrow(); // Safety: the owner is the right one. We will anyway // not actually `share` it. Perhaps pyo3-sharedref should provide // something less scary for this kind of usage. let shareable_ref = unsafe { self_ref.irl.borrow_with_owner(slf) }; let guard = shareable_ref.try_read().map_err(map_try_lock_error)?; f(&self_ref, guard) } #[allow(dead_code)] /// Take the lock on `slf.irl` for writing and call a closure. /// /// See [`Self::with_core_read`] for more explanations. fn with_core_write<'py, T>( slf: &Bound<'py, Self>, f: impl FnOnce( &PyRef<'py, Self>, RwLockWriteGuard<CoreInnerRevlog>, ) -> PyResult<T>, ) -> PyResult<T> { let self_ref = slf.borrow(); // Safety: the owner is the right one. We will anyway // not actually `share` it. Perhaps pyo3-sharedref should provide // something less scary for this kind of usage. let shareable_ref = unsafe { self_ref.irl.borrow_with_owner(slf) }; let guard = shareable_ref.try_write().map_err(map_try_lock_error)?; f(&self_ref, guard) } fn with_index_read<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&Index) -> PyResult<T>, ) -> PyResult<T> { Self::with_core_read(slf, |_, guard| f(&guard.index)) } #[allow(dead_code)] fn with_index_write<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&mut Index) -> PyResult<T>, ) -> PyResult<T> { Self::with_core_write(slf, |_, mut guard| f(&mut guard.index)) } /// Lock `slf` for reading and execute a closure on its [`Index`] and /// [`NodeTree`] /// /// The [`NodeTree`] is initialized an filled before hand if needed. fn with_index_nt_read<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&Index, &CoreNodeTree) -> PyResult<T>, ) -> PyResult<T> { Self::with_core_read(slf, |self_ref, guard| { let idx = &guard.index; let nt = self_ref.get_nodetree(idx)?.read().map_err(map_lock_error)?; let nt = nt.as_ref().expect("nodetree should be set"); f(idx, nt) }) } #[allow(dead_code)] fn with_index_nt_write<T>( slf: &Bound<'_, Self>, f: impl FnOnce(&mut Index, &mut CoreNodeTree) -> PyResult<T>, ) -> PyResult<T> { Self::with_core_write(slf, |self_ref, mut guard| { let idx = &mut guard.index; let mut nt = self_ref .get_nodetree(idx)? .write() .map_err(map_lock_error)?; let nt = nt.as_mut().expect("nodetree should be set"); f(idx, nt) }) } /// Fill a [`CoreNodeTree`] by doing a full iteration on the given /// [`Index`] /// /// # Python exceptions /// Raises `ValueError` if `nt` has existing data that is inconsistent /// with `idx`. fn fill_nodemap(idx: &Index, nt: &mut CoreNodeTree) -> PyResult<()> { for r in 0..idx.len() { let rev = Revision(r as BaseRevision); // in this case node() won't ever return None nt.insert(idx, idx.node(rev).expect("node should exist"), rev) .map_err(nodemap_error)? } Ok(()) } /// Return a working NodeTree of this InnerRevlog /// /// In case the NodeTree has not been initialized yet (in particular /// not from persistent data at instantiation), it is created and /// filled right away from the index. /// /// Technically, the returned NodeTree is still behind the lock of /// the `nt` field, hence still wrapped in an [`Option`]. Callers /// will need to take the lock and unwrap with `expect()`. /// /// # Python exceptions /// The case mentioned in [`Self::fill_nodemap()`] cannot happen, as the /// NodeTree is empty when it is called. fn get_nodetree( &self, idx: &Index, ) -> PyResult<&RwLock<Option<CoreNodeTree>>> { if self.nt.read().map_err(map_lock_error)?.is_none() { let readonly = Box::<Vec<_>>::default(); let mut nt = CoreNodeTree::load_bytes(readonly, 0); Self::fill_nodemap(idx, &mut nt)?; self.nt.write().map_err(map_lock_error)?.replace(nt); } Ok(&self.nt) } } pub fn init_module<'py>( py: Python<'py>, package: &str, ) -> PyResult<Bound<'py, PyModule>> { let m = new_submodule(py, package, "revlog")?; m.add_class::<InnerRevlog>()?; Ok(m) }