diff -r 13815c9decd4 -r 7346f93be7a4 rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Wed Jun 19 17:03:13 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Wed Jun 19 19:10:49 2024 +0200 @@ -4,32 +4,43 @@ // // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. +#![allow(non_snake_case)] use crate::{ conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, + pybytes_deref::{PyBufferDeref, PyBytesDeref}, utils::{node_from_py_bytes, node_from_py_object}, + vfs::PyVfs, PyRevision, }; use cpython::{ buffer::{Element, PyBuffer}, exc::{IndexError, ValueError}, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList, - PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python, + PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python, PythonObject, ToPyObject, UnsafePyLeaked, }; use hg::{ errors::HgError, - index::{ - IndexHeader, Phase, RevisionDataParams, SnapshotsCache, - INDEX_ENTRY_SIZE, + index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, + nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, + revlog::compression::CompressionConfig, + revlog::inner_revlog::InnerRevlog as CoreInnerRevlog, + revlog::inner_revlog::RevisionBuffer, + revlog::options::{ + RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, + RevlogOpenOptions, }, - nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, - BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION, + transaction::Transaction, + utils::files::{get_bytes_from_path, get_path_from_bytes}, + BaseRevision, Node, Revision, RevlogType, UncheckedRevision, + NULL_REVISION, }; use std::{ - cell::RefCell, + cell::{Cell, RefCell}, collections::{HashMap, HashSet}, + sync::OnceLock, }; use vcsgraph::graph::Graph as VCSGraph; @@ -41,12 +52,13 @@ /// Return a Struct implementing the Graph trait pub(crate) fn py_rust_index_to_graph( py: Python, - index: PyObject, + index_proxy: PyObject, ) -> PyResult> { - let midx = index.extract::(py)?; - let leaked = midx.index(py).leak_immutable(); + let inner_revlog = index_proxy.getattr(py, "inner")?; + let inner_revlog = inner_revlog.extract::(py)?; + let leaked = inner_revlog.inner(py).leak_immutable(); // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked` - Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) }) + Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) }) } impl Clone for PySharedIndex { @@ -91,398 +103,6 @@ } } -py_class!(pub class Index |py| { - @shared data index: hg::index::Index; - data nt: RefCell>; - data docket: RefCell>; - // Holds a reference to the mmap'ed persistent nodemap data - data nodemap_mmap: RefCell>; - // Holds a reference to the mmap'ed persistent index data - data index_mmap: RefCell>; - data head_revs_py_list: RefCell>; - data head_node_ids_py_list: RefCell>; - - def __new__( - _cls, - data: PyObject, - default_header: u32, - ) -> PyResult { - Self::new(py, data, default_header) - } - - /// Compatibility layer used for Python consumers needing access to the C index - /// - /// Only use case so far is `scmutil.shortesthexnodeidprefix`, - /// that may need to build a custom `nodetree`, based on a specified revset. - /// With a Rust implementation of the nodemap, we will be able to get rid of - /// this, by exposing our own standalone nodemap class, - /// ready to accept `Index`. -/* def get_cindex(&self) -> PyResult { - Ok(self.cindex(py).borrow().inner().clone_ref(py)) - } -*/ - // Index API involving nodemap, as defined in mercurial/pure/parsers.py - - /// Return Revision if found, raises a bare `error.RevlogError` - /// in case of ambiguity, same as C version does - def get_rev(&self, node: PyBytes) -> PyResult> { - let opt = self.get_nodetree(py)?.borrow(); - let nt = opt.as_ref().unwrap(); - let ridx = &*self.index(py).borrow(); - let node = node_from_py_bytes(py, &node)?; - let rust_rev = - nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?; - Ok(rust_rev.map(Into::into)) - - } - - /// same as `get_rev()` but raises a bare `error.RevlogError` if node - /// is not found. - /// - /// No need to repeat `node` in the exception, `mercurial/revlog.py` - /// will catch and rewrap with it - def rev(&self, node: PyBytes) -> PyResult { - self.get_rev(py, node)?.ok_or_else(|| revlog_error(py)) - } - - /// return True if the node exist in the index - def has_node(&self, node: PyBytes) -> PyResult { - // TODO OPTIM we could avoid a needless conversion here, - // to do when scaffolding for pure Rust switch is removed, - // as `get_rev()` currently does the necessary assertions - self.get_rev(py, node).map(|opt| opt.is_some()) - } - - /// find length of shortest hex nodeid of a binary ID - def shortest(&self, node: PyBytes) -> PyResult { - let opt = self.get_nodetree(py)?.borrow(); - let nt = opt.as_ref().unwrap(); - let idx = &*self.index(py).borrow(); - match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) - { - Ok(Some(l)) => Ok(l), - Ok(None) => Err(revlog_error(py)), - Err(e) => Err(nodemap_error(py, e)), - } - } - - def partialmatch(&self, node: PyObject) -> PyResult> { - let opt = self.get_nodetree(py)?.borrow(); - let nt = opt.as_ref().unwrap(); - let idx = &*self.index(py).borrow(); - - let node_as_string = if cfg!(feature = "python3-sys") { - node.cast_as::(py)?.to_string(py)?.to_string() - } - else { - let node = node.extract::(py)?; - String::from_utf8_lossy(node.data(py)).to_string() - }; - - let prefix = NodePrefix::from_hex(&node_as_string) - .map_err(|_| PyErr::new::( - py, format!("Invalid node or prefix '{}'", node_as_string)) - )?; - - nt.find_bin(idx, prefix) - // TODO make an inner API returning the node directly - .map(|opt| opt.map( - |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) - .map_err(|e| nodemap_error(py, e)) - - } - - /// append an index entry - def append(&self, tup: PyTuple) -> PyResult { - if tup.len(py) < 8 { - // this is better than the panic promised by tup.get_item() - return Err( - PyErr::new::(py, "tuple index out of range")) - } - let node_bytes = tup.get_item(py, 7).extract(py)?; - let node = node_from_py_object(py, &node_bytes)?; - - let rev = self.len(py)? as BaseRevision; - - // This is ok since we will just add the revision to the index - let rev = Revision(rev); - self.index(py) - .borrow_mut() - .append(py_tuple_to_revision_data_params(py, tup)?) - .unwrap(); - let idx = &*self.index(py).borrow(); - self.get_nodetree(py)?.borrow_mut().as_mut().unwrap() - .insert(idx, &node, rev) - .map_err(|e| nodemap_error(py, e))?; - Ok(py.None()) - } - - def __delitem__(&self, key: PyObject) -> PyResult<()> { - // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` - let start = if let Ok(rev) = key.extract(py) { - UncheckedRevision(rev) - } else { - let start = key.getattr(py, "start")?; - UncheckedRevision(start.extract(py)?) - }; - let start = self.index(py) - .borrow() - .check_revision(start) - .ok_or_else(|| { - nodemap_error(py, NodeMapError::RevisionNotInIndex(start)) - })?; - self.index(py).borrow_mut().remove(start).unwrap(); - let mut opt = self.get_nodetree(py)?.borrow_mut(); - let nt = opt.as_mut().unwrap(); - nt.invalidate_all(); - self.fill_nodemap(py, nt)?; - Ok(()) - } - - // - // Index methods previously reforwarded to C index (tp_methods) - // Same ordering as in revlog.c - // - - /// return the gca set of the given revs - def ancestors(&self, *args, **_kw) -> PyResult { - let rust_res = self.inner_ancestors(py, args)?; - Ok(rust_res) - } - - /// return the heads of the common ancestors of the given revs - def commonancestorsheads(&self, *args, **_kw) -> PyResult { - let rust_res = self.inner_commonancestorsheads(py, args)?; - Ok(rust_res) - } - - /// Clear the index caches and inner py_class data. - /// It is Python's responsibility to call `update_nodemap_data` again. - def clearcaches(&self) -> PyResult { - self.nt(py).borrow_mut().take(); - self.docket(py).borrow_mut().take(); - self.nodemap_mmap(py).borrow_mut().take(); - self.head_revs_py_list(py).borrow_mut().take(); - self.head_node_ids_py_list(py).borrow_mut().take(); - self.index(py).borrow().clear_caches(); - Ok(py.None()) - } - - /// return the raw binary string representing a revision - def entry_binary(&self, *args, **_kw) -> PyResult { - let rindex = self.index(py).borrow(); - let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?); - let rust_bytes = rindex.check_revision(rev).and_then( - |r| rindex.entry_binary(r)) - .ok_or_else(|| rev_not_in_index(py, rev))?; - let rust_res = PyBytes::new(py, rust_bytes).into_object(); - Ok(rust_res) - } - - /// return a binary packed version of the header - def pack_header(&self, *args, **_kw) -> PyResult { - let rindex = self.index(py).borrow(); - let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?); - let rust_res = PyBytes::new(py, &packed).into_object(); - Ok(rust_res) - } - - /// compute phases - def computephasesmapsets(&self, *args, **_kw) -> PyResult { - let py_roots = args.get_item(py, 0).extract::(py)?; - let rust_res = self.inner_computephasesmapsets(py, py_roots)?; - Ok(rust_res) - } - - /// reachableroots - def reachableroots2(&self, *args, **_kw) -> PyResult { - let rust_res = self.inner_reachableroots2( - py, - UncheckedRevision(args.get_item(py, 0).extract(py)?), - args.get_item(py, 1), - args.get_item(py, 2), - args.get_item(py, 3).extract(py)?, - )?; - Ok(rust_res) - } - - /// get head revisions - def headrevs(&self, *args, **_kw) -> PyResult { - let (filtered_revs, stop_rev) = match &args.len(py) { - 0 => Ok((py.None(), py.None())), - 1 => Ok((args.get_item(py, 0), py.None())), - 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))), - _ => Err(PyErr::new::(py, "too many arguments")), - }?; - self.inner_headrevs(py, &filtered_revs, &stop_rev) - } - - /// get head nodeids - def head_node_ids(&self) -> PyResult { - let rust_res = self.inner_head_node_ids(py)?; - Ok(rust_res) - } - - /// get diff in head revisions - def headrevsdiff(&self, *args, **_kw) -> PyResult { - let rust_res = self.inner_headrevsdiff( - py, - &args.get_item(py, 0), - &args.get_item(py, 1))?; - Ok(rust_res) - } - - /// True if the object is a snapshot - def issnapshot(&self, *args, **_kw) -> PyResult { - let index = self.index(py).borrow(); - let result = index - .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?)) - .map_err(|e| { - PyErr::new::(py, e.to_string()) - })?; - Ok(result) - } - - /// Gather snapshot data in a cache dict - def findsnapshots(&self, *args, **_kw) -> PyResult { - let index = self.index(py).borrow(); - let cache: PyDict = args.get_item(py, 0).extract(py)?; - // this methods operates by setting new values in the cache, - // hence we will compare results by letting the C implementation - // operate over a deepcopy of the cache, and finally compare both - // caches. - let c_cache = PyDict::new(py); - for (k, v) in cache.items(py) { - c_cache.set_item(py, k, PySet::new(py, v)?)?; - } - - let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?); - let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?); - let mut cache_wrapper = PySnapshotsCache{ py, dict: cache }; - index.find_snapshots( - start_rev, - end_rev, - &mut cache_wrapper, - ).map_err(|_| revlog_error(py))?; - Ok(py.None()) - } - - /// determine revisions with deltas to reconstruct fulltext - def deltachain(&self, *args, **_kw) -> PyResult { - let index = self.index(py).borrow(); - let rev = args.get_item(py, 0).extract::(py)?.into(); - let stop_rev = - args.get_item(py, 1).extract::>(py)?; - let rev = index.check_revision(rev).ok_or_else(|| { - nodemap_error(py, NodeMapError::RevisionNotInIndex(rev)) - })?; - let stop_rev = if let Some(stop_rev) = stop_rev { - let stop_rev = UncheckedRevision(stop_rev); - Some(index.check_revision(stop_rev).ok_or_else(|| { - nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev)) - })?) - } else {None}; - let using_general_delta = args.get_item(py, 2) - .extract::>(py)? - .map(|i| i != 0); - let (chain, stopped) = index.delta_chain( - rev, stop_rev, using_general_delta - ).map_err(|e| { - PyErr::new::(py, e.to_string()) - })?; - - let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect(); - Ok( - PyTuple::new( - py, - &[ - chain.into_py_object(py).into_object(), - stopped.into_py_object(py).into_object() - ] - ).into_object() - ) - - } - - /// slice planned chunk read to reach a density threshold - def slicechunktodensity(&self, *args, **_kw) -> PyResult { - let rust_res = self.inner_slicechunktodensity( - py, - args.get_item(py, 0), - args.get_item(py, 1).extract(py)?, - args.get_item(py, 2).extract(py)? - )?; - Ok(rust_res) - } - - // index_sequence_methods and index_mapping_methods. - // - // Since we call back through the high level Python API, - // there's no point making a distinction between index_get - // and index_getitem. - // gracinet 2023: this above is no longer true for the pure Rust impl - - def __len__(&self) -> PyResult { - self.len(py) - } - - def __getitem__(&self, key: PyObject) -> PyResult { - let rust_res = self.inner_getitem(py, key.clone_ref(py))?; - Ok(rust_res) - } - - def __contains__(&self, item: PyObject) -> PyResult { - // ObjectProtocol does not seem to provide contains(), so - // this is an equivalent implementation of the index_contains() - // defined in revlog.c - match item.extract::(py) { - Ok(rev) => { - Ok(rev >= -1 && rev < self.len(py)? as BaseRevision) - } - Err(_) => { - let item_bytes: PyBytes = item.extract(py)?; - let rust_res = self.has_node(py, item_bytes)?; - Ok(rust_res) - } - } - } - - def nodemap_data_all(&self) -> PyResult { - self.inner_nodemap_data_all(py) - } - - def nodemap_data_incremental(&self) -> PyResult { - self.inner_nodemap_data_incremental(py) - } - def update_nodemap_data( - &self, - docket: PyObject, - nm_data: PyObject - ) -> PyResult { - self.inner_update_nodemap_data(py, docket, nm_data) - } - - @property - def entry_size(&self) -> PyResult { - let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py); - Ok(rust_res) - } - - @property - def rust_ext_compat(&self) -> PyResult { - // will be entirely removed when the Rust index yet useful to - // implement in Rust to detangle things when removing `self.cindex` - let rust_res: PyInt = 1.to_py_object(py); - Ok(rust_res) - } - - @property - def is_rust(&self) -> PyResult { - Ok(false.to_py_object(py)) - } - -}); - /// Take a (potentially) mmap'ed buffer, and return the underlying Python /// buffer along with the Rust slice into said buffer. We need to keep the /// Python buffer around, otherwise we'd get a dangling pointer once the buffer @@ -538,7 +158,7 @@ .extract::(py)? .data(py) .try_into() - .unwrap(); + .expect("nodeid should be set"); let flags = (offset_or_flags & 0xFFFF) as u16; let data_offset = offset_or_flags >> 16; Ok(RevisionDataParams { @@ -622,35 +242,1168 @@ } } -impl Index { - fn new(py: Python, data: PyObject, header: u32) -> PyResult { - // Safety: we keep the buffer around inside the class as `index_mmap` - let (buf, bytes) = unsafe { mmap_keeparound(py, data)? }; +// There are no static generics in Rust (because their implementation is hard, +// I'm guessing it's due to different compilation stages, etc.). +// So manually generate all three caches and use them in `with_filelog_cache`. +static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> = + OnceLock::new(); +static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> = + OnceLock::new(); +static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> = + OnceLock::new(); + +/// Cache the first conversion from Python -> Rust config for all filelogs to +/// save on conversion time when called in a loop. +fn with_filelog_cache( + py: Python, + py_config: &PyObject, + revlog_type: RevlogType, + cache: &OnceLock<(PyObject, T)>, + callback: impl Fn() -> PyResult, +) -> PyResult { + let mut was_cached = false; + if revlog_type == RevlogType::Filelog { + if let Some((cached_py_config, rust_config)) = cache.get() { + was_cached = true; + // All filelogs in a given repository *most likely* have the + // exact same config, but it's not impossible that some extensions + // do some magic with configs or that this code will be used + // for longer-running processes. So compare the source `PyObject` + // in case the source changed, at the cost of some overhead. + // We can't use `py_config.eq(cached_py_config)` because all config + // objects are different in Python and `a is b` is false. + if py_config.compare(py, cached_py_config)?.is_eq() { + return Ok(*rust_config); + } + } + } + let config = callback()?; + // Do not call the lock unnecessarily if it's already been set. + if !was_cached && revlog_type == RevlogType::Filelog { + cache.set((py_config.clone_ref(py), config)).ok(); + } + Ok(config) +} + +fn extract_delta_config( + py: Python, + py_config: PyObject, + revlog_type: RevlogType, +) -> PyResult { + let get_delta_config = || { + let max_deltachain_span = py_config + .getattr(py, "max_deltachain_span")? + .extract::(py)?; + + let revlog_delta_config = RevlogDeltaConfig { + general_delta: py_config + .getattr(py, "general_delta")? + .extract(py)?, + sparse_revlog: py_config + .getattr(py, "sparse_revlog")? + .extract(py)?, + max_chain_len: py_config + .getattr(py, "max_chain_len")? + .extract(py)?, + max_deltachain_span: if max_deltachain_span < 0 { + None + } else { + Some(max_deltachain_span as u64) + }, + upper_bound_comp: py_config + .getattr(py, "upper_bound_comp")? + .extract(py)?, + delta_both_parents: py_config + .getattr(py, "delta_both_parents")? + .extract(py)?, + candidate_group_chunk_size: py_config + .getattr(py, "candidate_group_chunk_size")? + .extract(py)?, + debug_delta: py_config.getattr(py, "debug_delta")?.extract(py)?, + lazy_delta: py_config.getattr(py, "lazy_delta")?.extract(py)?, + lazy_delta_base: py_config + .getattr(py, "lazy_delta_base")? + .extract(py)?, + }; + Ok(revlog_delta_config) + }; + with_filelog_cache( + py, + &py_config, + revlog_type, + &DELTA_CONFIG_CACHE, + get_delta_config, + ) +} + +fn extract_data_config( + py: Python, + py_config: PyObject, + revlog_type: RevlogType, +) -> PyResult { + let get_data_config = || { + Ok(RevlogDataConfig { + try_pending: py_config.getattr(py, "try_pending")?.extract(py)?, + try_split: py_config.getattr(py, "try_split")?.extract(py)?, + check_ambig: py_config.getattr(py, "check_ambig")?.extract(py)?, + mmap_large_index: py_config + .getattr(py, "mmap_large_index")? + .extract(py)?, + mmap_index_threshold: py_config + .getattr(py, "mmap_index_threshold")? + .extract(py)?, + chunk_cache_size: py_config + .getattr(py, "chunk_cache_size")? + .extract(py)?, + uncompressed_cache_factor: py_config + .getattr(py, "uncompressed_cache_factor")? + .extract(py)?, + uncompressed_cache_count: py_config + .getattr(py, "uncompressed_cache_count")? + .extract(py)?, + with_sparse_read: py_config + .getattr(py, "with_sparse_read")? + .extract(py)?, + sr_density_threshold: py_config + .getattr(py, "sr_density_threshold")? + .extract(py)?, + sr_min_gap_size: py_config + .getattr(py, "sr_min_gap_size")? + .extract(py)?, + general_delta: py_config + .getattr(py, "generaldelta")? + .extract(py)?, + }) + }; + + with_filelog_cache( + py, + &py_config, + revlog_type, + &DATA_CONFIG_CACHE, + get_data_config, + ) +} - Self::create_instance( +fn extract_feature_config( + py: Python, + py_config: PyObject, + revlog_type: RevlogType, +) -> PyResult { + let get_feature_config = || { + let engine_bytes = &py_config + .getattr(py, "compression_engine")? + .extract::(py)?; + let compression_engine = engine_bytes.data(py); + let compression_engine = match compression_engine { + b"zlib" => { + let compression_options = &py_config + .getattr(py, "compression_engine_options")? + .extract::(py)?; + let zlib_level = compression_options + .get_item(py, PyBytes::new(py, &b"zlib.level"[..])); + let level = if let Some(level) = zlib_level { + if level.is_none(py) { + None + } else { + Some(level.extract(py)?) + } + } else { + None + }; + let mut engine = CompressionConfig::default(); + if let Some(level) = level { + engine + .set_level(level) + .expect("invalid compression level from Python"); + } + engine + } + b"zstd" => { + let compression_options = &py_config + .getattr(py, "compression_engine_options")? + .extract::(py)?; + let zstd_level = compression_options + .get_item(py, PyBytes::new(py, &b"zstd.level"[..])); + let level = if let Some(level) = zstd_level { + if level.is_none(py) { + None + } else { + Some(level.extract(py)?) + } + } else { + let level = compression_options + .get_item(py, PyBytes::new(py, &b"level"[..])); + if let Some(level) = level { + if level.is_none(py) { + None + } else { + Some(level.extract(py)?) + } + } else { + None + } + }; + CompressionConfig::zstd(level) + .expect("invalid compression level from Python") + } + b"none" => CompressionConfig::None, + e => { + return Err(PyErr::new::( + py, + format!( + "invalid compression engine {}", + String::from_utf8_lossy(e) + ), + )) + } + }; + let revlog_feature_config = RevlogFeatureConfig { + compression_engine, + censorable: py_config.getattr(py, "censorable")?.extract(py)?, + has_side_data: py_config + .getattr(py, "has_side_data")? + .extract(py)?, + compute_rank: py_config + .getattr(py, "compute_rank")? + .extract(py)?, + canonical_parent_order: py_config + .getattr(py, "canonical_parent_order")? + .extract(py)?, + enable_ellipsis: py_config + .getattr(py, "enable_ellipsis")? + .extract(py)?, + }; + Ok(revlog_feature_config) + }; + with_filelog_cache( + py, + &py_config, + revlog_type, + &FEATURE_CONFIG_CACHE, + get_feature_config, + ) +} + +fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr { + let msg = e.to_string(); + + match py + .import("mercurial.error") + .and_then(|m| m.get(py, "RevlogError")) + { + Err(e) => e, + Ok(cls) => { + let msg = PyBytes::new(py, msg.as_bytes()); + PyErr::from_instance( + py, + cls.call(py, (msg,), None).ok().into_py_object(py), + ) + } + } +} + +py_class!(pub class ReadingContextManager |py| { + data inner_revlog: RefCell; + + def __enter__(&self) -> PyResult { + let res = self.inner_revlog(py) + .borrow() + .inner(py) + .borrow() + .enter_reading_context() + .map_err(|e| revlog_error_from_msg(py, e)); + if let Err(e) = res { + // `__exit__` is not called from Python if `__enter__` fails + self.inner_revlog(py) + .borrow() + .inner(py) + .borrow() + .exit_reading_context(); + return Err(e) + } + Ok(py.None()) + } + + def __exit__( + &self, + ty: Option, + value: PyObject, + traceback: PyObject + ) -> PyResult { + // unused arguments, keep clippy from complaining without adding + // a general rule + let _ = ty; + let _ = value; + let _ = traceback; + + self.inner_revlog(py) + .borrow() + .inner(py) + .borrow() + .exit_reading_context(); + Ok(py.None()) + } +}); + +// Only used from Python *tests* +py_class!(pub class PyFileHandle |py| { + data inner_file: RefCell; + + def tell(&self) -> PyResult { + let locals = PyDict::new(py); + locals.set_item(py, "os", py.import("os")?)?; + locals.set_item(py, "fd", *self.inner_file(py).borrow())?; + let f = py.eval("os.fdopen(fd)", None, Some(&locals))?; + + // Prevent Python from closing the file after garbage collecting. + // This is fine since Rust is still holding on to the actual File. + // (and also because it's only used in tests). + std::mem::forget(f.clone_ref(py)); + + locals.set_item(py, "f", f)?; + let res = py.eval("f.tell()", None, Some(&locals))?; + Ok(res) + } +}); + +/// Wrapper around a Python transaction object, to keep `hg-core` oblivious +/// of the fact it's being called from Python. +pub struct PyTransaction { + inner: PyObject, +} + +impl PyTransaction { + pub fn new(inner: PyObject) -> Self { + Self { inner } + } +} + +impl Clone for PyTransaction { + fn clone(&self) -> Self { + let gil = &Python::acquire_gil(); + let py = gil.python(); + Self { + inner: self.inner.clone_ref(py), + } + } +} + +impl Transaction for PyTransaction { + fn add(&mut self, file: impl AsRef, offset: usize) { + let gil = &Python::acquire_gil(); + let py = gil.python(); + let file = PyBytes::new(py, &get_bytes_from_path(file.as_ref())); + self.inner + .call_method(py, "add", (file, offset), None) + .expect("transaction add failed"); + } +} + +py_class!(pub class WritingContextManager |py| { + data inner_revlog: RefCell; + data transaction: RefCell; + data data_end: Cell>; + + def __enter__(&self) -> PyResult { + let res = self.inner_revlog(py) + .borrow_mut() + .inner(py) + .borrow_mut() + .enter_writing_context( + self.data_end(py).get(), + &mut *self.transaction(py).borrow_mut() + ).map_err(|e| revlog_error_from_msg(py, e)); + if let Err(e) = res { + // `__exit__` is not called from Python if `__enter__` fails + self.inner_revlog(py) + .borrow_mut() + .inner(py) + .borrow_mut() + .exit_writing_context(); + return Err(e) + } + Ok(py.None()) + } + + def __exit__( + &self, + ty: Option, + value: PyObject, + traceback: PyObject + ) -> PyResult { + // unused arguments, keep clippy from complaining without adding + // a general rule + let _ = ty; + let _ = value; + let _ = traceback; + + self.inner_revlog(py) + .borrow_mut() + .inner(py) + .borrow_mut() + .exit_writing_context(); + Ok(py.None()) + } +}); + +py_class!(pub class InnerRevlog |py| { + @shared data inner: CoreInnerRevlog; + data nt: RefCell>; + data docket: RefCell>; + // Holds a reference to the mmap'ed persistent nodemap data + data nodemap_mmap: RefCell>; + // Holds a reference to the mmap'ed persistent index data + data index_mmap: RefCell; + data head_revs_py_list: RefCell>; + data head_node_ids_py_list: RefCell>; + data revision_cache: RefCell>; + + def __new__( + _cls, + opener: PyObject, + index_data: PyObject, + index_file: PyObject, + data_file: PyObject, + sidedata_file: PyObject, + inline: bool, + data_config: PyObject, + delta_config: PyObject, + feature_config: PyObject, + chunk_cache: PyObject, + default_compression_header: PyObject, + revlog_type: usize, + ) -> PyResult { + Self::inner_new( py, - hg::index::Index::new( - bytes, - IndexHeader::parse(&header.to_be_bytes()) - .expect("default header is broken"), + opener, + index_data, + index_file, + data_file, + sidedata_file, + inline, + data_config, + delta_config, + feature_config, + chunk_cache, + default_compression_header, + revlog_type + ) + } + + def clear_cache(&self) -> PyResult { + assert!(!self.is_delaying(py)?); + self.revision_cache(py).borrow_mut().take(); + self.inner(py).borrow_mut().clear_cache(); + Ok(py.None()) + } + + @property def canonical_index_file(&self) -> PyResult { + let path = self.inner(py).borrow().canonical_index_file(); + Ok(PyBytes::new(py, &get_bytes_from_path(path))) + } + + @property def is_delaying(&self) -> PyResult { + Ok(self.inner(py).borrow().is_delaying()) + } + + @property def _revisioncache(&self) -> PyResult { + let cache = &*self.revision_cache(py).borrow(); + match cache { + None => Ok(py.None()), + Some(cache) => { + Ok(cache.clone_ref(py)) + } + } + + } + + @property def _writinghandles(&self) -> PyResult { + use std::os::fd::AsRawFd; + + let inner = self.inner(py).borrow(); + let handles = inner.python_writing_handles(); + + match handles.as_ref() { + None => Ok(py.None()), + Some(handles) => { + let d_handle = if let Some(d_handle) = &handles.data_handle { + let handle = RefCell::new(d_handle.file.as_raw_fd()); + Some(PyFileHandle::create_instance(py, handle)?) + } else { + None + }; + let handle = + RefCell::new(handles.index_handle.file.as_raw_fd()); + Ok( + ( + PyFileHandle::create_instance(py, handle)?, + d_handle, + py.None(), // Sidedata handle + + ).to_py_object(py).into_object() + ) + } + } + + } + + @_revisioncache.setter def set_revision_cache( + &self, + value: Option + ) -> PyResult<()> { + *self.revision_cache(py).borrow_mut() = value.clone_ref(py); + match value { + None => { + // This means the property has been deleted, *not* that the + // property has been set to `None`. Whatever happens is up + // to the implementation. Here we just set it to `None`. + self + .inner(py) + .borrow() + .last_revision_cache + .lock() + .expect("lock should not be held") + .take(); + }, + Some(tuple) => { + if tuple.is_none(py) { + self + .inner(py) + .borrow() + .last_revision_cache + .lock() + .expect("lock should not be held") + .take(); + return Ok(()) + } + let node = tuple.get_item(py, 0)?.extract::(py)?; + let node = node_from_py_bytes(py, &node)?; + let rev = tuple.get_item(py, 1)?.extract::(py)?; + // Ok because Python only sets this if the revision has been + // checked + let rev = Revision(rev); + let data = tuple.get_item(py, 2)?.extract::(py)?; + let inner = self.inner(py).borrow(); + let mut last_revision_cache = inner + .last_revision_cache + .lock() + .expect("lock should not be held"); + *last_revision_cache = + Some((node, rev, Box::new(PyBytesDeref::new(py, data)))); + } + } + Ok(()) + } + + @property def inline(&self) -> PyResult { + Ok(self.inner(py).borrow().is_inline()) + } + + @inline.setter def set_inline( + &self, + value: Option + ) -> PyResult<()> { + if let Some(v) = value { + self.inner(py).borrow_mut().inline = v.extract(py)?; + }; + Ok(()) + } + + @property def index_file(&self) -> PyResult { + Ok( + PyBytes::new( + py, + &get_bytes_from_path(&self.inner(py).borrow().index_file) ) - .map_err(|e| { - revlog_error_with_msg(py, e.to_string().as_bytes()) - })?, - RefCell::new(None), - RefCell::new(None), - RefCell::new(None), - RefCell::new(Some(buf)), - RefCell::new(None), - RefCell::new(None), ) } + @index_file.setter def set_index_file( + &self, + value: Option + ) -> PyResult<()> { + let path = get_path_from_bytes( + value + .expect("don't delete the index path") + .extract::(py)? + .data(py) + ).to_owned(); + self.inner(py).borrow_mut().index_file = path; + Ok(()) + } + + @property def is_writing(&self) -> PyResult { + Ok(self.inner(py).borrow().is_writing()) + } + + @property def is_open(&self) -> PyResult { + Ok(self.inner(py).borrow().is_open()) + } + + def issnapshot(&self, rev: PyRevision) -> PyResult { + self.inner_issnapshot(py, UncheckedRevision(rev.0)) + } + + def _deltachain(&self, *args, **kw) -> PyResult { + let inner = self.inner(py).borrow(); + let general_delta = inner.index.uses_generaldelta(); + let args = PyTuple::new( + py, + &[ + args.get_item(py, 0), + kw.and_then(|d| d.get_item(py, "stoprev")).to_py_object(py), + general_delta.to_py_object(py).into_object(), + ] + ); + self._index_deltachain(py, &args, kw) + } + + def compress(&self, data: PyObject) -> PyResult { + let inner = self.inner(py).borrow(); + let py_buffer = PyBuffer::get(py, &data)?; + let deref = PyBufferDeref::new(py, py_buffer)?; + let compressed = inner.compress(&deref) + .map_err(|e| revlog_error_from_msg(py, e))?; + let compressed = compressed.as_deref(); + let header = if compressed.is_some() { + PyBytes::new(py, &b""[..]) + } else { + PyBytes::new(py, &b"u"[..]) + }; + Ok( + ( + header, + PyBytes::new(py, compressed.unwrap_or(&deref)) + ).to_py_object(py) + ) + } + + def reading(&self) -> PyResult { + ReadingContextManager::create_instance( + py, + RefCell::new(self.clone_ref(py)), + ) + } + + def writing( + &self, + transaction: PyObject, + data_end: Option, + sidedata_end: Option, + ) -> PyResult { + // Silence unused argument (only relevant for changelog v2) + let _ = sidedata_end; + WritingContextManager::create_instance( + py, + RefCell::new(self.clone_ref(py)), + RefCell::new(PyTransaction::new(transaction)), + Cell::new(data_end) + ) + } + + def split_inline( + &self, + _tr: PyObject, + header: i32, + new_index_file_path: Option + ) -> PyResult { + let mut inner = self.inner(py).borrow_mut(); + let new_index_file_path = match new_index_file_path { + Some(path) => { + let path = path.extract::(py)?; + Some(get_path_from_bytes(path.data(py)).to_owned()) + }, + None => None, + }; + let header = hg::index::IndexHeader::parse(&header.to_be_bytes()); + let header = header.expect("invalid header bytes"); + let path = inner + .split_inline(header, new_index_file_path) + .map_err(|e| revlog_error_from_msg(py, e))?; + Ok(PyBytes::new(py, &get_bytes_from_path(path))) + } + + def get_segment_for_revs( + &self, + startrev: PyRevision, + endrev: PyRevision, + ) -> PyResult { + let inner = self.inner(py).borrow(); + let (offset, data) = inner + .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0)) + .map_err(|e| revlog_error_from_msg(py, e))?; + let data = PyBytes::new(py, &data); + Ok((offset, data).to_py_object(py)) + } + + def raw_text( + &self, + _node: PyObject, + rev: PyRevision + ) -> PyResult { + let inner = self.inner(py).borrow(); + let mut py_bytes = PyBytes::new(py, &[]); + inner + .raw_text(Revision(rev.0), |size, f| { + py_bytes = with_pybytes_buffer(py, size, f)?; + Ok(()) + }).map_err(|e| revlog_error_from_msg(py, e))?; + Ok(py_bytes) + } + + def _chunk( + &self, + rev: PyRevision, + ) -> PyResult { + let inner = self.inner(py).borrow(); + let chunk = inner + .chunk_for_rev(Revision(rev.0)) + .map_err(|e| revlog_error_from_msg(py, e))?; + let chunk = PyBytes::new(py, &chunk); + Ok(chunk) + } + + def write_entry( + &self, + transaction: PyObject, + entry: PyObject, + data: PyTuple, + _link: PyObject, + offset: usize, + _sidedata: PyObject, + _sidedata_offset: PyInt, + index_end: Option, + data_end: Option, + _sidedata_end: Option, + ) -> PyResult { + let mut inner = self.inner(py).borrow_mut(); + let transaction = PyTransaction::new(transaction); + let py_bytes = entry.extract(py)?; + let entry = PyBytesDeref::new(py, py_bytes); + let header = data.get_item(py, 0).extract::(py)?; + let header = header.data(py); + let data = data.get_item(py, 1); + let py_bytes = data.extract(py)?; + let data = PyBytesDeref::new(py, py_bytes); + Ok( + inner.write_entry( + transaction, + &entry, + (header, &data), + offset, + index_end, + data_end + ).map_err(|e| revlog_error_from_msg(py, e))? + .to_py_object(py) + ) + } + + def delay(&self) -> PyResult> { + let path = self.inner(py) + .borrow_mut() + .delay() + .map_err(|e| revlog_error_from_msg(py, e))?; + Ok(path.map(|p| PyBytes::new(py, &get_bytes_from_path(p)))) + } + + def write_pending(&self) -> PyResult { + let (path, any_pending) = self.inner(py) + .borrow_mut() + .write_pending() + .map_err(|e| revlog_error_from_msg(py, e))?; + let maybe_path = match path { + Some(path) => { + PyBytes::new(py, &get_bytes_from_path(path)).into_object() + }, + None => { + py.None() + } + }; + Ok( + ( + maybe_path, + any_pending + ).to_py_object(py) + ) + } + + def finalize_pending(&self) -> PyResult { + let path = self.inner(py) + .borrow_mut() + .finalize_pending() + .map_err(|e| revlog_error_from_msg(py, e))?; + Ok(PyBytes::new(py, &get_bytes_from_path(path))) + } + + // -- forwarded index methods -- + + def _index_get_rev(&self, node: PyBytes) -> PyResult> { + let opt = self.get_nodetree(py)?.borrow(); + let nt = opt.as_ref().expect("nodetree should be set"); + let ridx = &self.inner(py).borrow().index; + let node = node_from_py_bytes(py, &node)?; + let rust_rev = + nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?; + Ok(rust_rev.map(Into::into)) + } + + /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if node + /// is not found. + /// + /// No need to repeat `node` in the exception, `mercurial/revlog.py` + /// will catch and rewrap with it + def _index_rev(&self, node: PyBytes) -> PyResult { + self._index_get_rev(py, node)?.ok_or_else(|| revlog_error(py)) + } + + /// return True if the node exist in the index + def _index_has_node(&self, node: PyBytes) -> PyResult { + // TODO OPTIM we could avoid a needless conversion here, + // to do when scaffolding for pure Rust switch is removed, + // as `_index_get_rev()` currently does the necessary assertions + self._index_get_rev(py, node).map(|opt| opt.is_some()) + } + + /// find length of shortest hex nodeid of a binary ID + def _index_shortest(&self, node: PyBytes) -> PyResult { + let opt = self.get_nodetree(py)?.borrow(); + let nt = opt.as_ref().expect("nodetree should be set"); + let idx = &self.inner(py).borrow().index; + match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) + { + Ok(Some(l)) => Ok(l), + Ok(None) => Err(revlog_error(py)), + Err(e) => Err(nodemap_error(py, e)), + } + } + + def _index_partialmatch( + &self, + node: PyObject + ) -> PyResult> { + let opt = self.get_nodetree(py)?.borrow(); + let nt = opt.as_ref().expect("nodetree should be set"); + let idx = &self.inner(py).borrow().index; + + let node = node.extract::(py)?; + let node_as_string = String::from_utf8_lossy(node.data(py)); + + let prefix = NodePrefix::from_hex(node_as_string.to_string()) + .map_err(|_| PyErr::new::( + py, format!("Invalid node or prefix '{}'", node_as_string)) + )?; + + nt.find_bin(idx, prefix) + // TODO make an inner API returning the node directly + .map(|opt| opt.map(|rev| { + PyBytes::new( + py, + idx.node(rev).expect("node should exist").as_bytes() + ) + })) + .map_err(|e| nodemap_error(py, e)) + + } + + /// append an index entry + def _index_append(&self, tup: PyTuple) -> PyResult { + if tup.len(py) < 8 { + // this is better than the panic promised by tup.get_item() + return Err( + PyErr::new::(py, "tuple index out of range")) + } + let node_bytes = tup.get_item(py, 7).extract(py)?; + let node = node_from_py_object(py, &node_bytes)?; + + let rev = self.len(py)? as BaseRevision; + + // This is ok since we will just add the revision to the index + let rev = Revision(rev); + self.inner(py) + .borrow_mut() + .index + .append(py_tuple_to_revision_data_params(py, tup)?) + .map_err(|e| revlog_error_from_msg(py, e))?; + let idx = &self.inner(py).borrow().index; + self.get_nodetree(py)? + .borrow_mut() + .as_mut() + .expect("nodetree should be set") + .insert(idx, &node, rev) + .map_err(|e| nodemap_error(py, e))?; + Ok(py.None()) + } + + def _index___delitem__(&self, key: PyObject) -> PyResult { + // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` + let start = if let Ok(rev) = key.extract(py) { + UncheckedRevision(rev) + } else { + let start = key.getattr(py, "start")?; + UncheckedRevision(start.extract(py)?) + }; + let mut borrow = self.inner(py).borrow_mut(); + let start = borrow + .index + .check_revision(start) + .ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(start)) + })?; + borrow.index + .remove(start) + .map_err(|e| revlog_error_from_msg(py, e))?; + drop(borrow); + let mut opt = self.get_nodetree(py)?.borrow_mut(); + let nt = opt.as_mut().expect("nodetree should be set"); + nt.invalidate_all(); + self.fill_nodemap(py, nt)?; + Ok(py.None()) + } + + /// return the gca set of the given revs + def _index_ancestors(&self, *args, **_kw) -> PyResult { + let rust_res = self.inner_ancestors(py, args)?; + Ok(rust_res) + } + + /// return the heads of the common ancestors of the given revs + def _index_commonancestorsheads( + &self, + *args, + **_kw + ) -> PyResult { + let rust_res = self.inner_commonancestorsheads(py, args)?; + Ok(rust_res) + } + + /// Clear the index caches and inner py_class data. + /// It is Python's responsibility to call `update_nodemap_data` again. + def _index_clearcaches(&self) -> PyResult { + self.nt(py).borrow_mut().take(); + self.docket(py).borrow_mut().take(); + self.nodemap_mmap(py).borrow_mut().take(); + self.head_revs_py_list(py).borrow_mut().take(); + self.head_node_ids_py_list(py).borrow_mut().take(); + self.inner(py).borrow_mut().index.clear_caches(); + Ok(py.None()) + } + + /// return the raw binary string representing a revision + def _index_entry_binary(&self, *args, **_kw) -> PyResult { + let rindex = &self.inner(py).borrow().index; + let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?); + let rust_bytes = rindex.check_revision(rev).and_then( + |r| rindex.entry_binary(r)).ok_or_else(|| rev_not_in_index(py, rev) + )?; + let rust_res = PyBytes::new(py, rust_bytes).into_object(); + Ok(rust_res) + } + + + /// return a binary packed version of the header + def _index_pack_header(&self, *args, **_kw) -> PyResult { + let rindex = &self.inner(py).borrow().index; + let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?); + let rust_res = PyBytes::new(py, &packed).into_object(); + Ok(rust_res) + } + + /// compute phases + def _index_computephasesmapsets( + &self, + *args, + **_kw + ) -> PyResult { + let py_roots = args.get_item(py, 0).extract::(py)?; + let rust_res = self.inner_computephasesmapsets(py, py_roots)?; + Ok(rust_res) + } + + /// reachableroots + def _index_reachableroots2(&self, *args, **_kw) -> PyResult { + let rust_res = self.inner_reachableroots2( + py, + UncheckedRevision(args.get_item(py, 0).extract(py)?), + args.get_item(py, 1), + args.get_item(py, 2), + args.get_item(py, 3).extract(py)?, + )?; + Ok(rust_res) + } + + /// get head revisions + def _index_headrevs(&self, *args, **_kw) -> PyResult { + let (filtered_revs, stop_rev) = match &args.len(py) { + 0 => Ok((py.None(), py.None())), + 1 => Ok((args.get_item(py, 0), py.None())), + 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))), + _ => Err(PyErr::new::(py, "too many arguments")), + }?; + self.inner_headrevs(py, &filtered_revs, &stop_rev) + } + + /// get head nodeids + def _index_head_node_ids(&self) -> PyResult { + let rust_res = self.inner_head_node_ids(py)?; + Ok(rust_res) + } + + /// get diff in head revisions + def _index_headrevsdiff(&self, *args, **_kw) -> PyResult { + let rust_res = self.inner_headrevsdiff( + py, + &args.get_item(py, 0), + &args.get_item(py, 1))?; + Ok(rust_res) + } + + /// True if the object is a snapshot + def _index_issnapshot(&self, *args, **_kw) -> PyResult { + let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?); + self.inner_issnapshot(py, rev) + } + + /// Gather snapshot data in a cache dict + def _index_findsnapshots(&self, *args, **_kw) -> PyResult { + let index = &self.inner(py).borrow().index; + let cache: PyDict = args.get_item(py, 0).extract(py)?; + // this methods operates by setting new values in the cache, + // hence we will compare results by letting the C implementation + // operate over a deepcopy of the cache, and finally compare both + // caches. + let c_cache = PyDict::new(py); + for (k, v) in cache.items(py) { + c_cache.set_item(py, k, PySet::new(py, v)?)?; + } + + let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?); + let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?); + let mut cache_wrapper = PySnapshotsCache{ py, dict: cache }; + index.find_snapshots( + start_rev, + end_rev, + &mut cache_wrapper, + ).map_err(|_| revlog_error(py))?; + Ok(py.None()) + } + + /// determine revisions with deltas to reconstruct fulltext + def _index_deltachain(&self, *args, **_kw) -> PyResult { + let index = &self.inner(py).borrow().index; + let rev = args.get_item(py, 0).extract::(py)?.into(); + let stop_rev = + args.get_item(py, 1).extract::>(py)?; + let rev = index.check_revision(rev).ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(rev)) + })?; + let stop_rev = if let Some(stop_rev) = stop_rev { + let stop_rev = UncheckedRevision(stop_rev); + Some(index.check_revision(stop_rev).ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev)) + })?) + } else {None}; + let using_general_delta = args.get_item(py, 2) + .extract::>(py)? + .map(|i| i != 0); + let (chain, stopped) = index.delta_chain( + rev, stop_rev, using_general_delta + ).map_err(|e| { + PyErr::new::(py, e.to_string()) + })?; + + let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect(); + Ok( + PyTuple::new( + py, + &[ + chain.into_py_object(py).into_object(), + stopped.into_py_object(py).into_object() + ] + ).into_object() + ) + } + + /// slice planned chunk read to reach a density threshold + def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult { + let rust_res = self.inner_slicechunktodensity( + py, + args.get_item(py, 0), + args.get_item(py, 1).extract(py)?, + args.get_item(py, 2).extract(py)? + )?; + Ok(rust_res) + } + + def _index___len__(&self) -> PyResult { + self.len(py) + } + + def _index___getitem__(&self, key: PyObject) -> PyResult { + let rust_res = self.inner_getitem(py, key.clone_ref(py))?; + Ok(rust_res) + } + + def _index___contains__(&self, item: PyObject) -> PyResult { + // ObjectProtocol does not seem to provide contains(), so + // this is an equivalent implementation of the index_contains() + // defined in revlog.c + match item.extract::(py) { + Ok(rev) => { + Ok(rev >= -1 && rev < self.len(py)? as BaseRevision) + } + Err(_) => { + let item_bytes: PyBytes = item.extract(py)?; + let rust_res = self._index_has_node(py, item_bytes)?; + Ok(rust_res) + } + } + } + + def _index_nodemap_data_all(&self) -> PyResult { + self.inner_nodemap_data_all(py) + } + + def _index_nodemap_data_incremental(&self) -> PyResult { + self.inner_nodemap_data_incremental(py) + } + + def _index_update_nodemap_data( + &self, + docket: PyObject, + nm_data: PyObject + ) -> PyResult { + self.inner_update_nodemap_data(py, docket, nm_data) + } + + @property + def _index_entry_size(&self) -> PyResult { + let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py); + Ok(rust_res) + } + + @property + def _index_rust_ext_compat(&self) -> PyResult { + // will be entirely removed when the Rust index yet useful to + // implement in Rust to detangle things when removing `self.cindex` + let rust_res: PyInt = 1.to_py_object(py); + Ok(rust_res) + } + + @property + def _index_is_rust(&self) -> PyResult { + Ok(false.to_py_object(py)) + } + + +}); + +/// Forwarded index methods? +impl InnerRevlog { fn len(&self, py: Python) -> PyResult { - let rust_index_len = self.index(py).borrow().len(); + let rust_index_len = self.inner(py).borrow().index.len(); Ok(rust_index_len) } - /// This is scaffolding at this point, but it could also become /// a way to start a persistent nodemap or perform a /// vacuum / repack operation @@ -659,11 +1412,11 @@ py: Python, nt: &mut CoreNodeTree, ) -> PyResult { - let index = self.index(py).borrow(); + let index = &self.inner(py).borrow().index; for r in 0..self.len(py)? { let rev = Revision(r as BaseRevision); // in this case node() won't ever return None - nt.insert(&*index, index.node(rev).unwrap(), rev) + nt.insert(index, index.node(rev).expect("node should exist"), rev) .map_err(|e| nodemap_error(py, e))? } Ok(py.None()) @@ -684,7 +1437,11 @@ /// Returns the full nodemap bytes to be written as-is to disk fn inner_nodemap_data_all(&self, py: Python) -> PyResult { - let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap(); + let nodemap = self + .get_nodetree(py)? + .borrow_mut() + .take() + .expect("nodetree should exist"); let (readonly, bytes) = nodemap.into_readonly_and_added_bytes(); // If there's anything readonly, we need to build the data again from @@ -717,7 +1474,11 @@ None => return Ok(py.None()), }; - let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap(); + let node_tree = self + .get_nodetree(py)? + .borrow_mut() + .take() + .expect("nodetree should exist"); let masked_blocks = node_tree.masked_readonly_blocks(); let (_, data) = node_tree.into_readonly_and_added_bytes(); let changed = masked_blocks * std::mem::size_of::(); @@ -747,7 +1508,7 @@ .extract::(py)? .into(); self.docket(py).borrow_mut().replace(docket.clone_ref(py)); - let idx = self.index(py).borrow(); + let idx = &self.inner(py).borrow().index; let data_tip = idx.check_revision(data_tip).ok_or_else(|| { nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip)) })?; @@ -756,7 +1517,7 @@ for r in (data_tip.0 + 1)..current_tip as BaseRevision { let rev = Revision(r); // in this case node() won't ever return None - nt.insert(&*idx, idx.node(rev).unwrap(), rev) + nt.insert(idx, idx.node(rev).expect("node should exist"), rev) .map_err(|e| nodemap_error(py, e))? } @@ -766,7 +1527,7 @@ } fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult { - let idx = self.index(py).borrow(); + let idx = &self.inner(py).borrow().index; Ok(match key.extract::(py) { Ok(key_as_int) => { let entry_params = if key_as_int == NULL_REVISION.0 { @@ -786,15 +1547,17 @@ revision_data_params_to_py_tuple(py, entry_params) .into_object() } - _ => self.get_rev(py, key.extract::(py)?)?.map_or_else( - || py.None(), - |py_rev| py_rev.into_py_object(py).into_object(), - ), + _ => self + ._index_get_rev(py, key.extract::(py)?)? + .map_or_else( + || py.None(), + |py_rev| py_rev.into_py_object(py).into_object(), + ), }) } fn inner_head_node_ids(&self, py: Python) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; // We don't use the shortcut here, as it's actually slower to loop // through the cached `PyList` than to re-do the whole computation for @@ -826,7 +1589,7 @@ filtered_revs: &PyObject, stop_rev: &PyObject, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let stop_rev = if stop_rev.is_none(py) { None } else { @@ -899,7 +1662,7 @@ ) -> PyResult { let begin = begin.extract::(py)?; let end = end.extract::(py)?; - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let begin = Self::check_revision(index, UncheckedRevision(begin - 1), py)?; let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?; @@ -918,7 +1681,7 @@ new_heads: &[Revision], py: Python<'_>, ) -> PyList { - let index = self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let as_vec: Vec = new_heads .iter() .map(|r| { @@ -958,7 +1721,7 @@ py: Python, py_revs: &PyTuple, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?; let as_vec: Vec<_> = index .ancestors(&revs) @@ -974,7 +1737,7 @@ py: Python, py_revs: &PyTuple, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?; let as_vec: Vec<_> = index .common_ancestor_heads(&revs) @@ -990,7 +1753,7 @@ py: Python, py_roots: PyDict, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let roots: Result>, PyErr> = py_roots .items_list(py) .iter(py) @@ -1037,7 +1800,7 @@ target_density: f64, min_gap_size: usize, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?; let as_nested_vec = index.slice_chunk_to_density(&revs, target_density, min_gap_size); @@ -1069,7 +1832,7 @@ roots: PyObject, include_path: bool, ) -> PyResult { - let index = &*self.index(py).borrow(); + let index = &self.inner(py).borrow().index; let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| { PyErr::new::(py, "head out of range") })?; @@ -1091,6 +1854,84 @@ .collect(); Ok(PyList::new(py, &as_vec).into_object()) } + fn inner_issnapshot( + &self, + py: Python, + rev: UncheckedRevision, + ) -> PyResult { + let inner = &self.inner(py).borrow(); + let index = &self.inner(py).borrow().index; + let rev = index + .check_revision(rev) + .ok_or_else(|| rev_not_in_index(py, rev))?; + let result = inner.is_snapshot(rev).map_err(|e| { + PyErr::new::(py, e.to_string()) + })?; + Ok(result) + } +} + +impl InnerRevlog { + pub fn inner_new( + py: Python, + opener: PyObject, + index_data: PyObject, + index_file: PyObject, + data_file: PyObject, + _sidedata_file: PyObject, + inline: bool, + data_config: PyObject, + delta_config: PyObject, + feature_config: PyObject, + _chunk_cache: PyObject, + _default_compression_header: PyObject, + revlog_type: usize, + ) -> PyResult { + let vfs = Box::new(PyVfs::new(py, opener)?); + let index_file = + get_path_from_bytes(index_file.extract::(py)?.data(py)) + .to_owned(); + let data_file = + get_path_from_bytes(data_file.extract::(py)?.data(py)) + .to_owned(); + let revlog_type = RevlogType::try_from(revlog_type) + .map_err(|e| revlog_error_from_msg(py, e))?; + let data_config = extract_data_config(py, data_config, revlog_type)?; + let delta_config = + extract_delta_config(py, delta_config, revlog_type)?; + let feature_config = + extract_feature_config(py, feature_config, revlog_type)?; + let options = RevlogOpenOptions::new( + inline, + data_config, + delta_config, + feature_config, + ); + // Safety: we keep the buffer around inside the class as `index_mmap` + let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; + let index = hg::index::Index::new(bytes, options.index_header()) + .map_err(|e| revlog_error_from_msg(py, e))?; + let core = CoreInnerRevlog::new( + vfs, + index, + index_file, + data_file, + data_config, + delta_config, + feature_config, + ); + Self::create_instance( + py, + core, + RefCell::new(None), + RefCell::new(None), + RefCell::new(None), + RefCell::new(buf), + RefCell::new(None), + RefCell::new(None), + RefCell::new(None), + ) + } } py_class!(pub class NodeTree |py| { @@ -1111,7 +1952,7 @@ /// (generation-based guard), same as iterating on a `dict` that has /// been meanwhile mutated. def is_invalidated(&self) -> PyResult { - let leaked = self.index(py).borrow(); + let leaked = &self.index(py).borrow(); // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` let result = unsafe { leaked.try_borrow(py) }; // two cases for result to be an error: @@ -1123,7 +1964,7 @@ } def insert(&self, rev: PyRevision) -> PyResult { - let leaked = self.index(py).borrow(); + let leaked = &self.index(py).borrow(); // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` let index = &*unsafe { leaked.try_borrow(py)? }; @@ -1135,7 +1976,7 @@ return Err(rev_not_in_index(py, rev.into())) } - let entry = index.inner.get_entry(rev).unwrap(); + let entry = index.inner.get_entry(rev).expect("entry should exist"); let mut nt = self.nt(py).borrow_mut(); nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?; @@ -1158,7 +1999,7 @@ )?; let nt = self.nt(py).borrow(); - let leaked = self.index(py).borrow(); + let leaked = &self.index(py).borrow(); // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` let index = &*unsafe { leaked.try_borrow(py)? }; @@ -1170,7 +2011,7 @@ def shortest(&self, node: PyBytes) -> PyResult { let nt = self.nt(py).borrow(); - let leaked = self.index(py).borrow(); + let leaked = &self.index(py).borrow(); // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` let idx = &*unsafe { leaked.try_borrow(py)? }; match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) @@ -1182,6 +2023,120 @@ } }); +fn panic_after_error(_py: Python) -> ! { + unsafe { + python3_sys::PyErr_Print(); + } + panic!("Python API called failed"); +} + +/// # Safety +/// +/// Don't call this. Its only caller is taken from `PyO3`. +unsafe fn cast_from_owned_ptr_or_panic( + py: Python, + p: *mut python3_sys::PyObject, +) -> T +where + T: cpython::PythonObjectWithCheckedDowncast, +{ + if p.is_null() { + panic_after_error(py); + } else { + PyObject::from_owned_ptr(py, p).cast_into(py).unwrap() + } +} + +fn with_pybytes_buffer( + py: Python, + len: usize, + init: F, +) -> Result +where + F: FnOnce( + &mut dyn RevisionBuffer, + ) -> Result<(), RevlogError>, +{ + // Largely inspired by code in PyO3 + // https://pyo3.rs/main/doc/pyo3/types/struct.pybytes#method.new_bound_with + unsafe { + let pyptr = python3_sys::PyBytes_FromStringAndSize( + std::ptr::null(), + len as python3_sys::Py_ssize_t, + ); + let pybytes = cast_from_owned_ptr_or_panic::(py, pyptr); + let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast(); + debug_assert!(!buffer.is_null()); + let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len); + // Initialise the bytestring in init + // If init returns an Err, the buffer is deallocated by `pybytes` + init(&mut rev_buf).map(|_| rev_buf.finish()) + } +} + +/// Wrapper around a Python-provided buffer into which the revision contents +/// will be written. Done for speed in order to save a large allocation + copy. +struct PyRevisionBuffer { + py_bytes: PyBytes, + _buf: *mut u8, + len: usize, + current_buf: *mut u8, + current_len: usize, +} + +impl PyRevisionBuffer { + /// # Safety + /// + /// `buf` should be the start of the allocated bytes of `bytes`, and `len` + /// exactly the length of said allocated bytes. + #[inline] + unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self { + Self { + py_bytes: bytes, + _buf: buf, + len, + current_len: 0, + current_buf: buf, + } + } + + /// Number of bytes that have been copied to. Will be different to the + /// total allocated length of the buffer unless the revision is done being + /// written. + #[inline] + fn current_len(&self) -> usize { + self.current_len + } +} + +impl RevisionBuffer for PyRevisionBuffer { + type Target = PyBytes; + + #[inline] + fn extend_from_slice(&mut self, slice: &[u8]) { + assert!(self.current_len + slice.len() <= self.len); + unsafe { + // We cannot use `copy_from_nonoverlapping` since it's *possible* + // to create a slice from the same Python memory region using + // [`PyBytesDeref`]. Probable that LLVM has an optimization anyway? + self.current_buf.copy_from(slice.as_ptr(), slice.len()); + self.current_buf = self.current_buf.add(slice.len()); + } + self.current_len += slice.len() + } + + #[inline] + fn finish(self) -> Self::Target { + // catch unzeroed bytes before it becomes undefined behavior + assert_eq!( + self.current_len(), + self.len, + "not enough bytes read for revision" + ); + self.py_bytes + } +} + fn revlog_error(py: Python) -> PyErr { match py .import("mercurial.error") @@ -1195,21 +2150,6 @@ } } -fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr { - match py - .import("mercurial.error") - .and_then(|m| m.get(py, "RevlogError")) - { - Err(e) => e, - Ok(cls) => PyErr::from_instance( - py, - cls.call(py, (PyBytes::new(py, msg),), None) - .ok() - .into_py_object(py), - ), - } -} - fn graph_error(py: Python, _err: hg::GraphError) -> PyErr { // ParentOutOfRange is currently the only alternative // in `hg::GraphError`. The C index always raises this simple ValueError. @@ -1249,8 +2189,8 @@ m.add(py, "__package__", package)?; m.add(py, "__doc__", "RevLog - Rust implementations")?; - m.add_class::(py)?; m.add_class::(py)?; + m.add_class::(py)?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;