Mercurial > public > mercurial-scm > hg-stable
diff rust/hg-cpython/src/revlog.rs @ 52302:72bc29f01570
revlog: add glue to use a pure-Rust VFS
This will save us a lot of calling back into Python, which is always
horribly expensive.
We are now faster in all benchmarked cases except for `log --patch`
specifically on mozilla-try. Fixing this will happen in a later patch.
```
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.760765 ~~~~~
5559d7e63ec3: 1.555513 (-11.66%, -0.21)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 62.848869 ~~~~~
5559d7e63ec3: 58.113051 (-7.54%, -4.74)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 3.173532 ~~~~~
5559d7e63ec3: 3.543591 (+11.66%, +0.37)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 1.214698 ~~~~~
5559d7e63ec3: 1.192478 (-1.83%, -0.02)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 56.205474 ~~~~~
5559d7e63ec3: 51.520074 (-8.34%, -4.69)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 2.105419 ~~~~~
5559d7e63ec3: 2.051849 (-2.54%, -0.05)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.309960 ~~~~~
5559d7e63ec3: 0.299035 (-3.52%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.849832 ~~~~~
5559d7e63ec3: 1.805076 (-2.42%, -0.04)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 0.289521 ~~~~~
5559d7e63ec3: 0.279889 (-3.33%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.332270 ~~~~~
5559d7e63ec3: 0.323324 (-2.69%, -0.01)
```
author | Rapha?l Gom?s <rgomes@octobus.net> |
---|---|
date | Mon, 29 Jul 2024 20:39:34 +0200 |
parents | 7346f93be7a4 |
children | bcd4962e0df9 |
line wrap: on
line diff
--- a/rust/hg-cpython/src/revlog.rs Mon Jul 29 20:35:44 2024 +0200 +++ b/rust/hg-cpython/src/revlog.rs Mon Jul 29 20:39:34 2024 +0200 @@ -10,7 +10,6 @@ conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, pybytes_deref::{PyBufferDeref, PyBytesDeref}, utils::{node_from_py_bytes, node_from_py_object}, - vfs::PyVfs, PyRevision, }; use cpython::{ @@ -22,24 +21,29 @@ }; use hg::{ errors::HgError, + fncache::FnCache, index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, - revlog::compression::CompressionConfig, - revlog::inner_revlog::InnerRevlog as CoreInnerRevlog, - revlog::inner_revlog::RevisionBuffer, - revlog::options::{ - RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, - RevlogOpenOptions, + revlog::{ + compression::CompressionConfig, + inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer}, + nodemap::NodeMap, + options::{ + RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, + RevlogOpenOptions, + }, + Graph, NodePrefix, RevlogError, RevlogIndex, }, - revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, transaction::Transaction, utils::files::{get_bytes_from_path, get_path_from_bytes}, + vfs::FnCacheVfs, BaseRevision, Node, Revision, RevlogType, UncheckedRevision, NULL_REVISION, }; use std::{ cell::{Cell, RefCell}, collections::{HashMap, HashSet}, + sync::atomic::{AtomicBool, Ordering}, sync::OnceLock, }; use vcsgraph::graph::Graph as VCSGraph; @@ -647,6 +651,67 @@ } }); +struct PyFnCache { + fncache: PyObject, +} +impl PyFnCache { + fn new(fncache: PyObject) -> Self { + Self { fncache } + } +} + +impl Clone for PyFnCache { + fn clone(&self) -> Self { + let gil = Python::acquire_gil(); + let py = gil.python(); + Self { + fncache: self.fncache.clone_ref(py), + } + } +} + +/// Cache whether the fncache is loaded to avoid Python round-trip every time. +/// Once the fncache is loaded, it stays loaded unless we're in a very +/// long-running process, none of which we actually support for now. +static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false); + +impl FnCache for PyFnCache { + fn is_loaded(&self) -> bool { + if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) { + return true; + } + let gil = Python::acquire_gil(); + let py = gil.python(); + // TODO raise in case of error? + let is_loaded = self + .fncache + .getattr(py, "is_loaded") + .ok() + .map(|o| { + o.extract::<bool>(py) + .expect("is_loaded returned something other than a bool") + }) + .unwrap_or(false); + if is_loaded { + FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed); + } + is_loaded + } + fn add(&self, path: &std::path::Path) { + let gil = Python::acquire_gil(); + let py = gil.python(); + // TODO raise in case of error? + self.fncache + .call_method( + py, + "add", + (PyBytes::new(py, &get_bytes_from_path(path)),), + None, + ) + .ok(); + } +} + py_class!(pub class InnerRevlog |py| { @shared data inner: CoreInnerRevlog; data nt: RefCell<Option<CoreNodeTree>>; @@ -661,7 +726,9 @@ def __new__( _cls, - opener: PyObject, + vfs_base: PyObject, + fncache: PyObject, + vfs_is_readonly: bool, index_data: PyObject, index_file: PyObject, data_file: PyObject, @@ -676,7 +743,9 @@ ) -> PyResult<Self> { Self::inner_new( py, - opener, + vfs_base, + fncache, + vfs_is_readonly, index_data, index_file, data_file, @@ -1874,7 +1943,9 @@ impl InnerRevlog { pub fn inner_new( py: Python, - opener: PyObject, + vfs_base: PyObject, + fncache: PyObject, + vfs_is_readonly: bool, index_data: PyObject, index_file: PyObject, data_file: PyObject, @@ -1887,7 +1958,6 @@ _default_compression_header: PyObject, revlog_type: usize, ) -> PyResult<Self> { - let vfs = Box::new(PyVfs::new(py, opener)?); let index_file = get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) .to_owned(); @@ -1907,12 +1977,20 @@ delta_config, feature_config, ); + // Safety: we keep the buffer around inside the class as `index_mmap` let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; let index = hg::index::Index::new(bytes, options.index_header()) .map_err(|e| revlog_error_from_msg(py, e))?; + + let base = &vfs_base.extract::<PyBytes>(py)?; + let base = get_path_from_bytes(base.data(py)).to_owned(); let core = CoreInnerRevlog::new( - vfs, + Box::new(FnCacheVfs::new( + base, + vfs_is_readonly, + Box::new(PyFnCache::new(fncache)), + )), index, index_file, data_file,