comparison rust/hg-cpython/src/revlog.rs @ 52172:72bc29f01570

revlog: add glue to use a pure-Rust VFS

This will save us a lot of calling back into Python, which is always horribly expensive.

We are now faster in all benchmarked cases except for `log --patch` specifically on mozilla-try. Fixing this will happen in a later patch.

```
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.760765
~~~~~
5559d7e63ec3: 1.555513 (-11.66%, -0.21)

### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 62.848869
~~~~~
5559d7e63ec3: 58.113051 (-7.54%, -4.74)

### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 3.173532
~~~~~
5559d7e63ec3: 3.543591 (+11.66%, +0.37)

### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 1.214698
~~~~~
5559d7e63ec3: 1.192478 (-1.83%, -0.02)

### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 56.205474
~~~~~
5559d7e63ec3: 51.520074 (-8.34%, -4.69)

### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 2.105419
~~~~~
5559d7e63ec3: 2.051849 (-2.54%, -0.05)

### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.309960
~~~~~
5559d7e63ec3: 0.299035 (-3.52%, -0.01)

### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.849832
~~~~~
5559d7e63ec3: 1.805076 (-2.42%, -0.04)

### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 0.289521
~~~~~
5559d7e63ec3: 0.279889 (-3.33%, -0.01)

### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.332270
~~~~~
5559d7e63ec3: 0.323324 (-2.69%, -0.01)
```
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 29 Jul 2024 20:39:34 +0200
parents 7346f93be7a4
children bcd4962e0df9
comparison
equal deleted inserted replaced
52171:bd43465af568 52172:72bc29f01570
8 8
9 use crate::{ 9 use crate::{
10 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, 10 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
11 pybytes_deref::{PyBufferDeref, PyBytesDeref}, 11 pybytes_deref::{PyBufferDeref, PyBytesDeref},
12 utils::{node_from_py_bytes, node_from_py_object}, 12 utils::{node_from_py_bytes, node_from_py_object},
13 vfs::PyVfs,
14 PyRevision, 13 PyRevision,
15 }; 14 };
16 use cpython::{ 15 use cpython::{
17 buffer::{Element, PyBuffer}, 16 buffer::{Element, PyBuffer},
18 exc::{IndexError, ValueError}, 17 exc::{IndexError, ValueError},
20 PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python, 19 PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
21 PythonObject, ToPyObject, UnsafePyLeaked, 20 PythonObject, ToPyObject, UnsafePyLeaked,
22 }; 21 };
23 use hg::{ 22 use hg::{
24 errors::HgError, 23 errors::HgError,
24 fncache::FnCache,
25 index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, 25 index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, 26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
27 revlog::compression::CompressionConfig, 27 revlog::{
28 revlog::inner_revlog::InnerRevlog as CoreInnerRevlog, 28 compression::CompressionConfig,
29 revlog::inner_revlog::RevisionBuffer, 29 inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer},
30 revlog::options::{ 30 nodemap::NodeMap,
31 RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, 31 options::{
32 RevlogOpenOptions, 32 RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
33 RevlogOpenOptions,
34 },
35 Graph, NodePrefix, RevlogError, RevlogIndex,
33 }, 36 },
34 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
35 transaction::Transaction, 37 transaction::Transaction,
36 utils::files::{get_bytes_from_path, get_path_from_bytes}, 38 utils::files::{get_bytes_from_path, get_path_from_bytes},
39 vfs::FnCacheVfs,
37 BaseRevision, Node, Revision, RevlogType, UncheckedRevision, 40 BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
38 NULL_REVISION, 41 NULL_REVISION,
39 }; 42 };
40 use std::{ 43 use std::{
41 cell::{Cell, RefCell}, 44 cell::{Cell, RefCell},
42 collections::{HashMap, HashSet}, 45 collections::{HashMap, HashSet},
46 sync::atomic::{AtomicBool, Ordering},
43 sync::OnceLock, 47 sync::OnceLock,
44 }; 48 };
45 use vcsgraph::graph::Graph as VCSGraph; 49 use vcsgraph::graph::Graph as VCSGraph;
46 50
47 pub struct PySharedIndex { 51 pub struct PySharedIndex {
645 .exit_writing_context(); 649 .exit_writing_context();
646 Ok(py.None()) 650 Ok(py.None())
647 } 651 }
648 }); 652 });
649 653
654 struct PyFnCache {
655 fncache: PyObject,
656 }
657 impl PyFnCache {
658 fn new(fncache: PyObject) -> Self {
659 Self { fncache }
660 }
661 }
662
663 impl Clone for PyFnCache {
664 fn clone(&self) -> Self {
665 let gil = Python::acquire_gil();
666 let py = gil.python();
667 Self {
668 fncache: self.fncache.clone_ref(py),
669 }
670 }
671 }
672
673 /// Cache whether the fncache is loaded to avoid Python round-trip every time.
674 /// Once the fncache is loaded, it stays loaded unless we're in a very
675 /// long-running process, none of which we actually support for now.
676 static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false);
677
678 impl FnCache for PyFnCache {
679 fn is_loaded(&self) -> bool {
680 if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) {
681 return true;
682 }
683 let gil = Python::acquire_gil();
684 let py = gil.python();
685 // TODO raise in case of error?
686 let is_loaded = self
687 .fncache
688 .getattr(py, "is_loaded")
689 .ok()
690 .map(|o| {
691 o.extract::<bool>(py)
692 .expect("is_loaded returned something other than a bool")
693 })
694 .unwrap_or(false);
695 if is_loaded {
696 FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed);
697 }
698 is_loaded
699 }
700 fn add(&self, path: &std::path::Path) {
701 let gil = Python::acquire_gil();
702 let py = gil.python();
703 // TODO raise in case of error?
704 self.fncache
705 .call_method(
706 py,
707 "add",
708 (PyBytes::new(py, &get_bytes_from_path(path)),),
709 None,
710 )
711 .ok();
712 }
713 }
714
650 py_class!(pub class InnerRevlog |py| { 715 py_class!(pub class InnerRevlog |py| {
651 @shared data inner: CoreInnerRevlog; 716 @shared data inner: CoreInnerRevlog;
652 data nt: RefCell<Option<CoreNodeTree>>; 717 data nt: RefCell<Option<CoreNodeTree>>;
653 data docket: RefCell<Option<PyObject>>; 718 data docket: RefCell<Option<PyObject>>;
654 // Holds a reference to the mmap'ed persistent nodemap data 719 // Holds a reference to the mmap'ed persistent nodemap data
659 data head_node_ids_py_list: RefCell<Option<PyList>>; 724 data head_node_ids_py_list: RefCell<Option<PyList>>;
660 data revision_cache: RefCell<Option<PyObject>>; 725 data revision_cache: RefCell<Option<PyObject>>;
661 726
662 def __new__( 727 def __new__(
663 _cls, 728 _cls,
664 opener: PyObject, 729 vfs_base: PyObject,
730 fncache: PyObject,
731 vfs_is_readonly: bool,
665 index_data: PyObject, 732 index_data: PyObject,
666 index_file: PyObject, 733 index_file: PyObject,
667 data_file: PyObject, 734 data_file: PyObject,
668 sidedata_file: PyObject, 735 sidedata_file: PyObject,
669 inline: bool, 736 inline: bool,
674 default_compression_header: PyObject, 741 default_compression_header: PyObject,
675 revlog_type: usize, 742 revlog_type: usize,
676 ) -> PyResult<Self> { 743 ) -> PyResult<Self> {
677 Self::inner_new( 744 Self::inner_new(
678 py, 745 py,
679 opener, 746 vfs_base,
747 fncache,
748 vfs_is_readonly,
680 index_data, 749 index_data,
681 index_file, 750 index_file,
682 data_file, 751 data_file,
683 sidedata_file, 752 sidedata_file,
684 inline, 753 inline,
1872 } 1941 }
1873 1942
1874 impl InnerRevlog { 1943 impl InnerRevlog {
1875 pub fn inner_new( 1944 pub fn inner_new(
1876 py: Python, 1945 py: Python,
1877 opener: PyObject, 1946 vfs_base: PyObject,
1947 fncache: PyObject,
1948 vfs_is_readonly: bool,
1878 index_data: PyObject, 1949 index_data: PyObject,
1879 index_file: PyObject, 1950 index_file: PyObject,
1880 data_file: PyObject, 1951 data_file: PyObject,
1881 _sidedata_file: PyObject, 1952 _sidedata_file: PyObject,
1882 inline: bool, 1953 inline: bool,
1885 feature_config: PyObject, 1956 feature_config: PyObject,
1886 _chunk_cache: PyObject, 1957 _chunk_cache: PyObject,
1887 _default_compression_header: PyObject, 1958 _default_compression_header: PyObject,
1888 revlog_type: usize, 1959 revlog_type: usize,
1889 ) -> PyResult<Self> { 1960 ) -> PyResult<Self> {
1890 let vfs = Box::new(PyVfs::new(py, opener)?);
1891 let index_file = 1961 let index_file =
1892 get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) 1962 get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
1893 .to_owned(); 1963 .to_owned();
1894 let data_file = 1964 let data_file =
1895 get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py)) 1965 get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
1905 inline, 1975 inline,
1906 data_config, 1976 data_config,
1907 delta_config, 1977 delta_config,
1908 feature_config, 1978 feature_config,
1909 ); 1979 );
1980
1910 // Safety: we keep the buffer around inside the class as `index_mmap` 1981 // Safety: we keep the buffer around inside the class as `index_mmap`
1911 let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; 1982 let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
1912 let index = hg::index::Index::new(bytes, options.index_header()) 1983 let index = hg::index::Index::new(bytes, options.index_header())
1913 .map_err(|e| revlog_error_from_msg(py, e))?; 1984 .map_err(|e| revlog_error_from_msg(py, e))?;
1985
1986 let base = &vfs_base.extract::<PyBytes>(py)?;
1987 let base = get_path_from_bytes(base.data(py)).to_owned();
1914 let core = CoreInnerRevlog::new( 1988 let core = CoreInnerRevlog::new(
1915 vfs, 1989 Box::new(FnCacheVfs::new(
1990 base,
1991 vfs_is_readonly,
1992 Box::new(PyFnCache::new(fncache)),
1993 )),
1916 index, 1994 index,
1917 index_file, 1995 index_file,
1918 data_file, 1996 data_file,
1919 data_config, 1997 data_config,
1920 delta_config, 1998 delta_config,