Mercurial > public > mercurial-scm > hg
comparison rust/hg-cpython/src/revlog.rs @ 52172:72bc29f01570
revlog: add glue to use a pure-Rust VFS
This will save us a lot of calling back into Python, which is always
horribly expensive.
We are now faster in all benchmarked cases except for `log --patch`
specifically on mozilla-try. Fixing this will happen in a later patch.
```
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.760765 ~~~~~
5559d7e63ec3: 1.555513 (-11.66%, -0.21)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 62.848869 ~~~~~
5559d7e63ec3: 58.113051 (-7.54%, -4.74)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 3.173532 ~~~~~
5559d7e63ec3: 3.543591 (+11.66%, +0.37)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 1.214698 ~~~~~
5559d7e63ec3: 1.192478 (-1.83%, -0.02)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 56.205474 ~~~~~
5559d7e63ec3: 51.520074 (-8.34%, -4.69)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 2.105419 ~~~~~
5559d7e63ec3: 2.051849 (-2.54%, -0.05)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.309960 ~~~~~
5559d7e63ec3: 0.299035 (-3.52%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
e679697a6ca4: 1.849832 ~~~~~
5559d7e63ec3: 1.805076 (-2.42%, -0.04)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
e679697a6ca4: 0.289521 ~~~~~
5559d7e63ec3: 0.279889 (-3.33%, -0.01)
### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 1000
# benchmark.variants.patch = no
# benchmark.variants.rev = none
e679697a6ca4: 0.332270 ~~~~~
5559d7e63ec3: 0.323324 (-2.69%, -0.01)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 29 Jul 2024 20:39:34 +0200 |
parents | 7346f93be7a4 |
children | bcd4962e0df9 |
comparison
equal
deleted
inserted
replaced
52171:bd43465af568 | 52172:72bc29f01570 |
---|---|
8 | 8 |
9 use crate::{ | 9 use crate::{ |
10 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, | 10 conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, |
11 pybytes_deref::{PyBufferDeref, PyBytesDeref}, | 11 pybytes_deref::{PyBufferDeref, PyBytesDeref}, |
12 utils::{node_from_py_bytes, node_from_py_object}, | 12 utils::{node_from_py_bytes, node_from_py_object}, |
13 vfs::PyVfs, | |
14 PyRevision, | 13 PyRevision, |
15 }; | 14 }; |
16 use cpython::{ | 15 use cpython::{ |
17 buffer::{Element, PyBuffer}, | 16 buffer::{Element, PyBuffer}, |
18 exc::{IndexError, ValueError}, | 17 exc::{IndexError, ValueError}, |
20 PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python, | 19 PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python, |
21 PythonObject, ToPyObject, UnsafePyLeaked, | 20 PythonObject, ToPyObject, UnsafePyLeaked, |
22 }; | 21 }; |
23 use hg::{ | 22 use hg::{ |
24 errors::HgError, | 23 errors::HgError, |
24 fncache::FnCache, | |
25 index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, | 25 index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE}, |
26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, | 26 nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, |
27 revlog::compression::CompressionConfig, | 27 revlog::{ |
28 revlog::inner_revlog::InnerRevlog as CoreInnerRevlog, | 28 compression::CompressionConfig, |
29 revlog::inner_revlog::RevisionBuffer, | 29 inner_revlog::{InnerRevlog as CoreInnerRevlog, RevisionBuffer}, |
30 revlog::options::{ | 30 nodemap::NodeMap, |
31 RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, | 31 options::{ |
32 RevlogOpenOptions, | 32 RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig, |
33 RevlogOpenOptions, | |
34 }, | |
35 Graph, NodePrefix, RevlogError, RevlogIndex, | |
33 }, | 36 }, |
34 revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, | |
35 transaction::Transaction, | 37 transaction::Transaction, |
36 utils::files::{get_bytes_from_path, get_path_from_bytes}, | 38 utils::files::{get_bytes_from_path, get_path_from_bytes}, |
39 vfs::FnCacheVfs, | |
37 BaseRevision, Node, Revision, RevlogType, UncheckedRevision, | 40 BaseRevision, Node, Revision, RevlogType, UncheckedRevision, |
38 NULL_REVISION, | 41 NULL_REVISION, |
39 }; | 42 }; |
40 use std::{ | 43 use std::{ |
41 cell::{Cell, RefCell}, | 44 cell::{Cell, RefCell}, |
42 collections::{HashMap, HashSet}, | 45 collections::{HashMap, HashSet}, |
46 sync::atomic::{AtomicBool, Ordering}, | |
43 sync::OnceLock, | 47 sync::OnceLock, |
44 }; | 48 }; |
45 use vcsgraph::graph::Graph as VCSGraph; | 49 use vcsgraph::graph::Graph as VCSGraph; |
46 | 50 |
47 pub struct PySharedIndex { | 51 pub struct PySharedIndex { |
645 .exit_writing_context(); | 649 .exit_writing_context(); |
646 Ok(py.None()) | 650 Ok(py.None()) |
647 } | 651 } |
648 }); | 652 }); |
649 | 653 |
654 struct PyFnCache { | |
655 fncache: PyObject, | |
656 } | |
657 impl PyFnCache { | |
658 fn new(fncache: PyObject) -> Self { | |
659 Self { fncache } | |
660 } | |
661 } | |
662 | |
663 impl Clone for PyFnCache { | |
664 fn clone(&self) -> Self { | |
665 let gil = Python::acquire_gil(); | |
666 let py = gil.python(); | |
667 Self { | |
668 fncache: self.fncache.clone_ref(py), | |
669 } | |
670 } | |
671 } | |
672 | |
673 /// Cache whether the fncache is loaded to avoid Python round-trip every time. | |
674 /// Once the fncache is loaded, it stays loaded unless we're in a very | |
675 /// long-running process, none of which we actually support for now. | |
676 static FN_CACHE_IS_LOADED: AtomicBool = AtomicBool::new(false); | |
677 | |
678 impl FnCache for PyFnCache { | |
679 fn is_loaded(&self) -> bool { | |
680 if FN_CACHE_IS_LOADED.load(Ordering::Relaxed) { | |
681 return true; | |
682 } | |
683 let gil = Python::acquire_gil(); | |
684 let py = gil.python(); | |
685 // TODO raise in case of error? | |
686 let is_loaded = self | |
687 .fncache | |
688 .getattr(py, "is_loaded") | |
689 .ok() | |
690 .map(|o| { | |
691 o.extract::<bool>(py) | |
692 .expect("is_loaded returned something other than a bool") | |
693 }) | |
694 .unwrap_or(false); | |
695 if is_loaded { | |
696 FN_CACHE_IS_LOADED.store(true, Ordering::Relaxed); | |
697 } | |
698 is_loaded | |
699 } | |
700 fn add(&self, path: &std::path::Path) { | |
701 let gil = Python::acquire_gil(); | |
702 let py = gil.python(); | |
703 // TODO raise in case of error? | |
704 self.fncache | |
705 .call_method( | |
706 py, | |
707 "add", | |
708 (PyBytes::new(py, &get_bytes_from_path(path)),), | |
709 None, | |
710 ) | |
711 .ok(); | |
712 } | |
713 } | |
714 | |
650 py_class!(pub class InnerRevlog |py| { | 715 py_class!(pub class InnerRevlog |py| { |
651 @shared data inner: CoreInnerRevlog; | 716 @shared data inner: CoreInnerRevlog; |
652 data nt: RefCell<Option<CoreNodeTree>>; | 717 data nt: RefCell<Option<CoreNodeTree>>; |
653 data docket: RefCell<Option<PyObject>>; | 718 data docket: RefCell<Option<PyObject>>; |
654 // Holds a reference to the mmap'ed persistent nodemap data | 719 // Holds a reference to the mmap'ed persistent nodemap data |
659 data head_node_ids_py_list: RefCell<Option<PyList>>; | 724 data head_node_ids_py_list: RefCell<Option<PyList>>; |
660 data revision_cache: RefCell<Option<PyObject>>; | 725 data revision_cache: RefCell<Option<PyObject>>; |
661 | 726 |
662 def __new__( | 727 def __new__( |
663 _cls, | 728 _cls, |
664 opener: PyObject, | 729 vfs_base: PyObject, |
730 fncache: PyObject, | |
731 vfs_is_readonly: bool, | |
665 index_data: PyObject, | 732 index_data: PyObject, |
666 index_file: PyObject, | 733 index_file: PyObject, |
667 data_file: PyObject, | 734 data_file: PyObject, |
668 sidedata_file: PyObject, | 735 sidedata_file: PyObject, |
669 inline: bool, | 736 inline: bool, |
674 default_compression_header: PyObject, | 741 default_compression_header: PyObject, |
675 revlog_type: usize, | 742 revlog_type: usize, |
676 ) -> PyResult<Self> { | 743 ) -> PyResult<Self> { |
677 Self::inner_new( | 744 Self::inner_new( |
678 py, | 745 py, |
679 opener, | 746 vfs_base, |
747 fncache, | |
748 vfs_is_readonly, | |
680 index_data, | 749 index_data, |
681 index_file, | 750 index_file, |
682 data_file, | 751 data_file, |
683 sidedata_file, | 752 sidedata_file, |
684 inline, | 753 inline, |
1872 } | 1941 } |
1873 | 1942 |
1874 impl InnerRevlog { | 1943 impl InnerRevlog { |
1875 pub fn inner_new( | 1944 pub fn inner_new( |
1876 py: Python, | 1945 py: Python, |
1877 opener: PyObject, | 1946 vfs_base: PyObject, |
1947 fncache: PyObject, | |
1948 vfs_is_readonly: bool, | |
1878 index_data: PyObject, | 1949 index_data: PyObject, |
1879 index_file: PyObject, | 1950 index_file: PyObject, |
1880 data_file: PyObject, | 1951 data_file: PyObject, |
1881 _sidedata_file: PyObject, | 1952 _sidedata_file: PyObject, |
1882 inline: bool, | 1953 inline: bool, |
1885 feature_config: PyObject, | 1956 feature_config: PyObject, |
1886 _chunk_cache: PyObject, | 1957 _chunk_cache: PyObject, |
1887 _default_compression_header: PyObject, | 1958 _default_compression_header: PyObject, |
1888 revlog_type: usize, | 1959 revlog_type: usize, |
1889 ) -> PyResult<Self> { | 1960 ) -> PyResult<Self> { |
1890 let vfs = Box::new(PyVfs::new(py, opener)?); | |
1891 let index_file = | 1961 let index_file = |
1892 get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) | 1962 get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py)) |
1893 .to_owned(); | 1963 .to_owned(); |
1894 let data_file = | 1964 let data_file = |
1895 get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py)) | 1965 get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py)) |
1905 inline, | 1975 inline, |
1906 data_config, | 1976 data_config, |
1907 delta_config, | 1977 delta_config, |
1908 feature_config, | 1978 feature_config, |
1909 ); | 1979 ); |
1980 | |
1910 // Safety: we keep the buffer around inside the class as `index_mmap` | 1981 // Safety: we keep the buffer around inside the class as `index_mmap` |
1911 let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; | 1982 let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? }; |
1912 let index = hg::index::Index::new(bytes, options.index_header()) | 1983 let index = hg::index::Index::new(bytes, options.index_header()) |
1913 .map_err(|e| revlog_error_from_msg(py, e))?; | 1984 .map_err(|e| revlog_error_from_msg(py, e))?; |
1985 | |
1986 let base = &vfs_base.extract::<PyBytes>(py)?; | |
1987 let base = get_path_from_bytes(base.data(py)).to_owned(); | |
1914 let core = CoreInnerRevlog::new( | 1988 let core = CoreInnerRevlog::new( |
1915 vfs, | 1989 Box::new(FnCacheVfs::new( |
1990 base, | |
1991 vfs_is_readonly, | |
1992 Box::new(PyFnCache::new(fncache)), | |
1993 )), | |
1916 index, | 1994 index, |
1917 index_file, | 1995 index_file, |
1918 data_file, | 1996 data_file, |
1919 data_config, | 1997 data_config, |
1920 delta_config, | 1998 delta_config, |