rust/hg-cpython/src/revlog.rs
changeset 52163 7346f93be7a4
parent 52158 f2eab4967bfc
child 52172 72bc29f01570
--- a/rust/hg-cpython/src/revlog.rs	Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Wed Jun 19 19:10:49 2024 +0200
@@ -4,32 +4,43 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
+#![allow(non_snake_case)]
 
 use crate::{
     conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
+    pybytes_deref::{PyBufferDeref, PyBytesDeref},
     utils::{node_from_py_bytes, node_from_py_object},
+    vfs::PyVfs,
     PyRevision,
 };
 use cpython::{
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
     ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
-    PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
+    PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
     PythonObject, ToPyObject, UnsafePyLeaked,
 };
 use hg::{
     errors::HgError,
-    index::{
-        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
-        INDEX_ENTRY_SIZE,
+    index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
+    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
+    revlog::compression::CompressionConfig,
+    revlog::inner_revlog::InnerRevlog as CoreInnerRevlog,
+    revlog::inner_revlog::RevisionBuffer,
+    revlog::options::{
+        RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+        RevlogOpenOptions,
     },
-    nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
     revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
-    BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
+    transaction::Transaction,
+    utils::files::{get_bytes_from_path, get_path_from_bytes},
+    BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
+    NULL_REVISION,
 };
 use std::{
-    cell::RefCell,
+    cell::{Cell, RefCell},
     collections::{HashMap, HashSet},
+    sync::OnceLock,
 };
 use vcsgraph::graph::Graph as VCSGraph;
 
@@ -41,12 +52,13 @@
 /// Return a Struct implementing the Graph trait
 pub(crate) fn py_rust_index_to_graph(
     py: Python,
-    index: PyObject,
+    index_proxy: PyObject,
 ) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
-    let midx = index.extract::<Index>(py)?;
-    let leaked = midx.index(py).leak_immutable();
+    let inner_revlog = index_proxy.getattr(py, "inner")?;
+    let inner_revlog = inner_revlog.extract::<InnerRevlog>(py)?;
+    let leaked = inner_revlog.inner(py).leak_immutable();
     // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
-    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
+    Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) })
 }
 
 impl Clone for PySharedIndex {
@@ -91,398 +103,6 @@
     }
 }
 
-py_class!(pub class Index |py| {
-    @shared data index: hg::index::Index;
-    data nt: RefCell<Option<CoreNodeTree>>;
-    data docket: RefCell<Option<PyObject>>;
-    // Holds a reference to the mmap'ed persistent nodemap data
-    data nodemap_mmap: RefCell<Option<PyBuffer>>;
-    // Holds a reference to the mmap'ed persistent index data
-    data index_mmap: RefCell<Option<PyBuffer>>;
-    data head_revs_py_list: RefCell<Option<PyList>>;
-    data head_node_ids_py_list: RefCell<Option<PyList>>;
-
-    def __new__(
-        _cls,
-        data: PyObject,
-        default_header: u32,
-    ) -> PyResult<Self> {
-        Self::new(py, data, default_header)
-    }
-
-    /// Compatibility layer used for Python consumers needing access to the C index
-    ///
-    /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
-    /// that may need to build a custom `nodetree`, based on a specified revset.
-    /// With a Rust implementation of the nodemap, we will be able to get rid of
-    /// this, by exposing our own standalone nodemap class,
-    /// ready to accept `Index`.
-/*    def get_cindex(&self) -> PyResult<PyObject> {
-        Ok(self.cindex(py).borrow().inner().clone_ref(py))
-    }
-*/
-    // Index API involving nodemap, as defined in mercurial/pure/parsers.py
-
-    /// Return Revision if found, raises a bare `error.RevlogError`
-    /// in case of ambiguity, same as C version does
-    def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let ridx = &*self.index(py).borrow();
-        let node = node_from_py_bytes(py, &node)?;
-        let rust_rev =
-            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
-        Ok(rust_rev.map(Into::into))
-
-    }
-
-    /// same as `get_rev()` but raises a bare `error.RevlogError` if node
-    /// is not found.
-    ///
-    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
-    /// will catch and rewrap with it
-    def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
-        self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
-    }
-
-    /// return True if the node exist in the index
-    def has_node(&self, node: PyBytes) -> PyResult<bool> {
-        // TODO OPTIM we could avoid a needless conversion here,
-        // to do when scaffolding for pure Rust switch is removed,
-        // as `get_rev()` currently does the necessary assertions
-        self.get_rev(py, node).map(|opt| opt.is_some())
-    }
-
-    /// find length of shortest hex nodeid of a binary ID
-    def shortest(&self, node: PyBytes) -> PyResult<usize> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
-        {
-            Ok(Some(l)) => Ok(l),
-            Ok(None) => Err(revlog_error(py)),
-            Err(e) => Err(nodemap_error(py, e)),
-        }
-    }
-
-    def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
-        let idx = &*self.index(py).borrow();
-
-        let node_as_string = if cfg!(feature = "python3-sys") {
-            node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
-        }
-        else {
-            let node = node.extract::<PyBytes>(py)?;
-            String::from_utf8_lossy(node.data(py)).to_string()
-        };
-
-        let prefix = NodePrefix::from_hex(&node_as_string)
-            .map_err(|_| PyErr::new::<ValueError, _>(
-                py, format!("Invalid node or prefix '{}'", node_as_string))
-            )?;
-
-        nt.find_bin(idx, prefix)
-            // TODO make an inner API returning the node directly
-            .map(|opt| opt.map(
-                |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
-            .map_err(|e| nodemap_error(py, e))
-
-    }
-
-    /// append an index entry
-    def append(&self, tup: PyTuple) -> PyResult<PyObject> {
-        if tup.len(py) < 8 {
-            // this is better than the panic promised by tup.get_item()
-            return Err(
-                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
-        }
-        let node_bytes = tup.get_item(py, 7).extract(py)?;
-        let node = node_from_py_object(py, &node_bytes)?;
-
-        let rev = self.len(py)? as BaseRevision;
-
-        // This is ok since we will just add the revision to the index
-        let rev = Revision(rev);
-        self.index(py)
-            .borrow_mut()
-            .append(py_tuple_to_revision_data_params(py, tup)?)
-            .unwrap();
-        let idx = &*self.index(py).borrow();
-        self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
-            .insert(idx, &node, rev)
-            .map_err(|e| nodemap_error(py, e))?;
-        Ok(py.None())
-    }
-
-    def __delitem__(&self, key: PyObject) -> PyResult<()> {
-        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
-        let start = if let Ok(rev) = key.extract(py) {
-            UncheckedRevision(rev)
-        } else {
-            let start = key.getattr(py, "start")?;
-            UncheckedRevision(start.extract(py)?)
-        };
-        let start = self.index(py)
-            .borrow()
-            .check_revision(start)
-            .ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
-            })?;
-        self.index(py).borrow_mut().remove(start).unwrap();
-        let mut opt = self.get_nodetree(py)?.borrow_mut();
-        let nt = opt.as_mut().unwrap();
-        nt.invalidate_all();
-        self.fill_nodemap(py, nt)?;
-        Ok(())
-    }
-
-    //
-    // Index methods previously reforwarded to C index (tp_methods)
-    // Same ordering as in revlog.c
-    //
-
-    /// return the gca set of the given revs
-    def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_ancestors(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// return the heads of the common ancestors of the given revs
-    def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_commonancestorsheads(py, args)?;
-        Ok(rust_res)
-    }
-
-    /// Clear the index caches and inner py_class data.
-    /// It is Python's responsibility to call `update_nodemap_data` again.
-    def clearcaches(&self) -> PyResult<PyObject> {
-        self.nt(py).borrow_mut().take();
-        self.docket(py).borrow_mut().take();
-        self.nodemap_mmap(py).borrow_mut().take();
-        self.head_revs_py_list(py).borrow_mut().take();
-        self.head_node_ids_py_list(py).borrow_mut().take();
-        self.index(py).borrow().clear_caches();
-        Ok(py.None())
-    }
-
-    /// return the raw binary string representing a revision
-    def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
-        let rust_bytes = rindex.check_revision(rev).and_then(
-            |r| rindex.entry_binary(r))
-            .ok_or_else(|| rev_not_in_index(py, rev))?;
-        let rust_res = PyBytes::new(py, rust_bytes).into_object();
-        Ok(rust_res)
-    }
-
-    /// return a binary packed version of the header
-    def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rindex = self.index(py).borrow();
-        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
-        let rust_res = PyBytes::new(py, &packed).into_object();
-        Ok(rust_res)
-    }
-
-    /// compute phases
-    def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
-        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
-        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
-        Ok(rust_res)
-    }
-
-    /// reachableroots
-    def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_reachableroots2(
-            py,
-            UncheckedRevision(args.get_item(py, 0).extract(py)?),
-            args.get_item(py, 1),
-            args.get_item(py, 2),
-            args.get_item(py, 3).extract(py)?,
-        )?;
-        Ok(rust_res)
-    }
-
-    /// get head revisions
-    def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
-        let (filtered_revs, stop_rev) = match &args.len(py) {
-             0 => Ok((py.None(), py.None())),
-             1 => Ok((args.get_item(py, 0), py.None())),
-             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
-             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
-        }?;
-        self.inner_headrevs(py, &filtered_revs, &stop_rev)
-    }
-
-    /// get head nodeids
-    def head_node_ids(&self) -> PyResult<PyObject> {
-        let rust_res = self.inner_head_node_ids(py)?;
-        Ok(rust_res)
-    }
-
-    /// get diff in head revisions
-    def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_headrevsdiff(
-          py,
-          &args.get_item(py, 0),
-          &args.get_item(py, 1))?;
-        Ok(rust_res)
-    }
-
-    /// True if the object is a snapshot
-    def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
-        let index = self.index(py).borrow();
-        let result = index
-            .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
-            .map_err(|e| {
-                PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-            })?;
-        Ok(result)
-    }
-
-    /// Gather snapshot data in a cache dict
-    def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let cache: PyDict = args.get_item(py, 0).extract(py)?;
-        // this methods operates by setting new values in the cache,
-        // hence we will compare results by letting the C implementation
-        // operate over a deepcopy of the cache, and finally compare both
-        // caches.
-        let c_cache = PyDict::new(py);
-        for (k, v) in cache.items(py) {
-            c_cache.set_item(py, k, PySet::new(py, v)?)?;
-        }
-
-        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
-        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
-        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
-        index.find_snapshots(
-            start_rev,
-            end_rev,
-            &mut cache_wrapper,
-        ).map_err(|_| revlog_error(py))?;
-        Ok(py.None())
-    }
-
-    /// determine revisions with deltas to reconstruct fulltext
-    def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
-        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
-        let stop_rev =
-            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
-        let rev = index.check_revision(rev).ok_or_else(|| {
-            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
-        })?;
-        let stop_rev = if let Some(stop_rev) = stop_rev {
-            let stop_rev = UncheckedRevision(stop_rev);
-            Some(index.check_revision(stop_rev).ok_or_else(|| {
-                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
-            })?)
-        } else {None};
-        let using_general_delta = args.get_item(py, 2)
-            .extract::<Option<u32>>(py)?
-            .map(|i| i != 0);
-        let (chain, stopped) = index.delta_chain(
-            rev, stop_rev, using_general_delta
-        ).map_err(|e| {
-            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
-        })?;
-
-        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
-        Ok(
-            PyTuple::new(
-                py,
-                &[
-                    chain.into_py_object(py).into_object(),
-                    stopped.into_py_object(py).into_object()
-                ]
-            ).into_object()
-        )
-
-    }
-
-    /// slice planned chunk read to reach a density threshold
-    def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
-        let rust_res = self.inner_slicechunktodensity(
-            py,
-            args.get_item(py, 0),
-            args.get_item(py, 1).extract(py)?,
-            args.get_item(py, 2).extract(py)?
-        )?;
-        Ok(rust_res)
-    }
-
-    // index_sequence_methods and index_mapping_methods.
-    //
-    // Since we call back through the high level Python API,
-    // there's no point making a distinction between index_get
-    // and index_getitem.
-    // gracinet 2023: this above is no longer true for the pure Rust impl
-
-    def __len__(&self) -> PyResult<usize> {
-        self.len(py)
-    }
-
-    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
-        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
-        Ok(rust_res)
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        // ObjectProtocol does not seem to provide contains(), so
-        // this is an equivalent implementation of the index_contains()
-        // defined in revlog.c
-        match item.extract::<i32>(py) {
-            Ok(rev) => {
-                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
-            }
-            Err(_) => {
-                let item_bytes: PyBytes = item.extract(py)?;
-                let rust_res = self.has_node(py, item_bytes)?;
-                Ok(rust_res)
-            }
-        }
-    }
-
-    def nodemap_data_all(&self) -> PyResult<PyBytes> {
-        self.inner_nodemap_data_all(py)
-    }
-
-    def nodemap_data_incremental(&self) -> PyResult<PyObject> {
-        self.inner_nodemap_data_incremental(py)
-    }
-    def update_nodemap_data(
-        &self,
-        docket: PyObject,
-        nm_data: PyObject
-    ) -> PyResult<PyObject> {
-        self.inner_update_nodemap_data(py, docket, nm_data)
-    }
-
-    @property
-    def entry_size(&self) -> PyResult<PyInt> {
-        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def rust_ext_compat(&self) -> PyResult<PyInt> {
-        // will be entirely removed when the Rust index yet useful to
-        // implement in Rust to detangle things when removing `self.cindex`
-        let rust_res: PyInt = 1.to_py_object(py);
-        Ok(rust_res)
-    }
-
-    @property
-    def is_rust(&self) -> PyResult<PyBool> {
-        Ok(false.to_py_object(py))
-    }
-
-});
-
 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
 /// buffer along with the Rust slice into said buffer. We need to keep the
 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
@@ -538,7 +158,7 @@
         .extract::<PyBytes>(py)?
         .data(py)
         .try_into()
-        .unwrap();
+        .expect("nodeid should be set");
     let flags = (offset_or_flags & 0xFFFF) as u16;
     let data_offset = offset_or_flags >> 16;
     Ok(RevisionDataParams {
@@ -622,35 +242,1168 @@
     }
 }
 
-impl Index {
-    fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
-        // Safety: we keep the buffer around inside the class as `index_mmap`
-        let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
+// Rust does not support generic `static` items, so we declare all three
+// caches by hand and pass the relevant one to `with_filelog_cache`.
+static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> =
+    OnceLock::new();
+static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> =
+    OnceLock::new();
+static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> =
+    OnceLock::new();
+
+/// Cache the first conversion from Python -> Rust config for all filelogs to
+/// save on conversion time when called in a loop.
+fn with_filelog_cache<T: Copy>(
+    py: Python,
+    py_config: &PyObject,
+    revlog_type: RevlogType,
+    cache: &OnceLock<(PyObject, T)>,
+    callback: impl Fn() -> PyResult<T>,
+) -> PyResult<T> {
+    let mut was_cached = false;
+    if revlog_type == RevlogType::Filelog {
+        if let Some((cached_py_config, rust_config)) = cache.get() {
+            was_cached = true;
+            // All filelogs in a given repository *most likely* have the
+            // exact same config, but it's not impossible that some extensions
+            // do some magic with configs or that this code will be used
+            // for longer-running processes. So compare the source `PyObject`
+            // in case the source changed, at the cost of some overhead.
+            // We can't use `py_config.eq(cached_py_config)` because all config
+            // objects are different in Python and `a is b` is false.
+            if py_config.compare(py, cached_py_config)?.is_eq() {
+                return Ok(*rust_config);
+            }
+        }
+    }
+    let config = callback()?;
+    // Do not touch the `OnceLock` again if the value was already cached.
+    if !was_cached && revlog_type == RevlogType::Filelog {
+        cache.set((py_config.clone_ref(py), config)).ok();
+    }
+    Ok(config)
+}
+
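+// A rough usage sketch (it mirrors the `extract_*_config` helpers below):
+// each caller passes its own `OnceLock` cache together with a closure doing
+// the actual Python -> Rust conversion, e.g.
+//
+//     with_filelog_cache(
+//         py, &py_config, revlog_type, &DELTA_CONFIG_CACHE, get_delta_config,
+//     )
+//
+// so only the first filelog pays the conversion cost.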
+fn extract_delta_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDeltaConfig> {
+    let get_delta_config = || {
+        let max_deltachain_span = py_config
+            .getattr(py, "max_deltachain_span")?
+            .extract::<i64>(py)?;
+
+        let revlog_delta_config = RevlogDeltaConfig {
+            general_delta: py_config
+                .getattr(py, "general_delta")?
+                .extract(py)?,
+            sparse_revlog: py_config
+                .getattr(py, "sparse_revlog")?
+                .extract(py)?,
+            max_chain_len: py_config
+                .getattr(py, "max_chain_len")?
+                .extract(py)?,
+            max_deltachain_span: if max_deltachain_span < 0 {
+                None
+            } else {
+                Some(max_deltachain_span as u64)
+            },
+            upper_bound_comp: py_config
+                .getattr(py, "upper_bound_comp")?
+                .extract(py)?,
+            delta_both_parents: py_config
+                .getattr(py, "delta_both_parents")?
+                .extract(py)?,
+            candidate_group_chunk_size: py_config
+                .getattr(py, "candidate_group_chunk_size")?
+                .extract(py)?,
+            debug_delta: py_config.getattr(py, "debug_delta")?.extract(py)?,
+            lazy_delta: py_config.getattr(py, "lazy_delta")?.extract(py)?,
+            lazy_delta_base: py_config
+                .getattr(py, "lazy_delta_base")?
+                .extract(py)?,
+        };
+        Ok(revlog_delta_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DELTA_CONFIG_CACHE,
+        get_delta_config,
+    )
+}
+
+fn extract_data_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogDataConfig> {
+    let get_data_config = || {
+        Ok(RevlogDataConfig {
+            try_pending: py_config.getattr(py, "try_pending")?.extract(py)?,
+            try_split: py_config.getattr(py, "try_split")?.extract(py)?,
+            check_ambig: py_config.getattr(py, "check_ambig")?.extract(py)?,
+            mmap_large_index: py_config
+                .getattr(py, "mmap_large_index")?
+                .extract(py)?,
+            mmap_index_threshold: py_config
+                .getattr(py, "mmap_index_threshold")?
+                .extract(py)?,
+            chunk_cache_size: py_config
+                .getattr(py, "chunk_cache_size")?
+                .extract(py)?,
+            uncompressed_cache_factor: py_config
+                .getattr(py, "uncompressed_cache_factor")?
+                .extract(py)?,
+            uncompressed_cache_count: py_config
+                .getattr(py, "uncompressed_cache_count")?
+                .extract(py)?,
+            with_sparse_read: py_config
+                .getattr(py, "with_sparse_read")?
+                .extract(py)?,
+            sr_density_threshold: py_config
+                .getattr(py, "sr_density_threshold")?
+                .extract(py)?,
+            sr_min_gap_size: py_config
+                .getattr(py, "sr_min_gap_size")?
+                .extract(py)?,
+            general_delta: py_config
+                .getattr(py, "generaldelta")?
+                .extract(py)?,
+        })
+    };
+
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &DATA_CONFIG_CACHE,
+        get_data_config,
+    )
+}
 
-        Self::create_instance(
+fn extract_feature_config(
+    py: Python,
+    py_config: PyObject,
+    revlog_type: RevlogType,
+) -> PyResult<RevlogFeatureConfig> {
+    let get_feature_config = || {
+        let engine_bytes = &py_config
+            .getattr(py, "compression_engine")?
+            .extract::<PyBytes>(py)?;
+        let compression_engine = engine_bytes.data(py);
+        let compression_engine = match compression_engine {
+            b"zlib" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zlib_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zlib.level"[..]));
+                let level = if let Some(level) = zlib_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    None
+                };
+                let mut engine = CompressionConfig::default();
+                if let Some(level) = level {
+                    engine
+                        .set_level(level)
+                        .expect("invalid compression level from Python");
+                }
+                engine
+            }
+            b"zstd" => {
+                let compression_options = &py_config
+                    .getattr(py, "compression_engine_options")?
+                    .extract::<PyDict>(py)?;
+                let zstd_level = compression_options
+                    .get_item(py, PyBytes::new(py, &b"zstd.level"[..]));
+                let level = if let Some(level) = zstd_level {
+                    if level.is_none(py) {
+                        None
+                    } else {
+                        Some(level.extract(py)?)
+                    }
+                } else {
+                    let level = compression_options
+                        .get_item(py, PyBytes::new(py, &b"level"[..]));
+                    if let Some(level) = level {
+                        if level.is_none(py) {
+                            None
+                        } else {
+                            Some(level.extract(py)?)
+                        }
+                    } else {
+                        None
+                    }
+                };
+                CompressionConfig::zstd(level)
+                    .expect("invalid compression level from Python")
+            }
+            b"none" => CompressionConfig::None,
+            e => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    format!(
+                        "invalid compression engine {}",
+                        String::from_utf8_lossy(e)
+                    ),
+                ))
+            }
+        };
+        let revlog_feature_config = RevlogFeatureConfig {
+            compression_engine,
+            censorable: py_config.getattr(py, "censorable")?.extract(py)?,
+            has_side_data: py_config
+                .getattr(py, "has_side_data")?
+                .extract(py)?,
+            compute_rank: py_config
+                .getattr(py, "compute_rank")?
+                .extract(py)?,
+            canonical_parent_order: py_config
+                .getattr(py, "canonical_parent_order")?
+                .extract(py)?,
+            enable_ellipsis: py_config
+                .getattr(py, "enable_ellipsis")?
+                .extract(py)?,
+        };
+        Ok(revlog_feature_config)
+    };
+    with_filelog_cache(
+        py,
+        &py_config,
+        revlog_type,
+        &FEATURE_CONFIG_CACHE,
+        get_feature_config,
+    )
+}
+
+fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr {
+    let msg = e.to_string();
+
+    match py
+        .import("mercurial.error")
+        .and_then(|m| m.get(py, "RevlogError"))
+    {
+        Err(e) => e,
+        Ok(cls) => {
+            let msg = PyBytes::new(py, msg.as_bytes());
+            PyErr::from_instance(
+                py,
+                cls.call(py, (msg,), None).ok().into_py_object(py),
+            )
+        }
+    }
+}
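+// Callers throughout this file convert `hg-core` errors with the pattern
+// `.map_err(|e| revlog_error_from_msg(py, e))?`.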
+
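+// Python-level context manager: `__enter__` and `__exit__` pair
+// `enter_reading_context` with `exit_reading_context` on the core
+// `InnerRevlog`, including on the error path where Python would not call
+// `__exit__` itself.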
+py_class!(pub class ReadingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .enter_reading_context()
+            .map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow()
+                .inner(py)
+                .borrow()
+                .exit_reading_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow()
+            .inner(py)
+            .borrow()
+            .exit_reading_context();
+        Ok(py.None())
+    }
+});
+
+// Only used from Python *tests*
+py_class!(pub class PyFileHandle |py| {
+    data inner_file: RefCell<std::os::fd::RawFd>;
+
+    def tell(&self) -> PyResult<PyObject> {
+        let locals = PyDict::new(py);
+        locals.set_item(py, "os", py.import("os")?)?;
+        locals.set_item(py, "fd", *self.inner_file(py).borrow())?;
+        let f = py.eval("os.fdopen(fd)", None, Some(&locals))?;
+
+        // Prevent Python from closing the file after garbage collecting.
+        // This is fine since Rust is still holding on to the actual File.
+        // (and also because it's only used in tests).
+        std::mem::forget(f.clone_ref(py));
+
+        locals.set_item(py, "f", f)?;
+        let res = py.eval("f.tell()", None, Some(&locals))?;
+        Ok(res)
+    }
+});
+
+/// Wrapper around a Python transaction object, to keep `hg-core` oblivious
+/// of the fact it's being called from Python.
+pub struct PyTransaction {
+    inner: PyObject,
+}
+
+impl PyTransaction {
+    pub fn new(inner: PyObject) -> Self {
+        Self { inner }
+    }
+}
+
+impl Clone for PyTransaction {
+    fn clone(&self) -> Self {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        Self {
+            inner: self.inner.clone_ref(py),
+        }
+    }
+}
+
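+// `hg-core` is oblivious to Python, so every `Transaction::add` call has to
+// re-acquire the GIL before forwarding to the wrapped Python transaction.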
+impl Transaction for PyTransaction {
+    fn add(&mut self, file: impl AsRef<std::path::Path>, offset: usize) {
+        let gil = &Python::acquire_gil();
+        let py = gil.python();
+        let file = PyBytes::new(py, &get_bytes_from_path(file.as_ref()));
+        self.inner
+            .call_method(py, "add", (file, offset), None)
+            .expect("transaction add failed");
+    }
+}
+
+py_class!(pub class WritingContextManager |py| {
+    data inner_revlog: RefCell<InnerRevlog>;
+    data transaction: RefCell<PyTransaction>;
+    data data_end: Cell<Option<usize>>;
+
+    def __enter__(&self) -> PyResult<PyObject> {
+        let res = self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .enter_writing_context(
+                self.data_end(py).get(),
+                &mut *self.transaction(py).borrow_mut()
+            ).map_err(|e| revlog_error_from_msg(py, e));
+        if let Err(e) = res {
+            // `__exit__` is not called from Python if `__enter__` fails
+            self.inner_revlog(py)
+                .borrow_mut()
+                .inner(py)
+                .borrow_mut()
+                .exit_writing_context();
+            return Err(e)
+        }
+        Ok(py.None())
+    }
+
+    def __exit__(
+        &self,
+        ty: Option<PyType>,
+        value: PyObject,
+        traceback: PyObject
+    ) -> PyResult<PyObject> {
+        // unused arguments, keep clippy from complaining without adding
+        // a general rule
+        let _ = ty;
+        let _ = value;
+        let _ = traceback;
+
+        self.inner_revlog(py)
+            .borrow_mut()
+            .inner(py)
+            .borrow_mut()
+            .exit_writing_context();
+        Ok(py.None())
+    }
+});
+
+py_class!(pub class InnerRevlog |py| {
+    @shared data inner: CoreInnerRevlog;
+    data nt: RefCell<Option<CoreNodeTree>>;
+    data docket: RefCell<Option<PyObject>>;
+    // Holds a reference to the mmap'ed persistent nodemap data
+    data nodemap_mmap: RefCell<Option<PyBuffer>>;
+    // Holds a reference to the mmap'ed persistent index data
+    data index_mmap: RefCell<PyBuffer>;
+    data head_revs_py_list: RefCell<Option<PyList>>;
+    data head_node_ids_py_list: RefCell<Option<PyList>>;
+    data revision_cache: RefCell<Option<PyObject>>;
+
+    def __new__(
+        _cls,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        chunk_cache: PyObject,
+        default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
+        Self::inner_new(
             py,
-            hg::index::Index::new(
-                bytes,
-                IndexHeader::parse(&header.to_be_bytes())
-                    .expect("default header is broken"),
+            opener,
+            index_data,
+            index_file,
+            data_file,
+            sidedata_file,
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+            chunk_cache,
+            default_compression_header,
+            revlog_type
+        )
+    }
+
+    def clear_cache(&self) -> PyResult<PyObject> {
+        assert!(!self.is_delaying(py)?);
+        self.revision_cache(py).borrow_mut().take();
+        self.inner(py).borrow_mut().clear_cache();
+        Ok(py.None())
+    }
+
+    @property def canonical_index_file(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py).borrow().canonical_index_file();
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    @property def is_delaying(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_delaying())
+    }
+
+    @property def _revisioncache(&self) -> PyResult<PyObject> {
+        let cache = &*self.revision_cache(py).borrow();
+        match cache {
+            None => Ok(py.None()),
+            Some(cache) => {
+                Ok(cache.clone_ref(py))
+            }
+        }
+
+    }
+
+    @property def _writinghandles(&self) -> PyResult<PyObject> {
+        use std::os::fd::AsRawFd;
+
+        let inner = self.inner(py).borrow();
+        let handles = inner.python_writing_handles();
+
+        match handles.as_ref() {
+            None => Ok(py.None()),
+            Some(handles) => {
+                let d_handle = if let Some(d_handle) = &handles.data_handle {
+                    let handle = RefCell::new(d_handle.file.as_raw_fd());
+                    Some(PyFileHandle::create_instance(py, handle)?)
+                } else {
+                    None
+                };
+                let handle =
+                    RefCell::new(handles.index_handle.file.as_raw_fd());
+                Ok(
+                    (
+                        PyFileHandle::create_instance(py, handle)?,
+                        d_handle,
+                        py.None(),  // Sidedata handle
+
+                    ).to_py_object(py).into_object()
+                )
+            }
+
+    }
+
+    @_revisioncache.setter def set_revision_cache(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        *self.revision_cache(py).borrow_mut() = value.clone_ref(py);
+        match value {
+            None => {
+                // This means the property has been deleted, *not* that the
+                // property has been set to `None`. Whatever happens is up
+                // to the implementation. Here we just set it to `None`.
+                self
+                    .inner(py)
+                    .borrow()
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held")
+                    .take();
+            },
+            Some(tuple) => {
+                if tuple.is_none(py) {
+                    self
+                        .inner(py)
+                        .borrow()
+                        .last_revision_cache
+                        .lock()
+                        .expect("lock should not be held")
+                        .take();
+                    return Ok(())
+                }
+                let node = tuple.get_item(py, 0)?.extract::<PyBytes>(py)?;
+                let node = node_from_py_bytes(py, &node)?;
+                let rev = tuple.get_item(py, 1)?.extract::<BaseRevision>(py)?;
+                // Ok because Python only sets this if the revision has been
+                // checked
+                let rev = Revision(rev);
+                let data = tuple.get_item(py, 2)?.extract::<PyBytes>(py)?;
+                let inner = self.inner(py).borrow();
+                let mut last_revision_cache = inner
+                    .last_revision_cache
+                    .lock()
+                    .expect("lock should not be held");
+                *last_revision_cache =
+                    Some((node, rev, Box::new(PyBytesDeref::new(py, data))));
+            }
+        }
+        Ok(())
+    }
+
+    @property def inline(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_inline())
+    }
+
+    @inline.setter def set_inline(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        if let Some(v) = value {
+            self.inner(py).borrow_mut().inline = v.extract(py)?;
+        };
+        Ok(())
+    }
+
+    @property def index_file(&self) -> PyResult<PyBytes> {
+        Ok(
+            PyBytes::new(
+                py,
+                &get_bytes_from_path(&self.inner(py).borrow().index_file)
             )
-            .map_err(|e| {
-                revlog_error_with_msg(py, e.to_string().as_bytes())
-            })?,
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(None),
-            RefCell::new(Some(buf)),
-            RefCell::new(None),
-            RefCell::new(None),
         )
     }
 
+    @index_file.setter def set_index_file(
+        &self,
+        value: Option<PyObject>
+    ) -> PyResult<()> {
+        let path = get_path_from_bytes(
+            value
+                .expect("don't delete the index path")
+                .extract::<PyBytes>(py)?
+                .data(py)
+        ).to_owned();
+        self.inner(py).borrow_mut().index_file = path;
+        Ok(())
+    }
+
+    @property def is_writing(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_writing())
+    }
+
+    @property def is_open(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().is_open())
+    }
+
+    def issnapshot(&self, rev: PyRevision) -> PyResult<bool> {
+        self.inner_issnapshot(py, UncheckedRevision(rev.0))
+    }
+
+    def _deltachain(&self, *args, **kw) -> PyResult<PyObject> {
+        let inner = self.inner(py).borrow();
+        let general_delta = inner.index.uses_generaldelta();
+        let args = PyTuple::new(
+            py,
+            &[
+                args.get_item(py, 0),
+                kw.and_then(|d| d.get_item(py, "stoprev")).to_py_object(py),
+                general_delta.to_py_object(py).into_object(),
+            ]
+        );
+        self._index_deltachain(py, &args, kw)
+    }
+
+    def compress(&self, data: PyObject) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let py_buffer = PyBuffer::get(py, &data)?;
+        let deref = PyBufferDeref::new(py, py_buffer)?;
+        let compressed = inner
+            .compress(&deref)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let compressed = compressed.as_deref();
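+        // An empty header means the data is stored compressed as-is, while
+        // b"u" marks it as stored uncompressed (compression not worth it),
+        // matching the chunk convention expected on the Python side.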
+        let header = if compressed.is_some() {
+            PyBytes::new(py, &b""[..])
+        } else {
+            PyBytes::new(py, &b"u"[..])
+        };
+        Ok(
+            (
+                header,
+                PyBytes::new(py, compressed.unwrap_or(&deref))
+            ).to_py_object(py)
+        )
+    }
+
+    def reading(&self) -> PyResult<ReadingContextManager> {
+        ReadingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+        )
+    }
+
+    def writing(
+        &self,
+        transaction: PyObject,
+        data_end: Option<usize>,
+        sidedata_end: Option<usize>,
+    ) -> PyResult<WritingContextManager> {
+        // Silence unused argument (only relevant for changelog v2)
+        let _ = sidedata_end;
+        WritingContextManager::create_instance(
+            py,
+            RefCell::new(self.clone_ref(py)),
+            RefCell::new(PyTransaction::new(transaction)),
+            Cell::new(data_end)
+        )
+    }
+
+    def split_inline(
+        &self,
+        _tr: PyObject,
+        header: i32,
+        new_index_file_path: Option<PyObject>
+    ) -> PyResult<PyBytes> {
+        let mut inner = self.inner(py).borrow_mut();
+        let new_index_file_path = match new_index_file_path {
+            Some(path) => {
+                let path = path.extract::<PyBytes>(py)?;
+                Some(get_path_from_bytes(path.data(py)).to_owned())
+            },
+            None => None,
+        };
+        let header = hg::index::IndexHeader::parse(&header.to_be_bytes());
+        let header = header.expect("invalid header bytes");
+        let path = inner
+            .split_inline(header, new_index_file_path)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    def get_segment_for_revs(
+        &self,
+        startrev: PyRevision,
+        endrev: PyRevision,
+    ) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let (offset, data) = inner
+            .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data = PyBytes::new(py, &data);
+        Ok((offset, data).to_py_object(py))
+    }
+
+    def raw_text(
+        &self,
+        _node: PyObject,
+        rev: PyRevision
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let mut py_bytes = PyBytes::new(py, &[]);
+        inner
+            .raw_text(Revision(rev.0), |size, f| {
+                py_bytes = with_pybytes_buffer(py, size, f)?;
+                Ok(())
+            }).map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(py_bytes)
+    }
+
+    def _chunk(
+        &self,
+        rev: PyRevision,
+    ) -> PyResult<PyBytes> {
+        let inner = self.inner(py).borrow();
+        let chunk = inner
+            .chunk_for_rev(Revision(rev.0))
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let chunk = PyBytes::new(py, &chunk);
+        Ok(chunk)
+    }
+
+    def write_entry(
+        &self,
+        transaction: PyObject,
+        entry: PyObject,
+        data: PyTuple,
+        _link: PyObject,
+        offset: usize,
+        _sidedata: PyObject,
+        _sidedata_offset: PyInt,
+        index_end: Option<u64>,
+        data_end: Option<u64>,
+        _sidedata_end: Option<PyInt>,
+    ) -> PyResult<PyTuple> {
+        let mut inner = self.inner(py).borrow_mut();
+        let transaction = PyTransaction::new(transaction);
+        let py_bytes = entry.extract(py)?;
+        let entry = PyBytesDeref::new(py, py_bytes);
+        let header = data.get_item(py, 0).extract::<PyBytes>(py)?;
+        let header = header.data(py);
+        let data = data.get_item(py, 1);
+        let py_bytes = data.extract(py)?;
+        let data = PyBytesDeref::new(py, py_bytes);
+        Ok(
+            inner.write_entry(
+                transaction,
+                &entry,
+                (header, &data),
+                offset,
+                index_end,
+                data_end
+            ).map_err(|e| revlog_error_from_msg(py, e))?
+             .to_py_object(py)
+        )
+    }
+
+    def delay(&self) -> PyResult<Option<PyBytes>> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .delay()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(path.map(|p| PyBytes::new(py, &get_bytes_from_path(p))))
+    }
+
+    def write_pending(&self) -> PyResult<PyTuple> {
+        let (path, any_pending) = self.inner(py)
+            .borrow_mut()
+            .write_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let maybe_path = match path {
+            Some(path) => {
+                PyBytes::new(py, &get_bytes_from_path(path)).into_object()
+            },
+            None => {
+                py.None()
+            }
+        };
+        Ok(
+            (
+                maybe_path,
+                any_pending
+            ).to_py_object(py)
+        )
+    }
+
+    def finalize_pending(&self) -> PyResult<PyBytes> {
+        let path = self.inner(py)
+            .borrow_mut()
+            .finalize_pending()
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+    }
+
+    // -- forwarded index methods --
+
+    def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let ridx = &self.inner(py).borrow().index;
+        let node = node_from_py_bytes(py, &node)?;
+        let rust_rev =
+            nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
+        Ok(rust_rev.map(Into::into))
+    }
+
+    /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if node
+    /// is not found.
+    ///
+    /// No need to repeat `node` in the exception, `mercurial/revlog.py`
+    /// will catch and rewrap with it
+    def _index_rev(&self, node: PyBytes) -> PyResult<PyRevision> {
+        self._index_get_rev(py, node)?.ok_or_else(|| revlog_error(py))
+    }
+
+    /// return True if the node exists in the index
+    def _index_has_node(&self, node: PyBytes) -> PyResult<bool> {
+        // TODO OPTIM we could avoid a needless conversion here, to be done
+        // once the scaffolding for the pure Rust switch is removed, since
+        // `_index_get_rev()` already performs the necessary assertions
+        self._index_get_rev(py, node).map(|opt| opt.is_some())
+    }
+
+    /// find length of shortest hex nodeid of a binary ID
+    def _index_shortest(&self, node: PyBytes) -> PyResult<usize> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+        match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
+        {
+            Ok(Some(l)) => Ok(l),
+            Ok(None) => Err(revlog_error(py)),
+            Err(e) => Err(nodemap_error(py, e)),
+        }
+    }
+
+    def _index_partialmatch(
+        &self,
+        node: PyObject
+    ) -> PyResult<Option<PyBytes>> {
+        let opt = self.get_nodetree(py)?.borrow();
+        let nt = opt.as_ref().expect("nodetree should be set");
+        let idx = &self.inner(py).borrow().index;
+
+        let node = node.extract::<PyBytes>(py)?;
+        let node_as_string = String::from_utf8_lossy(node.data(py));
+
+        let prefix = NodePrefix::from_hex(node_as_string.to_string())
+            .map_err(|_| PyErr::new::<ValueError, _>(
+                py, format!("Invalid node or prefix '{}'", node_as_string))
+            )?;
+
+        nt.find_bin(idx, prefix)
+            // TODO make an inner API returning the node directly
+            .map(|opt| opt.map(|rev| {
+                    PyBytes::new(
+                        py,
+                        idx.node(rev).expect("node should exist").as_bytes()
+                    )
+            }))
+            .map_err(|e| nodemap_error(py, e))
+
+    }
+
+    /// append an index entry
+    def _index_append(&self, tup: PyTuple) -> PyResult<PyObject> {
+        if tup.len(py) < 8 {
+            // this is better than the panic promised by tup.get_item()
+            return Err(
+                PyErr::new::<IndexError, _>(py, "tuple index out of range"))
+        }
+        let node_bytes = tup.get_item(py, 7).extract(py)?;
+        let node = node_from_py_object(py, &node_bytes)?;
+
+        let rev = self.len(py)? as BaseRevision;
+
+        // This is ok since we will just add the revision to the index
+        let rev = Revision(rev);
+        self.inner(py)
+            .borrow_mut()
+            .index
+            .append(py_tuple_to_revision_data_params(py, tup)?)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let idx = &self.inner(py).borrow().index;
+        self.get_nodetree(py)?
+            .borrow_mut()
+            .as_mut()
+            .expect("nodetree should be set")
+            .insert(idx, &node, rev)
+            .map_err(|e| nodemap_error(py, e))?;
+        Ok(py.None())
+    }
+
+    def _index___delitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
+        let start = if let Ok(rev) = key.extract(py) {
+            UncheckedRevision(rev)
+        } else {
+            let start = key.getattr(py, "start")?;
+            UncheckedRevision(start.extract(py)?)
+        };
+        let mut borrow = self.inner(py).borrow_mut();
+        let start = borrow
+            .index
+            .check_revision(start)
+            .ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
+            })?;
+        borrow.index
+            .remove(start)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        drop(borrow);
+        let mut opt = self.get_nodetree(py)?.borrow_mut();
+        let nt = opt.as_mut().expect("nodetree should be set");
+        nt.invalidate_all();
+        self.fill_nodemap(py, nt)?;
+        Ok(py.None())
+    }
+
+    /// return the gca set of the given revs
+    def _index_ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_ancestors(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// return the heads of the common ancestors of the given revs
+    def _index_commonancestorsheads(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let rust_res = self.inner_commonancestorsheads(py, args)?;
+        Ok(rust_res)
+    }
+
+    /// Clear the index caches and inner py_class data.
+    /// It is Python's responsibility to call `update_nodemap_data` again.
+    def _index_clearcaches(&self) -> PyResult<PyObject> {
+        self.nt(py).borrow_mut().take();
+        self.docket(py).borrow_mut().take();
+        self.nodemap_mmap(py).borrow_mut().take();
+        self.head_revs_py_list(py).borrow_mut().take();
+        self.head_node_ids_py_list(py).borrow_mut().take();
+        self.inner(py).borrow_mut().index.clear_caches();
+        Ok(py.None())
+    }
+
+    /// return the raw binary string representing a revision
+    def _index_entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        let rust_bytes = rindex
+            .check_revision(rev)
+            .and_then(|r| rindex.entry_binary(r))
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let rust_res = PyBytes::new(py, rust_bytes).into_object();
+        Ok(rust_res)
+    }
+
+    /// return a binary packed version of the header
+    def _index_pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rindex = &self.inner(py).borrow().index;
+        let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
+        let rust_res = PyBytes::new(py, &packed).into_object();
+        Ok(rust_res)
+    }
+
+    /// compute phases
+    def _index_computephasesmapsets(
+        &self,
+        *args,
+        **_kw
+    ) -> PyResult<PyObject> {
+        let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
+        let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
+        Ok(rust_res)
+    }
+
+    /// reachableroots
+    def _index_reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_reachableroots2(
+            py,
+            UncheckedRevision(args.get_item(py, 0).extract(py)?),
+            args.get_item(py, 1),
+            args.get_item(py, 2),
+            args.get_item(py, 3).extract(py)?,
+        )?;
+        Ok(rust_res)
+    }
+
+    /// get head revisions
+    def _index_headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
+        let (filtered_revs, stop_rev) = match &args.len(py) {
+             0 => Ok((py.None(), py.None())),
+             1 => Ok((args.get_item(py, 0), py.None())),
+             2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
+             _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
+        }?;
+        self.inner_headrevs(py, &filtered_revs, &stop_rev)
+    }
+
+    /// get head nodeids
+    def _index_head_node_ids(&self) -> PyResult<PyObject> {
+        let rust_res = self.inner_head_node_ids(py)?;
+        Ok(rust_res)
+    }
+
+    /// get diff in head revisions
+    def _index_headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_headrevsdiff(
+          py,
+          &args.get_item(py, 0),
+          &args.get_item(py, 1))?;
+        Ok(rust_res)
+    }
+
+    /// True if the object is a snapshot
+    def _index_issnapshot(&self, *args, **_kw) -> PyResult<bool> {
+        let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+        self.inner_issnapshot(py, rev)
+    }
+
+    /// Gather snapshot data in a cache dict
+    def _index_findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let cache: PyDict = args.get_item(py, 0).extract(py)?;
+        // this method operates by setting new values in the cache,
+        // hence we will compare results by letting the C implementation
+        // operate over a deepcopy of the cache, and finally compare both
+        // caches.
+        let c_cache = PyDict::new(py);
+        for (k, v) in cache.items(py) {
+            c_cache.set_item(py, k, PySet::new(py, v)?)?;
+        }
+
+        let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
+        let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
+        let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
+        index.find_snapshots(
+            start_rev,
+            end_rev,
+            &mut cache_wrapper,
+        ).map_err(|_| revlog_error(py))?;
+        Ok(py.None())
+    }
+
+    /// determine revisions with deltas to reconstruct fulltext
+    def _index_deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
+        let index = &self.inner(py).borrow().index;
+        let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
+        let stop_rev =
+            args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
+        let rev = index.check_revision(rev).ok_or_else(|| {
+            nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
+        })?;
+        let stop_rev = if let Some(stop_rev) = stop_rev {
+            let stop_rev = UncheckedRevision(stop_rev);
+            Some(index.check_revision(stop_rev).ok_or_else(|| {
+                nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
+            })?)
+        } else {None};
+        let using_general_delta = args.get_item(py, 2)
+            .extract::<Option<u32>>(py)?
+            .map(|i| i != 0);
+        let (chain, stopped) = index.delta_chain(
+            rev, stop_rev, using_general_delta
+        ).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+
+        let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
+        Ok(
+            PyTuple::new(
+                py,
+                &[
+                    chain.into_py_object(py).into_object(),
+                    stopped.into_py_object(py).into_object()
+                ]
+            ).into_object()
+        )
+    }
+
+    /// slice planned chunk read to reach a density threshold
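+    ///
+    /// Expects three positional arguments: the revisions to slice, the
+    /// target density (a float) and the minimum gap size (an integer).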
+    def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_slicechunktodensity(
+            py,
+            args.get_item(py, 0),
+            args.get_item(py, 1).extract(py)?,
+            args.get_item(py, 2).extract(py)?
+        )?;
+        Ok(rust_res)
+    }
+
+    def _index___len__(&self) -> PyResult<usize> {
+        self.len(py)
+    }
+
+    def _index___getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
+        Ok(rust_res)
+    }
+
+    def _index___contains__(&self, item: PyObject) -> PyResult<bool> {
+        // ObjectProtocol does not seem to provide contains(), so
+        // this is an equivalent implementation of the index_contains()
+        // defined in revlog.c
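+        // Accepted inputs: an integer revision (valid if
+        // -1 <= rev < len(index)) or a binary node id.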
+        match item.extract::<i32>(py) {
+            Ok(rev) => {
+                Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
+            }
+            Err(_) => {
+                let item_bytes: PyBytes = item.extract(py)?;
+                let rust_res = self._index_has_node(py, item_bytes)?;
+                Ok(rust_res)
+            }
+        }
+    }
+
+    def _index_nodemap_data_all(&self) -> PyResult<PyBytes> {
+        self.inner_nodemap_data_all(py)
+    }
+
+    def _index_nodemap_data_incremental(&self) -> PyResult<PyObject> {
+        self.inner_nodemap_data_incremental(py)
+    }
+
+    def _index_update_nodemap_data(
+        &self,
+        docket: PyObject,
+        nm_data: PyObject
+    ) -> PyResult<PyObject> {
+        self.inner_update_nodemap_data(py, docket, nm_data)
+    }
+
+    @property
+    def _index_entry_size(&self) -> PyResult<PyInt> {
+        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_rust_ext_compat(&self) -> PyResult<PyInt> {
+        // will be entirely removed once the Rust index is the only one;
+        // still useful to implement in Rust to detangle things when
+        // removing `self.cindex`
+        let rust_res: PyInt = 1.to_py_object(py);
+        Ok(rust_res)
+    }
+
+    @property
+    def _index_is_rust(&self) -> PyResult<PyBool> {
+        Ok(false.to_py_object(py))
+    }
+
+});
+
+/// Forwarded index methods
+impl InnerRevlog {
     fn len(&self, py: Python) -> PyResult<usize> {
-        let rust_index_len = self.index(py).borrow().len();
+        let rust_index_len = self.inner(py).borrow().index.len();
         Ok(rust_index_len)
     }
-
     /// This is scaffolding at this point, but it could also become
     /// a way to start a persistent nodemap or perform a
     /// vacuum / repack operation
@@ -659,11 +1412,11 @@
         py: Python,
         nt: &mut CoreNodeTree,
     ) -> PyResult<PyObject> {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         for r in 0..self.len(py)? {
             let rev = Revision(r as BaseRevision);
             // in this case node() won't ever return None
-            nt.insert(&*index, index.node(rev).unwrap(), rev)
+            nt.insert(index, index.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
         Ok(py.None())
@@ -684,7 +1437,11 @@
 
     /// Returns the full nodemap bytes to be written as-is to disk
     fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
-        let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let nodemap = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
 
         // If there's anything readonly, we need to build the data again from
@@ -717,7 +1474,11 @@
             None => return Ok(py.None()),
         };
 
-        let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+        let node_tree = self
+            .get_nodetree(py)?
+            .borrow_mut()
+            .take()
+            .expect("nodetree should exist");
         let masked_blocks = node_tree.masked_readonly_blocks();
         let (_, data) = node_tree.into_readonly_and_added_bytes();
         let changed = masked_blocks * std::mem::size_of::<Block>();
@@ -747,7 +1508,7 @@
             .extract::<BaseRevision>(py)?
             .into();
         self.docket(py).borrow_mut().replace(docket.clone_ref(py));
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
             nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
         })?;
@@ -756,7 +1517,7 @@
         for r in (data_tip.0 + 1)..current_tip as BaseRevision {
             let rev = Revision(r);
             // in this case node() won't ever return None
-            nt.insert(&*idx, idx.node(rev).unwrap(), rev)
+            nt.insert(idx, idx.node(rev).expect("node should exist"), rev)
                 .map_err(|e| nodemap_error(py, e))?
         }
 
@@ -766,7 +1527,7 @@
     }
 
     fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
-        let idx = self.index(py).borrow();
+        let idx = &self.inner(py).borrow().index;
         Ok(match key.extract::<BaseRevision>(py) {
             Ok(key_as_int) => {
                 let entry_params = if key_as_int == NULL_REVISION.0 {
@@ -786,15 +1547,17 @@
                 revision_data_params_to_py_tuple(py, entry_params)
                     .into_object()
             }
-            _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
-                || py.None(),
-                |py_rev| py_rev.into_py_object(py).into_object(),
-            ),
+            _ => self
+                ._index_get_rev(py, key.extract::<PyBytes>(py)?)?
+                .map_or_else(
+                    || py.None(),
+                    |py_rev| py_rev.into_py_object(py).into_object(),
+                ),
         })
     }
 
     fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
 
         // We don't use the shortcut here, as it's actually slower to loop
         // through the cached `PyList` than to re-do the whole computation for
@@ -826,7 +1589,7 @@
         filtered_revs: &PyObject,
         stop_rev: &PyObject,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let stop_rev = if stop_rev.is_none(py) {
             None
         } else {
@@ -899,7 +1662,7 @@
     ) -> PyResult<PyObject> {
         let begin = begin.extract::<BaseRevision>(py)?;
         let end = end.extract::<BaseRevision>(py)?;
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let begin =
             Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
         let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
@@ -918,7 +1681,7 @@
         new_heads: &[Revision],
         py: Python<'_>,
     ) -> PyList {
-        let index = self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let as_vec: Vec<PyObject> = new_heads
             .iter()
             .map(|r| {
@@ -958,7 +1721,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .ancestors(&revs)
@@ -974,7 +1737,7 @@
         py: Python,
         py_revs: &PyTuple,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
         let as_vec: Vec<_> = index
             .common_ancestor_heads(&revs)
@@ -990,7 +1753,7 @@
         py: Python,
         py_roots: PyDict,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
             .items_list(py)
             .iter(py)
@@ -1037,7 +1800,7 @@
         target_density: f64,
         min_gap_size: usize,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
         let as_nested_vec =
             index.slice_chunk_to_density(&revs, target_density, min_gap_size);
@@ -1069,7 +1832,7 @@
         roots: PyObject,
         include_path: bool,
     ) -> PyResult<PyObject> {
-        let index = &*self.index(py).borrow();
+        let index = &self.inner(py).borrow().index;
         let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
             PyErr::new::<IndexError, _>(py, "head out of range")
         })?;
@@ -1091,6 +1854,84 @@
             .collect();
         Ok(PyList::new(py, &as_vec).into_object())
     }
+
+    fn inner_issnapshot(
+        &self,
+        py: Python,
+        rev: UncheckedRevision,
+    ) -> PyResult<bool> {
+        let inner = &self.inner(py).borrow();
+        let index = &inner.index;
+        let rev = index
+            .check_revision(rev)
+            .ok_or_else(|| rev_not_in_index(py, rev))?;
+        let result = inner.is_snapshot(rev).map_err(|e| {
+            PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+        })?;
+        Ok(result)
+    }
+}
+
+impl InnerRevlog {
+    pub fn inner_new(
+        py: Python,
+        opener: PyObject,
+        index_data: PyObject,
+        index_file: PyObject,
+        data_file: PyObject,
+        _sidedata_file: PyObject,
+        inline: bool,
+        data_config: PyObject,
+        delta_config: PyObject,
+        feature_config: PyObject,
+        _chunk_cache: PyObject,
+        _default_compression_header: PyObject,
+        revlog_type: usize,
+    ) -> PyResult<Self> {
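+        // Wrap the Python opener into a Rust VFS, convert the path and
+        // config objects, then build the core index from the (kept-alive)
+        // mmap before assembling the core `InnerRevlog`.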
+        let vfs = Box::new(PyVfs::new(py, opener)?);
+        let index_file =
+            get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let data_file =
+            get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
+                .to_owned();
+        let revlog_type = RevlogType::try_from(revlog_type)
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let data_config = extract_data_config(py, data_config, revlog_type)?;
+        let delta_config =
+            extract_delta_config(py, delta_config, revlog_type)?;
+        let feature_config =
+            extract_feature_config(py, feature_config, revlog_type)?;
+        let options = RevlogOpenOptions::new(
+            inline,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        // Safety: we keep the buffer around inside the class as `index_mmap`
+        let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
+        let index = hg::index::Index::new(bytes, options.index_header())
+            .map_err(|e| revlog_error_from_msg(py, e))?;
+        let core = CoreInnerRevlog::new(
+            vfs,
+            index,
+            index_file,
+            data_file,
+            data_config,
+            delta_config,
+            feature_config,
+        );
+        Self::create_instance(
+            py,
+            core,
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(buf),
+            RefCell::new(None),
+            RefCell::new(None),
+            RefCell::new(None),
+        )
+    }
 }
 
 py_class!(pub class NodeTree |py| {
@@ -1111,7 +1952,7 @@
     /// (generation-based guard), same as iterating on a `dict` that has
     /// been meanwhile mutated.
     def is_invalidated(&self) -> PyResult<bool> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let result = unsafe { leaked.try_borrow(py) };
         // two cases for result to be an error:
@@ -1123,7 +1964,7 @@
     }
 
     def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1135,7 +1976,7 @@
             return Err(rev_not_in_index(py, rev.into()))
         }
 
-        let entry = index.inner.get_entry(rev).unwrap();
+        let entry = index.inner.get_entry(rev).expect("entry should exist");
         let mut nt = self.nt(py).borrow_mut();
         nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
 
@@ -1158,7 +1999,7 @@
             )?;
 
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let index = &*unsafe { leaked.try_borrow(py)? };
 
@@ -1170,7 +2011,7 @@
 
     def shortest(&self, node: PyBytes) -> PyResult<usize> {
         let nt = self.nt(py).borrow();
-        let leaked = self.index(py).borrow();
+        let leaked = &self.index(py).borrow();
         // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
         let idx = &*unsafe { leaked.try_borrow(py)? };
         match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
@@ -1182,6 +2023,120 @@
     }
 });
 
+fn panic_after_error(_py: Python) -> ! {
+    unsafe {
+        python3_sys::PyErr_Print();
+    }
+    panic!("Python API call failed");
+}
+
+/// # Safety
+///
+/// Don't call this directly; its only caller is code adapted from `PyO3`.
+unsafe fn cast_from_owned_ptr_or_panic<T>(
+    py: Python,
+    p: *mut python3_sys::PyObject,
+) -> T
+where
+    T: cpython::PythonObjectWithCheckedDowncast,
+{
+    if p.is_null() {
+        panic_after_error(py);
+    } else {
+        PyObject::from_owned_ptr(py, p).cast_into(py).unwrap()
+    }
+}
+
+fn with_pybytes_buffer<F>(
+    py: Python,
+    len: usize,
+    init: F,
+) -> Result<PyBytes, RevlogError>
+where
+    F: FnOnce(
+        &mut dyn RevisionBuffer<Target = PyBytes>,
+    ) -> Result<(), RevlogError>,
+{
+    // Largely inspired by code in PyO3
+    // https://pyo3.rs/main/doc/pyo3/types/struct.pybytes#method.new_bound_with
+    unsafe {
+        let pyptr = python3_sys::PyBytes_FromStringAndSize(
+            std::ptr::null(),
+            len as python3_sys::Py_ssize_t,
+        );
+        let pybytes = cast_from_owned_ptr_or_panic::<PyBytes>(py, pyptr);
+        let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast();
+        debug_assert!(!buffer.is_null());
+        let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len);
+        // Initialise the bytestring in `init`.
+        // If `init` returns an Err, the buffer is deallocated when
+        // `pybytes` is dropped.
+        init(&mut rev_buf).map(|_| rev_buf.finish())
+    }
+}
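+
+// Hypothetical usage sketch of `with_pybytes_buffer` (illustrative only,
+// not called like this anywhere in this file), assuming a GIL token `py`
+// is in scope: build a 3-byte `PyBytes` by writing directly into the
+// Python-owned buffer.
+//
+//     let res: Result<PyBytes, RevlogError> =
+//         with_pybytes_buffer(py, 3, |buf| {
+//             buf.extend_from_slice(b"abc");
+//             Ok(())
+//         });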
+
+/// Wrapper around a Python-provided buffer into which the revision contents
+/// will be written. Done for speed in order to save a large allocation + copy.
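+///
+/// Callers must not write more than `len` bytes and must have written
+/// exactly `len` bytes before calling `finish`; both constraints are
+/// enforced by assertions.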
+struct PyRevisionBuffer {
+    py_bytes: PyBytes,
+    _buf: *mut u8,
+    len: usize,
+    current_buf: *mut u8,
+    current_len: usize,
+}
+
+impl PyRevisionBuffer {
+    /// # Safety
+    ///
+    /// `buf` should be the start of the allocated bytes of `bytes`, and `len`
+    /// exactly the length of said allocated bytes.
+    #[inline]
+    unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self {
+        Self {
+            py_bytes: bytes,
+            _buf: buf,
+            len,
+            current_len: 0,
+            current_buf: buf,
+        }
+    }
+
+    /// Number of bytes written so far. This will differ from the total
+    /// allocated length of the buffer until the revision has been fully
+    /// written.
+    #[inline]
+    fn current_len(&self) -> usize {
+        self.current_len
+    }
+}
+
+impl RevisionBuffer for PyRevisionBuffer {
+    type Target = PyBytes;
+
+    #[inline]
+    fn extend_from_slice(&mut self, slice: &[u8]) {
+        assert!(self.current_len + slice.len() <= self.len);
+        unsafe {
+            // We cannot use `copy_from_nonoverlapping` since it's *possible*
+            // to create a slice from the same Python memory region using
+            // [`PyBytesDeref`]. It is likely that LLVM optimizes this anyway.
+            self.current_buf.copy_from(slice.as_ptr(), slice.len());
+            self.current_buf = self.current_buf.add(slice.len());
+        }
+        self.current_len += slice.len()
+    }
+
+    #[inline]
+    fn finish(self) -> Self::Target {
+        // catch unzeroed bytes before they become undefined behavior
+        assert_eq!(
+            self.current_len(),
+            self.len,
+            "not enough bytes read for revision"
+        );
+        self.py_bytes
+    }
+}
+
 fn revlog_error(py: Python) -> PyErr {
     match py
         .import("mercurial.error")
@@ -1195,21 +2150,6 @@
     }
 }
 
-fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
-    match py
-        .import("mercurial.error")
-        .and_then(|m| m.get(py, "RevlogError"))
-    {
-        Err(e) => e,
-        Ok(cls) => PyErr::from_instance(
-            py,
-            cls.call(py, (PyBytes::new(py, msg),), None)
-                .ok()
-                .into_py_object(py),
-        ),
-    }
-}
-
 fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
     // ParentOutOfRange is currently the only alternative
     // in `hg::GraphError`. The C index always raises this simple ValueError.
@@ -1249,8 +2189,8 @@
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "RevLog - Rust implementations")?;
 
-    m.add_class::<Index>(py)?;
     m.add_class::<NodeTree>(py)?;
+    m.add_class::<InnerRevlog>(py)?;
 
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;