comparison mercurial/revlog.py @ 52163:7346f93be7a4

revlog: add the glue to use the Rust `InnerRevlog` from Python The performance of this has been looked at for quite some time, and some workflows are actually quite a bit faster than with the Python + C code. However, we are still (up to 20%) slower in some crucial places like cloning certain repos, log, cat, which makes this an incomplete rewrite. This is mostly due to the high amount of overhead in Python <-> Rust FFI, especially around the VFS code. A future patch series will rewrite the VFS code in pure Rust, which should hopefully get us up to par with current perfomance, if not better in all important cases. This is a "save state" of sorts, as this is a ton of code, and I don't want to pile up even more things in a single review. Continuing to try to match the current performance will take an extremely long time, if it's not impossible, without the aforementioned VFS work.
author Rapha?l Gom?s <rgomes@octobus.net>
date Wed, 19 Jun 2024 19:10:49 +0200
parents 13815c9decd4
children 72bc29f01570
comparison
equal deleted inserted replaced
52162:13815c9decd4 52163:7346f93be7a4
15 from __future__ import annotations 15 from __future__ import annotations
16 16
17 import binascii 17 import binascii
18 import collections 18 import collections
19 import contextlib 19 import contextlib
20 import functools
21 import io 20 import io
22 import os 21 import os
23 import struct 22 import struct
24 import typing 23 import typing
25 import weakref 24 import weakref
81 80
82 # Force pytype to use the non-vendored package 81 # Force pytype to use the non-vendored package
83 if typing.TYPE_CHECKING: 82 if typing.TYPE_CHECKING:
84 # noinspection PyPackageRequirements 83 # noinspection PyPackageRequirements
85 import attr 84 import attr
85 from .pure.parsers import BaseIndexObject
86 86
87 from . import ( 87 from . import (
88 ancestor, 88 ancestor,
89 dagop, 89 dagop,
90 error, 90 error,
379 feature_config, 379 feature_config,
380 chunk_cache, 380 chunk_cache,
381 default_compression_header, 381 default_compression_header,
382 ): 382 ):
383 self.opener = opener 383 self.opener = opener
384 self.index = index 384 self.index: BaseIndexObject = index
385 385
386 self.index_file = index_file 386 self.index_file = index_file
387 self.data_file = data_file 387 self.data_file = data_file
388 self.sidedata_file = sidedata_file 388 self.sidedata_file = sidedata_file
389 self.inline = inline 389 self.inline = inline
526 ``stoprev`` was hit. 526 ``stoprev`` was hit.
527 """ 527 """
528 generaldelta = self.delta_config.general_delta 528 generaldelta = self.delta_config.general_delta
529 # Try C implementation. 529 # Try C implementation.
530 try: 530 try:
531 return self.index.deltachain(rev, stoprev, generaldelta) 531 return self.index.deltachain(
532 rev, stoprev, generaldelta
533 ) # pytype: disable=attribute-error
532 except AttributeError: 534 except AttributeError:
533 pass 535 pass
534 536
535 chain = [] 537 chain = []
536 538
1244 msg = b"not delay or divert found on this revlog" 1246 msg = b"not delay or divert found on this revlog"
1245 raise error.ProgrammingError(msg) 1247 raise error.ProgrammingError(msg)
1246 return self.canonical_index_file 1248 return self.canonical_index_file
1247 1249
1248 1250
1251 if typing.TYPE_CHECKING:
1252 # Tell Pytype what kind of object we expect
1253 ProxyBase = BaseIndexObject
1254 else:
1255 ProxyBase = object
1256
1257
1258 class RustIndexProxy(ProxyBase):
1259 """Wrapper around the Rust index to fake having direct access to the index.
1260
1261 Rust enforces xor mutability (one mutable reference XOR 1..n non-mutable),
1262 so we can't expose the index from Rust directly, since the `InnerRevlog`
1263 already has ownership of the index. This object redirects all calls to the
1264 index through the Rust-backed `InnerRevlog` glue which defines all
1265 necessary forwarding methods.
1266 """
1267
1268 def __init__(self, inner):
1269 # Do not rename as it's being used to access the index from Rust
1270 self.inner = inner
1271
1272 # TODO possibly write all index methods manually to save on overhead?
1273 def __getattr__(self, name):
1274 return getattr(self.inner, f"_index_{name}")
1275
1276 # Magic methods need to be defined explicitely
1277 def __len__(self):
1278 return self.inner._index___len__()
1279
1280 def __getitem__(self, key):
1281 return self.inner._index___getitem__(key)
1282
1283 def __contains__(self, key):
1284 return self.inner._index___contains__(key)
1285
1286 def __delitem__(self, key):
1287 return self.inner._index___delitem__(key)
1288
1289
1290 class RustVFSWrapper:
1291 """Used to wrap a Python VFS to pass it to Rust to lower the overhead of
1292 calling back multiple times into Python.
1293 """
1294
1295 def __init__(self, inner):
1296 self.inner = inner
1297
1298 def __call__(
1299 self,
1300 path: bytes,
1301 mode: bytes = b"rb",
1302 atomictemp=False,
1303 checkambig=False,
1304 ):
1305 fd = self.inner.__call__(
1306 path=path, mode=mode, atomictemp=atomictemp, checkambig=checkambig
1307 )
1308 # Information that Rust needs to get ownership of the file that's
1309 # being opened.
1310 return (os.dup(fd.fileno()), fd._tempname if atomictemp else None)
1311
1312 def __getattr__(self, name):
1313 return getattr(self.inner, name)
1314
1315
1249 class revlog: 1316 class revlog:
1250 """ 1317 """
1251 the underlying revision storage object 1318 the underlying revision storage object
1252 1319
1253 A revlog consists of two parts, an index and the revision data. 1320 A revlog consists of two parts, an index and the revision data.
1356 self._nodemap_file = None 1423 self._nodemap_file = None
1357 self.postfix = postfix 1424 self.postfix = postfix
1358 self._trypending = trypending 1425 self._trypending = trypending
1359 self._try_split = try_split 1426 self._try_split = try_split
1360 self._may_inline = may_inline 1427 self._may_inline = may_inline
1428 self.uses_rust = False
1361 self.opener = opener 1429 self.opener = opener
1362 if persistentnodemap: 1430 if persistentnodemap:
1363 self._nodemap_file = nodemaputil.get_nodemap_file(self) 1431 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1364 1432
1365 assert target[0] in ALL_KINDS 1433 assert target[0] in ALL_KINDS
1390 self.delta_config.upper_bound_comp = upperboundcomp 1458 self.delta_config.upper_bound_comp = upperboundcomp
1391 1459
1392 # Maps rev to chain base rev. 1460 # Maps rev to chain base rev.
1393 self._chainbasecache = util.lrucachedict(100) 1461 self._chainbasecache = util.lrucachedict(100)
1394 1462
1395 self.index = None 1463 self.index: Optional[BaseIndexObject] = None
1396 self._docket = None 1464 self._docket = None
1397 self._nodemap_docket = None 1465 self._nodemap_docket = None
1398 # Mapping of partial identifiers to full nodes. 1466 # Mapping of partial identifiers to full nodes.
1399 self._pcache = {} 1467 self._pcache = {}
1400 1468
1404 # custom flags. 1472 # custom flags.
1405 self._flagprocessors = dict(flagutil.flagprocessors) 1473 self._flagprocessors = dict(flagutil.flagprocessors)
1406 # prevent nesting of addgroup 1474 # prevent nesting of addgroup
1407 self._adding_group = None 1475 self._adding_group = None
1408 1476
1409 chunk_cache = self._loadindex() 1477 index, chunk_cache = self._loadindex()
1410 self._load_inner(chunk_cache) 1478 self._load_inner(index, chunk_cache)
1411 self._concurrencychecker = concurrencychecker 1479 self._concurrencychecker = concurrencychecker
1412 1480
1413 def _init_opts(self): 1481 def _init_opts(self):
1414 """process options (from above/config) to setup associated default revlog mode 1482 """process options (from above/config) to setup associated default revlog mode
1415 1483
1705 and force_nodemap 1773 and force_nodemap
1706 and parse_index_v1_nodemap is not None 1774 and parse_index_v1_nodemap is not None
1707 ) 1775 )
1708 1776
1709 use_rust_index = False 1777 use_rust_index = False
1710 if rustrevlog is not None and self._nodemap_file is not None: 1778 rust_applicable = self._nodemap_file is not None
1779 rust_applicable = rust_applicable or self.target[0] == KIND_FILELOG
1780 rust_applicable = rust_applicable and getattr(
1781 self.opener, "rust_compatible", True
1782 )
1783 if rustrevlog is not None and rust_applicable:
1711 # we would like to use the rust_index in all case, especially 1784 # we would like to use the rust_index in all case, especially
1712 # because it is necessary for AncestorsIterator and LazyAncestors 1785 # because it is necessary for AncestorsIterator and LazyAncestors
1713 # since the 6.7 cycle. 1786 # since the 6.7 cycle.
1714 # 1787 #
1715 # However, the performance impact of inconditionnaly building the 1788 # However, the performance impact of inconditionnaly building the
1716 # nodemap is currently a problem for non-persistent nodemap 1789 # nodemap is currently a problem for non-persistent nodemap
1717 # repository. 1790 # repository.
1718 use_rust_index = True 1791 use_rust_index = True
1792
1793 if self._format_version != REVLOGV1:
1794 use_rust_index = False
1719 1795
1720 self._parse_index = parse_index_v1 1796 self._parse_index = parse_index_v1
1721 if self._format_version == REVLOGV0: 1797 if self._format_version == REVLOGV0:
1722 self._parse_index = revlogv0.parse_index_v0 1798 self._parse_index = revlogv0.parse_index_v0
1723 elif self._format_version == REVLOGV2: 1799 elif self._format_version == REVLOGV2:
1724 self._parse_index = parse_index_v2 1800 self._parse_index = parse_index_v2
1725 elif self._format_version == CHANGELOGV2: 1801 elif self._format_version == CHANGELOGV2:
1726 self._parse_index = parse_index_cl_v2 1802 self._parse_index = parse_index_cl_v2
1727 elif devel_nodemap: 1803 elif devel_nodemap:
1728 self._parse_index = parse_index_v1_nodemap 1804 self._parse_index = parse_index_v1_nodemap
1729 elif use_rust_index: 1805
1730 self._parse_index = functools.partial( 1806 if use_rust_index:
1731 parse_index_v1_rust, default_header=new_header 1807 # Let the Rust code parse its own index
1732 ) 1808 index, chunkcache = (index_data, None)
1733 try: 1809 self.uses_rust = True
1734 d = self._parse_index(index_data, self._inline) 1810 else:
1735 index, chunkcache = d 1811 try:
1736 use_nodemap = ( 1812 d = self._parse_index(index_data, self._inline)
1737 not self._inline 1813 index, chunkcache = d
1738 and self._nodemap_file is not None 1814 self._register_nodemap_info(index)
1739 and hasattr(index, 'update_nodemap_data') 1815 except (ValueError, IndexError):
1740 ) 1816 raise error.RevlogError(
1741 if use_nodemap: 1817 _(b"index %s is corrupted") % self.display_id
1742 nodemap_data = nodemaputil.persisted_data(self) 1818 )
1743 if nodemap_data is not None:
1744 docket = nodemap_data[0]
1745 if (
1746 len(d[0]) > docket.tip_rev
1747 and d[0][docket.tip_rev][7] == docket.tip_node
1748 ):
1749 # no changelog tampering
1750 self._nodemap_docket = docket
1751 index.update_nodemap_data(*nodemap_data)
1752 except (ValueError, IndexError):
1753 raise error.RevlogError(
1754 _(b"index %s is corrupted") % self.display_id
1755 )
1756 self.index = index
1757 # revnum -> (chain-length, sum-delta-length) 1819 # revnum -> (chain-length, sum-delta-length)
1758 self._chaininfocache = util.lrucachedict(500) 1820 self._chaininfocache = util.lrucachedict(500)
1759 1821
1760 return chunkcache 1822 return index, chunkcache
1761 1823
1762 def _load_inner(self, chunk_cache): 1824 def _load_inner(self, index, chunk_cache):
1763 if self._docket is None: 1825 if self._docket is None:
1764 default_compression_header = None 1826 default_compression_header = None
1765 else: 1827 else:
1766 default_compression_header = self._docket.default_compression_header 1828 default_compression_header = self._docket.default_compression_header
1767 1829
1768 self._inner = _InnerRevlog( 1830 if self.uses_rust:
1769 opener=self.opener, 1831 self._inner = rustrevlog.InnerRevlog(
1770 index=self.index, 1832 opener=RustVFSWrapper(self.opener),
1771 index_file=self._indexfile, 1833 index_data=index,
1772 data_file=self._datafile, 1834 index_file=self._indexfile,
1773 sidedata_file=self._sidedatafile, 1835 data_file=self._datafile,
1774 inline=self._inline, 1836 sidedata_file=self._sidedatafile,
1775 data_config=self.data_config, 1837 inline=self._inline,
1776 delta_config=self.delta_config, 1838 data_config=self.data_config,
1777 feature_config=self.feature_config, 1839 delta_config=self.delta_config,
1778 chunk_cache=chunk_cache, 1840 feature_config=self.feature_config,
1779 default_compression_header=default_compression_header, 1841 chunk_cache=chunk_cache,
1842 default_compression_header=default_compression_header,
1843 revlog_type=self.target[0],
1844 )
1845 self.index = RustIndexProxy(self._inner)
1846 self._register_nodemap_info(self.index)
1847 self.uses_rust = True
1848 else:
1849 self._inner = _InnerRevlog(
1850 opener=self.opener,
1851 index=index,
1852 index_file=self._indexfile,
1853 data_file=self._datafile,
1854 sidedata_file=self._sidedatafile,
1855 inline=self._inline,
1856 data_config=self.data_config,
1857 delta_config=self.delta_config,
1858 feature_config=self.feature_config,
1859 chunk_cache=chunk_cache,
1860 default_compression_header=default_compression_header,
1861 )
1862 self.index = self._inner.index
1863
1864 def _register_nodemap_info(self, index):
1865 use_nodemap = (
1866 not self._inline
1867 and self._nodemap_file is not None
1868 and hasattr(index, 'update_nodemap_data')
1780 ) 1869 )
1870 if use_nodemap:
1871 nodemap_data = nodemaputil.persisted_data(self)
1872 if nodemap_data is not None:
1873 docket = nodemap_data[0]
1874 if (
1875 len(index) > docket.tip_rev
1876 and index[docket.tip_rev][7] == docket.tip_node
1877 ):
1878 # no changelog tampering
1879 self._nodemap_docket = docket
1880 index.update_nodemap_data(
1881 *nodemap_data
1882 ) # pytype: disable=attribute-error
1781 1883
1782 def get_revlog(self): 1884 def get_revlog(self):
1783 """simple function to mirror API of other not-really-revlog API""" 1885 """simple function to mirror API of other not-really-revlog API"""
1784 return self 1886 return self
1785 1887
1867 ) 1969 )
1868 if use_nodemap: 1970 if use_nodemap:
1869 nodemap_data = nodemaputil.persisted_data(self) 1971 nodemap_data = nodemaputil.persisted_data(self)
1870 if nodemap_data is not None: 1972 if nodemap_data is not None:
1871 self._nodemap_docket = nodemap_data[0] 1973 self._nodemap_docket = nodemap_data[0]
1872 self.index.update_nodemap_data(*nodemap_data) 1974 self.index.update_nodemap_data(
1975 *nodemap_data
1976 ) # pytype: disable=attribute-error
1873 1977
1874 def rev(self, node): 1978 def rev(self, node):
1875 """return the revision number associated with a <nodeid>""" 1979 """return the revision number associated with a <nodeid>"""
1876 try: 1980 try:
1877 return self.index.rev(node) 1981 return self.index.rev(node)
2366 return (orderedout, roots, heads) 2470 return (orderedout, roots, heads)
2367 2471
2368 def headrevs(self, revs=None, stop_rev=None): 2472 def headrevs(self, revs=None, stop_rev=None):
2369 if revs is None: 2473 if revs is None:
2370 return self.index.headrevs(None, stop_rev) 2474 return self.index.headrevs(None, stop_rev)
2371 assert stop_rev is None
2372 if rustdagop is not None and self.index.rust_ext_compat: 2475 if rustdagop is not None and self.index.rust_ext_compat:
2373 return rustdagop.headrevs(self.index, revs) 2476 return rustdagop.headrevs(self.index, revs)
2374 return dagop.headrevs(revs, self._uncheckedparentrevs) 2477 return dagop.headrevs(revs, self._uncheckedparentrevs)
2375 2478
2376 def headrevsdiff(self, start, stop): 2479 def headrevsdiff(self, start, stop):
2377 try: 2480 try:
2378 return self.index.headrevsdiff(start, stop) 2481 return self.index.headrevsdiff(
2482 start, stop
2483 ) # pytype: disable=attribute-error
2379 except AttributeError: 2484 except AttributeError:
2380 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop) 2485 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2381 2486
2382 def computephases(self, roots): 2487 def computephases(self, roots):
2383 return self.index.computephasesmapsets(roots) 2488 return self.index.computephasesmapsets(
2489 roots
2490 ) # pytype: disable=attribute-error
2384 2491
2385 def _head_node_ids(self): 2492 def _head_node_ids(self):
2386 try: 2493 try:
2387 return self.index.head_node_ids() 2494 return self.index.head_node_ids() # pytype: disable=attribute-error
2388 except AttributeError: 2495 except AttributeError:
2389 return [self.node(r) for r in self.headrevs()] 2496 return [self.node(r) for r in self.headrevs()]
2390 2497
2391 def heads(self, start=None, stop=None): 2498 def heads(self, start=None, stop=None):
2392 """return the list of all nodes that have no children 2499 """return the list of all nodes that have no children
2440 return pycompat.maplist(self.node, ancs) 2547 return pycompat.maplist(self.node, ancs)
2441 2548
2442 def _commonancestorsheads(self, *revs): 2549 def _commonancestorsheads(self, *revs):
2443 """calculate all the heads of the common ancestors of revs""" 2550 """calculate all the heads of the common ancestors of revs"""
2444 try: 2551 try:
2445 ancs = self.index.commonancestorsheads(*revs) 2552 ancs = self.index.commonancestorsheads(
2553 *revs
2554 ) # pytype: disable=attribute-error
2446 except (AttributeError, OverflowError): # C implementation failed 2555 except (AttributeError, OverflowError): # C implementation failed
2447 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs) 2556 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2448 return ancs 2557 return ancs
2449 2558
2450 def isancestor(self, a, b): 2559 def isancestor(self, a, b):
2474 2583
2475 If includepath is True, return (<roots>::<heads>).""" 2584 If includepath is True, return (<roots>::<heads>)."""
2476 try: 2585 try:
2477 return self.index.reachableroots2( 2586 return self.index.reachableroots2(
2478 minroot, heads, roots, includepath 2587 minroot, heads, roots, includepath
2479 ) 2588 ) # pytype: disable=attribute-error
2480 except AttributeError: 2589 except AttributeError:
2481 return dagop._reachablerootspure( 2590 return dagop._reachablerootspure(
2482 self.parentrevs, minroot, roots, heads, includepath 2591 self.parentrevs, minroot, roots, heads, includepath
2483 ) 2592 )
2484 2593
2485 def ancestor(self, a, b): 2594 def ancestor(self, a, b):
2486 """calculate the "best" common ancestor of nodes a and b""" 2595 """calculate the "best" common ancestor of nodes a and b"""
2487 2596
2488 a, b = self.rev(a), self.rev(b) 2597 a, b = self.rev(a), self.rev(b)
2489 try: 2598 try:
2490 ancs = self.index.ancestors(a, b) 2599 ancs = self.index.ancestors(a, b) # pytype: disable=attribute-error
2491 except (AttributeError, OverflowError): 2600 except (AttributeError, OverflowError):
2492 ancs = ancestor.ancestors(self.parentrevs, a, b) 2601 ancs = ancestor.ancestors(self.parentrevs, a, b)
2493 if ancs: 2602 if ancs:
2494 # choose a consistent winner when there's a tie 2603 # choose a consistent winner when there's a tie
2495 return min(map(self.node, ancs)) 2604 return min(map(self.node, ancs))
2532 def _partialmatch(self, id): 2641 def _partialmatch(self, id):
2533 # we don't care wdirfilenodeids as they should be always full hash 2642 # we don't care wdirfilenodeids as they should be always full hash
2534 maybewdir = self.nodeconstants.wdirhex.startswith(id) 2643 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2535 ambiguous = False 2644 ambiguous = False
2536 try: 2645 try:
2537 partial = self.index.partialmatch(id) 2646 partial = self.index.partialmatch(
2647 id
2648 ) # pytype: disable=attribute-error
2538 if partial and self.hasnode(partial): 2649 if partial and self.hasnode(partial):
2539 if maybewdir: 2650 if maybewdir:
2540 # single 'ff...' match in radix tree, ambiguous with wdir 2651 # single 'ff...' match in radix tree, ambiguous with wdir
2541 ambiguous = True 2652 ambiguous = True
2542 else: 2653 else:
2634 if not maybewdir(prefix): 2745 if not maybewdir(prefix):
2635 return prefix 2746 return prefix
2636 2747
2637 if not getattr(self, 'filteredrevs', None): 2748 if not getattr(self, 'filteredrevs', None):
2638 try: 2749 try:
2639 length = max(self.index.shortest(node), minlength) 2750 shortest = self.index.shortest(
2751 node
2752 ) # pytype: disable=attribute-error
2753 length = max(shortest, minlength)
2640 return disambiguate(hexnode, length) 2754 return disambiguate(hexnode, length)
2641 except error.RevlogError: 2755 except error.RevlogError:
2642 if node != self.nodeconstants.wdirid: 2756 if node != self.nodeconstants.wdirid:
2643 raise error.LookupError( 2757 raise error.LookupError(
2644 node, self.display_id, _(b'no node') 2758 node, self.display_id, _(b'no node')
4087 4201
4088 # rewrite the new index entries 4202 # rewrite the new index entries
4089 ifh.seek(startrev * self.index.entry_size) 4203 ifh.seek(startrev * self.index.entry_size)
4090 for i, e in enumerate(new_entries): 4204 for i, e in enumerate(new_entries):
4091 rev = startrev + i 4205 rev = startrev + i
4092 self.index.replace_sidedata_info(rev, *e) 4206 self.index.replace_sidedata_info(
4207 rev, *e
4208 ) # pytype: disable=attribute-error
4093 packed = self.index.entry_binary(rev) 4209 packed = self.index.entry_binary(rev)
4094 if rev == 0 and self._docket is None: 4210 if rev == 0 and self._docket is None:
4095 header = self._format_flags | self._format_version 4211 header = self._format_flags | self._format_version
4096 header = self.index.pack_header(header) 4212 header = self.index.pack_header(header)
4097 packed = header + packed 4213 packed = header + packed