comparison mercurial/merge.py @ 52056:8b7123c8947b

update: add a Rust fast-path when updating from null (and clean) This case is easy to detect and we have all we need to generate a valid working copy and dirstate entirely in Rust, which speeds things up considerably: On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s, all while consuming 50% less system time, with all caches hot. Something to note is that further improvements will probably happen with the upcoming `InnerRevlog` series that does smarter mmap handling, especially for filelogs. Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled) ``` ### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 5.328762 ~~~~~ rust: 1.308654 (-75.44%, -4.02) ### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 1.693271 ~~~~~ rust: 1.151053 (-32.02%, -0.54) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # benchmark.variants.worker = default default: 38.901613 ~~~~~ rust: 11.637880 (-70.08%, -27.26) ### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm # benchmark.name = hg.command.update # bin-env-vars.hg.py-re2-module = default # bin-env-vars.hg.changeset.node = <this change> # benchmark.variants.atomic-update = no # benchmark.variants.scenario = null-to-tip # 
benchmark.variants.worker = default default: 4.793727 ~~~~~ rust: 1.505905 (-68.59%, -3.29) ```
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 01 Oct 2024 13:49:11 +0200
parents b332ae615714
children 43e15277498e
comparison
equal deleted inserted replaced
52055:b332ae615714 52056:8b7123c8947b
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 from __future__ import annotations 8 from __future__ import annotations
9 9
10 import collections 10 import collections
11 import os
11 import struct 12 import struct
12 import typing 13 import typing
13 from typing import Dict, Optional, Tuple 14 from typing import Dict, Optional, Tuple
14 15
15 from .i18n import _ 16 from .i18n import _
32 mergestate as mergestatemod, 33 mergestate as mergestatemod,
33 obsutil, 34 obsutil,
34 pathutil, 35 pathutil,
35 policy, 36 policy,
36 pycompat, 37 pycompat,
38 requirements,
37 scmutil, 39 scmutil,
38 subrepoutil, 40 subrepoutil,
39 util, 41 util,
40 worker, 42 worker,
41 ) 43 )
44
45 rust_update_mod = policy.importrust("update")
42 46
43 _pack = struct.pack 47 _pack = struct.pack
44 _unpack = struct.unpack 48 _unpack = struct.unpack
45 49
46 50
145 b'experimental', b'merge.checkpathconflicts' 149 b'experimental', b'merge.checkpathconflicts'
146 ) 150 )
147 dircache = dict() 151 dircache = dict()
148 dirstate = repo.dirstate 152 dirstate = repo.dirstate
149 wvfs = repo.wvfs 153 wvfs = repo.wvfs
154 # wouldn't it be easier to loop over unknown files (and dirs)?
155
150 if not force: 156 if not force:
151 157
152 def collectconflicts(conflicts, config): 158 def collectconflicts(conflicts, config):
153 if config == b'abort': 159 if config == b'abort':
154 abortconflicts.update(conflicts) 160 abortconflicts.update(conflicts)
1833 UPDATECHECK_ABORT = b'abort' # handled at higher layers 1839 UPDATECHECK_ABORT = b'abort' # handled at higher layers
1834 UPDATECHECK_NONE = b'none' 1840 UPDATECHECK_NONE = b'none'
1835 UPDATECHECK_LINEAR = b'linear' 1841 UPDATECHECK_LINEAR = b'linear'
1836 UPDATECHECK_NO_CONFLICT = b'noconflict' 1842 UPDATECHECK_NO_CONFLICT = b'noconflict'
1837 1843
1844 # Let extensions turn off any Rust code in the update code if that interferes
1845 # with their patching.
1846 # This being `True` does not mean that you have Rust extensions installed or
1847 # that the Rust path will be taken for any given invocation.
1848 MAYBE_USE_RUST_UPDATE = True
1849
1838 1850
1839 def _update( 1851 def _update(
1840 repo, 1852 repo,
1841 node, 1853 node,
1842 branchmerge, 1854 branchmerge,
2005 followcopies = False 2017 followcopies = False
2006 elif not pas[0]: 2018 elif not pas[0]:
2007 followcopies = False 2019 followcopies = False
2008 if not branchmerge and not wc.dirty(missing=True): 2020 if not branchmerge and not wc.dirty(missing=True):
2009 followcopies = False 2021 followcopies = False
2022
2023 update_from_null = False
2024 update_from_null_fallback = False
2025 if (
2026 MAYBE_USE_RUST_UPDATE
2027 and rust_update_mod is not None
2028 and p1.rev() == nullrev
2029 and not branchmerge
2030 # TODO it's probably not too hard to pass down the transaction and
2031 # respect the write patterns from Rust. But since it doesn't affect
2032 # a simple update from null, then it doesn't matter yet.
2033 and repo.currenttransaction() is None
2034 and matcher is None
2035 and not wc.mergestate().active()
2036 and b'.hgsubstate' not in p2
2037 ):
2038 working_dir_iter = os.scandir(repo.root)
2039 maybe_hg_folder = next(working_dir_iter)
2040 assert maybe_hg_folder is not None
2041 if maybe_hg_folder.name == b".hg":
2042 try:
2043 next(working_dir_iter)
2044 except StopIteration:
2045 update_from_null = True
2046
2047 if update_from_null:
2048 # Check the narrowspec and sparsespec here to display warnings
2049 # more easily.
2050 # TODO figure out a way of bubbling up warnings to Python
2051 # while not polluting the Rust code (probably a channel)
2052 repo.narrowmatch()
2053 sparse.matcher(repo, [nullrev, p2.rev()])
2054 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
2055 # note that we're in the middle of an update
2056 repo.vfs.write(b'updatestate', p2.hex())
2057 try:
2058 updated_count = rust_update_mod.update_from_null(
2059 repo.root, p2.rev()
2060 )
2061 except rust_update_mod.FallbackError:
2062 update_from_null_fallback = True
2063 else:
2064 # We've changed the dirstate from Rust, we need to tell Python
2065 repo.dirstate.invalidate()
2066 # This includes setting the parents, since they are not read
2067 # again on invalidation
2068 with repo.dirstate.changing_parents(repo):
2069 repo.dirstate.setparents(fp2)
2070 repo.dirstate.setbranch(p2.branch(), repo.currenttransaction())
2071 sparse.prunetemporaryincludes(repo)
2072 repo.hook(b'update', parent1=xp1, parent2=xp2, error=0)
2073 # update completed, clear state
2074 util.unlink(repo.vfs.join(b'updatestate'))
2075 return updateresult(updated_count, 0, 0, 0)
2010 2076
2011 ### calculate phase 2077 ### calculate phase
2012 mresult = calculateupdates( 2078 mresult = calculateupdates(
2013 repo, 2079 repo,
2014 wc, 2080 wc,
2129 fp1, fp2, xp1, xp2 = fp2, repo.nullid, xp2, b'' 2195 fp1, fp2, xp1, xp2 = fp2, repo.nullid, xp2, b''
2130 # If we're doing a partial update, we need to skip updating 2196 # If we're doing a partial update, we need to skip updating
2131 # the dirstate. 2197 # the dirstate.
2132 always = matcher is None or matcher.always() 2198 always = matcher is None or matcher.always()
2133 updatedirstate = updatedirstate and always and not wc.isinmemory() 2199 updatedirstate = updatedirstate and always and not wc.isinmemory()
2134 if updatedirstate: 2200 # If we're in the fallback case, we've already done this
2201 if updatedirstate and not update_from_null_fallback:
2135 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) 2202 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2)
2136 # note that we're in the middle of an update 2203 # note that we're in the middle of an update
2137 repo.vfs.write(b'updatestate', p2.hex()) 2204 repo.vfs.write(b'updatestate', p2.hex())
2138 2205
2206 # TODO don't run if Rust is available
2139 _advertisefsmonitor( 2207 _advertisefsmonitor(
2140 repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node() 2208 repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node()
2141 ) 2209 )
2142 2210
2143 wantfiledata = updatedirstate and not branchmerge 2211 wantfiledata = updatedirstate and not branchmerge
2170 2238
2171 repo.setparents(fp1, fp2) 2239 repo.setparents(fp1, fp2)
2172 mergestatemod.recordupdates( 2240 mergestatemod.recordupdates(
2173 repo, mresult.actionsdict, branchmerge, getfiledata 2241 repo, mresult.actionsdict, branchmerge, getfiledata
2174 ) 2242 )
2175 # update completed, clear state
2176 util.unlink(repo.vfs.join(b'updatestate'))
2177
2178 if not branchmerge: 2243 if not branchmerge:
2179 repo.dirstate.setbranch( 2244 repo.dirstate.setbranch(
2180 p2.branch(), repo.currenttransaction() 2245 p2.branch(), repo.currenttransaction()
2181 ) 2246 )
2247
2248 # update completed, clear state
2249 util.unlink(repo.vfs.join(b'updatestate'))
2182 2250
2183 # If we're updating to a location, clean up any stale temporary includes 2251 # If we're updating to a location, clean up any stale temporary includes
2184 # (ex: this happens during hg rebase --abort). 2252 # (ex: this happens during hg rebase --abort).
2185 if not branchmerge: 2253 if not branchmerge:
2186 sparse.prunetemporaryincludes(repo) 2254 sparse.prunetemporaryincludes(repo)