Mercurial > public > mercurial-scm > hg
comparison mercurial/merge.py @ 52056:8b7123c8947b
update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap handling, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 01 Oct 2024 13:49:11 +0200 |
parents | b332ae615714 |
children | 43e15277498e |
comparison
equal
deleted
inserted
replaced
52055:b332ae615714 | 52056:8b7123c8947b |
---|---|
6 # GNU General Public License version 2 or any later version. | 6 # GNU General Public License version 2 or any later version. |
7 | 7 |
8 from __future__ import annotations | 8 from __future__ import annotations |
9 | 9 |
10 import collections | 10 import collections |
11 import os | |
11 import struct | 12 import struct |
12 import typing | 13 import typing |
13 from typing import Dict, Optional, Tuple | 14 from typing import Dict, Optional, Tuple |
14 | 15 |
15 from .i18n import _ | 16 from .i18n import _ |
32 mergestate as mergestatemod, | 33 mergestate as mergestatemod, |
33 obsutil, | 34 obsutil, |
34 pathutil, | 35 pathutil, |
35 policy, | 36 policy, |
36 pycompat, | 37 pycompat, |
38 requirements, | |
37 scmutil, | 39 scmutil, |
38 subrepoutil, | 40 subrepoutil, |
39 util, | 41 util, |
40 worker, | 42 worker, |
41 ) | 43 ) |
44 | |
45 rust_update_mod = policy.importrust("update") | |
42 | 46 |
43 _pack = struct.pack | 47 _pack = struct.pack |
44 _unpack = struct.unpack | 48 _unpack = struct.unpack |
45 | 49 |
46 | 50 |
145 b'experimental', b'merge.checkpathconflicts' | 149 b'experimental', b'merge.checkpathconflicts' |
146 ) | 150 ) |
147 dircache = dict() | 151 dircache = dict() |
148 dirstate = repo.dirstate | 152 dirstate = repo.dirstate |
149 wvfs = repo.wvfs | 153 wvfs = repo.wvfs |
154 # wouldn't it be easier to loop over unknown files (and dirs)? | |
155 | |
150 if not force: | 156 if not force: |
151 | 157 |
152 def collectconflicts(conflicts, config): | 158 def collectconflicts(conflicts, config): |
153 if config == b'abort': | 159 if config == b'abort': |
154 abortconflicts.update(conflicts) | 160 abortconflicts.update(conflicts) |
1833 UPDATECHECK_ABORT = b'abort' # handled at higher layers | 1839 UPDATECHECK_ABORT = b'abort' # handled at higher layers |
1834 UPDATECHECK_NONE = b'none' | 1840 UPDATECHECK_NONE = b'none' |
1835 UPDATECHECK_LINEAR = b'linear' | 1841 UPDATECHECK_LINEAR = b'linear' |
1836 UPDATECHECK_NO_CONFLICT = b'noconflict' | 1842 UPDATECHECK_NO_CONFLICT = b'noconflict' |
1837 | 1843 |
1844 # Let extensions turn off any Rust code in the update code if that interferes | |
1845 # with their patching. | |
1846 # This being `True` does not mean that you have Rust extensions installed or | |
1847 # that the Rust path will be taken for any given invocation. | |
1848 MAYBE_USE_RUST_UPDATE = True | |
1849 | |
1838 | 1850 |
1839 def _update( | 1851 def _update( |
1840 repo, | 1852 repo, |
1841 node, | 1853 node, |
1842 branchmerge, | 1854 branchmerge, |
2005 followcopies = False | 2017 followcopies = False |
2006 elif not pas[0]: | 2018 elif not pas[0]: |
2007 followcopies = False | 2019 followcopies = False |
2008 if not branchmerge and not wc.dirty(missing=True): | 2020 if not branchmerge and not wc.dirty(missing=True): |
2009 followcopies = False | 2021 followcopies = False |
2022 | |
2023 update_from_null = False | |
2024 update_from_null_fallback = False | |
2025 if ( | |
2026 MAYBE_USE_RUST_UPDATE | |
2027 and rust_update_mod is not None | |
2028 and p1.rev() == nullrev | |
2029 and not branchmerge | |
2030 # TODO it's probably not too hard to pass down the transaction and | |
2031 # respect the write patterns from Rust. But since it doesn't affect | |
2032 # a simple update from null, then it doesn't matter yet. | |
2033 and repo.currenttransaction() is None | |
2034 and matcher is None | |
2035 and not wc.mergestate().active() | |
2036 and b'.hgsubstate' not in p2 | |
2037 ): | |
2038 working_dir_iter = os.scandir(repo.root) | |
2039 maybe_hg_folder = next(working_dir_iter) | |
2040 assert maybe_hg_folder is not None | |
2041 if maybe_hg_folder.name == b".hg": | |
2042 try: | |
2043 next(working_dir_iter) | |
2044 except StopIteration: | |
2045 update_from_null = True | |
2046 | |
2047 if update_from_null: | |
2048 # Check the narrowspec and sparsespec here to display warnings | |
2049 # more easily. | |
2050 # TODO figure out a way of bubbling up warnings to Python | |
2051 # while not polluting the Rust code (probably a channel) | |
2052 repo.narrowmatch() | |
2053 sparse.matcher(repo, [nullrev, p2.rev()]) | |
2054 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) | |
2055 # note that we're in the middle of an update | |
2056 repo.vfs.write(b'updatestate', p2.hex()) | |
2057 try: | |
2058 updated_count = rust_update_mod.update_from_null( | |
2059 repo.root, p2.rev() | |
2060 ) | |
2061 except rust_update_mod.FallbackError: | |
2062 update_from_null_fallback = True | |
2063 else: | |
2064 # We've changed the dirstate from Rust, we need to tell Python | |
2065 repo.dirstate.invalidate() | |
2066 # This includes setting the parents, since they are not read | |
2067 # again on invalidation | |
2068 with repo.dirstate.changing_parents(repo): | |
2069 repo.dirstate.setparents(fp2) | |
2070 repo.dirstate.setbranch(p2.branch(), repo.currenttransaction()) | |
2071 sparse.prunetemporaryincludes(repo) | |
2072 repo.hook(b'update', parent1=xp1, parent2=xp2, error=0) | |
2073 # update completed, clear state | |
2074 util.unlink(repo.vfs.join(b'updatestate')) | |
2075 return updateresult(updated_count, 0, 0, 0) | |
2010 | 2076 |
2011 ### calculate phase | 2077 ### calculate phase |
2012 mresult = calculateupdates( | 2078 mresult = calculateupdates( |
2013 repo, | 2079 repo, |
2014 wc, | 2080 wc, |
2129 fp1, fp2, xp1, xp2 = fp2, repo.nullid, xp2, b'' | 2195 fp1, fp2, xp1, xp2 = fp2, repo.nullid, xp2, b'' |
2130 # If we're doing a partial update, we need to skip updating | 2196 # If we're doing a partial update, we need to skip updating |
2131 # the dirstate. | 2197 # the dirstate. |
2132 always = matcher is None or matcher.always() | 2198 always = matcher is None or matcher.always() |
2133 updatedirstate = updatedirstate and always and not wc.isinmemory() | 2199 updatedirstate = updatedirstate and always and not wc.isinmemory() |
2134 if updatedirstate: | 2200 # If we're in the fallback case, we've already done this |
2201 if updatedirstate and not update_from_null_fallback: | |
2135 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) | 2202 repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) |
2136 # note that we're in the middle of an update | 2203 # note that we're in the middle of an update |
2137 repo.vfs.write(b'updatestate', p2.hex()) | 2204 repo.vfs.write(b'updatestate', p2.hex()) |
2138 | 2205 |
2206 # TODO don't run if Rust is available | |
2139 _advertisefsmonitor( | 2207 _advertisefsmonitor( |
2140 repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node() | 2208 repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node() |
2141 ) | 2209 ) |
2142 | 2210 |
2143 wantfiledata = updatedirstate and not branchmerge | 2211 wantfiledata = updatedirstate and not branchmerge |
2170 | 2238 |
2171 repo.setparents(fp1, fp2) | 2239 repo.setparents(fp1, fp2) |
2172 mergestatemod.recordupdates( | 2240 mergestatemod.recordupdates( |
2173 repo, mresult.actionsdict, branchmerge, getfiledata | 2241 repo, mresult.actionsdict, branchmerge, getfiledata |
2174 ) | 2242 ) |
2175 # update completed, clear state | |
2176 util.unlink(repo.vfs.join(b'updatestate')) | |
2177 | |
2178 if not branchmerge: | 2243 if not branchmerge: |
2179 repo.dirstate.setbranch( | 2244 repo.dirstate.setbranch( |
2180 p2.branch(), repo.currenttransaction() | 2245 p2.branch(), repo.currenttransaction() |
2181 ) | 2246 ) |
2247 | |
2248 # update completed, clear state | |
2249 util.unlink(repo.vfs.join(b'updatestate')) | |
2182 | 2250 |
2183 # If we're updating to a location, clean up any stale temporary includes | 2251 # If we're updating to a location, clean up any stale temporary includes |
2184 # (ex: this happens during hg rebase --abort). | 2252 # (ex: this happens during hg rebase --abort). |
2185 if not branchmerge: | 2253 if not branchmerge: |
2186 sparse.prunetemporaryincludes(repo) | 2254 sparse.prunetemporaryincludes(repo) |