Mercurial > public > mercurial-scm > hg-stable
diff mercurial/merge.py @ 52084:8b7123c8947b
update: add a Rust fast-path when updating from null (and clean)
This case is easy to detect and we have all we need to generate a valid
working copy and dirstate entirely in Rust, which speeds things up
considerably:
On my machine updating a repo of ~300k files goes from 10.00s down to 4.2s,
all while consuming 50% less system time, with all caches hot.
Something to note is that further improvements will probably happen
with the upcoming `InnerRevlog` series that does smarter
mmap handling, especially for filelogs.
Here are benchmark numbers on a machine with only 4 cores (and no SMT enabled)
```
### data-env-vars.name = heptapod-public-2024-03-25-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 5.328762 ~~~~~
rust: 1.308654 (-75.44%, -4.02)
### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 1.693271 ~~~~~
rust: 1.151053 (-32.02%, -0.54)
### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 38.901613 ~~~~~
rust: 11.637880 (-70.08%, -27.26)
### data-env-vars.name = netbsd-xsrc-public-2024-09-19-ds2-pnm
# benchmark.name = hg.command.update
# bin-env-vars.hg.py-re2-module = default
# bin-env-vars.hg.changeset.node = <this change>
# benchmark.variants.atomic-update = no
# benchmark.variants.scenario = null-to-tip
# benchmark.variants.worker = default
default: 4.793727 ~~~~~
rust: 1.505905 (-68.59%, -3.29)
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 01 Oct 2024 13:49:11 +0200 |
parents | b332ae615714 |
children | 43e15277498e |
line wrap: on
line diff
--- a/mercurial/merge.py Wed Oct 16 19:14:30 2024 +0200 +++ b/mercurial/merge.py Tue Oct 01 13:49:11 2024 +0200 @@ -8,6 +8,7 @@ from __future__ import annotations import collections +import os import struct import typing from typing import Dict, Optional, Tuple @@ -34,12 +35,15 @@ pathutil, policy, pycompat, + requirements, scmutil, subrepoutil, util, worker, ) +rust_update_mod = policy.importrust("update") + _pack = struct.pack _unpack = struct.unpack @@ -147,6 +151,8 @@ dircache = dict() dirstate = repo.dirstate wvfs = repo.wvfs + # wouldn't it be easier to loop over unknown files (and dirs)? + if not force: def collectconflicts(conflicts, config): @@ -1835,6 +1841,12 @@ UPDATECHECK_LINEAR = b'linear' UPDATECHECK_NO_CONFLICT = b'noconflict' +# Let extensions turn off any Rust code in the update code if that interferes +# will their patching. +# This being `True` does not mean that you have Rust extensions installed or +# that the Rust path will be taken for any given invocation. +MAYBE_USE_RUST_UPDATE = True + def _update( repo, @@ -2008,6 +2020,60 @@ if not branchmerge and not wc.dirty(missing=True): followcopies = False + update_from_null = False + update_from_null_fallback = False + if ( + MAYBE_USE_RUST_UPDATE + and rust_update_mod is not None + and p1.rev() == nullrev + and not branchmerge + # TODO it's probably not too hard to pass down the transaction and + # respect the write patterns from Rust. But since it doesn't affect + # a simple update from null, then it doesn't matter yet. + and repo.currenttransaction() is None + and matcher is None + and not wc.mergestate().active() + and b'.hgsubstate' not in p2 + ): + working_dir_iter = os.scandir(repo.root) + maybe_hg_folder = next(working_dir_iter) + assert maybe_hg_folder is not None + if maybe_hg_folder.name == b".hg": + try: + next(working_dir_iter) + except StopIteration: + update_from_null = True + + if update_from_null: + # Check the narrowspec and sparsespec here to display warnings + # more easily. 
+ # TODO figure out of a way of bubbling up warnings to Python + # while not polluting the Rust code (probably a channel) + repo.narrowmatch() + sparse.matcher(repo, [nullrev, p2.rev()]) + repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) + # note that we're in the middle of an update + repo.vfs.write(b'updatestate', p2.hex()) + try: + updated_count = rust_update_mod.update_from_null( + repo.root, p2.rev() + ) + except rust_update_mod.FallbackError: + update_from_null_fallback = True + else: + # We've changed the dirstate from Rust, we need to tell Python + repo.dirstate.invalidate() + # This includes setting the parents, since they are not read + # again on invalidation + with repo.dirstate.changing_parents(repo): + repo.dirstate.setparents(fp2) + repo.dirstate.setbranch(p2.branch(), repo.currenttransaction()) + sparse.prunetemporaryincludes(repo) + repo.hook(b'update', parent1=xp1, parent2=xp2, error=0) + # update completed, clear state + util.unlink(repo.vfs.join(b'updatestate')) + return updateresult(updated_count, 0, 0, 0) + ### calculate phase mresult = calculateupdates( repo, @@ -2131,11 +2197,13 @@ # the dirstate. 
always = matcher is None or matcher.always() updatedirstate = updatedirstate and always and not wc.isinmemory() - if updatedirstate: + # If we're in the fallback case, we've already done this + if updatedirstate and not update_from_null_fallback: repo.hook(b'preupdate', throw=True, parent1=xp1, parent2=xp2) # note that we're in the middle of an update repo.vfs.write(b'updatestate', p2.hex()) + # TODO don't run if Rust is available _advertisefsmonitor( repo, mresult.len((mergestatemod.ACTION_GET,)), p1.node() ) @@ -2172,14 +2240,14 @@ mergestatemod.recordupdates( repo, mresult.actionsdict, branchmerge, getfiledata ) - # update completed, clear state - util.unlink(repo.vfs.join(b'updatestate')) - if not branchmerge: repo.dirstate.setbranch( p2.branch(), repo.currenttransaction() ) + # update completed, clear state + util.unlink(repo.vfs.join(b'updatestate')) + # If we're updating to a location, clean up any stale temporary includes # (ex: this happens during hg rebase --abort). if not branchmerge: