Mercurial > public > mercurial-scm > hg-stable
changeset 52971:469b9a628b51
dirstatemap: update, document and type the identity tracking
This new form should hopefully be clearer and less error prone.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 18 Feb 2025 22:24:08 +0100 |
parents | 42f78c859dd1 |
children | e189a555865c |
files | mercurial/dirstatemap.py |
diffstat | 1 files changed, 38 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/dirstatemap.py Tue Feb 18 22:49:43 2025 +0100 +++ b/mercurial/dirstatemap.py Tue Feb 18 22:24:08 2025 +0100 @@ -10,6 +10,7 @@ from typing import ( Optional, TYPE_CHECKING, + Tuple, ) from .i18n import _ @@ -109,9 +110,6 @@ # for consistent view between _pl() and _read() invocations self._pendingmode = None - def _set_identity(self) -> None: - self.identity = self._get_current_identity() - def _get_current_identity(self) -> Optional[typelib.CacheStat]: # TODO have a cleaner approach on httpstaticrepo side path = self._opener.join(self._filename) @@ -172,14 +170,34 @@ self._pendingmode = mode return fp - def _readdirstatefile(self, size: int = -1) -> bytes: + def _readdirstatefile( + self, + size: int = -1, + ) -> Tuple[Optional[typelib.CacheStat], bytes]: + """read the content of the file used as "entry point" for the dirstate + + Return a (identity, data) tuple. The identity can be used for cache + validation and concurrent changes detection and must be set as + `self.identity` if `data` is preserved. + """ + identity = self._get_current_identity() + # There is a race condition between fetching the identity and reading + # the file content. Another process might update the file after we get + # the identity information. However this is fine for our purpose as + # this will only create false-positive for "data changed" and no + # false-negative. + # + # in addition for the case that matter the most (updating the dirstate + # semantic content and not just some cache information), this will not + # happens as the lock should be held when changes to the dirstate + # content are made. testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') try: with self._opendirstatefile() as fp: - return fp.read(size) + return identity, fp.read(size) except FileNotFoundError: # File doesn't exist, so the current state is empty - return b'' + return identity, b'' @property def docket(self) -> docketmod.DirstateDocket: @@ -189,8 +207,7 @@ raise error.ProgrammingError( b'dirstate only has a docket in v2 format' ) - self._set_identity() - data = self._readdirstatefile() + self.identity, data = self._readdirstatefile() if data == b'' or data.startswith(docketmod.V2_FORMAT_MARKER): self._docket = docketmod.DirstateDocket.parse( data, self._nodeconstants @@ -281,7 +298,7 @@ def _v1_parents(self, from_v2_exception=None): read_len = self._nodelen * 2 - st = self._readdirstatefile(read_len) + _identity, st = self._readdirstatefile(read_len) l = len(st) if l == read_len: self._parents = ( @@ -402,16 +419,14 @@ self.docket except error.CorruptedDirstate: # fall back to dirstate-v1 if we fail to read v2 - self._set_identity() - st = self._readdirstatefile() + self.identity, st = self._readdirstatefile() else: if not self.docket.uuid: return testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') st = self._read_v2_data() else: - self._set_identity() - st = self._readdirstatefile() + self.identity, st = self._readdirstatefile() if not st: return @@ -474,7 +489,14 @@ self._dirtyparents = False @propertycache - def identity(self): + def identity(self) -> Optional[typelib.CacheStat]: + """A cache identifier for the state of the file as data were read + + This must always be set with the object returned from + `self._readdirstatefile()`. assigning another value later will break + some security mechanism and can lead to misbehavior when concurrent + operation are run. + """ self._map return self.identity @@ -733,11 +755,9 @@ def _v1_map(self, from_v2_exception=None): try: - self._set_identity() + self.identity, data = self._readdirstatefile() identity = self._get_rust_identity() - self._map, parents = rustmod.DirstateMap.new_v1( - self._readdirstatefile(), identity - ) + self._map, parents = rustmod.DirstateMap.new_v1(data, identity) except OSError as e: if from_v2_exception is not None: raise e from from_v2_exception