--- a/mercurial/dirstatemap.py Tue Feb 18 22:49:43 2025 +0100
+++ b/mercurial/dirstatemap.py Tue Feb 18 22:24:08 2025 +0100
@@ -10,6 +10,7 @@
from typing import (
Optional,
TYPE_CHECKING,
+ Tuple,
)
from .i18n import _
@@ -109,9 +110,6 @@
# for consistent view between _pl() and _read() invocations
self._pendingmode = None
- def _set_identity(self) -> None:
- self.identity = self._get_current_identity()
-
def _get_current_identity(self) -> Optional[typelib.CacheStat]:
# TODO have a cleaner approach on httpstaticrepo side
path = self._opener.join(self._filename)
@@ -172,14 +170,34 @@
self._pendingmode = mode
return fp
- def _readdirstatefile(self, size: int = -1) -> bytes:
+ def _readdirstatefile(
+ self,
+ size: int = -1,
+ ) -> Tuple[Optional[typelib.CacheStat], bytes]:
+ """read the content of the file used as "entry point" for the dirstate
+
+ Return a (identity, data) tuple. The identity can be used for cache
+ validation and concurrent changes detection and must be set as
+ `self.identity` if `data` is preserved.
+ """
+ identity = self._get_current_identity()
+ # There is a race condition between fetching the identity and reading
+ # the file content. Another process might update the file after we get
+ # the identity information. However this is fine for our purpose as
+ # this will only create false-positive for "data changed" and no
+ # false-negative.
+ #
+ # in addition for the case that matter the most (updating the dirstate
+ # semantic content and not just some cache information), this will not
+ # happens as the lock should be held when changes to the dirstate
+ # content are made.
testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
try:
with self._opendirstatefile() as fp:
- return fp.read(size)
+ return identity, fp.read(size)
except FileNotFoundError:
# File doesn't exist, so the current state is empty
- return b''
+ return identity, b''
@property
def docket(self) -> docketmod.DirstateDocket:
@@ -189,8 +207,7 @@
raise error.ProgrammingError(
b'dirstate only has a docket in v2 format'
)
- self._set_identity()
- data = self._readdirstatefile()
+ self.identity, data = self._readdirstatefile()
if data == b'' or data.startswith(docketmod.V2_FORMAT_MARKER):
self._docket = docketmod.DirstateDocket.parse(
data, self._nodeconstants
@@ -281,7 +298,7 @@
def _v1_parents(self, from_v2_exception=None):
read_len = self._nodelen * 2
- st = self._readdirstatefile(read_len)
+ _identity, st = self._readdirstatefile(read_len)
l = len(st)
if l == read_len:
self._parents = (
@@ -402,16 +419,14 @@
self.docket
except error.CorruptedDirstate:
# fall back to dirstate-v1 if we fail to read v2
- self._set_identity()
- st = self._readdirstatefile()
+ self.identity, st = self._readdirstatefile()
else:
if not self.docket.uuid:
return
testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
st = self._read_v2_data()
else:
- self._set_identity()
- st = self._readdirstatefile()
+ self.identity, st = self._readdirstatefile()
if not st:
return
@@ -474,7 +489,14 @@
self._dirtyparents = False
@propertycache
- def identity(self):
+ def identity(self) -> Optional[typelib.CacheStat]:
+ """A cache identifier for the state of the file as data were read
+
+ This must always be set with the object returned from
+ `self._readdirstatefile()`. assigning another value later will break
+ some security mechanism and can lead to misbehavior when concurrent
+ operation are run.
+ """
self._map
return self.identity
@@ -733,11 +755,9 @@
def _v1_map(self, from_v2_exception=None):
try:
- self._set_identity()
+ self.identity, data = self._readdirstatefile()
identity = self._get_rust_identity()
- self._map, parents = rustmod.DirstateMap.new_v1(
- self._readdirstatefile(), identity
- )
+ self._map, parents = rustmod.DirstateMap.new_v1(data, identity)
except OSError as e:
if from_v2_exception is not None:
raise e from from_v2_exception