typing: introduce a `types` module and a MatcherT alias
This is a proposal to formalise the way we do typing, and to do more of it.
The immediate motivation is to help break the 100+ module cycle that is
slowing pytype down considerably.
--- a/contrib/import-checker.py Tue Feb 04 14:02:20 2025 -0500
+++ b/contrib/import-checker.py Thu Jan 30 18:22:01 2025 +0100
@@ -26,6 +26,8 @@
'mercurial.hgweb.request',
'mercurial.i18n',
'mercurial.interfaces',
+ 'mercurial.interfaces._basetypes',
+ 'mercurial.interfaces.types',
'mercurial.node',
'mercurial.pycompat',
# for revlog to re-export constant to extensions
--- a/hgext/git/dirstate.py Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/git/dirstate.py Thu Jan 30 18:22:01 2025 +0100
@@ -13,6 +13,7 @@
Tuple,
)
+from mercurial.interfaces.types import MatcherT
from mercurial.node import sha1nodeconstants
from mercurial import (
dirstatemap,
@@ -163,7 +164,7 @@
def status(
self,
- match: matchmod.basematcher,
+ match: MatcherT,
subrepos: bool,
ignored: bool,
clean: bool,
@@ -336,7 +337,7 @@
r = util.pathto(self._root, cwd, f)
return r
- def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+ def matches(self, match: MatcherT) -> Iterable[bytes]:
for x in self.git.index:
p = pycompat.fsencode(x.path)
if match(p):
@@ -354,7 +355,7 @@
def walk(
self,
- match: matchmod.basematcher,
+ match: MatcherT,
subrepos: Any,
unknown: bool,
ignored: bool,
--- a/hgext/git/manifest.py Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/git/manifest.py Thu Jan 30 18:22:01 2025 +0100
@@ -11,6 +11,7 @@
)
from mercurial.node import sha1nodeconstants
+from mercurial.interfaces.types import MatcherT
from mercurial import (
match as matchmod,
@@ -288,7 +289,7 @@
elif match(realname):
yield pycompat.fsencode(realname)
- def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+ def walk(self, match: MatcherT) -> Iterator[bytes]:
# TODO: this is a very lazy way to merge in the pending
# changes. There is absolutely room for optimization here by
# being clever about walking over the sets...
--- a/hgext/largefiles/overrides.py Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/largefiles/overrides.py Thu Jan 30 18:22:01 2025 +0100
@@ -19,6 +19,8 @@
from mercurial.i18n import _
+from mercurial.interfaces.types import MatcherT
+
from mercurial.hgweb import webcommands
from mercurial import (
@@ -1232,7 +1234,7 @@
node,
kind,
decode=True,
- match: Optional[matchmod.basematcher] = None,
+ match: Optional[MatcherT] = None,
prefix=b'',
mtime=None,
subrepos=None,
@@ -1347,9 +1349,7 @@
@eh.wrapfunction(subrepo.hgsubrepo, 'archive')
-def hgsubrepoarchive(
- orig, repo, opener, prefix, match: matchmod.basematcher, decode=True
-):
+def hgsubrepoarchive(orig, repo, opener, prefix, match: MatcherT, decode=True):
lfenabled = hasattr(repo._repo, '_largefilesenabled')
if not lfenabled or not repo._repo.lfstatus:
return orig(repo, opener, prefix, match, decode)
--- a/mercurial/dirstate.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/dirstate.py Thu Jan 30 18:22:01 2025 +0100
@@ -24,6 +24,7 @@
)
from .i18n import _
+from .interfaces.types import MatcherT
from hgdemandimport import tracing
@@ -483,7 +484,7 @@
return self._map.hastrackeddir(d)
@rootcache(b'.hgignore')
- def _ignore(self) -> matchmod.basematcher:
+ def _ignore(self) -> MatcherT:
files = self._ignorefiles()
if not files:
return matchmod.never()
@@ -1359,7 +1360,7 @@
def walk(
self,
- match: matchmod.basematcher,
+ match: MatcherT,
subrepos: Any,
unknown: bool,
ignored: bool,
@@ -1639,7 +1640,7 @@
def status(
self,
- match: matchmod.basematcher,
+ match: MatcherT,
subrepos: bool,
ignored: bool,
clean: bool,
@@ -1796,7 +1797,7 @@
)
return (lookup, status, mtime_boundary)
- def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+ def matches(self, match: MatcherT) -> Iterable[bytes]:
"""
return files in the dirstate (in whatever state) filtered by match
"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/_basetypes.py Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,21 @@
+# mercurial/interfaces/_basetypes.py - internal base type aliases for interfaces
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+# This module contains trivial type aliases that other interfaces might need.
+# This is for internal usage by the modules in `mercurial.interfaces`.
+#
+# To use type aliases outside of `mercurial.interfaces`, look at the
+# `mercurial.interfaces.types` module.
+
+from __future__ import annotations
+
+UserMsgT = bytes
+"""Text (maybe) displayed to the user."""
+
+HgPathT = bytes
+"""A path usable with Mercurial's vfs."""
+
+FsPathT = bytes
+"""A path on disk (after vfs encoding)."""
--- a/mercurial/interfaces/dirstate.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/interfaces/dirstate.py Thu Jan 30 18:22:01 2025 +0100
@@ -21,11 +21,13 @@
# Almost all mercurial modules are only imported in the type checking phase
# to avoid circular imports
from .. import (
- match as matchmod,
transaction as txnmod,
)
- from . import status as istatus
+ from . import (
+ matcher,
+ status as istatus,
+ )
# TODO: finish adding type hints
AddParentChangeCallbackT = Callable[
@@ -95,7 +97,7 @@
# TODO: decorate with `@rootcache(b'.hgignore')` like dirstate class?
@property
- def _ignore(self) -> matchmod.basematcher:
+ def _ignore(self) -> matcher.IMatcher:
"""Matcher for ignored files."""
@property
@@ -307,7 +309,7 @@
@abc.abstractmethod
def walk(
self,
- match: matchmod.basematcher,
+ match: matcher.IMatcher,
subrepos: Any, # TODO: figure out what this is
unknown: bool,
ignored: bool,
@@ -327,7 +329,7 @@
@abc.abstractmethod
def status(
self,
- match: matchmod.basematcher,
+ match: matcher.IMatcher,
subrepos: bool,
ignored: bool,
clean: bool,
@@ -352,7 +354,7 @@
# TODO: could return a list, except git.dirstate is a generator
@abc.abstractmethod
- def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+ def matches(self, match: matcher.IMatcher) -> Iterable[bytes]:
"""
return files in the dirstate (in whatever state) filtered by match
"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/matcher.py Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,142 @@
+# mercurial/interfaces/matcher.py - typing protocol for Matcher objects
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import annotations
+
+import abc
+
+from typing import (
+ Callable,
+ List,
+ Optional,
+ Protocol,
+ Set,
+ Union,
+)
+
+from ._basetypes import (
+ HgPathT,
+ UserMsgT,
+)
+
+
+class IMatcher(Protocol):
+ @abc.abstractmethod
+ def was_tampered_with_nonrec(self) -> bool:
+ ...
+
+ @abc.abstractmethod
+ def was_tampered_with(self) -> bool:
+ ...
+
+ @abc.abstractmethod
+ def __call__(self, fn: HgPathT) -> bool:
+ ...
+
+ # Callbacks related to how the matcher is used by dirstate.walk.
+ # Subscribers to these events must monkeypatch the matcher object.
+ @abc.abstractmethod
+ def bad(self, f: HgPathT, msg: Optional[UserMsgT]) -> None:
+ ...
+
+ # If traversedir is set, it will be called when a directory discovered
+ # by recursive traversal is visited.
+ traversedir: Optional[Callable[[HgPathT], None]] = None
+
+ @property
+ @abc.abstractmethod
+ def _files(self) -> List[HgPathT]:
+ ...
+
+ @abc.abstractmethod
+ def files(self) -> List[HgPathT]:
+ ...
+
+ @property
+ @abc.abstractmethod
+ def _fileset(self) -> Set[HgPathT]:
+ ...
+
+ @abc.abstractmethod
+ def exact(self, f: HgPathT) -> bool:
+ """Returns True if f is in .files()."""
+
+ @abc.abstractmethod
+ def matchfn(self, f: HgPathT) -> bool:
+ ...
+
+ @abc.abstractmethod
+ def visitdir(self, dir: HgPathT) -> Union[bool, bytes]:
+ """Decides whether a directory should be visited based on whether it
+ has potential matches in it or one of its subdirectories. This is
+ based on the match's primary, included, and excluded patterns.
+
+ Returns the string 'all' if the given directory and all subdirectories
+ should be visited. Otherwise returns True or False indicating whether
+ the given directory should be visited.
+ """
+
+ @abc.abstractmethod
+ def visitchildrenset(self, dir: HgPathT) -> Union[Set[HgPathT], bytes]:
+ """Decides whether a directory should be visited based on whether it
+ has potential matches in it or one of its subdirectories, and
+ potentially lists which subdirectories of that directory should be
+ visited. This is based on the match's primary, included, and excluded
+ patterns.
+
+ This function is very similar to 'visitdir', and the following mapping
+ can be applied:
+
+ visitdir | visitchildrenset
+ ----------+-------------------
+ False | set()
+ 'all' | 'all'
+ True | 'this' OR non-empty set of subdirs -or files- to visit
+
+ Example:
+ Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
+ the following values (assuming the implementation of visitchildrenset
+ is capable of recognizing this; some implementations are not).
+
+ '' -> {'foo', 'qux'}
+ 'baz' -> set()
+ 'foo' -> {'bar'}
+ # Ideally this would be 'all', but since the prefix nature of matchers
+ # is applied to the entire matcher, we have to downgrade this to
+ # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
+ # in.
+ 'foo/bar' -> 'this'
+ 'qux' -> 'this'
+
+ Important:
+ Most matchers do not know if they're representing files or
+ directories. They see ['path:dir/f'] and don't know whether 'f' is a
+ file or a directory, so visitchildrenset('dir') for most matchers will
+ return {'f'}, but if the matcher knows it's a file (like exactmatcher
+ does), it may return 'this'. Do not rely on the return being a set
+ indicating that there are no files in this dir to investigate (or
+ equivalently that if there are files to investigate in 'dir' that it
+ will always return 'this').
+ """
+
+ @abc.abstractmethod
+ def always(self) -> bool:
+ """Matcher will match everything and .files() will be empty --
+ optimization might be possible."""
+
+ @abc.abstractmethod
+ def isexact(self) -> bool:
+ """Matcher will match exactly the list of files in .files() --
+ optimization might be possible."""
+
+ @abc.abstractmethod
+ def prefix(self) -> bool:
+ """Matcher will match the paths in .files() recursively --
+ optimization might be possible."""
+
+ @abc.abstractmethod
+ def anypats(self) -> bool:
+ """None of .always(), .isexact(), and .prefix() is true --
+ optimizations will be difficult."""
--- a/mercurial/interfaces/repository.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/interfaces/repository.py Thu Jan 30 18:22:01 2025 +0100
@@ -32,7 +32,6 @@
# Almost all mercurial modules are only imported in the type checking phase
# to avoid circular imports
from .. import (
- match as matchmod,
pathutil,
util,
)
@@ -40,7 +39,10 @@
urlutil,
)
- from . import dirstate as intdirstate
+ from . import (
+ dirstate as intdirstate,
+ matcher,
+ )
# TODO: make a protocol class for this
NodeConstants = Any
@@ -1184,7 +1186,7 @@
"""Returns a bool indicating if a directory is in this manifest."""
@abc.abstractmethod
- def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+ def walk(self, match: matcher.IMatcher) -> Iterator[bytes]:
"""Generator of paths in manifest satisfying a matcher.
If the matcher has explicit files listed and they don't exist in
@@ -1195,7 +1197,7 @@
def diff(
self,
other: Any, # TODO: 'manifestdict' or (better) equivalent interface
- match: matchmod.basematcher | None = None,
+ match: matcher.IMatcher | None = None,
clean: bool = False,
) -> dict[
bytes,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/types.py Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,23 @@
+# mercurial/interfaces/types.py - type aliases for interfaces
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+# This is the main entry point for Mercurial code writing type annotations.
+#
+# The general principle when dealing with a <FooBar> object:
+# - to type your code: use FooBarT from `mercurial.interfaces.types`
+# - to implement foobar: use IFooBar from `mercurial.interfaces.foo_bar`
+
+from __future__ import annotations
+
+from . import (
+ _basetypes,
+ matcher,
+)
+
+MatcherT = matcher.IMatcher
+
+UserMsgT = _basetypes.UserMsgT
+HgPathT = _basetypes.HgPathT
+FsPathT = _basetypes.FsPathT
--- a/mercurial/logcmdutil.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/logcmdutil.py Thu Jan 30 18:22:01 2025 +0100
@@ -22,6 +22,9 @@
)
from .i18n import _
+from .interfaces.types import (
+ MatcherT,
+)
from .node import wdirrev
from .thirdparty import attr
@@ -1083,9 +1086,7 @@
def makewalker(
repo: Any,
wopts: walkopts,
-) -> Tuple[
- smartset.abstractsmartset, Optional[Callable[[Any], matchmod.basematcher]]
-]:
+) -> Tuple[smartset.abstractsmartset, Optional[Callable[[Any], MatcherT]]]:
"""Build (revs, makefilematcher) to scan revision/file history
- revs is the smartset to be traversed.
--- a/mercurial/manifest.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/manifest.py Thu Jan 30 18:22:01 2025 +0100
@@ -28,6 +28,9 @@
)
from .i18n import _
+from .interfaces.types import (
+ MatcherT,
+)
from .node import (
bin,
hex,
@@ -570,7 +573,7 @@
def hasdir(self, dir: bytes) -> bool:
return dir in self._dirs
- def _filesfastpath(self, match: matchmod.basematcher) -> bool:
+ def _filesfastpath(self, match: MatcherT) -> bool:
"""Checks whether we can correctly and quickly iterate over matcher
files instead of over manifest files."""
files = match.files()
@@ -579,7 +582,7 @@
or (match.prefix() and all(fn in self for fn in files))
)
- def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+ def walk(self, match: MatcherT) -> Iterator[bytes]:
"""Generates matching file names.
Equivalent to manifest.matches(match).iterkeys(), but without creating
@@ -615,7 +618,7 @@
if not self.hasdir(fn):
match.bad(fn, None)
- def _matches(self, match: matchmod.basematcher) -> manifestdict:
+ def _matches(self, match: MatcherT) -> manifestdict:
'''generate a new manifest filtered by the match argument'''
if match.always():
return self.copy()
@@ -635,7 +638,7 @@
def diff(
self,
m2: manifestdict,
- match: Optional[matchmod.basematcher] = None,
+ match: Optional[MatcherT] = None,
clean: bool = False,
) -> Dict[
bytes,
@@ -1202,7 +1205,7 @@
return copy
def filesnotin(
- self, m2: treemanifest, match: Optional[matchmod.basematcher] = None
+ self, m2: treemanifest, match: Optional[MatcherT] = None
) -> Set[bytes]:
'''Set of files in this manifest that are not in the other'''
if match and not match.always():
@@ -1250,7 +1253,7 @@
dirslash = dir + b'/'
return dirslash in self._dirs or dirslash in self._lazydirs
- def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+ def walk(self, match: MatcherT) -> Iterator[bytes]:
"""Generates matching file names.
It also reports nonexistent files by marking them bad with match.bad().
@@ -1275,7 +1278,7 @@
if not self.hasdir(fn):
match.bad(fn, None)
- def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+ def _walk(self, match: MatcherT) -> Iterator[bytes]:
'''Recursively generates matching file names for walk().'''
visit = match.visitchildrenset(self._dir[:-1])
if not visit:
@@ -1293,13 +1296,13 @@
if not visit or p[:-1] in visit:
yield from self._dirs[p]._walk(match)
- def _matches(self, match: matchmod.basematcher) -> treemanifest:
+ def _matches(self, match: MatcherT) -> treemanifest:
"""recursively generate a new manifest filtered by the match argument."""
if match.always():
return self.copy()
return self._matches_inner(match)
- def _matches_inner(self, match: matchmod.basematcher) -> treemanifest:
+ def _matches_inner(self, match: MatcherT) -> treemanifest:
if match.always():
return self.copy()
@@ -1348,7 +1351,7 @@
def diff(
self,
m2: treemanifest,
- match: Optional[matchmod.basematcher] = None,
+ match: Optional[MatcherT] = None,
clean: bool = False,
) -> Dict[
bytes,
@@ -1482,11 +1485,11 @@
Callable[[treemanifest], None],
bytes,
bytes,
- matchmod.basematcher,
+ MatcherT,
],
None,
],
- match: matchmod.basematcher,
+ match: MatcherT,
) -> None:
self._load() # for consistency; should never have any effect here
m1._load()
@@ -1516,7 +1519,7 @@
writesubtree(subm, subp1, subp2, match)
def walksubtrees(
- self, matcher: Optional[matchmod.basematcher] = None
+ self, matcher: Optional[MatcherT] = None
) -> Iterator[treemanifest]:
"""Returns an iterator of the subtrees of this manifest, including this
manifest itself.
--- a/mercurial/match.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/match.py Thu Jan 30 18:22:01 2025 +0100
@@ -24,6 +24,9 @@
)
from .i18n import _
+from .interfaces.types import (
+ MatcherT,
+)
from . import (
encoding,
error,
@@ -34,6 +37,10 @@
)
from .utils import stringutil
+from .interfaces import (
+ matcher as int_matcher,
+)
+
rustmod = policy.importrust('dirstate')
allpatternkinds = (
@@ -403,7 +410,7 @@
return kindpats
-class basematcher:
+class basematcher(int_matcher.IMatcher):
def __init__(self, badfn=None):
self._was_tampered_with = False
if badfn is not None:
@@ -1081,7 +1088,7 @@
sub/x.txt: No such file
"""
- def __init__(self, path: bytes, matcher: basematcher) -> None:
+ def __init__(self, path: bytes, matcher: MatcherT) -> None:
super().__init__()
self._path = path
self._matcher = matcher
--- a/mercurial/subrepo.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/subrepo.py Thu Jan 30 18:22:01 2025 +0100
@@ -18,6 +18,9 @@
import xml.dom.minidom
from .i18n import _
+from .interfaces.types import (
+ MatcherT,
+)
from .node import (
bin,
hex,
@@ -367,7 +370,7 @@
"""handle the files command for this subrepo"""
return 1
- def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+ def archive(self, opener, prefix, match: MatcherT, decode=True):
files = [f for f in self.files() if match(f)]
total = len(files)
relpath = subrelpath(self)
@@ -656,7 +659,7 @@
)
@annotatesubrepoerror
- def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+ def archive(self, opener, prefix, match: MatcherT, decode=True):
self._get(self._state + (b'hg',))
files = [f for f in self.files() if match(f)]
rev = self._state[1]
@@ -1913,7 +1916,7 @@
else:
self.wvfs.unlink(f)
- def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+ def archive(self, opener, prefix, match: MatcherT, decode=True):
total = 0
source, revision = self._state
if not revision:
--- a/mercurial/subrepoutil.py Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/subrepoutil.py Thu Jan 30 18:22:01 2025 +0100
@@ -57,6 +57,9 @@
)
from .interfaces import status as istatus
+ from .interfaces.types import (
+ MatcherT,
+ )
# keeps pyflakes happy
assert [
@@ -335,7 +338,7 @@
ui: uimod.ui,
wctx: context.workingcommitctx,
status: istatus.Status,
- match: matchmod.basematcher,
+ match: MatcherT,
force: bool = False,
) -> Tuple[List[bytes], Set[bytes], Substate]:
"""Calculate .hgsubstate changes that should be applied before committing