typing: introduce a `types` module and a MatcherT alias
author Pierre-Yves David <pierre-yves.david@octobus.net>
Thu, 30 Jan 2025 18:22:01 +0100
changeset 52741 5c48fd4c0e68
parent 52740 7405f8a67611
child 52742 14ad5fc73a9c
typing: introduce a `types` module and a MatcherT alias. This is a proposal to formalise the way we do typing and do more of it. The initial motivation to make progress is to help break the 100+ module cycle that is slowing pytype a lot.
contrib/import-checker.py
hgext/git/dirstate.py
hgext/git/manifest.py
hgext/largefiles/overrides.py
mercurial/dirstate.py
mercurial/interfaces/_basetypes.py
mercurial/interfaces/dirstate.py
mercurial/interfaces/matcher.py
mercurial/interfaces/repository.py
mercurial/interfaces/types.py
mercurial/logcmdutil.py
mercurial/manifest.py
mercurial/match.py
mercurial/subrepo.py
mercurial/subrepoutil.py
--- a/contrib/import-checker.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/contrib/import-checker.py	Thu Jan 30 18:22:01 2025 +0100
@@ -26,6 +26,8 @@
     'mercurial.hgweb.request',
     'mercurial.i18n',
     'mercurial.interfaces',
+    'mercurial.interfaces._basetypes',
+    'mercurial.interfaces.types',
     'mercurial.node',
     'mercurial.pycompat',
     # for revlog to re-export constant to extensions
--- a/hgext/git/dirstate.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/git/dirstate.py	Thu Jan 30 18:22:01 2025 +0100
@@ -13,6 +13,7 @@
     Tuple,
 )
 
+from mercurial.interfaces.types import MatcherT
 from mercurial.node import sha1nodeconstants
 from mercurial import (
     dirstatemap,
@@ -163,7 +164,7 @@
 
     def status(
         self,
-        match: matchmod.basematcher,
+        match: MatcherT,
         subrepos: bool,
         ignored: bool,
         clean: bool,
@@ -336,7 +337,7 @@
         r = util.pathto(self._root, cwd, f)
         return r
 
-    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+    def matches(self, match: MatcherT) -> Iterable[bytes]:
         for x in self.git.index:
             p = pycompat.fsencode(x.path)
             if match(p):
@@ -354,7 +355,7 @@
 
     def walk(
         self,
-        match: matchmod.basematcher,
+        match: MatcherT,
         subrepos: Any,
         unknown: bool,
         ignored: bool,
--- a/hgext/git/manifest.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/git/manifest.py	Thu Jan 30 18:22:01 2025 +0100
@@ -11,6 +11,7 @@
 )
 
 from mercurial.node import sha1nodeconstants
+from mercurial.interfaces.types import MatcherT
 
 from mercurial import (
     match as matchmod,
@@ -288,7 +289,7 @@
             elif match(realname):
                 yield pycompat.fsencode(realname)
 
-    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+    def walk(self, match: MatcherT) -> Iterator[bytes]:
         # TODO: this is a very lazy way to merge in the pending
         # changes. There is absolutely room for optimization here by
         # being clever about walking over the sets...
--- a/hgext/largefiles/overrides.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/hgext/largefiles/overrides.py	Thu Jan 30 18:22:01 2025 +0100
@@ -19,6 +19,8 @@
 
 from mercurial.i18n import _
 
+from mercurial.interfaces.types import MatcherT
+
 from mercurial.hgweb import webcommands
 
 from mercurial import (
@@ -1232,7 +1234,7 @@
     node,
     kind,
     decode=True,
-    match: Optional[matchmod.basematcher] = None,
+    match: Optional[MatcherT] = None,
     prefix=b'',
     mtime=None,
     subrepos=None,
@@ -1347,9 +1349,7 @@
 
 
 @eh.wrapfunction(subrepo.hgsubrepo, 'archive')
-def hgsubrepoarchive(
-    orig, repo, opener, prefix, match: matchmod.basematcher, decode=True
-):
+def hgsubrepoarchive(orig, repo, opener, prefix, match: MatcherT, decode=True):
     lfenabled = hasattr(repo._repo, '_largefilesenabled')
     if not lfenabled or not repo._repo.lfstatus:
         return orig(repo, opener, prefix, match, decode)
--- a/mercurial/dirstate.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/dirstate.py	Thu Jan 30 18:22:01 2025 +0100
@@ -24,6 +24,7 @@
 )
 
 from .i18n import _
+from .interfaces.types import MatcherT
 
 from hgdemandimport import tracing
 
@@ -483,7 +484,7 @@
         return self._map.hastrackeddir(d)
 
     @rootcache(b'.hgignore')
-    def _ignore(self) -> matchmod.basematcher:
+    def _ignore(self) -> MatcherT:
         files = self._ignorefiles()
         if not files:
             return matchmod.never()
@@ -1359,7 +1360,7 @@
 
     def walk(
         self,
-        match: matchmod.basematcher,
+        match: MatcherT,
         subrepos: Any,
         unknown: bool,
         ignored: bool,
@@ -1639,7 +1640,7 @@
 
     def status(
         self,
-        match: matchmod.basematcher,
+        match: MatcherT,
         subrepos: bool,
         ignored: bool,
         clean: bool,
@@ -1796,7 +1797,7 @@
         )
         return (lookup, status, mtime_boundary)
 
-    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+    def matches(self, match: MatcherT) -> Iterable[bytes]:
         """
         return files in the dirstate (in whatever state) filtered by match
         """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/_basetypes.py	Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,21 @@
+# mercurial/interfaces/_basetypes.py - internal base type aliases for interfaces
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+# This module contains trivial type aliases that other interfaces might need.
+# This is for internal usage by the modules in `mercurial.interfaces`.
+#
+# For using type aliases outside of `mercurial.interfaces`, look at the
+# `mercurial.interfaces.types` module.
+
+from __future__ import annotations
+
+UserMsgT = bytes
+"""Text (maybe) displayed to the user."""
+
+HgPathT = bytes
+"""A path usable with Mercurial's vfs."""
+
+FsPathT = bytes
+"""A path on disk (after vfs encoding)."""
--- a/mercurial/interfaces/dirstate.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/interfaces/dirstate.py	Thu Jan 30 18:22:01 2025 +0100
@@ -21,11 +21,13 @@
     # Almost all mercurial modules are only imported in the type checking phase
     # to avoid circular imports
     from .. import (
-        match as matchmod,
         transaction as txnmod,
     )
 
-    from . import status as istatus
+    from . import (
+        matcher,
+        status as istatus,
+    )
 
     # TODO: finish adding type hints
     AddParentChangeCallbackT = Callable[
@@ -95,7 +97,7 @@
 
     # TODO: decorate with `@rootcache(b'.hgignore')` like dirstate class?
     @property
-    def _ignore(self) -> matchmod.basematcher:
+    def _ignore(self) -> matcher.IMatcher:
         """Matcher for ignored files."""
 
     @property
@@ -307,7 +309,7 @@
     @abc.abstractmethod
     def walk(
         self,
-        match: matchmod.basematcher,
+        match: matcher.IMatcher,
         subrepos: Any,  # TODO: figure out what this is
         unknown: bool,
         ignored: bool,
@@ -327,7 +329,7 @@
     @abc.abstractmethod
     def status(
         self,
-        match: matchmod.basematcher,
+        match: matcher.IMatcher,
         subrepos: bool,
         ignored: bool,
         clean: bool,
@@ -352,7 +354,7 @@
     # TODO: could return a list, except git.dirstate is a generator
 
     @abc.abstractmethod
-    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
+    def matches(self, match: matcher.IMatcher) -> Iterable[bytes]:
         """
         return files in the dirstate (in whatever state) filtered by match
         """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/matcher.py	Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,142 @@
+# mercurial/interfaces/matcher.py - typing protocol for Matcher objects
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import annotations
+
+import abc
+
+from typing import (
+    Callable,
+    List,
+    Optional,
+    Protocol,
+    Set,
+    Union,
+)
+
+from ._basetypes import (
+    HgPathT,
+    UserMsgT,
+)
+
+
+class IMatcher(Protocol):
+    @abc.abstractmethod
+    def was_tampered_with_nonrec(self) -> bool:
+        ...
+
+    @abc.abstractmethod
+    def was_tampered_with(self) -> bool:
+        ...
+
+    @abc.abstractmethod
+    def __call__(self, fn: HgPathT) -> bool:
+        ...
+
+    # Callbacks related to how the matcher is used by dirstate.walk.
+    # Subscribers to these events must monkeypatch the matcher object.
+    @abc.abstractmethod
+    def bad(self, f: HgPathT, msg: Optional[UserMsgT]) -> None:
+        ...
+
+    # If traversedir is set, it will be called when a directory discovered
+    # by recursive traversal is visited.
+    traversedir: Optional[Callable[[HgPathT], None]] = None
+
+    @property
+    @abc.abstractmethod
+    def _files(self) -> List[HgPathT]:
+        ...
+
+    @abc.abstractmethod
+    def files(self) -> List[HgPathT]:
+        ...
+
+    @property
+    @abc.abstractmethod
+    def _fileset(self) -> Set[HgPathT]:
+        ...
+
+    @abc.abstractmethod
+    def exact(self, f: HgPathT) -> bool:
+        """Returns True if f is in .files()."""
+
+    @abc.abstractmethod
+    def matchfn(self, f: HgPathT) -> bool:
+        ...
+
+    @abc.abstractmethod
+    def visitdir(self, dir: HgPathT) -> Union[bool, bytes]:
+        """Decides whether a directory should be visited based on whether it
+        has potential matches in it or one of its subdirectories. This is
+        based on the match's primary, included, and excluded patterns.
+
+        Returns the string 'all' if the given directory and all subdirectories
+        should be visited. Otherwise returns True or False indicating whether
+        the given directory should be visited.
+        """
+
+    @abc.abstractmethod
+    def visitchildrenset(self, dir: HgPathT) -> Union[Set[HgPathT], bytes]:
+        """Decides whether a directory should be visited based on whether it
+        has potential matches in it or one of its subdirectories, and
+        potentially lists which subdirectories of that directory should be
+        visited. This is based on the match's primary, included, and excluded
+        patterns.
+
+        This function is very similar to 'visitdir', and the following mapping
+        can be applied:
+
+             visitdir | visitchildrenset
+            ----------+-------------------
+             False    | set()
+             'all'    | 'all'
+             True     | 'this' OR non-empty set of subdirs -or files- to visit
+
+        Example:
+          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
+          the following values (assuming the implementation of visitchildrenset
+          is capable of recognizing this; some implementations are not).
+
+          '' -> {'foo', 'qux'}
+          'baz' -> set()
+          'foo' -> {'bar'}
+          # Ideally this would be 'all', but since the prefix nature of matchers
+          # is applied to the entire matcher, we have to downgrade this to
+          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
+          # in.
+          'foo/bar' -> 'this'
+          'qux' -> 'this'
+
+        Important:
+          Most matchers do not know if they're representing files or
+          directories. They see ['path:dir/f'] and don't know whether 'f' is a
+          file or a directory, so visitchildrenset('dir') for most matchers will
+          return {'f'}, but if the matcher knows it's a file (like exactmatcher
+          does), it may return 'this'. Do not rely on the return being a set
+          indicating that there are no files in this dir to investigate (or
+          equivalently that if there are files to investigate in 'dir' that it
+          will always return 'this').
+        """
+
+    @abc.abstractmethod
+    def always(self) -> bool:
+        """Matcher will match everything and .files() will be empty --
+        optimization might be possible."""
+
+    @abc.abstractmethod
+    def isexact(self) -> bool:
+        """Matcher will match exactly the list of files in .files() --
+        optimization might be possible."""
+
+    @abc.abstractmethod
+    def prefix(self) -> bool:
+        """Matcher will match the paths in .files() recursively --
+        optimization might be possible."""
+
+    @abc.abstractmethod
+    def anypats(self) -> bool:
+        """None of .always(), .isexact(), and .prefix() is true --
+        optimizations will be difficult."""
--- a/mercurial/interfaces/repository.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/interfaces/repository.py	Thu Jan 30 18:22:01 2025 +0100
@@ -32,7 +32,6 @@
     # Almost all mercurial modules are only imported in the type checking phase
     # to avoid circular imports
     from .. import (
-        match as matchmod,
         pathutil,
         util,
     )
@@ -40,7 +39,10 @@
         urlutil,
     )
 
-    from . import dirstate as intdirstate
+    from . import (
+        dirstate as intdirstate,
+        matcher,
+    )
 
     # TODO: make a protocol class for this
     NodeConstants = Any
@@ -1184,7 +1186,7 @@
         """Returns a bool indicating if a directory is in this manifest."""
 
     @abc.abstractmethod
-    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+    def walk(self, match: matcher.IMatcher) -> Iterator[bytes]:
         """Generator of paths in manifest satisfying a matcher.
 
         If the matcher has explicit files listed and they don't exist in
@@ -1195,7 +1197,7 @@
     def diff(
         self,
         other: Any,  # TODO: 'manifestdict' or (better) equivalent interface
-        match: matchmod.basematcher | None = None,
+        match: matcher.IMatcher | None = None,
         clean: bool = False,
     ) -> dict[
         bytes,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/interfaces/types.py	Thu Jan 30 18:22:01 2025 +0100
@@ -0,0 +1,23 @@
+# mercurial/interfaces/types.py - type aliases for interfaces
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+#
+# This is the main entry point for Mercurial code writing type annotations.
+#
+# The general principle can be summarized when dealing with a <FooBar> object:
+# - to type your code: use FooBarT from `mercurial.interfaces.types`
+# - to implement foobar: use IFooBar from `mercurial.interfaces.foo_bar`
+
+from __future__ import annotations
+
+from . import (
+    _basetypes,
+    matcher,
+)
+
+MatcherT = matcher.IMatcher
+
+UserMsgT = _basetypes.UserMsgT
+HgPathT = _basetypes.HgPathT
+FsPathT = _basetypes.FsPathT
--- a/mercurial/logcmdutil.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/logcmdutil.py	Thu Jan 30 18:22:01 2025 +0100
@@ -22,6 +22,9 @@
 )
 
 from .i18n import _
+from .interfaces.types import (
+    MatcherT,
+)
 from .node import wdirrev
 
 from .thirdparty import attr
@@ -1083,9 +1086,7 @@
 def makewalker(
     repo: Any,
     wopts: walkopts,
-) -> Tuple[
-    smartset.abstractsmartset, Optional[Callable[[Any], matchmod.basematcher]]
-]:
+) -> Tuple[smartset.abstractsmartset, Optional[Callable[[Any], MatcherT]]]:
     """Build (revs, makefilematcher) to scan revision/file history
 
     - revs is the smartset to be traversed.
--- a/mercurial/manifest.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/manifest.py	Thu Jan 30 18:22:01 2025 +0100
@@ -28,6 +28,9 @@
 )
 
 from .i18n import _
+from .interfaces.types import (
+    MatcherT,
+)
 from .node import (
     bin,
     hex,
@@ -570,7 +573,7 @@
     def hasdir(self, dir: bytes) -> bool:
         return dir in self._dirs
 
-    def _filesfastpath(self, match: matchmod.basematcher) -> bool:
+    def _filesfastpath(self, match: MatcherT) -> bool:
         """Checks whether we can correctly and quickly iterate over matcher
         files instead of over manifest files."""
         files = match.files()
@@ -579,7 +582,7 @@
             or (match.prefix() and all(fn in self for fn in files))
         )
 
-    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+    def walk(self, match: MatcherT) -> Iterator[bytes]:
         """Generates matching file names.
 
         Equivalent to manifest.matches(match).iterkeys(), but without creating
@@ -615,7 +618,7 @@
             if not self.hasdir(fn):
                 match.bad(fn, None)
 
-    def _matches(self, match: matchmod.basematcher) -> manifestdict:
+    def _matches(self, match: MatcherT) -> manifestdict:
         '''generate a new manifest filtered by the match argument'''
         if match.always():
             return self.copy()
@@ -635,7 +638,7 @@
     def diff(
         self,
         m2: manifestdict,
-        match: Optional[matchmod.basematcher] = None,
+        match: Optional[MatcherT] = None,
         clean: bool = False,
     ) -> Dict[
         bytes,
@@ -1202,7 +1205,7 @@
         return copy
 
     def filesnotin(
-        self, m2: treemanifest, match: Optional[matchmod.basematcher] = None
+        self, m2: treemanifest, match: Optional[MatcherT] = None
     ) -> Set[bytes]:
         '''Set of files in this manifest that are not in the other'''
         if match and not match.always():
@@ -1250,7 +1253,7 @@
         dirslash = dir + b'/'
         return dirslash in self._dirs or dirslash in self._lazydirs
 
-    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+    def walk(self, match: MatcherT) -> Iterator[bytes]:
         """Generates matching file names.
 
         It also reports nonexistent files by marking them bad with match.bad().
@@ -1275,7 +1278,7 @@
             if not self.hasdir(fn):
                 match.bad(fn, None)
 
-    def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
+    def _walk(self, match: MatcherT) -> Iterator[bytes]:
         '''Recursively generates matching file names for walk().'''
         visit = match.visitchildrenset(self._dir[:-1])
         if not visit:
@@ -1293,13 +1296,13 @@
                 if not visit or p[:-1] in visit:
                     yield from self._dirs[p]._walk(match)
 
-    def _matches(self, match: matchmod.basematcher) -> treemanifest:
+    def _matches(self, match: MatcherT) -> treemanifest:
         """recursively generate a new manifest filtered by the match argument."""
         if match.always():
             return self.copy()
         return self._matches_inner(match)
 
-    def _matches_inner(self, match: matchmod.basematcher) -> treemanifest:
+    def _matches_inner(self, match: MatcherT) -> treemanifest:
         if match.always():
             return self.copy()
 
@@ -1348,7 +1351,7 @@
     def diff(
         self,
         m2: treemanifest,
-        match: Optional[matchmod.basematcher] = None,
+        match: Optional[MatcherT] = None,
         clean: bool = False,
     ) -> Dict[
         bytes,
@@ -1482,11 +1485,11 @@
                 Callable[[treemanifest], None],
                 bytes,
                 bytes,
-                matchmod.basematcher,
+                MatcherT,
             ],
             None,
         ],
-        match: matchmod.basematcher,
+        match: MatcherT,
     ) -> None:
         self._load()  # for consistency; should never have any effect here
         m1._load()
@@ -1516,7 +1519,7 @@
             writesubtree(subm, subp1, subp2, match)
 
     def walksubtrees(
-        self, matcher: Optional[matchmod.basematcher] = None
+        self, matcher: Optional[MatcherT] = None
     ) -> Iterator[treemanifest]:
         """Returns an iterator of the subtrees of this manifest, including this
         manifest itself.
--- a/mercurial/match.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/match.py	Thu Jan 30 18:22:01 2025 +0100
@@ -24,6 +24,9 @@
 )
 
 from .i18n import _
+from .interfaces.types import (
+    MatcherT,
+)
 from . import (
     encoding,
     error,
@@ -34,6 +37,10 @@
 )
 from .utils import stringutil
 
+from .interfaces import (
+    matcher as int_matcher,
+)
+
 rustmod = policy.importrust('dirstate')
 
 allpatternkinds = (
@@ -403,7 +410,7 @@
     return kindpats
 
 
-class basematcher:
+class basematcher(int_matcher.IMatcher):
     def __init__(self, badfn=None):
         self._was_tampered_with = False
         if badfn is not None:
@@ -1081,7 +1088,7 @@
     sub/x.txt: No such file
     """
 
-    def __init__(self, path: bytes, matcher: basematcher) -> None:
+    def __init__(self, path: bytes, matcher: MatcherT) -> None:
         super().__init__()
         self._path = path
         self._matcher = matcher
--- a/mercurial/subrepo.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/subrepo.py	Thu Jan 30 18:22:01 2025 +0100
@@ -18,6 +18,9 @@
 import xml.dom.minidom
 
 from .i18n import _
+from .interfaces.types import (
+    MatcherT,
+)
 from .node import (
     bin,
     hex,
@@ -367,7 +370,7 @@
         """handle the files command for this subrepo"""
         return 1
 
-    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+    def archive(self, opener, prefix, match: MatcherT, decode=True):
         files = [f for f in self.files() if match(f)]
         total = len(files)
         relpath = subrelpath(self)
@@ -656,7 +659,7 @@
             )
 
     @annotatesubrepoerror
-    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+    def archive(self, opener, prefix, match: MatcherT, decode=True):
         self._get(self._state + (b'hg',))
         files = [f for f in self.files() if match(f)]
         rev = self._state[1]
@@ -1913,7 +1916,7 @@
             else:
                 self.wvfs.unlink(f)
 
-    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
+    def archive(self, opener, prefix, match: MatcherT, decode=True):
         total = 0
         source, revision = self._state
         if not revision:
--- a/mercurial/subrepoutil.py	Tue Feb 04 14:02:20 2025 -0500
+++ b/mercurial/subrepoutil.py	Thu Jan 30 18:22:01 2025 +0100
@@ -57,6 +57,9 @@
     )
 
     from .interfaces import status as istatus
+    from .interfaces.types import (
+        MatcherT,
+    )
 
     # keeps pyflakes happy
     assert [
@@ -335,7 +338,7 @@
     ui: uimod.ui,
     wctx: context.workingcommitctx,
     status: istatus.Status,
-    match: matchmod.basematcher,
+    match: MatcherT,
     force: bool = False,
 ) -> Tuple[List[bytes], Set[bytes], Substate]:
     """Calculate .hgsubstate changes that should be applied before committing