Mercurial > public > mercurial-scm > hg
changeset 52741:5c48fd4c0e68
typing: introduce a `types` module and a MatcherT alias
This is a proposal to formalise the way we do typing and do more of it.
The initial motivation to make progress is to help break the 100+ module
cycle that is slowing pytype a lot.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 30 Jan 2025 18:22:01 +0100 |
parents | 7405f8a67611 |
children | 14ad5fc73a9c |
files | contrib/import-checker.py hgext/git/dirstate.py hgext/git/manifest.py hgext/largefiles/overrides.py mercurial/dirstate.py mercurial/interfaces/_basetypes.py mercurial/interfaces/dirstate.py mercurial/interfaces/matcher.py mercurial/interfaces/repository.py mercurial/interfaces/types.py mercurial/logcmdutil.py mercurial/manifest.py mercurial/match.py mercurial/subrepo.py mercurial/subrepoutil.py |
diffstat | 15 files changed, 256 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/import-checker.py Tue Feb 04 14:02:20 2025 -0500 +++ b/contrib/import-checker.py Thu Jan 30 18:22:01 2025 +0100 @@ -26,6 +26,8 @@ 'mercurial.hgweb.request', 'mercurial.i18n', 'mercurial.interfaces', + 'mercurial.interfaces._basetypes', + 'mercurial.interfaces.types', 'mercurial.node', 'mercurial.pycompat', # for revlog to re-export constant to extensions
--- a/hgext/git/dirstate.py Tue Feb 04 14:02:20 2025 -0500 +++ b/hgext/git/dirstate.py Thu Jan 30 18:22:01 2025 +0100 @@ -13,6 +13,7 @@ Tuple, ) +from mercurial.interfaces.types import MatcherT from mercurial.node import sha1nodeconstants from mercurial import ( dirstatemap, @@ -163,7 +164,7 @@ def status( self, - match: matchmod.basematcher, + match: MatcherT, subrepos: bool, ignored: bool, clean: bool, @@ -336,7 +337,7 @@ r = util.pathto(self._root, cwd, f) return r - def matches(self, match: matchmod.basematcher) -> Iterable[bytes]: + def matches(self, match: MatcherT) -> Iterable[bytes]: for x in self.git.index: p = pycompat.fsencode(x.path) if match(p): @@ -354,7 +355,7 @@ def walk( self, - match: matchmod.basematcher, + match: MatcherT, subrepos: Any, unknown: bool, ignored: bool,
--- a/hgext/git/manifest.py Tue Feb 04 14:02:20 2025 -0500 +++ b/hgext/git/manifest.py Thu Jan 30 18:22:01 2025 +0100 @@ -11,6 +11,7 @@ ) from mercurial.node import sha1nodeconstants +from mercurial.interfaces.types import MatcherT from mercurial import ( match as matchmod, @@ -288,7 +289,7 @@ elif match(realname): yield pycompat.fsencode(realname) - def walk(self, match: matchmod.basematcher) -> Iterator[bytes]: + def walk(self, match: MatcherT) -> Iterator[bytes]: # TODO: this is a very lazy way to merge in the pending # changes. There is absolutely room for optimization here by # being clever about walking over the sets...
--- a/hgext/largefiles/overrides.py Tue Feb 04 14:02:20 2025 -0500 +++ b/hgext/largefiles/overrides.py Thu Jan 30 18:22:01 2025 +0100 @@ -19,6 +19,8 @@ from mercurial.i18n import _ +from mercurial.interfaces.types import MatcherT + from mercurial.hgweb import webcommands from mercurial import ( @@ -1232,7 +1234,7 @@ node, kind, decode=True, - match: Optional[matchmod.basematcher] = None, + match: Optional[MatcherT] = None, prefix=b'', mtime=None, subrepos=None, @@ -1347,9 +1349,7 @@ @eh.wrapfunction(subrepo.hgsubrepo, 'archive') -def hgsubrepoarchive( - orig, repo, opener, prefix, match: matchmod.basematcher, decode=True -): +def hgsubrepoarchive(orig, repo, opener, prefix, match: MatcherT, decode=True): lfenabled = hasattr(repo._repo, '_largefilesenabled') if not lfenabled or not repo._repo.lfstatus: return orig(repo, opener, prefix, match, decode)
--- a/mercurial/dirstate.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/dirstate.py Thu Jan 30 18:22:01 2025 +0100 @@ -24,6 +24,7 @@ ) from .i18n import _ +from .interfaces.types import MatcherT from hgdemandimport import tracing @@ -483,7 +484,7 @@ return self._map.hastrackeddir(d) @rootcache(b'.hgignore') - def _ignore(self) -> matchmod.basematcher: + def _ignore(self) -> MatcherT: files = self._ignorefiles() if not files: return matchmod.never() @@ -1359,7 +1360,7 @@ def walk( self, - match: matchmod.basematcher, + match: MatcherT, subrepos: Any, unknown: bool, ignored: bool, @@ -1639,7 +1640,7 @@ def status( self, - match: matchmod.basematcher, + match: MatcherT, subrepos: bool, ignored: bool, clean: bool, @@ -1796,7 +1797,7 @@ ) return (lookup, status, mtime_boundary) - def matches(self, match: matchmod.basematcher) -> Iterable[bytes]: + def matches(self, match: MatcherT) -> Iterable[bytes]: """ return files in the dirstate (in whatever state) filtered by match """
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/interfaces/_basetypes.py Thu Jan 30 18:22:01 2025 +0100 @@ -0,0 +1,21 @@ +# mercurial/interfaces/_basetypes.py - internal base type aliases for interfaces +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. +# +# This module contains trivial type aliases that other interfaces might need. +# This is for internal usage by the modules in `mercurial.interfaces`. +# +# For using type aliases outside of `mercurial.interfaces`, look at the +# `mercurial.interfaces.types` module. + +from __future__ import annotations + +UserMsgT = bytes +"""Text (maybe) displayed to the user.""" + +HgPathT = bytes +"""A path usable with Mercurial's vfs.""" + +FsPathT = bytes +"""A path on disk (after vfs encoding)."""
--- a/mercurial/interfaces/dirstate.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/interfaces/dirstate.py Thu Jan 30 18:22:01 2025 +0100 @@ -21,11 +21,13 @@ # Almost all mercurial modules are only imported in the type checking phase # to avoid circular imports from .. import ( - match as matchmod, transaction as txnmod, ) - from . import status as istatus + from . import ( + matcher, + status as istatus, + ) # TODO: finish adding type hints AddParentChangeCallbackT = Callable[ @@ -95,7 +97,7 @@ # TODO: decorate with `@rootcache(b'.hgignore')` like dirstate class? @property - def _ignore(self) -> matchmod.basematcher: + def _ignore(self) -> matcher.IMatcher: """Matcher for ignored files.""" @property @@ -307,7 +309,7 @@ @abc.abstractmethod def walk( self, - match: matchmod.basematcher, + match: matcher.IMatcher, subrepos: Any, # TODO: figure out what this is unknown: bool, ignored: bool, @@ -327,7 +329,7 @@ @abc.abstractmethod def status( self, - match: matchmod.basematcher, + match: matcher.IMatcher, subrepos: bool, ignored: bool, clean: bool, @@ -352,7 +354,7 @@ # TODO: could return a list, except git.dirstate is a generator @abc.abstractmethod - def matches(self, match: matchmod.basematcher) -> Iterable[bytes]: + def matches(self, match: matcher.IMatcher) -> Iterable[bytes]: """ return files in the dirstate (in whatever state) filtered by match """
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/interfaces/matcher.py Thu Jan 30 18:22:01 2025 +0100 @@ -0,0 +1,142 @@ +# mercurial/interfaces/matcher - typing protocol for Matcher objects +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import annotations + +import abc + +from typing import ( + Callable, + List, + Optional, + Protocol, + Set, + Union, +) + +from ._basetypes import ( + HgPathT, + UserMsgT, +) + + +class IMatcher(Protocol): + @abc.abstractmethod + def was_tampered_with_nonrec(self) -> bool: + ... + + @abc.abstractmethod + def was_tampered_with(self) -> bool: + ... + + @abc.abstractmethod + def __call__(self, fn: HgPathT) -> bool: + ... + + # Callbacks related to how the matcher is used by dirstate.walk. + # Subscribers to these events must monkeypatch the matcher object. + @abc.abstractmethod + def bad(self, f: HgPathT, msg: Optional[UserMsgT]) -> None: + ... + + # If traversedir is set, it will be called when a directory discovered + # by recursive traversal is visited. + traversedir: Optional[Callable[[HgPathT], None]] = None + + @property + @abc.abstractmethod + def _files(self) -> List[HgPathT]: + ... + + @abc.abstractmethod + def files(self) -> List[HgPathT]: + ... + + @property + @abc.abstractmethod + def _fileset(self) -> Set[HgPathT]: + ... + + @abc.abstractmethod + def exact(self, f: HgPathT) -> bool: + """Returns True if f is in .files().""" + + @abc.abstractmethod + def matchfn(self, f: HgPathT) -> bool: + ... + + @abc.abstractmethod + def visitdir(self, dir: HgPathT) -> Union[bool, bytes]: + """Decides whether a directory should be visited based on whether it + has potential matches in it or one of its subdirectories. This is + based on the match's primary, included, and excluded patterns. + + Returns the string 'all' if the given directory and all subdirectories + should be visited. 
Otherwise returns True or False indicating whether + the given directory should be visited. + """ + + @abc.abstractmethod + def visitchildrenset(self, dir: HgPathT) -> Union[Set[HgPathT], bytes]: + """Decides whether a directory should be visited based on whether it + has potential matches in it or one of its subdirectories, and + potentially lists which subdirectories of that directory should be + visited. This is based on the match's primary, included, and excluded + patterns. + + This function is very similar to 'visitdir', and the following mapping + can be applied: + + visitdir | visitchildrenset + ----------+------------------- + False | set() + 'all' | 'all' + True | 'this' OR non-empty set of subdirs -or files- to visit + + Example: + Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return + the following values (assuming the implementation of visitchildrenset + is capable of recognizing this; some implementations are not). + + '' -> {'foo', 'qux'} + 'baz' -> set() + 'foo' -> {'bar'} + # Ideally this would be 'all', but since the prefix nature of matchers + # is applied to the entire matcher, we have to downgrade this to + # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed + # in. + 'foo/bar' -> 'this' + 'qux' -> 'this' + + Important: + Most matchers do not know if they're representing files or + directories. They see ['path:dir/f'] and don't know whether 'f' is a + file or a directory, so visitchildrenset('dir') for most matchers will + return {'f'}, but if the matcher knows it's a file (like exactmatcher + does), it may return 'this'. Do not rely on the return being a set + indicating that there are no files in this dir to investigate (or + equivalently that if there are files to investigate in 'dir' that it + will always return 'this'). 
+ """ + + @abc.abstractmethod + def always(self) -> bool: + """Matcher will match everything and .files() will be empty -- + optimization might be possible.""" + + @abc.abstractmethod + def isexact(self) -> bool: + """Matcher will match exactly the list of files in .files() -- + optimization might be possible.""" + + @abc.abstractmethod + def prefix(self) -> bool: + """Matcher will match the paths in .files() recursively -- + optimization might be possible.""" + + @abc.abstractmethod + def anypats(self) -> bool: + """None of .always(), .isexact(), and .prefix() is true -- + optimizations will be difficult."""
--- a/mercurial/interfaces/repository.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/interfaces/repository.py Thu Jan 30 18:22:01 2025 +0100 @@ -32,7 +32,6 @@ # Almost all mercurial modules are only imported in the type checking phase # to avoid circular imports from .. import ( - match as matchmod, pathutil, util, ) @@ -40,7 +39,10 @@ urlutil, ) - from . import dirstate as intdirstate + from . import ( + dirstate as intdirstate, + matcher, + ) # TODO: make a protocol class for this NodeConstants = Any @@ -1184,7 +1186,7 @@ """Returns a bool indicating if a directory is in this manifest.""" @abc.abstractmethod - def walk(self, match: matchmod.basematcher) -> Iterator[bytes]: + def walk(self, match: matcher.IMatcher) -> Iterator[bytes]: """Generator of paths in manifest satisfying a matcher. If the matcher has explicit files listed and they don't exist in @@ -1195,7 +1197,7 @@ def diff( self, other: Any, # TODO: 'manifestdict' or (better) equivalent interface - match: matchmod.basematcher | None = None, + match: matcher.IMatcher | None = None, clean: bool = False, ) -> dict[ bytes,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/interfaces/types.py Thu Jan 30 18:22:01 2025 +0100 @@ -0,0 +1,23 @@ +# mercurial/interfaces/types.py - type alias for interfaces +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. +# +# This is the main entry point for Mercurial code writing type annotations. +# +# The general principle can be summarized when dealing with a <FooBar> object: +# - to type your code: use FooBarT from `mercurial.interfaces.types` +# - to implement foobar: use IFooBar from `mercurial.interfaces.foo_bar` + +from __future__ import annotations + +from . import ( + _basetypes, + matcher, +) + +MatcherT = matcher.IMatcher + +UserMsgT = _basetypes.UserMsgT +HgPathT = _basetypes.HgPathT +FsPathT = _basetypes.FsPathT
--- a/mercurial/logcmdutil.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/logcmdutil.py Thu Jan 30 18:22:01 2025 +0100 @@ -22,6 +22,9 @@ ) from .i18n import _ +from .interfaces.types import ( + MatcherT, +) from .node import wdirrev from .thirdparty import attr @@ -1083,9 +1086,7 @@ def makewalker( repo: Any, wopts: walkopts, -) -> Tuple[ - smartset.abstractsmartset, Optional[Callable[[Any], matchmod.basematcher]] -]: +) -> Tuple[smartset.abstractsmartset, Optional[Callable[[Any], MatcherT]]]: """Build (revs, makefilematcher) to scan revision/file history - revs is the smartset to be traversed.
--- a/mercurial/manifest.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/manifest.py Thu Jan 30 18:22:01 2025 +0100 @@ -28,6 +28,9 @@ ) from .i18n import _ +from .interfaces.types import ( + MatcherT, +) from .node import ( bin, hex, @@ -570,7 +573,7 @@ def hasdir(self, dir: bytes) -> bool: return dir in self._dirs - def _filesfastpath(self, match: matchmod.basematcher) -> bool: + def _filesfastpath(self, match: MatcherT) -> bool: """Checks whether we can correctly and quickly iterate over matcher files instead of over manifest files.""" files = match.files() @@ -579,7 +582,7 @@ or (match.prefix() and all(fn in self for fn in files)) ) - def walk(self, match: matchmod.basematcher) -> Iterator[bytes]: + def walk(self, match: MatcherT) -> Iterator[bytes]: """Generates matching file names. Equivalent to manifest.matches(match).iterkeys(), but without creating @@ -615,7 +618,7 @@ if not self.hasdir(fn): match.bad(fn, None) - def _matches(self, match: matchmod.basematcher) -> manifestdict: + def _matches(self, match: MatcherT) -> manifestdict: '''generate a new manifest filtered by the match argument''' if match.always(): return self.copy() @@ -635,7 +638,7 @@ def diff( self, m2: manifestdict, - match: Optional[matchmod.basematcher] = None, + match: Optional[MatcherT] = None, clean: bool = False, ) -> Dict[ bytes, @@ -1202,7 +1205,7 @@ return copy def filesnotin( - self, m2: treemanifest, match: Optional[matchmod.basematcher] = None + self, m2: treemanifest, match: Optional[MatcherT] = None ) -> Set[bytes]: '''Set of files in this manifest that are not in the other''' if match and not match.always(): @@ -1250,7 +1253,7 @@ dirslash = dir + b'/' return dirslash in self._dirs or dirslash in self._lazydirs - def walk(self, match: matchmod.basematcher) -> Iterator[bytes]: + def walk(self, match: MatcherT) -> Iterator[bytes]: """Generates matching file names. It also reports nonexistent files by marking them bad with match.bad(). 
@@ -1275,7 +1278,7 @@ if not self.hasdir(fn): match.bad(fn, None) - def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]: + def _walk(self, match: MatcherT) -> Iterator[bytes]: '''Recursively generates matching file names for walk().''' visit = match.visitchildrenset(self._dir[:-1]) if not visit: @@ -1293,13 +1296,13 @@ if not visit or p[:-1] in visit: yield from self._dirs[p]._walk(match) - def _matches(self, match: matchmod.basematcher) -> treemanifest: + def _matches(self, match: MatcherT) -> treemanifest: """recursively generate a new manifest filtered by the match argument.""" if match.always(): return self.copy() return self._matches_inner(match) - def _matches_inner(self, match: matchmod.basematcher) -> treemanifest: + def _matches_inner(self, match: MatcherT) -> treemanifest: if match.always(): return self.copy() @@ -1348,7 +1351,7 @@ def diff( self, m2: treemanifest, - match: Optional[matchmod.basematcher] = None, + match: Optional[MatcherT] = None, clean: bool = False, ) -> Dict[ bytes, @@ -1482,11 +1485,11 @@ Callable[[treemanifest], None], bytes, bytes, - matchmod.basematcher, + MatcherT, ], None, ], - match: matchmod.basematcher, + match: MatcherT, ) -> None: self._load() # for consistency; should never have any effect here m1._load() @@ -1516,7 +1519,7 @@ writesubtree(subm, subp1, subp2, match) def walksubtrees( - self, matcher: Optional[matchmod.basematcher] = None + self, matcher: Optional[MatcherT] = None ) -> Iterator[treemanifest]: """Returns an iterator of the subtrees of this manifest, including this manifest itself.
--- a/mercurial/match.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/match.py Thu Jan 30 18:22:01 2025 +0100 @@ -24,6 +24,9 @@ ) from .i18n import _ +from .interfaces.types import ( + MatcherT, +) from . import ( encoding, error, @@ -34,6 +37,10 @@ ) from .utils import stringutil +from .interfaces import ( + matcher as int_matcher, +) + rustmod = policy.importrust('dirstate') allpatternkinds = ( @@ -403,7 +410,7 @@ return kindpats -class basematcher: +class basematcher(int_matcher.IMatcher): def __init__(self, badfn=None): self._was_tampered_with = False if badfn is not None: @@ -1081,7 +1088,7 @@ sub/x.txt: No such file """ - def __init__(self, path: bytes, matcher: basematcher) -> None: + def __init__(self, path: bytes, matcher: MatcherT) -> None: super().__init__() self._path = path self._matcher = matcher
--- a/mercurial/subrepo.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/subrepo.py Thu Jan 30 18:22:01 2025 +0100 @@ -18,6 +18,9 @@ import xml.dom.minidom from .i18n import _ +from .interfaces.types import ( + MatcherT, +) from .node import ( bin, hex, @@ -367,7 +370,7 @@ """handle the files command for this subrepo""" return 1 - def archive(self, opener, prefix, match: matchmod.basematcher, decode=True): + def archive(self, opener, prefix, match: MatcherT, decode=True): files = [f for f in self.files() if match(f)] total = len(files) relpath = subrelpath(self) @@ -656,7 +659,7 @@ ) @annotatesubrepoerror - def archive(self, opener, prefix, match: matchmod.basematcher, decode=True): + def archive(self, opener, prefix, match: MatcherT, decode=True): self._get(self._state + (b'hg',)) files = [f for f in self.files() if match(f)] rev = self._state[1] @@ -1913,7 +1916,7 @@ else: self.wvfs.unlink(f) - def archive(self, opener, prefix, match: matchmod.basematcher, decode=True): + def archive(self, opener, prefix, match: MatcherT, decode=True): total = 0 source, revision = self._state if not revision:
--- a/mercurial/subrepoutil.py Tue Feb 04 14:02:20 2025 -0500 +++ b/mercurial/subrepoutil.py Thu Jan 30 18:22:01 2025 +0100 @@ -57,6 +57,9 @@ ) from .interfaces import status as istatus + from .interfaces.types import ( + MatcherT, + ) # keeps pyflakes happy assert [ @@ -335,7 +338,7 @@ ui: uimod.ui, wctx: context.workingcommitctx, status: istatus.Status, - match: matchmod.basematcher, + match: MatcherT, force: bool = False, ) -> Tuple[List[bytes], Set[bytes], Substate]: """Calculate .hgsubstate changes that should be applied before committing