diff mercurial/interfaces/dirstate.py @ 51925:93d872a06132

typing: add type annotations to the dirstate classes The basic procedure here was to use `merge-pyi` to merge the `git/dirstate.pyi` file in (after renaming the interface class to match), cleaning up the import statement mess, and then repeating the procedure for `mercurial/dirstate.pyi`. Surprisingly, git's dirstate had more hints inferred in its *.pyi file. After that, it was a manual examination of each method in the interface, and how they were implemented in the core and git classes to verify what was inferred by pytype, and fill in the missing gaps. Since this involved jumping around between three different files, I applied the same type info to all three at the same time. Complex types I rolled up into type aliases in the interface module, and used that as needed. That way if it changes, there's one place to edit. There are some hints still missing, and some documentation that doesn't match the signatures. They should all be marked with TODOs. There are also a bunch of methods on the core class that aren't on the Protocol class that seem like maybe they should be (like `set_tracked()`). There are even more methods missing from the git class. But that's a project for another time.
author Matt Harbison <matt_harbison@yahoo.com>
date Fri, 27 Sep 2024 12:30:37 -0400
parents 3688a984134b
children 2c8c46c3c401
line wrap: on
line diff
--- a/mercurial/interfaces/dirstate.py	Fri Sep 27 12:10:25 2024 -0400
+++ b/mercurial/interfaces/dirstate.py	Fri Sep 27 12:30:37 2024 -0400
@@ -1,11 +1,19 @@
 from __future__ import annotations
 
 import contextlib
+import os
 import typing
 
 from typing import (
+    Any,
     Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
     Protocol,
+    Tuple,
 )
 
 if typing.TYPE_CHECKING:
@@ -13,8 +21,52 @@
     # to avoid circular imports
     from .. import (
         match as matchmod,
+        scmutil,
+        transaction as txnmod,
     )
 
+    # TODO: finish adding type hints
+    AddParentChangeCallbackT = Callable[
+        ["idirstate", Tuple[Any, Any], Tuple[Any, Any]], Any
+    ]
+    """The callback type for dirstate.addparentchangecallback()."""
+
+    # TODO: add a Protocol for dirstatemap.DirStateItem? (It is
+    #  conditionalized with python or rust implementations.  Also,
+    #  git.dirstate needs to yield non-None from ``items()``.)
+    DirstateItemT = Any  # dirstatemap.DirstateItem
+
+    IgnoreFileAndLineT = Tuple[Optional[bytes], int, bytes]
+    """The return type of dirstate._ignorefileandline(), which holds
+    ``(file, lineno, originalline)``.
+    """
+
+    FlagFuncFallbackT = Callable[[], "FlagFuncReturnT"]
+    """The type for the dirstate.flagfunc() fallback function."""
+
+    FlagFuncReturnT = Callable[[bytes], bytes]
+    """The return type of dirstate.flagfunc()."""
+
+    # TODO: verify and complete this- it came from a pytype *.pyi file
+    StatusReturnT = Tuple[Any, scmutil.status, Any]
+    """The return type of dirstate.status()."""
+
+    # TODO: probably doesn't belong here.
+    TransactionT = txnmod.transaction
+    """The type for a transaction used with dirstate.
+
+    This is meant to help callers avoid having to remember to delay the import
+    of the transaction module.
+    """
+
+    # TODO: The value can also be mercurial.osutil.stat
+    WalkReturnT = Dict[bytes, Optional[os.stat_result]]
+    """The return type of dirstate.walk().
+
+    The matched files are keyed in the dictionary, mapped to a stat-like object
+    if the file exists.
+    """
+
 
 class idirstate(Protocol):
     # TODO: convert these constructor args to fields?
@@ -56,10 +108,10 @@
     def is_changing_files(self) -> bool:
         """True if file tracking changes in progress."""
 
-    def _ignorefiles(self):
+    def _ignorefiles(self) -> List[bytes]:
         """Return a list of files containing patterns to ignore."""
 
-    def _ignorefileandline(self, f):
+    def _ignorefileandline(self, f: bytes) -> IgnoreFileAndLineT:
         """Given a file `f`, return the ignore file and line that ignores it."""
 
     # TODO: decorate with `@util.propertycache` like dirstate class?
@@ -75,7 +127,7 @@
         """Callable for checking exec bits."""  # TODO: this comment looks stale
 
     @contextlib.contextmanager
-    def changing_parents(self, repo):
+    def changing_parents(self, repo) -> Iterator:  # TODO: typehint this
         """Context manager for handling dirstate parents.
 
         If an exception occurs in the scope of the context manager,
@@ -84,7 +136,7 @@
         """
 
     @contextlib.contextmanager
-    def changing_files(self, repo):
+    def changing_files(self, repo) -> Iterator:  # TODO: typehint this
         """Context manager for handling dirstate files.
 
         If an exception occurs in the scope of the context manager,
@@ -92,10 +144,10 @@
         released.
         """
 
-    def hasdir(self, d):
+    def hasdir(self, d: bytes) -> bool:
         pass
 
-    def flagfunc(self, buildfallback):
+    def flagfunc(self, buildfallback: FlagFuncFallbackT) -> FlagFuncReturnT:
         """build a callable that returns flags associated with a filename
 
         The information is extracted from three possible layers:
@@ -104,7 +156,7 @@
         3. a more expensive mechanism inferring the flags from the parents.
         """
 
-    def getcwd(self):
+    def getcwd(self) -> bytes:
         """Return the path from which a canonical path is calculated.
 
         This path should be used to resolve file patterns or to convert
@@ -112,19 +164,19 @@
         used to get real file paths. Use vfs functions instead.
         """
 
-    def pathto(self, f, cwd=None):
+    def pathto(self, f: bytes, cwd: Optional[bytes] = None) -> bytes:
         pass
 
-    def get_entry(self, path):
+    def get_entry(self, path: bytes) -> DirstateItemT:
         """return a DirstateItem for the associated path"""
 
-    def __contains__(self, key):
+    def __contains__(self, key: Any) -> bool:
         """Check if bytestring `key` is known to the dirstate."""
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[bytes]:
         """Iterate the dirstate's contained filenames as bytestrings."""
 
-    def items(self):
+    def items(self) -> Iterator[Tuple[bytes, DirstateItemT]]:
         """Iterate the dirstate's entries as (filename, DirstateItem.
 
         As usual, filename is a bytestring.
@@ -132,19 +184,20 @@
 
     iteritems = items
 
-    def parents(self):
+    def parents(self) -> List[bytes]:
         pass
 
-    def p1(self):
+    def p1(self) -> bytes:
         pass
 
-    def p2(self):
+    def p2(self) -> bytes:
         pass
 
-    def branch(self):
+    def branch(self) -> bytes:
         pass
 
-    def setparents(self, p1, p2=None):
+    # TODO: typehint the return.  It's a copies Map of some sort.
+    def setparents(self, p1: bytes, p2: Optional[bytes] = None):
         """Set dirstate parents to p1 and p2.
 
         When moving from two parents to one, "merged" entries a
@@ -154,26 +207,30 @@
         See localrepo.setparents()
         """
 
-    def setbranch(self, branch, transaction):
+    def setbranch(
+        self, branch: bytes, transaction: Optional[TransactionT]
+    ) -> None:
         pass
 
-    def invalidate(self):
+    def invalidate(self) -> None:
         """Causes the next access to reread the dirstate.
 
         This is different from localrepo.invalidatedirstate() because it always
         rereads the dirstate. Use localrepo.invalidatedirstate() if you want to
         check whether the dirstate has changed before rereading it."""
 
-    def copy(self, source, dest):
+    def copy(self, source: Optional[bytes], dest: bytes) -> None:
         """Mark dest as a copy of source. Unmark dest if source is None."""
 
-    def copied(self, file):
+    def copied(self, file: bytes) -> Optional[bytes]:
         pass
 
-    def copies(self):
+    def copies(self) -> Dict[bytes, bytes]:
         pass
 
-    def normalize(self, path, isknown=False, ignoremissing=False):
+    def normalize(
+        self, path: bytes, isknown: bool = False, ignoremissing: bool = False
+    ) -> bytes:
         """
         normalize the case of a pathname when on a casefolding filesystem
 
@@ -191,16 +248,23 @@
         - version provided via command arguments
         """
 
-    def clear(self):
+    def clear(self) -> None:
         pass
 
-    def rebuild(self, parent, allfiles, changedfiles=None):
+    def rebuild(
+        self,
+        parent: bytes,
+        allfiles: Iterable[bytes],  # TODO: more than iterable? (uses len())
+        changedfiles: Optional[Iterable[bytes]] = None,
+    ) -> None:
         pass
 
-    def write(self, tr):
+    def write(self, tr: Optional[TransactionT]) -> None:
         pass
 
-    def addparentchangecallback(self, category, callback):
+    def addparentchangecallback(
+        self, category: bytes, callback: AddParentChangeCallbackT
+    ) -> None:
         """add a callback to be called when the wd parents are changed
 
         Callback will be called with the following arguments:
@@ -210,7 +274,14 @@
         with a newer callback.
         """
 
-    def walk(self, match, subrepos, unknown, ignored, full=True):
+    def walk(
+        self,
+        match: matchmod.basematcher,
+        subrepos: Any,  # TODO: figure out what this is
+        unknown: bool,
+        ignored: bool,
+        full: bool = True,
+    ) -> WalkReturnT:
         """
         Walk recursively through the directory tree, finding all files
         matched by match.
@@ -222,7 +293,14 @@
 
         """
 
-    def status(self, match, subrepos, ignored, clean, unknown):
+    def status(
+        self,
+        match: matchmod.basematcher,
+        subrepos: bool,
+        ignored: bool,
+        clean: bool,
+        unknown: bool,
+    ) -> StatusReturnT:
         """Determine the status of the working copy relative to the
         dirstate and return a pair of (unsure, status), where status is of type
         scmutil.status and:
@@ -239,12 +317,18 @@
             dirstate was written
         """
 
-    def matches(self, match):
+    # TODO: could return a list, except git.dirstate is a generator
+
+    def matches(self, match: matchmod.basematcher) -> Iterable[bytes]:
         """
         return files in the dirstate (in whatever state) filtered by match
         """
 
-    def verify(self, m1, m2, p1, narrow_matcher=None):
+    # TODO: finish adding typehints here, and to subclasses
+
+    def verify(
+        self, m1, m2, p1: bytes, narrow_matcher: Optional[Any] = None
+    ) -> Iterator[bytes]:
         """
         check the dirstate contents against the parent manifest and yield errors
         """