mercurial/mdiff.py
changeset 51932 c6899b334d56
parent 51931 77e2994bd617
child 51960 d7f17819ae9e
equal deleted inserted replaced
51931:77e2994bd617 51932:c6899b334d56
     7 
     7 
     8 from __future__ import annotations
     8 from __future__ import annotations
     9 
     9 
    10 import re
    10 import re
    11 import struct
    11 import struct
       
    12 import typing
    12 import zlib
    13 import zlib
       
    14 
       
    15 from typing import (
       
    16     Iterable,
       
    17     Iterator,
       
    18     List,
       
    19     Optional,
       
    20     Sequence,
       
    21     Tuple,
       
    22     Union,
       
    23     cast,
       
    24 )
    13 
    25 
    14 from .i18n import _
    26 from .i18n import _
    15 from . import (
    27 from . import (
    16     diffhelper,
    28     diffhelper,
    17     encoding,
    29     encoding,
    33 fixws = bdiff.fixws
    45 fixws = bdiff.fixws
    34 patches = mpatch.patches
    46 patches = mpatch.patches
    35 patchedsize = mpatch.patchedsize
    47 patchedsize = mpatch.patchedsize
    36 textdiff = bdiff.bdiff
    48 textdiff = bdiff.bdiff
    37 splitnewlines = bdiff.splitnewlines
    49 splitnewlines = bdiff.splitnewlines
       
    50 
       
    51 if typing.TYPE_CHECKING:
       
    52     HunkLines = List[bytes]
       
    53     """Lines of a hunk- a header, followed by line additions and deletions."""
       
    54 
       
    55     HunkRange = Tuple[int, int, int, int]
       
    56     """HunkRange represents the range information of a hunk.
       
    57 
       
    58     The tuple (s1, l1, s2, l2) forms the header '@@ -s1,l1 +s2,l2 @@'."""
       
    59 
       
    60     Range = Tuple[int, int]
       
    61     """A (lowerbound, upperbound) range tuple."""
       
    62 
       
    63     TypedBlock = Tuple[intmod.BDiffBlock, bytes]
       
    64     """A bdiff block with its type."""
    38 
    65 
    39 
    66 
    40 # TODO: this looks like it could be an attrs, which might help pytype
    67 # TODO: this looks like it could be an attrs, which might help pytype
    41 class diffopts:
    68 class diffopts:
    42     """context is the number of context lines
    69     """context is the number of context lines
   105 
   132 
   106 
   133 
   107 defaultopts = diffopts()
   134 defaultopts = diffopts()
   108 
   135 
   109 
   136 
   110 def wsclean(opts, text, blank=True):
   137 def wsclean(opts: diffopts, text: bytes, blank: bool = True) -> bytes:
   111     if opts.ignorews:
   138     if opts.ignorews:
   112         text = bdiff.fixws(text, True)
   139         text = bdiff.fixws(text, True)
   113     elif opts.ignorewsamount:
   140     elif opts.ignorewsamount:
   114         text = bdiff.fixws(text, False)
   141         text = bdiff.fixws(text, False)
   115     if blank and opts.ignoreblanklines:
   142     if blank and opts.ignoreblanklines:
   117     if opts.ignorewseol:
   144     if opts.ignorewseol:
   118         text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
   145         text = re.sub(br'[ \t\r\f]+\n', br'\n', text)
   119     return text
   146     return text
   120 
   147 
   121 
   148 
   122 def splitblock(base1, lines1, base2, lines2, opts):
   149 def splitblock(
       
   150     base1: int,
       
   151     lines1: Iterable[bytes],
       
   152     base2: int,
       
   153     lines2: Iterable[bytes],
       
   154     opts: diffopts,
       
   155 ) -> Iterable[TypedBlock]:
   123     # The input lines match except for interwoven blank lines. We
   156     # The input lines match except for interwoven blank lines. We
   124     # transform it into a sequence of matching blocks and blank blocks.
   157     # transform it into a sequence of matching blocks and blank blocks.
   125     lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
   158     lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1]
   126     lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
   159     lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2]
   127     s1, e1 = 0, len(lines1)
   160     s1, e1 = 0, len(lines1)
   143         yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
   176         yield (base1 + s1, base1 + i1, base2 + s2, base2 + i2), btype
   144         s1 = i1
   177         s1 = i1
   145         s2 = i2
   178         s2 = i2
   146 
   179 
   147 
   180 
   148 def hunkinrange(hunk, linerange):
   181 def hunkinrange(hunk: Tuple[int, int], linerange: Range) -> bool:
   149     """Return True if `hunk` defined as (start, length) is in `linerange`
   182     """Return True if `hunk` defined as (start, length) is in `linerange`
   150     defined as (lowerbound, upperbound).
   183     defined as (lowerbound, upperbound).
   151 
   184 
   152     >>> hunkinrange((5, 10), (2, 7))
   185     >>> hunkinrange((5, 10), (2, 7))
   153     True
   186     True
   169     start, length = hunk
   202     start, length = hunk
   170     lowerbound, upperbound = linerange
   203     lowerbound, upperbound = linerange
   171     return lowerbound < start + length and start < upperbound
   204     return lowerbound < start + length and start < upperbound
   172 
   205 
   173 
   206 
   174 def blocksinrange(blocks, rangeb):
   207 def blocksinrange(
       
   208     blocks: Iterable[TypedBlock], rangeb: Range
       
   209 ) -> Tuple[List[TypedBlock], Range]:
   175     """filter `blocks` like (a1, a2, b1, b2) from items outside line range
   210     """filter `blocks` like (a1, a2, b1, b2) from items outside line range
   176     `rangeb` from ``(b1, b2)`` point of view.
   211     `rangeb` from ``(b1, b2)`` point of view.
   177 
   212 
   178     Return `filteredblocks, rangea` where:
   213     Return `filteredblocks, rangea` where:
   179 
   214 
   209     if lba is None or uba is None or uba < lba:
   244     if lba is None or uba is None or uba < lba:
   210         raise error.InputError(_(b'line range exceeds file size'))
   245         raise error.InputError(_(b'line range exceeds file size'))
   211     return filteredblocks, (lba, uba)
   246     return filteredblocks, (lba, uba)
   212 
   247 
   213 
   248 
   214 def chooseblocksfunc(opts=None):
   249 def chooseblocksfunc(opts: Optional[diffopts] = None) -> intmod.BDiffBlocksFnc:
   215     if (
   250     if (
   216         opts is None
   251         opts is None
   217         or not opts.xdiff
   252         or not opts.xdiff
   218         or not getattr(bdiff, 'xdiffblocks', None)
   253         or not getattr(bdiff, 'xdiffblocks', None)
   219     ):
   254     ):
   220         return bdiff.blocks
   255         return bdiff.blocks
   221     else:
   256     else:
   222         return bdiff.xdiffblocks
   257         return bdiff.xdiffblocks
   223 
   258 
   224 
   259 
   225 def allblocks(text1, text2, opts=None, lines1=None, lines2=None):
   260 def allblocks(
       
   261     text1: bytes,
       
   262     text2: bytes,
       
   263     opts: Optional[diffopts] = None,
       
   264     lines1: Optional[Sequence[bytes]] = None,
       
   265     lines2: Optional[Sequence[bytes]] = None,
       
   266 ) -> Iterable[TypedBlock]:
   226     """Return (block, type) tuples, where block is an mdiff.blocks
   267     """Return (block, type) tuples, where block is an mdiff.blocks
   227     line entry. type is '=' for blocks matching exactly one another
   268     line entry. type is '=' for blocks matching exactly one another
   228     (bdiff blocks), '!' for non-matching blocks and '~' for blocks
   269     (bdiff blocks), '!' for non-matching blocks and '~' for blocks
   229     matching only after having filtered blank lines.
   270     matching only after having filtered blank lines.
   230     lines1 and lines2 are text1 and text2 split with splitnewlines() if
   271     lines1 and lines2 are text1 and text2 split with splitnewlines() if
   262                     type = b'~'
   303                     type = b'~'
   263             yield s, type
   304             yield s, type
   264         yield s1, b'='
   305         yield s1, b'='
   265 
   306 
   266 
   307 
   267 def unidiff(a, ad, b, bd, fn1, fn2, binary, opts=defaultopts):
   308 def unidiff(
       
   309     a: bytes,
       
   310     ad: bytes,
       
   311     b: bytes,
       
   312     bd: bytes,
       
   313     fn1: bytes,
       
   314     fn2: bytes,
       
   315     binary: bool,
       
   316     opts: diffopts = defaultopts,
       
   317 ) -> Tuple[List[bytes], Iterable[Tuple[Optional[HunkRange], HunkLines]]]:
   268     """Return a unified diff as a (headers, hunks) tuple.
   318     """Return a unified diff as a (headers, hunks) tuple.
   269 
   319 
   270     If the diff is not null, `headers` is a list with unified diff header
   320     If the diff is not null, `headers` is a list with unified diff header
   271     lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
   321     lines "--- <original>" and "+++ <new>" and `hunks` is a generator yielding
   272     (hunkrange, hunklines) coming from _unidiff().
   322     (hunkrange, hunklines) coming from _unidiff().
   273     Otherwise, `headers` and `hunks` are empty.
   323     Otherwise, `headers` and `hunks` are empty.
   274 
   324 
   275     Set binary=True if either a or b should be taken as a binary file.
   325     Set binary=True if either a or b should be taken as a binary file.
   276     """
   326     """
   277 
   327 
   278     def datetag(date, fn=None):
   328     def datetag(date: bytes, fn: Optional[bytes] = None):
   279         if not opts.git and not opts.nodates:
   329         if not opts.git and not opts.nodates:
   280             return b'\t%s' % date
   330             return b'\t%s' % date
   281         if fn and b' ' in fn:
   331         if fn and b' ' in fn:
   282             return b'\t'
   332             return b'\t'
   283         return b''
   333         return b''
   342         headerlines = [
   392         headerlines = [
   343             b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
   393             b"--- %s%s%s" % (aprefix, fn1, datetag(ad, fn1)),
   344             b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
   394             b"+++ %s%s%s" % (bprefix, fn2, datetag(bd, fn2)),
   345         ]
   395         ]
   346 
   396 
   347     return headerlines, hunks
   397     # The possible bool is consumed from the iterator above in the `next()`
   348 
   398     # call.
   349 
   399     return headerlines, cast(
   350 def _unidiff(t1, t2, opts=defaultopts):
   400         "Iterable[Tuple[Optional[HunkRange], HunkLines]]", hunks
       
   401     )
       
   402 
       
   403 
       
   404 def _unidiff(
       
   405     t1: bytes, t2: bytes, opts: diffopts = defaultopts
       
   406 ) -> Iterator[Union[bool, Tuple[HunkRange, HunkLines]]]:
   351     """Yield hunks of a headerless unified diff from t1 and t2 texts.
   407     """Yield hunks of a headerless unified diff from t1 and t2 texts.
   352 
   408 
   353     Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
   409     Each hunk consists of a (hunkrange, hunklines) tuple where `hunkrange` is a
   354     tuple (s1, l1, s2, l2) representing the range information of the hunk to
   410     tuple (s1, l1, s2, l2) representing the range information of the hunk to
   355     form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
   411     form the '@@ -s1,l1 +s2,l2 @@' header and `hunklines` is a list of lines
   373             return 0
   429             return 0
   374         return ret
   430         return ret
   375 
   431 
   376     lastfunc = [0, b'']
   432     lastfunc = [0, b'']
   377 
   433 
   378     def yieldhunk(hunk):
   434     def yieldhunk(
       
   435         hunk: Tuple[int, int, int, int, List[bytes]]
       
   436     ) -> Iterable[Tuple[HunkRange, HunkLines]]:
   379         (astart, a2, bstart, b2, delta) = hunk
   437         (astart, a2, bstart, b2, delta) = hunk
   380         aend = contextend(a2, len(l1))
   438         aend = contextend(a2, len(l1))
   381         alen = aend - astart
   439         alen = aend - astart
   382         blen = b2 - bstart + aend - a2
   440         blen = b2 - bstart + aend - a2
   383 
   441 
   493             yield x
   551             yield x
   494     elif not has_hunks:
   552     elif not has_hunks:
   495         yield False
   553         yield False
   496 
   554 
   497 
   555 
   498 def b85diff(to, tn):
   556 def b85diff(to: Optional[bytes], tn: Optional[bytes]) -> bytes:
   499     '''print base85-encoded binary diff'''
   557     '''print base85-encoded binary diff'''
   500 
   558 
   501     def fmtline(line):
   559     def fmtline(line):
   502         l = len(line)
   560         l = len(line)
   503         if l <= 26:
   561         if l <= 26:
   530     ret.append(b'\n')
   588     ret.append(b'\n')
   531 
   589 
   532     return b''.join(ret)
   590     return b''.join(ret)
   533 
   591 
   534 
   592 
   535 def patchtext(bin):
   593 def patchtext(bin: bytes) -> bytes:
   536     pos = 0
   594     pos = 0
   537     t = []
   595     t = []
   538     while pos < len(bin):
   596     while pos < len(bin):
   539         p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
   597         p1, p2, l = struct.unpack(b">lll", bin[pos : pos + 12])
   540         pos += 12
   598         pos += 12
   549         return util.buffer(bin, 12)
   607         return util.buffer(bin, 12)
   550     return mpatch.patches(a, [bin])
   608     return mpatch.patches(a, [bin])
   551 
   609 
   552 
   610 
   553 # similar to difflib.SequenceMatcher.get_matching_blocks
   611 # similar to difflib.SequenceMatcher.get_matching_blocks
   554 def get_matching_blocks(a, b):
   612 def get_matching_blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int]]:
   555     return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
   613     return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
   556 
   614 
   557 
   615 
   558 def trivialdiffheader(length):
   616 def trivialdiffheader(length: int) -> bytes:
   559     return struct.pack(b">lll", 0, 0, length) if length else b''
   617     return struct.pack(b">lll", 0, 0, length) if length else b''
   560 
   618 
   561 
   619 
   562 def replacediffheader(oldlen, newlen):
   620 def replacediffheader(oldlen: int, newlen: int) -> bytes:
   563     return struct.pack(b">lll", 0, oldlen, newlen)
   621     return struct.pack(b">lll", 0, oldlen, newlen)