mercurial/pure/bdiff.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 11 Mar 2025 02:29:42 +0100
branchstable
changeset 53042 cdd7bf612c7b
parent 51930 09f3a6790e56
permissions -rw-r--r--
bundle-spec: properly format boolean parameter (issue6960) This was breaking automatic clone bundle generation. This changeset fixes it and add a test to catch it in the future.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# bdiff.py - Python implementation of bdiff.c
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Rapha?l Gom?s <rgomes@octobus.net>
parents: 45863
diff changeset
     3
# Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7944
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
     6
# GNU General Public License version 2 or any later version.
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
51859
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 49598
diff changeset
     8
from __future__ import annotations
27335
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
     9
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    10
import difflib
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    11
import re
c4e3ff497f89 bdiff: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 15530
diff changeset
    12
import struct
51930
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    13
import typing
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    14
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    15
from typing import (
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    16
    List,
51930
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    17
    Optional,
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    18
    Tuple,
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    19
)
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    20
51930
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    21
from ..interfaces import (
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    22
    modules as intmod,
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    23
)
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
    24
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    25
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    26
def splitnewlines(text: bytes) -> List[bytes]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    27
    '''like str.splitlines, but only split on newlines.'''
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    28
    lines = [l + b'\n' for l in text.split(b'\n')]
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    29
    if lines:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    30
        if lines[-1] == b'\n':
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    31
            lines.pop()
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    32
        else:
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    33
            lines[-1] = lines[-1][:-1]
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
    34
    return lines
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    36
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    37
def _normalizeblocks(
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    38
    a: List[bytes], b: List[bytes], blocks
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    39
) -> List[Tuple[int, int, int]]:
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
    prev = None
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    41
    r = []
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
    for curr in blocks:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
        if prev is None:
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
            prev = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
            continue
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    46
        shift = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    47
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
        a1, b1, l1 = prev
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
        a1end = a1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    50
        b1end = b1 + l1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    51
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
        a2, b2, l2 = curr
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    53
        a2end = a2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    54
        b2end = b2 + l2
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    55
        if a1end == a2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    56
            while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    57
                a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    58
            ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    59
                shift += 1
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    60
        elif b1end == b2:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    61
            while (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    62
                b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    63
            ):
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
                shift += 1
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    65
        r.append((a1, b1, l1 + shift))
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    66
        prev = a2 + shift, b2 + shift, l2 - shift
45863
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
    67
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
    68
    if prev is not None:
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
    69
        r.append(prev)
68aedad4c11c pure: guard against empty blocks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
    70
14066
14fac6c0536a pure bdiff: don't use a generator
Dan Villiom Podlaski Christiansen <danchr@gmail.com>
parents: 10282
diff changeset
    71
    return r
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    72
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
    73
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
    74
def bdiff(a: bytes, b: bytes) -> bytes:
31641
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
    75
    a = bytes(a).splitlines(True)
f2b334e6c7e0 py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
Yuya Nishihara <yuya@tcha.org>
parents: 31640
diff changeset
    76
    b = bytes(b).splitlines(True)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    77
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    78
    if not a:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    79
        s = b"".join(b)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    80
        return s and (struct.pack(b">lll", 0, 0, len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    81
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    82
    bin = []
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    83
    p = [0]
34435
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
    84
    for i in a:
5326e4ef1dab style: never put multiple statements on one line
Alex Gaynor <agaynor@mozilla.com>
parents: 32512
diff changeset
    85
        p.append(p[-1] + len(i))
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    86
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    87
    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    88
    d = _normalizeblocks(a, b, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    89
    la = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    90
    lb = 0
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    91
    for am, bm, size in d:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    92
        s = b"".join(b[lb:bm])
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    93
        if am > la or s:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    94
            bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    95
        la = am + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    96
        lb = bm + size
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    97
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    98
    return b"".join(bin)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    99
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
   100
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
   101
def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]:
7944
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
   102
    an = splitnewlines(a)
e9b48afd0e78 pure/bdiff: fix circular import
Matt Mackall <mpm@selenic.com>
parents: 7703
diff changeset
   103
    bn = splitnewlines(b)
7703
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   104
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   105
    d = _normalizeblocks(an, bn, d)
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   106
    return [(i, i + n, j, j + n) for (i, j, n) in d]
9044d3567f6d pure Python implementation of bdiff.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
   107
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 41269
diff changeset
   108
49598
594fc56c0af7 typing: add type hints to bdiff implementations
Matt Harbison <matt_harbison@yahoo.com>
parents: 48875
diff changeset
   109
def fixws(text: bytes, allws: bool) -> bytes:
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   110
    if allws:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   111
        text = re.sub(b'[ \t\r]+', b'', text)
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   112
    else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   113
        text = re.sub(b'[ \t\r]+', b' ', text)
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   114
        text = text.replace(b' \n', b'\n')
15530
eeac5e179243 mdiff: replace wscleanup() regexps with C loops
Patrick Mezard <pmezard@gmail.com>
parents: 14066
diff changeset
   115
    return text
51930
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   116
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   117
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   118
# In order to adhere to the module protocol, these functions must be visible to
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   119
# the type checker, though they aren't actually implemented by this
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   120
# implementation of the module protocol.  Callers are responsible for
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   121
# checking that the implementation is available before using them.
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   122
if typing.TYPE_CHECKING:
09f3a6790e56 interfaces: add the optional `bdiff.xdiffblocks()` method
Matt Harbison <matt_harbison@yahoo.com>
parents: 51859
diff changeset
   123
    xdiffblocks: Optional[intmod.BDiffBlocksFnc] = None