8 |
8 |
9 import difflib |
9 import difflib |
10 import re |
10 import re |
11 import struct |
11 import struct |
12 |
12 |
|
13 from typing import ( |
|
14 List, |
|
15 Tuple, |
|
16 ) |
13 |
17 |
def splitnewlines(text: bytes) -> List[bytes]:
    '''like str.splitlines, but only split on newlines.

    Every returned line keeps its trailing ``b'\\n'``.  Text that does
    not end with a newline yields a final element without one, and empty
    input yields an empty list.  Carriage returns and other line-break
    bytes are left inside the lines untouched.
    '''
    pieces = text.split(b'\n')
    # split() always returns at least one element; the last one is
    # whatever follows the final newline (empty when text ends with one).
    tail = pieces.pop()
    lines = [piece + b'\n' for piece in pieces]
    if tail:
        lines.append(tail)
    return lines
23 |
28 |
24 |
29 |
25 def _normalizeblocks(a, b, blocks): |
30 def _normalizeblocks( |
|
31 a: List[bytes], b: List[bytes], blocks |
|
32 ) -> List[Tuple[int, int, int]]: |
26 prev = None |
33 prev = None |
27 r = [] |
34 r = [] |
28 for curr in blocks: |
35 for curr in blocks: |
29 if prev is None: |
36 if prev is None: |
30 prev = curr |
37 prev = curr |
82 lb = bm + size |
89 lb = bm + size |
83 |
90 |
84 return b"".join(bin) |
91 return b"".join(bin) |
85 |
92 |
86 |
93 |
def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]:
    '''return the matching line ranges between a and b.

    Both inputs are split into lines with splitnewlines(), matched with
    difflib.SequenceMatcher, and the matches are passed through
    _normalizeblocks().  Each resulting (i, j, n) triple is returned as
    (i, i + n, j, j + n): the start and end line index of the match in
    a, followed by the start and end line index in b.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())
    return [
        (astart, astart + length, bstart, bstart + length)
        for (astart, bstart, length) in matches
    ]
93 |
100 |
94 |
101 |
95 def fixws(text, allws): |
102 def fixws(text: bytes, allws: bool) -> bytes: |
96 if allws: |
103 if allws: |
97 text = re.sub(b'[ \t\r]+', b'', text) |
104 text = re.sub(b'[ \t\r]+', b'', text) |
98 else: |
105 else: |
99 text = re.sub(b'[ \t\r]+', b' ', text) |
106 text = re.sub(b'[ \t\r]+', b' ', text) |
100 text = text.replace(b' \n', b'\n') |
107 text = text.replace(b' \n', b'\n') |