Mercurial > public > mercurial-scm > hg-stable
annotate mercurial/mdiff.py @ 1015:22571b8d35d3
Add automatic binary file detection to diff and export
Based on a patch by Fuming Wang
- add util.binary which decides whether a file is binary if it has any NUL
characters in the first 1K.
- teach mdiff.unidiff to print "binary file <x> has changed" for binary files
- add text flag to cause unidiff and dodiff to treat all files as text
- add -a and --text flags (like diff(1)) to hg diff and export
- update docs
author | mpm@selenic.com |
---|---|
date | Tue, 23 Aug 2005 19:58:46 -0700 |
parents | df8a5a0098d4 |
children | a0fcfbbf52bb |
rev | line source |
---|---|
239
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
1 # mdiff.py - diff and patch routines for mercurial |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
2 # |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
3 # Copyright 2005 Matt Mackall <mpm@selenic.com> |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
4 # |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
5 # This software may be used and distributed according to the terms |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
6 # of the GNU General Public License, incorporated herein by reference. |
75840796e8e2
mdiff.py: kill #! line, add copyright notice
mpm@selenic.com
parents:
184
diff
changeset
|
7 |
432 | 8 import difflib, struct, bdiff |
9 from mpatch import * | |
1015
22571b8d35d3
Add automatic binary file detection to diff and export
mpm@selenic.com
parents:
582
diff
changeset
|
10 from util import * |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
11 |
1015
22571b8d35d3
Add automatic binary file detection to diff and export
mpm@selenic.com
parents:
582
diff
changeset
|
12 def unidiff(a, ad, b, bd, fn, r=None, text=False): |
396
8f8bb77d560e
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli.
Thomas Arendsen Hein <thomas@intevation.de>
parents:
361
diff
changeset
|
13 |
35 | 14 if not a and not b: return "" |
264
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
15 |
1015
22571b8d35d3
Add automatic binary file detection to diff and export
mpm@selenic.com
parents:
582
diff
changeset
|
16 if not text and (binary(a) or binary(b)): |
22571b8d35d3
Add automatic binary file detection to diff and export
mpm@selenic.com
parents:
582
diff
changeset
|
17 l = ['Binary file %s has changed\n' % fn] |
22571b8d35d3
Add automatic binary file detection to diff and export
mpm@selenic.com
parents:
582
diff
changeset
|
18 elif a == None: |
264
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
19 b = b.splitlines(1) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
20 l1 = "--- %s\t%s\n" % ("/dev/null", ad) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
21 l2 = "+++ %s\t%s\n" % ("b/" + fn, bd) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
22 l3 = "@@ -0,0 +1,%d @@\n" % len(b) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
23 l = [l1, l2, l3] + ["+" + e for e in b] |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
24 elif b == None: |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
25 a = a.splitlines(1) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
26 l1 = "--- %s\t%s\n" % ("a/" + fn, ad) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
27 l2 = "+++ %s\t%s\n" % ("/dev/null", bd) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
28 l3 = "@@ -1,%d +0,0 @@\n" % len(a) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
29 l = [l1, l2, l3] + ["-" + e for e in a] |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
30 else: |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
31 a = a.splitlines(1) |
4c1d7072d5cd
Attempt to make diff deal with null sources properly
mpm@selenic.com
parents:
249
diff
changeset
|
32 b = b.splitlines(1) |
272
467cea2bf2d8
diff: use tab to separate date from filename
mpm@selenic.com
parents:
264
diff
changeset
|
33 l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn)) |
278
777e388c06d6
unidiff: handle empty diffs more gracefully
mpm@selenic.com
parents:
272
diff
changeset
|
34 if not l: return "" |
272
467cea2bf2d8
diff: use tab to separate date from filename
mpm@selenic.com
parents:
264
diff
changeset
|
35 # difflib uses a space, rather than a tab |
467cea2bf2d8
diff: use tab to separate date from filename
mpm@selenic.com
parents:
264
diff
changeset
|
36 l[0] = l[0][:-2] + "\t" + ad + "\n" |
467cea2bf2d8
diff: use tab to separate date from filename
mpm@selenic.com
parents:
264
diff
changeset
|
37 l[1] = l[1][:-2] + "\t" + bd + "\n" |
170 | 38 |
39 for ln in xrange(len(l)): | |
40 if l[ln][-1] != '\n': | |
41 l[ln] += "\n\ No newline at end of file\n" | |
42 | |
396
8f8bb77d560e
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli.
Thomas Arendsen Hein <thomas@intevation.de>
parents:
361
diff
changeset
|
43 if r: |
8f8bb77d560e
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli.
Thomas Arendsen Hein <thomas@intevation.de>
parents:
361
diff
changeset
|
44 l.insert(0, "diff %s %s\n" % |
8f8bb77d560e
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli.
Thomas Arendsen Hein <thomas@intevation.de>
parents:
361
diff
changeset
|
45 (' '.join(["-r %s" % rev for rev in r]), fn)) |
8f8bb77d560e
Show revisions in diffs like CVS, based on a patch from Goffredo Baroncelli.
Thomas Arendsen Hein <thomas@intevation.de>
parents:
361
diff
changeset
|
46 |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
47 return "".join(l) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
48 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
49 def sortdiff(a, b): |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
50 la = lb = 0 |
325 | 51 lena = len(a) |
52 lenb = len(b) | |
515 | 53 |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
54 while 1: |
325 | 55 am, bm, = la, lb |
56 | |
57 # walk over matching lines | |
326 | 58 while lb < lenb and la < lena and a[la] == b[lb] : |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
59 la += 1 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
60 lb += 1 |
318
2819f63b16bf
mdiff: revert grouping optimization for the time being
mpm@selenic.com
parents:
317
diff
changeset
|
61 |
325 | 62 if la > am: |
63 yield (am, bm, la - am) # return a match | |
64 | |
65 # skip mismatched lines from b | |
361 | 66 while la < lena and lb < lenb and b[lb] < a[la]: |
325 | 67 lb += 1 |
318
2819f63b16bf
mdiff: revert grouping optimization for the time being
mpm@selenic.com
parents:
317
diff
changeset
|
68 |
325 | 69 if lb >= lenb: |
70 break | |
515 | 71 |
325 | 72 # skip mismatched lines from a |
361 | 73 while la < lena and lb < lenb and b[lb] > a[la]: |
325 | 74 la += 1 |
75 | |
76 if la >= lena: | |
77 break | |
515 | 78 |
325 | 79 yield (lena, lenb, 0) |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
80 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
81 def diff(a, b, sorted=0): |
325 | 82 if not a: |
83 s = "".join(b) | |
84 return s and (struct.pack(">lll", 0, 0, len(s)) + s) | |
85 | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
86 bin = [] |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
87 p = [0] |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
88 for i in a: p.append(p[-1] + len(i)) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
89 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
90 if sorted: |
317 | 91 try: |
92 d = sortdiff(a, b) | |
93 except: | |
94 raise | |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
95 else: |
325 | 96 d = difflib.SequenceMatcher(None, a, b).get_matching_blocks() |
97 la = 0 | |
98 lb = 0 | |
99 for am, bm, size in d: | |
100 s = "".join(b[lb:bm]) | |
101 if am > la or s: | |
102 bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s) | |
103 la = am + size | |
104 lb = bm + size | |
515 | 105 |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
106 return "".join(bin) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
107 |
120
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
108 def patchtext(bin): |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
109 pos = 0 |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
110 t = [] |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
111 while pos < len(bin): |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
112 p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
113 pos += 12 |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
114 t.append(bin[pos:pos + l]) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
115 pos += l |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
116 return "".join(t) |
bae6f0328f63
Add a function to return the new text from a binary diff
mpm@selenic.com
parents:
75
diff
changeset
|
117 |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
118 def patch(a, bin): |
72 | 119 return patches(a, [bin]) |
432 | 120 |
121 textdiff = bdiff.bdiff | |
122 | |
123 |