mercurial/mdiff.py
changeset 1637 3b1b44b917f4
parent 1540 8ca9f5b17257
child 1723 fde8fb2cbede
equal deleted inserted replaced
1636:7da32bb3d1d3 1637:3b1b44b917f4
     3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
     3 # Copyright 2005 Matt Mackall <mpm@selenic.com>
     4 #
     4 #
     5 # This software may be used and distributed according to the terms
     5 # This software may be used and distributed according to the terms
     6 # of the GNU General Public License, incorporated herein by reference.
     6 # of the GNU General Public License, incorporated herein by reference.
     7 
     7 
     8 import difflib, struct, bdiff, util, mpatch
     8 from demandload import demandload
       
     9 import struct, bdiff, util, mpatch
       
    10 demandload(globals(), "re")
     9 
    11 
    10 def unidiff(a, ad, b, bd, fn, r=None, text=False):
    12 
       
    13 def unidiff(a, ad, b, bd, fn, r=None, text=False,
       
    14             showfunc=False, ignorews=False):
    11 
    15 
    12     if not a and not b: return ""
    16     if not a and not b: return ""
    13     epoch = util.datestr((0, 0))
    17     epoch = util.datestr((0, 0))
    14 
    18 
    15     if not text and (util.binary(a) or util.binary(b)):
    19     if not text and (util.binary(a) or util.binary(b)):
    25         l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
    29         l1 = "--- %s\t%s\n" % ("a/" + fn, ad)
    26         l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)
    30         l2 = "+++ %s\t%s\n" % ("/dev/null", epoch)
    27         l3 = "@@ -1,%d +0,0 @@\n" % len(a)
    31         l3 = "@@ -1,%d +0,0 @@\n" % len(a)
    28         l = [l1, l2, l3] + ["-" + e for e in a]
    32         l = [l1, l2, l3] + ["-" + e for e in a]
    29     else:
    33     else:
    30         a = a.splitlines(1)
    34         al = a.splitlines(1)
    31         b = b.splitlines(1)
    35         bl = b.splitlines(1)
    32         l = list(difflib.unified_diff(a, b, "a/" + fn, "b/" + fn))
    36         l = list(bunidiff(a, b, al, bl, "a/" + fn, "b/" + fn,
       
    37                           showfunc=showfunc, ignorews=ignorews))
    33         if not l: return ""
    38         if not l: return ""
    34         # difflib uses a space, rather than a tab
    39         # difflib uses a space, rather than a tab
    35         l[0] = "%s\t%s\n" % (l[0][:-2], ad)
    40         l[0] = "%s\t%s\n" % (l[0][:-2], ad)
    36         l[1] = "%s\t%s\n" % (l[1][:-2], bd)
    41         l[1] = "%s\t%s\n" % (l[1][:-2], bd)
    37 
    42 
    42     if r:
    47     if r:
    43         l.insert(0, "diff %s %s\n" %
    48         l.insert(0, "diff %s %s\n" %
    44                     (' '.join(["-r %s" % rev for rev in r]), fn))
    49                     (' '.join(["-r %s" % rev for rev in r]), fn))
    45 
    50 
    46     return "".join(l)
    51     return "".join(l)
       
    52 
       
    53 # somewhat self contained replacement for difflib.unified_diff
       
    54 # t1 and t2 are the text to be diffed
       
    55 # l1 and l2 are the text broken up into lines
       
    56 # header1 and header2 are the filenames for the diff output
       
    57 # context is the number of context lines
       
    58 # showfunc enables diff -p output
       
    59 # ignorews ignores all whitespace changes in the diff
       
    60 def bunidiff(t1, t2, l1, l2, header1, header2, context=3, showfunc=False,
       
    61              ignorews=False):
       
    62     def contextend(l, len):
       
    63         ret = l + context
       
    64         if ret > len:
       
    65             ret = len
       
    66         return ret
       
    67 
       
    68     def contextstart(l):
       
    69         ret = l - context
       
    70         if ret < 0:
       
    71             return 0
       
    72         return ret
       
    73 
       
    74     def yieldhunk(hunk, header):
       
    75         if header:
       
    76             for x in header:
       
    77                 yield x
       
    78         (astart, a2, bstart, b2, delta) = hunk
       
    79         aend = contextend(a2, len(l1))
       
    80         alen = aend - astart
       
    81         blen = b2 - bstart + aend - a2
       
    82 
       
    83         func = ""
       
    84         if showfunc:
       
    85             # walk backwards from the start of the context
       
    86             # to find a line starting with an alphanumeric char.
       
    87             for x in xrange(astart, -1, -1):
       
    88                 t = l1[x].rstrip()
       
    89                 if funcre.match(t):
       
    90                     func = ' ' + t[:40]
       
    91                     break
       
    92 
       
    93         yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
       
    94                                            bstart + 1, blen, func)
       
    95         for x in delta:
       
    96             yield x
       
    97         for x in xrange(a2, aend):
       
    98             yield ' ' + l1[x]
       
    99 
       
   100     header = [ "--- %s\t\n" % header1, "+++ %s\t\n" % header2 ]
       
   101 
       
   102     if showfunc:
       
   103         funcre = re.compile('\w')
       
   104     if ignorews:
       
   105         wsre = re.compile('[ \t]')
       
   106 
       
   107     # bdiff.blocks gives us the matching sequences in the files.  The loop
       
   108     # below finds the spaces between those matching sequences and translates
       
   109     # them into diff output.
       
   110     #
       
   111     diff = bdiff.blocks(t1, t2)
       
   112     hunk = None
       
   113     for i in xrange(len(diff)):
       
   114         # The first match is special.
       
   115         # we've either found a match starting at line 0 or a match later
       
   116         # in the file.  If it starts later, old and new below will both be
       
   117         # empty and we'll continue to the next match.
       
   118         if i > 0:
       
   119             s = diff[i-1]
       
   120         else:
       
   121             s = [0, 0, 0, 0]
       
   122         delta = []
       
   123         s1 = diff[i]
       
   124         a1 = s[1]
       
   125         a2 = s1[0]
       
   126         b1 = s[3]
       
   127         b2 = s1[2]
       
   128 
       
   129         old = l1[a1:a2]
       
   130         new = l2[b1:b2]
       
   131 
       
   132         # bdiff sometimes gives huge matches past eof, this check eats them,
       
   133         # and deals with the special first match case described above
       
   134         if not old and not new:
       
   135             continue
       
   136 
       
   137         if ignorews:
       
   138             wsold = wsre.sub('', "".join(old))
       
   139             wsnew = wsre.sub('', "".join(new))
       
   140             if wsold == wsnew:
       
   141                 continue
       
   142 
       
   143         astart = contextstart(a1)
       
   144         bstart = contextstart(b1)
       
   145         prev = None
       
   146         if hunk:
       
   147             # join with the previous hunk if it falls inside the context
       
   148             if astart < hunk[1] + context + 1:
       
   149                 prev = hunk
       
   150                 astart = hunk[1]
       
   151                 bstart = hunk[3]
       
   152             else:
       
   153                 for x in yieldhunk(hunk, header):
       
   154                     yield x
       
   155                 # we only want to yield the header if the files differ, and
       
   156                 # we only want to yield it once.
       
   157                 header = None
       
   158         if prev:
       
   159             # we've joined the previous hunk, record the new ending points.
       
   160             hunk[1] = a2
       
   161             hunk[3] = b2
       
   162             delta = hunk[4]
       
   163         else:
       
   164             # create a new hunk
       
   165             hunk = [ astart, a2, bstart, b2, delta ]
       
   166 
       
   167         delta[len(delta):] = [ ' ' + x for x in l1[astart:a1] ]
       
   168         delta[len(delta):] = [ '-' + x for x in old ]
       
   169         delta[len(delta):] = [ '+' + x for x in new ]
       
   170 
       
   171     if hunk:
       
   172         for x in yieldhunk(hunk, header):
       
   173             yield x
    47 
   174 
    48 def patchtext(bin):
   175 def patchtext(bin):
    49     pos = 0
   176     pos = 0
    50     t = []
   177     t = []
    51     while pos < len(bin):
   178     while pos < len(bin):