mercurial/patch.py
changeset 37732 35632d392279
parent 37731 5471348921c1
child 37764 5e67c20915a7
--- a/mercurial/patch.py	Mon Mar 19 04:28:30 2018 -0700
+++ b/mercurial/patch.py	Mon Apr 09 15:58:30 2018 -0700
@@ -50,7 +50,8 @@
 
 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
 tabsplitter = re.compile(br'(\t+|[^\t]+)')
-_nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
+wordsplitter = re.compile(br'(\t+| +|[a-zA-Z0-9_\x80-\xff]+|'
+                          '[^ \ta-zA-Z0-9_\x80-\xff])')
 
 PatchError = error.PatchError
 
@@ -2504,8 +2505,78 @@
         if chompline != line:
             yield (line[len(chompline):], '')
 
+def diffsinglehunkinline(hunklines):
+    """yield tokens for a list of lines in a single hunk, with inline colors"""
+    # prepare deleted, and inserted content
+    a = ''
+    b = ''
+    for line in hunklines:
+        if line[0] == '-':
+            a += line[1:]
+        elif line[0] == '+':
+            b += line[1:]
+        else:
+            raise error.ProgrammingError('unexpected hunk line: %s' % line)
+    # fast path: if either side is empty, use diffsinglehunk
+    if not a or not b:
+        for t in diffsinglehunk(hunklines):
+            yield t
+        return
+    # re-split the content into words
+    al = wordsplitter.findall(a)
+    bl = wordsplitter.findall(b)
+    # re-arrange the words to lines since the diff algorithm is line-based
+    aln = [s if s == '\n' else s + '\n' for s in al]
+    bln = [s if s == '\n' else s + '\n' for s in bl]
+    an = ''.join(aln)
+    bn = ''.join(bln)
+    # run the diff algorithm, prepare atokens and btokens
+    atokens = []
+    btokens = []
+    blocks = mdiff.allblocks(an, bn, lines1=aln, lines2=bln)
+    for (a1, a2, b1, b2), btype in blocks:
+        changed = btype == '!'
+        for token in mdiff.splitnewlines(''.join(al[a1:a2])):
+            atokens.append((changed, token))
+        for token in mdiff.splitnewlines(''.join(bl[b1:b2])):
+            btokens.append((changed, token))
+
+    # yield deleted tokens, then inserted ones
+    for prefix, label, tokens in [('-', 'diff.deleted', atokens),
+                                  ('+', 'diff.inserted', btokens)]:
+        nextisnewline = True
+        for changed, token in tokens:
+            if nextisnewline:
+                yield (prefix, label)
+                nextisnewline = False
+            # special handling line end
+            isendofline = token.endswith('\n')
+            if isendofline:
+                chomp = token[:-1] # chomp
+                token = chomp.rstrip() # detect spaces at the end
+                endspaces = chomp[len(token):]
+            # scan tabs
+            for maybetab in tabsplitter.findall(token):
+                if '\t' == maybetab[0]:
+                    currentlabel = 'diff.tab'
+                else:
+                    if changed:
+                        currentlabel = label + '.changed'
+                    else:
+                        currentlabel = label + '.unchanged'
+                yield (maybetab, currentlabel)
+            if isendofline:
+                if endspaces:
+                    yield (endspaces, 'diff.trailingwhitespace')
+                yield ('\n', '')
+                nextisnewline = True
+
 def difflabel(func, *args, **kw):
     '''yields 2-tuples of (output, label) based on the output of func()'''
+    if kw.get(r'opts') and kw[r'opts'].worddiff:
+        dodiffhunk = diffsinglehunkinline
+    else:
+        dodiffhunk = diffsinglehunk
     headprefixes = [('diff', 'diff.diffline'),
                     ('copy', 'diff.extended'),
                     ('rename', 'diff.extended'),
@@ -2525,7 +2596,7 @@
     hunkbuffer = []
     def consumehunkbuffer():
         if hunkbuffer:
-            for token in diffsinglehunk(hunkbuffer):
+            for token in dodiffhunk(hunkbuffer):
                 yield token
             hunkbuffer[:] = []