mercurial-scm/hg-stable: mercurial/patch.py comparison

comparison mercurial/patch.py @ 37731:5471348921c1

patch: buffer lines for the same hunk. Instead of yielding tokens directly, buffer them if they belong to the same hunk. This makes it easier for the upcoming new worddiff algorithm to focus only on the diff hunk, instead of having to worry about other contents. This breaks how the existing experimental worddiff algorithm works, so the algorithm was removed, and related tests are disabled for now. The next patch will add a new worddiff algorithm. Differential Revision: https://phab.mercurial-scm.org/D3211

author	Jun Wu <quark@fb.com>
date	Mon, 19 Mar 2018 04:28:30 -0700
parents	8d730f96e792
children	35632d392279

comparison

equal deleted inserted replaced

-:8d730f96e792
+:5471348921c1
 from __future__ import absolute_import, print_function
 import collections
 import contextlib
 import copy
-import difflib
 import email
 import errno
 import hashlib
 import os
 import posixpath
 except GitDiffRequired:
 return difffn(opts.copy(git=True), None)
 else:
 return difffn(opts, None)
+def diffsinglehunk(hunklines):
+"""yield tokens for a list of lines in a single hunk"""
+for line in hunklines:
+# chomp
+chompline = line.rstrip('\n')
+# highlight tabs and trailing whitespace
+stripline = chompline.rstrip()
+if line[0] == '-':
+label = 'diff.deleted'
+elif line[0] == '+':
+label = 'diff.inserted'
+else:
+raise error.ProgrammingError('unexpected hunk line: %s' % line)
+for token in tabsplitter.findall(stripline):
+if '\t' == token[0]:
+yield (token, 'diff.tab')
+else:
+yield (token, label)
+if chompline != stripline:
+yield (chompline[len(stripline):], 'diff.trailingwhitespace')
+if chompline != line:
+yield (line[len(chompline):], '')
 def difflabel(func, *args, **kw):
 '''yields 2-tuples of (output, label) based on the output of func()'''
-inlinecolor = False
-if kw.get(r'opts'):
-inlinecolor = kw[r'opts'].worddiff
 headprefixes = [('diff', 'diff.diffline'),
 ('copy', 'diff.extended'),
 ('rename', 'diff.extended'),
 ('old', 'diff.extended'),
 ('new', 'diff.extended'),
 ('index', 'diff.extended'),
 ('similarity', 'diff.extended'),
 ('---', 'diff.file_a'),
 ('+++', 'diff.file_b')]
 textprefixes = [('@', 'diff.hunk'),
-('-', 'diff.deleted'),
+# - and + are handled by diffsinglehunk
-('+', 'diff.inserted')]
+]
 head = False
+# buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
+hunkbuffer = []
+def consumehunkbuffer():
+if hunkbuffer:
+for token in diffsinglehunk(hunkbuffer):
+yield token
+hunkbuffer[:] = []
 for chunk in func(*args, **kw):
 lines = chunk.split('\n')
-matches = {}
-if inlinecolor:
-matches = _findmatches(lines)
 linecount = len(lines)
 for i, line in enumerate(lines):
 if head:
 if line.startswith('@'):
 head = False
 else:
 if line and not line.startswith((' ', '+', '-', '@', '\\')):
 head = True
-stripline = line
 diffline = False
 if not head and line and line.startswith(('+', '-')):
-# highlight tabs and trailing whitespace, but only in
-# changed lines
-stripline = line.rstrip()
 diffline = True
 prefixes = textprefixes
 if head:
 prefixes = headprefixes
-for prefix, label in prefixes:
+if diffline:
-if stripline.startswith(prefix):
+# buffered
-if diffline:
+bufferedline = line
-if i in matches:
+if i + 1 < linecount:
-for t, l in _inlinediff(lines[i].rstrip(),
+bufferedline += "\n"
-lines[matches[i]].rstrip(),
+hunkbuffer.append(bufferedline)
-label):
+else:
-yield (t, l)
+# unbuffered
-else:
+for token in consumehunkbuffer():
-for token in tabsplitter.findall(stripline):
+yield token
-if token.startswith('\t'):
+stripline = line.rstrip()
-yield (token, 'diff.tab')
+for prefix, label in prefixes:
-else:
+if stripline.startswith(prefix):
-yield (token, label)
-else:
 yield (stripline, label)
-break
+if line != stripline:
-else:
+yield (line[len(stripline):],
-yield (line, '')
+'diff.trailingwhitespace')
-if line != stripline:
-yield (line[len(stripline):], 'diff.trailingwhitespace')
-if i + 1 < linecount:
-yield ('\n', '')
-def _findmatches(slist):
-'''Look for insertion matches to deletion and returns a dict of
-correspondences.
-'''
-lastmatch = 0
-matches = {}
-for i, line in enumerate(slist):
-if line == '':
-continue
-if line.startswith('-'):
-lastmatch = max(lastmatch, i)
-newgroup = False
-for j, newline in enumerate(slist[lastmatch + 1:]):
-if newline == '':
-continue
-if newline.startswith('-') and newgroup: # too far, no match
-break
-if newline.startswith('+'): # potential match
-newgroup = True
-sim = difflib.SequenceMatcher(None, line, newline).ratio()
-if sim > 0.7:
-lastmatch = lastmatch + 1 + j
-matches[i] = lastmatch
-matches[lastmatch] = i
 break
-return matches
+else:
+yield (line, '')
-def _inlinediff(s1, s2, operation):
+if i + 1 < linecount:
-'''Perform string diff to highlight specific changes.'''
+yield ('\n', '')
-operation_skip = ('+', '?') if operation == 'diff.deleted' else ('-', '?')
+for token in consumehunkbuffer():
-if operation == 'diff.deleted':
+yield token
-s2, s1 = s1, s2
-buff = []
-# we never want to higlight the leading +-
-if operation == 'diff.deleted' and s2.startswith('-'):
-label = operation
-token = '-'
-s2 = s2[1:]
-s1 = s1[1:]
-elif operation == 'diff.inserted' and s1.startswith('+'):
-label = operation
-token = '+'
-s2 = s2[1:]
-s1 = s1[1:]
-else:
-raise error.ProgrammingError("Case not expected, operation = %s" %
-operation)
-s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
-for part in s:
-if part.startswith(operation_skip) or len(part) == 2:
-continue
-l = operation + '.highlight'
-if part.startswith(' '):
-l = operation
-if part[2:] == '\t':
-l = 'diff.tab'
-if l == label: # contiguous token with same label
-token += part[2:]
-continue
-else:
-buff.append((token, label))
-label = l
-token = part[2:]
-buff.append((token, label))
-return buff
 def diffui(*args, **kw):
 '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''
 return difflabel(diff, *args, **kw)

Mercurial > public > mercurial-scm > hg-stable

comparison mercurial/patch.py @ 37731:5471348921c1