comparison mercurial/patch.py @ 35383:82c3762349ac

patch: do not break up multibyte characters when highlighting words. This changes {\W} to {\W - any 8bit characters} so that multibyte sequences are taken as words. Since we don't know the encoding of user content, this is the most sensible definition of a non-word.
author Yuya Nishihara <yuya@tcha.org>
date Mon, 11 Dec 2017 22:38:31 +0900
parents dce761558329
children 72b91f905065
comparison
equal deleted inserted replaced
35382:dfae14354660 35383:82c3762349ac
44 diffhelpers = policy.importmod(r'diffhelpers') 44 diffhelpers = policy.importmod(r'diffhelpers')
45 stringio = util.stringio 45 stringio = util.stringio
46 46
47 gitre = re.compile(br'diff --git a/(.*) b/(.*)') 47 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
48 tabsplitter = re.compile(br'(\t+|[^\t]+)') 48 tabsplitter = re.compile(br'(\t+|[^\t]+)')
49 _nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
49 50
50 PatchError = error.PatchError 51 PatchError = error.PatchError
51 52
52 # public functions 53 # public functions
53 54
2576 s1 = s1[1:] 2577 s1 = s1[1:]
2577 else: 2578 else:
2578 raise error.ProgrammingError("Case not expected, operation = %s" % 2579 raise error.ProgrammingError("Case not expected, operation = %s" %
2579 operation) 2580 operation)
2580 2581
2581 s = difflib.ndiff(re.split(br'(\W)', s2), re.split(br'(\W)', s1)) 2582 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
2582 for part in s: 2583 for part in s:
2583 if part[0] in operation_skip or len(part) == 2: 2584 if part[0] in operation_skip or len(part) == 2:
2584 continue 2585 continue
2585 l = operation + '.highlight' 2586 l = operation + '.highlight'
2586 if part[0] in ' ': 2587 if part[0] in ' ':