Mercurial > public > mercurial-scm > hg
comparison mercurial/patch.py @ 35383:82c3762349ac
patch: do not break up multibyte characters when highlighting words
This changes the word-splitting pattern from `\W` to `\W` minus any 8-bit
byte (i.e. `[^a-zA-Z0-9_\x80-\xff]`), so that multibyte sequences are kept
together as words. Since we don't know the encoding of user content, this
is the most sensible definition of a non-word.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Mon, 11 Dec 2017 22:38:31 +0900 |
parents | dce761558329 |
children | 72b91f905065 |
comparison
equal
deleted
inserted
replaced
35382:dfae14354660 | 35383:82c3762349ac |
---|---|
44 diffhelpers = policy.importmod(r'diffhelpers') | 44 diffhelpers = policy.importmod(r'diffhelpers') |
45 stringio = util.stringio | 45 stringio = util.stringio |
46 | 46 |
47 gitre = re.compile(br'diff --git a/(.*) b/(.*)') | 47 gitre = re.compile(br'diff --git a/(.*) b/(.*)') |
48 tabsplitter = re.compile(br'(\t+|[^\t]+)') | 48 tabsplitter = re.compile(br'(\t+|[^\t]+)') |
 | 49 _nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])') |
49 | 50 |
50 PatchError = error.PatchError | 51 PatchError = error.PatchError |
51 | 52 |
52 # public functions | 53 # public functions |
53 | 54 |
2576 s1 = s1[1:] | 2577 s1 = s1[1:] |
2577 else: | 2578 else: |
2578 raise error.ProgrammingError("Case not expected, operation = %s" % | 2579 raise error.ProgrammingError("Case not expected, operation = %s" % |
2579 operation) | 2580 operation) |
2580 | 2581 |
2581 s = difflib.ndiff(re.split(br'(\W)', s2), re.split(br'(\W)', s1)) | 2582 s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1)) |
2582 for part in s: | 2583 for part in s: |
2583 if part[0] in operation_skip or len(part) == 2: | 2584 if part[0] in operation_skip or len(part) == 2: |
2584 continue | 2585 continue |
2585 l = operation + '.highlight' | 2586 l = operation + '.highlight' |
2586 if part[0] in ' ': | 2587 if part[0] in ' ': |