Mercurial > public > mercurial-scm > hg-stable
diff mercurial/patch.py @ 35392:82c3762349ac
patch: do not break up multibyte character when highlighting word
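This changes the word-splitting pattern from `\W` to `\W` minus all 8-bit bytes
(`\x80-\xff`), so that multibyte sequences are treated as parts of words instead
of being split in the middle of a character. Since we don't know the encoding of
user content, this is the most sensible definition of a non-word.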
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Mon, 11 Dec 2017 22:38:31 +0900 |
parents | dce761558329 |
children | 72b91f905065 |
line wrap: on
line diff
```diff
--- a/mercurial/patch.py Sun Dec 10 00:16:11 2017 -0500
+++ b/mercurial/patch.py Mon Dec 11 22:38:31 2017 +0900
@@ -46,6 +46,7 @@
 
 gitre = re.compile(br'diff --git a/(.*) b/(.*)')
 tabsplitter = re.compile(br'(\t+|[^\t]+)')
+_nonwordre = re.compile(br'([^a-zA-Z0-9_\x80-\xff])')
 
 PatchError = error.PatchError
 
@@ -2578,7 +2579,7 @@
         raise error.ProgrammingError("Case not expected, operation = %s" %
                                      operation)
 
-    s = difflib.ndiff(re.split(br'(\W)', s2), re.split(br'(\W)', s1))
+    s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
     for part in s:
         if part[0] in operation_skip or len(part) == 2:
             continue
```