Mercurial > public > mercurial-scm > hg-stable
diff mercurial/util.py @ 15031:0cb27eda3a1e
util: wrap lines with multi-byte characters correctly (issue2943)
This re-introduces the unicode conversion that was lost in d320e70442a5 5 years
ago and had the comment:
To avoid corrupting multi-byte characters in line, we must wrap
a Unicode string instead of a bytestring.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sat, 06 Aug 2011 23:52:20 +0200 |
parents | eb97a3e38656 |
children | 79a861b8f553 |
line wrap: on
line diff
```diff
--- a/mercurial/util.py Mon Aug 08 11:34:52 2011 +0100
+++ b/mercurial/util.py Sat Aug 06 23:52:20 2011 +0200
@@ -1171,16 +1171,14 @@
     def __init__(self, **kwargs):
         textwrap.TextWrapper.__init__(self, **kwargs)

-    def _cutdown(self, str, space_left):
+    def _cutdown(self, ucstr, space_left):
         l = 0
-        ucstr = unicode(str, encoding.encoding)
         colwidth = unicodedata.east_asian_width
         for i in xrange(len(ucstr)):
             l += colwidth(ucstr[i]) in 'WFA' and 2 or 1
             if space_left < l:
-                return (ucstr[:i].encode(encoding.encoding),
-                        ucstr[i:].encode(encoding.encoding))
-        return str, ''
+                return (ucstr[:i], ucstr[i:])
+        return ucstr, ''

     # overriding of base class
     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
@@ -1202,10 +1200,13 @@
     if width <= maxindent:
         # adjust for weird terminal size
         width = max(78, maxindent + 1)
+    line = line.decode(encoding.encoding, encoding.encodingmode)
+    initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
+    hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
     wrapper = MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
-    return wrapper.fill(line)
+    return wrapper.fill(line).encode(encoding.encoding)

 def iterlines(iterator):
     for chunk in iterator:
```