Mercurial > public > mercurial-scm > hg
comparison mercurial/util.py @ 15065:24a6c3f903bb stable
util: wrap lines with multi-byte characters correctly (issue2943)
This re-introduces the unicode conversion that was lost in d320e70442a5 5 years
ago and had the comment:
To avoid corrupting multi-byte characters in line, we must wrap
a Unicode string instead of a bytestring.
author | Mads Kiilerich <mads@kiilerich.com> |
---|---|
date | Sat, 06 Aug 2011 23:52:20 +0200 |
parents | 0f1311e829c9 |
children | 24efa83d81cb |
comparison
equal
deleted
inserted
replaced
15064:1f581a8b1948 | 15065:24a6c3f903bb |
---|---|
1146 representation, or encoding of the underlying string) | 1146 representation, or encoding of the underlying string) |
1147 """ | 1147 """ |
1148 def __init__(self, **kwargs): | 1148 def __init__(self, **kwargs): |
1149 textwrap.TextWrapper.__init__(self, **kwargs) | 1149 textwrap.TextWrapper.__init__(self, **kwargs) |
1150 | 1150 |
1151 def _cutdown(self, str, space_left): | 1151 def _cutdown(self, ucstr, space_left): |
1152 l = 0 | 1152 l = 0 |
1153 ucstr = unicode(str, encoding.encoding) | |
1154 colwidth = unicodedata.east_asian_width | 1153 colwidth = unicodedata.east_asian_width |
1155 for i in xrange(len(ucstr)): | 1154 for i in xrange(len(ucstr)): |
1156 l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 | 1155 l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 |
1157 if space_left < l: | 1156 if space_left < l: |
1158 return (ucstr[:i].encode(encoding.encoding), | 1157 return (ucstr[:i], ucstr[i:]) |
1159 ucstr[i:].encode(encoding.encoding)) | 1158 return ucstr, '' |
1160 return str, '' | |
1161 | 1159 |
1162 # overriding of base class | 1160 # overriding of base class |
1163 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): | 1161 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): |
1164 space_left = max(width - cur_len, 1) | 1162 space_left = max(width - cur_len, 1) |
1165 | 1163 |
1177 def wrap(line, width, initindent='', hangindent=''): | 1175 def wrap(line, width, initindent='', hangindent=''): |
1178 maxindent = max(len(hangindent), len(initindent)) | 1176 maxindent = max(len(hangindent), len(initindent)) |
1179 if width <= maxindent: | 1177 if width <= maxindent: |
1180 # adjust for weird terminal size | 1178 # adjust for weird terminal size |
1181 width = max(78, maxindent + 1) | 1179 width = max(78, maxindent + 1) |
1180 line = line.decode(encoding.encoding, encoding.encodingmode) | |
1181 initindent = initindent.decode(encoding.encoding, encoding.encodingmode) | |
1182 hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode) | |
1182 wrapper = MBTextWrapper(width=width, | 1183 wrapper = MBTextWrapper(width=width, |
1183 initial_indent=initindent, | 1184 initial_indent=initindent, |
1184 subsequent_indent=hangindent) | 1185 subsequent_indent=hangindent) |
1185 return wrapper.fill(line) | 1186 return wrapper.fill(line).encode(encoding.encoding) |
1186 | 1187 |
1187 def iterlines(iterator): | 1188 def iterlines(iterator): |
1188 for chunk in iterator: | 1189 for chunk in iterator: |
1189 for line in chunk.splitlines(): | 1190 for line in chunk.splitlines(): |
1190 yield line | 1191 yield line |