Mercurial > public > mercurial-scm > hg-stable
diff mercurial/util.py @ 15067:cc16323e748d
merge with stable
author | Martin Geisler <mg@aragost.com> |
---|---|
date | Tue, 30 Aug 2011 15:22:10 +0200 |
parents | 81f33be0ea79 24efa83d81cb |
children | 89d9f92f6fdd |
line wrap: on
line diff
--- a/mercurial/util.py Fri Aug 26 16:08:24 2011 -0500 +++ b/mercurial/util.py Tue Aug 30 15:22:10 2011 +0200 @@ -16,7 +16,7 @@ from i18n import _ import error, osutil, encoding import errno, re, shutil, sys, tempfile, traceback -import os, time, calendar, textwrap, unicodedata, signal +import os, time, calendar, textwrap, signal import imp, socket, urllib if os.name == 'nt': @@ -1158,24 +1158,31 @@ def MBTextWrapper(**kwargs): class tw(textwrap.TextWrapper): """ - Extend TextWrapper for double-width characters. + Extend TextWrapper for width-awareness. + + Neither number of 'bytes' in any encoding nor 'characters' is + appropriate to calculate terminal columns for specified string. - Some Asian characters use two terminal columns instead of one. - A good example of this behavior can be seen with u'\u65e5\u672c', - the two Japanese characters for "Japan": - len() returns 2, but when printed to a terminal, they eat 4 columns. + Original TextWrapper implementation uses built-in 'len()' directly, + so overriding is needed to use width information of each characters. - (Note that this has nothing to do whatsoever with unicode - representation, or encoding of the underlying string) + In addition, characters classified into 'ambiguous' width are + treated as wide in east asian area, but as narrow in other. + + This requires use decision to determine width of such characters. """ def __init__(self, **kwargs): textwrap.TextWrapper.__init__(self, **kwargs) + # for compatibility between 2.4 and 2.6 + if getattr(self, 'drop_whitespace', None) is None: + self.drop_whitespace = kwargs.get('drop_whitespace', True) + def _cutdown(self, ucstr, space_left): l = 0 - colwidth = unicodedata.east_asian_width + colwidth = encoding.ucolwidth for i in xrange(len(ucstr)): - l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 + l += colwidth(ucstr[i]) if space_left < l: return (ucstr[:i], ucstr[i:]) return ucstr, '' @@ -1191,6 +1198,69 @@ elif not cur_line: cur_line.append(reversed_chunks.pop()) + # this overriding code is imported from TextWrapper of python 2.6 + # to calculate columns of string by 'encoding.ucolwidth()' + def _wrap_chunks(self, chunks): + colwidth = encoding.ucolwidth + + lines = [] + if self.width <= 0: + raise ValueError("invalid width %r (must be > 0)" % self.width) + + # Arrange in reverse order so items can be efficiently popped + # from a stack of chucks. + chunks.reverse() + + while chunks: + + # Start the list of chunks that will make up the current line. + # cur_len is just the length of all the chunks in cur_line. + cur_line = [] + cur_len = 0 + + # Figure out which static string will prefix this line. + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + # Maximum width for this line. + width = self.width - len(indent) + + # First chunk on line is whitespace -- drop it, unless this + # is the very beginning of the text (ie. no lines started yet). + if self.drop_whitespace and chunks[-1].strip() == '' and lines: + del chunks[-1] + + while chunks: + l = colwidth(chunks[-1]) + + # Can at least squeeze this chunk onto the current line. + if cur_len + l <= width: + cur_line.append(chunks.pop()) + cur_len += l + + # Nope, this line is full. + else: + break + + # The current line is full, and the next chunk is too big to + # fit on *any* line (not just this one). + if chunks and colwidth(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + + # If the last chunk on this line is all whitespace, drop it. + if (self.drop_whitespace and + cur_line and cur_line[-1].strip() == ''): + del cur_line[-1] + + # Convert current line back to a string and store it in list + # of all lines (return value). + if cur_line: + lines.append(indent + ''.join(cur_line)) + + return lines + global MBTextWrapper MBTextWrapper = tw return tw(**kwargs)