comparison mercurial/util.py @ 15067:cc16323e748d

merge with stable
author Martin Geisler <mg@aragost.com>
date Tue, 30 Aug 2011 15:22:10 +0200
parents 81f33be0ea79 24efa83d81cb
children 89d9f92f6fdd
comparison
equal deleted inserted replaced
15061:86380f24e697 15067:cc16323e748d
14 """ 14 """
15 15
16 from i18n import _ 16 from i18n import _
17 import error, osutil, encoding 17 import error, osutil, encoding
18 import errno, re, shutil, sys, tempfile, traceback 18 import errno, re, shutil, sys, tempfile, traceback
19 import os, time, calendar, textwrap, unicodedata, signal 19 import os, time, calendar, textwrap, signal
20 import imp, socket, urllib 20 import imp, socket, urllib
21 21
22 if os.name == 'nt': 22 if os.name == 'nt':
23 import windows as platform 23 import windows as platform
24 else: 24 else:
1156 1156
1157 # delay import of textwrap 1157 # delay import of textwrap
1158 def MBTextWrapper(**kwargs): 1158 def MBTextWrapper(**kwargs):
1159 class tw(textwrap.TextWrapper): 1159 class tw(textwrap.TextWrapper):
1160 """ 1160 """
1161 Extend TextWrapper for double-width characters. 1161 Extend TextWrapper for width-awareness.
1162 1162
1163 Some Asian characters use two terminal columns instead of one. 1163 Neither number of 'bytes' in any encoding nor 'characters' is
1164 A good example of this behavior can be seen with u'\u65e5\u672c', 1164 appropriate to calculate terminal columns for specified string.
1165 the two Japanese characters for "Japan": 1165
1166 len() returns 2, but when printed to a terminal, they eat 4 columns. 1166 Original TextWrapper implementation uses built-in 'len()' directly,
1167 1167 so overriding is needed to use width information of each character.
1168 (Note that this has nothing to do whatsoever with unicode 1168
1169 representation, or encoding of the underlying string) 1169 In addition, characters classified into 'ambiguous' width are
1170 treated as wide in East Asian locales, but as narrow elsewhere.
1171
1172 This requires a user decision to determine the width of such characters.
1170 """ 1173 """
1171 def __init__(self, **kwargs): 1174 def __init__(self, **kwargs):
1172 textwrap.TextWrapper.__init__(self, **kwargs) 1175 textwrap.TextWrapper.__init__(self, **kwargs)
1173 1176
1177 # for compatibility between 2.4 and 2.6
1178 if getattr(self, 'drop_whitespace', None) is None:
1179 self.drop_whitespace = kwargs.get('drop_whitespace', True)
1180
1174 def _cutdown(self, ucstr, space_left): 1181 def _cutdown(self, ucstr, space_left):
1175 l = 0 1182 l = 0
1176 colwidth = unicodedata.east_asian_width 1183 colwidth = encoding.ucolwidth
1177 for i in xrange(len(ucstr)): 1184 for i in xrange(len(ucstr)):
1178 l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 1185 l += colwidth(ucstr[i])
1179 if space_left < l: 1186 if space_left < l:
1180 return (ucstr[:i], ucstr[i:]) 1187 return (ucstr[:i], ucstr[i:])
1181 return ucstr, '' 1188 return ucstr, ''
1182 1189
1183 # overriding of base class 1190 # overriding of base class
1188 cut, res = self._cutdown(reversed_chunks[-1], space_left) 1195 cut, res = self._cutdown(reversed_chunks[-1], space_left)
1189 cur_line.append(cut) 1196 cur_line.append(cut)
1190 reversed_chunks[-1] = res 1197 reversed_chunks[-1] = res
1191 elif not cur_line: 1198 elif not cur_line:
1192 cur_line.append(reversed_chunks.pop()) 1199 cur_line.append(reversed_chunks.pop())
1200
1201 # this overriding code is imported from TextWrapper of python 2.6
1202 # to calculate columns of string by 'encoding.ucolwidth()'
1203 def _wrap_chunks(self, chunks):
1204 colwidth = encoding.ucolwidth
1205
1206 lines = []
1207 if self.width <= 0:
1208 raise ValueError("invalid width %r (must be > 0)" % self.width)
1209
1210 # Arrange in reverse order so items can be efficiently popped
1211 # from a stack of chunks.
1212 chunks.reverse()
1213
1214 while chunks:
1215
1216 # Start the list of chunks that will make up the current line.
1217 # cur_len is just the length of all the chunks in cur_line.
1218 cur_line = []
1219 cur_len = 0
1220
1221 # Figure out which static string will prefix this line.
1222 if lines:
1223 indent = self.subsequent_indent
1224 else:
1225 indent = self.initial_indent
1226
1227 # Maximum width for this line.
1228 width = self.width - len(indent)
1229
1230 # First chunk on line is whitespace -- drop it, unless this
1231 # is the very beginning of the text (ie. no lines started yet).
1232 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
1233 del chunks[-1]
1234
1235 while chunks:
1236 l = colwidth(chunks[-1])
1237
1238 # Can at least squeeze this chunk onto the current line.
1239 if cur_len + l <= width:
1240 cur_line.append(chunks.pop())
1241 cur_len += l
1242
1243 # Nope, this line is full.
1244 else:
1245 break
1246
1247 # The current line is full, and the next chunk is too big to
1248 # fit on *any* line (not just this one).
1249 if chunks and colwidth(chunks[-1]) > width:
1250 self._handle_long_word(chunks, cur_line, cur_len, width)
1251
1252 # If the last chunk on this line is all whitespace, drop it.
1253 if (self.drop_whitespace and
1254 cur_line and cur_line[-1].strip() == ''):
1255 del cur_line[-1]
1256
1257 # Convert current line back to a string and store it in list
1258 # of all lines (return value).
1259 if cur_line:
1260 lines.append(indent + ''.join(cur_line))
1261
1262 return lines
1193 1263
1194 global MBTextWrapper 1264 global MBTextWrapper
1195 MBTextWrapper = tw 1265 MBTextWrapper = tw
1196 return tw(**kwargs) 1266 return tw(**kwargs)
1197 1267