Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/util.py @ 15067:cc16323e748d
merge with stable
author | Martin Geisler <mg@aragost.com> |
---|---|
date | Tue, 30 Aug 2011 15:22:10 +0200 |
parents | 81f33be0ea79 24efa83d81cb |
children | 89d9f92f6fdd |
comparison
equal
deleted
inserted
replaced
15061:86380f24e697 | 15067:cc16323e748d |
---|---|
14 """ | 14 """ |
15 | 15 |
16 from i18n import _ | 16 from i18n import _ |
17 import error, osutil, encoding | 17 import error, osutil, encoding |
18 import errno, re, shutil, sys, tempfile, traceback | 18 import errno, re, shutil, sys, tempfile, traceback |
19 import os, time, calendar, textwrap, unicodedata, signal | 19 import os, time, calendar, textwrap, signal |
20 import imp, socket, urllib | 20 import imp, socket, urllib |
21 | 21 |
22 if os.name == 'nt': | 22 if os.name == 'nt': |
23 import windows as platform | 23 import windows as platform |
24 else: | 24 else: |
1156 | 1156 |
1157 # delay import of textwrap | 1157 # delay import of textwrap |
1158 def MBTextWrapper(**kwargs): | 1158 def MBTextWrapper(**kwargs): |
1159 class tw(textwrap.TextWrapper): | 1159 class tw(textwrap.TextWrapper): |
1160 """ | 1160 """ |
1161 Extend TextWrapper for double-width characters. | 1161 Extend TextWrapper for width-awareness. |
1162 | 1162 |
1163 Some Asian characters use two terminal columns instead of one. | 1163 Neither the number of 'bytes' in any encoding nor of 'characters' is |
1164 A good example of this behavior can be seen with u'\u65e5\u672c', | 1164 appropriate for calculating the terminal columns of a given string. |
1165 the two Japanese characters for "Japan": | 1165 |
1166 len() returns 2, but when printed to a terminal, they eat 4 columns. | 1166 The original TextWrapper implementation uses built-in 'len()' directly, |
1167 | 1167 so overriding is needed to use the width information of each character. |
1168 (Note that this has nothing to do whatsoever with unicode | 1168 |
1169 representation, or encoding of the underlying string) | 1169 In addition, characters classified as 'ambiguous' width are |
1170 treated as wide in East Asian locales, but as narrow elsewhere. | |
1171 | |
1172 This requires a user decision to determine the width of such characters. | |
1170 """ | 1173 """ |
1171 def __init__(self, **kwargs): | 1174 def __init__(self, **kwargs): |
1172 textwrap.TextWrapper.__init__(self, **kwargs) | 1175 textwrap.TextWrapper.__init__(self, **kwargs) |
1173 | 1176 |
1177 # for compatibility between 2.4 and 2.6 | |
1178 if getattr(self, 'drop_whitespace', None) is None: | |
1179 self.drop_whitespace = kwargs.get('drop_whitespace', True) | |
1180 | |
1174 def _cutdown(self, ucstr, space_left): | 1181 def _cutdown(self, ucstr, space_left): |
1175 l = 0 | 1182 l = 0 |
1176 colwidth = unicodedata.east_asian_width | 1183 colwidth = encoding.ucolwidth |
1177 for i in xrange(len(ucstr)): | 1184 for i in xrange(len(ucstr)): |
1178 l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 | 1185 l += colwidth(ucstr[i]) |
1179 if space_left < l: | 1186 if space_left < l: |
1180 return (ucstr[:i], ucstr[i:]) | 1187 return (ucstr[:i], ucstr[i:]) |
1181 return ucstr, '' | 1188 return ucstr, '' |
1182 | 1189 |
1183 # overriding of base class | 1190 # overriding of base class |
1188 cut, res = self._cutdown(reversed_chunks[-1], space_left) | 1195 cut, res = self._cutdown(reversed_chunks[-1], space_left) |
1189 cur_line.append(cut) | 1196 cur_line.append(cut) |
1190 reversed_chunks[-1] = res | 1197 reversed_chunks[-1] = res |
1191 elif not cur_line: | 1198 elif not cur_line: |
1192 cur_line.append(reversed_chunks.pop()) | 1199 cur_line.append(reversed_chunks.pop()) |
1200 | |
1201 # this overriding code is imported from TextWrapper of python 2.6 | |
1202 # to calculate columns of string by 'encoding.ucolwidth()' | |
1203 def _wrap_chunks(self, chunks): | |
1204 colwidth = encoding.ucolwidth | |
1205 | |
1206 lines = [] | |
1207 if self.width <= 0: | |
1208 raise ValueError("invalid width %r (must be > 0)" % self.width) | |
1209 | |
1210 # Arrange in reverse order so items can be efficiently popped | |
1211 # from a stack of chunks. | |
1212 chunks.reverse() | |
1213 | |
1214 while chunks: | |
1215 | |
1216 # Start the list of chunks that will make up the current line. | |
1217 # cur_len is just the length of all the chunks in cur_line. | |
1218 cur_line = [] | |
1219 cur_len = 0 | |
1220 | |
1221 # Figure out which static string will prefix this line. | |
1222 if lines: | |
1223 indent = self.subsequent_indent | |
1224 else: | |
1225 indent = self.initial_indent | |
1226 | |
1227 # Maximum width for this line. | |
1228 width = self.width - len(indent) | |
1229 | |
1230 # First chunk on line is whitespace -- drop it, unless this | |
1231 # is the very beginning of the text (ie. no lines started yet). | |
1232 if self.drop_whitespace and chunks[-1].strip() == '' and lines: | |
1233 del chunks[-1] | |
1234 | |
1235 while chunks: | |
1236 l = colwidth(chunks[-1]) | |
1237 | |
1238 # Can at least squeeze this chunk onto the current line. | |
1239 if cur_len + l <= width: | |
1240 cur_line.append(chunks.pop()) | |
1241 cur_len += l | |
1242 | |
1243 # Nope, this line is full. | |
1244 else: | |
1245 break | |
1246 | |
1247 # The current line is full, and the next chunk is too big to | |
1248 # fit on *any* line (not just this one). | |
1249 if chunks and colwidth(chunks[-1]) > width: | |
1250 self._handle_long_word(chunks, cur_line, cur_len, width) | |
1251 | |
1252 # If the last chunk on this line is all whitespace, drop it. | |
1253 if (self.drop_whitespace and | |
1254 cur_line and cur_line[-1].strip() == ''): | |
1255 del cur_line[-1] | |
1256 | |
1257 # Convert current line back to a string and store it in list | |
1258 # of all lines (return value). | |
1259 if cur_line: | |
1260 lines.append(indent + ''.join(cur_line)) | |
1261 | |
1262 return lines | |
1193 | 1263 |
1194 global MBTextWrapper | 1264 global MBTextWrapper |
1195 MBTextWrapper = tw | 1265 MBTextWrapper = tw |
1196 return tw(**kwargs) | 1266 return tw(**kwargs) |
1197 | 1267 |