comparison mercurial/encoding.py @ 30033:02dbfaa6df0b

py3: convert encoding name and mode to str. Otherwise, tolocal() and fromlocal() wouldn't work on Python 3. tolocal() still can't make a valid localstr object, because localstr inherits from str, but it can at least return some object without raising exceptions. Since Py3 bytes() behaves much more like bytearray() than like Py2 str(), we can't simply do s/str/bytes/g. I have no good idea how to handle the str/bytes divergence.
author Yuya Nishihara <yuya@tcha.org>
date Wed, 28 Sep 2016 20:39:06 +0900
parents 0f6d6fdd3c2a
children e4a6b439acc5
comparison
equal deleted inserted replaced
30032:2219f4f82ede 30033:02dbfaa6df0b
14 14
15 from . import ( 15 from . import (
16 error, 16 error,
17 pycompat, 17 pycompat,
18 ) 18 )
19
20 _sysstr = pycompat.sysstr
19 21
20 if pycompat.ispy3: 22 if pycompat.ispy3:
21 unichr = chr 23 unichr = chr
22 24
23 # These unicode characters are ignored by HFS+ (Apple Technote 1150, 25 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
134 # make sure string is actually stored in UTF-8 136 # make sure string is actually stored in UTF-8
135 u = s.decode('UTF-8') 137 u = s.decode('UTF-8')
136 if encoding == 'UTF-8': 138 if encoding == 'UTF-8':
137 # fast path 139 # fast path
138 return s 140 return s
139 r = u.encode(encoding, "replace") 141 r = u.encode(_sysstr(encoding), u"replace")
140 if u == r.decode(encoding): 142 if u == r.decode(_sysstr(encoding)):
141 # r is a safe, non-lossy encoding of s 143 # r is a safe, non-lossy encoding of s
142 return r 144 return r
143 return localstr(s, r) 145 return localstr(s, r)
144 except UnicodeDecodeError: 146 except UnicodeDecodeError:
145 # we should only get here if we're looking at an ancient changeset 147 # we should only get here if we're looking at an ancient changeset
146 try: 148 try:
147 u = s.decode(fallbackencoding) 149 u = s.decode(_sysstr(fallbackencoding))
148 r = u.encode(encoding, "replace") 150 r = u.encode(_sysstr(encoding), u"replace")
149 if u == r.decode(encoding): 151 if u == r.decode(_sysstr(encoding)):
150 # r is a safe, non-lossy encoding of s 152 # r is a safe, non-lossy encoding of s
151 return r 153 return r
152 return localstr(u.encode('UTF-8'), r) 154 return localstr(u.encode('UTF-8'), r)
153 except UnicodeDecodeError: 155 except UnicodeDecodeError:
154 u = s.decode("utf-8", "replace") # last ditch 156 u = s.decode("utf-8", "replace") # last ditch
155 return u.encode(encoding, "replace") # can't round-trip 157 # can't round-trip
158 return u.encode(_sysstr(encoding), u"replace")
156 except LookupError as k: 159 except LookupError as k:
157 raise error.Abort(k, hint="please check your locale settings") 160 raise error.Abort(k, hint="please check your locale settings")
158 161
159 def fromlocal(s): 162 def fromlocal(s):
160 """ 163 """
170 # can we do a lossless round-trip? 173 # can we do a lossless round-trip?
171 if isinstance(s, localstr): 174 if isinstance(s, localstr):
172 return s._utf8 175 return s._utf8
173 176
174 try: 177 try:
175 return s.decode(encoding, encodingmode).encode("utf-8") 178 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
179 return u.encode("utf-8")
176 except UnicodeDecodeError as inst: 180 except UnicodeDecodeError as inst:
177 sub = s[max(0, inst.start - 10):inst.start + 10] 181 sub = s[max(0, inst.start - 10):inst.start + 10]
178 raise error.Abort("decoding near '%s': %s!" % (sub, inst)) 182 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
179 except LookupError as k: 183 except LookupError as k:
180 raise error.Abort(k, hint="please check your locale settings") 184 raise error.Abort(k, hint="please check your locale settings")
183 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" 187 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
184 and "WFA" or "WF") 188 and "WFA" or "WF")
185 189
186 def colwidth(s): 190 def colwidth(s):
187 "Find the column width of a string for display in the local encoding" 191 "Find the column width of a string for display in the local encoding"
188 return ucolwidth(s.decode(encoding, 'replace')) 192 return ucolwidth(s.decode(_sysstr(encoding), u'replace'))
189 193
190 def ucolwidth(d): 194 def ucolwidth(d):
191 "Find the column width of a Unicode string for display" 195 "Find the column width of a Unicode string for display"
192 eaw = getattr(unicodedata, 'east_asian_width', None) 196 eaw = getattr(unicodedata, 'east_asian_width', None)
193 if eaw is not None: 197 if eaw is not None:
263 +++ 267 +++
264 >>> print trim(t, 1, ellipsis=ellipsis) 268 >>> print trim(t, 1, ellipsis=ellipsis)
265 + 269 +
266 """ 270 """
267 try: 271 try:
268 u = s.decode(encoding) 272 u = s.decode(_sysstr(encoding))
269 except UnicodeDecodeError: 273 except UnicodeDecodeError:
270 if len(s) <= width: # trimming is not needed 274 if len(s) <= width: # trimming is not needed
271 return s 275 return s
272 width -= len(ellipsis) 276 width -= len(ellipsis)
273 if width <= 0: # no enough room even for ellipsis 277 if width <= 0: # no enough room even for ellipsis
290 uslice = lambda i: u[:-i] 294 uslice = lambda i: u[:-i]
291 concat = lambda s: s + ellipsis 295 concat = lambda s: s + ellipsis
292 for i in xrange(1, len(u)): 296 for i in xrange(1, len(u)):
293 usub = uslice(i) 297 usub = uslice(i)
294 if ucolwidth(usub) <= width: 298 if ucolwidth(usub) <= width:
295 return concat(usub.encode(encoding)) 299 return concat(usub.encode(_sysstr(encoding)))
296 return ellipsis # no enough room for multi-column characters 300 return ellipsis # no enough room for multi-column characters
297 301
298 def _asciilower(s): 302 def _asciilower(s):
299 '''convert a string to lowercase if ASCII 303 '''convert a string to lowercase if ASCII
300 304
335 pass 339 pass
336 try: 340 try:
337 if isinstance(s, localstr): 341 if isinstance(s, localstr):
338 u = s._utf8.decode("utf-8") 342 u = s._utf8.decode("utf-8")
339 else: 343 else:
340 u = s.decode(encoding, encodingmode) 344 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
341 345
342 lu = u.lower() 346 lu = u.lower()
343 if u == lu: 347 if u == lu:
344 return s # preserve localstring 348 return s # preserve localstring
345 return lu.encode(encoding) 349 return lu.encode(_sysstr(encoding))
346 except UnicodeError: 350 except UnicodeError:
347 return s.lower() # we don't know how to fold this except in ASCII 351 return s.lower() # we don't know how to fold this except in ASCII
348 except LookupError as k: 352 except LookupError as k:
349 raise error.Abort(k, hint="please check your locale settings") 353 raise error.Abort(k, hint="please check your locale settings")
350 354
358 def upperfallback(s): 362 def upperfallback(s):
359 try: 363 try:
360 if isinstance(s, localstr): 364 if isinstance(s, localstr):
361 u = s._utf8.decode("utf-8") 365 u = s._utf8.decode("utf-8")
362 else: 366 else:
363 u = s.decode(encoding, encodingmode) 367 u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
364 368
365 uu = u.upper() 369 uu = u.upper()
366 if u == uu: 370 if u == uu:
367 return s # preserve localstring 371 return s # preserve localstring
368 return uu.encode(encoding) 372 return uu.encode(_sysstr(encoding))
369 except UnicodeError: 373 except UnicodeError:
370 return s.upper() # we don't know how to fold this except in ASCII 374 return s.upper() # we don't know how to fold this except in ASCII
371 except LookupError as k: 375 except LookupError as k:
372 raise error.Abort(k, hint="please check your locale settings") 376 raise error.Abort(k, hint="please check your locale settings")
373 377