Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/encoding.py @ 30033:02dbfaa6df0b
py3: convert encoding name and mode to str
Otherwise tolocal() and fromlocal() wouldn't work on Python 3. Still, tolocal()
can't make a valid localstr object because localstr inherits from str, but it can
return some object without raising exceptions.
Since Py3 bytes() behaves much more like bytearray() than like str() of Py2, we can't
simply do s/str/bytes/g. I have no good idea how to handle the str/bytes divergence.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Wed, 28 Sep 2016 20:39:06 +0900 |
parents | 0f6d6fdd3c2a |
children | e4a6b439acc5 |
comparison
equal
deleted
inserted
replaced
30032:2219f4f82ede | 30033:02dbfaa6df0b |
---|---|
14 | 14 |
15 from . import ( | 15 from . import ( |
16 error, | 16 error, |
17 pycompat, | 17 pycompat, |
18 ) | 18 ) |
19 | |
20 _sysstr = pycompat.sysstr | |
19 | 21 |
20 if pycompat.ispy3: | 22 if pycompat.ispy3: |
21 unichr = chr | 23 unichr = chr |
22 | 24 |
23 # These unicode characters are ignored by HFS+ (Apple Technote 1150, | 25 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
134 # make sure string is actually stored in UTF-8 | 136 # make sure string is actually stored in UTF-8 |
135 u = s.decode('UTF-8') | 137 u = s.decode('UTF-8') |
136 if encoding == 'UTF-8': | 138 if encoding == 'UTF-8': |
137 # fast path | 139 # fast path |
138 return s | 140 return s |
139 r = u.encode(encoding, "replace") | 141 r = u.encode(_sysstr(encoding), u"replace") |
140 if u == r.decode(encoding): | 142 if u == r.decode(_sysstr(encoding)): |
141 # r is a safe, non-lossy encoding of s | 143 # r is a safe, non-lossy encoding of s |
142 return r | 144 return r |
143 return localstr(s, r) | 145 return localstr(s, r) |
144 except UnicodeDecodeError: | 146 except UnicodeDecodeError: |
145 # we should only get here if we're looking at an ancient changeset | 147 # we should only get here if we're looking at an ancient changeset |
146 try: | 148 try: |
147 u = s.decode(fallbackencoding) | 149 u = s.decode(_sysstr(fallbackencoding)) |
148 r = u.encode(encoding, "replace") | 150 r = u.encode(_sysstr(encoding), u"replace") |
149 if u == r.decode(encoding): | 151 if u == r.decode(_sysstr(encoding)): |
150 # r is a safe, non-lossy encoding of s | 152 # r is a safe, non-lossy encoding of s |
151 return r | 153 return r |
152 return localstr(u.encode('UTF-8'), r) | 154 return localstr(u.encode('UTF-8'), r) |
153 except UnicodeDecodeError: | 155 except UnicodeDecodeError: |
154 u = s.decode("utf-8", "replace") # last ditch | 156 u = s.decode("utf-8", "replace") # last ditch |
155 return u.encode(encoding, "replace") # can't round-trip | 157 # can't round-trip |
158 return u.encode(_sysstr(encoding), u"replace") | |
156 except LookupError as k: | 159 except LookupError as k: |
157 raise error.Abort(k, hint="please check your locale settings") | 160 raise error.Abort(k, hint="please check your locale settings") |
158 | 161 |
159 def fromlocal(s): | 162 def fromlocal(s): |
160 """ | 163 """ |
170 # can we do a lossless round-trip? | 173 # can we do a lossless round-trip? |
171 if isinstance(s, localstr): | 174 if isinstance(s, localstr): |
172 return s._utf8 | 175 return s._utf8 |
173 | 176 |
174 try: | 177 try: |
175 return s.decode(encoding, encodingmode).encode("utf-8") | 178 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
179 return u.encode("utf-8") | |
176 except UnicodeDecodeError as inst: | 180 except UnicodeDecodeError as inst: |
177 sub = s[max(0, inst.start - 10):inst.start + 10] | 181 sub = s[max(0, inst.start - 10):inst.start + 10] |
178 raise error.Abort("decoding near '%s': %s!" % (sub, inst)) | 182 raise error.Abort("decoding near '%s': %s!" % (sub, inst)) |
179 except LookupError as k: | 183 except LookupError as k: |
180 raise error.Abort(k, hint="please check your locale settings") | 184 raise error.Abort(k, hint="please check your locale settings") |
183 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" | 187 wide = (os.environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" |
184 and "WFA" or "WF") | 188 and "WFA" or "WF") |
185 | 189 |
186 def colwidth(s): | 190 def colwidth(s): |
187 "Find the column width of a string for display in the local encoding" | 191 "Find the column width of a string for display in the local encoding" |
188 return ucolwidth(s.decode(encoding, 'replace')) | 192 return ucolwidth(s.decode(_sysstr(encoding), u'replace')) |
189 | 193 |
190 def ucolwidth(d): | 194 def ucolwidth(d): |
191 "Find the column width of a Unicode string for display" | 195 "Find the column width of a Unicode string for display" |
192 eaw = getattr(unicodedata, 'east_asian_width', None) | 196 eaw = getattr(unicodedata, 'east_asian_width', None) |
193 if eaw is not None: | 197 if eaw is not None: |
263 +++ | 267 +++ |
264 >>> print trim(t, 1, ellipsis=ellipsis) | 268 >>> print trim(t, 1, ellipsis=ellipsis) |
265 + | 269 + |
266 """ | 270 """ |
267 try: | 271 try: |
268 u = s.decode(encoding) | 272 u = s.decode(_sysstr(encoding)) |
269 except UnicodeDecodeError: | 273 except UnicodeDecodeError: |
270 if len(s) <= width: # trimming is not needed | 274 if len(s) <= width: # trimming is not needed |
271 return s | 275 return s |
272 width -= len(ellipsis) | 276 width -= len(ellipsis) |
273 if width <= 0: # no enough room even for ellipsis | 277 if width <= 0: # no enough room even for ellipsis |
290 uslice = lambda i: u[:-i] | 294 uslice = lambda i: u[:-i] |
291 concat = lambda s: s + ellipsis | 295 concat = lambda s: s + ellipsis |
292 for i in xrange(1, len(u)): | 296 for i in xrange(1, len(u)): |
293 usub = uslice(i) | 297 usub = uslice(i) |
294 if ucolwidth(usub) <= width: | 298 if ucolwidth(usub) <= width: |
295 return concat(usub.encode(encoding)) | 299 return concat(usub.encode(_sysstr(encoding))) |
296 return ellipsis # no enough room for multi-column characters | 300 return ellipsis # no enough room for multi-column characters |
297 | 301 |
298 def _asciilower(s): | 302 def _asciilower(s): |
299 '''convert a string to lowercase if ASCII | 303 '''convert a string to lowercase if ASCII |
300 | 304 |
335 pass | 339 pass |
336 try: | 340 try: |
337 if isinstance(s, localstr): | 341 if isinstance(s, localstr): |
338 u = s._utf8.decode("utf-8") | 342 u = s._utf8.decode("utf-8") |
339 else: | 343 else: |
340 u = s.decode(encoding, encodingmode) | 344 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
341 | 345 |
342 lu = u.lower() | 346 lu = u.lower() |
343 if u == lu: | 347 if u == lu: |
344 return s # preserve localstring | 348 return s # preserve localstring |
345 return lu.encode(encoding) | 349 return lu.encode(_sysstr(encoding)) |
346 except UnicodeError: | 350 except UnicodeError: |
347 return s.lower() # we don't know how to fold this except in ASCII | 351 return s.lower() # we don't know how to fold this except in ASCII |
348 except LookupError as k: | 352 except LookupError as k: |
349 raise error.Abort(k, hint="please check your locale settings") | 353 raise error.Abort(k, hint="please check your locale settings") |
350 | 354 |
358 def upperfallback(s): | 362 def upperfallback(s): |
359 try: | 363 try: |
360 if isinstance(s, localstr): | 364 if isinstance(s, localstr): |
361 u = s._utf8.decode("utf-8") | 365 u = s._utf8.decode("utf-8") |
362 else: | 366 else: |
363 u = s.decode(encoding, encodingmode) | 367 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
364 | 368 |
365 uu = u.upper() | 369 uu = u.upper() |
366 if u == uu: | 370 if u == uu: |
367 return s # preserve localstring | 371 return s # preserve localstring |
368 return uu.encode(encoding) | 372 return uu.encode(_sysstr(encoding)) |
369 except UnicodeError: | 373 except UnicodeError: |
370 return s.upper() # we don't know how to fold this except in ASCII | 374 return s.upper() # we don't know how to fold this except in ASCII |
371 except LookupError as k: | 375 except LookupError as k: |
372 raise error.Abort(k, hint="please check your locale settings") | 376 raise error.Abort(k, hint="please check your locale settings") |
373 | 377 |