Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/encoding.py @ 43076:2372284d9457
formatting: blacken the codebase
This is using my patch to black
(https://github.com/psf/black/pull/826) so we don't un-wrap collection
literals.
Done with:
`hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S`
`# skip-blame mass-reformatting only`
`# no-check-commit reformats foo_bar functions`
Differential Revision: https://phab.mercurial-scm.org/D6971
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 06 Oct 2019 09:45:02 -0400 |
parents | 25694a78e4a4 |
children | 687b865b95ad |
comparison
equal
deleted
inserted
replaced
43075:57875cf423c9 | 43076:2372284d9457 |
---|---|
15 error, | 15 error, |
16 policy, | 16 policy, |
17 pycompat, | 17 pycompat, |
18 ) | 18 ) |
19 | 19 |
20 from .pure import ( | 20 from .pure import charencode as charencodepure |
21 charencode as charencodepure, | |
22 ) | |
23 | 21 |
24 charencode = policy.importmod(r'charencode') | 22 charencode = policy.importmod(r'charencode') |
25 | 23 |
26 isasciistr = charencode.isasciistr | 24 isasciistr = charencode.isasciistr |
27 asciilower = charencode.asciilower | 25 asciilower = charencode.asciilower |
34 unichr = chr | 32 unichr = chr |
35 | 33 |
36 # These unicode characters are ignored by HFS+ (Apple Technote 1150, | 34 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
37 # "Unicode Subtleties"), so we need to ignore them in some places for | 35 # "Unicode Subtleties"), so we need to ignore them in some places for |
38 # sanity. | 36 # sanity. |
39 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in | 37 _ignore = [ |
40 "200c 200d 200e 200f 202a 202b 202c 202d 202e " | 38 unichr(int(x, 16)).encode("utf-8") |
41 "206a 206b 206c 206d 206e 206f feff".split()] | 39 for x in "200c 200d 200e 200f 202a 202b 202c 202d 202e " |
40 "206a 206b 206c 206d 206e 206f feff".split() | |
41 ] | |
42 # verify the next function will work | 42 # verify the next function will work |
43 assert all(i.startswith(("\xe2", "\xef")) for i in _ignore) | 43 assert all(i.startswith(("\xe2", "\xef")) for i in _ignore) |
44 | |
44 | 45 |
45 def hfsignoreclean(s): | 46 def hfsignoreclean(s): |
46 """Remove codepoints ignored by HFS+ from s. | 47 """Remove codepoints ignored by HFS+ from s. |
47 | 48 |
48 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) | 49 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) |
53 if "\xe2" in s or "\xef" in s: | 54 if "\xe2" in s or "\xef" in s: |
54 for c in _ignore: | 55 for c in _ignore: |
55 s = s.replace(c, '') | 56 s = s.replace(c, '') |
56 return s | 57 return s |
57 | 58 |
59 | |
58 # encoding.environ is provided read-only, which may not be used to modify | 60 # encoding.environ is provided read-only, which may not be used to modify |
59 # the process environment | 61 # the process environment |
60 _nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ) | 62 _nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ |
61 if not pycompat.ispy3: | 63 if not pycompat.ispy3: |
62 environ = os.environ # re-exports | 64 environ = os.environ # re-exports |
63 elif _nativeenviron: | 65 elif _nativeenviron: |
64 environ = os.environb # re-exports | 66 environ = os.environb # re-exports |
65 else: | 67 else: |
66 # preferred encoding isn't known yet; use utf-8 to avoid unicode error | 68 # preferred encoding isn't known yet; use utf-8 to avoid unicode error |
67 # and recreate it once encoding is settled | 69 # and recreate it once encoding is settled |
68 environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8')) | 70 environ = dict( |
69 for k, v in os.environ.items()) # re-exports | 71 (k.encode(r'utf-8'), v.encode(r'utf-8')) |
72 for k, v in os.environ.items() # re-exports | |
73 ) | |
70 | 74 |
71 _encodingrewrites = { | 75 _encodingrewrites = { |
72 '646': 'ascii', | 76 '646': 'ascii', |
73 'ANSI_X3.4-1968': 'ascii', | 77 'ANSI_X3.4-1968': 'ascii', |
74 } | 78 } |
86 except locale.Error: | 90 except locale.Error: |
87 encoding = 'ascii' | 91 encoding = 'ascii' |
88 encodingmode = environ.get("HGENCODINGMODE", "strict") | 92 encodingmode = environ.get("HGENCODINGMODE", "strict") |
89 fallbackencoding = 'ISO-8859-1' | 93 fallbackencoding = 'ISO-8859-1' |
90 | 94 |
95 | |
91 class localstr(bytes): | 96 class localstr(bytes): |
92 '''This class allows strings that are unmodified to be | 97 '''This class allows strings that are unmodified to be |
93 round-tripped to the local encoding and back''' | 98 round-tripped to the local encoding and back''' |
99 | |
94 def __new__(cls, u, l): | 100 def __new__(cls, u, l): |
95 s = bytes.__new__(cls, l) | 101 s = bytes.__new__(cls, l) |
96 s._utf8 = u | 102 s._utf8 = u |
97 return s | 103 return s |
104 | |
98 def __hash__(self): | 105 def __hash__(self): |
99 return hash(self._utf8) # avoid collisions in local string space | 106 return hash(self._utf8) # avoid collisions in local string space |
107 | |
100 | 108 |
101 class safelocalstr(bytes): | 109 class safelocalstr(bytes): |
102 """Tagged string denoting it was previously an internal UTF-8 string, | 110 """Tagged string denoting it was previously an internal UTF-8 string, |
103 and can be converted back to UTF-8 losslessly | 111 and can be converted back to UTF-8 losslessly |
104 | 112 |
105 >>> assert safelocalstr(b'\\xc3') == b'\\xc3' | 113 >>> assert safelocalstr(b'\\xc3') == b'\\xc3' |
106 >>> assert b'\\xc3' == safelocalstr(b'\\xc3') | 114 >>> assert b'\\xc3' == safelocalstr(b'\\xc3') |
107 >>> assert b'\\xc3' in {safelocalstr(b'\\xc3'): 0} | 115 >>> assert b'\\xc3' in {safelocalstr(b'\\xc3'): 0} |
108 >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0} | 116 >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0} |
109 """ | 117 """ |
118 | |
110 | 119 |
111 def tolocal(s): | 120 def tolocal(s): |
112 """ | 121 """ |
113 Convert a string from internal UTF-8 to local encoding | 122 Convert a string from internal UTF-8 to local encoding |
114 | 123 |
165 if u == r.decode(_sysstr(encoding)): | 174 if u == r.decode(_sysstr(encoding)): |
166 # r is a safe, non-lossy encoding of s | 175 # r is a safe, non-lossy encoding of s |
167 return safelocalstr(r) | 176 return safelocalstr(r) |
168 return localstr(u.encode('UTF-8'), r) | 177 return localstr(u.encode('UTF-8'), r) |
169 except UnicodeDecodeError: | 178 except UnicodeDecodeError: |
170 u = s.decode("utf-8", "replace") # last ditch | 179 u = s.decode("utf-8", "replace") # last ditch |
171 # can't round-trip | 180 # can't round-trip |
172 return u.encode(_sysstr(encoding), r"replace") | 181 return u.encode(_sysstr(encoding), r"replace") |
173 except LookupError as k: | 182 except LookupError as k: |
174 raise error.Abort(k, hint="please check your locale settings") | 183 raise error.Abort(k, hint="please check your locale settings") |
184 | |
175 | 185 |
176 def fromlocal(s): | 186 def fromlocal(s): |
177 """ | 187 """ |
178 Convert a string from the local character encoding to UTF-8 | 188 Convert a string from the local character encoding to UTF-8 |
179 | 189 |
192 | 202 |
193 try: | 203 try: |
194 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) | 204 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
195 return u.encode("utf-8") | 205 return u.encode("utf-8") |
196 except UnicodeDecodeError as inst: | 206 except UnicodeDecodeError as inst: |
197 sub = s[max(0, inst.start - 10):inst.start + 10] | 207 sub = s[max(0, inst.start - 10) : inst.start + 10] |
198 raise error.Abort("decoding near '%s': %s!" | 208 raise error.Abort( |
199 % (sub, pycompat.bytestr(inst))) | 209 "decoding near '%s': %s!" % (sub, pycompat.bytestr(inst)) |
210 ) | |
200 except LookupError as k: | 211 except LookupError as k: |
201 raise error.Abort(k, hint="please check your locale settings") | 212 raise error.Abort(k, hint="please check your locale settings") |
213 | |
202 | 214 |
203 def unitolocal(u): | 215 def unitolocal(u): |
204 """Convert a unicode string to a byte string of local encoding""" | 216 """Convert a unicode string to a byte string of local encoding""" |
205 return tolocal(u.encode('utf-8')) | 217 return tolocal(u.encode('utf-8')) |
206 | 218 |
219 | |
207 def unifromlocal(s): | 220 def unifromlocal(s): |
208 """Convert a byte string of local encoding to a unicode string""" | 221 """Convert a byte string of local encoding to a unicode string""" |
209 return fromlocal(s).decode('utf-8') | 222 return fromlocal(s).decode('utf-8') |
210 | 223 |
224 | |
211 def unimethod(bytesfunc): | 225 def unimethod(bytesfunc): |
212 """Create a proxy method that forwards __unicode__() and __str__() of | 226 """Create a proxy method that forwards __unicode__() and __str__() of |
213 Python 3 to __bytes__()""" | 227 Python 3 to __bytes__()""" |
228 | |
214 def unifunc(obj): | 229 def unifunc(obj): |
215 return unifromlocal(bytesfunc(obj)) | 230 return unifromlocal(bytesfunc(obj)) |
231 | |
216 return unifunc | 232 return unifunc |
233 | |
217 | 234 |
218 # converter functions between native str and byte string. use these if the | 235 # converter functions between native str and byte string. use these if the |
219 # character encoding is not aware (e.g. exception message) or is known to | 236 # character encoding is not aware (e.g. exception message) or is known to |
220 # be locale dependent (e.g. date formatting.) | 237 # be locale dependent (e.g. date formatting.) |
221 if pycompat.ispy3: | 238 if pycompat.ispy3: |
228 strmethod = pycompat.identity | 245 strmethod = pycompat.identity |
229 | 246 |
230 if not _nativeenviron: | 247 if not _nativeenviron: |
231 # now encoding and helper functions are available, recreate the environ | 248 # now encoding and helper functions are available, recreate the environ |
232 # dict to be exported to other modules | 249 # dict to be exported to other modules |
233 environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) | 250 environ = dict( |
234 for k, v in os.environ.items()) # re-exports | 251 (tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) |
252 for k, v in os.environ.items() # re-exports | |
253 ) | |
235 | 254 |
236 if pycompat.ispy3: | 255 if pycompat.ispy3: |
237 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which | 256 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which |
238 # returns bytes. | 257 # returns bytes. |
239 if pycompat.iswindows: | 258 if pycompat.iswindows: |
244 getcwd = os.getcwdb # re-exports | 263 getcwd = os.getcwdb # re-exports |
245 else: | 264 else: |
246 getcwd = os.getcwd # re-exports | 265 getcwd = os.getcwd # re-exports |
247 | 266 |
248 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. | 267 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. |
249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" | 268 _wide = _sysstr( |
250 and "WFA" or "WF") | 269 environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" and "WFA" or "WF" |
270 ) | |
271 | |
251 | 272 |
252 def colwidth(s): | 273 def colwidth(s): |
253 "Find the column width of a string for display in the local encoding" | 274 "Find the column width of a string for display in the local encoding" |
254 return ucolwidth(s.decode(_sysstr(encoding), r'replace')) | 275 return ucolwidth(s.decode(_sysstr(encoding), r'replace')) |
276 | |
255 | 277 |
256 def ucolwidth(d): | 278 def ucolwidth(d): |
257 "Find the column width of a Unicode string for display" | 279 "Find the column width of a Unicode string for display" |
258 eaw = getattr(unicodedata, 'east_asian_width', None) | 280 eaw = getattr(unicodedata, 'east_asian_width', None) |
259 if eaw is not None: | 281 if eaw is not None: |
260 return sum([eaw(c) in _wide and 2 or 1 for c in d]) | 282 return sum([eaw(c) in _wide and 2 or 1 for c in d]) |
261 return len(d) | 283 return len(d) |
284 | |
262 | 285 |
263 def getcols(s, start, c): | 286 def getcols(s, start, c): |
264 '''Use colwidth to find a c-column substring of s starting at byte | 287 '''Use colwidth to find a c-column substring of s starting at byte |
265 index start''' | 288 index start''' |
266 for x in pycompat.xrange(start + c, len(s)): | 289 for x in pycompat.xrange(start + c, len(s)): |
267 t = s[start:x] | 290 t = s[start:x] |
268 if colwidth(t) == c: | 291 if colwidth(t) == c: |
269 return t | 292 return t |
293 | |
270 | 294 |
271 def trim(s, width, ellipsis='', leftside=False): | 295 def trim(s, width, ellipsis='', leftside=False): |
272 """Trim string 's' to at most 'width' columns (including 'ellipsis'). | 296 """Trim string 's' to at most 'width' columns (including 'ellipsis'). |
273 | 297 |
274 If 'leftside' is True, left side of string 's' is trimmed. | 298 If 'leftside' is True, left side of string 's' is trimmed. |
334 + | 358 + |
335 """ | 359 """ |
336 try: | 360 try: |
337 u = s.decode(_sysstr(encoding)) | 361 u = s.decode(_sysstr(encoding)) |
338 except UnicodeDecodeError: | 362 except UnicodeDecodeError: |
339 if len(s) <= width: # trimming is not needed | 363 if len(s) <= width: # trimming is not needed |
340 return s | 364 return s |
341 width -= len(ellipsis) | 365 width -= len(ellipsis) |
342 if width <= 0: # no enough room even for ellipsis | 366 if width <= 0: # no enough room even for ellipsis |
343 return ellipsis[:width + len(ellipsis)] | 367 return ellipsis[: width + len(ellipsis)] |
344 if leftside: | 368 if leftside: |
345 return ellipsis + s[-width:] | 369 return ellipsis + s[-width:] |
346 return s[:width] + ellipsis | 370 return s[:width] + ellipsis |
347 | 371 |
348 if ucolwidth(u) <= width: # trimming is not needed | 372 if ucolwidth(u) <= width: # trimming is not needed |
349 return s | 373 return s |
350 | 374 |
351 width -= len(ellipsis) | 375 width -= len(ellipsis) |
352 if width <= 0: # no enough room even for ellipsis | 376 if width <= 0: # no enough room even for ellipsis |
353 return ellipsis[:width + len(ellipsis)] | 377 return ellipsis[: width + len(ellipsis)] |
354 | 378 |
355 if leftside: | 379 if leftside: |
356 uslice = lambda i: u[i:] | 380 uslice = lambda i: u[i:] |
357 concat = lambda s: ellipsis + s | 381 concat = lambda s: ellipsis + s |
358 else: | 382 else: |
360 concat = lambda s: s + ellipsis | 384 concat = lambda s: s + ellipsis |
361 for i in pycompat.xrange(1, len(u)): | 385 for i in pycompat.xrange(1, len(u)): |
362 usub = uslice(i) | 386 usub = uslice(i) |
363 if ucolwidth(usub) <= width: | 387 if ucolwidth(usub) <= width: |
364 return concat(usub.encode(_sysstr(encoding))) | 388 return concat(usub.encode(_sysstr(encoding))) |
365 return ellipsis # no enough room for multi-column characters | 389 return ellipsis # no enough room for multi-column characters |
390 | |
366 | 391 |
367 def lower(s): | 392 def lower(s): |
368 "best-effort encoding-aware case-folding of local string s" | 393 "best-effort encoding-aware case-folding of local string s" |
369 try: | 394 try: |
370 return asciilower(s) | 395 return asciilower(s) |
376 else: | 401 else: |
377 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) | 402 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
378 | 403 |
379 lu = u.lower() | 404 lu = u.lower() |
380 if u == lu: | 405 if u == lu: |
381 return s # preserve localstring | 406 return s # preserve localstring |
382 return lu.encode(_sysstr(encoding)) | 407 return lu.encode(_sysstr(encoding)) |
383 except UnicodeError: | 408 except UnicodeError: |
384 return s.lower() # we don't know how to fold this except in ASCII | 409 return s.lower() # we don't know how to fold this except in ASCII |
385 except LookupError as k: | 410 except LookupError as k: |
386 raise error.Abort(k, hint="please check your locale settings") | 411 raise error.Abort(k, hint="please check your locale settings") |
412 | |
387 | 413 |
388 def upper(s): | 414 def upper(s): |
389 "best-effort encoding-aware case-folding of local string s" | 415 "best-effort encoding-aware case-folding of local string s" |
390 try: | 416 try: |
391 return asciiupper(s) | 417 return asciiupper(s) |
392 except UnicodeDecodeError: | 418 except UnicodeDecodeError: |
393 return upperfallback(s) | 419 return upperfallback(s) |
420 | |
394 | 421 |
395 def upperfallback(s): | 422 def upperfallback(s): |
396 try: | 423 try: |
397 if isinstance(s, localstr): | 424 if isinstance(s, localstr): |
398 u = s._utf8.decode("utf-8") | 425 u = s._utf8.decode("utf-8") |
399 else: | 426 else: |
400 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) | 427 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
401 | 428 |
402 uu = u.upper() | 429 uu = u.upper() |
403 if u == uu: | 430 if u == uu: |
404 return s # preserve localstring | 431 return s # preserve localstring |
405 return uu.encode(_sysstr(encoding)) | 432 return uu.encode(_sysstr(encoding)) |
406 except UnicodeError: | 433 except UnicodeError: |
407 return s.upper() # we don't know how to fold this except in ASCII | 434 return s.upper() # we don't know how to fold this except in ASCII |
408 except LookupError as k: | 435 except LookupError as k: |
409 raise error.Abort(k, hint="please check your locale settings") | 436 raise error.Abort(k, hint="please check your locale settings") |
437 | |
410 | 438 |
411 class normcasespecs(object): | 439 class normcasespecs(object): |
412 '''what a platform's normcase does to ASCII strings | 440 '''what a platform's normcase does to ASCII strings |
413 | 441 |
414 This is specified per platform, and should be consistent with what normcase | 442 This is specified per platform, and should be consistent with what normcase |
417 lower: normcase lowercases ASCII strings | 445 lower: normcase lowercases ASCII strings |
418 upper: normcase uppercases ASCII strings | 446 upper: normcase uppercases ASCII strings |
419 other: the fallback function should always be called | 447 other: the fallback function should always be called |
420 | 448 |
421 This should be kept in sync with normcase_spec in util.h.''' | 449 This should be kept in sync with normcase_spec in util.h.''' |
450 | |
422 lower = -1 | 451 lower = -1 |
423 upper = 1 | 452 upper = 1 |
424 other = 0 | 453 other = 0 |
454 | |
425 | 455 |
426 def jsonescape(s, paranoid=False): | 456 def jsonescape(s, paranoid=False): |
427 '''returns a string suitable for JSON | 457 '''returns a string suitable for JSON |
428 | 458 |
429 JSON is problematic for us because it doesn't support non-Unicode | 459 JSON is problematic for us because it doesn't support non-Unicode |
473 return _jsonescapeu8fast(u8chars, paranoid) | 503 return _jsonescapeu8fast(u8chars, paranoid) |
474 except ValueError: | 504 except ValueError: |
475 pass | 505 pass |
476 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) | 506 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
477 | 507 |
508 | |
478 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 | 509 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 |
479 # bytes are mapped to that range. | 510 # bytes are mapped to that range. |
480 if pycompat.ispy3: | 511 if pycompat.ispy3: |
481 _utf8strict = r'surrogatepass' | 512 _utf8strict = r'surrogatepass' |
482 else: | 513 else: |
483 _utf8strict = r'strict' | 514 _utf8strict = r'strict' |
484 | 515 |
485 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | 516 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
486 | 517 |
518 | |
487 def getutf8char(s, pos): | 519 def getutf8char(s, pos): |
488 '''get the next full utf-8 character in the given string, starting at pos | 520 '''get the next full utf-8 character in the given string, starting at pos |
489 | 521 |
490 Raises a UnicodeError if the given location does not start a valid | 522 Raises a UnicodeError if the given location does not start a valid |
491 utf-8 character. | 523 utf-8 character. |
492 ''' | 524 ''' |
493 | 525 |
494 # find how many bytes to attempt decoding from first nibble | 526 # find how many bytes to attempt decoding from first nibble |
495 l = _utf8len[ord(s[pos:pos + 1]) >> 4] | 527 l = _utf8len[ord(s[pos : pos + 1]) >> 4] |
496 if not l: # ascii | 528 if not l: # ascii |
497 return s[pos:pos + 1] | 529 return s[pos : pos + 1] |
498 | 530 |
499 c = s[pos:pos + l] | 531 c = s[pos : pos + l] |
500 # validate with attempted decode | 532 # validate with attempted decode |
501 c.decode("utf-8", _utf8strict) | 533 c.decode("utf-8", _utf8strict) |
502 return c | 534 return c |
535 | |
503 | 536 |
504 def toutf8b(s): | 537 def toutf8b(s): |
505 '''convert a local, possibly-binary string into UTF-8b | 538 '''convert a local, possibly-binary string into UTF-8b |
506 | 539 |
507 This is intended as a generic method to preserve data when working | 540 This is intended as a generic method to preserve data when working |
556 while pos < l: | 589 while pos < l: |
557 try: | 590 try: |
558 c = getutf8char(s, pos) | 591 c = getutf8char(s, pos) |
559 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": | 592 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": |
560 # have to re-escape existing U+DCxx characters | 593 # have to re-escape existing U+DCxx characters |
561 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) | 594 c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict) |
562 pos += 1 | 595 pos += 1 |
563 else: | 596 else: |
564 pos += len(c) | 597 pos += len(c) |
565 except UnicodeDecodeError: | 598 except UnicodeDecodeError: |
566 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) | 599 c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict) |
567 pos += 1 | 600 pos += 1 |
568 r += c | 601 r += c |
569 return r | 602 return r |
603 | |
570 | 604 |
571 def fromutf8b(s): | 605 def fromutf8b(s): |
572 '''Given a UTF-8b string, return a local, possibly-binary string. | 606 '''Given a UTF-8b string, return a local, possibly-binary string. |
573 | 607 |
574 return the original binary string. This | 608 return the original binary string. This |
609 while pos < l: | 643 while pos < l: |
610 c = getutf8char(s, pos) | 644 c = getutf8char(s, pos) |
611 pos += len(c) | 645 pos += len(c) |
612 # unescape U+DCxx characters | 646 # unescape U+DCxx characters |
613 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": | 647 if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": |
614 c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xff) | 648 c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xFF) |
615 r += c | 649 r += c |
616 return r | 650 return r |