Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/encoding.py @ 43544:2ade00f3b03b
encoding: add comment-based type hints for pytype
Differential Revision: https://phab.mercurial-scm.org/D7275
author | Augie Fackler <augie@google.com> |
---|---|
date | Wed, 06 Nov 2019 14:48:34 -0500 |
parents | 5f2a8dabb0d8 |
children | 313e3a279828 |
comparison
equal
deleted
inserted
replaced
43543:daade078f1f0 | 43544:2ade00f3b03b |
---|---|
17 policy, | 17 policy, |
18 pycompat, | 18 pycompat, |
19 ) | 19 ) |
20 | 20 |
21 from .pure import charencode as charencodepure | 21 from .pure import charencode as charencodepure |
22 | |
23 if not globals(): # hide this from non-pytype users | |
24 from typing import ( | |
25 Any, | |
26 Callable, | |
27 List, | |
28 Text, | |
29 Type, | |
30 TypeVar, | |
31 Union, | |
32 ) | |
33 | |
34 # keep pyflakes happy | |
35 for t in (Any, Callable, List, Text, Type, Union): | |
36 assert t | |
37 | |
38 _Tlocalstr = TypeVar('_Tlocalstr', bound=localstr) | |
22 | 39 |
23 charencode = policy.importmod(r'charencode') | 40 charencode = policy.importmod(r'charencode') |
24 | 41 |
25 isasciistr = charencode.isasciistr | 42 isasciistr = charencode.isasciistr |
26 asciilower = charencode.asciilower | 43 asciilower = charencode.asciilower |
43 # verify the next function will work | 60 # verify the next function will work |
44 assert all(i.startswith((b"\xe2", b"\xef")) for i in _ignore) | 61 assert all(i.startswith((b"\xe2", b"\xef")) for i in _ignore) |
45 | 62 |
46 | 63 |
47 def hfsignoreclean(s): | 64 def hfsignoreclean(s): |
65 # type: (bytes) -> bytes | |
48 """Remove codepoints ignored by HFS+ from s. | 66 """Remove codepoints ignored by HFS+ from s. |
49 | 67 |
50 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) | 68 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) |
51 '.hg' | 69 '.hg' |
52 >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8')) | 70 >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8')) |
97 class localstr(bytes): | 115 class localstr(bytes): |
98 '''This class allows strings that are unmodified to be | 116 '''This class allows strings that are unmodified to be |
99 round-tripped to the local encoding and back''' | 117 round-tripped to the local encoding and back''' |
100 | 118 |
101 def __new__(cls, u, l): | 119 def __new__(cls, u, l): |
120 # type: (Type[_Tlocalstr], Text, bytes) -> _Tlocalstr | |
102 s = bytes.__new__(cls, l) | 121 s = bytes.__new__(cls, l) |
103 s._utf8 = u | 122 s._utf8 = u |
104 return s | 123 return s |
105 | 124 |
106 def __hash__(self): | 125 def __hash__(self): |
117 >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0} | 136 >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0} |
118 """ | 137 """ |
119 | 138 |
120 | 139 |
121 def tolocal(s): | 140 def tolocal(s): |
141 # type: (Text) -> bytes | |
122 """ | 142 """ |
123 Convert a string from internal UTF-8 to local encoding | 143 Convert a string from internal UTF-8 to local encoding |
124 | 144 |
125 All internal strings should be UTF-8 but some repos before the | 145 All internal strings should be UTF-8 but some repos before the |
126 implementation of locale support may contain latin1 or possibly | 146 implementation of locale support may contain latin1 or possibly |
183 except LookupError as k: | 203 except LookupError as k: |
184 raise error.Abort(k, hint=b"please check your locale settings") | 204 raise error.Abort(k, hint=b"please check your locale settings") |
185 | 205 |
186 | 206 |
187 def fromlocal(s): | 207 def fromlocal(s): |
208 # type: (bytes) -> Text | |
188 """ | 209 """ |
189 Convert a string from the local character encoding to UTF-8 | 210 Convert a string from the local character encoding to UTF-8 |
190 | 211 |
191 We attempt to decode strings using the encoding mode set by | 212 We attempt to decode strings using the encoding mode set by |
192 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown | 213 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown |
212 except LookupError as k: | 233 except LookupError as k: |
213 raise error.Abort(k, hint=b"please check your locale settings") | 234 raise error.Abort(k, hint=b"please check your locale settings") |
214 | 235 |
215 | 236 |
216 def unitolocal(u): | 237 def unitolocal(u): |
238 # type: (Text) -> bytes | |
217 """Convert a unicode string to a byte string of local encoding""" | 239 """Convert a unicode string to a byte string of local encoding""" |
218 return tolocal(u.encode('utf-8')) | 240 return tolocal(u.encode('utf-8')) |
219 | 241 |
220 | 242 |
221 def unifromlocal(s): | 243 def unifromlocal(s): |
244 # type: (bytes) -> Text | |
222 """Convert a byte string of local encoding to a unicode string""" | 245 """Convert a byte string of local encoding to a unicode string""" |
223 return fromlocal(s).decode('utf-8') | 246 return fromlocal(s).decode('utf-8') |
224 | 247 |
225 | 248 |
226 def unimethod(bytesfunc): | 249 def unimethod(bytesfunc): |
250 # type: (Callable[[Any], bytes]) -> Callable[[Any], Text] | |
227 """Create a proxy method that forwards __unicode__() and __str__() of | 251 """Create a proxy method that forwards __unicode__() and __str__() of |
228 Python 3 to __bytes__()""" | 252 Python 3 to __bytes__()""" |
229 | 253 |
230 def unifunc(obj): | 254 def unifunc(obj): |
231 return unifromlocal(bytesfunc(obj)) | 255 return unifromlocal(bytesfunc(obj)) |
279 or b"WF" | 303 or b"WF" |
280 ) | 304 ) |
281 | 305 |
282 | 306 |
283 def colwidth(s): | 307 def colwidth(s): |
308 # type: (bytes) -> int | |
284 b"Find the column width of a string for display in the local encoding" | 309 b"Find the column width of a string for display in the local encoding" |
285 return ucolwidth(s.decode(_sysstr(encoding), r'replace')) | 310 return ucolwidth(s.decode(_sysstr(encoding), r'replace')) |
286 | 311 |
287 | 312 |
288 def ucolwidth(d): | 313 def ucolwidth(d): |
314 # type: (Text) -> int | |
289 b"Find the column width of a Unicode string for display" | 315 b"Find the column width of a Unicode string for display" |
290 eaw = getattr(unicodedata, 'east_asian_width', None) | 316 eaw = getattr(unicodedata, 'east_asian_width', None) |
291 if eaw is not None: | 317 if eaw is not None: |
292 return sum([eaw(c) in _wide and 2 or 1 for c in d]) | 318 return sum([eaw(c) in _wide and 2 or 1 for c in d]) |
293 return len(d) | 319 return len(d) |
294 | 320 |
295 | 321 |
296 def getcols(s, start, c): | 322 def getcols(s, start, c): |
323 # type: (bytes, int, int) -> bytes | |
297 '''Use colwidth to find a c-column substring of s starting at byte | 324 '''Use colwidth to find a c-column substring of s starting at byte |
298 index start''' | 325 index start''' |
299 for x in pycompat.xrange(start + c, len(s)): | 326 for x in pycompat.xrange(start + c, len(s)): |
300 t = s[start:x] | 327 t = s[start:x] |
301 if colwidth(t) == c: | 328 if colwidth(t) == c: |
302 return t | 329 return t |
303 | 330 |
304 | 331 |
305 def trim(s, width, ellipsis=b'', leftside=False): | 332 def trim(s, width, ellipsis=b'', leftside=False): |
333 # type: (bytes, int, bytes, bool) -> bytes | |
306 """Trim string 's' to at most 'width' columns (including 'ellipsis'). | 334 """Trim string 's' to at most 'width' columns (including 'ellipsis'). |
307 | 335 |
308 If 'leftside' is True, left side of string 's' is trimmed. | 336 If 'leftside' is True, left side of string 's' is trimmed. |
309 'ellipsis' is always placed at trimmed side. | 337 'ellipsis' is always placed at trimmed side. |
310 | 338 |
398 return concat(usub.encode(_sysstr(encoding))) | 426 return concat(usub.encode(_sysstr(encoding))) |
399 return ellipsis # not enough room for multi-column characters | 427 return ellipsis # not enough room for multi-column characters |
400 | 428 |
401 | 429 |
402 def lower(s): | 430 def lower(s): |
431 # type: (bytes) -> bytes | |
403 b"best-effort encoding-aware case-folding of local string s" | 432 b"best-effort encoding-aware case-folding of local string s" |
404 try: | 433 try: |
405 return asciilower(s) | 434 return asciilower(s) |
406 except UnicodeDecodeError: | 435 except UnicodeDecodeError: |
407 pass | 436 pass |
420 except LookupError as k: | 449 except LookupError as k: |
421 raise error.Abort(k, hint=b"please check your locale settings") | 450 raise error.Abort(k, hint=b"please check your locale settings") |
422 | 451 |
423 | 452 |
424 def upper(s): | 453 def upper(s): |
454 # type: (bytes) -> bytes | |
425 b"best-effort encoding-aware case-folding of local string s" | 455 b"best-effort encoding-aware case-folding of local string s" |
426 try: | 456 try: |
427 return asciiupper(s) | 457 return asciiupper(s) |
428 except UnicodeDecodeError: | 458 except UnicodeDecodeError: |
429 return upperfallback(s) | 459 return upperfallback(s) |
430 | 460 |
431 | 461 |
432 def upperfallback(s): | 462 def upperfallback(s): |
463 # type: (Any) -> Any | |
433 try: | 464 try: |
434 if isinstance(s, localstr): | 465 if isinstance(s, localstr): |
435 u = s._utf8.decode("utf-8") | 466 u = s._utf8.decode("utf-8") |
436 else: | 467 else: |
437 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) | 468 u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) |
462 upper = 1 | 493 upper = 1 |
463 other = 0 | 494 other = 0 |
464 | 495 |
465 | 496 |
466 def jsonescape(s, paranoid=False): | 497 def jsonescape(s, paranoid=False): |
498 # type: (Any, Any) -> Any | |
467 '''returns a string suitable for JSON | 499 '''returns a string suitable for JSON |
468 | 500 |
469 JSON is problematic for us because it doesn't support non-Unicode | 501 JSON is problematic for us because it doesn't support non-Unicode |
470 bytes. To deal with this, we take the following approach: | 502 bytes. To deal with this, we take the following approach: |
471 | 503 |
525 | 557 |
526 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | 558 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
527 | 559 |
528 | 560 |
529 def getutf8char(s, pos): | 561 def getutf8char(s, pos): |
562 # type: (Any, Any) -> Any | |
530 '''get the next full utf-8 character in the given string, starting at pos | 563 '''get the next full utf-8 character in the given string, starting at pos |
531 | 564 |
532 Raises a UnicodeError if the given location does not start a valid | 565 Raises a UnicodeError if the given location does not start a valid |
533 utf-8 character. | 566 utf-8 character. |
534 ''' | 567 ''' |
543 c.decode("utf-8", _utf8strict) | 576 c.decode("utf-8", _utf8strict) |
544 return c | 577 return c |
545 | 578 |
546 | 579 |
547 def toutf8b(s): | 580 def toutf8b(s): |
581 # type: (Any) -> Any | |
548 '''convert a local, possibly-binary string into UTF-8b | 582 '''convert a local, possibly-binary string into UTF-8b |
549 | 583 |
550 This is intended as a generic method to preserve data when working | 584 This is intended as a generic method to preserve data when working |
551 with schemes like JSON and XML that have no provision for | 585 with schemes like JSON and XML that have no provision for |
552 arbitrary byte strings. As Mercurial often doesn't know | 586 arbitrary byte strings. As Mercurial often doesn't know |
611 r += c | 645 r += c |
612 return r | 646 return r |
613 | 647 |
614 | 648 |
615 def fromutf8b(s): | 649 def fromutf8b(s): |
650 # type: (Text) -> bytes | |
616 '''Given a UTF-8b string, return a local, possibly-binary string. | 651 '''Given a UTF-8b string, return a local, possibly-binary string. |
617 | 652 |
618 return the original binary string. This | 653 return the original binary string. This |
619 is a round-trip process for strings like filenames, but metadata | 654 is a round-trip process for strings like filenames, but metadata |
620 that was passed through tolocal will remain in UTF-8. | 655 that was passed through tolocal will remain in UTF-8. |