equal
deleted
inserted
replaced
492 arbitrary bytes into an internal Unicode format that can be |
492 arbitrary bytes into an internal Unicode format that can be |
493 re-encoded back into the original. Here we are exposing the |
493 re-encoded back into the original. Here we are exposing the |
494 internal surrogate encoding as a UTF-8 string.) |
494 internal surrogate encoding as a UTF-8 string.) |
495 ''' |
495 ''' |
496 |
496 |
|
497 if not isinstance(s, localstr) and isasciistr(s): |
|
498 return s |
497 if "\xed" not in s: |
499 if "\xed" not in s: |
498 if isinstance(s, localstr): |
500 if isinstance(s, localstr): |
499 return s._utf8 |
501 return s._utf8 |
500 try: |
502 try: |
501 s.decode('utf-8') |
503 s.decode('utf-8') |
542 True |
544 True |
543 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80") |
545 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80") |
544 True |
546 True |
545 ''' |
547 ''' |
546 |
548 |
|
549 if isasciistr(s): |
|
550 return s |
547 # fast path - look for uDxxx prefixes in s |
551 # fast path - look for uDxxx prefixes in s |
548 if "\xed" not in s: |
552 if "\xed" not in s: |
549 return s |
553 return s |
550 |
554 |
551 # We could do this with the unicode type but some Python builds |
555 # We could do this with the unicode type but some Python builds |