equal
deleted
inserted
replaced
468 |
468 |
469 try: |
469 try: |
470 s.decode('utf-8') |
470 s.decode('utf-8') |
471 return s |
471 return s |
472 except UnicodeDecodeError: |
472 except UnicodeDecodeError: |
473 # surrogate-encode any characters that don't round-trip |
473 pass |
474 s2 = s.decode('utf-8', 'ignore').encode('utf-8') |
474 |
475 r = "" |
475 r = "" |
476 pos = 0 |
476 pos = 0 |
477 for c in s: |
477 l = len(s) |
478 if s2[pos:pos + 1] == c: |
478 while pos < l: |
479 r += c |
479 try: |
480 pos += 1 |
480 c = getutf8char(s, pos) |
481 else: |
481 pos += len(c) |
482 r += unichr(0xdc00 + ord(c)).encode('utf-8') |
482 except UnicodeDecodeError: |
483 return r |
483 c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') |
|
484 pos += 1 |
|
485 r += c |
|
486 return r |
484 |
487 |
485 def fromutf8b(s): |
488 def fromutf8b(s): |
486 '''Given a UTF-8b string, return a local, possibly-binary string. |
489 '''Given a UTF-8b string, return a local, possibly-binary string. |
487 |
490 |
488 return the original binary string. This |
491 return the original binary string. This |