mercurial/encoding.py
changeset 26878 d7e83f106459
parent 26877 cb467a9d7593
child 26879 a24b98f4e03c
equal deleted inserted replaced
26877:cb467a9d7593 26878:d7e83f106459
   468 
   468 
   469     try:
   469     try:
   470         s.decode('utf-8')
   470         s.decode('utf-8')
   471         return s
   471         return s
   472     except UnicodeDecodeError:
   472     except UnicodeDecodeError:
   473         # surrogate-encode any characters that don't round-trip
   473         pass
   474         s2 = s.decode('utf-8', 'ignore').encode('utf-8')
   474 
   475         r = ""
   475     r = ""
   476         pos = 0
   476     pos = 0
   477         for c in s:
   477     l = len(s)
   478             if s2[pos:pos + 1] == c:
   478     while pos < l:
   479                 r += c
   479         try:
   480                 pos += 1
   480             c = getutf8char(s, pos)
   481             else:
   481             pos += len(c)
   482                 r += unichr(0xdc00 + ord(c)).encode('utf-8')
   482         except UnicodeDecodeError:
   483         return r
   483             c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
       
   484             pos += 1
       
   485         r += c
       
   486     return r
   484 
   487 
   485 def fromutf8b(s):
   488 def fromutf8b(s):
   486     '''Given a UTF-8b string, return a local, possibly-binary string.
   489     '''Given a UTF-8b string, return a local, possibly-binary string.
   487 
   490 
   488     return the original binary string. This
   491     return the original binary string. This