diff -r d7e83f106459 -r a24b98f4e03c mercurial/encoding.py --- a/mercurial/encoding.py Thu Nov 05 17:21:43 2015 -0600 +++ b/mercurial/encoding.py Thu Nov 05 17:30:10 2015 -0600 @@ -463,14 +463,14 @@ internal surrogate encoding as a UTF-8 string.) ''' - if isinstance(s, localstr): - return s._utf8 - - try: - s.decode('utf-8') - return s - except UnicodeDecodeError: - pass + if "\xed" not in s: + if isinstance(s, localstr): + return s._utf8 + try: + s.decode('utf-8') + return s + except UnicodeDecodeError: + pass r = "" pos = 0 @@ -478,7 +478,12 @@ while pos < l: try: c = getutf8char(s, pos) - pos += len(c) + if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": + # have to re-escape existing U+DCxx characters + c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') + pos += 1 + else: + pos += len(c) except UnicodeDecodeError: c = unichr(0xdc00 + ord(s[pos])).encode('utf-8') pos += 1