Mercurial > public > mercurial-scm > hg-stable
diff mercurial/pure/charencode.py @ 34225:aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Encoding or decoding lone surrogate code points (U+DCxx) is disallowed by the default 'strict' error handler on Python 3.
https://docs.python.org/3/library/codecs.html#error-handlers
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 16 Sep 2017 22:55:48 +0900 |
parents | 5307cc57f271 |
children | 2372284d9457 |
line wrap: on
line diff
--- a/mercurial/pure/charencode.py Sat Sep 16 22:42:19 2017 +0900
+++ b/mercurial/pure/charencode.py Sat Sep 16 22:55:48 2017 +0900
@@ -64,6 +64,11 @@
     except IndexError:
         raise ValueError
 
+if pycompat.ispy3:
+    _utf8strict = r'surrogatepass'
+else:
+    _utf8strict = r'strict'
+
 def jsonescapeu8fallback(u8chars, paranoid):
     """Convert a UTF-8 byte string to JSON-escaped form (slow path)
 
@@ -74,6 +79,7 @@
     else:
         jm = _jsonmap
     # non-BMP char is represented as UTF-16 surrogate pair
-    u16codes = array.array(r'H', u8chars.decode('utf-8').encode('utf-16'))
+    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
+    u16codes = array.array(r'H', u16b)
     u16codes.pop(0) # drop BOM
     return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)