diff mercurial/pure/charencode.py @ 34225:aa877860d4d7

py3: use 'surrogatepass' error handler to process U+DCxx transparently. Surrogate code points such as U+DCxx are disallowed by the default 'strict' error handler on Python 3; see https://docs.python.org/3/library/codecs.html#error-handlers
author Yuya Nishihara <yuya@tcha.org>
date Sat, 16 Sep 2017 22:55:48 +0900
parents 5307cc57f271
children 2372284d9457
line wrap: on
line diff
--- a/mercurial/pure/charencode.py	Sat Sep 16 22:42:19 2017 +0900
+++ b/mercurial/pure/charencode.py	Sat Sep 16 22:55:48 2017 +0900
@@ -64,6 +64,11 @@
     except IndexError:
         raise ValueError
 
+if pycompat.ispy3:
+    _utf8strict = r'surrogatepass'
+else:
+    _utf8strict = r'strict'
+
 def jsonescapeu8fallback(u8chars, paranoid):
     """Convert a UTF-8 byte string to JSON-escaped form (slow path)
 
@@ -74,6 +79,7 @@
     else:
         jm = _jsonmap
     # non-BMP char is represented as UTF-16 surrogate pair
-    u16codes = array.array(r'H', u8chars.decode('utf-8').encode('utf-16'))
+    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
+    u16codes = array.array(r'H', u16b)
     u16codes.pop(0)  # drop BOM
     return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)