Mercurial > public > mercurial-scm > hg-stable
comparison tests/test-encoding-func.py @ 37990:57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" is in it
If 's' is a localstr, 's._utf8' must be returned to get the original UTF-8
sequence back. Because of this, it was totally wrong to test if '"\xed" not
in s', which should be either '"\xed" not in s._utf8' or just omitted.
This patch moves the localstr handling to the top, as the validity of 's._utf8'
should be pre-checked by encoding.tolocal().
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 22 Apr 2018 11:38:53 +0900 |
parents | 6c119dbfd0c0 |
children | 3ea3c96ada54 |
comparison
equal
deleted
inserted
replaced
37989:bfe8ef6e370e | 37990:57b0c7221dba |
---|---|
33 s = b'\0' * 100 | 33 s = b'\0' * 100 |
34 self.assertTrue(s is encoding.tolocal(s)) | 34 self.assertTrue(s is encoding.tolocal(s)) |
35 self.assertTrue(s is encoding.fromlocal(s)) | 35 self.assertTrue(s is encoding.fromlocal(s)) |
36 | 36 |
37 class Utf8bEncodingTest(unittest.TestCase): | 37 class Utf8bEncodingTest(unittest.TestCase): |
38 def setUp(self): | |
39 self.origencoding = encoding.encoding | |
40 | |
41 def tearDown(self): | |
42 encoding.encoding = self.origencoding | |
43 | |
38 def testasciifastpath(self): | 44 def testasciifastpath(self): |
39 s = b'\0' * 100 | 45 s = b'\0' * 100 |
40 self.assertTrue(s is encoding.toutf8b(s)) | 46 self.assertTrue(s is encoding.toutf8b(s)) |
41 self.assertTrue(s is encoding.fromutf8b(s)) | 47 self.assertTrue(s is encoding.fromutf8b(s)) |
42 | 48 |
49 def testlossylatin(self): | |
50 encoding.encoding = b'ascii' | |
51 s = u'\xc0'.encode('utf-8') | |
52 l = encoding.tolocal(s) | |
53 self.assertEqual(l, b'?') # lossy | |
54 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved | |
55 | |
56 def testlossy0xed(self): | |
57 encoding.encoding = b'euc-kr' # U+Dxxx Hangul | |
58 s = u'\ud1bc\xc0'.encode('utf-8') | |
59 l = encoding.tolocal(s) | |
60 self.assertIn(b'\xed', l) | |
61 self.assertTrue(l.endswith(b'?')) # lossy | |
62 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved | |
63 | |
43 if __name__ == '__main__': | 64 if __name__ == '__main__': |
44 import silenttestrunner | 65 import silenttestrunner |
45 silenttestrunner.main(__name__) | 66 silenttestrunner.main(__name__) |