Mercurial > public > mercurial-scm > hg-stable
view tests/test-encoding-func.py @ 37990:57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
If 's' is a localstr, 's._utf8' must be returned to get the original UTF-8
sequence back. Because of this, it was totally wrong to test if '"\xed" not
in s', which should be either '"\xed" not in s._utf8' or just omitted.
This patch moves the localstr handling to top as the validity of 's._utf8'
should be pre-checked by encoding.tolocal().
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 22 Apr 2018 11:38:53 +0900 |
parents | 6c119dbfd0c0 |
children | 3ea3c96ada54 |
line wrap: on
line source
from __future__ import absolute_import import unittest from mercurial import ( encoding, ) class IsasciistrTest(unittest.TestCase): asciistrs = [ b'a', b'ab', b'abc', b'abcd', b'abcde', b'abcdefghi', b'abcd\0fghi', ] def testascii(self): for s in self.asciistrs: self.assertTrue(encoding.isasciistr(s)) def testnonasciichar(self): for s in self.asciistrs: for i in range(len(s)): t = bytearray(s) t[i] |= 0x80 self.assertFalse(encoding.isasciistr(bytes(t))) class LocalEncodingTest(unittest.TestCase): def testasciifastpath(self): s = b'\0' * 100 self.assertTrue(s is encoding.tolocal(s)) self.assertTrue(s is encoding.fromlocal(s)) class Utf8bEncodingTest(unittest.TestCase): def setUp(self): self.origencoding = encoding.encoding def tearDown(self): encoding.encoding = self.origencoding def testasciifastpath(self): s = b'\0' * 100 self.assertTrue(s is encoding.toutf8b(s)) self.assertTrue(s is encoding.fromutf8b(s)) def testlossylatin(self): encoding.encoding = b'ascii' s = u'\xc0'.encode('utf-8') l = encoding.tolocal(s) self.assertEqual(l, b'?') # lossy self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved def testlossy0xed(self): encoding.encoding = b'euc-kr' # U+Dxxx Hangul s = u'\ud1bc\xc0'.encode('utf-8') l = encoding.tolocal(s) self.assertIn(b'\xed', l) self.assertTrue(l.endswith(b'?')) # lossy self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved if __name__ == '__main__': import silenttestrunner silenttestrunner.main(__name__)