comparison mercurial/encoding.py @ 28507:9bcbd9412225

encoding: make HFS+ ignore code Python 3 compatible. unichr() doesn't exist in Python 3; chr() is the equivalent there. Unfortunately, we can't use chr() outright because in Python 2 chr() only accepts values smaller than 256. Also, Python 3 returns an int when indexing a single element of a bytes object (s[x]), so we have to compare against ord() values in the assert statement.
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 11 Mar 2016 21:23:34 -0800
parents b2d24c2898f9
children 3c6e94d0811c
comparison
equal deleted inserted replaced
28506:10252652c6e4 28507:9bcbd9412225
8 from __future__ import absolute_import 8 from __future__ import absolute_import
9 9
10 import array 10 import array
11 import locale 11 import locale
12 import os 12 import os
13 import sys
13 import unicodedata 14 import unicodedata
14 15
15 from . import ( 16 from . import (
16 error, 17 error,
17 ) 18 )
19
20 if sys.version_info[0] >= 3:
21 unichr = chr
18 22
19 # These unicode characters are ignored by HFS+ (Apple Technote 1150, 23 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
20 # "Unicode Subtleties"), so we need to ignore them in some places for 24 # "Unicode Subtleties"), so we need to ignore them in some places for
21 # sanity. 25 # sanity.
22 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in 26 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in
23 "200c 200d 200e 200f 202a 202b 202c 202d 202e " 27 "200c 200d 200e 200f 202a 202b 202c 202d 202e "
24 "206a 206b 206c 206d 206e 206f feff".split()] 28 "206a 206b 206c 206d 206e 206f feff".split()]
25 # verify the next function will work 29 # verify the next function will work
26 assert set([i[0] for i in _ignore]) == set(["\xe2", "\xef"]) 30 if sys.version_info[0] >= 3:
31 assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')])
32 else:
33 assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"])
27 34
28 def hfsignoreclean(s): 35 def hfsignoreclean(s):
29 """Remove codepoints ignored by HFS+ from s. 36 """Remove codepoints ignored by HFS+ from s.
30 37
31 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) 38 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))