Mercurial > public > mercurial-scm > hg
comparison mercurial/encoding.py @ 28507:9bcbd9412225
encoding: make HFS+ ignore code Python 3 compatible
unichr() doesn't exist in Python 3. chr() is the equivalent there.
Unfortunately, we can't use chr() outright because Python 2's chr() only
accepts values smaller than 256.
Also, indexing a bytes object (s[x]) returns an int in Python 3 rather
than a length-1 string. So, we have to compare against the ord() of the
expected bytes in the assert statement.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Fri, 11 Mar 2016 21:23:34 -0800 |
parents | b2d24c2898f9 |
children | 3c6e94d0811c |
comparison
equal
deleted
inserted
replaced
28506:10252652c6e4 | 28507:9bcbd9412225 |
---|---|
8 from __future__ import absolute_import | 8 from __future__ import absolute_import |
9 | 9 |
10 import array | 10 import array |
11 import locale | 11 import locale |
12 import os | 12 import os |
13 import sys | |
13 import unicodedata | 14 import unicodedata |
14 | 15 |
15 from . import ( | 16 from . import ( |
16 error, | 17 error, |
17 ) | 18 ) |
19 | |
20 if sys.version_info[0] >= 3: | |
21 unichr = chr | |
18 | 22 |
19 # These unicode characters are ignored by HFS+ (Apple Technote 1150, | 23 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
20 # "Unicode Subtleties"), so we need to ignore them in some places for | 24 # "Unicode Subtleties"), so we need to ignore them in some places for |
21 # sanity. | 25 # sanity. |
22 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in | 26 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in |
23 "200c 200d 200e 200f 202a 202b 202c 202d 202e " | 27 "200c 200d 200e 200f 202a 202b 202c 202d 202e " |
24 "206a 206b 206c 206d 206e 206f feff".split()] | 28 "206a 206b 206c 206d 206e 206f feff".split()] |
25 # verify the next function will work | 29 # verify the next function will work |
26 assert set([i[0] for i in _ignore]) == set(["\xe2", "\xef"]) | 30 if sys.version_info[0] >= 3: |
31 assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')]) | |
32 else: | |
33 assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"]) | |
27 | 34 |
28 def hfsignoreclean(s): | 35 def hfsignoreclean(s): |
29 """Remove codepoints ignored by HFS+ from s. | 36 """Remove codepoints ignored by HFS+ from s. |
30 | 37 |
31 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) | 38 >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) |