comparison mercurial/encoding.py @ 11892:2be70ca17311 stable

encoding: improve handling of buggy getpreferredencoding() on Mac OS X. Prior to Python 2.7, calling locale.getpreferredencoding() would always return 'mac-roman' on Mac OS X. Previously, this was handled by a call to locale.setlocale(). Unfortunately, Python 2.6.5 and older have a bug where isspace() would incorrectly report True for 0x85 and 0xa0 after such a call. In order to fix this, we replace the previous _encodingfixup mapping with an _encodingfixers mapping. Rather than mapping encodings to their replacement, it maps them to a function returning the replacement. This allows us to provide a simplified implementation of getpreferredencoding() which extracts the expected encoding and restores the locale. This fix is based on a patch originally submitted by Martijn Pieters as well as feedback from Brodie Rao.
author Dan Villiom Podlaski Christiansen <danchr@gmail.com>
date Sat, 14 Aug 2010 01:30:54 +0200
parents d320e70442a5
children c327bfa5e831
comparison
equal deleted inserted replaced
11891:0bedf3a2062a 11892:2be70ca17311
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 import error 8 import error
9 import sys, unicodedata, locale, os 9 import sys, unicodedata, locale, os
10 10
11 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} 11 def _getpreferredencoding():
12 '''
13 On darwin, getpreferredencoding ignores the locale environment and
14 always returns mac-roman. http://bugs.python.org/issue6202 fixes this
15 for Python 2.7 and up. This is the same corrected code for earlier
16 Python versions.
17
18 However, we can't use a version check for this method, as some distributions
19 patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
20 encoding, as it is unlikely that this encoding is the actually expected.
21 '''
22 try:
23 locale.CODESET
24 except AttributeError:
25 # Fall back to parsing environment variables :-(
26 return locale.getdefaultlocale()[1]
27
28 oldloc = locale.setlocale(locale.LC_CTYPE)
29 locale.setlocale(locale.LC_CTYPE, "")
30 result = locale.nl_langinfo(locale.CODESET)
31 locale.setlocale(locale.LC_CTYPE, oldloc)
32
33 return result
34
35 _encodingfixers = {
36 '646': lambda: 'ascii',
37 'ANSI_X3.4-1968': lambda: 'ascii',
38 'mac-roman': _getpreferredencoding
39 }
12 40
13 try: 41 try:
14 encoding = os.environ.get("HGENCODING") 42 encoding = os.environ.get("HGENCODING")
15 if sys.platform == 'darwin' and not encoding:
16 # On darwin, getpreferredencoding ignores the locale environment and
17 # always returns mac-roman. We override this if the environment is
18 # not C (has been customized by the user).
19 lc = locale.setlocale(locale.LC_CTYPE, '')
20 if lc == 'UTF-8':
21 locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8')
22 encoding = locale.getlocale()[1]
23 if not encoding: 43 if not encoding:
24 encoding = locale.getpreferredencoding() or 'ascii' 44 encoding = locale.getpreferredencoding() or 'ascii'
25 encoding = _encodingfixup.get(encoding, encoding) 45 encoding = _encodingfixers.get(encoding, lambda: encoding)()
26 except locale.Error: 46 except locale.Error:
27 encoding = 'ascii' 47 encoding = 'ascii'
28 encodingmode = os.environ.get("HGENCODINGMODE", "strict") 48 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
29 fallbackencoding = 'ISO-8859-1' 49 fallbackencoding = 'ISO-8859-1'
30 50