Mercurial > public > mercurial-scm > hg
comparison mercurial/util.py @ 7948:de377b1a9a84
move encoding bits from util to encoding
In addition to cleaning up util, this gets rid of some circular dependencies.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Fri, 03 Apr 2009 14:51:48 -0500 |
parents | a454eeb1b827 |
children | 8c6f823efcc9 |
comparison
equal
deleted
inserted
replaced
7947:a454eeb1b827 | 7948:de377b1a9a84 |
---|---|
12 platform-specific details from the core. | 12 platform-specific details from the core. |
13 """ | 13 """ |
14 | 14 |
15 from i18n import _ | 15 from i18n import _ |
16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error | 16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error |
17 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil | 17 import os, stat, threading, time, calendar, ConfigParser, glob, osutil |
18 import imp, unicodedata | 18 import imp |
19 | 19 |
20 # Python compatibility | 20 # Python compatibility |
21 | 21 |
22 try: | 22 try: |
23 set = set | 23 set = set |
78 subprocess = None | 78 subprocess = None |
79 from popen2 import Popen3 | 79 from popen2 import Popen3 |
80 popen2 = os.popen2 | 80 popen2 = os.popen2 |
81 popen3 = os.popen3 | 81 popen3 = os.popen3 |
82 | 82 |
83 | |
84 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} | |
85 | |
86 try: | |
87 _encoding = os.environ.get("HGENCODING") | |
88 if sys.platform == 'darwin' and not _encoding: | |
89 # On darwin, getpreferredencoding ignores the locale environment and | |
90 # always returns mac-roman. We override this if the environment is | |
91 # not C (has been customized by the user). | |
92 locale.setlocale(locale.LC_CTYPE, '') | |
93 _encoding = locale.getlocale()[1] | |
94 if not _encoding: | |
95 _encoding = locale.getpreferredencoding() or 'ascii' | |
96 _encoding = _encodingfixup.get(_encoding, _encoding) | |
97 except locale.Error: | |
98 _encoding = 'ascii' | |
99 _encodingmode = os.environ.get("HGENCODINGMODE", "strict") | |
100 _fallbackencoding = 'ISO-8859-1' | |
101 | |
102 def tolocal(s): | |
103 """ | |
104 Convert a string from internal UTF-8 to local encoding | |
105 | |
106 All internal strings should be UTF-8 but some repos before the | |
107 implementation of locale support may contain latin1 or possibly | |
108 other character sets. We attempt to decode everything strictly | |
109 using UTF-8, then Latin-1, and failing that, we use UTF-8 and | |
110 replace unknown characters. | |
111 """ | |
112 for e in ('UTF-8', _fallbackencoding): | |
113 try: | |
114 u = s.decode(e) # attempt strict decoding | |
115 return u.encode(_encoding, "replace") | |
116 except LookupError, k: | |
117 raise Abort(_("%s, please check your locale settings") % k) | |
118 except UnicodeDecodeError: | |
119 pass | |
120 u = s.decode("utf-8", "replace") # last ditch | |
121 return u.encode(_encoding, "replace") | |
122 | |
123 def fromlocal(s): | |
124 """ | |
125 Convert a string from the local character encoding to UTF-8 | |
126 | |
127 We attempt to decode strings using the encoding mode set by | |
128 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown | |
129 characters will cause an error message. Other modes include | |
130 'replace', which replaces unknown characters with a special | |
131 Unicode character, and 'ignore', which drops the character. | |
132 """ | |
133 try: | |
134 return s.decode(_encoding, _encodingmode).encode("utf-8") | |
135 except UnicodeDecodeError, inst: | |
136 sub = s[max(0, inst.start-10):inst.start+10] | |
137 raise Abort("decoding near '%s': %s!" % (sub, inst)) | |
138 except LookupError, k: | |
139 raise Abort(_("%s, please check your locale settings") % k) | |
140 | |
141 def colwidth(s): | |
142 "Find the column width of a UTF-8 string for display" | |
143 d = s.decode(_encoding, 'replace') | |
144 if hasattr(unicodedata, 'east_asian_width'): | |
145 w = unicodedata.east_asian_width | |
146 return sum([w(c) in 'WF' and 2 or 1 for c in d]) | |
147 return len(d) | |
148 | 83 |
149 def version(): | 84 def version(): |
150 """Return version information if available.""" | 85 """Return version information if available.""" |
151 try: | 86 try: |
152 import __version__ | 87 import __version__ |