comparison mercurial/util.py @ 7948:de377b1a9a84

move encoding bits from util to encoding. In addition to cleaning up util, this gets rid of some circular dependencies.
author Matt Mackall <mpm@selenic.com>
date Fri, 03 Apr 2009 14:51:48 -0500
parents a454eeb1b827
children 8c6f823efcc9
comparison
equal deleted inserted replaced
7947:a454eeb1b827 7948:de377b1a9a84
12 platform-specific details from the core. 12 platform-specific details from the core.
13 """ 13 """
14 14
15 from i18n import _ 15 from i18n import _
16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error 16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
17 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil 17 import os, stat, threading, time, calendar, ConfigParser, glob, osutil
18 import imp, unicodedata 18 import imp
19 19
20 # Python compatibility 20 # Python compatibility
21 21
22 try: 22 try:
23 set = set 23 set = set
78 subprocess = None 78 subprocess = None
79 from popen2 import Popen3 79 from popen2 import Popen3
80 popen2 = os.popen2 80 popen2 = os.popen2
81 popen3 = os.popen3 81 popen3 = os.popen3
82 82
83
84 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
85
86 try:
87 _encoding = os.environ.get("HGENCODING")
88 if sys.platform == 'darwin' and not _encoding:
89 # On darwin, getpreferredencoding ignores the locale environment and
90 # always returns mac-roman. We override this if the environment is
91 # not C (has been customized by the user).
92 locale.setlocale(locale.LC_CTYPE, '')
93 _encoding = locale.getlocale()[1]
94 if not _encoding:
95 _encoding = locale.getpreferredencoding() or 'ascii'
96 _encoding = _encodingfixup.get(_encoding, _encoding)
97 except locale.Error:
98 _encoding = 'ascii'
99 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
100 _fallbackencoding = 'ISO-8859-1'
101
102 def tolocal(s):
103 """
104 Convert a string from internal UTF-8 to local encoding
105
106 All internal strings should be UTF-8 but some repos before the
107 implementation of locale support may contain latin1 or possibly
108 other character sets. We attempt to decode everything strictly
109 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
110 replace unknown characters.
111 """
112 for e in ('UTF-8', _fallbackencoding):
113 try:
114 u = s.decode(e) # attempt strict decoding
115 return u.encode(_encoding, "replace")
116 except LookupError, k:
117 raise Abort(_("%s, please check your locale settings") % k)
118 except UnicodeDecodeError:
119 pass
120 u = s.decode("utf-8", "replace") # last ditch
121 return u.encode(_encoding, "replace")
122
123 def fromlocal(s):
124 """
125 Convert a string from the local character encoding to UTF-8
126
127 We attempt to decode strings using the encoding mode set by
128 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
129 characters will cause an error message. Other modes include
130 'replace', which replaces unknown characters with a special
131 Unicode character, and 'ignore', which drops the character.
132 """
133 try:
134 return s.decode(_encoding, _encodingmode).encode("utf-8")
135 except UnicodeDecodeError, inst:
136 sub = s[max(0, inst.start-10):inst.start+10]
137 raise Abort("decoding near '%s': %s!" % (sub, inst))
138 except LookupError, k:
139 raise Abort(_("%s, please check your locale settings") % k)
140
141 def colwidth(s):
142 "Find the column width of a UTF-8 string for display"
143 d = s.decode(_encoding, 'replace')
144 if hasattr(unicodedata, 'east_asian_width'):
145 w = unicodedata.east_asian_width
146 return sum([w(c) in 'WF' and 2 or 1 for c in d])
147 return len(d)
148 83
149 def version(): 84 def version():
150 """Return version information if available.""" 85 """Return version information if available."""
151 try: 86 try:
152 import __version__ 87 import __version__