Mercurial > public > mercurial-scm > hg
comparison mercurial/encoding.py @ 41836:25694a78e4a4
encoding: use raw strings for encoding arguments
This prevents the internals of Python from implicitly coercing a unicode
object to str on Python 2 and makes tests run with HGUNICODEPEDANTRY=1 a lot
happier.
Differential Revision: https://phab.mercurial-scm.org/D6051
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 02 Mar 2019 13:07:58 -0800 |
parents | 9e8fcd2e78c1 |
children | 2372284d9457 |
comparison
equal
deleted
inserted
replaced
41835:ddb174511f1b | 41836:25694a78e4a4 |
---|---|
63 elif _nativeenviron: | 63 elif _nativeenviron: |
64 environ = os.environb # re-exports | 64 environ = os.environb # re-exports |
65 else: | 65 else: |
66 # preferred encoding isn't known yet; use utf-8 to avoid unicode error | 66 # preferred encoding isn't known yet; use utf-8 to avoid unicode error |
67 # and recreate it once encoding is settled | 67 # and recreate it once encoding is settled |
68 environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8')) | 68 environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8')) |
69 for k, v in os.environ.items()) # re-exports | 69 for k, v in os.environ.items()) # re-exports |
70 | 70 |
71 _encodingrewrites = { | 71 _encodingrewrites = { |
72 '646': 'ascii', | 72 '646': 'ascii', |
73 'ANSI_X3.4-1968': 'ascii', | 73 'ANSI_X3.4-1968': 'ascii', |
150 # make sure string is actually stored in UTF-8 | 150 # make sure string is actually stored in UTF-8 |
151 u = s.decode('UTF-8') | 151 u = s.decode('UTF-8') |
152 if encoding == 'UTF-8': | 152 if encoding == 'UTF-8': |
153 # fast path | 153 # fast path |
154 return s | 154 return s |
155 r = u.encode(_sysstr(encoding), u"replace") | 155 r = u.encode(_sysstr(encoding), r"replace") |
156 if u == r.decode(_sysstr(encoding)): | 156 if u == r.decode(_sysstr(encoding)): |
157 # r is a safe, non-lossy encoding of s | 157 # r is a safe, non-lossy encoding of s |
158 return safelocalstr(r) | 158 return safelocalstr(r) |
159 return localstr(s, r) | 159 return localstr(s, r) |
160 except UnicodeDecodeError: | 160 except UnicodeDecodeError: |
161 # we should only get here if we're looking at an ancient changeset | 161 # we should only get here if we're looking at an ancient changeset |
162 try: | 162 try: |
163 u = s.decode(_sysstr(fallbackencoding)) | 163 u = s.decode(_sysstr(fallbackencoding)) |
164 r = u.encode(_sysstr(encoding), u"replace") | 164 r = u.encode(_sysstr(encoding), r"replace") |
165 if u == r.decode(_sysstr(encoding)): | 165 if u == r.decode(_sysstr(encoding)): |
166 # r is a safe, non-lossy encoding of s | 166 # r is a safe, non-lossy encoding of s |
167 return safelocalstr(r) | 167 return safelocalstr(r) |
168 return localstr(u.encode('UTF-8'), r) | 168 return localstr(u.encode('UTF-8'), r) |
169 except UnicodeDecodeError: | 169 except UnicodeDecodeError: |
170 u = s.decode("utf-8", "replace") # last ditch | 170 u = s.decode("utf-8", "replace") # last ditch |
171 # can't round-trip | 171 # can't round-trip |
172 return u.encode(_sysstr(encoding), u"replace") | 172 return u.encode(_sysstr(encoding), r"replace") |
173 except LookupError as k: | 173 except LookupError as k: |
174 raise error.Abort(k, hint="please check your locale settings") | 174 raise error.Abort(k, hint="please check your locale settings") |
175 | 175 |
176 def fromlocal(s): | 176 def fromlocal(s): |
177 """ | 177 """ |
228 strmethod = pycompat.identity | 228 strmethod = pycompat.identity |
229 | 229 |
230 if not _nativeenviron: | 230 if not _nativeenviron: |
231 # now encoding and helper functions are available, recreate the environ | 231 # now encoding and helper functions are available, recreate the environ |
232 # dict to be exported to other modules | 232 # dict to be exported to other modules |
233 environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8'))) | 233 environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) |
234 for k, v in os.environ.items()) # re-exports | 234 for k, v in os.environ.items()) # re-exports |
235 | 235 |
236 if pycompat.ispy3: | 236 if pycompat.ispy3: |
237 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which | 237 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which |
238 # returns bytes. | 238 # returns bytes. |
249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" | 249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" |
250 and "WFA" or "WF") | 250 and "WFA" or "WF") |
251 | 251 |
252 def colwidth(s): | 252 def colwidth(s): |
253 "Find the column width of a string for display in the local encoding" | 253 "Find the column width of a string for display in the local encoding" |
254 return ucolwidth(s.decode(_sysstr(encoding), u'replace')) | 254 return ucolwidth(s.decode(_sysstr(encoding), r'replace')) |
255 | 255 |
256 def ucolwidth(d): | 256 def ucolwidth(d): |
257 "Find the column width of a Unicode string for display" | 257 "Find the column width of a Unicode string for display" |
258 eaw = getattr(unicodedata, 'east_asian_width', None) | 258 eaw = getattr(unicodedata, 'east_asian_width', None) |
259 if eaw is not None: | 259 if eaw is not None: |