44 asciiupper = charencode.asciiupper |
44 asciiupper = charencode.asciiupper |
45 _jsonescapeu8fast = charencode.jsonescapeu8fast |
45 _jsonescapeu8fast = charencode.jsonescapeu8fast |
46 |
46 |
47 _sysstr = pycompat.sysstr |
47 _sysstr = pycompat.sysstr |
48 |
48 |
49 if pycompat.ispy3: |
49 unichr = chr |
50 unichr = chr |
|
51 |
50 |
52 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
51 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
53 # "Unicode Subtleties"), so we need to ignore them in some places for |
52 # "Unicode Subtleties"), so we need to ignore them in some places for |
54 # sanity. |
53 # sanity. |
55 _ignore = [ |
54 _ignore = [ |
76 return s |
75 return s |
77 |
76 |
78 |
77 |
79 # encoding.environ is provided read-only, which may not be used to modify |
78 # encoding.environ is provided read-only, which may not be used to modify |
80 # the process environment |
79 # the process environment |
81 _nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ |
80 _nativeenviron = os.supports_bytes_environ |
82 if not pycompat.ispy3: |
81 if _nativeenviron: |
83 environ = os.environ # re-exports |
|
84 elif _nativeenviron: |
|
85 environ = os.environb # re-exports |
82 environ = os.environb # re-exports |
86 else: |
83 else: |
87 # preferred encoding isn't known yet; use utf-8 to avoid unicode error |
84 # preferred encoding isn't known yet; use utf-8 to avoid unicode error |
88 # and recreate it once encoding is settled |
85 # and recreate it once encoding is settled |
89 environ = { |
86 environ = { |
96 b'ANSI_X3.4-1968': b'ascii', |
93 b'ANSI_X3.4-1968': b'ascii', |
97 } |
94 } |
98 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. |
95 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. |
99 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. |
96 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. |
100 # https://bugs.python.org/issue13216 |
97 # https://bugs.python.org/issue13216 |
101 if pycompat.iswindows and not pycompat.ispy3: |
98 if pycompat.iswindows: |
102 _encodingrewrites[b'cp65001'] = b'utf-8' |
99 _encodingrewrites[b'cp65001'] = b'utf-8' |
103 |
100 |
104 try: |
101 try: |
105 encoding = environ.get(b"HGENCODING") |
102 encoding = environ.get(b"HGENCODING") |
106 if not encoding: |
103 if not encoding: |
268 |
265 |
269 |
266 |
270 # converter functions between native str and byte string. use these if the |
267 # converter functions between native str and byte string. use these if the |
271 # character encoding is not aware (e.g. exception message) or is known to |
268 # character encoding is not aware (e.g. exception message) or is known to |
272 # be locale dependent (e.g. date formatting.) |
269 # be locale dependent (e.g. date formatting.) |
273 if pycompat.ispy3: |
270 strtolocal = unitolocal |
274 strtolocal = unitolocal |
271 strfromlocal = unifromlocal |
275 strfromlocal = unifromlocal |
272 strmethod = unimethod |
276 strmethod = unimethod |
|
277 else: |
|
278 |
|
279 def strtolocal(s): |
|
280 # type: (str) -> bytes |
|
281 return s # pytype: disable=bad-return-type |
|
282 |
|
283 def strfromlocal(s): |
|
284 # type: (bytes) -> str |
|
285 return s # pytype: disable=bad-return-type |
|
286 |
|
287 strmethod = pycompat.identity |
|
288 |
273 |
289 |
274 |
290 def lower(s): |
275 def lower(s): |
291 # type: (bytes) -> bytes |
276 # type: (bytes) -> bytes |
292 """best-effort encoding-aware case-folding of local string s""" |
277 """best-effort encoding-aware case-folding of local string s""" |
342 |
327 |
343 |
328 |
344 if not _nativeenviron: |
329 if not _nativeenviron: |
345 # now encoding and helper functions are available, recreate the environ |
330 # now encoding and helper functions are available, recreate the environ |
346 # dict to be exported to other modules |
331 # dict to be exported to other modules |
347 if pycompat.iswindows and pycompat.ispy3: |
332 if pycompat.iswindows: |
348 |
333 |
349 class WindowsEnviron(dict): |
334 class WindowsEnviron(dict): |
350 """`os.environ` normalizes environment variables to uppercase on windows""" |
335 """`os.environ` normalizes environment variables to uppercase on windows""" |
351 |
336 |
352 def get(self, key, default=None): |
337 def get(self, key, default=None): |
358 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) |
343 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) |
359 |
344 |
360 |
345 |
361 DRIVE_RE = re.compile(b'^[a-z]:') |
346 DRIVE_RE = re.compile(b'^[a-z]:') |
362 |
347 |
363 if pycompat.ispy3: |
348 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which |
364 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which |
349 # returns bytes. |
365 # returns bytes. |
350 if pycompat.iswindows: |
366 if pycompat.iswindows: |
351 # Python 3 on Windows issues a DeprecationWarning about using the bytes |
367 # Python 3 on Windows issues a DeprecationWarning about using the bytes |
352 # API when os.getcwdb() is called. |
368 # API when os.getcwdb() is called. |
353 # |
369 # |
354 # Additionally, py3.8+ uppercases the drive letter when calling |
370 # Additionally, py3.8+ uppercases the drive letter when calling |
355 # os.path.realpath(), which is used on ``repo.root``. Since those |
371 # os.path.realpath(), which is used on ``repo.root``. Since those |
356 # strings are compared in various places as simple strings, also call |
372 # strings are compared in various places as simple strings, also call |
357 # realpath here. See https://bugs.python.org/issue40368 |
373 # realpath here. See https://bugs.python.org/issue40368 |
358 # |
374 # |
359 # However this is not reliable, so lets explicitly make this drive |
375 # However this is not reliable, so lets explicitly make this drive |
360 # letter upper case. |
376 # letter upper case. |
361 # |
377 # |
362 # note: we should consider dropping realpath here since it seems to |
378 # note: we should consider dropping realpath here since it seems to |
363 # change the semantic of `getcwd`. |
379 # change the semantic of `getcwd`. |
364 |
380 |
365 def getcwd(): |
381 def getcwd(): |
366 cwd = os.getcwd() # re-exports |
382 cwd = os.getcwd() # re-exports |
367 cwd = os.path.realpath(cwd) |
383 cwd = os.path.realpath(cwd) |
368 cwd = strtolocal(cwd) |
384 cwd = strtolocal(cwd) |
369 if DRIVE_RE.match(cwd): |
385 if DRIVE_RE.match(cwd): |
370 cwd = cwd[0:1].upper() + cwd[1:] |
386 cwd = cwd[0:1].upper() + cwd[1:] |
371 return cwd |
387 return cwd |
372 |
388 |
373 |
389 else: |
|
390 getcwd = os.getcwdb # re-exports |
|
391 else: |
374 else: |
392 getcwd = os.getcwd # re-exports |
375 getcwd = os.getcwdb # re-exports |
393 |
376 |
394 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. |
377 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. |
395 _wide = _sysstr( |
378 _wide = _sysstr( |
396 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" |
379 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" |
397 and b"WFA" |
380 and b"WFA" |
598 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
581 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
599 |
582 |
600 |
583 |
601 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 |
584 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 |
602 # bytes are mapped to that range. |
585 # bytes are mapped to that range. |
603 if pycompat.ispy3: |
586 _utf8strict = r'surrogatepass' |
604 _utf8strict = r'surrogatepass' |
|
605 else: |
|
606 _utf8strict = r'strict' |
|
607 |
587 |
608 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
588 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
609 |
589 |
610 |
590 |
611 def getutf8char(s, pos): |
591 def getutf8char(s, pos): |