Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/encoding.py @ 48983:fa2b1a46d92e
encoding: remove Python 2 support code
Differential Revision: https://phab.mercurial-scm.org/D12295
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 03 Mar 2022 07:58:29 -0800 |
parents | 6000f5b25c9b |
children | 642e31cb55f0 |
comparison
equal
deleted
inserted
replaced
48982:4eae533354ae | 48983:fa2b1a46d92e |
---|---|
44 asciiupper = charencode.asciiupper | 44 asciiupper = charencode.asciiupper |
45 _jsonescapeu8fast = charencode.jsonescapeu8fast | 45 _jsonescapeu8fast = charencode.jsonescapeu8fast |
46 | 46 |
47 _sysstr = pycompat.sysstr | 47 _sysstr = pycompat.sysstr |
48 | 48 |
49 if pycompat.ispy3: | 49 unichr = chr |
50 unichr = chr | |
51 | 50 |
52 # These unicode characters are ignored by HFS+ (Apple Technote 1150, | 51 # These unicode characters are ignored by HFS+ (Apple Technote 1150, |
53 # "Unicode Subtleties"), so we need to ignore them in some places for | 52 # "Unicode Subtleties"), so we need to ignore them in some places for |
54 # sanity. | 53 # sanity. |
55 _ignore = [ | 54 _ignore = [ |
76 return s | 75 return s |
77 | 76 |
78 | 77 |
79 # encoding.environ is provided read-only, which may not be used to modify | 78 # encoding.environ is provided read-only, which may not be used to modify |
80 # the process environment | 79 # the process environment |
81 _nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ | 80 _nativeenviron = os.supports_bytes_environ |
82 if not pycompat.ispy3: | 81 if _nativeenviron: |
83 environ = os.environ # re-exports | |
84 elif _nativeenviron: | |
85 environ = os.environb # re-exports | 82 environ = os.environb # re-exports |
86 else: | 83 else: |
87 # preferred encoding isn't known yet; use utf-8 to avoid unicode error | 84 # preferred encoding isn't known yet; use utf-8 to avoid unicode error |
88 # and recreate it once encoding is settled | 85 # and recreate it once encoding is settled |
89 environ = { | 86 environ = { |
96 b'ANSI_X3.4-1968': b'ascii', | 93 b'ANSI_X3.4-1968': b'ascii', |
97 } | 94 } |
98 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. | 95 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. |
99 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. | 96 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. |
100 # https://bugs.python.org/issue13216 | 97 # https://bugs.python.org/issue13216 |
101 if pycompat.iswindows and not pycompat.ispy3: | 98 if pycompat.iswindows: |
102 _encodingrewrites[b'cp65001'] = b'utf-8' | 99 _encodingrewrites[b'cp65001'] = b'utf-8' |
103 | 100 |
104 try: | 101 try: |
105 encoding = environ.get(b"HGENCODING") | 102 encoding = environ.get(b"HGENCODING") |
106 if not encoding: | 103 if not encoding: |
268 | 265 |
269 | 266 |
270 # converter functions between native str and byte string. use these if the | 267 # converter functions between native str and byte string. use these if the |
271 # character encoding is not aware (e.g. exception message) or is known to | 268 # character encoding is not aware (e.g. exception message) or is known to |
272 # be locale dependent (e.g. date formatting.) | 269 # be locale dependent (e.g. date formatting.) |
273 if pycompat.ispy3: | 270 strtolocal = unitolocal |
274 strtolocal = unitolocal | 271 strfromlocal = unifromlocal |
275 strfromlocal = unifromlocal | 272 strmethod = unimethod |
276 strmethod = unimethod | |
277 else: | |
278 | |
279 def strtolocal(s): | |
280 # type: (str) -> bytes | |
281 return s # pytype: disable=bad-return-type | |
282 | |
283 def strfromlocal(s): | |
284 # type: (bytes) -> str | |
285 return s # pytype: disable=bad-return-type | |
286 | |
287 strmethod = pycompat.identity | |
288 | 273 |
289 | 274 |
290 def lower(s): | 275 def lower(s): |
291 # type: (bytes) -> bytes | 276 # type: (bytes) -> bytes |
292 """best-effort encoding-aware case-folding of local string s""" | 277 """best-effort encoding-aware case-folding of local string s""" |
342 | 327 |
343 | 328 |
344 if not _nativeenviron: | 329 if not _nativeenviron: |
345 # now encoding and helper functions are available, recreate the environ | 330 # now encoding and helper functions are available, recreate the environ |
346 # dict to be exported to other modules | 331 # dict to be exported to other modules |
347 if pycompat.iswindows and pycompat.ispy3: | 332 if pycompat.iswindows: |
348 | 333 |
349 class WindowsEnviron(dict): | 334 class WindowsEnviron(dict): |
350 """`os.environ` normalizes environment variables to uppercase on windows""" | 335 """`os.environ` normalizes environment variables to uppercase on windows""" |
351 | 336 |
352 def get(self, key, default=None): | 337 def get(self, key, default=None): |
358 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) | 343 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) |
359 | 344 |
360 | 345 |
361 DRIVE_RE = re.compile(b'^[a-z]:') | 346 DRIVE_RE = re.compile(b'^[a-z]:') |
362 | 347 |
363 if pycompat.ispy3: | 348 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which |
364 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which | 349 # returns bytes. |
365 # returns bytes. | 350 if pycompat.iswindows: |
366 if pycompat.iswindows: | 351 # Python 3 on Windows issues a DeprecationWarning about using the bytes |
367 # Python 3 on Windows issues a DeprecationWarning about using the bytes | 352 # API when os.getcwdb() is called. |
368 # API when os.getcwdb() is called. | 353 # |
369 # | 354 # Additionally, py3.8+ uppercases the drive letter when calling |
370 # Additionally, py3.8+ uppercases the drive letter when calling | 355 # os.path.realpath(), which is used on ``repo.root``. Since those |
371 # os.path.realpath(), which is used on ``repo.root``. Since those | 356 # strings are compared in various places as simple strings, also call |
372 # strings are compared in various places as simple strings, also call | 357 # realpath here. See https://bugs.python.org/issue40368 |
373 # realpath here. See https://bugs.python.org/issue40368 | 358 # |
374 # | 359 # However this is not reliable, so let's explicitly make this drive |
375 # However this is not reliable, so let's explicitly make this drive | 360 # letter upper case. |
376 # letter upper case. | 361 # |
377 # | 362 # note: we should consider dropping realpath here since it seems to |
378 # note: we should consider dropping realpath here since it seems to | 363 # change the semantics of `getcwd`. |
379 # change the semantics of `getcwd`. | 364 |
380 | 365 def getcwd(): |
381 def getcwd(): | 366 cwd = os.getcwd() # re-exports |
382 cwd = os.getcwd() # re-exports | 367 cwd = os.path.realpath(cwd) |
383 cwd = os.path.realpath(cwd) | 368 cwd = strtolocal(cwd) |
384 cwd = strtolocal(cwd) | 369 if DRIVE_RE.match(cwd): |
385 if DRIVE_RE.match(cwd): | 370 cwd = cwd[0:1].upper() + cwd[1:] |
386 cwd = cwd[0:1].upper() + cwd[1:] | 371 return cwd |
387 return cwd | 372 |
388 | 373 |
389 else: | |
390 getcwd = os.getcwdb # re-exports | |
391 else: | 374 else: |
392 getcwd = os.getcwd # re-exports | 375 getcwd = os.getcwdb # re-exports |
393 | 376 |
394 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. | 377 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. |
395 _wide = _sysstr( | 378 _wide = _sysstr( |
396 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" | 379 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" |
397 and b"WFA" | 380 and b"WFA" |
598 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) | 581 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
599 | 582 |
600 | 583 |
601 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 | 584 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 |
602 # bytes are mapped to that range. | 585 # bytes are mapped to that range. |
603 if pycompat.ispy3: | 586 _utf8strict = r'surrogatepass' |
604 _utf8strict = r'surrogatepass' | |
605 else: | |
606 _utf8strict = r'strict' | |
607 | 587 |
608 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | 588 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
609 | 589 |
610 | 590 |
611 def getutf8char(s, pos): | 591 def getutf8char(s, pos): |