Mercurial > public > mercurial-scm > hg-stable
diff mercurial/encoding.py @ 43076:2372284d9457
formatting: blacken the codebase
This uses my patch to black
(https://github.com/psf/black/pull/826) so that collection literals are
not un-wrapped.
Done with:
hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S
# skip-blame mass-reformatting only
# no-check-commit reformats foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D6971
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 06 Oct 2019 09:45:02 -0400 |
parents | 25694a78e4a4 |
children | 687b865b95ad |
line wrap: on
line diff
--- a/mercurial/encoding.py Sat Oct 05 10:29:34 2019 -0400 +++ b/mercurial/encoding.py Sun Oct 06 09:45:02 2019 -0400 @@ -17,9 +17,7 @@ pycompat, ) -from .pure import ( - charencode as charencodepure, -) +from .pure import charencode as charencodepure charencode = policy.importmod(r'charencode') @@ -36,12 +34,15 @@ # These unicode characters are ignored by HFS+ (Apple Technote 1150, # "Unicode Subtleties"), so we need to ignore them in some places for # sanity. -_ignore = [unichr(int(x, 16)).encode("utf-8") for x in - "200c 200d 200e 200f 202a 202b 202c 202d 202e " - "206a 206b 206c 206d 206e 206f feff".split()] +_ignore = [ + unichr(int(x, 16)).encode("utf-8") + for x in "200c 200d 200e 200f 202a 202b 202c 202d 202e " + "206a 206b 206c 206d 206e 206f feff".split() +] # verify the next function will work assert all(i.startswith(("\xe2", "\xef")) for i in _ignore) + def hfsignoreclean(s): """Remove codepoints ignored by HFS+ from s. @@ -55,9 +56,10 @@ s = s.replace(c, '') return s + # encoding.environ is provided read-only, which may not be used to modify # the process environment -_nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ) +_nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ if not pycompat.ispy3: environ = os.environ # re-exports elif _nativeenviron: @@ -65,8 +67,10 @@ else: # preferred encoding isn't known yet; use utf-8 to avoid unicode error # and recreate it once encoding is settled - environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8')) - for k, v in os.environ.items()) # re-exports + environ = dict( + (k.encode(r'utf-8'), v.encode(r'utf-8')) + for k, v in os.environ.items() # re-exports + ) _encodingrewrites = { '646': 'ascii', @@ -88,15 +92,19 @@ encodingmode = environ.get("HGENCODINGMODE", "strict") fallbackencoding = 'ISO-8859-1' + class localstr(bytes): '''This class allows strings that are unmodified to be round-tripped to the local encoding and back''' + def __new__(cls, u, l): s = bytes.__new__(cls, l) s._utf8 = 
u return s + def __hash__(self): - return hash(self._utf8) # avoid collisions in local string space + return hash(self._utf8) # avoid collisions in local string space + class safelocalstr(bytes): """Tagged string denoting it was previously an internal UTF-8 string, @@ -108,6 +116,7 @@ >>> assert safelocalstr(b'\\xc3') in {b'\\xc3': 0} """ + def tolocal(s): """ Convert a string from internal UTF-8 to local encoding @@ -167,12 +176,13 @@ return safelocalstr(r) return localstr(u.encode('UTF-8'), r) except UnicodeDecodeError: - u = s.decode("utf-8", "replace") # last ditch + u = s.decode("utf-8", "replace") # last ditch # can't round-trip return u.encode(_sysstr(encoding), r"replace") except LookupError as k: raise error.Abort(k, hint="please check your locale settings") + def fromlocal(s): """ Convert a string from the local character encoding to UTF-8 @@ -194,27 +204,34 @@ u = s.decode(_sysstr(encoding), _sysstr(encodingmode)) return u.encode("utf-8") except UnicodeDecodeError as inst: - sub = s[max(0, inst.start - 10):inst.start + 10] - raise error.Abort("decoding near '%s': %s!" - % (sub, pycompat.bytestr(inst))) + sub = s[max(0, inst.start - 10) : inst.start + 10] + raise error.Abort( + "decoding near '%s': %s!" % (sub, pycompat.bytestr(inst)) + ) except LookupError as k: raise error.Abort(k, hint="please check your locale settings") + def unitolocal(u): """Convert a unicode string to a byte string of local encoding""" return tolocal(u.encode('utf-8')) + def unifromlocal(s): """Convert a byte string of local encoding to a unicode string""" return fromlocal(s).decode('utf-8') + def unimethod(bytesfunc): """Create a proxy method that forwards __unicode__() and __str__() of Python 3 to __bytes__()""" + def unifunc(obj): return unifromlocal(bytesfunc(obj)) + return unifunc + # converter functions between native str and byte string. use these if the # character encoding is not aware (e.g. exception message) or is known to # be locale dependent (e.g. date formatting.) 
@@ -230,8 +247,10 @@ if not _nativeenviron: # now encoding and helper functions are available, recreate the environ # dict to be exported to other modules - environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) - for k, v in os.environ.items()) # re-exports + environ = dict( + (tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8'))) + for k, v in os.environ.items() # re-exports + ) if pycompat.ispy3: # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which @@ -246,13 +265,16 @@ getcwd = os.getcwd # re-exports # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. -_wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" - and "WFA" or "WF") +_wide = _sysstr( + environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" and "WFA" or "WF" +) + def colwidth(s): "Find the column width of a string for display in the local encoding" return ucolwidth(s.decode(_sysstr(encoding), r'replace')) + def ucolwidth(d): "Find the column width of a Unicode string for display" eaw = getattr(unicodedata, 'east_asian_width', None) @@ -260,6 +282,7 @@ return sum([eaw(c) in _wide and 2 or 1 for c in d]) return len(d) + def getcols(s, start, c): '''Use colwidth to find a c-column substring of s starting at byte index start''' @@ -268,6 +291,7 @@ if colwidth(t) == c: return t + def trim(s, width, ellipsis='', leftside=False): """Trim string 's' to at most 'width' columns (including 'ellipsis'). 
@@ -336,21 +360,21 @@ try: u = s.decode(_sysstr(encoding)) except UnicodeDecodeError: - if len(s) <= width: # trimming is not needed + if len(s) <= width: # trimming is not needed return s width -= len(ellipsis) - if width <= 0: # no enough room even for ellipsis - return ellipsis[:width + len(ellipsis)] + if width <= 0: # no enough room even for ellipsis + return ellipsis[: width + len(ellipsis)] if leftside: return ellipsis + s[-width:] return s[:width] + ellipsis - if ucolwidth(u) <= width: # trimming is not needed + if ucolwidth(u) <= width: # trimming is not needed return s width -= len(ellipsis) - if width <= 0: # no enough room even for ellipsis - return ellipsis[:width + len(ellipsis)] + if width <= 0: # no enough room even for ellipsis + return ellipsis[: width + len(ellipsis)] if leftside: uslice = lambda i: u[i:] @@ -362,7 +386,8 @@ usub = uslice(i) if ucolwidth(usub) <= width: return concat(usub.encode(_sysstr(encoding))) - return ellipsis # no enough room for multi-column characters + return ellipsis # no enough room for multi-column characters + def lower(s): "best-effort encoding-aware case-folding of local string s" @@ -378,13 +403,14 @@ lu = u.lower() if u == lu: - return s # preserve localstring + return s # preserve localstring return lu.encode(_sysstr(encoding)) except UnicodeError: - return s.lower() # we don't know how to fold this except in ASCII + return s.lower() # we don't know how to fold this except in ASCII except LookupError as k: raise error.Abort(k, hint="please check your locale settings") + def upper(s): "best-effort encoding-aware case-folding of local string s" try: @@ -392,6 +418,7 @@ except UnicodeDecodeError: return upperfallback(s) + def upperfallback(s): try: if isinstance(s, localstr): @@ -401,13 +428,14 @@ uu = u.upper() if u == uu: - return s # preserve localstring + return s # preserve localstring return uu.encode(_sysstr(encoding)) except UnicodeError: - return s.upper() # we don't know how to fold this except in ASCII 
+ return s.upper() # we don't know how to fold this except in ASCII except LookupError as k: raise error.Abort(k, hint="please check your locale settings") + class normcasespecs(object): '''what a platform's normcase does to ASCII strings @@ -419,10 +447,12 @@ other: the fallback function should always be called This should be kept in sync with normcase_spec in util.h.''' + lower = -1 upper = 1 other = 0 + def jsonescape(s, paranoid=False): '''returns a string suitable for JSON @@ -475,6 +505,7 @@ pass return charencodepure.jsonescapeu8fallback(u8chars, paranoid) + # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 # bytes are mapped to that range. if pycompat.ispy3: @@ -484,6 +515,7 @@ _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] + def getutf8char(s, pos): '''get the next full utf-8 character in the given string, starting at pos @@ -492,15 +524,16 @@ ''' # find how many bytes to attempt decoding from first nibble - l = _utf8len[ord(s[pos:pos + 1]) >> 4] - if not l: # ascii - return s[pos:pos + 1] + l = _utf8len[ord(s[pos : pos + 1]) >> 4] + if not l: # ascii + return s[pos : pos + 1] - c = s[pos:pos + l] + c = s[pos : pos + l] # validate with attempted decode c.decode("utf-8", _utf8strict) return c + def toutf8b(s): '''convert a local, possibly-binary string into UTF-8b @@ -558,16 +591,17 @@ c = getutf8char(s, pos) if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": # have to re-escape existing U+DCxx characters - c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) + c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict) pos += 1 else: pos += len(c) except UnicodeDecodeError: - c = unichr(0xdc00 + ord(s[pos])).encode('utf-8', _utf8strict) + c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict) pos += 1 r += c return r + def fromutf8b(s): '''Given a UTF-8b string, return a local, possibly-binary string. 
@@ -611,6 +645,6 @@ pos += len(c) # unescape U+DCxx characters if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf": - c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xff) + c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xFF) r += c return r