mercurial-scm/hg: comparison mercurial/pycompat.py

equal deleted inserted replaced

-:df56e6bd37f6
+:79009cca491e
 """
 from __future__ import absolute_import
 import builtins
+import codecs
 import concurrent.futures as futures
+import functools
 import getopt
 import http.client as httplib
 import http.cookiejar as cookielib
 import inspect
+import io
 import json
 import os
 import pickle
 import queue
 import shlex
 import socketserver
+import struct
 import sys
 import tempfile
 import xmlrpc.client as xmlrpclib
 ispy3 = sys.version_info[0] >= 3
 ispypy = '__pypy__' in sys.builtin_module_names
 TYPE_CHECKING = False
 # fast path mainly for py2
 return xs
 return _rapply(f, xs)
-if ispy3:
+if os.name == r'nt' and sys.version_info >= (3, 6):
-import builtins
+# MBCS (or ANSI) filesystem encoding must be used as before.
-import codecs
+# Otherwise non-ASCII filenames in existing repositories would be
-import functools
+# corrupted.
-import io
+# This must be set once prior to any fsencode/fsdecode calls.
-import struct
+sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr
-if os.name == r'nt' and sys.version_info >= (3, 6):
+fsencode = os.fsencode
-# MBCS (or ANSI) filesystem encoding must be used as before.
+fsdecode = os.fsdecode
-# Otherwise non-ASCII filenames in existing repositories would be
+oscurdir = os.curdir.encode('ascii')
-# corrupted.
+oslinesep = os.linesep.encode('ascii')
-# This must be set once prior to any fsencode/fsdecode calls.
+osname = os.name.encode('ascii')
-sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr
+ospathsep = os.pathsep.encode('ascii')
+ospardir = os.pardir.encode('ascii')
-fsencode = os.fsencode
+ossep = os.sep.encode('ascii')
-fsdecode = os.fsdecode
+osaltsep = os.altsep
-oscurdir = os.curdir.encode('ascii')
+if osaltsep:
-oslinesep = os.linesep.encode('ascii')
+osaltsep = osaltsep.encode('ascii')
-osname = os.name.encode('ascii')
+osdevnull = os.devnull.encode('ascii')
-ospathsep = os.pathsep.encode('ascii')
-ospardir = os.pardir.encode('ascii')
+sysplatform = sys.platform.encode('ascii')
-ossep = os.sep.encode('ascii')
+sysexecutable = sys.executable
-osaltsep = os.altsep
+if sysexecutable:
-if osaltsep:
+sysexecutable = os.fsencode(sysexecutable)
-osaltsep = osaltsep.encode('ascii')
+bytesio = io.BytesIO
-osdevnull = os.devnull.encode('ascii')
+# TODO deprecate stringio name, as it is a lie on Python 3.
+stringio = bytesio
-sysplatform = sys.platform.encode('ascii')
-sysexecutable = sys.executable
-if sysexecutable:
+def maplist(*args):
-sysexecutable = os.fsencode(sysexecutable)
+return list(map(*args))
-bytesio = io.BytesIO
-# TODO deprecate stringio name, as it is a lie on Python 3.
-stringio = bytesio
+def rangelist(*args):
+return list(range(*args))
-def maplist(*args):
-return list(map(*args))
+def ziplist(*args):
-def rangelist(*args):
+return list(zip(*args))
-return list(range(*args))
-def ziplist(*args):
+rawinput = input
-return list(zip(*args))
+getargspec = inspect.getfullargspec
-rawinput = input
+long = int
-getargspec = inspect.getfullargspec
+if getattr(sys, 'argv', None) is not None:
-long = int
+# On POSIX, the char** argv array is converted to Python str using
+# Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
-if getattr(sys, 'argv', None) is not None:
+# isn't directly callable from Python code. In practice, os.fsencode()
-# On POSIX, the char** argv array is converted to Python str using
+# can be used instead (this is recommended by Python's documentation
-# Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
+# for sys.argv).
-# isn't directly callable from Python code. In practice, os.fsencode()
+#
-# can be used instead (this is recommended by Python's documentation
+# On Windows, the wchar_t **argv is passed into the interpreter as-is.
-# for sys.argv).
+# Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
-#
+# there's an additional wrinkle. What we really want to access is the
-# On Windows, the wchar_t **argv is passed into the interpreter as-is.
+# ANSI codepage representation of the arguments, as this is what
-# Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
+# `int main()` would receive if Python 3 didn't define `int wmain()`
-# there's an additional wrinkle. What we really want to access is the
+# (this is how Python 2 worked). To get that, we encode with the mbcs
-# ANSI codepage representation of the arguments, as this is what
+# encoding, which will pass CP_ACP to the underlying Windows API to
-# `int main()` would receive if Python 3 didn't define `int wmain()`
+# produce bytes.
-# (this is how Python 2 worked). To get that, we encode with the mbcs
+if os.name == r'nt':
-# encoding, which will pass CP_ACP to the underlying Windows API to
+sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
-# produce bytes.
+else:
-if os.name == r'nt':
+sysargv = [fsencode(a) for a in sys.argv]
-sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
-else:
+bytechr = struct.Struct('>B').pack
-sysargv = [fsencode(a) for a in sys.argv]
+byterepr = b'%r'.__mod__
-bytechr = struct.Struct('>B').pack
-byterepr = b'%r'.__mod__
+class bytestr(bytes):
+"""A bytes which mostly acts as a Python 2 str
-class bytestr(bytes):
-"""A bytes which mostly acts as a Python 2 str
+>>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
+('', 'foo', 'ascii', '1')
->>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
+>>> s = bytestr(b'foo')
-('', 'foo', 'ascii', '1')
+>>> assert s is bytestr(s)
->>> s = bytestr(b'foo')
->>> assert s is bytestr(s)
+__bytes__() should be called if provided:
-__bytes__() should be called if provided:
+>>> class bytesable(object):
+...     def __bytes__(self):
->>> class bytesable(object):
+...         return b'bytes'
-...     def __bytes__(self):
+>>> bytestr(bytesable())
-...         return b'bytes'
+'bytes'
->>> bytestr(bytesable())
-'bytes'
+There's no implicit conversion from non-ascii str as its encoding is
+unknown:
-There's no implicit conversion from non-ascii str as its encoding is
-unknown:
+>>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
+Traceback (most recent call last):
->>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
+...
-Traceback (most recent call last):
+UnicodeEncodeError: ...
-...
-UnicodeEncodeError: ...
+Comparison between bytestr and bytes should work:
-Comparison between bytestr and bytes should work:
+>>> assert bytestr(b'foo') == b'foo'
+>>> assert b'foo' == bytestr(b'foo')
->>> assert bytestr(b'foo') == b'foo'
+>>> assert b'f' in bytestr(b'foo')
->>> assert b'foo' == bytestr(b'foo')
+>>> assert bytestr(b'f') in b'foo'
->>> assert b'f' in bytestr(b'foo')
->>> assert bytestr(b'f') in b'foo'
+Sliced elements should be bytes, not integer:
-Sliced elements should be bytes, not integer:
+>>> s[1], s[:2]
+(b'o', b'fo')
->>> s[1], s[:2]
+>>> list(s), list(reversed(s))
-(b'o', b'fo')
+([b'f', b'o', b'o'], [b'o', b'o', b'f'])
->>> list(s), list(reversed(s))
-([b'f', b'o', b'o'], [b'o', b'o', b'f'])
+As bytestr type isn't propagated across operations, you need to cast
+bytes to bytestr explicitly:
-As bytestr type isn't propagated across operations, you need to cast
-bytes to bytestr explicitly:
+>>> s = bytestr(b'foo').upper()
+>>> t = bytestr(s)
->>> s = bytestr(b'foo').upper()
+>>> s[0], t[0]
->>> t = bytestr(s)
+(70, b'F')
->>> s[0], t[0]
-(70, b'F')
+Be careful to not pass a bytestr object to a function which expects
+bytearray-like behavior.
-Be careful to not pass a bytestr object to a function which expects
-bytearray-like behavior.
+>>> t = bytes(t)  # cast to bytes
+>>> assert type(t) is bytes
->>> t = bytes(t)  # cast to bytes
+"""
->>> assert type(t) is bytes
-"""
+# Trick pytype into not demanding Iterable[int] be passed to __new__(),
+# since the appropriate bytes format is done internally.
-# Trick pytype into not demanding Iterable[int] be passed to __new__(),
+#
-# since the appropriate bytes format is done internally.
+# https://github.com/google/pytype/issues/500
-#
+if TYPE_CHECKING:
-# https://github.com/google/pytype/issues/500
-if TYPE_CHECKING:
+def __init__(self, s=b''):
+pass
-def __init__(self, s=b''):
-pass
+def __new__(cls, s=b''):
+if isinstance(s, bytestr):
-def __new__(cls, s=b''):
-if isinstance(s, bytestr):
-return s
-if not isinstance(
-s, (bytes, bytearray)
-) and not hasattr(  # hasattr-py3-only
-s, u'__bytes__'
-):
-s = str(s).encode('ascii')
-return bytes.__new__(cls, s)
-def __getitem__(self, key):
-s = bytes.__getitem__(self, key)
-if not isinstance(s, bytes):
-s = bytechr(s)
 return s
+if not isinstance(
-def __iter__(self):
+s, (bytes, bytearray)
-return iterbytestr(bytes.__iter__(self))
+) and not hasattr(  # hasattr-py3-only
+s, u'__bytes__'
-def __repr__(self):
+):
-return bytes.__repr__(self)[1:]  # drop b''
+s = str(s).encode('ascii')
+return bytes.__new__(cls, s)
-def iterbytestr(s):
-"""Iterate bytes as if it were a str object of Python 2"""
+def __getitem__(self, key):
-return map(bytechr, s)
+s = bytes.__getitem__(self, key)
+if not isinstance(s, bytes):
-def maybebytestr(s):
+s = bytechr(s)
-"""Promote bytes to bytestr"""
-if isinstance(s, bytes):
-return bytestr(s)
 return s
-def sysbytes(s):
+def __iter__(self):
-"""Convert an internal str (e.g. keyword, __doc__) back to bytes
+return iterbytestr(bytes.__iter__(self))
-This never raises UnicodeEncodeError, but only ASCII characters
+def __repr__(self):
-can be round-trip by sysstr(sysbytes(s)).
+return bytes.__repr__(self)[1:]  # drop b''
-"""
-if isinstance(s, bytes):
-return s
+def iterbytestr(s):
-return s.encode('utf-8')
+"""Iterate bytes as if it were a str object of Python 2"""
+return map(bytechr, s)
-def sysstr(s):
-"""Return a keyword str to be passed to Python functions such as
-getattr() and str.encode()
+def maybebytestr(s):
+"""Promote bytes to bytestr"""
-This never raises UnicodeDecodeError. Non-ascii characters are
+if isinstance(s, bytes):
-considered invalid and mapped to arbitrary but unique code points
+return bytestr(s)
-such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
+return s
-"""
-if isinstance(s, builtins.str):
-return s
+def sysbytes(s):
-return s.decode('latin-1')
+"""Convert an internal str (e.g. keyword, __doc__) back to bytes
-def strurl(url):
+This never raises UnicodeEncodeError, but only ASCII characters
-"""Converts a bytes url back to str"""
+can be round-trip by sysstr(sysbytes(s)).
-if isinstance(url, bytes):
+"""
-return url.decode('ascii')
+if isinstance(s, bytes):
-return url
+return s
+return s.encode('utf-8')
-def bytesurl(url):
-"""Converts a str url to bytes by encoding in ascii"""
-if isinstance(url, str):
+def sysstr(s):
-return url.encode('ascii')
+"""Return a keyword str to be passed to Python functions such as
-return url
+getattr() and str.encode()
-def raisewithtb(exc, tb):
+This never raises UnicodeDecodeError. Non-ascii characters are
-"""Raise exception with the given traceback"""
+considered invalid and mapped to arbitrary but unique code points
-raise exc.with_traceback(tb)
+such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
+"""
-def getdoc(obj):
+if isinstance(s, builtins.str):
-"""Get docstring as bytes; may be None so gettext() won't confuse it
+return s
-with _('')"""
+return s.decode('latin-1')
-doc = getattr(obj, '__doc__', None)
-if doc is None:
-return doc
+def strurl(url):
-return sysbytes(doc)
+"""Converts a bytes url back to str"""
+if isinstance(url, bytes):
-def _wrapattrfunc(f):
+return url.decode('ascii')
-@functools.wraps(f)
+return url
-def w(object, name, *args):
-return f(object, sysstr(name), *args)
+def bytesurl(url):
-return w
+"""Converts a str url to bytes by encoding in ascii"""
+if isinstance(url, str):
-# these wrappers are automagically imported by hgloader
+return url.encode('ascii')
-delattr = _wrapattrfunc(builtins.delattr)
+return url
-getattr = _wrapattrfunc(builtins.getattr)
-hasattr = _wrapattrfunc(builtins.hasattr)
-setattr = _wrapattrfunc(builtins.setattr)
+def raisewithtb(exc, tb):
-xrange = builtins.range
+"""Raise exception with the given traceback"""
-unicode = str
+raise exc.with_traceback(tb)
-def open(name, mode=b'r', buffering=-1, encoding=None):
-return builtins.open(name, sysstr(mode), buffering, encoding)
+def getdoc(obj):
+"""Get docstring as bytes; may be None so gettext() won't confuse it
-safehasattr = _wrapattrfunc(builtins.hasattr)
+with _('')"""
+doc = getattr(obj, '__doc__', None)
-def _getoptbwrapper(orig, args, shortlist, namelist):
+if doc is None:
-"""
+return doc
-Takes bytes arguments, converts them to unicode, pass them to
+return sysbytes(doc)
-getopt.getopt(), convert the returned values back to bytes and then
-return them for Python 3 compatibility as getopt.getopt() don't accepts
-bytes on Python 3.
+def _wrapattrfunc(f):
-"""
+@functools.wraps(f)
-args = [a.decode('latin-1') for a in args]
+def w(object, name, *args):
-shortlist = shortlist.decode('latin-1')
+return f(object, sysstr(name), *args)
-namelist = [a.decode('latin-1') for a in namelist]
-opts, args = orig(args, shortlist, namelist)
+return w
-opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
-args = [a.encode('latin-1') for a in args]
-return opts, args
+# these wrappers are automagically imported by hgloader
+delattr = _wrapattrfunc(builtins.delattr)
-def strkwargs(dic):
+getattr = _wrapattrfunc(builtins.getattr)
-"""
+hasattr = _wrapattrfunc(builtins.hasattr)
-Converts the keys of a python dictonary to str i.e. unicodes so that
+setattr = _wrapattrfunc(builtins.setattr)
-they can be passed as keyword arguments as dictionaries with bytes keys
+xrange = builtins.range
-can't be passed as keyword arguments to functions on Python 3.
+unicode = str
-"""
-dic = {k.decode('latin-1'): v for k, v in dic.items()}
-return dic
+def open(name, mode=b'r', buffering=-1, encoding=None):
+return builtins.open(name, sysstr(mode), buffering, encoding)
-def byteskwargs(dic):
-"""
-Converts keys of python dictionaries to bytes as they were converted to
+safehasattr = _wrapattrfunc(builtins.hasattr)
-str to pass that dictonary as a keyword argument on Python 3.
-"""
-dic = {k.encode('latin-1'): v for k, v in dic.items()}
+def _getoptbwrapper(orig, args, shortlist, namelist):
-return dic
+"""
+Takes bytes arguments, converts them to unicode, pass them to
-# TODO: handle shlex.shlex().
+getopt.getopt(), convert the returned values back to bytes and then
-def shlexsplit(s, comments=False, posix=True):
+return them for Python 3 compatibility as getopt.getopt() don't accepts
-"""
+bytes on Python 3.
-Takes bytes argument, convert it to str i.e. unicodes, pass that into
+"""
-shlex.split(), convert the returned value to bytes and return that for
+args = [a.decode('latin-1') for a in args]
-Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
+shortlist = shortlist.decode('latin-1')
-"""
+namelist = [a.decode('latin-1') for a in namelist]
-ret = shlex.split(s.decode('latin-1'), comments, posix)
+opts, args = orig(args, shortlist, namelist)
-return [a.encode('latin-1') for a in ret]
+opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
+args = [a.encode('latin-1') for a in args]
-iteritems = lambda x: x.items()
+return opts, args
-itervalues = lambda x: x.values()
-# Python 3.5's json.load and json.loads require str. We polyfill its
+def strkwargs(dic):
-# code for detecting encoding from bytes.
+"""
-if sys.version_info[0:2] < (3, 6):
+Converts the keys of a python dictonary to str i.e. unicodes so that
+they can be passed as keyword arguments as dictionaries with bytes keys
-def _detect_encoding(b):
+can't be passed as keyword arguments to functions on Python 3.
-bstartswith = b.startswith
+"""
-if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+dic = {k.decode('latin-1'): v for k, v in dic.items()}
-return 'utf-32'
+return dic
-if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
-return 'utf-16'
-if bstartswith(codecs.BOM_UTF8):
+def byteskwargs(dic):
-return 'utf-8-sig'
+"""
+Converts keys of python dictionaries to bytes as they were converted to
-if len(b) >= 4:
+str to pass that dictonary as a keyword argument on Python 3.
-if not b[0]:
+"""
-# 00 00 -- -- - utf-32-be
+dic = {k.encode('latin-1'): v for k, v in dic.items()}
-# 00 XX -- -- - utf-16-be
+return dic
-return 'utf-16-be' if b[1] else 'utf-32-be'
-if not b[1]:
-# XX 00 00 00 - utf-32-le
+# TODO: handle shlex.shlex().
-# XX 00 00 XX - utf-16-le
+def shlexsplit(s, comments=False, posix=True):
-# XX 00 XX -- - utf-16-le
+"""
-return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+Takes bytes argument, convert it to str i.e. unicodes, pass that into
-elif len(b) == 2:
+shlex.split(), convert the returned value to bytes and return that for
-if not b[0]:
+Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
-# 00 XX - utf-16-be
+"""
-return 'utf-16-be'
+ret = shlex.split(s.decode('latin-1'), comments, posix)
-if not b[1]:
+return [a.encode('latin-1') for a in ret]
-# XX 00 - utf-16-le
-return 'utf-16-le'
-# default
+iteritems = lambda x: x.items()
-return 'utf-8'
+itervalues = lambda x: x.values()
-def json_loads(s, *args, **kwargs):
+# Python 3.5's json.load and json.loads require str. We polyfill its
-if isinstance(s, (bytes, bytearray)):
+# code for detecting encoding from bytes.
-s = s.decode(_detect_encoding(s), 'surrogatepass')
+if sys.version_info[0:2] < (3, 6):
-return json.loads(s, *args, **kwargs)
+def _detect_encoding(b):
+bstartswith = b.startswith
-else:
+if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
-json_loads = json.loads
+return 'utf-32'
+if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+return 'utf-16'
+if bstartswith(codecs.BOM_UTF8):
+return 'utf-8-sig'
+if len(b) >= 4:
+if not b[0]:
+# 00 00 -- -- - utf-32-be
+# 00 XX -- -- - utf-16-be
+return 'utf-16-be' if b[1] else 'utf-32-be'
+if not b[1]:
+# XX 00 00 00 - utf-32-le
+# XX 00 00 XX - utf-16-le
+# XX 00 XX -- - utf-16-le
+return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+elif len(b) == 2:
+if not b[0]:
+# 00 XX - utf-16-be
+return 'utf-16-be'
+if not b[1]:
+# XX 00 - utf-16-le
+return 'utf-16-le'
+# default
+return 'utf-8'
+def json_loads(s, *args, **kwargs):
+if isinstance(s, (bytes, bytearray)):
+s = s.decode(_detect_encoding(s), 'surrogatepass')
+return json.loads(s, *args, **kwargs)
 else:
-import cStringIO
-xrange = xrange
-unicode = unicode
-bytechr = chr
-byterepr = repr
-bytestr = str
-iterbytestr = iter
-maybebytestr = identity
-sysbytes = identity
-sysstr = identity
-strurl = identity
-bytesurl = identity
-open = open
-delattr = delattr
-getattr = getattr
-hasattr = hasattr
-setattr = setattr
-# this can't be parsed on Python 3
-exec(b'def raisewithtb(exc, tb):\n    raise exc, None, tb\n')
-def fsencode(filename):
-"""
-Partial backport from os.py in Python 3, which only accepts bytes.
-In Python 2, our paths should only ever be bytes, a unicode path
-indicates a bug.
-"""
-if isinstance(filename, str):
-return filename
-else:
-raise TypeError("expect str, not %s" % type(filename).__name__)
-# In Python 2, fsdecode() has a very chance to receive bytes. So it's
-# better not to touch Python 2 part as it's already working fine.
-fsdecode = identity
-def getdoc(obj):
-return getattr(obj, '__doc__', None)
-_notset = object()
-def safehasattr(thing, attr):
-return getattr(thing, attr, _notset) is not _notset
-def _getoptbwrapper(orig, args, shortlist, namelist):
-return orig(args, shortlist, namelist)
-strkwargs = identity
-byteskwargs = identity
-oscurdir = os.curdir
-oslinesep = os.linesep
-osname = os.name
-ospathsep = os.pathsep
-ospardir = os.pardir
-ossep = os.sep
-osaltsep = os.altsep
-osdevnull = os.devnull
-long = long
-if getattr(sys, 'argv', None) is not None:
-sysargv = sys.argv
-sysplatform = sys.platform
-sysexecutable = sys.executable
-shlexsplit = shlex.split
-bytesio = cStringIO.StringIO
-stringio = bytesio
-maplist = map
-rangelist = range
-ziplist = zip
-rawinput = raw_input
-getargspec = inspect.getargspec
-iteritems = lambda x: x.iteritems()
-itervalues = lambda x: x.itervalues()
 json_loads = json.loads
 isjython = sysplatform.startswith(b'java')
 isdarwin = sysplatform.startswith(b'darwin')

changeset 48871	79009cca491e
parent 48869	57b58413dad1
child 48873	5aafc3c5bdec