mercurial/pycompat.py
changeset 48871 79009cca491e
parent 48869 57b58413dad1
child 48873 5aafc3c5bdec
equal deleted inserted replaced
48870:df56e6bd37f6 48871:79009cca491e
     9 """
     9 """
    10 
    10 
    11 from __future__ import absolute_import
    11 from __future__ import absolute_import
    12 
    12 
    13 import builtins
    13 import builtins
       
    14 import codecs
    14 import concurrent.futures as futures
    15 import concurrent.futures as futures
       
    16 import functools
    15 import getopt
    17 import getopt
    16 import http.client as httplib
    18 import http.client as httplib
    17 import http.cookiejar as cookielib
    19 import http.cookiejar as cookielib
    18 import inspect
    20 import inspect
       
    21 import io
    19 import json
    22 import json
    20 import os
    23 import os
    21 import pickle
    24 import pickle
    22 import queue
    25 import queue
    23 import shlex
    26 import shlex
    24 import socketserver
    27 import socketserver
       
    28 import struct
    25 import sys
    29 import sys
    26 import tempfile
    30 import tempfile
    27 import xmlrpc.client as xmlrpclib
    31 import xmlrpc.client as xmlrpclib
       
    32 
    28 
    33 
    29 ispy3 = sys.version_info[0] >= 3
    34 ispy3 = sys.version_info[0] >= 3
    30 ispypy = '__pypy__' in sys.builtin_module_names
    35 ispypy = '__pypy__' in sys.builtin_module_names
    31 TYPE_CHECKING = False
    36 TYPE_CHECKING = False
    32 
    37 
    80         # fast path mainly for py2
    85         # fast path mainly for py2
    81         return xs
    86         return xs
    82     return _rapply(f, xs)
    87     return _rapply(f, xs)
    83 
    88 
    84 
    89 
    85 if ispy3:
    90 if os.name == r'nt' and sys.version_info >= (3, 6):
    86     import builtins
    91     # MBCS (or ANSI) filesystem encoding must be used as before.
    87     import codecs
    92     # Otherwise non-ASCII filenames in existing repositories would be
    88     import functools
    93     # corrupted.
    89     import io
    94     # This must be set once prior to any fsencode/fsdecode calls.
    90     import struct
    95     sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr
    91 
    96 
    92     if os.name == r'nt' and sys.version_info >= (3, 6):
    97 fsencode = os.fsencode
    93         # MBCS (or ANSI) filesystem encoding must be used as before.
    98 fsdecode = os.fsdecode
    94         # Otherwise non-ASCII filenames in existing repositories would be
    99 oscurdir = os.curdir.encode('ascii')
    95         # corrupted.
   100 oslinesep = os.linesep.encode('ascii')
    96         # This must be set once prior to any fsencode/fsdecode calls.
   101 osname = os.name.encode('ascii')
    97         sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr
   102 ospathsep = os.pathsep.encode('ascii')
    98 
   103 ospardir = os.pardir.encode('ascii')
    99     fsencode = os.fsencode
   104 ossep = os.sep.encode('ascii')
   100     fsdecode = os.fsdecode
   105 osaltsep = os.altsep
   101     oscurdir = os.curdir.encode('ascii')
   106 if osaltsep:
   102     oslinesep = os.linesep.encode('ascii')
   107     osaltsep = osaltsep.encode('ascii')
   103     osname = os.name.encode('ascii')
   108 osdevnull = os.devnull.encode('ascii')
   104     ospathsep = os.pathsep.encode('ascii')
   109 
   105     ospardir = os.pardir.encode('ascii')
   110 sysplatform = sys.platform.encode('ascii')
   106     ossep = os.sep.encode('ascii')
   111 sysexecutable = sys.executable
   107     osaltsep = os.altsep
   112 if sysexecutable:
   108     if osaltsep:
   113     sysexecutable = os.fsencode(sysexecutable)
   109         osaltsep = osaltsep.encode('ascii')
   114 bytesio = io.BytesIO
   110     osdevnull = os.devnull.encode('ascii')
   115 # TODO deprecate stringio name, as it is a lie on Python 3.
   111 
   116 stringio = bytesio
   112     sysplatform = sys.platform.encode('ascii')
   117 
   113     sysexecutable = sys.executable
   118 
   114     if sysexecutable:
   119 def maplist(*args):
   115         sysexecutable = os.fsencode(sysexecutable)
   120     return list(map(*args))
   116     bytesio = io.BytesIO
   121 
   117     # TODO deprecate stringio name, as it is a lie on Python 3.
   122 
   118     stringio = bytesio
   123 def rangelist(*args):
   119 
   124     return list(range(*args))
   120     def maplist(*args):
   125 
   121         return list(map(*args))
   126 
   122 
   127 def ziplist(*args):
   123     def rangelist(*args):
   128     return list(zip(*args))
   124         return list(range(*args))
   129 
   125 
   130 
   126     def ziplist(*args):
   131 rawinput = input
   127         return list(zip(*args))
   132 getargspec = inspect.getfullargspec
   128 
   133 
   129     rawinput = input
   134 long = int
   130     getargspec = inspect.getfullargspec
   135 
   131 
   136 if getattr(sys, 'argv', None) is not None:
   132     long = int
   137     # On POSIX, the char** argv array is converted to Python str using
   133 
   138     # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
   134     if getattr(sys, 'argv', None) is not None:
   139     # isn't directly callable from Python code. In practice, os.fsencode()
   135         # On POSIX, the char** argv array is converted to Python str using
   140     # can be used instead (this is recommended by Python's documentation
   136         # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
   141     # for sys.argv).
   137         # isn't directly callable from Python code. In practice, os.fsencode()
   142     #
   138         # can be used instead (this is recommended by Python's documentation
   143     # On Windows, the wchar_t **argv is passed into the interpreter as-is.
   139         # for sys.argv).
   144     # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
   140         #
   145     # there's an additional wrinkle. What we really want to access is the
   141         # On Windows, the wchar_t **argv is passed into the interpreter as-is.
   146     # ANSI codepage representation of the arguments, as this is what
   142         # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
   147     # `int main()` would receive if Python 3 didn't define `int wmain()`
   143         # there's an additional wrinkle. What we really want to access is the
   148     # (this is how Python 2 worked). To get that, we encode with the mbcs
   144         # ANSI codepage representation of the arguments, as this is what
   149     # encoding, which will pass CP_ACP to the underlying Windows API to
   145         # `int main()` would receive if Python 3 didn't define `int wmain()`
   150     # produce bytes.
   146         # (this is how Python 2 worked). To get that, we encode with the mbcs
   151     if os.name == r'nt':
   147         # encoding, which will pass CP_ACP to the underlying Windows API to
   152         sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
   148         # produce bytes.
   153     else:
   149         if os.name == r'nt':
   154         sysargv = [fsencode(a) for a in sys.argv]
   150             sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
   155 
   151         else:
   156 bytechr = struct.Struct('>B').pack
   152             sysargv = [fsencode(a) for a in sys.argv]
   157 byterepr = b'%r'.__mod__
   153 
   158 
   154     bytechr = struct.Struct('>B').pack
   159 
   155     byterepr = b'%r'.__mod__
   160 class bytestr(bytes):
   156 
   161     """A bytes which mostly acts as a Python 2 str
   157     class bytestr(bytes):
   162 
   158         """A bytes which mostly acts as a Python 2 str
   163     >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
   159 
   164     ('', 'foo', 'ascii', '1')
   160         >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
   165     >>> s = bytestr(b'foo')
   161         ('', 'foo', 'ascii', '1')
   166     >>> assert s is bytestr(s)
   162         >>> s = bytestr(b'foo')
   167 
   163         >>> assert s is bytestr(s)
   168     __bytes__() should be called if provided:
   164 
   169 
   165         __bytes__() should be called if provided:
   170     >>> class bytesable(object):
   166 
   171     ...     def __bytes__(self):
   167         >>> class bytesable(object):
   172     ...         return b'bytes'
   168         ...     def __bytes__(self):
   173     >>> bytestr(bytesable())
   169         ...         return b'bytes'
   174     'bytes'
   170         >>> bytestr(bytesable())
   175 
   171         'bytes'
   176     There's no implicit conversion from non-ascii str as its encoding is
   172 
   177     unknown:
   173         There's no implicit conversion from non-ascii str as its encoding is
   178 
   174         unknown:
   179     >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
   175 
   180     Traceback (most recent call last):
   176         >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
   181       ...
   177         Traceback (most recent call last):
   182     UnicodeEncodeError: ...
   178           ...
   183 
   179         UnicodeEncodeError: ...
   184     Comparison between bytestr and bytes should work:
   180 
   185 
   181         Comparison between bytestr and bytes should work:
   186     >>> assert bytestr(b'foo') == b'foo'
   182 
   187     >>> assert b'foo' == bytestr(b'foo')
   183         >>> assert bytestr(b'foo') == b'foo'
   188     >>> assert b'f' in bytestr(b'foo')
   184         >>> assert b'foo' == bytestr(b'foo')
   189     >>> assert bytestr(b'f') in b'foo'
   185         >>> assert b'f' in bytestr(b'foo')
   190 
   186         >>> assert bytestr(b'f') in b'foo'
   191     Sliced elements should be bytes, not integer:
   187 
   192 
   188         Sliced elements should be bytes, not integer:
   193     >>> s[1], s[:2]
   189 
   194     (b'o', b'fo')
   190         >>> s[1], s[:2]
   195     >>> list(s), list(reversed(s))
   191         (b'o', b'fo')
   196     ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
   192         >>> list(s), list(reversed(s))
   197 
   193         ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
   198     As bytestr type isn't propagated across operations, you need to cast
   194 
   199     bytes to bytestr explicitly:
   195         As bytestr type isn't propagated across operations, you need to cast
   200 
   196         bytes to bytestr explicitly:
   201     >>> s = bytestr(b'foo').upper()
   197 
   202     >>> t = bytestr(s)
   198         >>> s = bytestr(b'foo').upper()
   203     >>> s[0], t[0]
   199         >>> t = bytestr(s)
   204     (70, b'F')
   200         >>> s[0], t[0]
   205 
   201         (70, b'F')
   206     Be careful to not pass a bytestr object to a function which expects
   202 
   207     bytearray-like behavior.
   203         Be careful to not pass a bytestr object to a function which expects
   208 
   204         bytearray-like behavior.
   209     >>> t = bytes(t)  # cast to bytes
   205 
   210     >>> assert type(t) is bytes
   206         >>> t = bytes(t)  # cast to bytes
   211     """
   207         >>> assert type(t) is bytes
   212 
   208         """
   213     # Trick pytype into not demanding Iterable[int] be passed to __new__(),
   209 
   214     # since the appropriate bytes format is done internally.
   210         # Trick pytype into not demanding Iterable[int] be passed to __new__(),
   215     #
   211         # since the appropriate bytes format is done internally.
   216     # https://github.com/google/pytype/issues/500
   212         #
   217     if TYPE_CHECKING:
   213         # https://github.com/google/pytype/issues/500
   218 
   214         if TYPE_CHECKING:
   219         def __init__(self, s=b''):
   215 
   220             pass
   216             def __init__(self, s=b''):
   221 
   217                 pass
   222     def __new__(cls, s=b''):
   218 
   223         if isinstance(s, bytestr):
   219         def __new__(cls, s=b''):
       
   220             if isinstance(s, bytestr):
       
   221                 return s
       
   222             if not isinstance(
       
   223                 s, (bytes, bytearray)
       
   224             ) and not hasattr(  # hasattr-py3-only
       
   225                 s, u'__bytes__'
       
   226             ):
       
   227                 s = str(s).encode('ascii')
       
   228             return bytes.__new__(cls, s)
       
   229 
       
   230         def __getitem__(self, key):
       
   231             s = bytes.__getitem__(self, key)
       
   232             if not isinstance(s, bytes):
       
   233                 s = bytechr(s)
       
   234             return s
   224             return s
   235 
   225         if not isinstance(
   236         def __iter__(self):
   226             s, (bytes, bytearray)
   237             return iterbytestr(bytes.__iter__(self))
   227         ) and not hasattr(  # hasattr-py3-only
   238 
   228             s, u'__bytes__'
   239         def __repr__(self):
   229         ):
   240             return bytes.__repr__(self)[1:]  # drop b''
   230             s = str(s).encode('ascii')
   241 
   231         return bytes.__new__(cls, s)
   242     def iterbytestr(s):
   232 
   243         """Iterate bytes as if it were a str object of Python 2"""
   233     def __getitem__(self, key):
   244         return map(bytechr, s)
   234         s = bytes.__getitem__(self, key)
   245 
   235         if not isinstance(s, bytes):
   246     def maybebytestr(s):
   236             s = bytechr(s)
   247         """Promote bytes to bytestr"""
       
   248         if isinstance(s, bytes):
       
   249             return bytestr(s)
       
   250         return s
   237         return s
   251 
   238 
   252     def sysbytes(s):
   239     def __iter__(self):
   253         """Convert an internal str (e.g. keyword, __doc__) back to bytes
   240         return iterbytestr(bytes.__iter__(self))
   254 
   241 
   255         This never raises UnicodeEncodeError, but only ASCII characters
   242     def __repr__(self):
   256         can be round-tripped by sysstr(sysbytes(s)).
   243         return bytes.__repr__(self)[1:]  # drop b''
   257         """
   244 
   258         if isinstance(s, bytes):
   245 
   259             return s
   246 def iterbytestr(s):
   260         return s.encode('utf-8')
   247     """Iterate bytes as if it were a str object of Python 2"""
   261 
   248     return map(bytechr, s)
   262     def sysstr(s):
   249 
   263         """Return a keyword str to be passed to Python functions such as
   250 
   264         getattr() and str.encode()
   251 def maybebytestr(s):
   265 
   252     """Promote bytes to bytestr"""
   266         This never raises UnicodeDecodeError. Non-ascii characters are
   253     if isinstance(s, bytes):
   267         considered invalid and mapped to arbitrary but unique code points
   254         return bytestr(s)
   268         such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
   255     return s
   269         """
   256 
   270         if isinstance(s, builtins.str):
   257 
   271             return s
   258 def sysbytes(s):
   272         return s.decode('latin-1')
   259     """Convert an internal str (e.g. keyword, __doc__) back to bytes
   273 
   260 
   274     def strurl(url):
   261     This never raises UnicodeEncodeError, but only ASCII characters
   275         """Converts a bytes url back to str"""
   262     can be round-tripped by sysstr(sysbytes(s)).
   276         if isinstance(url, bytes):
   263     """
   277             return url.decode('ascii')
   264     if isinstance(s, bytes):
   278         return url
   265         return s
   279 
   266     return s.encode('utf-8')
   280     def bytesurl(url):
   267 
   281         """Converts a str url to bytes by encoding in ascii"""
   268 
   282         if isinstance(url, str):
   269 def sysstr(s):
   283             return url.encode('ascii')
   270     """Return a keyword str to be passed to Python functions such as
   284         return url
   271     getattr() and str.encode()
   285 
   272 
   286     def raisewithtb(exc, tb):
   273     This never raises UnicodeDecodeError. Non-ascii characters are
   287         """Raise exception with the given traceback"""
   274     considered invalid and mapped to arbitrary but unique code points
   288         raise exc.with_traceback(tb)
   275     such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
   289 
   276     """
   290     def getdoc(obj):
   277     if isinstance(s, builtins.str):
   291         """Get docstring as bytes; may be None so gettext() won't confuse it
   278         return s
   292         with _('')"""
   279     return s.decode('latin-1')
   293         doc = getattr(obj, '__doc__', None)
   280 
   294         if doc is None:
   281 
   295             return doc
   282 def strurl(url):
   296         return sysbytes(doc)
   283     """Converts a bytes url back to str"""
   297 
   284     if isinstance(url, bytes):
   298     def _wrapattrfunc(f):
   285         return url.decode('ascii')
   299         @functools.wraps(f)
   286     return url
   300         def w(object, name, *args):
   287 
   301             return f(object, sysstr(name), *args)
   288 
   302 
   289 def bytesurl(url):
   303         return w
   290     """Converts a str url to bytes by encoding in ascii"""
   304 
   291     if isinstance(url, str):
   305     # these wrappers are automagically imported by hgloader
   292         return url.encode('ascii')
   306     delattr = _wrapattrfunc(builtins.delattr)
   293     return url
   307     getattr = _wrapattrfunc(builtins.getattr)
   294 
   308     hasattr = _wrapattrfunc(builtins.hasattr)
   295 
   309     setattr = _wrapattrfunc(builtins.setattr)
   296 def raisewithtb(exc, tb):
   310     xrange = builtins.range
   297     """Raise exception with the given traceback"""
   311     unicode = str
   298     raise exc.with_traceback(tb)
   312 
   299 
   313     def open(name, mode=b'r', buffering=-1, encoding=None):
   300 
   314         return builtins.open(name, sysstr(mode), buffering, encoding)
   301 def getdoc(obj):
   315 
   302     """Get docstring as bytes; may be None so gettext() won't confuse it
   316     safehasattr = _wrapattrfunc(builtins.hasattr)
   303     with _('')"""
   317 
   304     doc = getattr(obj, '__doc__', None)
   318     def _getoptbwrapper(orig, args, shortlist, namelist):
   305     if doc is None:
   319         """
   306         return doc
   320         Takes bytes arguments, converts them to unicode, pass them to
   307     return sysbytes(doc)
   321         getopt.getopt(), convert the returned values back to bytes and then
   308 
   322         return them for Python 3 compatibility as getopt.getopt() doesn't accept
   309 
   323         bytes on Python 3.
   310 def _wrapattrfunc(f):
   324         """
   311     @functools.wraps(f)
   325         args = [a.decode('latin-1') for a in args]
   312     def w(object, name, *args):
   326         shortlist = shortlist.decode('latin-1')
   313         return f(object, sysstr(name), *args)
   327         namelist = [a.decode('latin-1') for a in namelist]
   314 
   328         opts, args = orig(args, shortlist, namelist)
   315     return w
   329         opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
   316 
   330         args = [a.encode('latin-1') for a in args]
   317 
   331         return opts, args
   318 # these wrappers are automagically imported by hgloader
   332 
   319 delattr = _wrapattrfunc(builtins.delattr)
   333     def strkwargs(dic):
   320 getattr = _wrapattrfunc(builtins.getattr)
   334         """
   321 hasattr = _wrapattrfunc(builtins.hasattr)
   335         Converts the keys of a python dictionary to str i.e. unicodes so that
   322 setattr = _wrapattrfunc(builtins.setattr)
   336         they can be passed as keyword arguments as dictionaries with bytes keys
   323 xrange = builtins.range
   337         can't be passed as keyword arguments to functions on Python 3.
   324 unicode = str
   338         """
   325 
   339         dic = {k.decode('latin-1'): v for k, v in dic.items()}
   326 
   340         return dic
   327 def open(name, mode=b'r', buffering=-1, encoding=None):
   341 
   328     return builtins.open(name, sysstr(mode), buffering, encoding)
   342     def byteskwargs(dic):
   329 
   343         """
   330 
   344         Converts keys of python dictionaries to bytes as they were converted to
   331 safehasattr = _wrapattrfunc(builtins.hasattr)
   345         str to pass that dictionary as a keyword argument on Python 3.
   332 
   346         """
   333 
   347         dic = {k.encode('latin-1'): v for k, v in dic.items()}
   334 def _getoptbwrapper(orig, args, shortlist, namelist):
   348         return dic
   335     """
   349 
   336     Takes bytes arguments, converts them to unicode, pass them to
   350     # TODO: handle shlex.shlex().
   337     getopt.getopt(), convert the returned values back to bytes and then
   351     def shlexsplit(s, comments=False, posix=True):
   338     return them for Python 3 compatibility as getopt.getopt() doesn't accept
   352         """
   339     bytes on Python 3.
   353         Takes bytes argument, convert it to str i.e. unicodes, pass that into
   340     """
   354         shlex.split(), convert the returned value to bytes and return that for
   341     args = [a.decode('latin-1') for a in args]
   355         Python 3 compatibility as shlex.split() doesn't accept bytes on Python 3.
   342     shortlist = shortlist.decode('latin-1')
   356         """
   343     namelist = [a.decode('latin-1') for a in namelist]
   357         ret = shlex.split(s.decode('latin-1'), comments, posix)
   344     opts, args = orig(args, shortlist, namelist)
   358         return [a.encode('latin-1') for a in ret]
   345     opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
   359 
   346     args = [a.encode('latin-1') for a in args]
   360     iteritems = lambda x: x.items()
   347     return opts, args
   361     itervalues = lambda x: x.values()
   348 
   362 
   349 
   363     # Python 3.5's json.load and json.loads require str. We polyfill its
   350 def strkwargs(dic):
   364     # code for detecting encoding from bytes.
   351     """
   365     if sys.version_info[0:2] < (3, 6):
   352     Converts the keys of a python dictionary to str i.e. unicodes so that
   366 
   353     they can be passed as keyword arguments as dictionaries with bytes keys
   367         def _detect_encoding(b):
   354     can't be passed as keyword arguments to functions on Python 3.
   368             bstartswith = b.startswith
   355     """
   369             if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
   356     dic = {k.decode('latin-1'): v for k, v in dic.items()}
   370                 return 'utf-32'
   357     return dic
   371             if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
   358 
   372                 return 'utf-16'
   359 
   373             if bstartswith(codecs.BOM_UTF8):
   360 def byteskwargs(dic):
   374                 return 'utf-8-sig'
   361     """
   375 
   362     Converts keys of python dictionaries to bytes as they were converted to
   376             if len(b) >= 4:
   363     str to pass that dictionary as a keyword argument on Python 3.
   377                 if not b[0]:
   364     """
   378                     # 00 00 -- -- - utf-32-be
   365     dic = {k.encode('latin-1'): v for k, v in dic.items()}
   379                     # 00 XX -- -- - utf-16-be
   366     return dic
   380                     return 'utf-16-be' if b[1] else 'utf-32-be'
   367 
   381                 if not b[1]:
   368 
   382                     # XX 00 00 00 - utf-32-le
   369 # TODO: handle shlex.shlex().
   383                     # XX 00 00 XX - utf-16-le
   370 def shlexsplit(s, comments=False, posix=True):
   384                     # XX 00 XX -- - utf-16-le
   371     """
   385                     return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
   372     Takes bytes argument, convert it to str i.e. unicodes, pass that into
   386             elif len(b) == 2:
   373     shlex.split(), convert the returned value to bytes and return that for
   387                 if not b[0]:
   374     Python 3 compatibility as shlex.split() doesn't accept bytes on Python 3.
   388                     # 00 XX - utf-16-be
   375     """
   389                     return 'utf-16-be'
   376     ret = shlex.split(s.decode('latin-1'), comments, posix)
   390                 if not b[1]:
   377     return [a.encode('latin-1') for a in ret]
   391                     # XX 00 - utf-16-le
   378 
   392                     return 'utf-16-le'
   379 
   393             # default
   380 iteritems = lambda x: x.items()
   394             return 'utf-8'
   381 itervalues = lambda x: x.values()
   395 
   382 
   396         def json_loads(s, *args, **kwargs):
   383 # Python 3.5's json.load and json.loads require str. We polyfill its
   397             if isinstance(s, (bytes, bytearray)):
   384 # code for detecting encoding from bytes.
   398                 s = s.decode(_detect_encoding(s), 'surrogatepass')
   385 if sys.version_info[0:2] < (3, 6):
   399 
   386 
   400             return json.loads(s, *args, **kwargs)
   387     def _detect_encoding(b):
   401 
   388         bstartswith = b.startswith
   402     else:
   389         if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
   403         json_loads = json.loads
   390             return 'utf-32'
       
   391         if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
       
   392             return 'utf-16'
       
   393         if bstartswith(codecs.BOM_UTF8):
       
   394             return 'utf-8-sig'
       
   395 
       
   396         if len(b) >= 4:
       
   397             if not b[0]:
       
   398                 # 00 00 -- -- - utf-32-be
       
   399                 # 00 XX -- -- - utf-16-be
       
   400                 return 'utf-16-be' if b[1] else 'utf-32-be'
       
   401             if not b[1]:
       
   402                 # XX 00 00 00 - utf-32-le
       
   403                 # XX 00 00 XX - utf-16-le
       
   404                 # XX 00 XX -- - utf-16-le
       
   405                 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
       
   406         elif len(b) == 2:
       
   407             if not b[0]:
       
   408                 # 00 XX - utf-16-be
       
   409                 return 'utf-16-be'
       
   410             if not b[1]:
       
   411                 # XX 00 - utf-16-le
       
   412                 return 'utf-16-le'
       
   413         # default
       
   414         return 'utf-8'
       
   415 
       
   416     def json_loads(s, *args, **kwargs):
       
   417         if isinstance(s, (bytes, bytearray)):
       
   418             s = s.decode(_detect_encoding(s), 'surrogatepass')
       
   419 
       
   420         return json.loads(s, *args, **kwargs)
       
   421 
   404 
   422 
   405 else:
   423 else:
   406     import cStringIO
       
   407 
       
   408     xrange = xrange
       
   409     unicode = unicode
       
   410     bytechr = chr
       
   411     byterepr = repr
       
   412     bytestr = str
       
   413     iterbytestr = iter
       
   414     maybebytestr = identity
       
   415     sysbytes = identity
       
   416     sysstr = identity
       
   417     strurl = identity
       
   418     bytesurl = identity
       
   419     open = open
       
   420     delattr = delattr
       
   421     getattr = getattr
       
   422     hasattr = hasattr
       
   423     setattr = setattr
       
   424 
       
   425     # this can't be parsed on Python 3
       
   426     exec(b'def raisewithtb(exc, tb):\n    raise exc, None, tb\n')
       
   427 
       
   428     def fsencode(filename):
       
   429         """
       
   430         Partial backport from os.py in Python 3, which only accepts bytes.
       
   431         In Python 2, our paths should only ever be bytes, a unicode path
       
   432         indicates a bug.
       
   433         """
       
   434         if isinstance(filename, str):
       
   435             return filename
       
   436         else:
       
   437             raise TypeError("expect str, not %s" % type(filename).__name__)
       
   438 
       
   439     # In Python 2, fsdecode() is very likely to receive bytes. So it's
       
   440     # better not to touch Python 2 part as it's already working fine.
       
   441     fsdecode = identity
       
   442 
       
   443     def getdoc(obj):
       
   444         return getattr(obj, '__doc__', None)
       
   445 
       
   446     _notset = object()
       
   447 
       
   448     def safehasattr(thing, attr):
       
   449         return getattr(thing, attr, _notset) is not _notset
       
   450 
       
   451     def _getoptbwrapper(orig, args, shortlist, namelist):
       
   452         return orig(args, shortlist, namelist)
       
   453 
       
   454     strkwargs = identity
       
   455     byteskwargs = identity
       
   456 
       
   457     oscurdir = os.curdir
       
   458     oslinesep = os.linesep
       
   459     osname = os.name
       
   460     ospathsep = os.pathsep
       
   461     ospardir = os.pardir
       
   462     ossep = os.sep
       
   463     osaltsep = os.altsep
       
   464     osdevnull = os.devnull
       
   465     long = long
       
   466     if getattr(sys, 'argv', None) is not None:
       
   467         sysargv = sys.argv
       
   468     sysplatform = sys.platform
       
   469     sysexecutable = sys.executable
       
   470     shlexsplit = shlex.split
       
   471     bytesio = cStringIO.StringIO
       
   472     stringio = bytesio
       
   473     maplist = map
       
   474     rangelist = range
       
   475     ziplist = zip
       
   476     rawinput = raw_input
       
   477     getargspec = inspect.getargspec
       
   478     iteritems = lambda x: x.iteritems()
       
   479     itervalues = lambda x: x.itervalues()
       
   480     json_loads = json.loads
   424     json_loads = json.loads
   481 
   425 
   482 isjython = sysplatform.startswith(b'java')
   426 isjython = sysplatform.startswith(b'java')
   483 
   427 
   484 isdarwin = sysplatform.startswith(b'darwin')
   428 isdarwin = sysplatform.startswith(b'darwin')