comparison mercurial/pycompat.py @ 43380:579672b347d2 stable

py3: define and use json.loads polyfill

Python 3.5's json.loads() requires a str. Only Python 3.6+ supports passing a bytes or bytearray. This commit implements a json.loads() polyfill on Python 3.5 so that we can use bytes. The added function to detect encodings comes verbatim from Python 3.7.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 02 Nov 2019 12:09:35 -0700
parents 8ff1ecfadcd1
children 93f74a7d3f07
comparison
equal deleted inserted replaced
43379:bb509f39d387 43380:579672b347d2
10 10
11 from __future__ import absolute_import 11 from __future__ import absolute_import
12 12
13 import getopt 13 import getopt
14 import inspect 14 import inspect
15 import json
15 import os 16 import os
16 import shlex 17 import shlex
17 import sys 18 import sys
18 import tempfile 19 import tempfile
19 20
86 return _rapply(f, xs) 87 return _rapply(f, xs)
87 88
88 89
89 if ispy3: 90 if ispy3:
90 import builtins 91 import builtins
92 import codecs
91 import functools 93 import functools
92 import io 94 import io
93 import struct 95 import struct
94 96
95 fsencode = os.fsencode 97 fsencode = os.fsencode
337 ret = shlex.split(s.decode('latin-1'), comments, posix) 339 ret = shlex.split(s.decode('latin-1'), comments, posix)
338 return [a.encode('latin-1') for a in ret] 340 return [a.encode('latin-1') for a in ret]
339 341
340 iteritems = lambda x: x.items() 342 iteritems = lambda x: x.items()
341 itervalues = lambda x: x.values() 343 itervalues = lambda x: x.values()
344
345 # Python 3.5's json.load and json.loads require str. We polyfill json.loads
346 # with the bytes encoding-detection code taken from Python 3.7's json module.
347 if sys.version_info[0:2] < (3, 6):
348
def _detect_encoding(b):
    """Guess which Unicode encoding the JSON document in ``b`` uses.

    ``b`` is a ``bytes`` or ``bytearray`` object. The return value is a
    codec name suitable for ``b.decode()``: one of ``'utf-8'``,
    ``'utf-8-sig'``, ``'utf-16'``, ``'utf-16-be'``, ``'utf-16-le'``,
    ``'utf-32'``, ``'utf-32-be'`` or ``'utf-32-le'``.

    A byte order mark is honoured when present. Otherwise the position
    of NUL bytes among the leading bytes is used to tell the UTF-16 and
    UTF-32 variants apart. Anything that matches no pattern is reported
    as UTF-8.
    """
    bstartswith = b.startswith

    # UTF-32 must be tested before UTF-16: the UTF-32-LE BOM begins with
    # the same two bytes as the UTF-16-LE BOM.
    if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if bstartswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    # No BOM. Indexing bytes/bytearray yields ints, so ``not b[i]`` is
    # true exactly when byte ``i`` is NUL.
    if len(b) >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif len(b) == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'

    # default
    return 'utf-8'
377
def json_loads(s, *args, **kwargs):
    """Deserialize the JSON document ``s``, accepting bytes as well as str.

    This is the fallback for Python versions older than 3.6, whose
    ``json.loads()`` requires a ``str``. A ``bytes`` or ``bytearray``
    argument is decoded first, using the encoding reported by
    ``_detect_encoding()``. Any other argument is handed to
    ``json.loads()`` untouched. Extra positional and keyword arguments
    are forwarded to ``json.loads()`` as-is, and its return value is
    returned.
    """
    if isinstance(s, (bytes, bytearray)):
        # 'surrogatepass' lets encoded lone surrogates through instead of
        # raising during the decode step.
        s = s.decode(_detect_encoding(s), 'surrogatepass')

    return json.loads(s, *args, **kwargs)
383
384 else:
385 json_loads = json.loads
342 386
343 else: 387 else:
344 import cStringIO 388 import cStringIO
345 389
346 xrange = xrange 390 xrange = xrange
415 ziplist = zip 459 ziplist = zip
416 rawinput = raw_input 460 rawinput = raw_input
417 getargspec = inspect.getargspec 461 getargspec = inspect.getargspec
418 iteritems = lambda x: x.iteritems() 462 iteritems = lambda x: x.iteritems()
419 itervalues = lambda x: x.itervalues() 463 itervalues = lambda x: x.itervalues()
464 json_loads = json.loads
420 465
421 isjython = sysplatform.startswith(b'java') 466 isjython = sysplatform.startswith(b'java')
422 467
423 isdarwin = sysplatform.startswith(b'darwin') 468 isdarwin = sysplatform.startswith(b'darwin')
424 islinux = sysplatform.startswith(b'linux') 469 islinux = sysplatform.startswith(b'linux')