Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/pycompat.py @ 43380:579672b347d2 stable
py3: define and use json.loads polyfill
Python 3.5's json.loads() requires a str. Only Python 3.6+
supports passing a bytes or bytearray.
This commit implements a json.loads() polyfill on Python 3.5
so that we can use bytes. The added function to detect encodings
comes verbatim from Python 3.7.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 02 Nov 2019 12:09:35 -0700 |
parents | 8ff1ecfadcd1 |
children | 93f74a7d3f07 |
comparison
equal
deleted
inserted
replaced
43379:bb509f39d387 | 43380:579672b347d2 |
---|---|
10 | 10 |
11 from __future__ import absolute_import | 11 from __future__ import absolute_import |
12 | 12 |
13 import getopt | 13 import getopt |
14 import inspect | 14 import inspect |
15 import json | |
15 import os | 16 import os |
16 import shlex | 17 import shlex |
17 import sys | 18 import sys |
18 import tempfile | 19 import tempfile |
19 | 20 |
86 return _rapply(f, xs) | 87 return _rapply(f, xs) |
87 | 88 |
88 | 89 |
89 if ispy3: | 90 if ispy3: |
90 import builtins | 91 import builtins |
92 import codecs | |
91 import functools | 93 import functools |
92 import io | 94 import io |
93 import struct | 95 import struct |
94 | 96 |
95 fsencode = os.fsencode | 97 fsencode = os.fsencode |
337 ret = shlex.split(s.decode('latin-1'), comments, posix) | 339 ret = shlex.split(s.decode('latin-1'), comments, posix) |
338 return [a.encode('latin-1') for a in ret] | 340 return [a.encode('latin-1') for a in ret] |
339 | 341 |
340 iteritems = lambda x: x.items() | 342 iteritems = lambda x: x.items() |
341 itervalues = lambda x: x.values() | 343 itervalues = lambda x: x.values() |
344 | |
345 # Python 3.5's json.load and json.loads require str. We polyfill its | |
346 # code for detecting encoding from bytes. | |
347 if sys.version_info[0:2] < (3, 6): | |
348 | |
349 def _detect_encoding(b): | |
350 bstartswith = b.startswith | |
351 if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): | |
352 return 'utf-32' | |
353 if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): | |
354 return 'utf-16' | |
355 if bstartswith(codecs.BOM_UTF8): | |
356 return 'utf-8-sig' | |
357 | |
358 if len(b) >= 4: | |
359 if not b[0]: | |
360 # 00 00 -- -- - utf-32-be | |
361 # 00 XX -- -- - utf-16-be | |
362 return 'utf-16-be' if b[1] else 'utf-32-be' | |
363 if not b[1]: | |
364 # XX 00 00 00 - utf-32-le | |
365 # XX 00 00 XX - utf-16-le | |
366 # XX 00 XX -- - utf-16-le | |
367 return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' | |
368 elif len(b) == 2: | |
369 if not b[0]: | |
370 # 00 XX - utf-16-be | |
371 return 'utf-16-be' | |
372 if not b[1]: | |
373 # XX 00 - utf-16-le | |
374 return 'utf-16-le' | |
375 # default | |
376 return 'utf-8' | |
377 | |
378 def json_loads(s, *args, **kwargs): | |
379 if isinstance(s, (bytes, bytearray)): | |
380 s = s.decode(_detect_encoding(s), 'surrogatepass') | |
381 | |
382 return json.loads(s, *args, **kwargs) | |
383 | |
384 else: | |
385 json_loads = json.loads | |
342 | 386 |
343 else: | 387 else: |
344 import cStringIO | 388 import cStringIO |
345 | 389 |
346 xrange = xrange | 390 xrange = xrange |
415 ziplist = zip | 459 ziplist = zip |
416 rawinput = raw_input | 460 rawinput = raw_input |
417 getargspec = inspect.getargspec | 461 getargspec = inspect.getargspec |
418 iteritems = lambda x: x.iteritems() | 462 iteritems = lambda x: x.iteritems() |
419 itervalues = lambda x: x.itervalues() | 463 itervalues = lambda x: x.itervalues() |
464 json_loads = json.loads | |
420 | 465 |
421 isjython = sysplatform.startswith(b'java') | 466 isjython = sysplatform.startswith(b'java') |
422 | 467 |
423 isdarwin = sysplatform.startswith(b'darwin') | 468 isdarwin = sysplatform.startswith(b'darwin') |
424 islinux = sysplatform.startswith(b'linux') | 469 islinux = sysplatform.startswith(b'linux') |