changeset 33924 | b9101467d88b |
parent 33852 | f18b11534274 |
child 33925 | 2c37f9dabc32 |
33923:e6d421566906 | 33924:b9101467d88b |
---|---|
5 # This software may be used and distributed according to the terms of the |
5 # This software may be used and distributed according to the terms of the |
6 # GNU General Public License version 2 or any later version. |
6 # GNU General Public License version 2 or any later version. |
7 |
7 |
8 from __future__ import absolute_import |
8 from __future__ import absolute_import |
9 |
9 |
10 import array |
|
11 import io |
10 import io |
12 import locale |
11 import locale |
13 import os |
12 import os |
14 import unicodedata |
13 import unicodedata |
15 |
14 |
17 error, |
16 error, |
18 policy, |
17 policy, |
19 pycompat, |
18 pycompat, |
20 ) |
19 ) |
21 |
20 |
21 from .pure import ( |
|
22 charencode as charencodepure, |
|
23 ) |
|
24 |
|
22 charencode = policy.importmod(r'charencode') |
25 charencode = policy.importmod(r'charencode') |
23 |
26 |
24 asciilower = charencode.asciilower |
27 asciilower = charencode.asciilower |
25 asciiupper = charencode.asciiupper |
28 asciiupper = charencode.asciiupper |
29 _jsonescapeu8fast = charencodepure.jsonescapeu8fast # TODO: no "pure" |
|
26 |
30 |
27 _sysstr = pycompat.sysstr |
31 _sysstr = pycompat.sysstr |
28 |
32 |
29 if pycompat.ispy3: |
33 if pycompat.ispy3: |
30 unichr = chr |
34 unichr = chr |
381 This should be kept in sync with normcase_spec in util.h.''' |
385 This should be kept in sync with normcase_spec in util.h.''' |
382 lower = -1 |
386 lower = -1 |
383 upper = 1 |
387 upper = 1 |
384 other = 0 |
388 other = 0 |
385 |
389 |
386 _jsonmap = [] |
|
387 _jsonmap.extend("\\u%04x" % x for x in range(32)) |
|
388 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127)) |
|
389 _jsonmap.append('\\u007f') |
|
390 _jsonmap[0x09] = '\\t' |
|
391 _jsonmap[0x0a] = '\\n' |
|
392 _jsonmap[0x22] = '\\"' |
|
393 _jsonmap[0x5c] = '\\\\' |
|
394 _jsonmap[0x08] = '\\b' |
|
395 _jsonmap[0x0c] = '\\f' |
|
396 _jsonmap[0x0d] = '\\r' |
|
397 _paranoidjsonmap = _jsonmap[:] |
|
398 _paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>") |
|
399 _paranoidjsonmap[0x3e] = '\\u003e' # '>' |
|
400 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256)) |
|
401 |
|
402 def jsonescape(s, paranoid=False): |
390 def jsonescape(s, paranoid=False): |
403 '''returns a string suitable for JSON |
391 '''returns a string suitable for JSON |
404 |
392 |
405 JSON is problematic for us because it doesn't support non-Unicode |
393 JSON is problematic for us because it doesn't support non-Unicode |
406 bytes. To deal with this, we take the following approach: |
394 bytes. To deal with this, we take the following approach: |
438 'non-BMP: \\\\ud834\\\\udd1e' |
426 'non-BMP: \\\\ud834\\\\udd1e' |
439 >>> jsonescape('<foo@example.org>', paranoid=True) |
427 >>> jsonescape('<foo@example.org>', paranoid=True) |
440 '\\\\u003cfoo@example.org\\\\u003e' |
428 '\\\\u003cfoo@example.org\\\\u003e' |
441 ''' |
429 ''' |
442 |
430 |
443 if paranoid: |
|
444 jm = _paranoidjsonmap |
|
445 else: |
|
446 jm = _jsonmap |
|
447 |
|
448 u8chars = toutf8b(s) |
431 u8chars = toutf8b(s) |
449 try: |
432 try: |
450 return ''.join(jm[x] for x in bytearray(u8chars)) # fast path |
433 return _jsonescapeu8fast(u8chars, paranoid) |
451 except IndexError: |
434 except ValueError: |
452 pass |
435 pass |
453 # non-BMP char is represented as UTF-16 surrogate pair |
436 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) |
454 u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16')) |
|
455 u16codes.pop(0) # drop BOM |
|
456 return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes) |
|
457 |
437 |
458 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
438 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
459 |
439 |
460 def getutf8char(s, pos): |
440 def getutf8char(s, pos): |
461 '''get the next full utf-8 character in the given string, starting at pos |
441 '''get the next full utf-8 character in the given string, starting at pos |