Mercurial > public > mercurial-scm > hg
comparison mercurial/encoding.py @ 28066:d1cc07123243
encoding: change jsonmap to a list indexed by code point
This is slightly faster and makes it convenient to implement paranoid escaping.
$ python -m timeit \
-s 'from mercurial import encoding; data = str(bytearray(xrange(128)))' \
'encoding.jsonescape(data)'
original: 100000 loops, best of 3: 15.1 usec per loop
this patch: 100000 loops, best of 3: 13.7 usec per loop
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 30 Jan 2016 19:41:34 +0900 |
parents | ffa599f3f503 |
children | 69a02b1e947c |
comparison
equal
deleted
inserted
replaced
28065:6b1fc09c699a | 28066:d1cc07123243 |
---|---|
376 This should be kept in sync with normcase_spec in util.h.''' | 376 This should be kept in sync with normcase_spec in util.h.''' |
377 lower = -1 | 377 lower = -1 |
378 upper = 1 | 378 upper = 1 |
379 other = 0 | 379 other = 0 |
380 | 380 |
381 _jsonmap = {} | 381 _jsonmap = [] |
382 | 382 |
383 def jsonescape(s): | 383 def jsonescape(s): |
384 '''returns a string suitable for JSON | 384 '''returns a string suitable for JSON |
385 | 385 |
386 JSON is problematic for us because it doesn't support non-Unicode | 386 JSON is problematic for us because it doesn't support non-Unicode |
406 >>> jsonescape('') | 406 >>> jsonescape('') |
407 '' | 407 '' |
408 ''' | 408 ''' |
409 | 409 |
410 if not _jsonmap: | 410 if not _jsonmap: |
411 for x in xrange(32): | 411 _jsonmap.extend("\\u%04x" % x for x in xrange(32)) |
412 _jsonmap[chr(x)] = "\\u%04x" % x | 412 _jsonmap.extend(chr(x) for x in xrange(32, 256)) |
413 for x in xrange(32, 256): | 413 _jsonmap[0x7f] = '\\u007f' |
414 c = chr(x) | 414 _jsonmap[0x09] = '\\t' |
415 _jsonmap[c] = c | 415 _jsonmap[0x0a] = '\\n' |
416 _jsonmap['\x7f'] = '\\u007f' | 416 _jsonmap[0x22] = '\\"' |
417 _jsonmap['\t'] = '\\t' | 417 _jsonmap[0x5c] = '\\\\' |
418 _jsonmap['\n'] = '\\n' | 418 _jsonmap[0x08] = '\\b' |
419 _jsonmap['\"'] = '\\"' | 419 _jsonmap[0x0c] = '\\f' |
420 _jsonmap['\\'] = '\\\\' | 420 _jsonmap[0x0d] = '\\r' |
421 _jsonmap['\b'] = '\\b' | 421 |
422 _jsonmap['\f'] = '\\f' | 422 return ''.join(_jsonmap[x] for x in bytearray(toutf8b(s))) |
423 _jsonmap['\r'] = '\\r' | |
424 | |
425 return ''.join(_jsonmap[c] for c in toutf8b(s)) | |
426 | 423 |
427 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] | 424 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
428 | 425 |
429 def getutf8char(s, pos): | 426 def getutf8char(s, pos): |
430 '''get the next full utf-8 character in the given string, starting at pos | 427 '''get the next full utf-8 character in the given string, starting at pos |