comparison mercurial/encoding.py @ 28066:d1cc07123243

encoding: change jsonmap to a list indexed by code point

This is slightly faster and makes it convenient to implement paranoid escaping.

  $ python -m timeit \
    -s 'from mercurial import encoding; data = str(bytearray(xrange(128)))' \
    'encoding.jsonescape(data)'

  original:   100000 loops, best of 3: 15.1 usec per loop
  this patch: 100000 loops, best of 3: 13.7 usec per loop
author Yuya Nishihara <yuya@tcha.org>
date Sat, 30 Jan 2016 19:41:34 +0900
parents ffa599f3f503
children 69a02b1e947c
comparison
equal deleted inserted replaced
28065:6b1fc09c699a 28066:d1cc07123243
376 This should be kept in sync with normcase_spec in util.h.''' 376 This should be kept in sync with normcase_spec in util.h.'''
377 lower = -1 377 lower = -1
378 upper = 1 378 upper = 1
379 other = 0 379 other = 0
380 380
381 _jsonmap = {} 381 _jsonmap = []
382 382
383 def jsonescape(s): 383 def jsonescape(s):
384 '''returns a string suitable for JSON 384 '''returns a string suitable for JSON
385 385
386 JSON is problematic for us because it doesn't support non-Unicode 386 JSON is problematic for us because it doesn't support non-Unicode
406 >>> jsonescape('') 406 >>> jsonescape('')
407 '' 407 ''
408 ''' 408 '''
409 409
410 if not _jsonmap: 410 if not _jsonmap:
411 for x in xrange(32): 411 _jsonmap.extend("\\u%04x" % x for x in xrange(32))
412 _jsonmap[chr(x)] = "\\u%04x" % x 412 _jsonmap.extend(chr(x) for x in xrange(32, 256))
413 for x in xrange(32, 256): 413 _jsonmap[0x7f] = '\\u007f'
414 c = chr(x) 414 _jsonmap[0x09] = '\\t'
415 _jsonmap[c] = c 415 _jsonmap[0x0a] = '\\n'
416 _jsonmap['\x7f'] = '\\u007f' 416 _jsonmap[0x22] = '\\"'
417 _jsonmap['\t'] = '\\t' 417 _jsonmap[0x5c] = '\\\\'
418 _jsonmap['\n'] = '\\n' 418 _jsonmap[0x08] = '\\b'
419 _jsonmap['\"'] = '\\"' 419 _jsonmap[0x0c] = '\\f'
420 _jsonmap['\\'] = '\\\\' 420 _jsonmap[0x0d] = '\\r'
421 _jsonmap['\b'] = '\\b' 421
422 _jsonmap['\f'] = '\\f' 422 return ''.join(_jsonmap[x] for x in bytearray(toutf8b(s)))
423 _jsonmap['\r'] = '\\r'
424
425 return ''.join(_jsonmap[c] for c in toutf8b(s))
426 423
427 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] 424 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
428 425
429 def getutf8char(s, pos): 426 def getutf8char(s, pos):
430 '''get the next full utf-8 character in the given string, starting at pos 427 '''get the next full utf-8 character in the given string, starting at pos