412 _jsonmap['\f'] = '\\f' |
412 _jsonmap['\f'] = '\\f' |
413 _jsonmap['\r'] = '\\r' |
413 _jsonmap['\r'] = '\\r' |
414 |
414 |
415 return ''.join(_jsonmap[c] for c in toutf8b(s)) |
415 return ''.join(_jsonmap[c] for c in toutf8b(s)) |
416 |
416 |
|
# Number of bytes to attempt decoding, indexed by the high nibble of the
# first byte: 0 marks plain ASCII (0x0-0x7); 1 is used for 0x8-0xb, where a
# lone byte never decodes as UTF-8; 2, 3 and 4 are multi-byte lead bytes.
_utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]

def getutf8char(s, pos):
    '''get the next full utf-8 character in the given string, starting at pos

    s is a byte string and pos is a non-negative index into it. The
    return value is the slice of s holding the complete character that
    starts at pos (one byte for ASCII, up to four bytes otherwise).

    Raises a UnicodeError if the given location does not start a valid
    utf-8 character, and an IndexError if pos is past the end of s.
    '''

    # slice instead of indexing: indexing bytes yields an int on Python 3
    # (which ord() rejects), while a one-byte slice is a byte string on
    # both Python 2 and Python 3
    first = s[pos:pos + 1]
    if not first:
        # keep the error that plain indexing would have raised
        raise IndexError('string index out of range')

    # find how many bytes to attempt decoding from first nibble
    l = _utf8len[ord(first) >> 4]
    if not l: # ascii
        return first

    c = s[pos:pos + l]
    # validate with attempted decode; a truncated or malformed sequence
    # raises UnicodeDecodeError (a UnicodeError subclass) here
    c.decode("utf-8")
    return c
|
435 |
417 def toutf8b(s): |
436 def toutf8b(s): |
418 '''convert a local, possibly-binary string into UTF-8b |
437 '''convert a local, possibly-binary string into UTF-8b |
419 |
438 |
420 This is intended as a generic method to preserve data when working |
439 This is intended as a generic method to preserve data when working |
421 with schemes like JSON and XML that have no provision for |
440 with schemes like JSON and XML that have no provision for |