comparison mercurial/encoding.py @ 22426:f6b533e64ed6

encoding: add json escaping filter. This ends up here because it needs to be somewhat encoding aware.
author Matt Mackall <mpm@selenic.com>
date Mon, 15 Sep 2014 13:12:49 -0500
parents 6fd944c204a9
children 80f2b63dd83a
comparison
equal deleted inserted replaced
22425:6fd944c204a9 22426:f6b533e64ed6
300 except UnicodeError: 300 except UnicodeError:
301 return s.upper() # we don't know how to fold this except in ASCII 301 return s.upper() # we don't know how to fold this except in ASCII
302 except LookupError, k: 302 except LookupError, k:
303 raise error.Abort(k, hint="please check your locale settings") 303 raise error.Abort(k, hint="please check your locale settings")
304 304
# Lookup table mapping each single-byte string to its JSON-escaped form.
# It starts empty and is filled in by the first call to jsonescape().
_jsonmap = {}

def jsonescape(s):
    '''returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    The first three steps are delegated to toutf8b(); this function then
    escapes the result one byte at a time. The returned string does not
    include the surrounding double quotes.

    (escapes are doubled in these tests)

    >>> jsonescape('this is a test')
    'this is a test'
    >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\')
    'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
    >>> jsonescape('a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape('utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape('')
    ''
    '''

    if not _jsonmap:
        # Control characters default to the generic \uXXXX form. The
        # backslash is written out explicitly ('\\u') instead of relying on
        # "\u" being left alone as an unrecognized escape sequence.
        for x in xrange(32):
            _jsonmap[chr(x)] = '\\u%04x' % x
        # Every other byte value maps to itself.
        for x in xrange(32, 256):
            c = chr(x)
            _jsonmap[c] = c
        # Short escapes; these must be applied after the loops above so
        # they replace the defaults for the same keys.
        _jsonmap.update({
            '\t': '\\t',
            '\n': '\\n',
            '"': '\\"',
            '\\': '\\\\',
            '\b': '\\b',
            '\f': '\\f',
            '\r': '\\r',
        })

    return ''.join(_jsonmap[c] for c in toutf8b(s))
347
305 def toutf8b(s): 348 def toutf8b(s):
306 '''convert a local, possibly-binary string into UTF-8b 349 '''convert a local, possibly-binary string into UTF-8b
307 350
308 This is intended as a generic method to preserve data when working 351 This is intended as a generic method to preserve data when working
309 with schemes like JSON and XML that have no provision for 352 with schemes like JSON and XML that have no provision for