Mercurial > public > mercurial-scm > hg
comparison mercurial/encoding.py @ 22426:f6b533e64ed6
encoding: add json escaping filter
This ends up here because it needs to be somewhat encoding aware.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Mon, 15 Sep 2014 13:12:49 -0500 |
parents | 6fd944c204a9 |
children | 80f2b63dd83a |
comparison
legend: equal, deleted, inserted, replaced
22425:6fd944c204a9 | 22426:f6b533e64ed6 |
---|---|
300 except UnicodeError: | 300 except UnicodeError: |
301 return s.upper() # we don't know how to fold this except in ASCII | 301 return s.upper() # we don't know how to fold this except in ASCII |
302 except LookupError, k: | 302 except LookupError, k: |
303 raise error.Abort(k, hint="please check your locale settings") | 303 raise error.Abort(k, hint="please check your locale settings") |
304 | 304 |
305 _jsonmap = {} | |
306 | |
307 def jsonescape(s): | |
308 '''returns a string suitable for JSON | |
309 | |
310 JSON is problematic for us because it doesn't support non-Unicode | |
311 bytes. To deal with this, we take the following approach: | |
312 | |
313 - localstr objects are converted back to UTF-8 | |
314 - valid UTF-8/ASCII strings are passed as-is | |
315 - other strings are converted to UTF-8b surrogate encoding | |
316 - apply JSON-specified string escaping | |
317 | |
318 (escapes are doubled in these tests) | |
319 | |
320 >>> jsonescape('this is a test') | |
321 'this is a test' | |
322 >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\') | |
323 'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\' | |
324 >>> jsonescape('a weird byte: \\xdd') | |
325 'a weird byte: \\xed\\xb3\\x9d' | |
326 >>> jsonescape('utf-8: caf\\xc3\\xa9') | |
327 'utf-8: caf\\xc3\\xa9' | |
328 >>> jsonescape('') | |
329 '' | |
330 ''' | |
331 | |
332 if not _jsonmap: | |
333 for x in xrange(32): | |
334 _jsonmap[chr(x)] = "\u%04x" %x | |
335 for x in xrange(32, 256): | |
336 c = chr(x) | |
337 _jsonmap[c] = c | |
338 _jsonmap['\t'] = '\\t' | |
339 _jsonmap['\n'] = '\\n' | |
340 _jsonmap['\"'] = '\\"' | |
341 _jsonmap['\\'] = '\\\\' | |
342 _jsonmap['\b'] = '\\b' | |
343 _jsonmap['\f'] = '\\f' | |
344 _jsonmap['\r'] = '\\r' | |
345 | |
346 return ''.join(_jsonmap[c] for c in toutf8b(s)) | |
347 | |
305 def toutf8b(s): | 348 def toutf8b(s): |
306 '''convert a local, possibly-binary string into UTF-8b | 349 '''convert a local, possibly-binary string into UTF-8b |
307 | 350 |
308 This is intended as a generic method to preserve data when working | 351 This is intended as a generic method to preserve data when working |
309 with schemes like JSON and XML that have no provision for | 352 with schemes like JSON and XML that have no provision for |