comparison mercurial/encoding.py @ 34131:0fa781320203

doctest: bulk-replace string literals with b'' for Python 3. Our code transformer can't rewrite string literals in docstrings, and I don't want to make the transformer more complex.
author Yuya Nishihara <yuya@tcha.org>
date Sun, 03 Sep 2017 14:32:11 +0900
parents 6c119dbfd0c0
children e9e225f16932
comparison
equal deleted inserted replaced
34130:ada8a19672ab 34131:0fa781320203
106 106
107 The localstr class is used to cache the known UTF-8 encoding of 107 The localstr class is used to cache the known UTF-8 encoding of
108 strings next to their local representation to allow lossless 108 strings next to their local representation to allow lossless
109 round-trip conversion back to UTF-8. 109 round-trip conversion back to UTF-8.
110 110
111 >>> u = 'foo: \\xc3\\xa4' # utf-8 111 >>> u = b'foo: \\xc3\\xa4' # utf-8
112 >>> l = tolocal(u) 112 >>> l = tolocal(u)
113 >>> l 113 >>> l
114 'foo: ?' 114 'foo: ?'
115 >>> fromlocal(l) 115 >>> fromlocal(l)
116 'foo: \\xc3\\xa4' 116 'foo: \\xc3\\xa4'
117 >>> u2 = 'foo: \\xc3\\xa1' 117 >>> u2 = b'foo: \\xc3\\xa1'
118 >>> d = { l: 1, tolocal(u2): 2 } 118 >>> d = { l: 1, tolocal(u2): 2 }
119 >>> len(d) # no collision 119 >>> len(d) # no collision
120 2 120 2
121 >>> 'foo: ?' in d 121 >>> b'foo: ?' in d
122 False 122 False
123 >>> l1 = 'foo: \\xe4' # historical latin1 fallback 123 >>> l1 = b'foo: \\xe4' # historical latin1 fallback
124 >>> l = tolocal(l1) 124 >>> l = tolocal(l1)
125 >>> l 125 >>> l
126 'foo: ?' 126 'foo: ?'
127 >>> fromlocal(l) # magically in utf-8 127 >>> fromlocal(l) # magically in utf-8
128 'foo: \\xc3\\xa4' 128 'foo: \\xc3\\xa4'
245 """Trim string 's' to at most 'width' columns (including 'ellipsis'). 245 """Trim string 's' to at most 'width' columns (including 'ellipsis').
246 246
247 If 'leftside' is True, left side of string 's' is trimmed. 247 If 'leftside' is True, left side of string 's' is trimmed.
248 'ellipsis' is always placed at trimmed side. 248 'ellipsis' is always placed at trimmed side.
249 249
250 >>> ellipsis = '+++' 250 >>> ellipsis = b'+++'
251 >>> from . import encoding 251 >>> from . import encoding
252 >>> encoding.encoding = 'utf-8' 252 >>> encoding.encoding = b'utf-8'
253 >>> t= '1234567890' 253 >>> t = b'1234567890'
254 >>> print trim(t, 12, ellipsis=ellipsis) 254 >>> print trim(t, 12, ellipsis=ellipsis)
255 1234567890 255 1234567890
256 >>> print trim(t, 10, ellipsis=ellipsis) 256 >>> print trim(t, 10, ellipsis=ellipsis)
257 1234567890 257 1234567890
258 >>> print trim(t, 8, ellipsis=ellipsis) 258 >>> print trim(t, 8, ellipsis=ellipsis)
283 \xe3\x81\x88\xe3\x81\x8a 283 \xe3\x81\x88\xe3\x81\x8a
284 >>> print trim(t, 4, ellipsis=ellipsis) 284 >>> print trim(t, 4, ellipsis=ellipsis)
285 +++ 285 +++
286 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True) 286 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
287 +++ 287 +++
288 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence 288 >>> t = b'\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
289 >>> print trim(t, 12, ellipsis=ellipsis) 289 >>> print trim(t, 12, ellipsis=ellipsis)
290 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa 290 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
291 >>> print trim(t, 10, ellipsis=ellipsis) 291 >>> print trim(t, 10, ellipsis=ellipsis)
292 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa 292 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
293 >>> print trim(t, 8, ellipsis=ellipsis) 293 >>> print trim(t, 8, ellipsis=ellipsis)
404 - other strings are converted to UTF-8b surrogate encoding 404 - other strings are converted to UTF-8b surrogate encoding
405 - apply JSON-specified string escaping 405 - apply JSON-specified string escaping
406 406
407 (escapes are doubled in these tests) 407 (escapes are doubled in these tests)
408 408
409 >>> jsonescape('this is a test') 409 >>> jsonescape(b'this is a test')
410 'this is a test' 410 'this is a test'
411 >>> jsonescape('escape characters: \\0 \\x0b \\x7f') 411 >>> jsonescape(b'escape characters: \\0 \\x0b \\x7f')
412 'escape characters: \\\\u0000 \\\\u000b \\\\u007f' 412 'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
413 >>> jsonescape('escape characters: \\b \\t \\n \\f \\r \\" \\\\') 413 >>> jsonescape(b'escape characters: \\b \\t \\n \\f \\r \\" \\\\')
414 'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\' 414 'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\'
415 >>> jsonescape('a weird byte: \\xdd') 415 >>> jsonescape(b'a weird byte: \\xdd')
416 'a weird byte: \\xed\\xb3\\x9d' 416 'a weird byte: \\xed\\xb3\\x9d'
417 >>> jsonescape('utf-8: caf\\xc3\\xa9') 417 >>> jsonescape(b'utf-8: caf\\xc3\\xa9')
418 'utf-8: caf\\xc3\\xa9' 418 'utf-8: caf\\xc3\\xa9'
419 >>> jsonescape('') 419 >>> jsonescape(b'')
420 '' 420 ''
421 421
422 If paranoid, non-ascii and common troublesome characters are also escaped. 422 If paranoid, non-ascii and common troublesome characters are also escaped.
423 This is suitable for web output. 423 This is suitable for web output.
424 424
425 >>> s = 'escape characters: \\0 \\x0b \\x7f' 425 >>> s = b'escape characters: \\0 \\x0b \\x7f'
426 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) 426 >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
427 >>> s = 'escape characters: \\b \\t \\n \\f \\r \\" \\\\' 427 >>> s = b'escape characters: \\b \\t \\n \\f \\r \\" \\\\'
428 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) 428 >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
429 >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True) 429 >>> jsonescape(b'escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
430 'escape boundary: ~ \\\\u007f \\\\u0080' 430 'escape boundary: ~ \\\\u007f \\\\u0080'
431 >>> jsonescape('a weird byte: \\xdd', paranoid=True) 431 >>> jsonescape(b'a weird byte: \\xdd', paranoid=True)
432 'a weird byte: \\\\udcdd' 432 'a weird byte: \\\\udcdd'
433 >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True) 433 >>> jsonescape(b'utf-8: caf\\xc3\\xa9', paranoid=True)
434 'utf-8: caf\\\\u00e9' 434 'utf-8: caf\\\\u00e9'
435 >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True) 435 >>> jsonescape(b'non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
436 'non-BMP: \\\\ud834\\\\udd1e' 436 'non-BMP: \\\\ud834\\\\udd1e'
437 >>> jsonescape('<foo@example.org>', paranoid=True) 437 >>> jsonescape(b'<foo@example.org>', paranoid=True)
438 '\\\\u003cfoo@example.org\\\\u003e' 438 '\\\\u003cfoo@example.org\\\\u003e'
439 ''' 439 '''
440 440
441 u8chars = toutf8b(s) 441 u8chars = toutf8b(s)
442 try: 442 try:
529 return the original binary string. This 529 return the original binary string. This
530 is a round-trip process for strings like filenames, but metadata 530 is a round-trip process for strings like filenames, but metadata
531 that was passed through tolocal will remain in UTF-8. 531 that was passed through tolocal will remain in UTF-8.
532 532
533 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x 533 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
534 >>> m = "\\xc3\\xa9\\x99abcd" 534 >>> m = b"\\xc3\\xa9\\x99abcd"
535 >>> toutf8b(m) 535 >>> toutf8b(m)
536 '\\xc3\\xa9\\xed\\xb2\\x99abcd' 536 '\\xc3\\xa9\\xed\\xb2\\x99abcd'
537 >>> roundtrip(m) 537 >>> roundtrip(m)
538 True 538 True
539 >>> roundtrip("\\xc2\\xc2\\x80") 539 >>> roundtrip(b"\\xc2\\xc2\\x80")
540 True 540 True
541 >>> roundtrip("\\xef\\xbf\\xbd") 541 >>> roundtrip(b"\\xef\\xbf\\xbd")
542 True 542 True
543 >>> roundtrip("\\xef\\xef\\xbf\\xbd") 543 >>> roundtrip(b"\\xef\\xef\\xbf\\xbd")
544 True 544 True
545 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80") 545 >>> roundtrip(b"\\xf1\\x80\\x80\\x80\\x80")
546 True 546 True
547 ''' 547 '''
548 548
549 if isasciistr(s): 549 if isasciistr(s):
550 return s 550 return s