Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/encoding.py @ 34146:0fa781320203
doctest: bulk-replace string literals with b'' for Python 3
Our code transformer can't rewrite string literals in docstrings, and I
don't want to make the transformer more complex.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 03 Sep 2017 14:32:11 +0900 |
parents | 6c119dbfd0c0 |
children | e9e225f16932 |
comparison
equal
deleted
inserted
replaced
34145:ada8a19672ab | 34146:0fa781320203 |
---|---|
106 | 106 |
107 The localstr class is used to cache the known UTF-8 encoding of | 107 The localstr class is used to cache the known UTF-8 encoding of |
108 strings next to their local representation to allow lossless | 108 strings next to their local representation to allow lossless |
109 round-trip conversion back to UTF-8. | 109 round-trip conversion back to UTF-8. |
110 | 110 |
111 >>> u = 'foo: \\xc3\\xa4' # utf-8 | 111 >>> u = b'foo: \\xc3\\xa4' # utf-8 |
112 >>> l = tolocal(u) | 112 >>> l = tolocal(u) |
113 >>> l | 113 >>> l |
114 'foo: ?' | 114 'foo: ?' |
115 >>> fromlocal(l) | 115 >>> fromlocal(l) |
116 'foo: \\xc3\\xa4' | 116 'foo: \\xc3\\xa4' |
117 >>> u2 = 'foo: \\xc3\\xa1' | 117 >>> u2 = b'foo: \\xc3\\xa1' |
118 >>> d = { l: 1, tolocal(u2): 2 } | 118 >>> d = { l: 1, tolocal(u2): 2 } |
119 >>> len(d) # no collision | 119 >>> len(d) # no collision |
120 2 | 120 2 |
121 >>> 'foo: ?' in d | 121 >>> b'foo: ?' in d |
122 False | 122 False |
123 >>> l1 = 'foo: \\xe4' # historical latin1 fallback | 123 >>> l1 = b'foo: \\xe4' # historical latin1 fallback |
124 >>> l = tolocal(l1) | 124 >>> l = tolocal(l1) |
125 >>> l | 125 >>> l |
126 'foo: ?' | 126 'foo: ?' |
127 >>> fromlocal(l) # magically in utf-8 | 127 >>> fromlocal(l) # magically in utf-8 |
128 'foo: \\xc3\\xa4' | 128 'foo: \\xc3\\xa4' |
245 """Trim string 's' to at most 'width' columns (including 'ellipsis'). | 245 """Trim string 's' to at most 'width' columns (including 'ellipsis'). |
246 | 246 |
247 If 'leftside' is True, left side of string 's' is trimmed. | 247 If 'leftside' is True, left side of string 's' is trimmed. |
248 'ellipsis' is always placed at trimmed side. | 248 'ellipsis' is always placed at trimmed side. |
249 | 249 |
250 >>> ellipsis = '+++' | 250 >>> ellipsis = b'+++' |
251 >>> from . import encoding | 251 >>> from . import encoding |
252 >>> encoding.encoding = 'utf-8' | 252 >>> encoding.encoding = b'utf-8' |
253 >>> t= '1234567890' | 253 >>> t = b'1234567890' |
254 >>> print trim(t, 12, ellipsis=ellipsis) | 254 >>> print trim(t, 12, ellipsis=ellipsis) |
255 1234567890 | 255 1234567890 |
256 >>> print trim(t, 10, ellipsis=ellipsis) | 256 >>> print trim(t, 10, ellipsis=ellipsis) |
257 1234567890 | 257 1234567890 |
258 >>> print trim(t, 8, ellipsis=ellipsis) | 258 >>> print trim(t, 8, ellipsis=ellipsis) |
283 \xe3\x81\x88\xe3\x81\x8a | 283 \xe3\x81\x88\xe3\x81\x8a |
284 >>> print trim(t, 4, ellipsis=ellipsis) | 284 >>> print trim(t, 4, ellipsis=ellipsis) |
285 +++ | 285 +++ |
286 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True) | 286 >>> print trim(t, 4, ellipsis=ellipsis, leftside=True) |
287 +++ | 287 +++ |
288 >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence | 288 >>> t = b'\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence |
289 >>> print trim(t, 12, ellipsis=ellipsis) | 289 >>> print trim(t, 12, ellipsis=ellipsis) |
290 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa | 290 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa |
291 >>> print trim(t, 10, ellipsis=ellipsis) | 291 >>> print trim(t, 10, ellipsis=ellipsis) |
292 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa | 292 \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa |
293 >>> print trim(t, 8, ellipsis=ellipsis) | 293 >>> print trim(t, 8, ellipsis=ellipsis) |
404 - other strings are converted to UTF-8b surrogate encoding | 404 - other strings are converted to UTF-8b surrogate encoding |
405 - apply JSON-specified string escaping | 405 - apply JSON-specified string escaping |
406 | 406 |
407 (escapes are doubled in these tests) | 407 (escapes are doubled in these tests) |
408 | 408 |
409 >>> jsonescape('this is a test') | 409 >>> jsonescape(b'this is a test') |
410 'this is a test' | 410 'this is a test' |
411 >>> jsonescape('escape characters: \\0 \\x0b \\x7f') | 411 >>> jsonescape(b'escape characters: \\0 \\x0b \\x7f') |
412 'escape characters: \\\\u0000 \\\\u000b \\\\u007f' | 412 'escape characters: \\\\u0000 \\\\u000b \\\\u007f' |
413 >>> jsonescape('escape characters: \\b \\t \\n \\f \\r \\" \\\\') | 413 >>> jsonescape(b'escape characters: \\b \\t \\n \\f \\r \\" \\\\') |
414 'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\' | 414 'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\' |
415 >>> jsonescape('a weird byte: \\xdd') | 415 >>> jsonescape(b'a weird byte: \\xdd') |
416 'a weird byte: \\xed\\xb3\\x9d' | 416 'a weird byte: \\xed\\xb3\\x9d' |
417 >>> jsonescape('utf-8: caf\\xc3\\xa9') | 417 >>> jsonescape(b'utf-8: caf\\xc3\\xa9') |
418 'utf-8: caf\\xc3\\xa9' | 418 'utf-8: caf\\xc3\\xa9' |
419 >>> jsonescape('') | 419 >>> jsonescape(b'') |
420 '' | 420 '' |
421 | 421 |
422 If paranoid, non-ascii and common troublesome characters are also escaped. | 422 If paranoid, non-ascii and common troublesome characters are also escaped. |
423 This is suitable for web output. | 423 This is suitable for web output. |
424 | 424 |
425 >>> s = 'escape characters: \\0 \\x0b \\x7f' | 425 >>> s = b'escape characters: \\0 \\x0b \\x7f' |
426 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) | 426 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) |
427 >>> s = 'escape characters: \\b \\t \\n \\f \\r \\" \\\\' | 427 >>> s = b'escape characters: \\b \\t \\n \\f \\r \\" \\\\' |
428 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) | 428 >>> assert jsonescape(s) == jsonescape(s, paranoid=True) |
429 >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True) | 429 >>> jsonescape(b'escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True) |
430 'escape boundary: ~ \\\\u007f \\\\u0080' | 430 'escape boundary: ~ \\\\u007f \\\\u0080' |
431 >>> jsonescape('a weird byte: \\xdd', paranoid=True) | 431 >>> jsonescape(b'a weird byte: \\xdd', paranoid=True) |
432 'a weird byte: \\\\udcdd' | 432 'a weird byte: \\\\udcdd' |
433 >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True) | 433 >>> jsonescape(b'utf-8: caf\\xc3\\xa9', paranoid=True) |
434 'utf-8: caf\\\\u00e9' | 434 'utf-8: caf\\\\u00e9' |
435 >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True) | 435 >>> jsonescape(b'non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True) |
436 'non-BMP: \\\\ud834\\\\udd1e' | 436 'non-BMP: \\\\ud834\\\\udd1e' |
437 >>> jsonescape('<foo@example.org>', paranoid=True) | 437 >>> jsonescape(b'<foo@example.org>', paranoid=True) |
438 '\\\\u003cfoo@example.org\\\\u003e' | 438 '\\\\u003cfoo@example.org\\\\u003e' |
439 ''' | 439 ''' |
440 | 440 |
441 u8chars = toutf8b(s) | 441 u8chars = toutf8b(s) |
442 try: | 442 try: |
529 return the original binary string. This | 529 return the original binary string. This |
530 is a round-trip process for strings like filenames, but metadata | 530 is a round-trip process for strings like filenames, but metadata |
531 that was passed through tolocal will remain in UTF-8. | 531 that was passed through tolocal will remain in UTF-8. |
532 | 532 |
533 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x | 533 >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x |
534 >>> m = "\\xc3\\xa9\\x99abcd" | 534 >>> m = b"\\xc3\\xa9\\x99abcd" |
535 >>> toutf8b(m) | 535 >>> toutf8b(m) |
536 '\\xc3\\xa9\\xed\\xb2\\x99abcd' | 536 '\\xc3\\xa9\\xed\\xb2\\x99abcd' |
537 >>> roundtrip(m) | 537 >>> roundtrip(m) |
538 True | 538 True |
539 >>> roundtrip("\\xc2\\xc2\\x80") | 539 >>> roundtrip(b"\\xc2\\xc2\\x80") |
540 True | 540 True |
541 >>> roundtrip("\\xef\\xbf\\xbd") | 541 >>> roundtrip(b"\\xef\\xbf\\xbd") |
542 True | 542 True |
543 >>> roundtrip("\\xef\\xef\\xbf\\xbd") | 543 >>> roundtrip(b"\\xef\\xef\\xbf\\xbd") |
544 True | 544 True |
545 >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80") | 545 >>> roundtrip(b"\\xf1\\x80\\x80\\x80\\x80") |
546 True | 546 True |
547 ''' | 547 ''' |
548 | 548 |
549 if isasciistr(s): | 549 if isasciistr(s): |
550 return s | 550 return s |