changeset 43682 | 83a349aaeba3 |
parent 43681 | b65fcccd9100 |
child 43683 | 7f51bc36194d |
43681:b65fcccd9100 | 43682:83a349aaeba3 |
---|---|
558 |
558 |
559 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
559 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] |
560 |
560 |
561 |
561 |
562 def getutf8char(s, pos): |
562 def getutf8char(s, pos): |
563 # type: (Any, Any) -> Any |
563 # type: (bytes, int) -> bytes |
564 '''get the next full utf-8 character in the given string, starting at pos |
564 '''get the next full utf-8 character in the given string, starting at pos |
565 |
565 |
566 Raises a UnicodeError if the given location does not start a valid |
566 Raises a UnicodeError if the given location does not start a valid |
567 utf-8 character. |
567 utf-8 character. |
568 ''' |
568 ''' |
577 c.decode("utf-8", _utf8strict) |
577 c.decode("utf-8", _utf8strict) |
578 return c |
578 return c |
579 |
579 |
580 |
580 |
581 def toutf8b(s): |
581 def toutf8b(s): |
582 # type: (Any) -> Any |
582 # type: (bytes) -> bytes |
583 '''convert a local, possibly-binary string into UTF-8b |
583 '''convert a local, possibly-binary string into UTF-8b |
584 |
584 |
585 This is intended as a generic method to preserve data when working |
585 This is intended as a generic method to preserve data when working |
586 with schemes like JSON and XML that have no provision for |
586 with schemes like JSON and XML that have no provision for |
587 arbitrary byte strings. As Mercurial often doesn't know |
587 arbitrary byte strings. As Mercurial often doesn't know |