mercurial/encoding.py
changeset 26875 cf47bdb2183c
parent 25660 328739ea70c3
child 26877 cb467a9d7593
equal deleted inserted replaced
26874:853154f27525 26875:cf47bdb2183c
   412         _jsonmap['\f'] = '\\f'
   412         _jsonmap['\f'] = '\\f'
   413         _jsonmap['\r'] = '\\r'
   413         _jsonmap['\r'] = '\\r'
   414 
   414 
   415     return ''.join(_jsonmap[c] for c in toutf8b(s))
   415     return ''.join(_jsonmap[c] for c in toutf8b(s))
   416 
   416 
       
   417 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
       
   418 
       
   419 def getutf8char(s, pos):
       
   420     '''get the next full utf-8 character in the given string, starting at pos
       
   421 
       
   422     Raises a UnicodeError if the given location does not start a valid
       
   423     utf-8 character.
       
   424     '''
       
   425 
       
   426     # find how many bytes to attempt decoding from first nibble
       
   427     l = _utf8len[ord(s[pos]) >> 4]
       
   428     if not l: # ascii
       
   429         return s[pos]
       
   430 
       
   431     c = s[pos:pos + l]
       
   432     # validate with attempted decode
       
   433     c.decode("utf-8")
       
   434     return c
       
   435 
   417 def toutf8b(s):
   436 def toutf8b(s):
   418     '''convert a local, possibly-binary string into UTF-8b
   437     '''convert a local, possibly-binary string into UTF-8b
   419 
   438 
   420     This is intended as a generic method to preserve data when working
   439     This is intended as a generic method to preserve data when working
   421     with schemes like JSON and XML that have no provision for
   440     with schemes like JSON and XML that have no provision for