mercurial/posix.py
changeset 26876 b8381832ce2b
parent 26248 99b6afff09ae
child 26885 8b2fbe3f59b1
equal deleted inserted replaced
26875:cf47bdb2183c 26876:b8381832ce2b
   253         try:
   253         try:
   254             u = path.decode('utf-8')
   254             u = path.decode('utf-8')
   255         except UnicodeDecodeError:
   255         except UnicodeDecodeError:
   256             # OS X percent-encodes any bytes that aren't valid utf-8
   256             # OS X percent-encodes any bytes that aren't valid utf-8
   257             s = ''
   257             s = ''
   258             g = ''
   258             pos = 0
   259             l = 0
   259             l = len(s)
   260             for c in path:
   260             while pos < l:
   261                 o = ord(c)
   261                 try:
   262                 if l and o < 128 or o >= 192:
   262                     c = encoding.getutf8char(path, pos)
   263                     # we want a continuation byte, but didn't get one
   263                     pos += len(c)
   264                     s += ''.join(["%%%02X" % ord(x) for x in g])
   264                 except ValueError:
   265                     g = ''
   265                     c = '%%%%02X' % path[pos]
   266                     l = 0
   266                     pos += 1
   267                 if l == 0 and o < 128:
   267                 s += c
   268                     # ascii
   268 
   269                     s += c
       
   270                 elif l == 0 and 194 <= o < 245:
       
   271                     # valid leading bytes
       
   272                     if o < 224:
       
   273                         l = 1
       
   274                     elif o < 240:
       
   275                         l = 2
       
   276                     else:
       
   277                         l = 3
       
   278                     g = c
       
   279                 elif l > 0 and 128 <= o < 192:
       
   280                     # valid continuations
       
   281                     g += c
       
   282                     l -= 1
       
   283                     if not l:
       
   284                         s += g
       
   285                         g = ''
       
   286                 else:
       
   287                     # invalid
       
   288                     s += "%%%02X" % o
       
   289 
       
   290             # any remaining partial characters
       
   291             s += ''.join(["%%%02X" % ord(x) for x in g])
       
   292             u = s.decode('utf-8')
   269             u = s.decode('utf-8')
   293 
   270 
   294         # Decompose then lowercase (HFS+ technote specifies lower)
   271         # Decompose then lowercase (HFS+ technote specifies lower)
   295         enc = unicodedata.normalize('NFD', u).lower().encode('utf-8')
   272         enc = unicodedata.normalize('NFD', u).lower().encode('utf-8')
   296         # drop HFS+ ignored characters
   273         # drop HFS+ ignored characters