253 try: |
253 try: |
254 u = path.decode('utf-8') |
254 u = path.decode('utf-8') |
255 except UnicodeDecodeError: |
255 except UnicodeDecodeError: |
256 # OS X percent-encodes any bytes that aren't valid utf-8 |
256 # OS X percent-encodes any bytes that aren't valid utf-8 |
257 s = '' |
257 s = '' |
258 g = '' |
258 pos = 0 |
259 l = 0 |
259 l = len(s) |
260 for c in path: |
260 while pos < l: |
261 o = ord(c) |
261 try: |
262 if l and o < 128 or o >= 192: |
262 c = encoding.getutf8char(path, pos) |
263 # we want a continuation byte, but didn't get one |
263 pos += len(c) |
264 s += ''.join(["%%%02X" % ord(x) for x in g]) |
264 except ValueError: |
265 g = '' |
265 c = '%%%%02X' % path[pos] |
266 l = 0 |
266 pos += 1 |
267 if l == 0 and o < 128: |
267 s += c |
268 # ascii |
268 |
269 s += c |
|
270 elif l == 0 and 194 <= o < 245: |
|
271 # valid leading bytes |
|
272 if o < 224: |
|
273 l = 1 |
|
274 elif o < 240: |
|
275 l = 2 |
|
276 else: |
|
277 l = 3 |
|
278 g = c |
|
279 elif l > 0 and 128 <= o < 192: |
|
280 # valid continuations |
|
281 g += c |
|
282 l -= 1 |
|
283 if not l: |
|
284 s += g |
|
285 g = '' |
|
286 else: |
|
287 # invalid |
|
288 s += "%%%02X" % o |
|
289 |
|
290 # any remaining partial characters |
|
291 s += ''.join(["%%%02X" % ord(x) for x in g]) |
|
292 u = s.decode('utf-8') |
269 u = s.decode('utf-8') |
293 |
270 |
294 # Decompose then lowercase (HFS+ technote specifies lower) |
271 # Decompose then lowercase (HFS+ technote specifies lower) |
295 enc = unicodedata.normalize('NFD', u).lower().encode('utf-8') |
272 enc = unicodedata.normalize('NFD', u).lower().encode('utf-8') |
296 # drop HFS+ ignored characters |
273 # drop HFS+ ignored characters |