comparison mercurial/posix.py @ 15551:1fa41d1f1351 stable

posix: add extended support for OS X path folding. OS X does the following transformations on paths for comparisons: a) 8-bit strings are decoded as UTF-8 to UTF-16; b) undecodable bytes are percent-escaped; c) accented characters are converted to NFD decomposed form, approximately; d) characters are converted to _lowercase_ using internal tables. Both (c) and (d) are done using internal tables that vary from release to release and match Unicode specs to a greater or lesser extent. We approximate these functions using Python's internal Unicode data. With this change, Mercurial will (in almost all cases) match OS X folding and not report unknown file aliases for files in UTF-8 or other encodings.
author Matt Mackall <mpm@selenic.com>
date Tue, 22 Nov 2011 17:26:32 -0600
parents 58f96703a9ab
children b61fa7481a68
comparison
equal deleted inserted replaced
15550:b2fd4746414a 15551:1fa41d1f1351
4 # 4 #
5 # This software may be used and distributed according to the terms of the 5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version. 6 # GNU General Public License version 2 or any later version.
7 7
8 from i18n import _ 8 from i18n import _
9 import os, sys, errno, stat, getpass, pwd, grp, tempfile 9 import os, sys, errno, stat, getpass, pwd, grp, tempfile, unicodedata
10 10
11 posixfile = open 11 posixfile = open
12 nulldev = '/dev/null' 12 nulldev = '/dev/null'
13 normpath = os.path.normpath 13 normpath = os.path.normpath
14 samestat = os.path.samestat 14 samestat = os.path.samestat
168 def normcase(path): 168 def normcase(path):
169 return path.lower() 169 return path.lower()
170 170
171 if sys.platform == 'darwin': 171 if sys.platform == 'darwin':
172 import fcntl # only needed on darwin, missing on jython 172 import fcntl # only needed on darwin, missing on jython
173
def normcase(path):
    '''
    Fold a path the way this module approximates OS X path comparison.

    The byte string is decoded as UTF-8, every byte that is not part of
    a valid UTF-8 sequence is replaced by a percent escape (a percent
    sign followed by two uppercase hex digits), the text is decomposed
    to NFD and then lowercased.  Note that the final lowercasing also
    lowercases the hex digits of the escapes.

    path: the path as a byte string.

    Returns the folded path as a UTF-8 encoded byte string.
    '''
    try:
        u = path.decode('utf-8')
    except UnicodeDecodeError:
        # Walk the path one UTF-8 sequence at a time.  Comparing the
        # input against a "replace"-decoded copy does not work: each bad
        # byte turns into a three-byte U+FFFD, so the two strings stop
        # lining up after the first undecodable byte.
        pieces = []
        pos = 0
        end = len(path)
        while pos < end:
            # a UTF-8 sequence is 1 to 4 bytes long; take the shortest
            # prefix at pos that decodes cleanly
            for width in (1, 2, 3, 4):
                chunk = path[pos:pos + width]
                try:
                    pieces.append(chunk.decode('utf-8'))
                except UnicodeDecodeError:
                    continue
                pos += len(chunk)
                break
            else:
                # no valid sequence starts here: escape this single byte
                # and resynchronize on the next one
                pieces.append(u'%%%02X' % ord(path[pos:pos + 1]))
                pos += 1
        u = u''.join(pieces)

    # Decompose then lowercase (HFS+ technote specifies lower)
    return unicodedata.normalize('NFD', u).lower().encode('utf-8')
190
173 def realpath(path): 191 def realpath(path):
174 ''' 192 '''
175 Returns the true, canonical file system path equivalent to the given 193 Returns the true, canonical file system path equivalent to the given
176 path. 194 path.
177 195