68 'the~07quick~adshot' |
68 'the~07quick~adshot' |
69 >>> dec('the~07quick~adshot') |
69 >>> dec('the~07quick~adshot') |
70 'the\\x07quick\\xadshot' |
70 'the\\x07quick\\xadshot' |
71 ''' |
71 ''' |
72 e = '_' |
72 e = '_' |
73 win_reserved = [ord(x) for x in '\\:*?"<>|'] |
73 winreserved = [ord(x) for x in '\\:*?"<>|'] |
74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
75 for x in (range(32) + range(126, 256) + win_reserved): |
75 for x in (range(32) + range(126, 256) + winreserved): |
76 cmap[chr(x)] = "~%02x" % x |
76 cmap[chr(x)] = "~%02x" % x |
77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]: |
77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]: |
78 cmap[chr(x)] = e + chr(x).lower() |
78 cmap[chr(x)] = e + chr(x).lower() |
79 dmap = {} |
79 dmap = {} |
80 for k, v in cmap.iteritems(): |
80 for k, v in cmap.iteritems(): |
94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]), |
94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]), |
95 lambda s: decodedir("".join(list(decode(s))))) |
95 lambda s: decodedir("".join(list(decode(s))))) |
96 |
96 |
# Build the reversible encoder/decoder pair once, at import time.
encodefilename, decodefilename = _buildencodefun()
98 |
98 |
99 def _build_lower_encodefun(): |
99 def _buildlowerencodefun(): |
100 ''' |
100 ''' |
101 >>> f = _build_lower_encodefun() |
101 >>> f = _buildlowerencodefun() |
102 >>> f('nothing/special.txt') |
102 >>> f('nothing/special.txt') |
103 'nothing/special.txt' |
103 'nothing/special.txt' |
104 >>> f('HELLO') |
104 >>> f('HELLO') |
105 'hello' |
105 'hello' |
106 >>> f('hello:world?') |
106 >>> f('hello:world?') |
107 'hello~3aworld~3f' |
107 'hello~3aworld~3f' |
108 >>> f('the\x07quick\xADshot') |
108 >>> f('the\x07quick\xADshot') |
109 'the~07quick~adshot' |
109 'the~07quick~adshot' |
110 ''' |
110 ''' |
111 win_reserved = [ord(x) for x in '\\:*?"<>|'] |
111 winreserved = [ord(x) for x in '\\:*?"<>|'] |
112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
113 for x in (range(32) + range(126, 256) + win_reserved): |
113 for x in (range(32) + range(126, 256) + winreserved): |
114 cmap[chr(x)] = "~%02x" % x |
114 cmap[chr(x)] = "~%02x" % x |
115 for x in range(ord("A"), ord("Z")+1): |
115 for x in range(ord("A"), ord("Z")+1): |
116 cmap[chr(x)] = chr(x).lower() |
116 cmap[chr(x)] = chr(x).lower() |
117 return lambda s: "".join([cmap[c] for c in s]) |
117 return lambda s: "".join([cmap[c] for c in s]) |
118 |
118 |
# Module-level lower-casing encoder, built once at import time.
lowerencode = _buildlowerencodefun()
120 |
120 |
121 _windows_reserved_filenames = '''con prn aux nul |
121 _winreservednames = '''con prn aux nul |
122 com1 com2 com3 com4 com5 com6 com7 com8 com9 |
122 com1 com2 com3 com4 com5 com6 com7 com8 com9 |
123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() |
123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() |
124 def _auxencode(path, dotencode): |
124 def _auxencode(path, dotencode): |
125 ''' |
125 ''' |
126 Encodes filenames containing names reserved by Windows or which end in |
126 Encodes filenames containing names reserved by Windows or which end in |
141 ''' |
141 ''' |
142 res = [] |
142 res = [] |
143 for n in path.split('/'): |
143 for n in path.split('/'): |
144 if n: |
144 if n: |
145 base = n.split('.')[0] |
145 base = n.split('.')[0] |
146 if base and (base in _windows_reserved_filenames): |
146 if base and (base in _winreservednames): |
147 # encode third letter ('aux' -> 'au~78') |
147 # encode third letter ('aux' -> 'au~78') |
148 ec = "~%02x" % ord(n[2]) |
148 ec = "~%02x" % ord(n[2]) |
149 n = n[0:2] + ec + n[3:] |
149 n = n[0:2] + ec + n[3:] |
150 if n[-1] in '. ': |
150 if n[-1] in '. ': |
151 # encode last period or space ('foo...' -> 'foo..~2e') |
151 # encode last period or space ('foo...' -> 'foo..~2e') |
153 if dotencode and n[0] in '. ': |
153 if dotencode and n[0] in '. ': |
154 n = "~%02x" % ord(n[0]) + n[1:] |
154 n = "~%02x" % ord(n[0]) + n[1:] |
155 res.append(n) |
155 res.append(n) |
156 return '/'.join(res) |
156 return '/'.join(res) |
157 |
157 |
158 MAX_PATH_LEN_IN_HGSTORE = 120 |
158 _maxstorepathlen = 120 |
159 DIR_PREFIX_LEN = 8 |
159 _dirprefixlen = 8 |
160 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4 |
160 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4 |
161 def _hybridencode(path, auxencode): |
161 def _hybridencode(path, auxencode): |
162 '''encodes path with a length limit |
162 '''encodes path with a length limit |
163 |
163 |
164 Encodes all paths that begin with 'data/', according to the following. |
164 Encodes all paths that begin with 'data/', according to the following. |
165 |
165 |
171 Relevant path components consisting of Windows reserved filenames are |
171 Relevant path components consisting of Windows reserved filenames are |
172 masked by encoding the third character ('aux' -> 'au~78', see auxencode). |
172 masked by encoding the third character ('aux' -> 'au~78', see auxencode). |
173 |
173 |
174 Hashed encoding (not reversible): |
174 Hashed encoding (not reversible): |
175 |
175 |
176 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a |
176 If the default-encoded path is longer than _maxstorepathlen, a |
177 non-reversible hybrid hashing of the path is done instead. |
177 non-reversible hybrid hashing of the path is done instead. |
178 This encoding uses up to DIR_PREFIX_LEN characters of all directory |
178 This encoding uses up to _dirprefixlen characters of all directory |
179 levels of the lowerencoded path, but not more levels than can fit into |
179 levels of the lowerencoded path, but not more levels than can fit into |
180 _MAX_SHORTENED_DIRS_LEN. |
180 _maxshortdirslen. |
181 Then follows the filler followed by the sha digest of the full path. |
181 Then follows the filler followed by the sha digest of the full path. |
182 The filler is the beginning of the basename of the lowerencoded path |
182 The filler is the beginning of the basename of the lowerencoded path |
183 (the basename is everything after the last path separator). The filler |
183 (the basename is everything after the last path separator). The filler |
184 is as long as possible, filling in characters from the basename until |
184 is as long as possible, filling in characters from the basename until |
185 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars |
185 the encoded path has _maxstorepathlen characters (or all chars of the |
186 of the basename have been taken). |
186 basename have been taken). |
187 The extension (e.g. '.i' or '.d') is preserved. |
187 The extension (e.g. '.i' or '.d') is preserved. |
188 |
188 |
189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed |
189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed |
190 encoding was used. |
190 encoding was used. |
191 ''' |
191 ''' |
193 return path |
193 return path |
194 # escape directories ending with .i and .d |
194 # escape directories ending with .i and .d |
195 path = encodedir(path) |
195 path = encodedir(path) |
196 ndpath = path[len('data/'):] |
196 ndpath = path[len('data/'):] |
197 res = 'data/' + auxencode(encodefilename(ndpath)) |
197 res = 'data/' + auxencode(encodefilename(ndpath)) |
198 if len(res) > MAX_PATH_LEN_IN_HGSTORE: |
198 if len(res) > _maxstorepathlen: |
199 digest = _sha(path).hexdigest() |
199 digest = _sha(path).hexdigest() |
200 aep = auxencode(lowerencode(ndpath)) |
200 aep = auxencode(lowerencode(ndpath)) |
201 _root, ext = os.path.splitext(aep) |
201 _root, ext = os.path.splitext(aep) |
202 parts = aep.split('/') |
202 parts = aep.split('/') |
203 basename = parts[-1] |
203 basename = parts[-1] |
204 sdirs = [] |
204 sdirs = [] |
205 for p in parts[:-1]: |
205 for p in parts[:-1]: |
206 d = p[:DIR_PREFIX_LEN] |
206 d = p[:_dirprefixlen] |
207 if d[-1] in '. ': |
207 if d[-1] in '. ': |
208 # Windows can't access dirs ending in period or space |
208 # Windows can't access dirs ending in period or space |
209 d = d[:-1] + '_' |
209 d = d[:-1] + '_' |
210 t = '/'.join(sdirs) + '/' + d |
210 t = '/'.join(sdirs) + '/' + d |
211 if len(t) > _MAX_SHORTENED_DIRS_LEN: |
211 if len(t) > _maxshortdirslen: |
212 break |
212 break |
213 sdirs.append(d) |
213 sdirs.append(d) |
214 dirs = '/'.join(sdirs) |
214 dirs = '/'.join(sdirs) |
215 if len(dirs) > 0: |
215 if len(dirs) > 0: |
216 dirs += '/' |
216 dirs += '/' |
217 res = 'dh/' + dirs + digest + ext |
217 res = 'dh/' + dirs + digest + ext |
218 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res) |
218 spaceleft = _maxstorepathlen - len(res) |
219 if space_left > 0: |
219 if spaceleft > 0: |
220 filler = basename[:space_left] |
220 filler = basename[:spaceleft] |
221 res = 'dh/' + dirs + filler + digest + ext |
221 res = 'dh/' + dirs + filler + digest + ext |
222 return res |
222 return res |
223 |
223 |
224 def _calcmode(path): |
224 def _calcmode(path): |
225 try: |
225 try: |