Mercurial > public > mercurial-scm > hg
comparison mercurial/url.py @ 13770:4e8f2310f310
url: provide url object
This adds a url object that re-implements urlsplit() and
urlunsplit(). The implementation splits out usernames, passwords, and
ports.
The implementation is based on the behavior specified by RFC
2396[1]. However, it is much more forgiving than the RFC's
specification; it places no specific restrictions on what characters
are allowed in each segment of the URL other than what is necessary to
split the URL into its constituent parts.
[1]: http://www.ietf.org/rfc/rfc2396.txt
author | Brodie Rao <brodie@bitheap.org> |
---|---|
date | Fri, 25 Mar 2011 22:58:56 -0700 |
parents | 66d65bccbf06 |
children | 463aca32a937 |
comparison
equal
deleted
inserted
replaced
13769:8796fb6af67e | 13770:4e8f2310f310 |
---|---|
20 not result.startswith(scheme + '://') and | 20 not result.startswith(scheme + '://') and |
21 url.startswith(scheme + '://') | 21 url.startswith(scheme + '://') |
22 ): | 22 ): |
23 result = scheme + '://' + result[len(scheme + ':'):] | 23 result = scheme + '://' + result[len(scheme + ':'):] |
24 return result | 24 return result |
25 | |
class url(object):
    """Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parse_fragment is False, fragment is included in query. If
    parse_query is False, query is included in path. If both are
    False, both fragment and query are included in path.

    A string without a scheme is treated as a local path: only the
    fragment is split off, and nothing is unquoted.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parse_query=False, parse_fragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>
    """

    # Characters left unescaped when quoting the user and passwd components.
    _safechars = "!~*'()+"
    # Same set plus '/', used when quoting path, query and fragment.
    _safepchars = "/!~*'()+"

    def __init__(self, path, parse_query=True, parse_fragment=True):
        """Split path into its URL components.

        path is the URL (or plain local path) string to parse.
        parse_query and parse_fragment control whether '?' and '#'
        start a separate query / fragment component or are left in
        the preceding component.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True

        # "<non-empty>:<rest>" that does not start with '/' has a scheme.
        if not path.startswith('/') and ':' in path:
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if parse_fragment and '#' in path:
                path, self.fragment = path.split('#', 1)
                if not path:
                    path = None
            if self._localpath:
                # Local paths are stored verbatim: no query or host
                # parsing, and no unquoting.
                self.path = path
                return

        # path can be None at this point ('http:' or 'http:#frag'), so
        # it must be checked before looking for a query separator.
        if parse_query and path and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # With an empty authority (file:///x) the slash that
                # terminated it belongs to the path.
                if path:
                    path = '/' + path

        if self.host and '@' in self.host:
            # rsplit: the user/passwd part may itself contain '@'
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None
        self.path = path

        # Components are stored unescaped; __str__ quotes them again.
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urllib.unquote(v))

    def __repr__(self):
        """Return '<url name: value, ...>' listing the components that are set."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        """Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/?foo#bar'))
        'http://user:pw@host:80/?foo#bar'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle:foo#bar'))
        'bundle:foo#bar'
        >>> str(url('path'))
        'path'
        """
        if self._localpath:
            # path is None when the input consisted only of a fragment
            s = self.path or ''
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if (self.user or self.passwd or self.host or
            (self.scheme and not self.path)):
            s += '//'
        if self.user:
            s += urllib.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urllib.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals are emitted untouched so the
            # brackets and colons are not percent-encoded.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urllib.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urllib.quote(self.port)
        # A '/' separates the authority from what follows, so it is only
        # emitted when there is a host. Without the host check a fragment
        # alone would turn 'bundle:foo#bar' into 'bundle:/foo#bar'.
        if self.host and (self.path is not None or
                          self.query or self.fragment):
            s += '/'
        if self.path:
            s += urllib.quote(self.path, safe=self._safepchars)
        if self.query:
            s += '?' + urllib.quote(self.query, safe=self._safepchars)
        if self.fragment is not None:
            s += '#' + urllib.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url, authinfo) with the credentials split out.

        url is the string form of this URL without user and passwd.
        authinfo is None when no user is set; otherwise it is the tuple
        (None, (url, host), user, passwd) where passwd is '' if unset.
        NOTE(review): the tuple looks shaped for a urllib2 password
        manager's add_password(realm, uri, user, passwd) - confirm
        against callers.
        """
        user, passwd = self.user, self.passwd
        try:
            # Temporarily drop the credentials to render the bare URL.
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # Use the credential-free string here as well; str(self) would
        # embed user:passwd in the URI entry.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
214 | |
def has_scheme(path):
    """Return True if path parses as a URL with a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
25 | 217 |
26 def hidepassword(url): | 218 def hidepassword(url): |
27 '''hide user credential in a url string''' | 219 '''hide user credential in a url string''' |
28 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) | 220 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) |
29 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc) | 221 netloc = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', netloc) |