Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/url.py @ 7270:2db33c1a5654
factor out the url handling from httprepo
Create url.py to centralize all URL handling:
- proxy handling
- workarounds for various Python bugs
- handling of a username/password embedded in the URL
author | Benoit Boissinot <benoit.boissinot@ens-lyon.org> |
---|---|
date | Mon, 27 Oct 2008 21:50:01 +0100 |
parents | |
children | ac81ffac0f35 |
comparison
equal
deleted
inserted
replaced
7269:95a53961d7a6 | 7270:2db33c1a5654 |
---|---|
1 # url.py - HTTP handling for mercurial | |
2 # | |
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com> | |
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br> | |
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | |
6 # | |
7 # This software may be used and distributed according to the terms | |
8 # of the GNU General Public License, incorporated herein by reference. | |
9 | |
10 import urllib, urllib2, urlparse, httplib, os, re | |
11 from i18n import _ | |
12 import keepalive, util | |
13 | |
def hidepassword(url):
    '''return url with any embedded password replaced by ***'''
    parts = list(urlparse.urlparse(url))
    # parts[1] is the netloc; mask whatever sits between ':' and '@'
    parts[1] = re.sub('([^:]*):([^@]*)@(.*)', r'\1:***@\3', parts[1])
    return urlparse.urlunparse(tuple(parts))
19 | |
def removeauth(url):
    '''return url without the user[:passwd]@ prefix of its host part'''
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    at = netloc.find('@')
    if at != -1:
        # drop everything up to and including the first '@'
        netloc = netloc[at + 1:]
    stripped = (scheme, netloc, path, params, query, fragment)
    return urlparse.urlunparse(stripped)
25 | |
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.

    Returns (host, port, user, passwd).  Components that are absent
    from netloc are None; user and passwd are url-unquoted and port is
    kept as a string.  A bracketed IPv6 literal such as [::1] is kept
    whole, brackets included, as the host.
    '''

    a = netloc.find('@')
    if a == -1:
        user, passwd = None, None
    else:
        userpass, netloc = netloc[:a], netloc[a+1:]
        c = userpass.find(':')
        if c == -1:
            user, passwd = urllib.unquote(userpass), None
        else:
            user = urllib.unquote(userpass[:c])
            passwd = urllib.unquote(userpass[c+1:])
    # the colons inside an IPv6 literal are not port separators, so only
    # look for the port colon after the closing bracket
    bracket = netloc.find(']')
    if netloc.startswith('[') and bracket != -1:
        c = netloc.find(':', bracket)
    else:
        c = netloc.find(':')
    if c == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:c], netloc[c+1:]
    return host, port, user, passwd
46 | |
def netlocunsplit(host, port, user=None, passwd=None):
    '''build [user[:passwd]@]host[:port] from its components.

    user and passwd are url-quoted.  An empty port is left out, passwd
    is ignored without a user, and an empty passwd yields plain user@.
    '''
    netloc = host
    if port:
        netloc = netloc + ':' + port
    if not user:
        return netloc
    credentials = urllib.quote(user)
    if passwd:
        credentials = credentials + ':' + urllib.quote(passwd)
    return credentials + '@' + netloc
60 | |
# characters that quotepath() never escapes
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# lookup sets, built on the first quotepath() call
_safeset = None
_hex = None
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%20')
    'abc%20def%20'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'
    '''
    global _safeset, _hex
    if _safeset is None:
        _safeset = util.set(_safe)
        _hex = util.set('abcdefABCDEF0123456789')
    total = len(path)
    quoted = []
    for pos, ch in enumerate(path):
        # a '%' followed by two hex digits is already an escape sequence
        escaped = (ch == '%' and pos + 2 < total and
                   path[pos + 1] in _hex and path[pos + 2] in _hex)
        if escaped or ch in _safeset:
            quoted.append(ch)
        else:
            quoted.append('%%%02X' % ord(ch))
    return ''.join(quoted)
97 | |
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that asks the user for missing credentials'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return (user, passwd) for realm/authuri, prompting if needed

        Credentials already known to the manager are returned as is
        when both are set.  Otherwise the missing parts are requested
        through the ui and the result is stored with add_password().
        Aborts when the ui is not interactive.
        '''
        base = urllib2.HTTPPasswordMgrWithDefaultRealm
        user, passwd = base.find_user_password(self, realm, authuri)
        if user and passwd:
            return (user, passwd)

        ui = self.ui
        if not ui.interactive:
            raise util.Abort(_('http authorization required'))

        ui.write(_("http authorization required\n"))
        ui.status(_("realm: %s\n") % realm)
        if not user:
            user = ui.prompt(_("user:"), default=None)
        else:
            ui.status(_("user: %s\n") % user)

        if not passwd:
            passwd = ui.getpass()

        # store the answers in the manager for later lookups
        self.add_password(realm, authuri, user, passwd)
        return (user, passwd)
125 | |
class proxyhandler(urllib2.ProxyHandler):
    '''urllib2.ProxyHandler set up from the http_proxy config section

    The proxy url comes from http_proxy.host or, failing that, from the
    http_proxy environment variable.  Hosts named in http_proxy.no or
    in the no_proxy environment variable, plus localhost and 127.0.0.1,
    are contacted directly unless http_proxy.always is set.
    '''
    def __init__(self, ui):
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            snpqf = urlparse.urlsplit(proxyurl)
            proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
            hpup = netlocsplit(proxynetloc)

            proxyhost, proxyport, proxyuser, proxypasswd = hpup
            # credentials embedded in the proxy url win over the config
            if not proxyuser:
                proxyuser = ui.config("http_proxy", "user")
                proxypasswd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            # (every entry is stored lowercased; proxy_open relies on it)
            no_list = [ "localhost", "127.0.0.1" ]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                self.no_list = []
            else:
                self.no_list = no_list

            # rebuild the proxy url with the credentials folded in
            proxyurl = urlparse.urlunsplit((
                proxyscheme, netlocunsplit(proxyhost, proxyport,
                                           proxyuser, proxypasswd or ''),
                proxypath, proxyquery, proxyfrag))
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug(_('proxying through http://%s:%s\n') %
                     (proxyhost, proxyport))
        else:
            proxies = {}

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''route req through proxy unless its host is in no_list

        Returns None, without touching req, for hosts that must be
        reached directly.
        '''
        # host names are case-insensitive and the no_list entries were
        # lowercased in __init__, so normalize before comparing
        host = req.get_host().split(':')[0].lower()
        if host in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
196 | |
class httpsendfile(file):
    '''file whose len() is the size reported by fstat'''

    def __len__(self):
        info = os.fstat(self.fileno())
        return info.st_size
200 | |
def _gen_sendfile(connection):
    '''return a send() method that streams httpsendfile objects

    connection is the class whose own send() does the actual sending.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            connection.send(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for chunk in util.filechunkiter(data):
            connection.send(self, chunk)
    return _sendfile
212 | |
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection whose send() streams httpsendfile data'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)
216 | |
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that opens httpconnection connections'''

    def http_open(self, req):
        '''open req over an httpconnection'''
        return self.do_open(httpconnection, req)

    def __del__(self):
        # calls close_all() when the handler is collected
        self.close_all()
223 | |
# the https classes are only defined when urllib2 provides HTTPSHandler
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection using keepalive responses and streaming send()'''

        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive HTTPS handler that opens httpsconnection connections'''

        def https_open(self, req):
            '''open req over an httpsconnection'''
            return self.do_open(httpsconnection, req)
234 | |
235 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if | |
236 # it doesn't know about the auth type requested. This can happen if | |
237 # somebody is using BasicAuth and types a bad password. | |
238 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler): | |
239 def http_error_auth_reqed(self, auth_header, host, req, headers): | |
240 try: | |
241 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed( | |
242 self, auth_header, host, req, headers) | |
243 except ValueError, inst: | |
244 arg = inst.args[0] | |
245 if arg.startswith("AbstractDigestAuthHandler doesn't know "): | |
246 return | |
247 raise | |
248 | |
def getauthinfo(path):
    '''split a url into a credential-free url and its auth information

    Returns (url, authinfo).  url has its path quoted (an empty path
    becomes '/') and carries no user or passwd.  authinfo is None when
    the url names no user, otherwise the 4-tuple
    (None, (url, host[:port]), user, passwd), with a missing passwd
    replaced by ''.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    urlpath = quotepath(urlpath or '/')
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    hostport = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, hostport, urlpath, query, frag))
    if not user:
        return url, None
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, hostport), user, passwd or '')
268 | |
def opener(ui, authinfo=None):
    '''
    build a urllib2 opener with the http(s), proxy and auth handlers
    authinfo, when given, is registered with the password manager
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler())
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        # never show the password itself, only its length
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        ui.debug(_('http auth: user %s, password %s\n') % (user, shown))

    handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    built = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    built.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                        ('Accept', 'application/mercurial-0.1')]
    return built
295 | |
def open(ui, url, data=None):
    '''open url, or a local path given without a scheme, via opener()

    A scheme-less url is treated as a local path and turned into an
    absolute file:// url.  Otherwise embedded credentials are split
    off by getauthinfo() and handed to the opener.
    '''
    authinfo = None
    if urlparse.urlsplit(url)[0]:
        url, authinfo = getauthinfo(url)
    else:
        url = 'file://' + util.normpath(os.path.abspath(url))
    return opener(ui, authinfo).open(url, data)