comparison mercurial/url.py @ 7270:2db33c1a5654

Factor out the URL handling from httprepo. Create url.py to handle all the URL handling: proxy handling; workarounds for various Python bugs; handling of a username/password embedded in the URL.
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Mon, 27 Oct 2008 21:50:01 +0100
parents
children ac81ffac0f35
comparison
equal deleted inserted replaced
7269:95a53961d7a6 7270:2db33c1a5654
1 # url.py - HTTP handling for mercurial
2 #
3 # Copyright 2005, 2006, 2007, 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2006, 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
7 # This software may be used and distributed according to the terms
8 # of the GNU General Public License, incorporated herein by reference.
9
10 import urllib, urllib2, urlparse, httplib, os, re
11 from i18n import _
12 import keepalive, util
13
def hidepassword(url):
    '''hide user credential in a url string

    The password in a [user[:passwd]@]host[:port] netloc is replaced
    by '***'. A netloc that carries no password is left untouched.

    The user info is separated from the host at the last '@' of the
    netloc: a host name can never contain '@', while a badly quoted
    password can. Splitting at the first '@' would leave the tail of
    such a password readable in the result.
    '''
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    at = netloc.rfind('@')
    if at != -1:
        userinfo, hostport = netloc[:at], netloc[at+1:]
        # the first ':' ends the user name, the rest is the password
        colon = userinfo.find(':')
        if colon != -1:
            netloc = userinfo[:colon] + ':***@' + hostport
    return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
19
def removeauth(url):
    '''remove all authentication information from a url string

    Everything up to and including the last '@' of the netloc is
    dropped. The last '@' is used because a host name cannot contain
    '@' while a badly quoted password can; cutting at the first one
    would leave part of that password in the result.
    '''
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    # rfind() returns -1 when there is no '@', so the slice then starts
    # at 0 and keeps the whole netloc
    netloc = netloc[netloc.rfind('@')+1:]
    return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))
25
def netlocsplit(netloc):
    '''split [user[:passwd]@]host[:port] into 4-tuple.

    Returns (host, port, user, passwd). Parts that are missing from
    netloc are returned as None. user and passwd are url-unquoted,
    host and port are returned as found (port stays a string).

    The user info ends at the last '@': a host name cannot contain
    '@', but a badly quoted password can, and splitting at the first
    '@' would put the rest of that password into the host.
    '''

    a = netloc.rfind('@')
    if a == -1:
        user, passwd = None, None
    else:
        userpass, netloc = netloc[:a], netloc[a+1:]
        # the first ':' separates user from password, so the password
        # itself may contain further ':' characters
        c = userpass.find(':')
        if c == -1:
            user, passwd = urllib.unquote(userpass), None
        else:
            user = urllib.unquote(userpass[:c])
            passwd = urllib.unquote(userpass[c+1:])
    c = netloc.find(':')
    if c == -1:
        host, port = netloc, None
    else:
        host, port = netloc[:c], netloc[c+1:]
    return host, port, user, passwd
46
def netlocunsplit(host, port, user=None, passwd=None):
    '''turn host, port, user, passwd into [user[:passwd]@]host[:port].

    port, user and passwd are only emitted when they are non-empty;
    passwd is ignored when there is no user. user and passwd are
    url-quoted on the way out.
    '''
    hostport = host
    if port:
        hostport += ':' + port
    if not user:
        return hostport
    userpass = urllib.quote(user)
    if passwd:
        userpass += ':' + urllib.quote(passwd)
    return userpass + '@' + hostport
60
_safe = ('abcdefghijklmnopqrstuvwxyz'
         'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         '0123456789' '_.-/')
# Lookup tables keyed by single characters. They are built once at
# import time: filling them lazily in two steps inside quotepath()
# would let a second thread observe _safeset already set while _hex
# is still None.
_safeset = dict.fromkeys(_safe)
_hex = dict.fromkeys('abcdefABCDEF0123456789')
def quotepath(path):
    '''quote the path part of a URL

    This is similar to urllib.quote, but it also tries to avoid
    quoting things twice (inspired by wget):

    >>> quotepath('abc def')
    'abc%20def'
    >>> quotepath('abc%20def')
    'abc%20def'
    >>> quotepath('abc%20 def')
    'abc%20%20def'
    >>> quotepath('abc def%2')
    'abc%20def%252'
    >>> quotepath('abc def%')
    'abc%20def%25'

    A '%' followed by two hex digits is taken to be an existing
    escape and is kept as is. Any other character outside _safe
    (including a lone '%') is replaced by its %XX escape.
    '''
    l = list(path)
    n = len(l)
    for i, c in enumerate(l):
        # only look ahead when both following positions exist
        if c == '%' and i + 2 < n and l[i+1] in _hex and l[i+2] in _hex:
            continue
        if c not in _safeset:
            l[i] = '%%%02X' % ord(c)
    return ''.join(l)
97
class passwordmgr(urllib2.HTTPPasswordMgrWithDefaultRealm):
    '''password manager that asks the user, through ui, for whatever
    part of the credentials has not been registered'''

    def __init__(self, ui):
        urllib2.HTTPPasswordMgrWithDefaultRealm.__init__(self)
        self.ui = ui

    def find_user_password(self, realm, authuri):
        '''return a (user, passwd) pair for realm and authuri

        Registered credentials are returned directly when both parts
        are known. Otherwise util.Abort is raised if ui is not
        interactive; if it is, the missing parts are requested from
        the user and the completed pair is registered before being
        returned.
        '''
        base = urllib2.HTTPPasswordMgrWithDefaultRealm
        user, passwd = base.find_user_password(self, realm, authuri)
        if user and passwd:
            return (user, passwd)

        ui = self.ui
        if not ui.interactive:
            raise util.Abort(_('http authorization required'))

        ui.write(_("http authorization required\n"))
        ui.status(_("realm: %s\n") % realm)
        if not user:
            user = ui.prompt(_("user:"), default=None)
        else:
            # the user name is already known, only show it
            ui.status(_("user: %s\n") % user)

        if not passwd:
            passwd = ui.getpass()

        # remember the answer so the same request does not ask again
        self.add_password(realm, authuri, user, passwd)
        return (user, passwd)
125
class proxyhandler(urllib2.ProxyHandler):
    '''proxy handler configured from the http_proxy section of ui,
    falling back to the http_proxy environment variable'''

    def __init__(self, ui):
        '''read the proxy settings and set up the urllib2 handler

        The proxy url comes from the http_proxy.host config item or,
        when that is unset, from the http_proxy environment variable.
        The proxy related environment variables are removed from
        os.environ as a side effect.
        '''
        proxyurl = ui.config("http_proxy", "host") or os.getenv('http_proxy')
        # XXX proxyauthinfo = None

        if proxyurl:
            # proxy can be proper url or host[:port]
            if not (proxyurl.startswith('http:') or
                    proxyurl.startswith('https:')):
                proxyurl = 'http://' + proxyurl + '/'
            snpqf = urlparse.urlsplit(proxyurl)
            proxyscheme, proxynetloc, proxypath, proxyquery, proxyfrag = snpqf
            hpup = netlocsplit(proxynetloc)

            proxyhost, proxyport, proxyuser, proxypasswd = hpup
            # credentials embedded in the url win over the config ones
            if not proxyuser:
                proxyuser = ui.config("http_proxy", "user")
                proxypasswd = ui.config("http_proxy", "passwd")

            # see if we should use a proxy for this url
            # (all entries are stored in lower case)
            no_list = [ "localhost", "127.0.0.1" ]
            no_list.extend([p.lower() for
                            p in ui.configlist("http_proxy", "no")])
            no_list.extend([p.strip().lower() for
                            p in os.getenv("no_proxy", '').split(',')
                            if p.strip()])
            # "http_proxy.always" config is for running tests on localhost
            if ui.configbool("http_proxy", "always"):
                self.no_list = []
            else:
                self.no_list = no_list

            proxyurl = urlparse.urlunsplit((
                proxyscheme, netlocunsplit(proxyhost, proxyport,
                                           proxyuser, proxypasswd or ''),
                proxypath, proxyquery, proxyfrag))
            proxies = {'http': proxyurl, 'https': proxyurl}
            ui.debug(_('proxying through http://%s:%s\n') %
                     (proxyhost, proxyport))
        else:
            proxies = {}

        # urllib2 takes proxy values from the environment and those
        # will take precedence if found, so drop them
        for env in ["HTTP_PROXY", "http_proxy", "no_proxy"]:
            try:
                if env in os.environ:
                    del os.environ[env]
            except OSError:
                pass

        urllib2.ProxyHandler.__init__(self, proxies)
        self.ui = ui

    def proxy_open(self, req, proxy, type_):
        '''send req through proxy unless its host is in no_list

        Returns None, without touching req, when the host must not be
        proxied.
        '''
        # host names are case insensitive and no_list is stored in
        # lower case, so normalize the host before the lookup
        host = req.get_host().split(':')[0].lower()
        if host in self.no_list:
            return None

        # work around a bug in Python < 2.4.2
        # (it leaves a "\n" at the end of Proxy-authorization headers)
        baseclass = req.__class__
        class _request(baseclass):
            def add_header(self, key, val):
                if key.lower() == 'proxy-authorization':
                    val = val.strip()
                return baseclass.add_header(self, key, val)
        req.__class__ = _request

        return urllib2.ProxyHandler.proxy_open(self, req, proxy, type_)
196
class httpsendfile(file):
    '''file object that reports the size of the underlying file
    through len()'''

    def __len__(self):
        st = os.fstat(self.fileno())
        return st.st_size
200
def _gen_sendfile(connection):
    '''build a send() method on top of the connection class

    The returned method passes ordinary data straight to
    connection.send(). An httpsendfile is sent chunk by chunk
    instead.
    '''
    def _sendfile(self, data):
        if not isinstance(data, httpsendfile):
            connection.send(self, data)
            return
        # send a file
        # if auth required, some data sent twice, so rewind here
        data.seek(0)
        for chunk in util.filechunkiter(data):
            connection.send(self, chunk)
    return _sendfile
212
class httpconnection(keepalive.HTTPConnection):
    '''keepalive HTTP connection whose send() also accepts an
    httpsendfile'''

    # must be able to send big bundle as stream.
    send = _gen_sendfile(keepalive.HTTPConnection)
216
class httphandler(keepalive.HTTPHandler):
    '''keepalive HTTP handler that opens requests with httpconnection'''

    def http_open(self, req):
        return self.do_open(httpconnection, req)

    def __del__(self):
        # runs close_all() when the handler is collected
        self.close_all()
223
# the https classes are only defined when urllib2 has an HTTPSHandler
has_https = hasattr(urllib2, 'HTTPSHandler')
if has_https:
    class httpsconnection(httplib.HTTPSConnection):
        '''HTTPS connection using the keepalive response class, with a
        send() that also accepts an httpsendfile'''

        response_class = keepalive.HTTPResponse
        # must be able to send big bundle as stream.
        send = _gen_sendfile(httplib.HTTPSConnection)

    class httpshandler(keepalive.KeepAliveHandler, urllib2.HTTPSHandler):
        '''keepalive HTTPS handler that opens requests with
        httpsconnection'''

        def https_open(self, req):
            return self.do_open(httpsconnection, req)
234
235 # In python < 2.5 AbstractDigestAuthHandler raises a ValueError if
236 # it doesn't know about the auth type requested. This can happen if
237 # somebody is using BasicAuth and types a bad password.
238 class httpdigestauthhandler(urllib2.HTTPDigestAuthHandler):
239 def http_error_auth_reqed(self, auth_header, host, req, headers):
240 try:
241 return urllib2.HTTPDigestAuthHandler.http_error_auth_reqed(
242 self, auth_header, host, req, headers)
243 except ValueError, inst:
244 arg = inst.args[0]
245 if arg.startswith("AbstractDigestAuthHandler doesn't know "):
246 return
247 raise
248
def getauthinfo(path):
    '''split the credentials out of the url given as path

    Returns (url, authinfo). url is path with its user/password
    removed and its path part quoted ('/' when the path is empty).
    authinfo is None when path has no user; otherwise it is the
    tuple (None, (url, host[:port]), user, passwd), where a missing
    password is given as ''.
    '''
    scheme, netloc, urlpath, query, frag = urlparse.urlsplit(path)
    host, port, user, passwd = netlocsplit(netloc)

    # urllib cannot handle URLs with embedded user or passwd
    hostport = netlocunsplit(host, port)
    url = urlparse.urlunsplit((scheme, hostport,
                               quotepath(urlpath or '/'), query, frag))
    if not user:
        return url, None
    # Python < 2.4.3 uses only the netloc to search for a password
    return url, (None, (url, hostport), user, passwd or '')
268
def opener(ui, authinfo=None):
    '''
    construct an opener suitable for urllib2
    authinfo will be added to the password manager

    The opener is built from the http handler, the https handler
    (when has_https is set), the proxy handler and the basic and
    digest auth handlers, both sharing one password manager. It
    sends the mercurial User-agent and Accept headers.
    '''
    handlers = [httphandler()]
    if has_https:
        handlers.append(httpshandler())
    handlers.append(proxyhandler(ui))

    passmgr = passwordmgr(ui)
    if authinfo is not None:
        passmgr.add_password(*authinfo)
        user, passwd = authinfo[2:4]
        # only the length of the password goes to the debug output
        if passwd:
            shown = '*' * len(passwd)
        else:
            shown = 'not set'
        ui.debug(_('http auth: user %s, password %s\n') % (user, shown))

    handlers.append(urllib2.HTTPBasicAuthHandler(passmgr))
    handlers.append(httpdigestauthhandler(passmgr))
    result = urllib2.build_opener(*handlers)

    # 1.0 here is the _protocol_ version
    result.addheaders = [('User-agent', 'mercurial/proto-1.0'),
                         ('Accept', 'application/mercurial-0.1')]
    return result
295
def open(ui, url, data=None):
    '''open url with an opener built for ui, passing data along

    A url without a scheme is taken to be a local path and is turned
    into a file:// url. For any other url, embedded credentials are
    split out and handed to the opener.
    '''
    if urlparse.urlsplit(url)[0]:
        url, authinfo = getauthinfo(url)
    else:
        authinfo = None
        url = 'file://' + util.normpath(os.path.abspath(url))
    return opener(ui, authinfo).open(url, data)