diff -r bc101902a68d -r 924c82157d46 mercurial/url.py --- a/mercurial/url.py Sat Apr 30 16:33:47 2011 +0200 +++ b/mercurial/url.py Sat Apr 30 09:43:20 2011 -0700 @@ -7,273 +7,11 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. -import urllib, urllib2, httplib, os, socket, cStringIO, re +import urllib, urllib2, httplib, os, socket, cStringIO import __builtin__ from i18n import _ import keepalive, util -class url(object): - """Reliable URL parser. - - This parses URLs and provides attributes for the following - components: - - <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment> - - Missing components are set to None. The only exception is - fragment, which is set to '' if present but empty. - - If parsefragment is False, fragment is included in query. If - parsequery is False, query is included in path. If both are - False, both fragment and query are included in path. - - See http://www.ietf.org/rfc/rfc2396.txt for more information. - - Note that for backward compatibility reasons, bundle URLs do not - take host names. That means 'bundle://../' has a path of '../'. 
- - Examples: - - >>> url('http://www.ietf.org/rfc/rfc2396.txt') - <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'> - >>> url('ssh://[::1]:2200//home/joe/repo') - <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'> - >>> url('file:///home/joe/repo') - <url scheme: 'file', path: '/home/joe/repo'> - >>> url('bundle:foo') - <url scheme: 'bundle', path: 'foo'> - >>> url('bundle://../foo') - <url scheme: 'bundle', path: '../foo'> - >>> url('c:\\\\foo\\\\bar') - <url path: 'c:\\\\foo\\\\bar'> - - Authentication credentials: - - >>> url('ssh://joe:xyz@x/repo') - <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'> - >>> url('ssh://joe@x/repo') - <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'> - - Query strings and fragments: - - >>> url('http://host/a?b#c') - <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'> - >>> url('http://host/a?b#c', parsequery=False, parsefragment=False) - <url scheme: 'http', host: 'host', path: 'a?b#c'> - """ - - _safechars = "!~*'()+" - _safepchars = "/!~*'()+" - _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match - - def __init__(self, path, parsequery=True, parsefragment=True): - # We slowly chomp away at path until we have only the path left - self.scheme = self.user = self.passwd = self.host = None - self.port = self.path = self.query = self.fragment = None - self._localpath = True - self._hostport = '' - self._origpath = path - - # special case for Windows drive letters - if hasdriveletter(path): - self.path = path - return - - # For compatibility reasons, we can't handle bundle paths as - # normal URLS - if path.startswith('bundle:'): - self.scheme = 'bundle' - path = path[7:] - if path.startswith('//'): - path = path[2:] - self.path = path - return - - if self._matchscheme(path): - parts = path.split(':', 1) - if parts[0]: - self.scheme, path = parts - self._localpath = False - - if not path: - path = None - if self._localpath: - self.path = '' - return - else: - if parsefragment and '#' in path: - path, self.fragment = path.split('#', 1) - if not path: - path = None - if self._localpath: - self.path = path - return - - if parsequery and '?' 
in path: - path, self.query = path.split('?', 1) - if not path: - path = None - if not self.query: - self.query = None - - # // is required to specify a host/authority - if path and path.startswith('//'): - parts = path[2:].split('/', 1) - if len(parts) > 1: - self.host, path = parts - path = path - else: - self.host = parts[0] - path = None - if not self.host: - self.host = None - if path: - path = '/' + path - - if self.host and '@' in self.host: - self.user, self.host = self.host.rsplit('@', 1) - if ':' in self.user: - self.user, self.passwd = self.user.split(':', 1) - if not self.host: - self.host = None - - # Don't split on colons in IPv6 addresses without ports - if (self.host and ':' in self.host and - not (self.host.startswith('[') and self.host.endswith(']'))): - self._hostport = self.host - self.host, self.port = self.host.rsplit(':', 1) - if not self.host: - self.host = None - - if (self.host and self.scheme == 'file' and - self.host not in ('localhost', '127.0.0.1', '[::1]')): - raise util.Abort(_('file:// URLs can only refer to localhost')) - - self.path = path - - for a in ('user', 'passwd', 'host', 'port', - 'path', 'query', 'fragment'): - v = getattr(self, a) - if v is not None: - setattr(self, a, urllib.unquote(v)) - - def __repr__(self): - attrs = [] - for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path', - 'query', 'fragment'): - v = getattr(self, a) - if v is not None: - attrs.append('%s: %r' % (a, v)) - return '<url %s>' % ', '.join(attrs) - - def __str__(self): - """Join the URL's components back into a URL string. 
- - Examples: - - >>> str(url('http://user:pw@host:80/?foo#bar')) - 'http://user:pw@host:80/?foo#bar' - >>> str(url('ssh://user:pw@[::1]:2200//home/joe#')) - 'ssh://user:pw@[::1]:2200//home/joe#' - >>> str(url('http://localhost:80//')) - 'http://localhost:80//' - >>> str(url('http://localhost:80/')) - 'http://localhost:80/' - >>> str(url('http://localhost:80')) - 'http://localhost:80/' - >>> str(url('bundle:foo')) - 'bundle:foo' - >>> str(url('bundle://../foo')) - 'bundle:../foo' - >>> str(url('path')) - 'path' - """ - if self._localpath: - s = self.path - if self.scheme == 'bundle': - s = 'bundle:' + s - if self.fragment: - s += '#' + self.fragment - return s - - s = self.scheme + ':' - if (self.user or self.passwd or self.host or - self.scheme and not self.path): - s += '//' - if self.user: - s += urllib.quote(self.user, safe=self._safechars) - if self.passwd: - s += ':' + urllib.quote(self.passwd, safe=self._safechars) - if self.user or self.passwd: - s += '@' - if self.host: - if not (self.host.startswith('[') and self.host.endswith(']')): - s += urllib.quote(self.host) - else: - s += self.host - if self.port: - s += ':' + urllib.quote(self.port) - if self.host: - s += '/' - if self.path: - s += urllib.quote(self.path, safe=self._safepchars) - if self.query: - s += '?' + urllib.quote(self.query, safe=self._safepchars) - if self.fragment is not None: - s += '#' + urllib.quote(self.fragment, safe=self._safepchars) - return s - - def authinfo(self): - user, passwd = self.user, self.passwd - try: - self.user, self.passwd = None, None - s = str(self) - finally: - self.user, self.passwd = user, passwd - if not self.user: - return (s, None) - return (s, (None, (str(self), self.host), - self.user, self.passwd or '')) - - def localpath(self): - if self.scheme == 'file' or self.scheme == 'bundle': - path = self.path or '/' - # For Windows, we need to promote hosts containing drive - # letters to paths with drive letters. 
- if hasdriveletter(self._hostport): - path = self._hostport + '/' + self.path - elif self.host is not None and self.path: - path = '/' + path - # We also need to handle the case of file:///C:/, which - # should return C:/, not /C:/. - elif hasdriveletter(path): - # Strip leading slash from paths with drive names - return path[1:] - return path - return self._origpath - -def hasscheme(path): - return bool(url(path).scheme) - -def hasdriveletter(path): - return path[1:2] == ':' and path[0:1].isalpha() - -def localpath(path): - return url(path, parsequery=False, parsefragment=False).localpath() - -def hidepassword(u): - '''hide user credential in a url string''' - u = url(u) - if u.passwd: - u.passwd = '***' - return str(u) - -def removeauth(u): - '''remove all authentication information from a url string''' - u = url(u) - u.user = u.passwd = None - return str(u) - def readauthforuri(ui, uri): # Read configuration config = dict() @@ -357,7 +95,7 @@ if not (proxyurl.startswith('http:') or proxyurl.startswith('https:')): proxyurl = 'http://' + proxyurl + '/' - proxy = url(proxyurl) + proxy = util.url(proxyurl) if not proxy.user: proxy.user = ui.config("http_proxy", "user") proxy.passwd = ui.config("http_proxy", "passwd") @@ -545,7 +283,7 @@ new_tunnel = False if new_tunnel or tunnel_host == req.get_full_url(): # has proxy - u = url(tunnel_host) + u = util.url(tunnel_host) if new_tunnel or u.scheme == 'https': # only use CONNECT for HTTPS h.realhostport = ':'.join([u.host, (u.port or '443')]) h.headers = req.headers.copy() @@ -876,7 +614,7 @@ return opener def open(ui, url_, data=None): - u = url(url_) + u = util.url(url_) if u.scheme: u.scheme = u.scheme.lower() url_, authinfo = u.authinfo()