Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/url.py @ 13848:b2798c1defff
url: be stricter about detecting schemes
While the URL parser is very forgiving about what characters are
allowed in each component, it's useful to be strict about the scheme
so we don't accidentally interpret local paths with colons as URLs.
This restricts schemes to alphanumeric characters, dashes, plus signs,
and dots (the set of characters RFC 2396 allows in a scheme).
author | Brodie Rao <brodie@bitheap.org> |
---|---|
date | Thu, 31 Mar 2011 17:37:33 -0700 |
parents | f1823b9f073b |
children | fab10e7cacd6 |
comparison
equal
deleted
inserted
replaced
13845:ddcb57a2eaeb | 13848:b2798c1defff |
---|---|
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> |
6 # | 6 # |
7 # This software may be used and distributed according to the terms of the | 7 # This software may be used and distributed according to the terms of the |
8 # GNU General Public License version 2 or any later version. | 8 # GNU General Public License version 2 or any later version. |
9 | 9 |
10 import urllib, urllib2, httplib, os, socket, cStringIO | 10 import urllib, urllib2, httplib, os, socket, cStringIO, re |
11 import __builtin__ | 11 import __builtin__ |
12 from i18n import _ | 12 from i18n import _ |
13 import keepalive, util | 13 import keepalive, util |
14 | 14 |
15 class url(object): | 15 class url(object): |
62 <url scheme: 'http', host: 'host', path: 'a?b#c'> | 62 <url scheme: 'http', host: 'host', path: 'a?b#c'> |
63 """ | 63 """ |
64 | 64 |
65 _safechars = "!~*'()+" | 65 _safechars = "!~*'()+" |
66 _safepchars = "/!~*'()+" | 66 _safepchars = "/!~*'()+" |
67 _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match | |
67 | 68 |
68 def __init__(self, path, parsequery=True, parsefragment=True): | 69 def __init__(self, path, parsequery=True, parsefragment=True): |
69 # We slowly chomp away at path until we have only the path left | 70 # We slowly chomp away at path until we have only the path left |
70 self.scheme = self.user = self.passwd = self.host = None | 71 self.scheme = self.user = self.passwd = self.host = None |
71 self.port = self.path = self.query = self.fragment = None | 72 self.port = self.path = self.query = self.fragment = None |
86 if path.startswith('//'): | 87 if path.startswith('//'): |
87 path = path[2:] | 88 path = path[2:] |
88 self.path = path | 89 self.path = path |
89 return | 90 return |
90 | 91 |
91 if not path.startswith('/') and ':' in path: | 92 if self._matchscheme(path): |
92 parts = path.split(':', 1) | 93 parts = path.split(':', 1) |
93 if parts[0]: | 94 if parts[0]: |
94 self.scheme, path = parts | 95 self.scheme, path = parts |
95 self._localpath = False | 96 self._localpath = False |
96 | 97 |