Mercurial > public > mercurial-scm > hg
comparison mercurial/hgweb/request.py @ 36900:219b23359f4c
hgweb: support constructing URLs from an alternate base URL
The web.baseurl config option allows server operators to define a
custom URL for hosted content.
The way it works today is that hgwebdir parses this config
option into URL components then updates the appropriate
WSGI environment variables so the request "lies" about its
details. For example, SERVER_NAME is updated to reflect the
alternate base URL's hostname.
The WSGI environment should not be modified because WSGI
applications may want to know the original request details (for
debugging, etc).
This commit teaches our request parser about the existence of
an alternate base URL. If defined, the advertised URL and other
self-reflected paths will take the alternate base URL into account.
The hgweb WSGI application didn't use web.baseurl. But hgwebdir
did. We update hgwebdir to alter the environment parsing
accordingly. The old code around environment manipulation
has been removed.
With this change, parserequestfromenv() has grown a bit
unwieldy. Now that practically everyone is using it, it is
obvious that there are some unused features that can be trimmed.
So look for this in follow-up commits.
Differential Revision: https://phab.mercurial-scm.org/D2822
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sun, 11 Mar 2018 15:33:56 -0700 |
parents | e67a2e05fa8a |
children | cd6ae9ab7bd8 |
comparison
equal
deleted
inserted
replaced
36899:e67a2e05fa8a | 36900:219b23359f4c |
---|---|
155 # insensitive keys. | 155 # insensitive keys. |
156 headers = attr.ib() | 156 headers = attr.ib() |
157 # Request body input stream. | 157 # Request body input stream. |
158 bodyfh = attr.ib() | 158 bodyfh = attr.ib() |
159 | 159 |
160 def parserequestfromenv(env, bodyfh, reponame=None): | 160 def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None): |
161 """Parse URL components from environment variables. | 161 """Parse URL components from environment variables. |
162 | 162 |
163 WSGI defines request attributes via environment variables. This function | 163 WSGI defines request attributes via environment variables. This function |
164 parses the environment variables into a data structure. | 164 parses the environment variables into a data structure. |
165 | 165 |
166 If ``reponame`` is defined, the leading path components matching that | 166 If ``reponame`` is defined, the leading path components matching that |
167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``. | 167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``. |
168 This simulates the world view of a WSGI application that processes | 168 This simulates the world view of a WSGI application that processes |
169 requests from the base URL of a repo. | 169 requests from the base URL of a repo. |
170 | |
171 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option) | |
172 is defined, it is used - instead of the WSGI environment variables - for | |
173 constructing URL components up to and including the WSGI application path. | |
174 For example, if the current WSGI application is at ``/repo`` and a request | |
175 is made to ``/rev/@`` with this argument set to | |
176 ``http://myserver:9000/prefix``, the URL and path components will resolve as | |
177 if the request were to ``http://myserver:9000/prefix/rev/@``. In other | |
178 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and | |
179 ``SCRIPT_NAME`` are all effectively replaced by components from this URL. | |
170 """ | 180 """ |
171 # PEP-0333 defines the WSGI spec and is a useful reference for this code. | 181 # PEP 3333 defines the WSGI spec and is a useful reference for this code. |
172 | 182 |
173 # We first validate that the incoming object conforms with the WSGI spec. | 183 # We first validate that the incoming object conforms with the WSGI spec. |
174 # We only want to be dealing with spec-conforming WSGI implementations. | 184 # We only want to be dealing with spec-conforming WSGI implementations. |
175 # TODO enable this once we fix internal violations. | 185 # TODO enable this once we fix internal violations. |
176 #wsgiref.validate.check_environ(env) | 186 #wsgiref.validate.check_environ(env) |
182 if pycompat.ispy3: | 192 if pycompat.ispy3: |
183 env = {k.encode('latin-1'): v for k, v in env.iteritems()} | 193 env = {k.encode('latin-1'): v for k, v in env.iteritems()} |
184 env = {k: v.encode('latin-1') if isinstance(v, str) else v | 194 env = {k: v.encode('latin-1') if isinstance(v, str) else v |
185 for k, v in env.iteritems()} | 195 for k, v in env.iteritems()} |
186 | 196 |
197 if altbaseurl: | |
198 altbaseurl = util.url(altbaseurl) | |
199 | |
187 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines | 200 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines |
188 # the environment variables. | 201 # the environment variables. |
189 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines | 202 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines |
190 # how URLs are reconstructed. | 203 # how URLs are reconstructed. |
191 fullurl = env['wsgi.url_scheme'] + '://' | 204 fullurl = env['wsgi.url_scheme'] + '://' |
192 advertisedfullurl = fullurl | 205 |
193 | 206 if altbaseurl and altbaseurl.scheme: |
194 def addport(s): | 207 advertisedfullurl = altbaseurl.scheme + '://' |
195 if env['wsgi.url_scheme'] == 'https': | 208 else: |
196 if env['SERVER_PORT'] != '443': | 209 advertisedfullurl = fullurl |
197 s += ':' + env['SERVER_PORT'] | 210 |
211 def addport(s, port): | |
212 if s.startswith('https://'): | |
213 if port != '443': | |
214 s += ':' + port | |
198 else: | 215 else: |
199 if env['SERVER_PORT'] != '80': | 216 if port != '80': |
200 s += ':' + env['SERVER_PORT'] | 217 s += ':' + port |
201 | 218 |
202 return s | 219 return s |
203 | 220 |
204 if env.get('HTTP_HOST'): | 221 if env.get('HTTP_HOST'): |
205 fullurl += env['HTTP_HOST'] | 222 fullurl += env['HTTP_HOST'] |
206 else: | 223 else: |
207 fullurl += env['SERVER_NAME'] | 224 fullurl += env['SERVER_NAME'] |
208 fullurl = addport(fullurl) | 225 fullurl = addport(fullurl, env['SERVER_PORT']) |
209 | 226 |
210 advertisedfullurl += env['SERVER_NAME'] | 227 if altbaseurl and altbaseurl.host: |
211 advertisedfullurl = addport(advertisedfullurl) | 228 advertisedfullurl += altbaseurl.host |
229 | |
230 if altbaseurl.port: | |
231 port = altbaseurl.port | |
232 elif altbaseurl.scheme == 'http' and not altbaseurl.port: | |
233 port = '80' | |
234 elif altbaseurl.scheme == 'https' and not altbaseurl.port: | |
235 port = '443' | |
236 else: | |
237 port = env['SERVER_PORT'] | |
238 | |
239 advertisedfullurl = addport(advertisedfullurl, port) | |
240 else: | |
241 advertisedfullurl += env['SERVER_NAME'] | |
242 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT']) | |
212 | 243 |
213 baseurl = fullurl | 244 baseurl = fullurl |
214 advertisedbaseurl = advertisedfullurl | 245 advertisedbaseurl = advertisedfullurl |
215 | 246 |
216 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) | 247 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) |
217 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) | |
218 fullurl += util.urlreq.quote(env.get('PATH_INFO', '')) | 248 fullurl += util.urlreq.quote(env.get('PATH_INFO', '')) |
249 | |
250 if altbaseurl: | |
251 path = altbaseurl.path or '' | |
252 if path and not path.startswith('/'): | |
253 path = '/' + path | |
254 advertisedfullurl += util.urlreq.quote(path) | |
255 else: | |
256 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) | |
257 | |
219 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', '')) | 258 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', '')) |
220 | 259 |
221 if env.get('QUERY_STRING'): | 260 if env.get('QUERY_STRING'): |
222 fullurl += '?' + env['QUERY_STRING'] | 261 fullurl += '?' + env['QUERY_STRING'] |
223 advertisedfullurl += '?' + env['QUERY_STRING'] | 262 advertisedfullurl += '?' + env['QUERY_STRING'] |
224 | 263 |
225 # If ``reponame`` is defined, that must be a prefix on PATH_INFO | 264 # If ``reponame`` is defined, that must be a prefix on PATH_INFO |
226 # that represents the repository being dispatched to. When computing | 265 # that represents the repository being dispatched to. When computing |
227 # the dispatch info, we ignore these leading path components. | 266 # the dispatch info, we ignore these leading path components. |
228 | 267 |
229 apppath = env.get('SCRIPT_NAME', '') | 268 if altbaseurl: |
269 apppath = altbaseurl.path or '' | |
270 if apppath and not apppath.startswith('/'): | |
271 apppath = '/' + apppath | |
272 else: | |
273 apppath = env.get('SCRIPT_NAME', '') | |
230 | 274 |
231 if reponame: | 275 if reponame: |
232 repoprefix = '/' + reponame.strip('/') | 276 repoprefix = '/' + reponame.strip('/') |
233 | 277 |
234 if not env.get('PATH_INFO'): | 278 if not env.get('PATH_INFO'): |
543 | 587 |
544 WSGI applications are invoked with 2 arguments. They are used to | 588 WSGI applications are invoked with 2 arguments. They are used to |
545 instantiate instances of this class, which provides higher-level APIs | 589 instantiate instances of this class, which provides higher-level APIs |
546 for obtaining request parameters, writing HTTP output, etc. | 590 for obtaining request parameters, writing HTTP output, etc. |
547 """ | 591 """ |
548 def __init__(self, wsgienv, start_response): | 592 def __init__(self, wsgienv, start_response, altbaseurl=None): |
549 version = wsgienv[r'wsgi.version'] | 593 version = wsgienv[r'wsgi.version'] |
550 if (version < (1, 0)) or (version >= (2, 0)): | 594 if (version < (1, 0)) or (version >= (2, 0)): |
551 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" | 595 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" |
552 % version) | 596 % version) |
553 | 597 |
561 self.err = wsgienv[r'wsgi.errors'] | 605 self.err = wsgienv[r'wsgi.errors'] |
562 self.threaded = wsgienv[r'wsgi.multithread'] | 606 self.threaded = wsgienv[r'wsgi.multithread'] |
563 self.multiprocess = wsgienv[r'wsgi.multiprocess'] | 607 self.multiprocess = wsgienv[r'wsgi.multiprocess'] |
564 self.run_once = wsgienv[r'wsgi.run_once'] | 608 self.run_once = wsgienv[r'wsgi.run_once'] |
565 self.env = wsgienv | 609 self.env = wsgienv |
566 self.req = parserequestfromenv(wsgienv, inp) | 610 self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl) |
567 self.res = wsgiresponse(self.req, start_response) | 611 self.res = wsgiresponse(self.req, start_response) |
568 self._start_response = start_response | 612 self._start_response = start_response |
569 self.server_write = None | 613 self.server_write = None |
570 self.headers = [] | 614 self.headers = [] |
571 | 615 |