comparison mercurial/hgweb/request.py @ 36900:219b23359f4c

hgweb: support constructing URLs from an alternate base URL. The web.baseurl config option allows server operators to define a custom URL for hosted content. The way it works today is that hgwebdir parses this config option into URL components, then updates the appropriate WSGI environment variables so the request "lies" about its details. For example, SERVER_NAME is updated to reflect the alternate base URL's hostname. The WSGI environment should not be modified because WSGI applications may want to know the original request details (for debugging, etc). This commit teaches our request parser about the existence of an alternate base URL. If defined, the advertised URL and other self-reflected paths will take the alternate base URL into account. The hgweb WSGI application didn't use web.baseurl. But hgwebdir did. We update hgwebdir to alter the environment parsing accordingly. The old code around environment manipulation has been removed. With this change, parserequestfromenv() has grown a bit unwieldy. Now that practically everyone is using it, it is obvious that there are some unused features that can be trimmed. So look for this in follow-up commits. Differential Revision: https://phab.mercurial-scm.org/D2822
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 11 Mar 2018 15:33:56 -0700
parents e67a2e05fa8a
children cd6ae9ab7bd8
comparison
equal deleted inserted replaced
36899:e67a2e05fa8a 36900:219b23359f4c
155 # insensitive keys. 155 # insensitive keys.
156 headers = attr.ib() 156 headers = attr.ib()
157 # Request body input stream. 157 # Request body input stream.
158 bodyfh = attr.ib() 158 bodyfh = attr.ib()
159 159
160 def parserequestfromenv(env, bodyfh, reponame=None): 160 def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
161 """Parse URL components from environment variables. 161 """Parse URL components from environment variables.
162 162
163 WSGI defines request attributes via environment variables. This function 163 WSGI defines request attributes via environment variables. This function
164 parses the environment variables into a data structure. 164 parses the environment variables into a data structure.
165 165
166 If ``reponame`` is defined, the leading path components matching that 166 If ``reponame`` is defined, the leading path components matching that
167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``. 167 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
168 This simulates the world view of a WSGI application that processes 168 This simulates the world view of a WSGI application that processes
169 requests from the base URL of a repo. 169 requests from the base URL of a repo.
170
171 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
172 is defined, it is used - instead of the WSGI environment variables - for
173 constructing URL components up to and including the WSGI application path.
174 For example, if the current WSGI application is at ``/repo`` and a request
175 is made to ``/rev/@`` with this argument set to
176 ``http://myserver:9000/prefix``, the URL and path components will resolve as
177 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
178 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
179 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.
170 """ 180 """
171 # PEP-0333 defines the WSGI spec and is a useful reference for this code. 181 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
172 182
173 # We first validate that the incoming object conforms with the WSGI spec. 183 # We first validate that the incoming object conforms with the WSGI spec.
174 # We only want to be dealing with spec-conforming WSGI implementations. 184 # We only want to be dealing with spec-conforming WSGI implementations.
175 # TODO enable this once we fix internal violations. 185 # TODO enable this once we fix internal violations.
176 #wsgiref.validate.check_environ(env) 186 #wsgiref.validate.check_environ(env)
182 if pycompat.ispy3: 192 if pycompat.ispy3:
183 env = {k.encode('latin-1'): v for k, v in env.iteritems()} 193 env = {k.encode('latin-1'): v for k, v in env.iteritems()}
184 env = {k: v.encode('latin-1') if isinstance(v, str) else v 194 env = {k: v.encode('latin-1') if isinstance(v, str) else v
185 for k, v in env.iteritems()} 195 for k, v in env.iteritems()}
186 196
197 if altbaseurl:
198 altbaseurl = util.url(altbaseurl)
199
187 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines 200 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
188 # the environment variables. 201 # the environment variables.
189 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines 202 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
190 # how URLs are reconstructed. 203 # how URLs are reconstructed.
191 fullurl = env['wsgi.url_scheme'] + '://' 204 fullurl = env['wsgi.url_scheme'] + '://'
192 advertisedfullurl = fullurl 205
193 206 if altbaseurl and altbaseurl.scheme:
194 def addport(s): 207 advertisedfullurl = altbaseurl.scheme + '://'
195 if env['wsgi.url_scheme'] == 'https': 208 else:
196 if env['SERVER_PORT'] != '443': 209 advertisedfullurl = fullurl
197 s += ':' + env['SERVER_PORT'] 210
211 def addport(s, port):
212 if s.startswith('https://'):
213 if port != '443':
214 s += ':' + port
198 else: 215 else:
199 if env['SERVER_PORT'] != '80': 216 if port != '80':
200 s += ':' + env['SERVER_PORT'] 217 s += ':' + port
201 218
202 return s 219 return s
203 220
204 if env.get('HTTP_HOST'): 221 if env.get('HTTP_HOST'):
205 fullurl += env['HTTP_HOST'] 222 fullurl += env['HTTP_HOST']
206 else: 223 else:
207 fullurl += env['SERVER_NAME'] 224 fullurl += env['SERVER_NAME']
208 fullurl = addport(fullurl) 225 fullurl = addport(fullurl, env['SERVER_PORT'])
209 226
210 advertisedfullurl += env['SERVER_NAME'] 227 if altbaseurl and altbaseurl.host:
211 advertisedfullurl = addport(advertisedfullurl) 228 advertisedfullurl += altbaseurl.host
229
230 if altbaseurl.port:
231 port = altbaseurl.port
232 elif altbaseurl.scheme == 'http' and not altbaseurl.port:
233 port = '80'
234 elif altbaseurl.scheme == 'https' and not altbaseurl.port:
235 port = '443'
236 else:
237 port = env['SERVER_PORT']
238
239 advertisedfullurl = addport(advertisedfullurl, port)
240 else:
241 advertisedfullurl += env['SERVER_NAME']
242 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
212 243
213 baseurl = fullurl 244 baseurl = fullurl
214 advertisedbaseurl = advertisedfullurl 245 advertisedbaseurl = advertisedfullurl
215 246
216 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) 247 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
217 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
218 fullurl += util.urlreq.quote(env.get('PATH_INFO', '')) 248 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
249
250 if altbaseurl:
251 path = altbaseurl.path or ''
252 if path and not path.startswith('/'):
253 path = '/' + path
254 advertisedfullurl += util.urlreq.quote(path)
255 else:
256 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
257
219 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', '')) 258 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
220 259
221 if env.get('QUERY_STRING'): 260 if env.get('QUERY_STRING'):
222 fullurl += '?' + env['QUERY_STRING'] 261 fullurl += '?' + env['QUERY_STRING']
223 advertisedfullurl += '?' + env['QUERY_STRING'] 262 advertisedfullurl += '?' + env['QUERY_STRING']
224 263
225 # If ``reponame`` is defined, that must be a prefix on PATH_INFO 264 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
226 # that represents the repository being dispatched to. When computing 265 # that represents the repository being dispatched to. When computing
227 # the dispatch info, we ignore these leading path components. 266 # the dispatch info, we ignore these leading path components.
228 267
229 apppath = env.get('SCRIPT_NAME', '') 268 if altbaseurl:
269 apppath = altbaseurl.path or ''
270 if apppath and not apppath.startswith('/'):
271 apppath = '/' + apppath
272 else:
273 apppath = env.get('SCRIPT_NAME', '')
230 274
231 if reponame: 275 if reponame:
232 repoprefix = '/' + reponame.strip('/') 276 repoprefix = '/' + reponame.strip('/')
233 277
234 if not env.get('PATH_INFO'): 278 if not env.get('PATH_INFO'):
543 587
544 WSGI applications are invoked with 2 arguments. They are used to 588 WSGI applications are invoked with 2 arguments. They are used to
545 instantiate instances of this class, which provides higher-level APIs 589 instantiate instances of this class, which provides higher-level APIs
546 for obtaining request parameters, writing HTTP output, etc. 590 for obtaining request parameters, writing HTTP output, etc.
547 """ 591 """
548 def __init__(self, wsgienv, start_response): 592 def __init__(self, wsgienv, start_response, altbaseurl=None):
549 version = wsgienv[r'wsgi.version'] 593 version = wsgienv[r'wsgi.version']
550 if (version < (1, 0)) or (version >= (2, 0)): 594 if (version < (1, 0)) or (version >= (2, 0)):
551 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" 595 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
552 % version) 596 % version)
553 597
561 self.err = wsgienv[r'wsgi.errors'] 605 self.err = wsgienv[r'wsgi.errors']
562 self.threaded = wsgienv[r'wsgi.multithread'] 606 self.threaded = wsgienv[r'wsgi.multithread']
563 self.multiprocess = wsgienv[r'wsgi.multiprocess'] 607 self.multiprocess = wsgienv[r'wsgi.multiprocess']
564 self.run_once = wsgienv[r'wsgi.run_once'] 608 self.run_once = wsgienv[r'wsgi.run_once']
565 self.env = wsgienv 609 self.env = wsgienv
566 self.req = parserequestfromenv(wsgienv, inp) 610 self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
567 self.res = wsgiresponse(self.req, start_response) 611 self.res = wsgiresponse(self.req, start_response)
568 self._start_response = start_response 612 self._start_response = start_response
569 self.server_write = None 613 self.server_write = None
570 self.headers = [] 614 self.headers = []
571 615