6 # This software may be used and distributed according to the terms of the |
6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2 or any later version. |
7 # GNU General Public License version 2 or any later version. |
8 |
8 |
9 from __future__ import absolute_import |
9 from __future__ import absolute_import |
10 |
10 |
11 #import wsgiref.validate |
11 # import wsgiref.validate |
12 |
12 |
13 from ..thirdparty import ( |
13 from ..thirdparty import attr |
14 attr, |
|
15 ) |
|
16 from .. import ( |
14 from .. import ( |
17 error, |
15 error, |
18 pycompat, |
16 pycompat, |
19 util, |
17 util, |
20 ) |
18 ) |
21 |
19 |
|
20 |
22 class multidict(object): |
21 class multidict(object): |
23 """A dict like object that can store multiple values for a key. |
22 """A dict like object that can store multiple values for a key. |
24 |
23 |
25 Used to store parsed request parameters. |
24 Used to store parsed request parameters. |
26 |
25 |
27 This is inspired by WebOb's class of the same name. |
26 This is inspired by WebOb's class of the same name. |
28 """ |
27 """ |
|
28 |
29 def __init__(self): |
29 def __init__(self): |
30 self._items = {} |
30 self._items = {} |
31 |
31 |
32 def __getitem__(self, key): |
32 def __getitem__(self, key): |
33 """Returns the last set value for a key.""" |
33 """Returns the last set value for a key.""" |
122 # Request body input stream. |
123 # Request body input stream. |
123 bodyfh = attr.ib() |
124 bodyfh = attr.ib() |
124 # WSGI environment dict, unmodified. |
125 # WSGI environment dict, unmodified. |
125 rawenv = attr.ib() |
126 rawenv = attr.ib() |
126 |
127 |
|
128 |
127 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None): |
129 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None): |
128 """Parse URL components from environment variables. |
130 """Parse URL components from environment variables. |
129 |
131 |
130 WSGI defines request attributes via environment variables. This function |
132 WSGI defines request attributes via environment variables. This function |
131 parses the environment variables into a data structure. |
133 parses the environment variables into a data structure. |
151 # PEP 3333 defines the WSGI spec and is a useful reference for this code. |
153 # PEP 3333 defines the WSGI spec and is a useful reference for this code. |
152 |
154 |
153 # We first validate that the incoming object conforms with the WSGI spec. |
155 # We first validate that the incoming object conforms with the WSGI spec. |
154 # We only want to be dealing with spec-conforming WSGI implementations. |
156 # We only want to be dealing with spec-conforming WSGI implementations. |
155 # TODO enable this once we fix internal violations. |
157 # TODO enable this once we fix internal violations. |
156 #wsgiref.validate.check_environ(env) |
158 # wsgiref.validate.check_environ(env) |
157 |
159 |
158 # PEP 3333 states that environment keys and values are native strings |
160 # PEP 3333 states that environment keys and values are native strings |
159 # (bytes on Python 2 and str on Python 3). The code points for the Unicode |
161 # (bytes on Python 2 and str on Python 3). The code points for the Unicode |
160 # strings on Python 3 must be between U+0000 and U+00FF. We deal with bytes |
162 # strings on Python 3 must be between U+0000 and U+00FF. We deal with bytes |
161 # in Mercurial, so mass convert string keys and values to bytes. |
163 # in Mercurial, so mass convert string keys and values to bytes. |
162 if pycompat.ispy3: |
164 if pycompat.ispy3: |
163 env = {k.encode('latin-1'): v for k, v in env.iteritems()} |
165 env = {k.encode('latin-1'): v for k, v in env.iteritems()} |
164 env = {k: v.encode('latin-1') if isinstance(v, str) else v |
166 env = { |
165 for k, v in env.iteritems()} |
167 k: v.encode('latin-1') if isinstance(v, str) else v |
|
168 for k, v in env.iteritems() |
|
169 } |
166 |
170 |
167 # Some hosting solutions are emulating hgwebdir, and dispatching directly |
171 # Some hosting solutions are emulating hgwebdir, and dispatching directly |
168 # to an hgweb instance using this environment variable. This was always |
172 # to an hgweb instance using this environment variable. This was always |
169 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. |
173 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. |
170 if not reponame: |
174 if not reponame: |
253 |
257 |
254 if not env.get('PATH_INFO'): |
258 if not env.get('PATH_INFO'): |
255 raise error.ProgrammingError('reponame requires PATH_INFO') |
259 raise error.ProgrammingError('reponame requires PATH_INFO') |
256 |
260 |
257 if not env['PATH_INFO'].startswith(repoprefix): |
261 if not env['PATH_INFO'].startswith(repoprefix): |
258 raise error.ProgrammingError('PATH_INFO does not begin with repo ' |
262 raise error.ProgrammingError( |
259 'name: %s (%s)' % (env['PATH_INFO'], |
263 'PATH_INFO does not begin with repo ' |
260 reponame)) |
264 'name: %s (%s)' % (env['PATH_INFO'], reponame) |
261 |
265 ) |
262 dispatchpath = env['PATH_INFO'][len(repoprefix):] |
266 |
|
267 dispatchpath = env['PATH_INFO'][len(repoprefix) :] |
263 |
268 |
264 if dispatchpath and not dispatchpath.startswith('/'): |
269 if dispatchpath and not dispatchpath.startswith('/'): |
265 raise error.ProgrammingError('reponame prefix of PATH_INFO does ' |
270 raise error.ProgrammingError( |
266 'not end at path delimiter: %s (%s)' % |
271 'reponame prefix of PATH_INFO does ' |
267 (env['PATH_INFO'], reponame)) |
272 'not end at path delimiter: %s (%s)' |
|
273 % (env['PATH_INFO'], reponame) |
|
274 ) |
268 |
275 |
269 apppath = apppath.rstrip('/') + repoprefix |
276 apppath = apppath.rstrip('/') + repoprefix |
270 dispatchparts = dispatchpath.strip('/').split('/') |
277 dispatchparts = dispatchpath.strip('/').split('/') |
271 dispatchpath = '/'.join(dispatchparts) |
278 dispatchpath = '/'.join(dispatchparts) |
272 |
279 |
293 # perform case normalization for us. We just rewrite underscore to dash |
300 # perform case normalization for us. We just rewrite underscore to dash |
294 # so keys match what likely went over the wire. |
301 # so keys match what likely went over the wire. |
295 headers = [] |
302 headers = [] |
296 for k, v in env.iteritems(): |
303 for k, v in env.iteritems(): |
297 if k.startswith('HTTP_'): |
304 if k.startswith('HTTP_'): |
298 headers.append((k[len('HTTP_'):].replace('_', '-'), v)) |
305 headers.append((k[len('HTTP_') :].replace('_', '-'), v)) |
299 |
306 |
300 from . import wsgiheaders # avoid cycle |
307 from . import wsgiheaders # avoid cycle |
|
308 |
301 headers = wsgiheaders.Headers(headers) |
309 headers = wsgiheaders.Headers(headers) |
302 |
310 |
303 # This is kind of a lie because the HTTP header wasn't explicitly |
311 # This is kind of a lie because the HTTP header wasn't explicitly |
304 # sent. But for all intents and purposes it should be OK to lie about |
312 # sent. But for all intents and purposes it should be OK to lie about |
305 # this, since a consumer will use either value to determine how many |
313 # this, since a consumer will use either value to determine how many |
311 headers['Content-Type'] = env['CONTENT_TYPE'] |
319 headers['Content-Type'] = env['CONTENT_TYPE'] |
312 |
320 |
313 if bodyfh is None: |
321 if bodyfh is None: |
314 bodyfh = env['wsgi.input'] |
322 bodyfh = env['wsgi.input'] |
315 if 'Content-Length' in headers: |
323 if 'Content-Length' in headers: |
316 bodyfh = util.cappedreader(bodyfh, |
324 bodyfh = util.cappedreader( |
317 int(headers['Content-Length'] or '0')) |
325 bodyfh, int(headers['Content-Length'] or '0') |
318 |
326 ) |
319 return parsedrequest(method=env['REQUEST_METHOD'], |
327 |
320 url=fullurl, baseurl=baseurl, |
328 return parsedrequest( |
321 advertisedurl=advertisedfullurl, |
329 method=env['REQUEST_METHOD'], |
322 advertisedbaseurl=advertisedbaseurl, |
330 url=fullurl, |
323 urlscheme=env['wsgi.url_scheme'], |
331 baseurl=baseurl, |
324 remoteuser=env.get('REMOTE_USER'), |
332 advertisedurl=advertisedfullurl, |
325 remotehost=env.get('REMOTE_HOST'), |
333 advertisedbaseurl=advertisedbaseurl, |
326 apppath=apppath, |
334 urlscheme=env['wsgi.url_scheme'], |
327 dispatchparts=dispatchparts, dispatchpath=dispatchpath, |
335 remoteuser=env.get('REMOTE_USER'), |
328 reponame=reponame, |
336 remotehost=env.get('REMOTE_HOST'), |
329 querystring=querystring, |
337 apppath=apppath, |
330 qsparams=qsparams, |
338 dispatchparts=dispatchparts, |
331 headers=headers, |
339 dispatchpath=dispatchpath, |
332 bodyfh=bodyfh, |
340 reponame=reponame, |
333 rawenv=env) |
341 querystring=querystring, |
|
342 qsparams=qsparams, |
|
343 headers=headers, |
|
344 bodyfh=bodyfh, |
|
345 rawenv=env, |
|
346 ) |
|
347 |
334 |
348 |
335 class offsettrackingwriter(object): |
349 class offsettrackingwriter(object): |
336 """A file object like object that is append only and tracks write count. |
350 """A file object like object that is append only and tracks write count. |
337 |
351 |
338 Instances are bound to a callable. This callable is called with data |
352 Instances are bound to a callable. This callable is called with data |
343 |
357 |
344 The intent of this class is to wrap the ``write()`` function returned by |
358 The intent of this class is to wrap the ``write()`` function returned by |
345 a WSGI ``start_response()`` function. Since ``write()`` is a callable and |
359 a WSGI ``start_response()`` function. Since ``write()`` is a callable and |
346 not a file object, it doesn't implement other file object methods. |
360 not a file object, it doesn't implement other file object methods. |
347 """ |
361 """ |
|
362 |
348 def __init__(self, writefn): |
363 def __init__(self, writefn): |
349 self._write = writefn |
364 self._write = writefn |
350 self._offset = 0 |
365 self._offset = 0 |
351 |
366 |
352 def write(self, s): |
367 def write(self, s): |
387 """ |
403 """ |
388 self._req = req |
404 self._req = req |
389 self._startresponse = startresponse |
405 self._startresponse = startresponse |
390 |
406 |
391 self.status = None |
407 self.status = None |
392 from . import wsgiheaders # avoid cycle |
408 from . import wsgiheaders # avoid cycle |
|
409 |
393 self.headers = wsgiheaders.Headers([]) |
410 self.headers = wsgiheaders.Headers([]) |
394 |
411 |
395 self._bodybytes = None |
412 self._bodybytes = None |
396 self._bodygen = None |
413 self._bodygen = None |
397 self._bodywillwrite = False |
414 self._bodywillwrite = False |
398 self._started = False |
415 self._started = False |
399 self._bodywritefn = None |
416 self._bodywritefn = None |
400 |
417 |
401 def _verifybody(self): |
418 def _verifybody(self): |
402 if (self._bodybytes is not None or self._bodygen is not None |
419 if ( |
403 or self._bodywillwrite): |
420 self._bodybytes is not None |
|
421 or self._bodygen is not None |
|
422 or self._bodywillwrite |
|
423 ): |
404 raise error.ProgrammingError('cannot define body multiple times') |
424 raise error.ProgrammingError('cannot define body multiple times') |
405 |
425 |
406 def setbodybytes(self, b): |
426 def setbodybytes(self, b): |
407 """Define the response body as static bytes. |
427 """Define the response body as static bytes. |
408 |
428 |
448 self._started = True |
468 self._started = True |
449 |
469 |
450 if not self.status: |
470 if not self.status: |
451 raise error.ProgrammingError('status line not defined') |
471 raise error.ProgrammingError('status line not defined') |
452 |
472 |
453 if (self._bodybytes is None and self._bodygen is None |
473 if ( |
454 and not self._bodywillwrite): |
474 self._bodybytes is None |
|
475 and self._bodygen is None |
|
476 and not self._bodywillwrite |
|
477 ): |
455 raise error.ProgrammingError('response body not defined') |
478 raise error.ProgrammingError('response body not defined') |
456 |
479 |
457 # RFC 7232 Section 4.1 states that a 304 MUST generate one of |
480 # RFC 7232 Section 4.1 states that a 304 MUST generate one of |
458 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} |
481 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} |
459 # and SHOULD NOT generate other headers unless they could be used |
482 # and SHOULD NOT generate other headers unless they could be used |
467 if self.headers.get('Content-Length') == '0': |
490 if self.headers.get('Content-Length') == '0': |
468 del self.headers['Content-Length'] |
491 del self.headers['Content-Length'] |
469 |
492 |
470 # Strictly speaking, this is too strict. But until it causes |
493 # Strictly speaking, this is too strict. But until it causes |
471 # problems, let's be strict. |
494 # problems, let's be strict. |
472 badheaders = {k for k in self.headers.keys() |
495 badheaders = { |
473 if k.lower() not in ('date', 'etag', 'expires', |
496 k |
474 'cache-control', |
497 for k in self.headers.keys() |
475 'content-location', |
498 if k.lower() |
476 'content-security-policy', |
499 not in ( |
477 'vary')} |
500 'date', |
|
501 'etag', |
|
502 'expires', |
|
503 'cache-control', |
|
504 'content-location', |
|
505 'content-security-policy', |
|
506 'vary', |
|
507 ) |
|
508 } |
478 if badheaders: |
509 if badheaders: |
479 raise error.ProgrammingError( |
510 raise error.ProgrammingError( |
480 'illegal header on 304 response: %s' % |
511 'illegal header on 304 response: %s' |
481 ', '.join(sorted(badheaders))) |
512 % ', '.join(sorted(badheaders)) |
|
513 ) |
482 |
514 |
483 if self._bodygen is not None or self._bodywillwrite: |
515 if self._bodygen is not None or self._bodywillwrite: |
484 raise error.ProgrammingError("must use setbodybytes('') with " |
516 raise error.ProgrammingError( |
485 "304 responses") |
517 "must use setbodybytes('') with " "304 responses" |
|
518 ) |
486 |
519 |
487 # Various HTTP clients (notably httplib) won't read the HTTP response |
520 # Various HTTP clients (notably httplib) won't read the HTTP response |
488 # until the HTTP request has been sent in full. If servers (us) send a |
521 # until the HTTP request has been sent in full. If servers (us) send a |
489 # response before the HTTP request has been fully sent, the connection |
522 # response before the HTTP request has been fully sent, the connection |
490 # may deadlock because neither end is reading. |
523 # may deadlock because neither end is reading. |
529 while True: |
562 while True: |
530 chunk = self._req.bodyfh.read(32768) |
563 chunk = self._req.bodyfh.read(32768) |
531 if not chunk: |
564 if not chunk: |
532 break |
565 break |
533 |
566 |
534 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for |
567 strheaders = [ |
535 k, v in self.headers.items()] |
568 (pycompat.strurl(k), pycompat.strurl(v)) |
536 write = self._startresponse(pycompat.sysstr(self.status), |
569 for k, v in self.headers.items() |
537 strheaders) |
570 ] |
|
571 write = self._startresponse(pycompat.sysstr(self.status), strheaders) |
538 |
572 |
539 if self._bodybytes: |
573 if self._bodybytes: |
540 yield self._bodybytes |
574 yield self._bodybytes |
541 elif self._bodygen: |
575 elif self._bodygen: |
542 for chunk in self._bodygen: |
576 for chunk in self._bodygen: |
564 """ |
598 """ |
565 if not self._bodywillwrite: |
599 if not self._bodywillwrite: |
566 raise error.ProgrammingError('must call setbodywillwrite() first') |
600 raise error.ProgrammingError('must call setbodywillwrite() first') |
567 |
601 |
568 if not self._started: |
602 if not self._started: |
569 raise error.ProgrammingError('must call sendresponse() first; did ' |
603 raise error.ProgrammingError( |
570 'you remember to consume it since it ' |
604 'must call sendresponse() first; did ' |
571 'is a generator?') |
605 'you remember to consume it since it ' |
|
606 'is a generator?' |
|
607 ) |
572 |
608 |
573 assert self._bodywritefn |
609 assert self._bodywritefn |
574 return offsettrackingwriter(self._bodywritefn) |
610 return offsettrackingwriter(self._bodywritefn) |
|
611 |
575 |
612 |
def wsgiapplication(app_maker):
    """Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called exactly once, immediately and without
    arguments. The returned function forwards ``(env, respond)`` to the
    application that ``app_maker`` produced and returns its result.
    """
    application = app_maker()

    def run_wsgi(env, respond):
        return application(env, respond)

    return run_wsgi