Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/hgweb/request.py @ 43076:2372284d9457
formatting: blacken the codebase
This is using my patch to black
(https://github.com/psf/black/pull/826) so we don't un-wrap collection
literals.
Done with:
hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S
# skip-blame mass-reformatting only
# no-check-commit reformats foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D6971
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 06 Oct 2019 09:45:02 -0400 |
parents | 6107d4549fcc |
children | 687b865b95ad |
comparison
equal
deleted
inserted
replaced
43075:57875cf423c9 | 43076:2372284d9457 |
---|---|
6 # This software may be used and distributed according to the terms of the | 6 # This software may be used and distributed according to the terms of the |
7 # GNU General Public License version 2 or any later version. | 7 # GNU General Public License version 2 or any later version. |
8 | 8 |
9 from __future__ import absolute_import | 9 from __future__ import absolute_import |
10 | 10 |
11 #import wsgiref.validate | 11 # import wsgiref.validate |
12 | 12 |
13 from ..thirdparty import ( | 13 from ..thirdparty import attr |
14 attr, | |
15 ) | |
16 from .. import ( | 14 from .. import ( |
17 error, | 15 error, |
18 pycompat, | 16 pycompat, |
19 util, | 17 util, |
20 ) | 18 ) |
21 | 19 |
20 | |
22 class multidict(object): | 21 class multidict(object): |
23 """A dict like object that can store multiple values for a key. | 22 """A dict like object that can store multiple values for a key. |
24 | 23 |
25 Used to store parsed request parameters. | 24 Used to store parsed request parameters. |
26 | 25 |
27 This is inspired by WebOb's class of the same name. | 26 This is inspired by WebOb's class of the same name. |
28 """ | 27 """ |
28 | |
29 def __init__(self): | 29 def __init__(self): |
30 self._items = {} | 30 self._items = {} |
31 | 31 |
32 def __getitem__(self, key): | 32 def __getitem__(self, key): |
33 """Returns the last set value for a key.""" | 33 """Returns the last set value for a key.""" |
73 | 73 |
74 return vals[0] | 74 return vals[0] |
75 | 75 |
76 def asdictoflists(self): | 76 def asdictoflists(self): |
77 return {k: list(v) for k, v in self._items.iteritems()} | 77 return {k: list(v) for k, v in self._items.iteritems()} |
78 | |
78 | 79 |
79 @attr.s(frozen=True) | 80 @attr.s(frozen=True) |
80 class parsedrequest(object): | 81 class parsedrequest(object): |
81 """Represents a parsed WSGI request. | 82 """Represents a parsed WSGI request. |
82 | 83 |
122 # Request body input stream. | 123 # Request body input stream. |
123 bodyfh = attr.ib() | 124 bodyfh = attr.ib() |
124 # WSGI environment dict, unmodified. | 125 # WSGI environment dict, unmodified. |
125 rawenv = attr.ib() | 126 rawenv = attr.ib() |
126 | 127 |
128 | |
127 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None): | 129 def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None): |
128 """Parse URL components from environment variables. | 130 """Parse URL components from environment variables. |
129 | 131 |
130 WSGI defines request attributes via environment variables. This function | 132 WSGI defines request attributes via environment variables. This function |
131 parses the environment variables into a data structure. | 133 parses the environment variables into a data structure. |
151 # PEP 3333 defines the WSGI spec and is a useful reference for this code. | 153 # PEP 3333 defines the WSGI spec and is a useful reference for this code. |
152 | 154 |
153 # We first validate that the incoming object conforms with the WSGI spec. | 155 # We first validate that the incoming object conforms with the WSGI spec. |
154 # We only want to be dealing with spec-conforming WSGI implementations. | 156 # We only want to be dealing with spec-conforming WSGI implementations. |
155 # TODO enable this once we fix internal violations. | 157 # TODO enable this once we fix internal violations. |
156 #wsgiref.validate.check_environ(env) | 158 # wsgiref.validate.check_environ(env) |
157 | 159 |
158 # PEP-0333 states that environment keys and values are native strings | 160 # PEP-0333 states that environment keys and values are native strings |
159 # (bytes on Python 2 and str on Python 3). The code points for the Unicode | 161 # (bytes on Python 2 and str on Python 3). The code points for the Unicode |
160 # strings on Python 3 must be between \00000-\000FF. We deal with bytes | 162 # strings on Python 3 must be between \00000-\000FF. We deal with bytes |
161 # in Mercurial, so mass convert string keys and values to bytes. | 163 # in Mercurial, so mass convert string keys and values to bytes. |
162 if pycompat.ispy3: | 164 if pycompat.ispy3: |
163 env = {k.encode('latin-1'): v for k, v in env.iteritems()} | 165 env = {k.encode('latin-1'): v for k, v in env.iteritems()} |
164 env = {k: v.encode('latin-1') if isinstance(v, str) else v | 166 env = { |
165 for k, v in env.iteritems()} | 167 k: v.encode('latin-1') if isinstance(v, str) else v |
168 for k, v in env.iteritems() | |
169 } | |
166 | 170 |
167 # Some hosting solutions are emulating hgwebdir, and dispatching directly | 171 # Some hosting solutions are emulating hgwebdir, and dispatching directly |
168 # to an hgweb instance using this environment variable. This was always | 172 # to an hgweb instance using this environment variable. This was always |
169 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. | 173 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. |
170 if not reponame: | 174 if not reponame: |
253 | 257 |
254 if not env.get('PATH_INFO'): | 258 if not env.get('PATH_INFO'): |
255 raise error.ProgrammingError('reponame requires PATH_INFO') | 259 raise error.ProgrammingError('reponame requires PATH_INFO') |
256 | 260 |
257 if not env['PATH_INFO'].startswith(repoprefix): | 261 if not env['PATH_INFO'].startswith(repoprefix): |
258 raise error.ProgrammingError('PATH_INFO does not begin with repo ' | 262 raise error.ProgrammingError( |
259 'name: %s (%s)' % (env['PATH_INFO'], | 263 'PATH_INFO does not begin with repo ' |
260 reponame)) | 264 'name: %s (%s)' % (env['PATH_INFO'], reponame) |
261 | 265 ) |
262 dispatchpath = env['PATH_INFO'][len(repoprefix):] | 266 |
267 dispatchpath = env['PATH_INFO'][len(repoprefix) :] | |
263 | 268 |
264 if dispatchpath and not dispatchpath.startswith('/'): | 269 if dispatchpath and not dispatchpath.startswith('/'): |
265 raise error.ProgrammingError('reponame prefix of PATH_INFO does ' | 270 raise error.ProgrammingError( |
266 'not end at path delimiter: %s (%s)' % | 271 'reponame prefix of PATH_INFO does ' |
267 (env['PATH_INFO'], reponame)) | 272 'not end at path delimiter: %s (%s)' |
273 % (env['PATH_INFO'], reponame) | |
274 ) | |
268 | 275 |
269 apppath = apppath.rstrip('/') + repoprefix | 276 apppath = apppath.rstrip('/') + repoprefix |
270 dispatchparts = dispatchpath.strip('/').split('/') | 277 dispatchparts = dispatchpath.strip('/').split('/') |
271 dispatchpath = '/'.join(dispatchparts) | 278 dispatchpath = '/'.join(dispatchparts) |
272 | 279 |
293 # perform case normalization for us. We just rewrite underscore to dash | 300 # perform case normalization for us. We just rewrite underscore to dash |
294 # so keys match what likely went over the wire. | 301 # so keys match what likely went over the wire. |
295 headers = [] | 302 headers = [] |
296 for k, v in env.iteritems(): | 303 for k, v in env.iteritems(): |
297 if k.startswith('HTTP_'): | 304 if k.startswith('HTTP_'): |
298 headers.append((k[len('HTTP_'):].replace('_', '-'), v)) | 305 headers.append((k[len('HTTP_') :].replace('_', '-'), v)) |
299 | 306 |
300 from . import wsgiheaders # avoid cycle | 307 from . import wsgiheaders # avoid cycle |
308 | |
301 headers = wsgiheaders.Headers(headers) | 309 headers = wsgiheaders.Headers(headers) |
302 | 310 |
303 # This is kind of a lie because the HTTP header wasn't explicitly | 311 # This is kind of a lie because the HTTP header wasn't explicitly |
304 # sent. But for all intents and purposes it should be OK to lie about | 312 # sent. But for all intents and purposes it should be OK to lie about |
305 # this, since a consumer will use either value to determine how many | 313 # this, since a consumer will use either value to determine how many |
311 headers['Content-Type'] = env['CONTENT_TYPE'] | 319 headers['Content-Type'] = env['CONTENT_TYPE'] |
312 | 320 |
313 if bodyfh is None: | 321 if bodyfh is None: |
314 bodyfh = env['wsgi.input'] | 322 bodyfh = env['wsgi.input'] |
315 if 'Content-Length' in headers: | 323 if 'Content-Length' in headers: |
316 bodyfh = util.cappedreader(bodyfh, | 324 bodyfh = util.cappedreader( |
317 int(headers['Content-Length'] or '0')) | 325 bodyfh, int(headers['Content-Length'] or '0') |
318 | 326 ) |
319 return parsedrequest(method=env['REQUEST_METHOD'], | 327 |
320 url=fullurl, baseurl=baseurl, | 328 return parsedrequest( |
321 advertisedurl=advertisedfullurl, | 329 method=env['REQUEST_METHOD'], |
322 advertisedbaseurl=advertisedbaseurl, | 330 url=fullurl, |
323 urlscheme=env['wsgi.url_scheme'], | 331 baseurl=baseurl, |
324 remoteuser=env.get('REMOTE_USER'), | 332 advertisedurl=advertisedfullurl, |
325 remotehost=env.get('REMOTE_HOST'), | 333 advertisedbaseurl=advertisedbaseurl, |
326 apppath=apppath, | 334 urlscheme=env['wsgi.url_scheme'], |
327 dispatchparts=dispatchparts, dispatchpath=dispatchpath, | 335 remoteuser=env.get('REMOTE_USER'), |
328 reponame=reponame, | 336 remotehost=env.get('REMOTE_HOST'), |
329 querystring=querystring, | 337 apppath=apppath, |
330 qsparams=qsparams, | 338 dispatchparts=dispatchparts, |
331 headers=headers, | 339 dispatchpath=dispatchpath, |
332 bodyfh=bodyfh, | 340 reponame=reponame, |
333 rawenv=env) | 341 querystring=querystring, |
342 qsparams=qsparams, | |
343 headers=headers, | |
344 bodyfh=bodyfh, | |
345 rawenv=env, | |
346 ) | |
347 | |
334 | 348 |
335 class offsettrackingwriter(object): | 349 class offsettrackingwriter(object): |
336 """A file object like object that is append only and tracks write count. | 350 """A file object like object that is append only and tracks write count. |
337 | 351 |
338 Instances are bound to a callable. This callable is called with data | 352 Instances are bound to a callable. This callable is called with data |
343 | 357 |
344 The intent of this class is to wrap the ``write()`` function returned by | 358 The intent of this class is to wrap the ``write()`` function returned by |
345 a WSGI ``start_response()`` function. Since ``write()`` is a callable and | 359 a WSGI ``start_response()`` function. Since ``write()`` is a callable and |
346 not a file object, it doesn't implement other file object methods. | 360 not a file object, it doesn't implement other file object methods. |
347 """ | 361 """ |
362 | |
348 def __init__(self, writefn): | 363 def __init__(self, writefn): |
349 self._write = writefn | 364 self._write = writefn |
350 self._offset = 0 | 365 self._offset = 0 |
351 | 366 |
352 def write(self, s): | 367 def write(self, s): |
361 pass | 376 pass |
362 | 377 |
363 def tell(self): | 378 def tell(self): |
364 return self._offset | 379 return self._offset |
365 | 380 |
381 | |
366 class wsgiresponse(object): | 382 class wsgiresponse(object): |
367 """Represents a response to a WSGI request. | 383 """Represents a response to a WSGI request. |
368 | 384 |
369 A response consists of a status line, headers, and a body. | 385 A response consists of a status line, headers, and a body. |
370 | 386 |
387 """ | 403 """ |
388 self._req = req | 404 self._req = req |
389 self._startresponse = startresponse | 405 self._startresponse = startresponse |
390 | 406 |
391 self.status = None | 407 self.status = None |
392 from . import wsgiheaders # avoid cycle | 408 from . import wsgiheaders # avoid cycle |
409 | |
393 self.headers = wsgiheaders.Headers([]) | 410 self.headers = wsgiheaders.Headers([]) |
394 | 411 |
395 self._bodybytes = None | 412 self._bodybytes = None |
396 self._bodygen = None | 413 self._bodygen = None |
397 self._bodywillwrite = False | 414 self._bodywillwrite = False |
398 self._started = False | 415 self._started = False |
399 self._bodywritefn = None | 416 self._bodywritefn = None |
400 | 417 |
401 def _verifybody(self): | 418 def _verifybody(self): |
402 if (self._bodybytes is not None or self._bodygen is not None | 419 if ( |
403 or self._bodywillwrite): | 420 self._bodybytes is not None |
421 or self._bodygen is not None | |
422 or self._bodywillwrite | |
423 ): | |
404 raise error.ProgrammingError('cannot define body multiple times') | 424 raise error.ProgrammingError('cannot define body multiple times') |
405 | 425 |
406 def setbodybytes(self, b): | 426 def setbodybytes(self, b): |
407 """Define the response body as static bytes. | 427 """Define the response body as static bytes. |
408 | 428 |
448 self._started = True | 468 self._started = True |
449 | 469 |
450 if not self.status: | 470 if not self.status: |
451 raise error.ProgrammingError('status line not defined') | 471 raise error.ProgrammingError('status line not defined') |
452 | 472 |
453 if (self._bodybytes is None and self._bodygen is None | 473 if ( |
454 and not self._bodywillwrite): | 474 self._bodybytes is None |
475 and self._bodygen is None | |
476 and not self._bodywillwrite | |
477 ): | |
455 raise error.ProgrammingError('response body not defined') | 478 raise error.ProgrammingError('response body not defined') |
456 | 479 |
457 # RFC 7232 Section 4.1 states that a 304 MUST generate one of | 480 # RFC 7232 Section 4.1 states that a 304 MUST generate one of |
458 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} | 481 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} |
459 # and SHOULD NOT generate other headers unless they could be used | 482 # and SHOULD NOT generate other headers unless they could be used |
467 if self.headers.get('Content-Length') == '0': | 490 if self.headers.get('Content-Length') == '0': |
468 del self.headers['Content-Length'] | 491 del self.headers['Content-Length'] |
469 | 492 |
470 # Strictly speaking, this is too strict. But until it causes | 493 # Strictly speaking, this is too strict. But until it causes |
471 # problems, let's be strict. | 494 # problems, let's be strict. |
472 badheaders = {k for k in self.headers.keys() | 495 badheaders = { |
473 if k.lower() not in ('date', 'etag', 'expires', | 496 k |
474 'cache-control', | 497 for k in self.headers.keys() |
475 'content-location', | 498 if k.lower() |
476 'content-security-policy', | 499 not in ( |
477 'vary')} | 500 'date', |
501 'etag', | |
502 'expires', | |
503 'cache-control', | |
504 'content-location', | |
505 'content-security-policy', | |
506 'vary', | |
507 ) | |
508 } | |
478 if badheaders: | 509 if badheaders: |
479 raise error.ProgrammingError( | 510 raise error.ProgrammingError( |
480 'illegal header on 304 response: %s' % | 511 'illegal header on 304 response: %s' |
481 ', '.join(sorted(badheaders))) | 512 % ', '.join(sorted(badheaders)) |
513 ) | |
482 | 514 |
483 if self._bodygen is not None or self._bodywillwrite: | 515 if self._bodygen is not None or self._bodywillwrite: |
484 raise error.ProgrammingError("must use setbodybytes('') with " | 516 raise error.ProgrammingError( |
485 "304 responses") | 517 "must use setbodybytes('') with " "304 responses" |
518 ) | |
486 | 519 |
487 # Various HTTP clients (notably httplib) won't read the HTTP response | 520 # Various HTTP clients (notably httplib) won't read the HTTP response |
488 # until the HTTP request has been sent in full. If servers (us) send a | 521 # until the HTTP request has been sent in full. If servers (us) send a |
489 # response before the HTTP request has been fully sent, the connection | 522 # response before the HTTP request has been fully sent, the connection |
490 # may deadlock because neither end is reading. | 523 # may deadlock because neither end is reading. |
529 while True: | 562 while True: |
530 chunk = self._req.bodyfh.read(32768) | 563 chunk = self._req.bodyfh.read(32768) |
531 if not chunk: | 564 if not chunk: |
532 break | 565 break |
533 | 566 |
534 strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for | 567 strheaders = [ |
535 k, v in self.headers.items()] | 568 (pycompat.strurl(k), pycompat.strurl(v)) |
536 write = self._startresponse(pycompat.sysstr(self.status), | 569 for k, v in self.headers.items() |
537 strheaders) | 570 ] |
571 write = self._startresponse(pycompat.sysstr(self.status), strheaders) | |
538 | 572 |
539 if self._bodybytes: | 573 if self._bodybytes: |
540 yield self._bodybytes | 574 yield self._bodybytes |
541 elif self._bodygen: | 575 elif self._bodygen: |
542 for chunk in self._bodygen: | 576 for chunk in self._bodygen: |
564 """ | 598 """ |
565 if not self._bodywillwrite: | 599 if not self._bodywillwrite: |
566 raise error.ProgrammingError('must call setbodywillwrite() first') | 600 raise error.ProgrammingError('must call setbodywillwrite() first') |
567 | 601 |
568 if not self._started: | 602 if not self._started: |
569 raise error.ProgrammingError('must call sendresponse() first; did ' | 603 raise error.ProgrammingError( |
570 'you remember to consume it since it ' | 604 'must call sendresponse() first; did ' |
571 'is a generator?') | 605 'you remember to consume it since it ' |
606 'is a generator?' | |
607 ) | |
572 | 608 |
573 assert self._bodywritefn | 609 assert self._bodywritefn |
574 return offsettrackingwriter(self._bodywritefn) | 610 return offsettrackingwriter(self._bodywritefn) |
611 | |
575 | 612 |
576 def wsgiapplication(app_maker): | 613 def wsgiapplication(app_maker): |
577 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir() | 614 '''For compatibility with old CGI scripts. A plain hgweb() or hgwebdir() |
578 can and should now be used as a WSGI application.''' | 615 can and should now be used as a WSGI application.''' |
579 application = app_maker() | 616 application = app_maker() |
617 | |
580 def run_wsgi(env, respond): | 618 def run_wsgi(env, respond): |
581 return application(env, respond) | 619 return application(env, respond) |
620 | |
582 return run_wsgi | 621 return run_wsgi |