comparison mercurial/hgweb/request.py @ 36878:ccb70a77f746

hgweb: refactor 304 handling code. We had generic code in wsgirequest for handling HTTP 304 responses. We also had a special case for it in the catch-all exception handler in the WSGI application. We only ever raise 304 in one place, so we don't need to treat it specially in the catch-all exception handler. But it is useful to validate the behavior of 304 responses. We port the code that sends a 304 to use the new response API. We then move the code for screening 304 sanity into the new response API. As part of doing so, we discovered that we would send Content-Length: 0. This is not allowed. So, we fix our response code to not emit that header for empty response bodies. Differential Revision: https://phab.mercurial-scm.org/D2794
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 10 Mar 2018 18:42:00 -0800
parents 97f44b0720e2
children b2a3308d6a21
comparison
equal deleted inserted replaced
36877:02bea04b4c54 36878:ccb70a77f746
13 import wsgiref.headers as wsgiheaders 13 import wsgiref.headers as wsgiheaders
14 #import wsgiref.validate 14 #import wsgiref.validate
15 15
16 from .common import ( 16 from .common import (
17 ErrorResponse, 17 ErrorResponse,
18 HTTP_NOT_MODIFIED,
19 statusmessage, 18 statusmessage,
20 ) 19 )
21 20
22 from ..thirdparty import ( 21 from ..thirdparty import (
23 attr, 22 attr,
359 if (self._bodybytes is not None or self._bodygen is not None 358 if (self._bodybytes is not None or self._bodygen is not None
360 or self._bodywillwrite): 359 or self._bodywillwrite):
361 raise error.ProgrammingError('cannot define body multiple times') 360 raise error.ProgrammingError('cannot define body multiple times')
362 361
363 def setbodybytes(self, b): 362 def setbodybytes(self, b):
364 """Define the response body as static bytes.""" 363 """Define the response body as static bytes.
364
365 The empty string signals that there is no response body.
366 """
365 self._verifybody() 367 self._verifybody()
366 self._bodybytes = b 368 self._bodybytes = b
367 self.headers['Content-Length'] = '%d' % len(b) 369 self.headers['Content-Length'] = '%d' % len(b)
368 370
369 def setbodygen(self, gen): 371 def setbodygen(self, gen):
405 raise error.ProgrammingError('status line not defined') 407 raise error.ProgrammingError('status line not defined')
406 408
407 if (self._bodybytes is None and self._bodygen is None 409 if (self._bodybytes is None and self._bodygen is None
408 and not self._bodywillwrite): 410 and not self._bodywillwrite):
409 raise error.ProgrammingError('response body not defined') 411 raise error.ProgrammingError('response body not defined')
412
413 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
414 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
415 # and SHOULD NOT generate other headers unless they could be used
416 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
417 # states that no response body can be issued. Content-Length can
418 # be sent. But if it is present, it should be the size of the response
419 # that wasn't transferred.
420 if self.status.startswith('304 '):
421 # setbodybytes('') will set C-L to 0. This doesn't conform with the
422 # spec. So remove it.
423 if self.headers.get('Content-Length') == '0':
424 del self.headers['Content-Length']
425
426 # Strictly speaking, this is too strict. But until it causes
427 # problems, let's be strict.
428 badheaders = {k for k in self.headers.keys()
429 if k.lower() not in ('date', 'etag', 'expires',
430 'cache-control',
431 'content-location',
432 'vary')}
433 if badheaders:
434 raise error.ProgrammingError(
435 'illegal header on 304 response: %s' %
436 ', '.join(sorted(badheaders)))
437
438 if self._bodygen is not None or self._bodywillwrite:
439 raise error.ProgrammingError("must use setbodybytes('') with "
440 "304 responses")
410 441
411 # Various HTTP clients (notably httplib) won't read the HTTP response 442 # Various HTTP clients (notably httplib) won't read the HTTP response
412 # until the HTTP request has been sent in full. If servers (us) send a 443 # until the HTTP request has been sent in full. If servers (us) send a
413 # response before the HTTP request has been fully sent, the connection 444 # response before the HTTP request has been fully sent, the connection
414 # may deadlock because neither end is reading. 445 # may deadlock because neither end is reading.
537 if not isinstance(v, str): 568 if not isinstance(v, str):
538 raise TypeError('header value must be string: %r' % (v,)) 569 raise TypeError('header value must be string: %r' % (v,))
539 570
540 if isinstance(status, ErrorResponse): 571 if isinstance(status, ErrorResponse):
541 self.headers.extend(status.headers) 572 self.headers.extend(status.headers)
542 if status.code == HTTP_NOT_MODIFIED:
543 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
544 # it MUST NOT include any headers other than these and no
545 # body
546 self.headers = [(k, v) for (k, v) in self.headers if
547 k in ('Date', 'ETag', 'Expires',
548 'Cache-Control', 'Vary')]
549 status = statusmessage(status.code, pycompat.bytestr(status)) 573 status = statusmessage(status.code, pycompat.bytestr(status))
550 elif status == 200: 574 elif status == 200:
551 status = '200 Script output follows' 575 status = '200 Script output follows'
552 elif isinstance(status, int): 576 elif isinstance(status, int):
553 status = statusmessage(status) 577 status = statusmessage(status)