comparison mercurial/hgweb/request.py @ 43077:687b865b95ad

formatting: byteify all mercurial/ and hgext/ string literals Done with python3.7 contrib/byteify-strings.py -i $(hg files 'set:mercurial/**.py - mercurial/thirdparty/** + hgext/**.py - hgext/fsmonitor/pywatchman/** - mercurial/__init__.py') black -l 80 -t py33 -S $(hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**" - hgext/fsmonitor/pywatchman/**') # skip-blame mass-reformatting only Differential Revision: https://phab.mercurial-scm.org/D6972
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:48:39 -0400
parents 2372284d9457
children d783f945a701
comparison
equal deleted inserted replaced
43076:2372284d9457 43077:687b865b95ad
67 Raises KeyError if key not defined or it has multiple values set. 67 Raises KeyError if key not defined or it has multiple values set.
68 """ 68 """
69 vals = self._items[key] 69 vals = self._items[key]
70 70
71 if len(vals) > 1: 71 if len(vals) > 1:
72 raise KeyError('multiple values for %r' % key) 72 raise KeyError(b'multiple values for %r' % key)
73 73
74 return vals[0] 74 return vals[0]
75 75
76 def asdictoflists(self): 76 def asdictoflists(self):
77 return {k: list(v) for k, v in self._items.iteritems()} 77 return {k: list(v) for k, v in self._items.iteritems()}
170 170
171 # Some hosting solutions are emulating hgwebdir, and dispatching directly 171 # Some hosting solutions are emulating hgwebdir, and dispatching directly
172 # to an hgweb instance using this environment variable. This was always 172 # to an hgweb instance using this environment variable. This was always
173 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them. 173 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
174 if not reponame: 174 if not reponame:
175 reponame = env.get('REPO_NAME') 175 reponame = env.get(b'REPO_NAME')
176 176
177 if altbaseurl: 177 if altbaseurl:
178 altbaseurl = util.url(altbaseurl) 178 altbaseurl = util.url(altbaseurl)
179 179
180 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines 180 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
181 # the environment variables. 181 # the environment variables.
182 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines 182 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
183 # how URLs are reconstructed. 183 # how URLs are reconstructed.
184 fullurl = env['wsgi.url_scheme'] + '://' 184 fullurl = env[b'wsgi.url_scheme'] + b'://'
185 185
186 if altbaseurl and altbaseurl.scheme: 186 if altbaseurl and altbaseurl.scheme:
187 advertisedfullurl = altbaseurl.scheme + '://' 187 advertisedfullurl = altbaseurl.scheme + b'://'
188 else: 188 else:
189 advertisedfullurl = fullurl 189 advertisedfullurl = fullurl
190 190
191 def addport(s, port): 191 def addport(s, port):
192 if s.startswith('https://'): 192 if s.startswith(b'https://'):
193 if port != '443': 193 if port != b'443':
194 s += ':' + port 194 s += b':' + port
195 else: 195 else:
196 if port != '80': 196 if port != b'80':
197 s += ':' + port 197 s += b':' + port
198 198
199 return s 199 return s
200 200
201 if env.get('HTTP_HOST'): 201 if env.get(b'HTTP_HOST'):
202 fullurl += env['HTTP_HOST'] 202 fullurl += env[b'HTTP_HOST']
203 else: 203 else:
204 fullurl += env['SERVER_NAME'] 204 fullurl += env[b'SERVER_NAME']
205 fullurl = addport(fullurl, env['SERVER_PORT']) 205 fullurl = addport(fullurl, env[b'SERVER_PORT'])
206 206
207 if altbaseurl and altbaseurl.host: 207 if altbaseurl and altbaseurl.host:
208 advertisedfullurl += altbaseurl.host 208 advertisedfullurl += altbaseurl.host
209 209
210 if altbaseurl.port: 210 if altbaseurl.port:
211 port = altbaseurl.port 211 port = altbaseurl.port
212 elif altbaseurl.scheme == 'http' and not altbaseurl.port: 212 elif altbaseurl.scheme == b'http' and not altbaseurl.port:
213 port = '80' 213 port = b'80'
214 elif altbaseurl.scheme == 'https' and not altbaseurl.port: 214 elif altbaseurl.scheme == b'https' and not altbaseurl.port:
215 port = '443' 215 port = b'443'
216 else: 216 else:
217 port = env['SERVER_PORT'] 217 port = env[b'SERVER_PORT']
218 218
219 advertisedfullurl = addport(advertisedfullurl, port) 219 advertisedfullurl = addport(advertisedfullurl, port)
220 else: 220 else:
221 advertisedfullurl += env['SERVER_NAME'] 221 advertisedfullurl += env[b'SERVER_NAME']
222 advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT']) 222 advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])
223 223
224 baseurl = fullurl 224 baseurl = fullurl
225 advertisedbaseurl = advertisedfullurl 225 advertisedbaseurl = advertisedfullurl
226 226
227 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) 227 fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
228 fullurl += util.urlreq.quote(env.get('PATH_INFO', '')) 228 fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))
229 229
230 if altbaseurl: 230 if altbaseurl:
231 path = altbaseurl.path or '' 231 path = altbaseurl.path or b''
232 if path and not path.startswith('/'): 232 if path and not path.startswith(b'/'):
233 path = '/' + path 233 path = b'/' + path
234 advertisedfullurl += util.urlreq.quote(path) 234 advertisedfullurl += util.urlreq.quote(path)
235 else: 235 else:
236 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', '')) 236 advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
237 237
238 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', '')) 238 advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))
239 239
240 if env.get('QUERY_STRING'): 240 if env.get(b'QUERY_STRING'):
241 fullurl += '?' + env['QUERY_STRING'] 241 fullurl += b'?' + env[b'QUERY_STRING']
242 advertisedfullurl += '?' + env['QUERY_STRING'] 242 advertisedfullurl += b'?' + env[b'QUERY_STRING']
243 243
244 # If ``reponame`` is defined, that must be a prefix on PATH_INFO 244 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
245 # that represents the repository being dispatched to. When computing 245 # that represents the repository being dispatched to. When computing
246 # the dispatch info, we ignore these leading path components. 246 # the dispatch info, we ignore these leading path components.
247 247
248 if altbaseurl: 248 if altbaseurl:
249 apppath = altbaseurl.path or '' 249 apppath = altbaseurl.path or b''
250 if apppath and not apppath.startswith('/'): 250 if apppath and not apppath.startswith(b'/'):
251 apppath = '/' + apppath 251 apppath = b'/' + apppath
252 else: 252 else:
253 apppath = env.get('SCRIPT_NAME', '') 253 apppath = env.get(b'SCRIPT_NAME', b'')
254 254
255 if reponame: 255 if reponame:
256 repoprefix = '/' + reponame.strip('/') 256 repoprefix = b'/' + reponame.strip(b'/')
257 257
258 if not env.get('PATH_INFO'): 258 if not env.get(b'PATH_INFO'):
259 raise error.ProgrammingError('reponame requires PATH_INFO') 259 raise error.ProgrammingError(b'reponame requires PATH_INFO')
260 260
261 if not env['PATH_INFO'].startswith(repoprefix): 261 if not env[b'PATH_INFO'].startswith(repoprefix):
262 raise error.ProgrammingError( 262 raise error.ProgrammingError(
263 'PATH_INFO does not begin with repo ' 263 b'PATH_INFO does not begin with repo '
264 'name: %s (%s)' % (env['PATH_INFO'], reponame) 264 b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
265 ) 265 )
266 266
267 dispatchpath = env['PATH_INFO'][len(repoprefix) :] 267 dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]
268 268
269 if dispatchpath and not dispatchpath.startswith('/'): 269 if dispatchpath and not dispatchpath.startswith(b'/'):
270 raise error.ProgrammingError( 270 raise error.ProgrammingError(
271 'reponame prefix of PATH_INFO does ' 271 b'reponame prefix of PATH_INFO does '
272 'not end at path delimiter: %s (%s)' 272 b'not end at path delimiter: %s (%s)'
273 % (env['PATH_INFO'], reponame) 273 % (env[b'PATH_INFO'], reponame)
274 ) 274 )
275 275
276 apppath = apppath.rstrip('/') + repoprefix 276 apppath = apppath.rstrip(b'/') + repoprefix
277 dispatchparts = dispatchpath.strip('/').split('/') 277 dispatchparts = dispatchpath.strip(b'/').split(b'/')
278 dispatchpath = '/'.join(dispatchparts) 278 dispatchpath = b'/'.join(dispatchparts)
279 279
280 elif 'PATH_INFO' in env: 280 elif b'PATH_INFO' in env:
281 if env['PATH_INFO'].strip('/'): 281 if env[b'PATH_INFO'].strip(b'/'):
282 dispatchparts = env['PATH_INFO'].strip('/').split('/') 282 dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
283 dispatchpath = '/'.join(dispatchparts) 283 dispatchpath = b'/'.join(dispatchparts)
284 else: 284 else:
285 dispatchparts = [] 285 dispatchparts = []
286 dispatchpath = '' 286 dispatchpath = b''
287 else: 287 else:
288 dispatchparts = [] 288 dispatchparts = []
289 dispatchpath = None 289 dispatchpath = None
290 290
291 querystring = env.get('QUERY_STRING', '') 291 querystring = env.get(b'QUERY_STRING', b'')
292 292
293 # We store as a list so we have ordering information. We also store as 293 # We store as a list so we have ordering information. We also store as
294 # a dict to facilitate fast lookup. 294 # a dict to facilitate fast lookup.
295 qsparams = multidict() 295 qsparams = multidict()
296 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True): 296 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
299 # HTTP_* keys contain HTTP request headers. The Headers structure should 299 # HTTP_* keys contain HTTP request headers. The Headers structure should
300 # perform case normalization for us. We just rewrite underscore to dash 300 # perform case normalization for us. We just rewrite underscore to dash
301 # so keys match what likely went over the wire. 301 # so keys match what likely went over the wire.
302 headers = [] 302 headers = []
303 for k, v in env.iteritems(): 303 for k, v in env.iteritems():
304 if k.startswith('HTTP_'): 304 if k.startswith(b'HTTP_'):
305 headers.append((k[len('HTTP_') :].replace('_', '-'), v)) 305 headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))
306 306
307 from . import wsgiheaders # avoid cycle 307 from . import wsgiheaders # avoid cycle
308 308
309 headers = wsgiheaders.Headers(headers) 309 headers = wsgiheaders.Headers(headers)
310 310
311 # This is kind of a lie because the HTTP header wasn't explicitly 311 # This is kind of a lie because the HTTP header wasn't explicitly
312 # sent. But for all intents and purposes it should be OK to lie about 312 # sent. But for all intents and purposes it should be OK to lie about
313 # this, since a consumer will use either value to determine how many 313 # this, since a consumer will use either value to determine how many
314 # bytes are available to read. 314 # bytes are available to read.
315 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env: 315 if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
316 headers['Content-Length'] = env['CONTENT_LENGTH'] 316 headers[b'Content-Length'] = env[b'CONTENT_LENGTH']
317 317
318 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env: 318 if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
319 headers['Content-Type'] = env['CONTENT_TYPE'] 319 headers[b'Content-Type'] = env[b'CONTENT_TYPE']
320 320
321 if bodyfh is None: 321 if bodyfh is None:
322 bodyfh = env['wsgi.input'] 322 bodyfh = env[b'wsgi.input']
323 if 'Content-Length' in headers: 323 if b'Content-Length' in headers:
324 bodyfh = util.cappedreader( 324 bodyfh = util.cappedreader(
325 bodyfh, int(headers['Content-Length'] or '0') 325 bodyfh, int(headers[b'Content-Length'] or b'0')
326 ) 326 )
327 327
328 return parsedrequest( 328 return parsedrequest(
329 method=env['REQUEST_METHOD'], 329 method=env[b'REQUEST_METHOD'],
330 url=fullurl, 330 url=fullurl,
331 baseurl=baseurl, 331 baseurl=baseurl,
332 advertisedurl=advertisedfullurl, 332 advertisedurl=advertisedfullurl,
333 advertisedbaseurl=advertisedbaseurl, 333 advertisedbaseurl=advertisedbaseurl,
334 urlscheme=env['wsgi.url_scheme'], 334 urlscheme=env[b'wsgi.url_scheme'],
335 remoteuser=env.get('REMOTE_USER'), 335 remoteuser=env.get(b'REMOTE_USER'),
336 remotehost=env.get('REMOTE_HOST'), 336 remotehost=env.get(b'REMOTE_HOST'),
337 apppath=apppath, 337 apppath=apppath,
338 dispatchparts=dispatchparts, 338 dispatchparts=dispatchparts,
339 dispatchpath=dispatchpath, 339 dispatchpath=dispatchpath,
340 reponame=reponame, 340 reponame=reponame,
341 querystring=querystring, 341 querystring=querystring,
419 if ( 419 if (
420 self._bodybytes is not None 420 self._bodybytes is not None
421 or self._bodygen is not None 421 or self._bodygen is not None
422 or self._bodywillwrite 422 or self._bodywillwrite
423 ): 423 ):
424 raise error.ProgrammingError('cannot define body multiple times') 424 raise error.ProgrammingError(b'cannot define body multiple times')
425 425
426 def setbodybytes(self, b): 426 def setbodybytes(self, b):
427 """Define the response body as static bytes. 427 """Define the response body as static bytes.
428 428
429 The empty string signals that there is no response body. 429 The empty string signals that there is no response body.
430 """ 430 """
431 self._verifybody() 431 self._verifybody()
432 self._bodybytes = b 432 self._bodybytes = b
433 self.headers['Content-Length'] = '%d' % len(b) 433 self.headers[b'Content-Length'] = b'%d' % len(b)
434 434
435 def setbodygen(self, gen): 435 def setbodygen(self, gen):
436 """Define the response body as a generator of bytes.""" 436 """Define the response body as a generator of bytes."""
437 self._verifybody() 437 self._verifybody()
438 self._bodygen = gen 438 self._bodygen = gen
461 ``setbodybytes()`` or ``setbodygen()`` must be called. 461 ``setbodybytes()`` or ``setbodygen()`` must be called.
462 462
463 Calling this method multiple times is not allowed. 463 Calling this method multiple times is not allowed.
464 """ 464 """
465 if self._started: 465 if self._started:
466 raise error.ProgrammingError('sendresponse() called multiple times') 466 raise error.ProgrammingError(
467 b'sendresponse() called multiple times'
468 )
467 469
468 self._started = True 470 self._started = True
469 471
470 if not self.status: 472 if not self.status:
471 raise error.ProgrammingError('status line not defined') 473 raise error.ProgrammingError(b'status line not defined')
472 474
473 if ( 475 if (
474 self._bodybytes is None 476 self._bodybytes is None
475 and self._bodygen is None 477 and self._bodygen is None
476 and not self._bodywillwrite 478 and not self._bodywillwrite
477 ): 479 ):
478 raise error.ProgrammingError('response body not defined') 480 raise error.ProgrammingError(b'response body not defined')
479 481
480 # RFC 7232 Section 4.1 states that a 304 MUST generate one of 482 # RFC 7232 Section 4.1 states that a 304 MUST generate one of
481 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary} 483 # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
482 # and SHOULD NOT generate other headers unless they could be used 484 # and SHOULD NOT generate other headers unless they could be used
483 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2 485 # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
484 # states that no response body can be issued. Content-Length can 486 # states that no response body can be issued. Content-Length can
485 # be sent. But if it is present, it should be the size of the response 487 # be sent. But if it is present, it should be the size of the response
486 # that wasn't transferred. 488 # that wasn't transferred.
487 if self.status.startswith('304 '): 489 if self.status.startswith(b'304 '):
488 # setbodybytes('') will set C-L to 0. This doesn't conform with the 490 # setbodybytes('') will set C-L to 0. This doesn't conform with the
489 # spec. So remove it. 491 # spec. So remove it.
490 if self.headers.get('Content-Length') == '0': 492 if self.headers.get(b'Content-Length') == b'0':
491 del self.headers['Content-Length'] 493 del self.headers[b'Content-Length']
492 494
493 # Strictly speaking, this is too strict. But until it causes 495 # Strictly speaking, this is too strict. But until it causes
494 # problems, let's be strict. 496 # problems, let's be strict.
495 badheaders = { 497 badheaders = {
496 k 498 k
497 for k in self.headers.keys() 499 for k in self.headers.keys()
498 if k.lower() 500 if k.lower()
499 not in ( 501 not in (
500 'date', 502 b'date',
501 'etag', 503 b'etag',
502 'expires', 504 b'expires',
503 'cache-control', 505 b'cache-control',
504 'content-location', 506 b'content-location',
505 'content-security-policy', 507 b'content-security-policy',
506 'vary', 508 b'vary',
507 ) 509 )
508 } 510 }
509 if badheaders: 511 if badheaders:
510 raise error.ProgrammingError( 512 raise error.ProgrammingError(
511 'illegal header on 304 response: %s' 513 b'illegal header on 304 response: %s'
512 % ', '.join(sorted(badheaders)) 514 % b', '.join(sorted(badheaders))
513 ) 515 )
514 516
515 if self._bodygen is not None or self._bodywillwrite: 517 if self._bodygen is not None or self._bodywillwrite:
516 raise error.ProgrammingError( 518 raise error.ProgrammingError(
517 "must use setbodybytes('') with " "304 responses" 519 b"must use setbodybytes('') with " b"304 responses"
518 ) 520 )
519 521
520 # Various HTTP clients (notably httplib) won't read the HTTP response 522 # Various HTTP clients (notably httplib) won't read the HTTP response
521 # until the HTTP request has been sent in full. If servers (us) send a 523 # until the HTTP request has been sent in full. If servers (us) send a
522 # response before the HTTP request has been fully sent, the connection 524 # response before the HTTP request has been fully sent, the connection
528 close = False 530 close = False
529 531
530 # If the client sent Expect: 100-continue, we assume it is smart enough 532 # If the client sent Expect: 100-continue, we assume it is smart enough
531 # to deal with the server sending a response before reading the request. 533 # to deal with the server sending a response before reading the request.
532 # (httplib doesn't do this.) 534 # (httplib doesn't do this.)
533 if self._req.headers.get('Expect', '').lower() == '100-continue': 535 if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
534 pass 536 pass
535 # Only tend to request methods that have bodies. Strictly speaking, 537 # Only tend to request methods that have bodies. Strictly speaking,
536 # we should sniff for a body. But this is fine for our existing 538 # we should sniff for a body. But this is fine for our existing
537 # WSGI applications. 539 # WSGI applications.
538 elif self._req.method not in ('POST', 'PUT'): 540 elif self._req.method not in (b'POST', b'PUT'):
539 pass 541 pass
540 else: 542 else:
541 # If we don't know how much data to read, there's no guarantee 543 # If we don't know how much data to read, there's no guarantee
542 # that we can drain the request responsibly. The WSGI 544 # that we can drain the request responsibly. The WSGI
543 # specification only says that servers *should* ensure the 545 # specification only says that servers *should* ensure the
553 # drain, it is safe to read from that stream. We'll either do 555 # drain, it is safe to read from that stream. We'll either do
554 # a drain or no-op if we're already at EOF. 556 # a drain or no-op if we're already at EOF.
555 drain = True 557 drain = True
556 558
557 if close: 559 if close:
558 self.headers['Connection'] = 'Close' 560 self.headers[b'Connection'] = b'Close'
559 561
560 if drain: 562 if drain:
561 assert isinstance(self._req.bodyfh, util.cappedreader) 563 assert isinstance(self._req.bodyfh, util.cappedreader)
562 while True: 564 while True:
563 chunk = self._req.bodyfh.read(32768) 565 chunk = self._req.bodyfh.read(32768)
582 584
583 yield chunk 585 yield chunk
584 elif self._bodywillwrite: 586 elif self._bodywillwrite:
585 self._bodywritefn = write 587 self._bodywritefn = write
586 else: 588 else:
587 error.ProgrammingError('do not know how to send body') 589 error.ProgrammingError(b'do not know how to send body')
588 590
589 def getbodyfile(self): 591 def getbodyfile(self):
590 """Obtain a file object like object representing the response body. 592 """Obtain a file object like object representing the response body.
591 593
592 For this to work, you must call ``setbodywillwrite()`` and then 594 For this to work, you must call ``setbodywillwrite()`` and then
595 generator yields no items. The easiest way to consume it is with 597 generator yields no items. The easiest way to consume it is with
596 ``list(res.sendresponse())``, which should resolve to an empty list - 598 ``list(res.sendresponse())``, which should resolve to an empty list -
597 ``[]``. 599 ``[]``.
598 """ 600 """
599 if not self._bodywillwrite: 601 if not self._bodywillwrite:
600 raise error.ProgrammingError('must call setbodywillwrite() first') 602 raise error.ProgrammingError(b'must call setbodywillwrite() first')
601 603
602 if not self._started: 604 if not self._started:
603 raise error.ProgrammingError( 605 raise error.ProgrammingError(
604 'must call sendresponse() first; did ' 606 b'must call sendresponse() first; did '
605 'you remember to consume it since it ' 607 b'you remember to consume it since it '
606 'is a generator?' 608 b'is a generator?'
607 ) 609 )
608 610
609 assert self._bodywritefn 611 assert self._bodywritefn
610 return offsettrackingwriter(self._bodywritefn) 612 return offsettrackingwriter(self._bodywritefn)
611 613