comparison mercurial/hgweb/request.py @ 36863:da4e2f87167d

hgweb: expose input stream on parsed WSGI request object. Our next step towards moving away from wsgirequest to our newer, friendlier parsedrequest type is input stream access. This commit exposes the input stream on the instance. Consumers in the HTTP protocol server switch to it. Because there were very few consumers of the input stream, we stopped storing a reference to the input stream on wsgirequest directly. All access now goes through parsedrequest. However, wsgirequest may still read from this stream as part of cgi.parse(), so we still need to create the stream from wsgirequest. Differential Revision: https://phab.mercurial-scm.org/D2771
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 10 Mar 2018 11:06:13 -0800
parents 1f7d9024674c
children 01f6bba64424
comparison
equal deleted inserted replaced
36862:1f7d9024674c 36863:da4e2f87167d
59 pycompat.bytesurl(i.strip()) for i in v] 59 pycompat.bytesurl(i.strip()) for i in v]
60 return bytesform 60 return bytesform
61 61
62 @attr.s(frozen=True) 62 @attr.s(frozen=True)
63 class parsedrequest(object): 63 class parsedrequest(object):
64 """Represents a parsed WSGI request / static HTTP request parameters.""" 64 """Represents a parsed WSGI request.
65
66 Contains both parsed parameters as well as a handle on the input stream.
67 """
65 68
66 # Request method. 69 # Request method.
67 method = attr.ib() 70 method = attr.ib()
68 # Full URL for this request. 71 # Full URL for this request.
69 url = attr.ib() 72 url = attr.ib()
89 # Dict of query string arguments. Values are lists with at least 1 item. 92 # Dict of query string arguments. Values are lists with at least 1 item.
90 querystringdict = attr.ib() 93 querystringdict = attr.ib()
91 # wsgiref.headers.Headers instance. Operates like a dict with case 94 # wsgiref.headers.Headers instance. Operates like a dict with case
92 # insensitive keys. 95 # insensitive keys.
93 headers = attr.ib() 96 headers = attr.ib()
94 97 # Request body input stream.
95 def parserequestfromenv(env): 98 bodyfh = attr.ib()
99
100 def parserequestfromenv(env, bodyfh):
96 """Parse URL components from environment variables. 101 """Parse URL components from environment variables.
97 102
98 WSGI defines request attributes via environment variables. This function 103 WSGI defines request attributes via environment variables. This function
99 parses the environment variables into a data structure. 104 parses the environment variables into a data structure.
100 """ 105 """
206 # sent. But for all intents and purposes it should be OK to lie about 211 # sent. But for all intents and purposes it should be OK to lie about
207 # this, since a consumer will use either value to determine how many 212 # this, since a consumer will use either value to determine how many
208 # bytes are available to read. 213 # bytes are available to read.
209 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env: 214 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
210 headers['Content-Length'] = env['CONTENT_LENGTH'] 215 headers['Content-Length'] = env['CONTENT_LENGTH']
216
217 # TODO do this once we remove wsgirequest.inp, otherwise we could have
218 # multiple readers from the underlying input stream.
219 #bodyfh = env['wsgi.input']
220 #if 'Content-Length' in headers:
221 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
211 222
212 return parsedrequest(method=env['REQUEST_METHOD'], 223 return parsedrequest(method=env['REQUEST_METHOD'],
213 url=fullurl, baseurl=baseurl, 224 url=fullurl, baseurl=baseurl,
214 advertisedurl=advertisedfullurl, 225 advertisedurl=advertisedfullurl,
215 advertisedbaseurl=advertisedbaseurl, 226 advertisedbaseurl=advertisedbaseurl,
217 dispatchparts=dispatchparts, dispatchpath=dispatchpath, 228 dispatchparts=dispatchparts, dispatchpath=dispatchpath,
218 havepathinfo='PATH_INFO' in env, 229 havepathinfo='PATH_INFO' in env,
219 querystring=querystring, 230 querystring=querystring,
220 querystringlist=querystringlist, 231 querystringlist=querystringlist,
221 querystringdict=querystringdict, 232 querystringdict=querystringdict,
222 headers=headers) 233 headers=headers,
234 bodyfh=bodyfh)
223 235
224 class wsgirequest(object): 236 class wsgirequest(object):
225 """Higher-level API for a WSGI request. 237 """Higher-level API for a WSGI request.
226 238
227 WSGI applications are invoked with 2 arguments. They are used to 239 WSGI applications are invoked with 2 arguments. They are used to
231 def __init__(self, wsgienv, start_response): 243 def __init__(self, wsgienv, start_response):
232 version = wsgienv[r'wsgi.version'] 244 version = wsgienv[r'wsgi.version']
233 if (version < (1, 0)) or (version >= (2, 0)): 245 if (version < (1, 0)) or (version >= (2, 0)):
234 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" 246 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
235 % version) 247 % version)
236 self.inp = wsgienv[r'wsgi.input'] 248
249 inp = wsgienv[r'wsgi.input']
237 250
238 if r'HTTP_CONTENT_LENGTH' in wsgienv: 251 if r'HTTP_CONTENT_LENGTH' in wsgienv:
239 self.inp = util.cappedreader(self.inp, 252 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
240 int(wsgienv[r'HTTP_CONTENT_LENGTH']))
241 elif r'CONTENT_LENGTH' in wsgienv: 253 elif r'CONTENT_LENGTH' in wsgienv:
242 self.inp = util.cappedreader(self.inp, 254 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
243 int(wsgienv[r'CONTENT_LENGTH']))
244 255
245 self.err = wsgienv[r'wsgi.errors'] 256 self.err = wsgienv[r'wsgi.errors']
246 self.threaded = wsgienv[r'wsgi.multithread'] 257 self.threaded = wsgienv[r'wsgi.multithread']
247 self.multiprocess = wsgienv[r'wsgi.multiprocess'] 258 self.multiprocess = wsgienv[r'wsgi.multiprocess']
248 self.run_once = wsgienv[r'wsgi.run_once'] 259 self.run_once = wsgienv[r'wsgi.run_once']
249 self.env = wsgienv 260 self.env = wsgienv
250 self.form = normalize(cgi.parse(self.inp, 261 self.form = normalize(cgi.parse(inp,
251 self.env, 262 self.env,
252 keep_blank_values=1)) 263 keep_blank_values=1))
253 self._start_response = start_response 264 self._start_response = start_response
254 self.server_write = None 265 self.server_write = None
255 self.headers = [] 266 self.headers = []
256 267
257 self.req = parserequestfromenv(wsgienv) 268 self.req = parserequestfromenv(wsgienv, inp)
258 269
259 def respond(self, status, type, filename=None, body=None): 270 def respond(self, status, type, filename=None, body=None):
260 if not isinstance(type, str): 271 if not isinstance(type, str):
261 type = pycompat.sysstr(type) 272 type = pycompat.sysstr(type)
262 if self._start_response is not None: 273 if self._start_response is not None:
313 # that we can drain the request responsibly. The WSGI 324 # that we can drain the request responsibly. The WSGI
314 # specification only says that servers *should* ensure the 325 # specification only says that servers *should* ensure the
315 # input stream doesn't overrun the actual request. So there's 326 # input stream doesn't overrun the actual request. So there's
316 # no guarantee that reading until EOF won't corrupt the stream 327 # no guarantee that reading until EOF won't corrupt the stream
317 # state. 328 # state.
318 if not isinstance(self.inp, util.cappedreader): 329 if not isinstance(self.req.bodyfh, util.cappedreader):
319 close = True 330 close = True
320 else: 331 else:
321 # We /could/ only drain certain HTTP response codes. But 200 332 # We /could/ only drain certain HTTP response codes. But 200
322 # and non-200 wire protocol responses both require draining. 333 # and non-200 wire protocol responses both require draining.
323 # Since we have a capped reader in place for all situations 334 # Since we have a capped reader in place for all situations
327 338
328 if close: 339 if close:
329 self.headers.append((r'Connection', r'Close')) 340 self.headers.append((r'Connection', r'Close'))
330 341
331 if drain: 342 if drain:
332 assert isinstance(self.inp, util.cappedreader) 343 assert isinstance(self.req.bodyfh, util.cappedreader)
333 while True: 344 while True:
334 chunk = self.inp.read(32768) 345 chunk = self.req.bodyfh.read(32768)
335 if not chunk: 346 if not chunk:
336 break 347 break
337 348
338 self.server_write = self._start_response( 349 self.server_write = self._start_response(
339 pycompat.sysstr(status), self.headers) 350 pycompat.sysstr(status), self.headers)