Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/hgweb/request.py @ 36863:da4e2f87167d
hgweb: expose input stream on parsed WSGI request object
Our next step towards moving away from wsgirequest to our newer,
friendlier parsedrequest type is input stream access.
This commit exposes the input stream on the instance. Consumers
in the HTTP protocol server switch to it.
Because there were very few consumers of the input stream, we stopped
storing a reference to the input stream on wsgirequest directly. All
access now goes through parsedrequest. However, wsgirequest still
may read from this stream as part of cgi.parse(). So we still need to
create the stream from wsgirequest.
Differential Revision: https://phab.mercurial-scm.org/D2771
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 10 Mar 2018 11:06:13 -0800 |
parents | 1f7d9024674c |
children | 01f6bba64424 |
comparison
equal
deleted
inserted
replaced
36862:1f7d9024674c | 36863:da4e2f87167d |
---|---|
59 pycompat.bytesurl(i.strip()) for i in v] | 59 pycompat.bytesurl(i.strip()) for i in v] |
60 return bytesform | 60 return bytesform |
61 | 61 |
62 @attr.s(frozen=True) | 62 @attr.s(frozen=True) |
63 class parsedrequest(object): | 63 class parsedrequest(object): |
64 """Represents a parsed WSGI request / static HTTP request parameters.""" | 64 """Represents a parsed WSGI request. |
65 | |
66 Contains both parsed parameters as well as a handle on the input stream. | |
67 """ | |
65 | 68 |
66 # Request method. | 69 # Request method. |
67 method = attr.ib() | 70 method = attr.ib() |
68 # Full URL for this request. | 71 # Full URL for this request. |
69 url = attr.ib() | 72 url = attr.ib() |
89 # Dict of query string arguments. Values are lists with at least 1 item. | 92 # Dict of query string arguments. Values are lists with at least 1 item. |
90 querystringdict = attr.ib() | 93 querystringdict = attr.ib() |
91 # wsgiref.headers.Headers instance. Operates like a dict with case | 94 # wsgiref.headers.Headers instance. Operates like a dict with case |
92 # insensitive keys. | 95 # insensitive keys. |
93 headers = attr.ib() | 96 headers = attr.ib() |
94 | 97 # Request body input stream. |
95 def parserequestfromenv(env): | 98 bodyfh = attr.ib() |
99 | |
100 def parserequestfromenv(env, bodyfh): | |
96 """Parse URL components from environment variables. | 101 """Parse URL components from environment variables. |
97 | 102 |
98 WSGI defines request attributes via environment variables. This function | 103 WSGI defines request attributes via environment variables. This function |
99 parses the environment variables into a data structure. | 104 parses the environment variables into a data structure. |
100 """ | 105 """ |
206 # sent. But for all intents and purposes it should be OK to lie about | 211 # sent. But for all intents and purposes it should be OK to lie about |
207 # this, since a consumer will use either value to determine how many | 212 # this, since a consumer will use either value to determine how many |
208 # bytes are available to read. | 213 # bytes are available to read. |
209 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env: | 214 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env: |
210 headers['Content-Length'] = env['CONTENT_LENGTH'] | 215 headers['Content-Length'] = env['CONTENT_LENGTH'] |
216 | |
217 # TODO do this once we remove wsgirequest.inp, otherwise we could have | |
218 # multiple readers from the underlying input stream. | |
219 #bodyfh = env['wsgi.input'] | |
220 #if 'Content-Length' in headers: | |
221 # bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length'])) | |
211 | 222 |
212 return parsedrequest(method=env['REQUEST_METHOD'], | 223 return parsedrequest(method=env['REQUEST_METHOD'], |
213 url=fullurl, baseurl=baseurl, | 224 url=fullurl, baseurl=baseurl, |
214 advertisedurl=advertisedfullurl, | 225 advertisedurl=advertisedfullurl, |
215 advertisedbaseurl=advertisedbaseurl, | 226 advertisedbaseurl=advertisedbaseurl, |
217 dispatchparts=dispatchparts, dispatchpath=dispatchpath, | 228 dispatchparts=dispatchparts, dispatchpath=dispatchpath, |
218 havepathinfo='PATH_INFO' in env, | 229 havepathinfo='PATH_INFO' in env, |
219 querystring=querystring, | 230 querystring=querystring, |
220 querystringlist=querystringlist, | 231 querystringlist=querystringlist, |
221 querystringdict=querystringdict, | 232 querystringdict=querystringdict, |
222 headers=headers) | 233 headers=headers, |
234 bodyfh=bodyfh) | |
223 | 235 |
224 class wsgirequest(object): | 236 class wsgirequest(object): |
225 """Higher-level API for a WSGI request. | 237 """Higher-level API for a WSGI request. |
226 | 238 |
227 WSGI applications are invoked with 2 arguments. They are used to | 239 WSGI applications are invoked with 2 arguments. They are used to |
231 def __init__(self, wsgienv, start_response): | 243 def __init__(self, wsgienv, start_response): |
232 version = wsgienv[r'wsgi.version'] | 244 version = wsgienv[r'wsgi.version'] |
233 if (version < (1, 0)) or (version >= (2, 0)): | 245 if (version < (1, 0)) or (version >= (2, 0)): |
234 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" | 246 raise RuntimeError("Unknown and unsupported WSGI version %d.%d" |
235 % version) | 247 % version) |
236 self.inp = wsgienv[r'wsgi.input'] | 248 |
249 inp = wsgienv[r'wsgi.input'] | |
237 | 250 |
238 if r'HTTP_CONTENT_LENGTH' in wsgienv: | 251 if r'HTTP_CONTENT_LENGTH' in wsgienv: |
239 self.inp = util.cappedreader(self.inp, | 252 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH'])) |
240 int(wsgienv[r'HTTP_CONTENT_LENGTH'])) | |
241 elif r'CONTENT_LENGTH' in wsgienv: | 253 elif r'CONTENT_LENGTH' in wsgienv: |
242 self.inp = util.cappedreader(self.inp, | 254 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH'])) |
243 int(wsgienv[r'CONTENT_LENGTH'])) | |
244 | 255 |
245 self.err = wsgienv[r'wsgi.errors'] | 256 self.err = wsgienv[r'wsgi.errors'] |
246 self.threaded = wsgienv[r'wsgi.multithread'] | 257 self.threaded = wsgienv[r'wsgi.multithread'] |
247 self.multiprocess = wsgienv[r'wsgi.multiprocess'] | 258 self.multiprocess = wsgienv[r'wsgi.multiprocess'] |
248 self.run_once = wsgienv[r'wsgi.run_once'] | 259 self.run_once = wsgienv[r'wsgi.run_once'] |
249 self.env = wsgienv | 260 self.env = wsgienv |
250 self.form = normalize(cgi.parse(self.inp, | 261 self.form = normalize(cgi.parse(inp, |
251 self.env, | 262 self.env, |
252 keep_blank_values=1)) | 263 keep_blank_values=1)) |
253 self._start_response = start_response | 264 self._start_response = start_response |
254 self.server_write = None | 265 self.server_write = None |
255 self.headers = [] | 266 self.headers = [] |
256 | 267 |
257 self.req = parserequestfromenv(wsgienv) | 268 self.req = parserequestfromenv(wsgienv, inp) |
258 | 269 |
259 def respond(self, status, type, filename=None, body=None): | 270 def respond(self, status, type, filename=None, body=None): |
260 if not isinstance(type, str): | 271 if not isinstance(type, str): |
261 type = pycompat.sysstr(type) | 272 type = pycompat.sysstr(type) |
262 if self._start_response is not None: | 273 if self._start_response is not None: |
313 # that we can drain the request responsibly. The WSGI | 324 # that we can drain the request responsibly. The WSGI |
314 # specification only says that servers *should* ensure the | 325 # specification only says that servers *should* ensure the |
315 # input stream doesn't overrun the actual request. So there's | 326 # input stream doesn't overrun the actual request. So there's |
316 # no guarantee that reading until EOF won't corrupt the stream | 327 # no guarantee that reading until EOF won't corrupt the stream |
317 # state. | 328 # state. |
318 if not isinstance(self.inp, util.cappedreader): | 329 if not isinstance(self.req.bodyfh, util.cappedreader): |
319 close = True | 330 close = True |
320 else: | 331 else: |
321 # We /could/ only drain certain HTTP response codes. But 200 | 332 # We /could/ only drain certain HTTP response codes. But 200 |
322 # and non-200 wire protocol responses both require draining. | 333 # and non-200 wire protocol responses both require draining. |
323 # Since we have a capped reader in place for all situations | 334 # Since we have a capped reader in place for all situations |
327 | 338 |
328 if close: | 339 if close: |
329 self.headers.append((r'Connection', r'Close')) | 340 self.headers.append((r'Connection', r'Close')) |
330 | 341 |
331 if drain: | 342 if drain: |
332 assert isinstance(self.inp, util.cappedreader) | 343 assert isinstance(self.req.bodyfh, util.cappedreader) |
333 while True: | 344 while True: |
334 chunk = self.inp.read(32768) | 345 chunk = self.req.bodyfh.read(32768) |
335 if not chunk: | 346 if not chunk: |
336 break | 347 break |
337 | 348 |
338 self.server_write = self._start_response( | 349 self.server_write = self._start_response( |
339 pycompat.sysstr(status), self.headers) | 350 pycompat.sysstr(status), self.headers) |