Mercurial > public > mercurial-scm > hg-stable
diff mercurial/httpclient/__init__.py @ 16643:24dbef11f477
httpclient: update to revision 892730fe7f46 of httpplus
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Fri, 04 May 2012 16:00:33 -0500 |
parents | c81dce8a7bb6 |
children | 69af967b6d6f |
line wrap: on
line diff
--- a/mercurial/httpclient/__init__.py Sat May 12 00:06:11 2012 +0200 +++ b/mercurial/httpclient/__init__.py Fri May 04 16:00:33 2012 -0500 @@ -45,6 +45,7 @@ import select import socket +import _readers import socketutil logger = logging.getLogger(__name__) @@ -54,8 +55,6 @@ HTTP_VER_1_0 = 'HTTP/1.0' HTTP_VER_1_1 = 'HTTP/1.1' -_LEN_CLOSE_IS_END = -1 - OUTGOING_BUFFER_SIZE = 1 << 15 INCOMING_BUFFER_SIZE = 1 << 20 @@ -83,23 +82,19 @@ The response will continue to load as available. If you need the complete response before continuing, check the .complete() method. """ - def __init__(self, sock, timeout): + def __init__(self, sock, timeout, method): self.sock = sock + self.method = method self.raw_response = '' - self._body = None self._headers_len = 0 - self._content_len = 0 self.headers = None self.will_close = False self.status_line = '' self.status = None + self.continued = False self.http_version = None self.reason = None - self._chunked = False - self._chunked_done = False - self._chunked_until_next = 0 - self._chunked_skip_bytes = 0 - self._chunked_preloaded_block = None + self._reader = None self._read_location = 0 self._eol = EOL @@ -117,11 +112,12 @@ socket is closed, this will nearly always return False, even in cases where all the data has actually been loaded. """ - if self._chunked: - return self._chunked_done - if self._content_len == _LEN_CLOSE_IS_END: - return False - return self._body is not None and len(self._body) >= self._content_len + if self._reader: + return self._reader.done() + + def _close(self): + if self._reader is not None: + self._reader._close() def readline(self): """Read a single line from the response body. @@ -129,30 +125,34 @@ This may block until either a line ending is found or the response is complete. """ - eol = self._body.find('\n', self._read_location) - while eol == -1 and not self.complete(): + # TODO: move this into the reader interface where it can be + # smarter (and probably avoid copies) + bytes = [] + while not bytes: + try: + bytes = [self._reader.read(1)] + except _readers.ReadNotReady: + self._select() + while bytes[-1] != '\n' and not self.complete(): self._select() - eol = self._body.find('\n', self._read_location) - if eol != -1: - eol += 1 - else: - eol = len(self._body) - data = self._body[self._read_location:eol] - self._read_location = eol - return data + bytes.append(self._reader.read(1)) + if bytes[-1] != '\n': + next = self._reader.read(1) + while next and next != '\n': + bytes.append(next) + next = self._reader.read(1) + bytes.append(next) + return ''.join(bytes) def read(self, length=None): # if length is None, unbounded read while (not self.complete() # never select on a finished read and (not length # unbounded, so we wait for complete() - or (self._read_location + length) > len(self._body))): + or length > self._reader.available_data)): self._select() if not length: - length = len(self._body) - self._read_location - elif len(self._body) < (self._read_location + length): - length = len(self._body) - self._read_location - r = self._body[self._read_location:self._read_location + length] - self._read_location += len(r) + length = self._reader.available_data + r = self._reader.read(length) if self.complete() and self.will_close: self.sock.close() return r @@ -160,93 +160,35 @@ def _select(self): r, _, _ = select.select([self.sock], [], [], self._timeout) if not r: - # socket was not readable. If the response is not complete - # and we're not a _LEN_CLOSE_IS_END response, raise a timeout. - # If we are a _LEN_CLOSE_IS_END response and we have no data, - # raise a timeout. - if not (self.complete() or - (self._content_len == _LEN_CLOSE_IS_END and self._body)): + # socket was not readable. If the response is not + # complete, raise a timeout. + if not self.complete(): logger.info('timed out with timeout of %s', self._timeout) raise HTTPTimeoutException('timeout reading data') - logger.info('cl: %r body: %r', self._content_len, self._body) try: data = self.sock.recv(INCOMING_BUFFER_SIZE) - # If the socket was readable and no data was read, that - # means the socket was closed. If this isn't a - # _CLOSE_IS_END socket, then something is wrong if we're - # here (we shouldn't enter _select() if the response is - # complete), so abort. - if not data and self._content_len != _LEN_CLOSE_IS_END: - raise HTTPRemoteClosedError( - 'server appears to have closed the socket mid-response') except socket.sslerror, e: if e.args[0] != socket.SSL_ERROR_WANT_READ: raise logger.debug('SSL_WANT_READ in _select, should retry later') return True logger.debug('response read %d data during _select', len(data)) + # If the socket was readable and no data was read, that means + # the socket was closed. Inform the reader (if any) so it can + # raise an exception if this is an invalid situation. if not data: - if self.headers and self._content_len == _LEN_CLOSE_IS_END: - self._content_len = len(self._body) + if self._reader: + self._reader._close() return False else: self._load_response(data) return True - def _chunked_parsedata(self, data): - if self._chunked_preloaded_block: - data = self._chunked_preloaded_block + data - self._chunked_preloaded_block = None - while data: - logger.debug('looping with %d data remaining', len(data)) - # Slice out anything we should skip - if self._chunked_skip_bytes: - if len(data) <= self._chunked_skip_bytes: - self._chunked_skip_bytes -= len(data) - data = '' - break - else: - data = data[self._chunked_skip_bytes:] - self._chunked_skip_bytes = 0 - - # determine how much is until the next chunk - if self._chunked_until_next: - amt = self._chunked_until_next - logger.debug('reading remaining %d of existing chunk', amt) - self._chunked_until_next = 0 - body = data - else: - try: - amt, body = data.split(self._eol, 1) - except ValueError: - self._chunked_preloaded_block = data - logger.debug('saving %r as a preloaded block for chunked', - self._chunked_preloaded_block) - return - amt = int(amt, base=16) - logger.debug('reading chunk of length %d', amt) - if amt == 0: - self._chunked_done = True - - # read through end of what we have or the chunk - self._body += body[:amt] - if len(body) >= amt: - data = body[amt:] - self._chunked_skip_bytes = len(self._eol) - else: - self._chunked_until_next = amt - len(body) - self._chunked_skip_bytes = 0 - data = '' - def _load_response(self, data): - if self._chunked: - self._chunked_parsedata(data) - return - elif self._body is not None: - self._body += data - return - - # We haven't seen end of headers yet + # Being here implies we're not at the end of the headers yet, + # since at the end of this method if headers were completely + # loaded we replace this method with the load() method of the + # reader we created. self.raw_response += data # This is a bogus server with bad line endings if self._eol not in self.raw_response: @@ -270,6 +212,7 @@ http_ver, status = hdrs.split(' ', 1) if status.startswith('100'): self.raw_response = body + self.continued = True logger.debug('continue seen, setting body to %r', body) return @@ -289,23 +232,46 @@ if self._eol != EOL: hdrs = hdrs.replace(self._eol, '\r\n') headers = rfc822.Message(cStringIO.StringIO(hdrs)) + content_len = None if HDR_CONTENT_LENGTH in headers: - self._content_len = int(headers[HDR_CONTENT_LENGTH]) + content_len = int(headers[HDR_CONTENT_LENGTH]) if self.http_version == HTTP_VER_1_0: self.will_close = True elif HDR_CONNECTION_CTRL in headers: self.will_close = ( headers[HDR_CONNECTION_CTRL].lower() == CONNECTION_CLOSE) - if self._content_len == 0: - self._content_len = _LEN_CLOSE_IS_END if (HDR_XFER_ENCODING in headers and headers[HDR_XFER_ENCODING].lower() == XFER_ENCODING_CHUNKED): - self._body = '' - self._chunked_parsedata(body) - self._chunked = True - if self._body is None: - self._body = body + self._reader = _readers.ChunkedReader(self._eol) + logger.debug('using a chunked reader') + else: + # HEAD responses are forbidden from returning a body, and + # it's implausible for a CONNECT response to use + # close-is-end logic for an OK response. + if (self.method == 'HEAD' or + (self.method == 'CONNECT' and content_len is None)): + content_len = 0 + if content_len is not None: + logger.debug('using a content-length reader with length %d', + content_len) + self._reader = _readers.ContentLengthReader(content_len) + else: + # Response body had no length specified and is not + # chunked, so the end of the body will only be + # identifiable by the termination of the socket by the + # server. My interpretation of the spec means that we + # are correct in hitting this case if + # transfer-encoding, content-length, and + # connection-control were left unspecified. + self._reader = _readers.CloseIsEndReader() + logger.debug('using a close-is-end reader') + self.will_close = True + + if body: + self._reader._load(body) + logger.debug('headers complete') self.headers = headers + self._load_response = self._reader._load class HTTPConnection(object): @@ -382,13 +348,14 @@ {}, HTTP_VER_1_0) sock.send(data) sock.setblocking(0) - r = self.response_class(sock, self.timeout) + r = self.response_class(sock, self.timeout, 'CONNECT') timeout_exc = HTTPTimeoutException( 'Timed out waiting for CONNECT response from proxy') while not r.complete(): try: if not r._select(): - raise timeout_exc + if not r.complete(): + raise timeout_exc except HTTPTimeoutException: # This raise/except pattern looks goofy, but # _select can raise the timeout as well as the @@ -527,7 +494,7 @@ out = outgoing_headers or body blocking_on_continue = False if expect_continue and not outgoing_headers and not ( - response and response.headers): + response and (response.headers or response.continued)): logger.info( 'waiting up to %s seconds for' ' continue response from server', @@ -550,11 +517,6 @@ 'server, optimistically sending request body') else: raise HTTPTimeoutException('timeout sending data') - # TODO exceptional conditions with select? (what are those be?) - # TODO if the response is loading, must we finish sending at all? - # - # Certainly not if it's going to close the connection and/or - # the response is already done...I think. was_first = first # incoming data @@ -572,11 +534,11 @@ logger.info('socket appears closed in read') self.sock = None self._current_response = None + if response is not None: + response._close() # This if/elif ladder is a bit subtle, # comments in each branch should help. - if response is not None and ( - response.complete() or - response._content_len == _LEN_CLOSE_IS_END): + if response is not None and response.complete(): # Server responded completely and then # closed the socket. We should just shut # things down and let the caller get their @@ -605,7 +567,7 @@ 'response was missing or incomplete!') logger.debug('read %d bytes in request()', len(data)) if response is None: - response = self.response_class(r[0], self.timeout) + response = self.response_class(r[0], self.timeout, method) response._load_response(data) # Jump to the next select() call so we load more # data if the server is still sending us content. @@ -613,10 +575,6 @@ except socket.error, e: if e[0] != errno.EPIPE and not was_first: raise - if (response._content_len - and response._content_len != _LEN_CLOSE_IS_END): - outgoing_headers = sent_data + outgoing_headers - reconnect('read') # outgoing data if w and out: @@ -661,7 +619,7 @@ # close if the server response said to or responded before eating # the whole request if response is None: - response = self.response_class(self.sock, self.timeout) + response = self.response_class(self.sock, self.timeout, method) complete = response.complete() data_left = bool(outgoing_headers or body) if data_left: @@ -679,7 +637,8 @@ raise httplib.ResponseNotReady() r = self._current_response while r.headers is None: - r._select() + if not r._select() and not r.complete(): + raise _readers.HTTPRemoteClosedError() if r.will_close: self.sock = None self._current_response = None @@ -705,7 +664,7 @@ class HTTPStateError(httplib.HTTPException): """Invalid internal state encountered.""" - -class HTTPRemoteClosedError(httplib.HTTPException): - """The server closed the remote socket in the middle of a response.""" +# Forward this exception type from _readers since it needs to be part +# of the public API. +HTTPRemoteClosedError = _readers.HTTPRemoteClosedError # no-check-code