comparison mercurial/keepalive.py @ 30686:8352c42a0a0d

keepalive: don't concatenate strings when reading chunked transfer

Surprisingly, this didn't appear to speed up HTTP-based stream cloning on my machine. I suspect this has more to do with the fact we're using small HTTP chunks and string concatenation overhead isn't so bad. However, the reasons for this change are solid: we know string concatenation can be a performance sink.
author Gregory Szorc <gregory.szorc@gmail.com>
date Wed, 07 Oct 2015 15:33:52 -0700
parents 88a448a12ae8
children 5d06f6b73a57
comparison
equal deleted inserted replaced
30685:95325386cd1a 30686:8352c42a0a0d
397 return s 397 return s
398 398
399 # stolen from Python SVN #68532 to fix issue1088 399 # stolen from Python SVN #68532 to fix issue1088
400 def _read_chunked(self, amt): 400 def _read_chunked(self, amt):
401 chunk_left = self.chunk_left 401 chunk_left = self.chunk_left
402 value = '' 402 parts = []
403 403
404 # XXX This accumulates chunks by repeated string concatenation,
405 # which is not efficient as the number or size of chunks gets big.
406 while True: 404 while True:
407 if chunk_left is None: 405 if chunk_left is None:
408 line = self.fp.readline() 406 line = self.fp.readline()
409 i = line.find(';') 407 i = line.find(';')
410 if i >= 0: 408 if i >= 0:
413 chunk_left = int(line, 16) 411 chunk_left = int(line, 16)
414 except ValueError: 412 except ValueError:
415 # close the connection as protocol synchronization is 413 # close the connection as protocol synchronization is
416 # probably lost 414 # probably lost
417 self.close() 415 self.close()
418 raise httplib.IncompleteRead(value) 416 raise httplib.IncompleteRead(''.join(parts))
419 if chunk_left == 0: 417 if chunk_left == 0:
420 break 418 break
421 if amt is None: 419 if amt is None:
422 value += self._safe_read(chunk_left) 420 parts.append(self._safe_read(chunk_left))
423 elif amt < chunk_left: 421 elif amt < chunk_left:
424 value += self._safe_read(amt) 422 parts.append(self._safe_read(amt))
425 self.chunk_left = chunk_left - amt 423 self.chunk_left = chunk_left - amt
426 return value 424 return ''.join(parts)
427 elif amt == chunk_left: 425 elif amt == chunk_left:
428 value += self._safe_read(amt) 426 parts.append(self._safe_read(amt))
429 self._safe_read(2) # toss the CRLF at the end of the chunk 427 self._safe_read(2) # toss the CRLF at the end of the chunk
430 self.chunk_left = None 428 self.chunk_left = None
431 return value 429 return ''.join(parts)
432 else: 430 else:
433 value += self._safe_read(chunk_left) 431 parts.append(self._safe_read(chunk_left))
434 amt -= chunk_left 432 amt -= chunk_left
435 433
436 # we read the whole chunk, get another 434 # we read the whole chunk, get another
437 self._safe_read(2) # toss the CRLF at the end of the chunk 435 self._safe_read(2) # toss the CRLF at the end of the chunk
438 chunk_left = None 436 chunk_left = None
449 break 447 break
450 448
451 # we read everything; close the "file" 449 # we read everything; close the "file"
452 self.close() 450 self.close()
453 451
454 return value 452 return ''.join(parts)
455 453
456 def readline(self, limit=-1): 454 def readline(self, limit=-1):
457 i = self._rbuf.find('\n') 455 i = self._rbuf.find('\n')
458 while i < 0 and not (0 < limit <= len(self._rbuf)): 456 while i < 0 and not (0 < limit <= len(self._rbuf)):
459 new = self._raw_read(self._rbufsize) 457 new = self._raw_read(self._rbufsize)