mercurial-scm/hg: comparison mercurial/utils/cborutil.py

equal deleted inserted replaced

-:e2697acd9381
+:62160d3077cd
 TODO consider adding limits as to the maximum amount of data that can
 be buffered.
 """
 def __init__(self):
 # Set up the buffering state around a fresh sansiodecoder instance.
 # decode() and getavailable() both delegate to this wrapped decoder.
 self._decoder = sansiodecoder()
 # Old revision: undecoded data was carried between calls in this one
 # attribute and prepended to the next input.
-self._leftover = None
 # New revision: chunks not yet consumed by the decoder are collected in
 # a list and only joined once decode() attempts to decode again.
+self._chunks = []
 # Byte count the wrapped decoder last reported as wanted; decode()
 # decrements it while it buffers chunks that are too short.
+self._wanted = 0
 def decode(self, b):
 """Attempt to decode bytes to CBOR values.
 Returns a tuple with the following fields:
 * Bool indicating whether new values are available for retrieval.
 * Integer number of bytes decoded from the new input.
 * Integer number of bytes wanted to decode the next value.
 """
 # NOTE(review): this block interleaves two revisions. Lines marked '-'
 # are the old implementation and lines marked '+' are the new one.
+# Our strategy for buffering is to aggregate the incoming chunks in a
-if self._leftover:
+# list until we've received enough data to decode the next item.
-oldlen = len(self._leftover)
+# This is slightly more complicated than using an ``io.BytesIO``
-b = self._leftover + b
+# or continuously concatenating incoming data. However, because it
-self._leftover = None
+# isn't constantly reallocating backing memory for a growing buffer,
+# it prevents excessive memory thrashing and is significantly faster,
+# especially in cases where the percentage of input chunks that don't
+# decode into a full item is high.
+if self._chunks:
+# A previous call said we needed N bytes to decode the next item.
+# But this call doesn't provide enough data. We buffer the incoming
+# chunk without attempting to decode.
+if len(b) < self._wanted:
+self._chunks.append(b)
+self._wanted -= len(b)
 # The wrapped decoder is not called on this path: report no new values,
 # 0 bytes decoded, and the number of bytes still outstanding.
+return False, 0, self._wanted
+# Else we may have enough data to decode the next item. Aggregate
+# old data with new and reset the buffer.
+newlen = len(b)
+self._chunks.append(b)
+b = b''.join(self._chunks)
+self._chunks = []
 # oldlen is the size of the previously buffered data that now sits at
 # the front of the joined buffer.
+oldlen = len(b) - newlen
 else:
-b = b
 oldlen = 0
 # Hand the (possibly aggregated) buffer to the wrapped decoder.
 available, readcount, wanted = self._decoder.decode(b)
 # Remember the decoder's request so the next call can decide whether
 # the incoming chunk is large enough to attempt a decode.
+self._wanted = wanted
 # Keep whatever the decoder did not consume for the next call.
 if readcount < len(b):
-self._leftover = b[readcount:]
+self._chunks.append(b[readcount:])
 # Subtract the previously buffered bytes so the count is expressed
 # relative to this call's input.
 # NOTE(review): this would be negative if the decoder consumed fewer
 # than oldlen bytes; confirm against sansiodecoder.decode.
 return available, readcount - oldlen, wanted
 def getavailable(self):
 # Delegates directly to the wrapped decoder's getavailable().
 # Presumably this returns the values decoded so far; confirm against
 # sansiodecoder, which is not visible here.
 return self._decoder.getavailable()

changeset 40030	62160d3077cd
parent 39456	8d858fbf2759
child 40124	b638219a23c3