mercurial/utils/cborutil.py
changeset 39411 aeb551a3bb8a
parent 37898 2ae6a3134362
child 39412 a40d3da89b7d
--- a/mercurial/utils/cborutil.py	Tue Aug 28 15:22:06 2018 -0700
+++ b/mercurial/utils/cborutil.py	Tue Aug 28 15:02:48 2018 -0700
@@ -8,6 +8,7 @@
 from __future__ import absolute_import
 
 import struct
+import sys
 
 from ..thirdparty.cbor.cbor2 import (
     decoder as decodermod,
@@ -35,11 +36,16 @@
 
 SUBTYPE_MASK = 0b00011111
 
+SUBTYPE_FALSE = 20
+SUBTYPE_TRUE = 21
+SUBTYPE_NULL = 22
 SUBTYPE_HALF_FLOAT = 25
 SUBTYPE_SINGLE_FLOAT = 26
 SUBTYPE_DOUBLE_FLOAT = 27
 SUBTYPE_INDEFINITE = 31
 
+SEMANTIC_TAG_FINITE_SET = 258
+
 # Indefinite types begin with their major type ORd with information value 31.
 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
     r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
@@ -146,7 +152,7 @@
 def streamencodeset(s):
     # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
     # semantic tag 258 for finite sets.
-    yield encodelength(MAJOR_TYPE_SEMANTIC, 258)
+    yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
 
     for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
         yield chunk
@@ -260,3 +266,710 @@
                     len(chunk), length))
 
         yield chunk
+
+class CBORDecodeError(Exception):
+    """Represents an error decoding CBOR."""
+
+if sys.version_info.major >= 3:
+    def _elementtointeger(b, i):
+        return b[i]
+else:
+    def _elementtointeger(b, i):
+        return ord(b[i])
+
+STRUCT_BIG_UBYTE = struct.Struct(r'>B')
+STRUCT_BIG_USHORT = struct.Struct('>H')
+STRUCT_BIG_ULONG = struct.Struct('>L')
+STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
+
+SPECIAL_NONE = 0
+SPECIAL_START_INDEFINITE_BYTESTRING = 1
+SPECIAL_START_ARRAY = 2
+SPECIAL_START_MAP = 3
+SPECIAL_START_SET = 4
+SPECIAL_INDEFINITE_BREAK = 5
+
+def decodeitem(b, offset=0):
+    """Decode a new CBOR value from a buffer at offset.
+
+    This function attempts to decode up to one complete CBOR value
+    from ``b`` starting at offset ``offset``.
+
+    The beginning of a collection (such as an array, map, set, or
+    indefinite length bytestring) counts as a single value. For these
+    special cases, a state flag will indicate that a special value was seen.
+
+    When called, the function either returns a decoded value or gives
+    a hint as to how many more bytes are needed to do so. By calling
+    the function repeatedly given a stream of bytes, the caller can
+    build up the original values.
+
+    Returns a tuple with the following elements:
+
+    * Bool indicating whether a complete value was decoded.
+    * A decoded value if first value is True otherwise None
+    * Integer number of bytes. If positive, the number of bytes
+      read. If negative, the number of bytes we need to read to
+      decode this value or the next chunk in this value.
+    * One of the ``SPECIAL_*`` constants indicating special treatment
+      for this value. ``SPECIAL_NONE`` means this is a fully decoded
+      simple value (such as an integer or bool).
+    """
+
+    initial = _elementtointeger(b, offset)
+    offset += 1
+
+    majortype = initial >> 5
+    subtype = initial & SUBTYPE_MASK
+
+    if majortype == MAJOR_TYPE_UINT:
+        complete, value, readcount = decodeuint(subtype, b, offset)
+
+        if complete:
+            return True, value, readcount + 1, SPECIAL_NONE
+        else:
+            return False, None, readcount, SPECIAL_NONE
+
+    elif majortype == MAJOR_TYPE_NEGINT:
+        # Negative integers are the same as UINT except inverted minus 1.
+        complete, value, readcount = decodeuint(subtype, b, offset)
+
+        if complete:
+            return True, -value - 1, readcount + 1, SPECIAL_NONE
+        else:
+            return False, None, readcount, SPECIAL_NONE
+
+    elif majortype == MAJOR_TYPE_BYTESTRING:
+        # Beginning of bytestrings are treated as uints in order to
+        # decode their length, which may be indefinite.
+        complete, size, readcount = decodeuint(subtype, b, offset,
+                                               allowindefinite=True)
+
+        # We don't know the size of the bytestring. It must be a definitive
+        # length since the indefinite subtype would be encoded in the initial
+        # byte.
+        if not complete:
+            return False, None, readcount, SPECIAL_NONE
+
+        # We know the length of the bytestring.
+        if size is not None:
+            # And the data is available in the buffer.
+            if offset + readcount + size <= len(b):
+                value = b[offset + readcount:offset + readcount + size]
+                return True, value, readcount + size + 1, SPECIAL_NONE
+
+            # And we need more data in order to return the bytestring.
+            else:
+                wanted = len(b) - offset - readcount - size
+                return False, None, wanted, SPECIAL_NONE
+
+        # It is an indefinite length bytestring.
+        else:
+            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING
+
+    elif majortype == MAJOR_TYPE_STRING:
+        raise CBORDecodeError('string major type not supported')
+
+    elif majortype == MAJOR_TYPE_ARRAY:
+        # Beginning of arrays are treated as uints in order to decode their
+        # length. We don't allow indefinite length arrays.
+        complete, size, readcount = decodeuint(subtype, b, offset)
+
+        if complete:
+            return True, size, readcount + 1, SPECIAL_START_ARRAY
+        else:
+            return False, None, readcount, SPECIAL_NONE
+
+    elif majortype == MAJOR_TYPE_MAP:
+        # Beginning of maps are treated as uints in order to decode their
+        # number of elements. We don't allow indefinite length arrays.
+        complete, size, readcount = decodeuint(subtype, b, offset)
+
+        if complete:
+            return True, size, readcount + 1, SPECIAL_START_MAP
+        else:
+            return False, None, readcount, SPECIAL_NONE
+
+    elif majortype == MAJOR_TYPE_SEMANTIC:
+        # Semantic tag value is read the same as a uint.
+        complete, tagvalue, readcount = decodeuint(subtype, b, offset)
+
+        if not complete:
+            return False, None, readcount, SPECIAL_NONE
+
+        # This behavior here is a little wonky. The main type being "decorated"
+        # by this semantic tag follows. A more robust parser would probably emit
+        # a special flag indicating this as a semantic tag and let the caller
+        # deal with the types that follow. But since we don't support many
+        # semantic tags, it is easier to deal with the special cases here and
+        # hide complexity from the caller. If we add support for more semantic
+        # tags, we should probably move semantic tag handling into the caller.
+        if tagvalue == SEMANTIC_TAG_FINITE_SET:
+            if offset + readcount >= len(b):
+                return False, None, -1, SPECIAL_NONE
+
+            complete, size, readcount2, special = decodeitem(b,
+                                                             offset + readcount)
+
+            if not complete:
+                return False, None, readcount2, SPECIAL_NONE
+
+            if special != SPECIAL_START_ARRAY:
+                raise CBORDecodeError('expected array after finite set '
+                                      'semantic tag')
+
+            return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
+
+        else:
+            raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
+
+    elif majortype == MAJOR_TYPE_SPECIAL:
+        # Only specific values for the information field are allowed.
+        if subtype == SUBTYPE_FALSE:
+            return True, False, 1, SPECIAL_NONE
+        elif subtype == SUBTYPE_TRUE:
+            return True, True, 1, SPECIAL_NONE
+        elif subtype == SUBTYPE_NULL:
+            return True, None, 1, SPECIAL_NONE
+        elif subtype == SUBTYPE_INDEFINITE:
+            return True, None, 1, SPECIAL_INDEFINITE_BREAK
+        # If value is 24, subtype is in next byte.
+        else:
+            raise CBORDecodeError('special type %d not allowed' % subtype)
+    else:
+        assert False
+
+def decodeuint(subtype, b, offset=0, allowindefinite=False):
+    """Decode an unsigned integer.
+
+    ``subtype`` is the lower 5 bits from the initial byte CBOR item
+    "header." ``b`` is a buffer containing bytes. ``offset`` points to
+    the index of the first byte after the byte that ``subtype`` was
+    derived from.
+
+    ``allowindefinite`` allows the special indefinite length value
+    indicator.
+
+    Returns a 3-tuple of (successful, value, count).
+
+    The first element is a bool indicating if decoding completed. The 2nd
+    is the decoded integer value or None if not fully decoded or the subtype
+    is 31 and ``allowindefinite`` is True. The 3rd value is the count of bytes.
+    If positive, it is the number of additional bytes decoded. If negative,
+    it is the number of additional bytes needed to decode this value.
+    """
+
+    # Small values are inline.
+    if subtype < 24:
+        return True, subtype, 0
+    # Indefinite length specifier.
+    elif subtype == 31:
+        if allowindefinite:
+            return True, None, 0
+        else:
+            raise CBORDecodeError('indefinite length uint not allowed here')
+    elif subtype >= 28:
+        raise CBORDecodeError('unsupported subtype on integer type: %d' %
+                              subtype)
+
+    if subtype == 24:
+        s = STRUCT_BIG_UBYTE
+    elif subtype == 25:
+        s = STRUCT_BIG_USHORT
+    elif subtype == 26:
+        s = STRUCT_BIG_ULONG
+    elif subtype == 27:
+        s = STRUCT_BIG_ULONGLONG
+    else:
+        raise CBORDecodeError('bounds condition checking violation')
+
+    if len(b) - offset >= s.size:
+        return True, s.unpack_from(b, offset)[0], s.size
+    else:
+        return False, None, len(b) - offset - s.size
+
+class bytestringchunk(bytes):
+    """Represents a chunk/segment in an indefinite length bytestring.
+
+    This behaves like a ``bytes`` but in addition has the ``isfirst``
+    and ``islast`` attributes indicating whether this chunk is the first
+    or last in an indefinite length bytestring.
+    """
+
+    def __new__(cls, v, first=False, last=False):
+        self = bytes.__new__(cls, v)
+        self.isfirst = first
+        self.islast = last
+
+        return self
+
+class sansiodecoder(object):
+    """A CBOR decoder that doesn't perform its own I/O.
+
+    To use, construct an instance and feed it segments containing
+    CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
+    indicates whether a fully-decoded value is available, how many bytes
+    were consumed, and offers a hint as to how many bytes should be fed
+    in next time to decode the next value.
+
+    The decoder assumes it will decode N discrete CBOR values, not just
+    a single value. i.e. if the bytestream contains uints packed one after
+    the other, the decoder will decode them all, rather than just the initial
+    one.
+
+    When ``decode()`` indicates a value is available, call ``getavailable()``
+    to return all fully decoded values.
+
+    ``decode()`` can partially decode input. It is up to the caller to keep
+    track of what data was consumed and to pass unconsumed data in on the
+    next invocation.
+
+    The decoder decodes atomically at the *item* level. See ``decodeitem()``.
+    If an *item* cannot be fully decoded, the decoder won't record it as
+    partially consumed. Instead, the caller will be instructed to pass in
+    the initial bytes of this item on the next invocation. This does result
+    in some redundant parsing. But the overhead should be minimal.
+
+    This decoder only supports a subset of CBOR as required by Mercurial.
+    It lacks support for:
+
+    * Indefinite length arrays
+    * Indefinite length maps
+    * Use of indefinite length bytestrings as keys or values within
+      arrays, maps, or sets.
+    * Nested arrays, maps, or sets within sets
+    * Any semantic tag that isn't a mathematical finite set
+    * Floating point numbers
+    * Undefined special value
+
+    CBOR types are decoded to Python types as follows:
+
+    uint -> int
+    negint -> int
+    bytestring -> bytes
+    map -> dict
+    array -> list
+    True -> bool
+    False -> bool
+    null -> None
+    indefinite length bytestring chunk -> [bytestringchunk]
+
+    The only non-obvious mapping here is an indefinite length bytestring
+    to the ``bytestringchunk`` type. This is to facilitate streaming
+    indefinite length bytestrings out of the decoder and to differentiate
+    a regular bytestring from an indefinite length bytestring.
+    """
+
+    _STATE_NONE = 0
+    _STATE_WANT_MAP_KEY = 1
+    _STATE_WANT_MAP_VALUE = 2
+    _STATE_WANT_ARRAY_VALUE = 3
+    _STATE_WANT_SET_VALUE = 4
+    _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
+    _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
+
+    def __init__(self):
+        # TODO add support for limiting size of bytestrings
+        # TODO add support for limiting number of keys / values in collections
+        # TODO add support for limiting size of buffered partial values
+
+        self.decodedbytecount = 0
+
+        self._state = self._STATE_NONE
+
+        # Stack of active nested collections. Each entry is a dict describing
+        # the collection.
+        self._collectionstack = []
+
+        # Fully decoded key to use for the current map.
+        self._currentmapkey = None
+
+        # Fully decoded values available for retrieval.
+        self._decodedvalues = []
+
+    @property
+    def inprogress(self):
+        """Whether the decoder has partially decoded a value."""
+        return self._state != self._STATE_NONE
+
+    def decode(self, b, offset=0):
+        """Attempt to decode bytes from an input buffer.
+
+        ``b`` is a collection of bytes and ``offset`` is the byte
+        offset within that buffer from which to begin reading data.
+
+        ``b`` must support ``len()`` and accessing bytes slices via
+        ``__slice__``. Typically ``bytes`` instances are used.
+
+        Returns a tuple with the following fields:
+
+        * Bool indicating whether values are available for retrieval.
+        * Integer indicating the number of bytes that were fully consumed,
+          starting from ``offset``.
+        * Integer indicating the number of bytes that are desired for the
+          next call in order to decode an item.
+        """
+        if not b:
+            return bool(self._decodedvalues), 0, 0
+
+        initialoffset = offset
+
+        # We could easily split the body of this loop into a function. But
+        # Python performance is sensitive to function calls and collections
+        # are composed of many items. So leaving as a while loop could help
+        # with performance. One thing that may not help is the use of
+        # if..elif versus a lookup/dispatch table. There may be value
+        # in switching that.
+        while offset < len(b):
+            # Attempt to decode an item. This could be a whole value or a
+            # special value indicating an event, such as start or end of a
+            # collection or indefinite length type.
+            complete, value, readcount, special = decodeitem(b, offset)
+
+            if readcount > 0:
+                self.decodedbytecount += readcount
+
+            if not complete:
+                assert readcount < 0
+                return (
+                    bool(self._decodedvalues),
+                    offset - initialoffset,
+                    -readcount,
+                )
+
+            offset += readcount
+
+            # No nested state. We either have a full value or beginning of a
+            # complex value to deal with.
+            if self._state == self._STATE_NONE:
+                # A normal value.
+                if special == SPECIAL_NONE:
+                    self._decodedvalues.append(value)
+
+                elif special == SPECIAL_START_ARRAY:
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': [],
+                    })
+                    self._state = self._STATE_WANT_ARRAY_VALUE
+
+                elif special == SPECIAL_START_MAP:
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': {},
+                    })
+                    self._state = self._STATE_WANT_MAP_KEY
+
+                elif special == SPECIAL_START_SET:
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': set(),
+                    })
+                    self._state = self._STATE_WANT_SET_VALUE
+
+                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
+                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
+
+                else:
+                    raise CBORDecodeError('unhandled special state: %d' %
+                                          special)
+
+            # This value becomes an element of the current array.
+            elif self._state == self._STATE_WANT_ARRAY_VALUE:
+                # Simple values get appended.
+                if special == SPECIAL_NONE:
+                    c = self._collectionstack[-1]
+                    c['v'].append(value)
+                    c['remaining'] -= 1
+
+                    # self._state doesn't need changed.
+
+                # An array nested within an array.
+                elif special == SPECIAL_START_ARRAY:
+                    lastc = self._collectionstack[-1]
+                    newvalue = []
+
+                    lastc['v'].append(newvalue)
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue,
+                    })
+
+                    # self._state doesn't need changed.
+
+                # A map nested within an array.
+                elif special == SPECIAL_START_MAP:
+                    lastc = self._collectionstack[-1]
+                    newvalue = {}
+
+                    lastc['v'].append(newvalue)
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue
+                    })
+
+                    self._state = self._STATE_WANT_MAP_KEY
+
+                elif special == SPECIAL_START_SET:
+                    lastc = self._collectionstack[-1]
+                    newvalue = set()
+
+                    lastc['v'].append(newvalue)
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue,
+                    })
+
+                    self._state = self._STATE_WANT_SET_VALUE
+
+                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
+                    raise CBORDecodeError('indefinite length bytestrings '
+                                          'not allowed as array values')
+
+                else:
+                    raise CBORDecodeError('unhandled special item when '
+                                          'expecting array value: %d' % special)
+
+            # This value becomes the key of the current map instance.
+            elif self._state == self._STATE_WANT_MAP_KEY:
+                if special == SPECIAL_NONE:
+                    self._currentmapkey = value
+                    self._state = self._STATE_WANT_MAP_VALUE
+
+                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
+                    raise CBORDecodeError('indefinite length bytestrings '
+                                          'not allowed as map keys')
+
+                elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,
+                                 SPECIAL_START_SET):
+                    raise CBORDecodeError('collections not supported as map '
+                                          'keys')
+
+                # We do not allow special values to be used as map keys.
+                else:
+                    raise CBORDecodeError('unhandled special item when '
+                                          'expecting map key: %d' % special)
+
+            # This value becomes the value of the current map key.
+            elif self._state == self._STATE_WANT_MAP_VALUE:
+                # Simple values simply get inserted into the map.
+                if special == SPECIAL_NONE:
+                    lastc = self._collectionstack[-1]
+                    lastc['v'][self._currentmapkey] = value
+                    lastc['remaining'] -= 1
+
+                    self._state = self._STATE_WANT_MAP_KEY
+
+                # A new array is used as the map value.
+                elif special == SPECIAL_START_ARRAY:
+                    lastc = self._collectionstack[-1]
+                    newvalue = []
+
+                    lastc['v'][self._currentmapkey] = newvalue
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue,
+                    })
+
+                    self._state = self._STATE_WANT_ARRAY_VALUE
+
+                # A new map is used as the map value.
+                elif special == SPECIAL_START_MAP:
+                    lastc = self._collectionstack[-1]
+                    newvalue = {}
+
+                    lastc['v'][self._currentmapkey] = newvalue
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue,
+                    })
+
+                    self._state = self._STATE_WANT_MAP_KEY
+
+                # A new set is used as the map value.
+                elif special == SPECIAL_START_SET:
+                    lastc = self._collectionstack[-1]
+                    newvalue = set()
+
+                    lastc['v'][self._currentmapkey] = newvalue
+                    lastc['remaining'] -= 1
+
+                    self._collectionstack.append({
+                        'remaining': value,
+                        'v': newvalue,
+                    })
+
+                    self._state = self._STATE_WANT_SET_VALUE
+
+                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
+                    raise CBORDecodeError('indefinite length bytestrings not '
+                                          'allowed as map values')
+
+                else:
+                    raise CBORDecodeError('unhandled special item when '
+                                          'expecting map value: %d' % special)
+
+                self._currentmapkey = None
+
+            # This value is added to the current set.
+            elif self._state == self._STATE_WANT_SET_VALUE:
+                if special == SPECIAL_NONE:
+                    lastc = self._collectionstack[-1]
+                    lastc['v'].add(value)
+                    lastc['remaining'] -= 1
+
+                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
+                    raise CBORDecodeError('indefinite length bytestrings not '
+                                          'allowed as set values')
+
+                elif special in (SPECIAL_START_ARRAY,
+                                 SPECIAL_START_MAP,
+                                 SPECIAL_START_SET):
+                    raise CBORDecodeError('collections not allowed as set '
+                                          'values')
+
+                # We don't allow non-trivial types to exist as set values.
+                else:
+                    raise CBORDecodeError('unhandled special item when '
+                                          'expecting set value: %d' % special)
+
+            # This value represents the first chunk in an indefinite length
+            # bytestring.
+            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
+                # We received a full chunk.
+                if special == SPECIAL_NONE:
+                    self._decodedvalues.append(bytestringchunk(value,
+                                                               first=True))
+
+                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
+
+                # The end of stream marker. This means it is an empty
+                # indefinite length bytestring.
+                elif special == SPECIAL_INDEFINITE_BREAK:
+                    # We /could/ convert this to a b''. But we want to preserve
+                    # the nature of the underlying data so consumers expecting
+                    # an indefinite length bytestring get one.
+                    self._decodedvalues.append(bytestringchunk(b'',
+                                                               first=True,
+                                                               last=True))
+
+                    # Since indefinite length bytestrings can't be used in
+                    # collections, we must be at the root level.
+                    assert not self._collectionstack
+                    self._state = self._STATE_NONE
+
+                else:
+                    raise CBORDecodeError('unexpected special value when '
+                                          'expecting bytestring chunk: %d' %
+                                          special)
+
+            # This value represents the non-initial chunk in an indefinite
+            # length bytestring.
+            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
+                # We received a full chunk.
+                if special == SPECIAL_NONE:
+                    self._decodedvalues.append(bytestringchunk(value))
+
+                # The end of stream marker.
+                elif special == SPECIAL_INDEFINITE_BREAK:
+                    self._decodedvalues.append(bytestringchunk(b'', last=True))
+
+                    # Since indefinite length bytestrings can't be used in
+                    # collections, we must be at the root level.
+                    assert not self._collectionstack
+                    self._state = self._STATE_NONE
+
+                else:
+                    raise CBORDecodeError('unexpected special value when '
+                                          'expecting bytestring chunk: %d' %
+                                          special)
+
+            else:
+                raise CBORDecodeError('unhandled decoder state: %d' %
+                                      self._state)
+
+            # We could have just added the final value in a collection. End
+            # all complete collections at the top of the stack.
+            while True:
+                # Bail if we're not waiting on a new collection item.
+                if self._state not in (self._STATE_WANT_ARRAY_VALUE,
+                                       self._STATE_WANT_MAP_KEY,
+                                       self._STATE_WANT_SET_VALUE):
+                    break
+
+                # Or we are expecting more items for this collection.
+                lastc = self._collectionstack[-1]
+
+                if lastc['remaining']:
+                    break
+
+                # The collection at the top of the stack is complete.
+
+                # Discard it, as it isn't needed for future items.
+                self._collectionstack.pop()
+
+                # If this is a nested collection, we don't emit it, since it
+                # will be emitted by its parent collection. But we do need to
+                # update state to reflect what the new top-most collection
+                # on the stack is.
+                if self._collectionstack:
+                    self._state = {
+                        list: self._STATE_WANT_ARRAY_VALUE,
+                        dict: self._STATE_WANT_MAP_KEY,
+                        set: self._STATE_WANT_SET_VALUE,
+                    }[type(self._collectionstack[-1]['v'])]
+
+                # If this is the root collection, emit it.
+                else:
+                    self._decodedvalues.append(lastc['v'])
+                    self._state = self._STATE_NONE
+
+        return (
+            bool(self._decodedvalues),
+            offset - initialoffset,
+            0,
+        )
+
+    def getavailable(self):
+        """Returns an iterator over fully decoded values.
+
+        Once values are retrieved, they won't be available on the next call.
+        """
+
+        l = list(self._decodedvalues)
+        self._decodedvalues = []
+        return l
+
+def decodeall(b):
+    """Decode all CBOR items present in an iterable of bytes.
+
+    In addition to regular decode errors, raises CBORDecodeError if the
+    entirety of the passed buffer does not fully decode to complete CBOR
+    values. This includes failure to decode any value, incomplete collection
+    types, incomplete indefinite length items, and extra data at the end of
+    the buffer.
+    """
+    if not b:
+        return []
+
+    decoder = sansiodecoder()
+
+    havevalues, readcount, wantbytes = decoder.decode(b)
+
+    if readcount != len(b):
+        raise CBORDecodeError('input data not fully consumed')
+
+    if decoder.inprogress:
+        raise CBORDecodeError('input data not complete')
+
+    return decoder.getavailable()