mercurial/utils/cborutil.py
changeset 43076 2372284d9457
parent 42480 b6387a65851d
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    44 
    44 
    45 SEMANTIC_TAG_FINITE_SET = 258
    45 SEMANTIC_TAG_FINITE_SET = 258
    46 
    46 
    47 # Indefinite types begin with their major type ORd with information value 31.
    47 # Indefinite types begin with their major type ORd with information value 31.
    48 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    48 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    49     r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
    49     r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE
       
    50 )
    50 BEGIN_INDEFINITE_ARRAY = struct.pack(
    51 BEGIN_INDEFINITE_ARRAY = struct.pack(
    51     r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE)
    52     r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE
       
    53 )
    52 BEGIN_INDEFINITE_MAP = struct.pack(
    54 BEGIN_INDEFINITE_MAP = struct.pack(
    53     r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE)
    55     r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE
       
    56 )
    54 
    57 
    55 ENCODED_LENGTH_1 = struct.Struct(r'>B')
    58 ENCODED_LENGTH_1 = struct.Struct(r'>B')
    56 ENCODED_LENGTH_2 = struct.Struct(r'>BB')
    59 ENCODED_LENGTH_2 = struct.Struct(r'>BB')
    57 ENCODED_LENGTH_3 = struct.Struct(r'>BH')
    60 ENCODED_LENGTH_3 = struct.Struct(r'>BH')
    58 ENCODED_LENGTH_4 = struct.Struct(r'>BL')
    61 ENCODED_LENGTH_4 = struct.Struct(r'>BL')
    59 ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
    62 ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
    60 
    63 
    61 # The break ends an indefinite length item.
    64 # The break ends an indefinite length item.
    62 BREAK = b'\xff'
    65 BREAK = b'\xff'
    63 BREAK_INT = 255
    66 BREAK_INT = 255
       
    67 
    64 
    68 
    65 def encodelength(majortype, length):
    69 def encodelength(majortype, length):
    66     """Obtain a value encoding the major type and its length."""
    70     """Obtain a value encoding the major type and its length."""
    67     if length < 24:
    71     if length < 24:
    68         return ENCODED_LENGTH_1.pack(majortype << 5 | length)
    72         return ENCODED_LENGTH_1.pack(majortype << 5 | length)
    73     elif length < 4294967296:
    77     elif length < 4294967296:
    74         return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
    78         return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
    75     else:
    79     else:
    76         return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
    80         return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
    77 
    81 
       
    82 
    78 def streamencodebytestring(v):
    83 def streamencodebytestring(v):
    79     yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    84     yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    80     yield v
    85     yield v
       
    86 
    81 
    87 
    82 def streamencodebytestringfromiter(it):
    88 def streamencodebytestringfromiter(it):
    83     """Convert an iterator of chunks to an indefinite bytestring.
    89     """Convert an iterator of chunks to an indefinite bytestring.
    84 
    90 
    85     Given an input that is iterable and each element in the iterator is
    91     Given an input that is iterable and each element in the iterator is
    91         yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
    97         yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
    92         yield chunk
    98         yield chunk
    93 
    99 
    94     yield BREAK
   100     yield BREAK
    95 
   101 
       
   102 
    96 def streamencodeindefinitebytestring(source, chunksize=65536):
   103 def streamencodeindefinitebytestring(source, chunksize=65536):
    97     """Given a large source buffer, emit as an indefinite length bytestring.
   104     """Given a large source buffer, emit as an indefinite length bytestring.
    98 
   105 
    99     This is a generator of chunks constituting the encoded CBOR data.
   106     This is a generator of chunks constituting the encoded CBOR data.
   100     """
   107     """
   102 
   109 
   103     i = 0
   110     i = 0
   104     l = len(source)
   111     l = len(source)
   105 
   112 
   106     while True:
   113     while True:
   107         chunk = source[i:i + chunksize]
   114         chunk = source[i : i + chunksize]
   108         i += len(chunk)
   115         i += len(chunk)
   109 
   116 
   110         yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
   117         yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
   111         yield chunk
   118         yield chunk
   112 
   119 
   113         if i >= l:
   120         if i >= l:
   114             break
   121             break
   115 
   122 
   116     yield BREAK
   123     yield BREAK
       
   124 
   117 
   125 
   118 def streamencodeint(v):
   126 def streamencodeint(v):
   119     if v >= 18446744073709551616 or v < -18446744073709551616:
   127     if v >= 18446744073709551616 or v < -18446744073709551616:
   120         raise ValueError('big integers not supported')
   128         raise ValueError('big integers not supported')
   121 
   129 
   122     if v >= 0:
   130     if v >= 0:
   123         yield encodelength(MAJOR_TYPE_UINT, v)
   131         yield encodelength(MAJOR_TYPE_UINT, v)
   124     else:
   132     else:
   125         yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
   133         yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
   126 
   134 
       
   135 
   127 def streamencodearray(l):
   136 def streamencodearray(l):
   128     """Encode a known size iterable to an array."""
   137     """Encode a known size iterable to an array."""
   129 
   138 
   130     yield encodelength(MAJOR_TYPE_ARRAY, len(l))
   139     yield encodelength(MAJOR_TYPE_ARRAY, len(l))
   131 
   140 
   132     for i in l:
   141     for i in l:
   133         for chunk in streamencode(i):
   142         for chunk in streamencode(i):
   134             yield chunk
   143             yield chunk
   135 
   144 
       
   145 
   136 def streamencodearrayfromiter(it):
   146 def streamencodearrayfromiter(it):
   137     """Encode an iterator of items to an indefinite length array."""
   147     """Encode an iterator of items to an indefinite length array."""
   138 
   148 
   139     yield BEGIN_INDEFINITE_ARRAY
   149     yield BEGIN_INDEFINITE_ARRAY
   140 
   150 
   142         for chunk in streamencode(i):
   152         for chunk in streamencode(i):
   143             yield chunk
   153             yield chunk
   144 
   154 
   145     yield BREAK
   155     yield BREAK
   146 
   156 
       
   157 
   147 def _mixedtypesortkey(v):
   158 def _mixedtypesortkey(v):
   148     return type(v).__name__, v
   159     return type(v).__name__, v
       
   160 
   149 
   161 
   150 def streamencodeset(s):
   162 def streamencodeset(s):
   151     # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
   163     # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
   152     # semantic tag 258 for finite sets.
   164     # semantic tag 258 for finite sets.
   153     yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
   165     yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
   154 
   166 
   155     for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
   167     for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
   156         yield chunk
   168         yield chunk
   157 
   169 
       
   170 
   158 def streamencodemap(d):
   171 def streamencodemap(d):
   159     """Encode dictionary to a generator.
   172     """Encode dictionary to a generator.
   160 
   173 
    161     Does not support indefinite length dictionaries.
    174     Does not support indefinite length dictionaries.
   162     """
   175     """
   163     yield encodelength(MAJOR_TYPE_MAP, len(d))
   176     yield encodelength(MAJOR_TYPE_MAP, len(d))
   164 
   177 
   165     for key, value in sorted(d.iteritems(),
   178     for key, value in sorted(
   166                              key=lambda x: _mixedtypesortkey(x[0])):
   179         d.iteritems(), key=lambda x: _mixedtypesortkey(x[0])
       
   180     ):
   167         for chunk in streamencode(key):
   181         for chunk in streamencode(key):
   168             yield chunk
   182             yield chunk
   169         for chunk in streamencode(value):
   183         for chunk in streamencode(value):
   170             yield chunk
   184             yield chunk
       
   185 
   171 
   186 
   172 def streamencodemapfromiter(it):
   187 def streamencodemapfromiter(it):
   173     """Given an iterable of (key, value), encode to an indefinite length map."""
   188     """Given an iterable of (key, value), encode to an indefinite length map."""
   174     yield BEGIN_INDEFINITE_MAP
   189     yield BEGIN_INDEFINITE_MAP
   175 
   190 
   179         for chunk in streamencode(value):
   194         for chunk in streamencode(value):
   180             yield chunk
   195             yield chunk
   181 
   196 
   182     yield BREAK
   197     yield BREAK
   183 
   198 
       
   199 
   184 def streamencodebool(b):
   200 def streamencodebool(b):
   185     # major type 7, simple value 20 and 21.
   201     # major type 7, simple value 20 and 21.
   186     yield b'\xf5' if b else b'\xf4'
   202     yield b'\xf5' if b else b'\xf4'
   187 
   203 
       
   204 
   188 def streamencodenone(v):
   205 def streamencodenone(v):
   189     # major type 7, simple value 22.
   206     # major type 7, simple value 22.
   190     yield b'\xf6'
   207     yield b'\xf6'
       
   208 
   191 
   209 
   192 STREAM_ENCODERS = {
   210 STREAM_ENCODERS = {
   193     bytes: streamencodebytestring,
   211     bytes: streamencodebytestring,
   194     int: streamencodeint,
   212     int: streamencodeint,
   195     pycompat.long: streamencodeint,
   213     pycompat.long: streamencodeint,
   199     set: streamencodeset,
   217     set: streamencodeset,
   200     bool: streamencodebool,
   218     bool: streamencodebool,
   201     type(None): streamencodenone,
   219     type(None): streamencodenone,
   202 }
   220 }
   203 
   221 
       
   222 
   204 def streamencode(v):
   223 def streamencode(v):
   205     """Encode a value in a streaming manner.
   224     """Encode a value in a streaming manner.
   206 
   225 
   207     Given an input object, encode it to CBOR recursively.
   226     Given an input object, encode it to CBOR recursively.
   208 
   227 
   224     if not fn:
   243     if not fn:
   225         raise ValueError('do not know how to encode %s' % type(v))
   244         raise ValueError('do not know how to encode %s' % type(v))
   226 
   245 
   227     return fn(v)
   246     return fn(v)
   228 
   247 
       
   248 
   229 class CBORDecodeError(Exception):
   249 class CBORDecodeError(Exception):
   230     """Represents an error decoding CBOR."""
   250     """Represents an error decoding CBOR."""
   231 
   251 
       
   252 
   232 if sys.version_info.major >= 3:
   253 if sys.version_info.major >= 3:
       
   254 
   233     def _elementtointeger(b, i):
   255     def _elementtointeger(b, i):
   234         return b[i]
   256         return b[i]
       
   257 
       
   258 
   235 else:
   259 else:
       
   260 
   236     def _elementtointeger(b, i):
   261     def _elementtointeger(b, i):
   237         return ord(b[i])
   262         return ord(b[i])
       
   263 
   238 
   264 
   239 STRUCT_BIG_UBYTE = struct.Struct(r'>B')
   265 STRUCT_BIG_UBYTE = struct.Struct(r'>B')
   240 STRUCT_BIG_USHORT = struct.Struct('>H')
   266 STRUCT_BIG_USHORT = struct.Struct('>H')
   241 STRUCT_BIG_ULONG = struct.Struct('>L')
   267 STRUCT_BIG_ULONG = struct.Struct('>L')
   242 STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
   268 STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
   245 SPECIAL_START_INDEFINITE_BYTESTRING = 1
   271 SPECIAL_START_INDEFINITE_BYTESTRING = 1
   246 SPECIAL_START_ARRAY = 2
   272 SPECIAL_START_ARRAY = 2
   247 SPECIAL_START_MAP = 3
   273 SPECIAL_START_MAP = 3
   248 SPECIAL_START_SET = 4
   274 SPECIAL_START_SET = 4
   249 SPECIAL_INDEFINITE_BREAK = 5
   275 SPECIAL_INDEFINITE_BREAK = 5
       
   276 
   250 
   277 
   251 def decodeitem(b, offset=0):
   278 def decodeitem(b, offset=0):
   252     """Decode a new CBOR value from a buffer at offset.
   279     """Decode a new CBOR value from a buffer at offset.
   253 
   280 
   254     This function attempts to decode up to one complete CBOR value
   281     This function attempts to decode up to one complete CBOR value
   299             return False, None, readcount, SPECIAL_NONE
   326             return False, None, readcount, SPECIAL_NONE
   300 
   327 
   301     elif majortype == MAJOR_TYPE_BYTESTRING:
   328     elif majortype == MAJOR_TYPE_BYTESTRING:
   302         # Beginning of bytestrings are treated as uints in order to
   329         # Beginning of bytestrings are treated as uints in order to
   303         # decode their length, which may be indefinite.
   330         # decode their length, which may be indefinite.
   304         complete, size, readcount = decodeuint(subtype, b, offset,
   331         complete, size, readcount = decodeuint(
   305                                                allowindefinite=True)
   332             subtype, b, offset, allowindefinite=True
       
   333         )
   306 
   334 
    307         # We don't know the size of the bytestring. It must be a definite
    335         # We don't know the size of the bytestring. It must be a definite
   308         # length since the indefinite subtype would be encoded in the initial
   336         # length since the indefinite subtype would be encoded in the initial
   309         # byte.
   337         # byte.
   310         if not complete:
   338         if not complete:
   312 
   340 
   313         # We know the length of the bytestring.
   341         # We know the length of the bytestring.
   314         if size is not None:
   342         if size is not None:
   315             # And the data is available in the buffer.
   343             # And the data is available in the buffer.
   316             if offset + readcount + size <= len(b):
   344             if offset + readcount + size <= len(b):
   317                 value = b[offset + readcount:offset + readcount + size]
   345                 value = b[offset + readcount : offset + readcount + size]
   318                 return True, value, readcount + size + 1, SPECIAL_NONE
   346                 return True, value, readcount + size + 1, SPECIAL_NONE
   319 
   347 
   320             # And we need more data in order to return the bytestring.
   348             # And we need more data in order to return the bytestring.
   321             else:
   349             else:
   322                 wanted = len(b) - offset - readcount - size
   350                 wanted = len(b) - offset - readcount - size
   365         # tags, we should probably move semantic tag handling into the caller.
   393         # tags, we should probably move semantic tag handling into the caller.
   366         if tagvalue == SEMANTIC_TAG_FINITE_SET:
   394         if tagvalue == SEMANTIC_TAG_FINITE_SET:
   367             if offset + readcount >= len(b):
   395             if offset + readcount >= len(b):
   368                 return False, None, -1, SPECIAL_NONE
   396                 return False, None, -1, SPECIAL_NONE
   369 
   397 
   370             complete, size, readcount2, special = decodeitem(b,
   398             complete, size, readcount2, special = decodeitem(
   371                                                              offset + readcount)
   399                 b, offset + readcount
       
   400             )
   372 
   401 
   373             if not complete:
   402             if not complete:
   374                 return False, None, readcount2, SPECIAL_NONE
   403                 return False, None, readcount2, SPECIAL_NONE
   375 
   404 
   376             if special != SPECIAL_START_ARRAY:
   405             if special != SPECIAL_START_ARRAY:
   377                 raise CBORDecodeError('expected array after finite set '
   406                 raise CBORDecodeError(
   378                                       'semantic tag')
   407                     'expected array after finite set ' 'semantic tag'
       
   408                 )
   379 
   409 
   380             return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
   410             return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
   381 
   411 
   382         else:
   412         else:
   383             raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
   413             raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
   396         else:
   426         else:
   397             raise CBORDecodeError('special type %d not allowed' % subtype)
   427             raise CBORDecodeError('special type %d not allowed' % subtype)
   398     else:
   428     else:
   399         assert False
   429         assert False
   400 
   430 
       
   431 
   401 def decodeuint(subtype, b, offset=0, allowindefinite=False):
   432 def decodeuint(subtype, b, offset=0, allowindefinite=False):
   402     """Decode an unsigned integer.
   433     """Decode an unsigned integer.
   403 
   434 
   404     ``subtype`` is the lower 5 bits from the initial byte CBOR item
   435     ``subtype`` is the lower 5 bits from the initial byte CBOR item
   405     "header." ``b`` is a buffer containing bytes. ``offset`` points to
   436     "header." ``b`` is a buffer containing bytes. ``offset`` points to
   426         if allowindefinite:
   457         if allowindefinite:
   427             return True, None, 0
   458             return True, None, 0
   428         else:
   459         else:
   429             raise CBORDecodeError('indefinite length uint not allowed here')
   460             raise CBORDecodeError('indefinite length uint not allowed here')
   430     elif subtype >= 28:
   461     elif subtype >= 28:
   431         raise CBORDecodeError('unsupported subtype on integer type: %d' %
   462         raise CBORDecodeError(
   432                               subtype)
   463             'unsupported subtype on integer type: %d' % subtype
       
   464         )
   433 
   465 
   434     if subtype == 24:
   466     if subtype == 24:
   435         s = STRUCT_BIG_UBYTE
   467         s = STRUCT_BIG_UBYTE
   436     elif subtype == 25:
   468     elif subtype == 25:
   437         s = STRUCT_BIG_USHORT
   469         s = STRUCT_BIG_USHORT
   445     if len(b) - offset >= s.size:
   477     if len(b) - offset >= s.size:
   446         return True, s.unpack_from(b, offset)[0], s.size
   478         return True, s.unpack_from(b, offset)[0], s.size
   447     else:
   479     else:
   448         return False, None, len(b) - offset - s.size
   480         return False, None, len(b) - offset - s.size
   449 
   481 
       
   482 
   450 class bytestringchunk(bytes):
   483 class bytestringchunk(bytes):
   451     """Represents a chunk/segment in an indefinite length bytestring.
   484     """Represents a chunk/segment in an indefinite length bytestring.
   452 
   485 
   453     This behaves like a ``bytes`` but in addition has the ``isfirst``
   486     This behaves like a ``bytes`` but in addition has the ``isfirst``
   454     and ``islast`` attributes indicating whether this chunk is the first
   487     and ``islast`` attributes indicating whether this chunk is the first
   459         self = bytes.__new__(cls, v)
   492         self = bytes.__new__(cls, v)
   460         self.isfirst = first
   493         self.isfirst = first
   461         self.islast = last
   494         self.islast = last
   462 
   495 
   463         return self
   496         return self
       
   497 
   464 
   498 
   465 class sansiodecoder(object):
   499 class sansiodecoder(object):
   466     """A CBOR decoder that doesn't perform its own I/O.
   500     """A CBOR decoder that doesn't perform its own I/O.
   467 
   501 
   468     To use, construct an instance and feed it segments containing
   502     To use, construct an instance and feed it segments containing
   604                 # A normal value.
   638                 # A normal value.
   605                 if special == SPECIAL_NONE:
   639                 if special == SPECIAL_NONE:
   606                     self._decodedvalues.append(value)
   640                     self._decodedvalues.append(value)
   607 
   641 
   608                 elif special == SPECIAL_START_ARRAY:
   642                 elif special == SPECIAL_START_ARRAY:
   609                     self._collectionstack.append({
   643                     self._collectionstack.append(
   610                         'remaining': value,
   644                         {'remaining': value, 'v': [],}
   611                         'v': [],
   645                     )
   612                     })
       
   613                     self._state = self._STATE_WANT_ARRAY_VALUE
   646                     self._state = self._STATE_WANT_ARRAY_VALUE
   614 
   647 
   615                 elif special == SPECIAL_START_MAP:
   648                 elif special == SPECIAL_START_MAP:
   616                     self._collectionstack.append({
   649                     self._collectionstack.append(
   617                         'remaining': value,
   650                         {'remaining': value, 'v': {},}
   618                         'v': {},
   651                     )
   619                     })
       
   620                     self._state = self._STATE_WANT_MAP_KEY
   652                     self._state = self._STATE_WANT_MAP_KEY
   621 
   653 
   622                 elif special == SPECIAL_START_SET:
   654                 elif special == SPECIAL_START_SET:
   623                     self._collectionstack.append({
   655                     self._collectionstack.append(
   624                         'remaining': value,
   656                         {'remaining': value, 'v': set(),}
   625                         'v': set(),
   657                     )
   626                     })
       
   627                     self._state = self._STATE_WANT_SET_VALUE
   658                     self._state = self._STATE_WANT_SET_VALUE
   628 
   659 
   629                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   660                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   630                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
   661                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
   631 
   662 
   632                 else:
   663                 else:
   633                     raise CBORDecodeError('unhandled special state: %d' %
   664                     raise CBORDecodeError(
   634                                           special)
   665                         'unhandled special state: %d' % special
       
   666                     )
   635 
   667 
   636             # This value becomes an element of the current array.
   668             # This value becomes an element of the current array.
   637             elif self._state == self._STATE_WANT_ARRAY_VALUE:
   669             elif self._state == self._STATE_WANT_ARRAY_VALUE:
   638                 # Simple values get appended.
   670                 # Simple values get appended.
   639                 if special == SPECIAL_NONE:
   671                 if special == SPECIAL_NONE:
   649                     newvalue = []
   681                     newvalue = []
   650 
   682 
   651                     lastc['v'].append(newvalue)
   683                     lastc['v'].append(newvalue)
   652                     lastc['remaining'] -= 1
   684                     lastc['remaining'] -= 1
   653 
   685 
   654                     self._collectionstack.append({
   686                     self._collectionstack.append(
   655                         'remaining': value,
   687                         {'remaining': value, 'v': newvalue,}
   656                         'v': newvalue,
   688                     )
   657                     })
       
   658 
   689 
    659                     # self._state doesn't need to change.
    690                     # self._state doesn't need to change.
   660 
   691 
   661                 # A map nested within an array.
   692                 # A map nested within an array.
   662                 elif special == SPECIAL_START_MAP:
   693                 elif special == SPECIAL_START_MAP:
   664                     newvalue = {}
   695                     newvalue = {}
   665 
   696 
   666                     lastc['v'].append(newvalue)
   697                     lastc['v'].append(newvalue)
   667                     lastc['remaining'] -= 1
   698                     lastc['remaining'] -= 1
   668 
   699 
   669                     self._collectionstack.append({
   700                     self._collectionstack.append(
   670                         'remaining': value,
   701                         {'remaining': value, 'v': newvalue}
   671                         'v': newvalue
   702                     )
   672                     })
       
   673 
   703 
   674                     self._state = self._STATE_WANT_MAP_KEY
   704                     self._state = self._STATE_WANT_MAP_KEY
   675 
   705 
   676                 elif special == SPECIAL_START_SET:
   706                 elif special == SPECIAL_START_SET:
   677                     lastc = self._collectionstack[-1]
   707                     lastc = self._collectionstack[-1]
   678                     newvalue = set()
   708                     newvalue = set()
   679 
   709 
   680                     lastc['v'].append(newvalue)
   710                     lastc['v'].append(newvalue)
   681                     lastc['remaining'] -= 1
   711                     lastc['remaining'] -= 1
   682 
   712 
   683                     self._collectionstack.append({
   713                     self._collectionstack.append(
   684                         'remaining': value,
   714                         {'remaining': value, 'v': newvalue,}
   685                         'v': newvalue,
   715                     )
   686                     })
       
   687 
   716 
   688                     self._state = self._STATE_WANT_SET_VALUE
   717                     self._state = self._STATE_WANT_SET_VALUE
   689 
   718 
   690                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   719                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   691                     raise CBORDecodeError('indefinite length bytestrings '
   720                     raise CBORDecodeError(
   692                                           'not allowed as array values')
   721                         'indefinite length bytestrings '
       
   722                         'not allowed as array values'
       
   723                     )
   693 
   724 
   694                 else:
   725                 else:
   695                     raise CBORDecodeError('unhandled special item when '
   726                     raise CBORDecodeError(
   696                                           'expecting array value: %d' % special)
   727                         'unhandled special item when '
       
   728                         'expecting array value: %d' % special
       
   729                     )
   697 
   730 
   698             # This value becomes the key of the current map instance.
   731             # This value becomes the key of the current map instance.
   699             elif self._state == self._STATE_WANT_MAP_KEY:
   732             elif self._state == self._STATE_WANT_MAP_KEY:
   700                 if special == SPECIAL_NONE:
   733                 if special == SPECIAL_NONE:
   701                     self._currentmapkey = value
   734                     self._currentmapkey = value
   702                     self._state = self._STATE_WANT_MAP_VALUE
   735                     self._state = self._STATE_WANT_MAP_VALUE
   703 
   736 
   704                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   737                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   705                     raise CBORDecodeError('indefinite length bytestrings '
   738                     raise CBORDecodeError(
   706                                           'not allowed as map keys')
   739                         'indefinite length bytestrings '
   707 
   740                         'not allowed as map keys'
   708                 elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,
   741                     )
   709                                  SPECIAL_START_SET):
   742 
   710                     raise CBORDecodeError('collections not supported as map '
   743                 elif special in (
   711                                           'keys')
   744                     SPECIAL_START_ARRAY,
       
   745                     SPECIAL_START_MAP,
       
   746                     SPECIAL_START_SET,
       
   747                 ):
       
   748                     raise CBORDecodeError(
       
   749                         'collections not supported as map ' 'keys'
       
   750                     )
   712 
   751 
   713                 # We do not allow special values to be used as map keys.
   752                 # We do not allow special values to be used as map keys.
   714                 else:
   753                 else:
   715                     raise CBORDecodeError('unhandled special item when '
   754                     raise CBORDecodeError(
   716                                           'expecting map key: %d' % special)
   755                         'unhandled special item when '
       
   756                         'expecting map key: %d' % special
       
   757                     )
   717 
   758 
   718             # This value becomes the value of the current map key.
   759             # This value becomes the value of the current map key.
   719             elif self._state == self._STATE_WANT_MAP_VALUE:
   760             elif self._state == self._STATE_WANT_MAP_VALUE:
   720                 # Simple values simply get inserted into the map.
   761                 # Simple values simply get inserted into the map.
   721                 if special == SPECIAL_NONE:
   762                 if special == SPECIAL_NONE:
   731                     newvalue = []
   772                     newvalue = []
   732 
   773 
   733                     lastc['v'][self._currentmapkey] = newvalue
   774                     lastc['v'][self._currentmapkey] = newvalue
   734                     lastc['remaining'] -= 1
   775                     lastc['remaining'] -= 1
   735 
   776 
   736                     self._collectionstack.append({
   777                     self._collectionstack.append(
   737                         'remaining': value,
   778                         {'remaining': value, 'v': newvalue,}
   738                         'v': newvalue,
   779                     )
   739                     })
       
   740 
   780 
   741                     self._state = self._STATE_WANT_ARRAY_VALUE
   781                     self._state = self._STATE_WANT_ARRAY_VALUE
   742 
   782 
   743                 # A new map is used as the map value.
   783                 # A new map is used as the map value.
   744                 elif special == SPECIAL_START_MAP:
   784                 elif special == SPECIAL_START_MAP:
   746                     newvalue = {}
   786                     newvalue = {}
   747 
   787 
   748                     lastc['v'][self._currentmapkey] = newvalue
   788                     lastc['v'][self._currentmapkey] = newvalue
   749                     lastc['remaining'] -= 1
   789                     lastc['remaining'] -= 1
   750 
   790 
   751                     self._collectionstack.append({
   791                     self._collectionstack.append(
   752                         'remaining': value,
   792                         {'remaining': value, 'v': newvalue,}
   753                         'v': newvalue,
   793                     )
   754                     })
       
   755 
   794 
   756                     self._state = self._STATE_WANT_MAP_KEY
   795                     self._state = self._STATE_WANT_MAP_KEY
   757 
   796 
   758                 # A new set is used as the map value.
   797                 # A new set is used as the map value.
   759                 elif special == SPECIAL_START_SET:
   798                 elif special == SPECIAL_START_SET:
   761                     newvalue = set()
   800                     newvalue = set()
   762 
   801 
   763                     lastc['v'][self._currentmapkey] = newvalue
   802                     lastc['v'][self._currentmapkey] = newvalue
   764                     lastc['remaining'] -= 1
   803                     lastc['remaining'] -= 1
   765 
   804 
   766                     self._collectionstack.append({
   805                     self._collectionstack.append(
   767                         'remaining': value,
   806                         {'remaining': value, 'v': newvalue,}
   768                         'v': newvalue,
   807                     )
   769                     })
       
   770 
   808 
   771                     self._state = self._STATE_WANT_SET_VALUE
   809                     self._state = self._STATE_WANT_SET_VALUE
   772 
   810 
   773                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   811                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   774                     raise CBORDecodeError('indefinite length bytestrings not '
   812                     raise CBORDecodeError(
   775                                           'allowed as map values')
   813                         'indefinite length bytestrings not '
       
   814                         'allowed as map values'
       
   815                     )
   776 
   816 
   777                 else:
   817                 else:
   778                     raise CBORDecodeError('unhandled special item when '
   818                     raise CBORDecodeError(
   779                                           'expecting map value: %d' % special)
   819                         'unhandled special item when '
       
   820                         'expecting map value: %d' % special
       
   821                     )
   780 
   822 
   781                 self._currentmapkey = None
   823                 self._currentmapkey = None
   782 
   824 
   783             # This value is added to the current set.
   825             # This value is added to the current set.
   784             elif self._state == self._STATE_WANT_SET_VALUE:
   826             elif self._state == self._STATE_WANT_SET_VALUE:
   786                     lastc = self._collectionstack[-1]
   828                     lastc = self._collectionstack[-1]
   787                     lastc['v'].add(value)
   829                     lastc['v'].add(value)
   788                     lastc['remaining'] -= 1
   830                     lastc['remaining'] -= 1
   789 
   831 
   790                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   832                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
   791                     raise CBORDecodeError('indefinite length bytestrings not '
   833                     raise CBORDecodeError(
   792                                           'allowed as set values')
   834                         'indefinite length bytestrings not '
   793 
   835                         'allowed as set values'
   794                 elif special in (SPECIAL_START_ARRAY,
   836                     )
   795                                  SPECIAL_START_MAP,
   837 
   796                                  SPECIAL_START_SET):
   838                 elif special in (
   797                     raise CBORDecodeError('collections not allowed as set '
   839                     SPECIAL_START_ARRAY,
   798                                           'values')
   840                     SPECIAL_START_MAP,
       
   841                     SPECIAL_START_SET,
       
   842                 ):
       
   843                     raise CBORDecodeError(
       
   844                         'collections not allowed as set ' 'values'
       
   845                     )
   799 
   846 
   800                 # We don't allow non-trivial types to exist as set values.
   847                 # We don't allow non-trivial types to exist as set values.
   801                 else:
   848                 else:
   802                     raise CBORDecodeError('unhandled special item when '
   849                     raise CBORDecodeError(
   803                                           'expecting set value: %d' % special)
   850                         'unhandled special item when '
       
   851                         'expecting set value: %d' % special
       
   852                     )
   804 
   853 
   805             # This value represents the first chunk in an indefinite length
   854             # This value represents the first chunk in an indefinite length
   806             # bytestring.
   855             # bytestring.
   807             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
   856             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
   808                 # We received a full chunk.
   857                 # We received a full chunk.
   809                 if special == SPECIAL_NONE:
   858                 if special == SPECIAL_NONE:
   810                     self._decodedvalues.append(bytestringchunk(value,
   859                     self._decodedvalues.append(
   811                                                                first=True))
   860                         bytestringchunk(value, first=True)
       
   861                     )
   812 
   862 
   813                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
   863                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
   814 
   864 
   815                 # The end of stream marker. This means it is an empty
   865                 # The end of stream marker. This means it is an empty
   816                 # indefinite length bytestring.
   866                 # indefinite length bytestring.
   817                 elif special == SPECIAL_INDEFINITE_BREAK:
   867                 elif special == SPECIAL_INDEFINITE_BREAK:
   818                     # We /could/ convert this to a b''. But we want to preserve
   868                     # We /could/ convert this to a b''. But we want to preserve
   819                     # the nature of the underlying data so consumers expecting
   869                     # the nature of the underlying data so consumers expecting
   820                     # an indefinite length bytestring get one.
   870                     # an indefinite length bytestring get one.
   821                     self._decodedvalues.append(bytestringchunk(b'',
   871                     self._decodedvalues.append(
   822                                                                first=True,
   872                         bytestringchunk(b'', first=True, last=True)
   823                                                                last=True))
   873                     )
   824 
   874 
   825                     # Since indefinite length bytestrings can't be used in
   875                     # Since indefinite length bytestrings can't be used in
   826                     # collections, we must be at the root level.
   876                     # collections, we must be at the root level.
   827                     assert not self._collectionstack
   877                     assert not self._collectionstack
   828                     self._state = self._STATE_NONE
   878                     self._state = self._STATE_NONE
   829 
   879 
   830                 else:
   880                 else:
   831                     raise CBORDecodeError('unexpected special value when '
   881                     raise CBORDecodeError(
   832                                           'expecting bytestring chunk: %d' %
   882                         'unexpected special value when '
   833                                           special)
   883                         'expecting bytestring chunk: %d' % special
       
   884                     )
   834 
   885 
   835             # This value represents the non-initial chunk in an indefinite
   886             # This value represents the non-initial chunk in an indefinite
   836             # length bytestring.
   887             # length bytestring.
   837             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
   888             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
   838                 # We received a full chunk.
   889                 # We received a full chunk.
   847                     # collections, we must be at the root level.
   898                     # collections, we must be at the root level.
   848                     assert not self._collectionstack
   899                     assert not self._collectionstack
   849                     self._state = self._STATE_NONE
   900                     self._state = self._STATE_NONE
   850 
   901 
   851                 else:
   902                 else:
   852                     raise CBORDecodeError('unexpected special value when '
   903                     raise CBORDecodeError(
   853                                           'expecting bytestring chunk: %d' %
   904                         'unexpected special value when '
   854                                           special)
   905                         'expecting bytestring chunk: %d' % special
       
   906                     )
   855 
   907 
   856             else:
   908             else:
   857                 raise CBORDecodeError('unhandled decoder state: %d' %
   909                 raise CBORDecodeError(
   858                                       self._state)
   910                     'unhandled decoder state: %d' % self._state
       
   911                 )
   859 
   912 
   860             # We could have just added the final value in a collection. End
   913             # We could have just added the final value in a collection. End
   861             # all complete collections at the top of the stack.
   914             # all complete collections at the top of the stack.
   862             while True:
   915             while True:
   863                 # Bail if we're not waiting on a new collection item.
   916                 # Bail if we're not waiting on a new collection item.
   864                 if self._state not in (self._STATE_WANT_ARRAY_VALUE,
   917                 if self._state not in (
   865                                        self._STATE_WANT_MAP_KEY,
   918                     self._STATE_WANT_ARRAY_VALUE,
   866                                        self._STATE_WANT_SET_VALUE):
   919                     self._STATE_WANT_MAP_KEY,
       
   920                     self._STATE_WANT_SET_VALUE,
       
   921                 ):
   867                     break
   922                     break
   868 
   923 
   869                 # Or we are expecting more items for this collection.
   924                 # Or we are expecting more items for this collection.
   870                 lastc = self._collectionstack[-1]
   925                 lastc = self._collectionstack[-1]
   871 
   926 
   907 
   962 
   908         l = list(self._decodedvalues)
   963         l = list(self._decodedvalues)
   909         self._decodedvalues = []
   964         self._decodedvalues = []
   910         return l
   965         return l
   911 
   966 
       
   967 
   912 class bufferingdecoder(object):
   968 class bufferingdecoder(object):
   913     """A CBOR decoder that buffers undecoded input.
   969     """A CBOR decoder that buffers undecoded input.
   914 
   970 
   915     This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
   971     This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
   916     layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
   972     layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
   917     and concatenated with any new input that arrives later.
   973     and concatenated with any new input that arrives later.
   918 
   974 
   919     TODO consider adding limits as to the maximum amount of data that can
   975     TODO consider adding limits as to the maximum amount of data that can
   920     be buffered.
   976     be buffered.
   921     """
   977     """
       
   978 
   922     def __init__(self):
   979     def __init__(self):
   923         self._decoder = sansiodecoder()
   980         self._decoder = sansiodecoder()
   924         self._chunks = []
   981         self._chunks = []
   925         self._wanted = 0
   982         self._wanted = 0
   926 
   983 
   976         return available, readcount - oldlen, wanted
  1033         return available, readcount - oldlen, wanted
   977 
  1034 
   978     def getavailable(self):
  1035     def getavailable(self):
   979         return self._decoder.getavailable()
  1036         return self._decoder.getavailable()
   980 
  1037 
       
  1038 
   981 def decodeall(b):
  1039 def decodeall(b):
   982     """Decode all CBOR items present in an iterable of bytes.
  1040     """Decode all CBOR items present in an iterable of bytes.
   983 
  1041 
   984     In addition to regular decode errors, raises CBORDecodeError if the
  1042     In addition to regular decode errors, raises CBORDecodeError if the
   985     entirety of the passed buffer does not fully decode to complete CBOR
  1043     entirety of the passed buffer does not fully decode to complete CBOR