Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/utils/cborutil.py @ 43076:2372284d9457
formatting: blacken the codebase
This is using my patch to black
(https://github.com/psf/black/pull/826) so we don't un-wrap collection
literals.
Done with:
hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S
# skip-blame mass-reformatting only
# no-check-commit reformats foo_bar functions
Differential Revision: https://phab.mercurial-scm.org/D6971
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 06 Oct 2019 09:45:02 -0400 |
parents | b6387a65851d |
children | 687b865b95ad |
comparison
equal
deleted
inserted
replaced
43075:57875cf423c9 | 43076:2372284d9457 |
---|---|
44 | 44 |
45 SEMANTIC_TAG_FINITE_SET = 258 | 45 SEMANTIC_TAG_FINITE_SET = 258 |
46 | 46 |
47 # Indefinite types begin with their major type ORd with information value 31. | 47 # Indefinite types begin with their major type ORd with information value 31. |
48 BEGIN_INDEFINITE_BYTESTRING = struct.pack( | 48 BEGIN_INDEFINITE_BYTESTRING = struct.pack( |
49 r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE) | 49 r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE |
50 ) | |
50 BEGIN_INDEFINITE_ARRAY = struct.pack( | 51 BEGIN_INDEFINITE_ARRAY = struct.pack( |
51 r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE) | 52 r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE |
53 ) | |
52 BEGIN_INDEFINITE_MAP = struct.pack( | 54 BEGIN_INDEFINITE_MAP = struct.pack( |
53 r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE) | 55 r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE |
56 ) | |
54 | 57 |
55 ENCODED_LENGTH_1 = struct.Struct(r'>B') | 58 ENCODED_LENGTH_1 = struct.Struct(r'>B') |
56 ENCODED_LENGTH_2 = struct.Struct(r'>BB') | 59 ENCODED_LENGTH_2 = struct.Struct(r'>BB') |
57 ENCODED_LENGTH_3 = struct.Struct(r'>BH') | 60 ENCODED_LENGTH_3 = struct.Struct(r'>BH') |
58 ENCODED_LENGTH_4 = struct.Struct(r'>BL') | 61 ENCODED_LENGTH_4 = struct.Struct(r'>BL') |
59 ENCODED_LENGTH_5 = struct.Struct(r'>BQ') | 62 ENCODED_LENGTH_5 = struct.Struct(r'>BQ') |
60 | 63 |
61 # The break ends an indefinite length item. | 64 # The break ends an indefinite length item. |
62 BREAK = b'\xff' | 65 BREAK = b'\xff' |
63 BREAK_INT = 255 | 66 BREAK_INT = 255 |
67 | |
64 | 68 |
65 def encodelength(majortype, length): | 69 def encodelength(majortype, length): |
66 """Obtain a value encoding the major type and its length.""" | 70 """Obtain a value encoding the major type and its length.""" |
67 if length < 24: | 71 if length < 24: |
68 return ENCODED_LENGTH_1.pack(majortype << 5 | length) | 72 return ENCODED_LENGTH_1.pack(majortype << 5 | length) |
73 elif length < 4294967296: | 77 elif length < 4294967296: |
74 return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length) | 78 return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length) |
75 else: | 79 else: |
76 return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length) | 80 return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length) |
77 | 81 |
82 | |
78 def streamencodebytestring(v): | 83 def streamencodebytestring(v): |
79 yield encodelength(MAJOR_TYPE_BYTESTRING, len(v)) | 84 yield encodelength(MAJOR_TYPE_BYTESTRING, len(v)) |
80 yield v | 85 yield v |
86 | |
81 | 87 |
82 def streamencodebytestringfromiter(it): | 88 def streamencodebytestringfromiter(it): |
83 """Convert an iterator of chunks to an indefinite bytestring. | 89 """Convert an iterator of chunks to an indefinite bytestring. |
84 | 90 |
85 Given an input that is iterable and each element in the iterator is | 91 Given an input that is iterable and each element in the iterator is |
91 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) | 97 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) |
92 yield chunk | 98 yield chunk |
93 | 99 |
94 yield BREAK | 100 yield BREAK |
95 | 101 |
102 | |
96 def streamencodeindefinitebytestring(source, chunksize=65536): | 103 def streamencodeindefinitebytestring(source, chunksize=65536): |
97 """Given a large source buffer, emit as an indefinite length bytestring. | 104 """Given a large source buffer, emit as an indefinite length bytestring. |
98 | 105 |
99 This is a generator of chunks constituting the encoded CBOR data. | 106 This is a generator of chunks constituting the encoded CBOR data. |
100 """ | 107 """ |
102 | 109 |
103 i = 0 | 110 i = 0 |
104 l = len(source) | 111 l = len(source) |
105 | 112 |
106 while True: | 113 while True: |
107 chunk = source[i:i + chunksize] | 114 chunk = source[i : i + chunksize] |
108 i += len(chunk) | 115 i += len(chunk) |
109 | 116 |
110 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) | 117 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) |
111 yield chunk | 118 yield chunk |
112 | 119 |
113 if i >= l: | 120 if i >= l: |
114 break | 121 break |
115 | 122 |
116 yield BREAK | 123 yield BREAK |
124 | |
117 | 125 |
118 def streamencodeint(v): | 126 def streamencodeint(v): |
119 if v >= 18446744073709551616 or v < -18446744073709551616: | 127 if v >= 18446744073709551616 or v < -18446744073709551616: |
120 raise ValueError('big integers not supported') | 128 raise ValueError('big integers not supported') |
121 | 129 |
122 if v >= 0: | 130 if v >= 0: |
123 yield encodelength(MAJOR_TYPE_UINT, v) | 131 yield encodelength(MAJOR_TYPE_UINT, v) |
124 else: | 132 else: |
125 yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1) | 133 yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1) |
126 | 134 |
135 | |
127 def streamencodearray(l): | 136 def streamencodearray(l): |
128 """Encode a known size iterable to an array.""" | 137 """Encode a known size iterable to an array.""" |
129 | 138 |
130 yield encodelength(MAJOR_TYPE_ARRAY, len(l)) | 139 yield encodelength(MAJOR_TYPE_ARRAY, len(l)) |
131 | 140 |
132 for i in l: | 141 for i in l: |
133 for chunk in streamencode(i): | 142 for chunk in streamencode(i): |
134 yield chunk | 143 yield chunk |
135 | 144 |
145 | |
136 def streamencodearrayfromiter(it): | 146 def streamencodearrayfromiter(it): |
137 """Encode an iterator of items to an indefinite length array.""" | 147 """Encode an iterator of items to an indefinite length array.""" |
138 | 148 |
139 yield BEGIN_INDEFINITE_ARRAY | 149 yield BEGIN_INDEFINITE_ARRAY |
140 | 150 |
142 for chunk in streamencode(i): | 152 for chunk in streamencode(i): |
143 yield chunk | 153 yield chunk |
144 | 154 |
145 yield BREAK | 155 yield BREAK |
146 | 156 |
157 | |
147 def _mixedtypesortkey(v): | 158 def _mixedtypesortkey(v): |
148 return type(v).__name__, v | 159 return type(v).__name__, v |
160 | |
149 | 161 |
150 def streamencodeset(s): | 162 def streamencodeset(s): |
151 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines | 163 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines |
152 # semantic tag 258 for finite sets. | 164 # semantic tag 258 for finite sets. |
153 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET) | 165 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET) |
154 | 166 |
155 for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)): | 167 for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)): |
156 yield chunk | 168 yield chunk |
157 | 169 |
170 | |
158 def streamencodemap(d): | 171 def streamencodemap(d): |
159 """Encode dictionary to a generator. | 172 """Encode dictionary to a generator. |
160 | 173 |
161 Does not support indefinite length dictionaries. | 174 Does not support indefinite length dictionaries. |
162 """ | 175 """ |
163 yield encodelength(MAJOR_TYPE_MAP, len(d)) | 176 yield encodelength(MAJOR_TYPE_MAP, len(d)) |
164 | 177 |
165 for key, value in sorted(d.iteritems(), | 178 for key, value in sorted( |
166 key=lambda x: _mixedtypesortkey(x[0])): | 179 d.iteritems(), key=lambda x: _mixedtypesortkey(x[0]) |
180 ): | |
167 for chunk in streamencode(key): | 181 for chunk in streamencode(key): |
168 yield chunk | 182 yield chunk |
169 for chunk in streamencode(value): | 183 for chunk in streamencode(value): |
170 yield chunk | 184 yield chunk |
185 | |
171 | 186 |
172 def streamencodemapfromiter(it): | 187 def streamencodemapfromiter(it): |
173 """Given an iterable of (key, value), encode to an indefinite length map.""" | 188 """Given an iterable of (key, value), encode to an indefinite length map.""" |
174 yield BEGIN_INDEFINITE_MAP | 189 yield BEGIN_INDEFINITE_MAP |
175 | 190 |
179 for chunk in streamencode(value): | 194 for chunk in streamencode(value): |
180 yield chunk | 195 yield chunk |
181 | 196 |
182 yield BREAK | 197 yield BREAK |
183 | 198 |
199 | |
184 def streamencodebool(b): | 200 def streamencodebool(b): |
185 # major type 7, simple value 20 and 21. | 201 # major type 7, simple value 20 and 21. |
186 yield b'\xf5' if b else b'\xf4' | 202 yield b'\xf5' if b else b'\xf4' |
187 | 203 |
204 | |
188 def streamencodenone(v): | 205 def streamencodenone(v): |
189 # major type 7, simple value 22. | 206 # major type 7, simple value 22. |
190 yield b'\xf6' | 207 yield b'\xf6' |
208 | |
191 | 209 |
192 STREAM_ENCODERS = { | 210 STREAM_ENCODERS = { |
193 bytes: streamencodebytestring, | 211 bytes: streamencodebytestring, |
194 int: streamencodeint, | 212 int: streamencodeint, |
195 pycompat.long: streamencodeint, | 213 pycompat.long: streamencodeint, |
199 set: streamencodeset, | 217 set: streamencodeset, |
200 bool: streamencodebool, | 218 bool: streamencodebool, |
201 type(None): streamencodenone, | 219 type(None): streamencodenone, |
202 } | 220 } |
203 | 221 |
222 | |
204 def streamencode(v): | 223 def streamencode(v): |
205 """Encode a value in a streaming manner. | 224 """Encode a value in a streaming manner. |
206 | 225 |
207 Given an input object, encode it to CBOR recursively. | 226 Given an input object, encode it to CBOR recursively. |
208 | 227 |
224 if not fn: | 243 if not fn: |
225 raise ValueError('do not know how to encode %s' % type(v)) | 244 raise ValueError('do not know how to encode %s' % type(v)) |
226 | 245 |
227 return fn(v) | 246 return fn(v) |
228 | 247 |
248 | |
229 class CBORDecodeError(Exception): | 249 class CBORDecodeError(Exception): |
230 """Represents an error decoding CBOR.""" | 250 """Represents an error decoding CBOR.""" |
231 | 251 |
252 | |
232 if sys.version_info.major >= 3: | 253 if sys.version_info.major >= 3: |
254 | |
233 def _elementtointeger(b, i): | 255 def _elementtointeger(b, i): |
234 return b[i] | 256 return b[i] |
257 | |
258 | |
235 else: | 259 else: |
260 | |
236 def _elementtointeger(b, i): | 261 def _elementtointeger(b, i): |
237 return ord(b[i]) | 262 return ord(b[i]) |
263 | |
238 | 264 |
239 STRUCT_BIG_UBYTE = struct.Struct(r'>B') | 265 STRUCT_BIG_UBYTE = struct.Struct(r'>B') |
240 STRUCT_BIG_USHORT = struct.Struct('>H') | 266 STRUCT_BIG_USHORT = struct.Struct('>H') |
241 STRUCT_BIG_ULONG = struct.Struct('>L') | 267 STRUCT_BIG_ULONG = struct.Struct('>L') |
242 STRUCT_BIG_ULONGLONG = struct.Struct('>Q') | 268 STRUCT_BIG_ULONGLONG = struct.Struct('>Q') |
245 SPECIAL_START_INDEFINITE_BYTESTRING = 1 | 271 SPECIAL_START_INDEFINITE_BYTESTRING = 1 |
246 SPECIAL_START_ARRAY = 2 | 272 SPECIAL_START_ARRAY = 2 |
247 SPECIAL_START_MAP = 3 | 273 SPECIAL_START_MAP = 3 |
248 SPECIAL_START_SET = 4 | 274 SPECIAL_START_SET = 4 |
249 SPECIAL_INDEFINITE_BREAK = 5 | 275 SPECIAL_INDEFINITE_BREAK = 5 |
276 | |
250 | 277 |
251 def decodeitem(b, offset=0): | 278 def decodeitem(b, offset=0): |
252 """Decode a new CBOR value from a buffer at offset. | 279 """Decode a new CBOR value from a buffer at offset. |
253 | 280 |
254 This function attempts to decode up to one complete CBOR value | 281 This function attempts to decode up to one complete CBOR value |
299 return False, None, readcount, SPECIAL_NONE | 326 return False, None, readcount, SPECIAL_NONE |
300 | 327 |
301 elif majortype == MAJOR_TYPE_BYTESTRING: | 328 elif majortype == MAJOR_TYPE_BYTESTRING: |
302 # Beginning of bytestrings are treated as uints in order to | 329 # Beginning of bytestrings are treated as uints in order to |
303 # decode their length, which may be indefinite. | 330 # decode their length, which may be indefinite. |
304 complete, size, readcount = decodeuint(subtype, b, offset, | 331 complete, size, readcount = decodeuint( |
305 allowindefinite=True) | 332 subtype, b, offset, allowindefinite=True |
333 ) | |
306 | 334 |
307 # We don't know the size of the bytestring. It must be a definite | 335 # We don't know the size of the bytestring. It must be a definite |
308 # length since the indefinite subtype would be encoded in the initial | 336 # length since the indefinite subtype would be encoded in the initial |
309 # byte. | 337 # byte. |
310 if not complete: | 338 if not complete: |
312 | 340 |
313 # We know the length of the bytestring. | 341 # We know the length of the bytestring. |
314 if size is not None: | 342 if size is not None: |
315 # And the data is available in the buffer. | 343 # And the data is available in the buffer. |
316 if offset + readcount + size <= len(b): | 344 if offset + readcount + size <= len(b): |
317 value = b[offset + readcount:offset + readcount + size] | 345 value = b[offset + readcount : offset + readcount + size] |
318 return True, value, readcount + size + 1, SPECIAL_NONE | 346 return True, value, readcount + size + 1, SPECIAL_NONE |
319 | 347 |
320 # And we need more data in order to return the bytestring. | 348 # And we need more data in order to return the bytestring. |
321 else: | 349 else: |
322 wanted = len(b) - offset - readcount - size | 350 wanted = len(b) - offset - readcount - size |
365 # tags, we should probably move semantic tag handling into the caller. | 393 # tags, we should probably move semantic tag handling into the caller. |
366 if tagvalue == SEMANTIC_TAG_FINITE_SET: | 394 if tagvalue == SEMANTIC_TAG_FINITE_SET: |
367 if offset + readcount >= len(b): | 395 if offset + readcount >= len(b): |
368 return False, None, -1, SPECIAL_NONE | 396 return False, None, -1, SPECIAL_NONE |
369 | 397 |
370 complete, size, readcount2, special = decodeitem(b, | 398 complete, size, readcount2, special = decodeitem( |
371 offset + readcount) | 399 b, offset + readcount |
400 ) | |
372 | 401 |
373 if not complete: | 402 if not complete: |
374 return False, None, readcount2, SPECIAL_NONE | 403 return False, None, readcount2, SPECIAL_NONE |
375 | 404 |
376 if special != SPECIAL_START_ARRAY: | 405 if special != SPECIAL_START_ARRAY: |
377 raise CBORDecodeError('expected array after finite set ' | 406 raise CBORDecodeError( |
378 'semantic tag') | 407 'expected array after finite set ' 'semantic tag' |
408 ) | |
379 | 409 |
380 return True, size, readcount + readcount2 + 1, SPECIAL_START_SET | 410 return True, size, readcount + readcount2 + 1, SPECIAL_START_SET |
381 | 411 |
382 else: | 412 else: |
383 raise CBORDecodeError('semantic tag %d not allowed' % tagvalue) | 413 raise CBORDecodeError('semantic tag %d not allowed' % tagvalue) |
396 else: | 426 else: |
397 raise CBORDecodeError('special type %d not allowed' % subtype) | 427 raise CBORDecodeError('special type %d not allowed' % subtype) |
398 else: | 428 else: |
399 assert False | 429 assert False |
400 | 430 |
431 | |
401 def decodeuint(subtype, b, offset=0, allowindefinite=False): | 432 def decodeuint(subtype, b, offset=0, allowindefinite=False): |
402 """Decode an unsigned integer. | 433 """Decode an unsigned integer. |
403 | 434 |
404 ``subtype`` is the lower 5 bits from the initial byte CBOR item | 435 ``subtype`` is the lower 5 bits from the initial byte CBOR item |
405 "header." ``b`` is a buffer containing bytes. ``offset`` points to | 436 "header." ``b`` is a buffer containing bytes. ``offset`` points to |
426 if allowindefinite: | 457 if allowindefinite: |
427 return True, None, 0 | 458 return True, None, 0 |
428 else: | 459 else: |
429 raise CBORDecodeError('indefinite length uint not allowed here') | 460 raise CBORDecodeError('indefinite length uint not allowed here') |
430 elif subtype >= 28: | 461 elif subtype >= 28: |
431 raise CBORDecodeError('unsupported subtype on integer type: %d' % | 462 raise CBORDecodeError( |
432 subtype) | 463 'unsupported subtype on integer type: %d' % subtype |
464 ) | |
433 | 465 |
434 if subtype == 24: | 466 if subtype == 24: |
435 s = STRUCT_BIG_UBYTE | 467 s = STRUCT_BIG_UBYTE |
436 elif subtype == 25: | 468 elif subtype == 25: |
437 s = STRUCT_BIG_USHORT | 469 s = STRUCT_BIG_USHORT |
445 if len(b) - offset >= s.size: | 477 if len(b) - offset >= s.size: |
446 return True, s.unpack_from(b, offset)[0], s.size | 478 return True, s.unpack_from(b, offset)[0], s.size |
447 else: | 479 else: |
448 return False, None, len(b) - offset - s.size | 480 return False, None, len(b) - offset - s.size |
449 | 481 |
482 | |
450 class bytestringchunk(bytes): | 483 class bytestringchunk(bytes): |
451 """Represents a chunk/segment in an indefinite length bytestring. | 484 """Represents a chunk/segment in an indefinite length bytestring. |
452 | 485 |
453 This behaves like a ``bytes`` but in addition has the ``isfirst`` | 486 This behaves like a ``bytes`` but in addition has the ``isfirst`` |
454 and ``islast`` attributes indicating whether this chunk is the first | 487 and ``islast`` attributes indicating whether this chunk is the first |
459 self = bytes.__new__(cls, v) | 492 self = bytes.__new__(cls, v) |
460 self.isfirst = first | 493 self.isfirst = first |
461 self.islast = last | 494 self.islast = last |
462 | 495 |
463 return self | 496 return self |
497 | |
464 | 498 |
465 class sansiodecoder(object): | 499 class sansiodecoder(object): |
466 """A CBOR decoder that doesn't perform its own I/O. | 500 """A CBOR decoder that doesn't perform its own I/O. |
467 | 501 |
468 To use, construct an instance and feed it segments containing | 502 To use, construct an instance and feed it segments containing |
604 # A normal value. | 638 # A normal value. |
605 if special == SPECIAL_NONE: | 639 if special == SPECIAL_NONE: |
606 self._decodedvalues.append(value) | 640 self._decodedvalues.append(value) |
607 | 641 |
608 elif special == SPECIAL_START_ARRAY: | 642 elif special == SPECIAL_START_ARRAY: |
609 self._collectionstack.append({ | 643 self._collectionstack.append( |
610 'remaining': value, | 644 {'remaining': value, 'v': [],} |
611 'v': [], | 645 ) |
612 }) | |
613 self._state = self._STATE_WANT_ARRAY_VALUE | 646 self._state = self._STATE_WANT_ARRAY_VALUE |
614 | 647 |
615 elif special == SPECIAL_START_MAP: | 648 elif special == SPECIAL_START_MAP: |
616 self._collectionstack.append({ | 649 self._collectionstack.append( |
617 'remaining': value, | 650 {'remaining': value, 'v': {},} |
618 'v': {}, | 651 ) |
619 }) | |
620 self._state = self._STATE_WANT_MAP_KEY | 652 self._state = self._STATE_WANT_MAP_KEY |
621 | 653 |
622 elif special == SPECIAL_START_SET: | 654 elif special == SPECIAL_START_SET: |
623 self._collectionstack.append({ | 655 self._collectionstack.append( |
624 'remaining': value, | 656 {'remaining': value, 'v': set(),} |
625 'v': set(), | 657 ) |
626 }) | |
627 self._state = self._STATE_WANT_SET_VALUE | 658 self._state = self._STATE_WANT_SET_VALUE |
628 | 659 |
629 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: | 660 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: |
630 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST | 661 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST |
631 | 662 |
632 else: | 663 else: |
633 raise CBORDecodeError('unhandled special state: %d' % | 664 raise CBORDecodeError( |
634 special) | 665 'unhandled special state: %d' % special |
666 ) | |
635 | 667 |
636 # This value becomes an element of the current array. | 668 # This value becomes an element of the current array. |
637 elif self._state == self._STATE_WANT_ARRAY_VALUE: | 669 elif self._state == self._STATE_WANT_ARRAY_VALUE: |
638 # Simple values get appended. | 670 # Simple values get appended. |
639 if special == SPECIAL_NONE: | 671 if special == SPECIAL_NONE: |
649 newvalue = [] | 681 newvalue = [] |
650 | 682 |
651 lastc['v'].append(newvalue) | 683 lastc['v'].append(newvalue) |
652 lastc['remaining'] -= 1 | 684 lastc['remaining'] -= 1 |
653 | 685 |
654 self._collectionstack.append({ | 686 self._collectionstack.append( |
655 'remaining': value, | 687 {'remaining': value, 'v': newvalue,} |
656 'v': newvalue, | 688 ) |
657 }) | |
658 | 689 |
659 # self._state doesn't need to be changed. | 690 # self._state doesn't need to be changed. |
660 | 691 |
661 # A map nested within an array. | 692 # A map nested within an array. |
662 elif special == SPECIAL_START_MAP: | 693 elif special == SPECIAL_START_MAP: |
664 newvalue = {} | 695 newvalue = {} |
665 | 696 |
666 lastc['v'].append(newvalue) | 697 lastc['v'].append(newvalue) |
667 lastc['remaining'] -= 1 | 698 lastc['remaining'] -= 1 |
668 | 699 |
669 self._collectionstack.append({ | 700 self._collectionstack.append( |
670 'remaining': value, | 701 {'remaining': value, 'v': newvalue} |
671 'v': newvalue | 702 ) |
672 }) | |
673 | 703 |
674 self._state = self._STATE_WANT_MAP_KEY | 704 self._state = self._STATE_WANT_MAP_KEY |
675 | 705 |
676 elif special == SPECIAL_START_SET: | 706 elif special == SPECIAL_START_SET: |
677 lastc = self._collectionstack[-1] | 707 lastc = self._collectionstack[-1] |
678 newvalue = set() | 708 newvalue = set() |
679 | 709 |
680 lastc['v'].append(newvalue) | 710 lastc['v'].append(newvalue) |
681 lastc['remaining'] -= 1 | 711 lastc['remaining'] -= 1 |
682 | 712 |
683 self._collectionstack.append({ | 713 self._collectionstack.append( |
684 'remaining': value, | 714 {'remaining': value, 'v': newvalue,} |
685 'v': newvalue, | 715 ) |
686 }) | |
687 | 716 |
688 self._state = self._STATE_WANT_SET_VALUE | 717 self._state = self._STATE_WANT_SET_VALUE |
689 | 718 |
690 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: | 719 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: |
691 raise CBORDecodeError('indefinite length bytestrings ' | 720 raise CBORDecodeError( |
692 'not allowed as array values') | 721 'indefinite length bytestrings ' |
722 'not allowed as array values' | |
723 ) | |
693 | 724 |
694 else: | 725 else: |
695 raise CBORDecodeError('unhandled special item when ' | 726 raise CBORDecodeError( |
696 'expecting array value: %d' % special) | 727 'unhandled special item when ' |
728 'expecting array value: %d' % special | |
729 ) | |
697 | 730 |
698 # This value becomes the key of the current map instance. | 731 # This value becomes the key of the current map instance. |
699 elif self._state == self._STATE_WANT_MAP_KEY: | 732 elif self._state == self._STATE_WANT_MAP_KEY: |
700 if special == SPECIAL_NONE: | 733 if special == SPECIAL_NONE: |
701 self._currentmapkey = value | 734 self._currentmapkey = value |
702 self._state = self._STATE_WANT_MAP_VALUE | 735 self._state = self._STATE_WANT_MAP_VALUE |
703 | 736 |
704 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: | 737 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: |
705 raise CBORDecodeError('indefinite length bytestrings ' | 738 raise CBORDecodeError( |
706 'not allowed as map keys') | 739 'indefinite length bytestrings ' |
707 | 740 'not allowed as map keys' |
708 elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP, | 741 ) |
709 SPECIAL_START_SET): | 742 |
710 raise CBORDecodeError('collections not supported as map ' | 743 elif special in ( |
711 'keys') | 744 SPECIAL_START_ARRAY, |
745 SPECIAL_START_MAP, | |
746 SPECIAL_START_SET, | |
747 ): | |
748 raise CBORDecodeError( | |
749 'collections not supported as map ' 'keys' | |
750 ) | |
712 | 751 |
713 # We do not allow special values to be used as map keys. | 752 # We do not allow special values to be used as map keys. |
714 else: | 753 else: |
715 raise CBORDecodeError('unhandled special item when ' | 754 raise CBORDecodeError( |
716 'expecting map key: %d' % special) | 755 'unhandled special item when ' |
756 'expecting map key: %d' % special | |
757 ) | |
717 | 758 |
718 # This value becomes the value of the current map key. | 759 # This value becomes the value of the current map key. |
719 elif self._state == self._STATE_WANT_MAP_VALUE: | 760 elif self._state == self._STATE_WANT_MAP_VALUE: |
720 # Simple values simply get inserted into the map. | 761 # Simple values simply get inserted into the map. |
721 if special == SPECIAL_NONE: | 762 if special == SPECIAL_NONE: |
731 newvalue = [] | 772 newvalue = [] |
732 | 773 |
733 lastc['v'][self._currentmapkey] = newvalue | 774 lastc['v'][self._currentmapkey] = newvalue |
734 lastc['remaining'] -= 1 | 775 lastc['remaining'] -= 1 |
735 | 776 |
736 self._collectionstack.append({ | 777 self._collectionstack.append( |
737 'remaining': value, | 778 {'remaining': value, 'v': newvalue,} |
738 'v': newvalue, | 779 ) |
739 }) | |
740 | 780 |
741 self._state = self._STATE_WANT_ARRAY_VALUE | 781 self._state = self._STATE_WANT_ARRAY_VALUE |
742 | 782 |
743 # A new map is used as the map value. | 783 # A new map is used as the map value. |
744 elif special == SPECIAL_START_MAP: | 784 elif special == SPECIAL_START_MAP: |
746 newvalue = {} | 786 newvalue = {} |
747 | 787 |
748 lastc['v'][self._currentmapkey] = newvalue | 788 lastc['v'][self._currentmapkey] = newvalue |
749 lastc['remaining'] -= 1 | 789 lastc['remaining'] -= 1 |
750 | 790 |
751 self._collectionstack.append({ | 791 self._collectionstack.append( |
752 'remaining': value, | 792 {'remaining': value, 'v': newvalue,} |
753 'v': newvalue, | 793 ) |
754 }) | |
755 | 794 |
756 self._state = self._STATE_WANT_MAP_KEY | 795 self._state = self._STATE_WANT_MAP_KEY |
757 | 796 |
758 # A new set is used as the map value. | 797 # A new set is used as the map value. |
759 elif special == SPECIAL_START_SET: | 798 elif special == SPECIAL_START_SET: |
761 newvalue = set() | 800 newvalue = set() |
762 | 801 |
763 lastc['v'][self._currentmapkey] = newvalue | 802 lastc['v'][self._currentmapkey] = newvalue |
764 lastc['remaining'] -= 1 | 803 lastc['remaining'] -= 1 |
765 | 804 |
766 self._collectionstack.append({ | 805 self._collectionstack.append( |
767 'remaining': value, | 806 {'remaining': value, 'v': newvalue,} |
768 'v': newvalue, | 807 ) |
769 }) | |
770 | 808 |
771 self._state = self._STATE_WANT_SET_VALUE | 809 self._state = self._STATE_WANT_SET_VALUE |
772 | 810 |
773 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: | 811 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: |
774 raise CBORDecodeError('indefinite length bytestrings not ' | 812 raise CBORDecodeError( |
775 'allowed as map values') | 813 'indefinite length bytestrings not ' |
814 'allowed as map values' | |
815 ) | |
776 | 816 |
777 else: | 817 else: |
778 raise CBORDecodeError('unhandled special item when ' | 818 raise CBORDecodeError( |
779 'expecting map value: %d' % special) | 819 'unhandled special item when ' |
820 'expecting map value: %d' % special | |
821 ) | |
780 | 822 |
781 self._currentmapkey = None | 823 self._currentmapkey = None |
782 | 824 |
783 # This value is added to the current set. | 825 # This value is added to the current set. |
784 elif self._state == self._STATE_WANT_SET_VALUE: | 826 elif self._state == self._STATE_WANT_SET_VALUE: |
786 lastc = self._collectionstack[-1] | 828 lastc = self._collectionstack[-1] |
787 lastc['v'].add(value) | 829 lastc['v'].add(value) |
788 lastc['remaining'] -= 1 | 830 lastc['remaining'] -= 1 |
789 | 831 |
790 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: | 832 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: |
791 raise CBORDecodeError('indefinite length bytestrings not ' | 833 raise CBORDecodeError( |
792 'allowed as set values') | 834 'indefinite length bytestrings not ' |
793 | 835 'allowed as set values' |
794 elif special in (SPECIAL_START_ARRAY, | 836 ) |
795 SPECIAL_START_MAP, | 837 |
796 SPECIAL_START_SET): | 838 elif special in ( |
797 raise CBORDecodeError('collections not allowed as set ' | 839 SPECIAL_START_ARRAY, |
798 'values') | 840 SPECIAL_START_MAP, |
841 SPECIAL_START_SET, | |
842 ): | |
843 raise CBORDecodeError( | |
844 'collections not allowed as set ' 'values' | |
845 ) | |
799 | 846 |
800 # We don't allow non-trivial types to exist as set values. | 847 # We don't allow non-trivial types to exist as set values. |
801 else: | 848 else: |
802 raise CBORDecodeError('unhandled special item when ' | 849 raise CBORDecodeError( |
803 'expecting set value: %d' % special) | 850 'unhandled special item when ' |
851 'expecting set value: %d' % special | |
852 ) | |
804 | 853 |
805 # This value represents the first chunk in an indefinite length | 854 # This value represents the first chunk in an indefinite length |
806 # bytestring. | 855 # bytestring. |
807 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST: | 856 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST: |
808 # We received a full chunk. | 857 # We received a full chunk. |
809 if special == SPECIAL_NONE: | 858 if special == SPECIAL_NONE: |
810 self._decodedvalues.append(bytestringchunk(value, | 859 self._decodedvalues.append( |
811 first=True)) | 860 bytestringchunk(value, first=True) |
861 ) | |
812 | 862 |
813 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT | 863 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT |
814 | 864 |
815 # The end of stream marker. This means it is an empty | 865 # The end of stream marker. This means it is an empty |
816 # indefinite length bytestring. | 866 # indefinite length bytestring. |
817 elif special == SPECIAL_INDEFINITE_BREAK: | 867 elif special == SPECIAL_INDEFINITE_BREAK: |
818 # We /could/ convert this to a b''. But we want to preserve | 868 # We /could/ convert this to a b''. But we want to preserve |
819 # the nature of the underlying data so consumers expecting | 869 # the nature of the underlying data so consumers expecting |
820 # an indefinite length bytestring get one. | 870 # an indefinite length bytestring get one. |
821 self._decodedvalues.append(bytestringchunk(b'', | 871 self._decodedvalues.append( |
822 first=True, | 872 bytestringchunk(b'', first=True, last=True) |
823 last=True)) | 873 ) |
824 | 874 |
825 # Since indefinite length bytestrings can't be used in | 875 # Since indefinite length bytestrings can't be used in |
826 # collections, we must be at the root level. | 876 # collections, we must be at the root level. |
827 assert not self._collectionstack | 877 assert not self._collectionstack |
828 self._state = self._STATE_NONE | 878 self._state = self._STATE_NONE |
829 | 879 |
830 else: | 880 else: |
831 raise CBORDecodeError('unexpected special value when ' | 881 raise CBORDecodeError( |
832 'expecting bytestring chunk: %d' % | 882 'unexpected special value when ' |
833 special) | 883 'expecting bytestring chunk: %d' % special |
884 ) | |
834 | 885 |
835 # This value represents the non-initial chunk in an indefinite | 886 # This value represents the non-initial chunk in an indefinite |
836 # length bytestring. | 887 # length bytestring. |
837 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT: | 888 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT: |
838 # We received a full chunk. | 889 # We received a full chunk. |
847 # collections, we must be at the root level. | 898 # collections, we must be at the root level. |
848 assert not self._collectionstack | 899 assert not self._collectionstack |
849 self._state = self._STATE_NONE | 900 self._state = self._STATE_NONE |
850 | 901 |
851 else: | 902 else: |
852 raise CBORDecodeError('unexpected special value when ' | 903 raise CBORDecodeError( |
853 'expecting bytestring chunk: %d' % | 904 'unexpected special value when ' |
854 special) | 905 'expecting bytestring chunk: %d' % special |
906 ) | |
855 | 907 |
856 else: | 908 else: |
857 raise CBORDecodeError('unhandled decoder state: %d' % | 909 raise CBORDecodeError( |
858 self._state) | 910 'unhandled decoder state: %d' % self._state |
911 ) | |
859 | 912 |
860 # We could have just added the final value in a collection. End | 913 # We could have just added the final value in a collection. End |
861 # all complete collections at the top of the stack. | 914 # all complete collections at the top of the stack. |
862 while True: | 915 while True: |
863 # Bail if we're not waiting on a new collection item. | 916 # Bail if we're not waiting on a new collection item. |
864 if self._state not in (self._STATE_WANT_ARRAY_VALUE, | 917 if self._state not in ( |
865 self._STATE_WANT_MAP_KEY, | 918 self._STATE_WANT_ARRAY_VALUE, |
866 self._STATE_WANT_SET_VALUE): | 919 self._STATE_WANT_MAP_KEY, |
920 self._STATE_WANT_SET_VALUE, | |
921 ): | |
867 break | 922 break |
868 | 923 |
869 # Or we are expecting more items for this collection. | 924 # Or we are expecting more items for this collection. |
870 lastc = self._collectionstack[-1] | 925 lastc = self._collectionstack[-1] |
871 | 926 |
907 | 962 |
908 l = list(self._decodedvalues) | 963 l = list(self._decodedvalues) |
909 self._decodedvalues = [] | 964 self._decodedvalues = [] |
910 return l | 965 return l |
911 | 966 |
967 | |
912 class bufferingdecoder(object): | 968 class bufferingdecoder(object): |
913 """A CBOR decoder that buffers undecoded input. | 969 """A CBOR decoder that buffers undecoded input. |
914 | 970 |
915 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering | 971 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering |
916 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered | 972 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered |
917 and concatenated with any new input that arrives later. | 973 and concatenated with any new input that arrives later. |
918 | 974 |
919 TODO consider adding limits as to the maximum amount of data that can | 975 TODO consider adding limits as to the maximum amount of data that can |
920 be buffered. | 976 be buffered. |
921 """ | 977 """ |
978 | |
922 def __init__(self): | 979 def __init__(self): |
923 self._decoder = sansiodecoder() | 980 self._decoder = sansiodecoder() |
924 self._chunks = [] | 981 self._chunks = [] |
925 self._wanted = 0 | 982 self._wanted = 0 |
926 | 983 |
976 return available, readcount - oldlen, wanted | 1033 return available, readcount - oldlen, wanted |
977 | 1034 |
978 def getavailable(self): | 1035 def getavailable(self): |
979 return self._decoder.getavailable() | 1036 return self._decoder.getavailable() |
980 | 1037 |
1038 | |
981 def decodeall(b): | 1039 def decodeall(b): |
982 """Decode all CBOR items present in an iterable of bytes. | 1040 """Decode all CBOR items present in an iterable of bytes. |
983 | 1041 |
984 In addition to regular decode errors, raises CBORDecodeError if the | 1042 In addition to regular decode errors, raises CBORDecodeError if the |
985 entirety of the passed buffer does not fully decode to complete CBOR | 1043 entirety of the passed buffer does not fully decode to complete CBOR |