view mercurial/utils/cborutil.py @ 45892:06b64fabf91c

copies: cache the ancestor checking call when tracing copy A good share of the time spent in this function is spent doing ancestors checking. To avoid spending time in duplicated call, we cache the result of calls. In the slower case, this provide a quite significant performance boost. Below are the result for a set of selected pairs (many of them pathological): (And further down is another table that summarize the current state of filelog based vs changeset base copy tracing) The benchmark have been configured to be killed after 6 minutes of runtime, which mean that any detect slower than 2 minutes will be marked as "killed". This drop some useful information about how much slower these case are? but also prevent 99% of the benchmark time to be spent on case that can be labelled "very slow" anyway. Repo Case Source-Rev Dest-Rev Old-Time New-Time Difference Factor ------------------------------------------------------------------------------------------------------------------------------------ mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000044 s, 0.000044 s, +0.000000 s, ? 1.0000 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.000138 s, 0.000138 s, +0.000000 s, ? 1.0000 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.005067 s, 0.005052 s, -0.000015 s, ? 0.9970 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.000218 s, 0.000219 s, +0.000001 s, ? 1.0046 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.000053 s, 0.000055 s, +0.000002 s, ? 1.0377 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 0.000125 s, 0.000128 s, +0.000003 s, ? 1.0240 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.001098 s, 0.001089 s, -0.000009 s, ? 0.9918 pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 0.017546 s, 0.017407 s, -0.000139 s, ? 0.9921 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 0.096723 s, 0.094175 s, -0.002548 s, ? 
0.9737 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 0.271796 s, 0.238009 s, -0.033787 s, ? 0.8757 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 0.128602 s, 0.125876 s, -0.002726 s, ? 0.9788 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 7.086742 s, 3.581556 s, -3.505186 s, ? 0.5054 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 0.016634 s, 0.016721 s, +0.000087 s, ? 1.0052 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 0.254225 s, 0.242367 s, -0.011858 s, ? 0.9534 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.000166 s, 0.000165 s, -0.000001 s, ? 0.9940 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.000118 s, 0.000114 s, -0.000004 s, ? 0.9661 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.000296 s, 0.000296 s, +0.000000 s, ? 1.0000 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.001137 s, 0.001124 s, -0.000013 s, ? 0.9886 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.014133 s, 0.013060 s, -0.001073 s, ? 0.9241 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.016988 s, 0.017112 s, +0.000124 s, ? 1.0073 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.676361 s, 0.660350 s, -0.016011 s, ? 0.9763 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 12.515149 s, 10.032499 s, -2.482650 s, ? 0.8016 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.000186 s, 0.000189 s, +0.000003 s, ? 1.0161 mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.000459 s, 0.000462 s, +0.000003 s, ? 1.0065 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.000273 s, 0.000270 s, -0.000003 s, ? 0.9890 mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.001503 s, 0.001474 s, -0.000029 s, ? 
0.9807 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.004862 s, 0.004806 s, -0.000056 s, ? 0.9885 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.088291 s, 0.085150 s, -0.003141 s, ? 0.9644 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.007113 s, 0.007064 s, -0.000049 s, ? 0.9931 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.004687 s, 0.004741 s, +0.000054 s, ? 1.0115 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 0.198710 s, 0.190133 s, -0.008577 s, ? 0.9568 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.036068 s, 0.035651 s, -0.000417 s, ? 0.9884 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 0.465362 s, 0.440694 s, -0.024668 s, ? 0.9470 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 24.519684 s, 18.454163 s, -6.065521 s, ? 0.7526 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 42.711897 s, 31.562719 s, -11.149178 s, ? 0.7390 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.001201 s, 0.001189 s, -0.000012 s, ? 0.9900 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.001216 s, 0.001204 s, -0.000012 s, ? 0.9901 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.000595 s, 0.000586 s, -0.000009 s, ? 0.9849 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.001856 s, 0.001845 s, -0.000011 s, ? 0.9941 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 0.064936 s, 0.063822 s, -0.001114 s, ? 0.9828 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 0.090601 s, 0.088038 s, -0.002563 s, ? 0.9717 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.007510 s, 0.007389 s, -0.000121 s, ? 
0.9839 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.004911 s, 0.004868 s, -0.000043 s, ? 0.9912 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 0.233231 s, 0.222450 s, -0.010781 s, ? 0.9538 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.419989 s, 0.370675 s, -0.049314 s, ? 0.8826 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.401521 s, 0.358020 s, -0.043501 s, ? 0.8917 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 0.179555 s, 0.145235 s, -0.034320 s, ? 0.8089 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 0.038004 s, 0.037606 s, -0.000398 s, ? 0.9895 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 52.838482 s, 7.382439 s, -45.456043 s, ? 0.1397 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 8.705874 s, 7.273506 s, -1.432368 s, ? 0.8355 mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 1.126708 s, 1.074593 s, -0.052115 s, ? 0.9537 mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 83.854020 s, 27.746195 s, -56.107825 s, ? 0.3309 Below is a table comparing the runtime of the current "filelog centric" algorithm, with the "changeset centric" one, we just modified. The changeset centric algorithm is a significant win in many scenario, but they are still various cases where it is quite slower. When many revision has to be considered the cost of retrieving the copy information, creating new dictionaries, merging dictionaries and checking if revision are ancestors of each other can slow things down. The rest of this series, will introduce a rust version of the copy tracing code to deal with most of theses issues. 
Repo Case Source-Rev Dest-Rev filelog sidedata Difference Factor --------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 0.000914 s, 0.000044 s, - 0.000870 s, ? 0.048140 mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 0.001812 s, 0.000138 s, - 0.001674 s, ? 0.076159 mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 0.017954 s, 0.005052 s, - 0.012902 s, ? 0.281386 pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 0.001509 s, 0.000219 s, - 0.001290 s, ? 0.145129 pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 0.206881 s, 0.000055 s, - 0.206826 s, ? 0.000266 pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 0.016951 s, 0.000128 s, - 0.016823 s, ? 0.007551 pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 0.019096 s, 0.001089 s, - 0.018007 s, ? 0.057028 pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 0.762506 s, 0.017407 s, - 0.745099 s, ? 0.022829 pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 1.179211 s, 0.094175 s, - 1.085036 s, ? 0.079863 pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 1.249058 s, 0.238009 s, - 1.011049 s, ? 0.190551 pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 1.614107 s, 0.125876 s, - 1.488231 s, ? 0.077985 pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 0.001064 s, 3.581556 s, + 3.580492 s, ? 3366.124060 pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 1.061275 s, 0.016721 s, - 1.044554 s, ? 0.015756 pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 1.341119 s, 0.242367 s, - 1.098752 s, ? 0.180720 netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 0.027803 s, 0.000165 s, - 0.027638 s, ? 0.005935 netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 0.130014 s, 0.000114 s, - 0.129900 s, ? 
0.000877 netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 0.024990 s, 0.000296 s, - 0.024694 s, ? 0.011845 netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 0.052201 s, 0.001124 s, - 0.051077 s, ? 0.021532 netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 0.037642 s, 0.013060 s, - 0.024582 s, ? 0.346953 netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 0.197086 s, 0.017112 s, - 0.179974 s, ? 0.086825 netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 0.935148 s, 0.660350 s, - 0.274798 s, ? 0.706145 netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 3.920674 s, 10.032499 s, + 6.111825 s, ? 2.558871 mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 0.024232 s, 0.000189 s, - 0.024043 s, ? 0.007800 mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 0.141483 s, 0.000462 s, - 0.141021 s, ? 0.003265 mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 0.025775 s, 0.000270 s, - 0.025505 s, ? 0.010475 mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 0.084922 s, 0.001474 s, - 0.083448 s, ? 0.017357 mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 0.194784 s, 0.004806 s, - 0.189978 s, ? 0.024673 mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.161103 s, 0.085150 s, - 2.075953 s, ? 0.039401 mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.089347 s, 0.007064 s, - 0.082283 s, ? 0.079063 mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.732171 s, 0.004741 s, - 0.727430 s, ? 0.006475 mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 1.157287 s, 0.190133 s, - 0.967154 s, ? 0.164292 mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.726568 s, 0.035651 s, - 6.690917 s, ? 
0.005300 mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 3.266229 s, 0.440694 s, - 2.825535 s, ? 0.134924 mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 15.860534 s, 18.454163 s, + 2.593629 s, ? 1.163527 mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 20.450475 s, 31.562719 s, +11.112244 s, ? 1.543373 mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 0.080442 s, 0.001189 s, - 0.079253 s, ? 0.014781 mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 0.497672 s, 0.001204 s, - 0.496468 s, ? 0.002419 mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 0.021183 s, 0.000586 s, - 0.020597 s, ? 0.027664 mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 0.230991 s, 0.001845 s, - 0.229146 s, ? 0.007987 mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1.118461 s, 0.063822 s, - 1.054639 s, ? 0.057062 mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 2.206083 s, 0.088038 s, - 2.118045 s, ? 0.039907 mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 0.089404 s, 0.007389 s, - 0.082015 s, ? 0.082647 mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 0.733043 s, 0.004868 s, - 0.728175 s, ? 0.006641 mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 1.163367 s, 0.222450 s, - 0.940917 s, ? 0.191212 mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 0.085456 s, 0.370675 s, + 0.285219 s, ? 4.337612 mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 0.083601 s, 0.358020 s, + 0.274419 s, ? 4.282485 mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 7.366614 s, 0.145235 s, - 7.221379 s, ? 0.019715 mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 6.664464 s, 0.037606 s, - 6.626858 s, ? 
0.005643 mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 7.467836 s, 7.382439 s, - 0.085397 s, ? 0.988565 mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 9.801294 s, 7.273506 s, - 2.527788 s, ? 0.742097 mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 0.091886 s, killed mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 26.491140 s, 1.074593 s, -25.416547 s, ? 0.040564 mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 0.092863 s, killed mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 0.226823 s, killed mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 18.914630 s, 27.746195 s, + 8.831565 s, ? 1.466917 mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 21.198903 s, killed mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 24.952268 s, killed Differential Revision: https://phab.mercurial-scm.org/D9296
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 02 Nov 2020 11:03:56 +0100
parents 9f70512ae2cf
children 89a2afe31e82
line wrap: on
line source

# cborutil.py - CBOR extensions
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import sys

from .. import pycompat

# Very short version of RFC 7049...
#
# Each item begins with a byte. The 3 high bits of that byte denote the
# "major type." The lower 5 bits denote the "subtype." Each major type
# has its own encoding mechanism.
#
# Most types have lengths. However, bytestring, string, array, and map
# can be indefinite length. These are denoted by a subtype with value 31.
# Sub-components of those types then come afterwards and are terminated
# by a "break" byte.

# Major types occupy the 3 high bits of an item's initial byte
# (RFC 7049 section 2.1).
MAJOR_TYPE_UINT = 0
MAJOR_TYPE_NEGINT = 1
MAJOR_TYPE_BYTESTRING = 2
MAJOR_TYPE_STRING = 3
MAJOR_TYPE_ARRAY = 4
MAJOR_TYPE_MAP = 5
MAJOR_TYPE_SEMANTIC = 6
MAJOR_TYPE_SPECIAL = 7

# Mask extracting the subtype from the low 5 bits of the initial byte.
SUBTYPE_MASK = 0b00011111

# Subtype values with dedicated meanings. false/true/null and the float
# widths apply to major type 7; subtype 31 marks indefinite length items.
SUBTYPE_FALSE = 20
SUBTYPE_TRUE = 21
SUBTYPE_NULL = 22
SUBTYPE_HALF_FLOAT = 25
SUBTYPE_SINGLE_FLOAT = 26
SUBTYPE_DOUBLE_FLOAT = 27
SUBTYPE_INDEFINITE = 31

# IANA-registered semantic tag for mathematical finite sets.
SEMANTIC_TAG_FINITE_SET = 258

# Indefinite types begin with their major type ORd with information value 31.
BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    '>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE
)
BEGIN_INDEFINITE_ARRAY = struct.pack(
    '>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE
)
BEGIN_INDEFINITE_MAP = struct.pack(
    '>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE
)

# Pre-compiled packers for the initial byte plus a 0, 1, 2, 4, or 8 byte
# big-endian length payload; selected by encodelength() below.
ENCODED_LENGTH_1 = struct.Struct('>B')
ENCODED_LENGTH_2 = struct.Struct('>BB')
ENCODED_LENGTH_3 = struct.Struct('>BH')
ENCODED_LENGTH_4 = struct.Struct('>BL')
ENCODED_LENGTH_5 = struct.Struct('>BQ')

# The break ends an indefinite length item.
BREAK = b'\xff'
BREAK_INT = 255


def encodelength(majortype, length):
    """Obtain a value encoding the major type and its length.

    Picks the smallest encoding that can represent ``length``: inline in
    the initial byte for values under 24, otherwise a 1, 2, 4, or 8 byte
    big-endian payload follows (subtypes 24-27).
    """
    hi = majortype << 5

    if length < 24:
        # Small lengths are stored directly in the subtype bits.
        return ENCODED_LENGTH_1.pack(hi | length)
    if length < 256:
        return ENCODED_LENGTH_2.pack(hi | 24, length)
    if length < 65536:
        return ENCODED_LENGTH_3.pack(hi | 25, length)
    if length < 4294967296:
        return ENCODED_LENGTH_4.pack(hi | 26, length)
    return ENCODED_LENGTH_5.pack(hi | 27, length)


def streamencodebytestring(v):
    """Encode ``v`` as a definite length CBOR bytestring."""
    # Header (major type + length) first, then the raw payload.
    yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
    yield v


def streamencodebytestringfromiter(it):
    """Convert an iterator of chunks to an indefinite bytestring.

    Given an input that is iterable and each element in the iterator is
    representable as bytes, emit an indefinite length bytestring. Each
    element becomes its own definite length chunk inside the wrapper.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    for piece in it:
        yield encodelength(MAJOR_TYPE_BYTESTRING, len(piece))
        yield piece

    # Terminate the indefinite length item.
    yield BREAK


def streamencodeindefinitebytestring(source, chunksize=65536):
    """Given a large source buffer, emit as an indefinite length bytestring.

    This is a generator of chunks constituting the encoded CBOR data.
    """
    yield BEGIN_INDEFINITE_BYTESTRING

    total = len(source)
    pos = 0

    # Do-while shape: an empty source still yields exactly one (empty)
    # chunk before the break, matching this encoder's historical output.
    while True:
        piece = source[pos : pos + chunksize]
        pos += len(piece)

        yield encodelength(MAJOR_TYPE_BYTESTRING, len(piece))
        yield piece

        if pos >= total:
            break

    yield BREAK


def streamencodeint(v):
    """Encode an integer as a CBOR uint (major type 0) or negint (type 1).

    Raises ValueError for magnitudes that do not fit in 64 bits.
    """
    if not (-18446744073709551616 <= v < 18446744073709551616):
        raise ValueError(b'big integers not supported')

    if v < 0:
        # CBOR encodes a negative value n as the uint -(n + 1).
        yield encodelength(MAJOR_TYPE_NEGINT, -v - 1)
    else:
        yield encodelength(MAJOR_TYPE_UINT, v)


def streamencodearray(l):
    """Encode a known size iterable to an array."""

    yield encodelength(MAJOR_TYPE_ARRAY, len(l))

    # Each element is recursively stream-encoded in order.
    for item in l:
        for piece in streamencode(item):
            yield piece


def streamencodearrayfromiter(it):
    """Encode an iterator of items to an indefinite length array."""

    yield BEGIN_INDEFINITE_ARRAY

    for item in it:
        for piece in streamencode(item):
            yield piece

    # Terminate the indefinite length array.
    yield BREAK


def _mixedtypesortkey(v):
    return type(v).__name__, v


def streamencodeset(s):
    """Encode a set using semantic tag 258 followed by an array.

    https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
    semantic tag 258 for finite sets.
    """
    yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)

    # Sort members so output is deterministic.
    for piece in streamencodearray(sorted(s, key=_mixedtypesortkey)):
        yield piece


def streamencodemap(d):
    """Encode a dictionary to a generator.

    Indefinite length dictionaries are not supported.
    """
    yield encodelength(MAJOR_TYPE_MAP, len(d))

    # Emit entries in key-sorted order so encoding is deterministic.
    entries = sorted(
        pycompat.iteritems(d), key=lambda e: _mixedtypesortkey(e[0])
    )
    for k, v in entries:
        for piece in streamencode(k):
            yield piece
        for piece in streamencode(v):
            yield piece


def streamencodemapfromiter(it):
    """Given an iterable of (key, value), encode to an indefinite length map."""
    yield BEGIN_INDEFINITE_MAP

    # Pairs are emitted in iteration order; no sorting is performed.
    for k, v in it:
        for piece in streamencode(k):
            yield piece
        for piece in streamencode(v):
            yield piece

    yield BREAK


def streamencodebool(b):
    """Encode a boolean (major type 7, simple values 20 and 21)."""
    if b:
        yield b'\xf5'
    else:
        yield b'\xf4'


def streamencodenone(v):
    """Encode None (major type 7, simple value 22); ``v`` is ignored."""
    yield b'\xf6'


# Maps a value's exact type to the generator that encodes it.
# streamencode() consults this by exact class first, then falls back to
# isinstance() checks so subtypes are also handled.
STREAM_ENCODERS = {
    bytes: streamencodebytestring,
    int: streamencodeint,
    pycompat.long: streamencodeint,
    list: streamencodearray,
    tuple: streamencodearray,
    dict: streamencodemap,
    set: streamencodeset,
    bool: streamencodebool,
    type(None): streamencodenone,
}


def streamencode(v):
    """Encode a value in a streaming manner.

    Given an input object, encode it to CBOR recursively.

    Returns a generator of CBOR encoded bytes. There is no guarantee
    that each emitted chunk fully decodes to a value or sub-value.

    Encoding is deterministic - unordered collections are sorted.

    Raises ValueError if no encoder is registered for the value's type.
    """
    encoder = STREAM_ENCODERS.get(v.__class__)

    if encoder is None:
        # Exact type lookup failed. Fall back to isinstance checks so
        # subtypes such as encoding.localstr and util.sortdict encode.
        for registered in STREAM_ENCODERS:
            if isinstance(v, registered):
                encoder = STREAM_ENCODERS[registered]
                break

    if encoder is None:
        raise ValueError(b'do not know how to encode %s' % type(v))

    return encoder(v)


class CBORDecodeError(Exception):
    """Raised when CBOR data cannot be decoded."""


if sys.version_info.major >= 3:

    def _elementtointeger(b, i):
        # Indexing a bytes object yields an int on Python 3.
        return b[i]


else:

    def _elementtointeger(b, i):
        # Indexing a bytes (str) object yields a 1-char str on Python 2,
        # so convert it explicitly.
        return ord(b[i])


# Pre-compiled big-endian unsigned integer codecs for the 1, 2, 4, and
# 8 byte payload sizes selected by CBOR subtypes 24-27 (see decodeuint).
STRUCT_BIG_UBYTE = struct.Struct('>B')
STRUCT_BIG_USHORT = struct.Struct(b'>H')
STRUCT_BIG_ULONG = struct.Struct(b'>L')
STRUCT_BIG_ULONGLONG = struct.Struct(b'>Q')

# Flags returned by decodeitem() signaling that the decoded value needs
# special treatment (e.g. it opens a collection) rather than being a
# fully decoded simple value.
SPECIAL_NONE = 0
SPECIAL_START_INDEFINITE_BYTESTRING = 1
SPECIAL_START_ARRAY = 2
SPECIAL_START_MAP = 3
SPECIAL_START_SET = 4
SPECIAL_INDEFINITE_BREAK = 5


def decodeitem(b, offset=0):
    """Decode a new CBOR value from a buffer at offset.

    This function attempts to decode up to one complete CBOR value
    from ``b`` starting at offset ``offset``.

    The beginning of a collection (such as an array, map, set, or
    indefinite length bytestring) counts as a single value. For these
    special cases, a state flag will indicate that a special value was seen.

    When called, the function either returns a decoded value or gives
    a hint as to how many more bytes are needed to do so. By calling
    the function repeatedly given a stream of bytes, the caller can
    build up the original values.

    Returns a tuple with the following elements:

    * Bool indicating whether a complete value was decoded.
    * A decoded value if first value is True otherwise None
    * Integer number of bytes. If positive, the number of bytes
      read. If negative, the number of bytes we need to read to
      decode this value or the next chunk in this value.
    * One of the ``SPECIAL_*`` constants indicating special treatment
      for this value. ``SPECIAL_NONE`` means this is a fully decoded
      simple value (such as an integer or bool).
    """

    # The initial byte carries the major type (high 3 bits) and the
    # subtype (low 5 bits).
    initial = _elementtointeger(b, offset)
    offset += 1

    majortype = initial >> 5
    subtype = initial & SUBTYPE_MASK

    if majortype == MAJOR_TYPE_UINT:
        complete, value, readcount = decodeuint(subtype, b, offset)

        if complete:
            # +1 accounts for the initial byte consumed above.
            return True, value, readcount + 1, SPECIAL_NONE
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_NEGINT:
        # Negative integers are the same as UINT except inverted minus 1.
        complete, value, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, -value - 1, readcount + 1, SPECIAL_NONE
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_BYTESTRING:
        # Beginning of bytestrings are treated as uints in order to
        # decode their length, which may be indefinite.
        complete, size, readcount = decodeuint(
            subtype, b, offset, allowindefinite=True
        )

        # We don't know the size of the bytestring. It must be a definitive
        # length since the indefinite subtype would be encoded in the initial
        # byte.
        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # We know the length of the bytestring.
        if size is not None:
            # And the data is available in the buffer.
            if offset + readcount + size <= len(b):
                value = b[offset + readcount : offset + readcount + size]
                return True, value, readcount + size + 1, SPECIAL_NONE

            # And we need more data in order to return the bytestring.
            else:
                # Negative: number of additional bytes required.
                wanted = len(b) - offset - readcount - size
                return False, None, wanted, SPECIAL_NONE

        # It is an indefinite length bytestring.
        else:
            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING

    elif majortype == MAJOR_TYPE_STRING:
        raise CBORDecodeError(b'string major type not supported')

    elif majortype == MAJOR_TYPE_ARRAY:
        # Beginning of arrays are treated as uints in order to decode their
        # length. We don't allow indefinite length arrays.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, size, readcount + 1, SPECIAL_START_ARRAY
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_MAP:
        # Beginning of maps are treated as uints in order to decode their
        # number of elements. We don't allow indefinite length arrays.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, size, readcount + 1, SPECIAL_START_MAP
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_SEMANTIC:
        # Semantic tag value is read the same as a uint.
        complete, tagvalue, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # This behavior here is a little wonky. The main type being "decorated"
        # by this semantic tag follows. A more robust parser would probably emit
        # a special flag indicating this as a semantic tag and let the caller
        # deal with the types that follow. But since we don't support many
        # semantic tags, it is easier to deal with the special cases here and
        # hide complexity from the caller. If we add support for more semantic
        # tags, we should probably move semantic tag handling into the caller.
        if tagvalue == SEMANTIC_TAG_FINITE_SET:
            if offset + readcount >= len(b):
                # Need at least one more byte to see the tagged item.
                return False, None, -1, SPECIAL_NONE

            # Recursively decode the item following the tag; for a finite
            # set it must be an array header.
            complete, size, readcount2, special = decodeitem(
                b, offset + readcount
            )

            if not complete:
                return False, None, readcount2, SPECIAL_NONE

            if special != SPECIAL_START_ARRAY:
                raise CBORDecodeError(
                    b'expected array after finite set semantic tag'
                )

            return True, size, readcount + readcount2 + 1, SPECIAL_START_SET

        else:
            raise CBORDecodeError(b'semantic tag %d not allowed' % tagvalue)

    elif majortype == MAJOR_TYPE_SPECIAL:
        # Only specific values for the information field are allowed.
        if subtype == SUBTYPE_FALSE:
            return True, False, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_TRUE:
            return True, True, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_NULL:
            return True, None, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_INDEFINITE:
            return True, None, 1, SPECIAL_INDEFINITE_BREAK
        # If value is 24, subtype is in next byte.
        else:
            raise CBORDecodeError(b'special type %d not allowed' % subtype)
    else:
        # The 3-bit major type can only take values 0-7, all handled above.
        assert False


def decodeuint(subtype, b, offset=0, allowindefinite=False):
    """Decode an unsigned integer.

    ``subtype`` is the lower 5 bits from the initial byte CBOR item
    "header." ``b`` is a buffer containing bytes. ``offset`` points to
    the index of the first byte after the byte that ``subtype`` was
    derived from.

    ``allowindefinite`` allows the special indefinite length value
    indicator.

    Returns a 3-tuple of (successful, value, count).

    The first element is a bool indicating if decoding completed. The 2nd
    is the decoded integer value or None if not fully decoded or the subtype
    is 31 and ``allowindefinite`` is True. The 3rd value is the count of bytes.
    If positive, it is the number of additional bytes decoded. If negative,
    it is the number of additional bytes needed to decode this value.
    """

    # Values 0-23 are stored inline in the subtype bits themselves.
    if subtype < 24:
        return True, subtype, 0

    # Subtype 31 is the indefinite length specifier.
    if subtype == 31:
        if not allowindefinite:
            raise CBORDecodeError(b'indefinite length uint not allowed here')
        return True, None, 0

    # 28-30 are reserved by the spec and unsupported here.
    if subtype >= 28:
        raise CBORDecodeError(
            b'unsupported subtype on integer type: %d' % subtype
        )

    # Remaining subtypes 24-27 select a 1, 2, 4, or 8 byte big-endian
    # integer payload following the initial byte.
    if subtype == 24:
        s = STRUCT_BIG_UBYTE
    elif subtype == 25:
        s = STRUCT_BIG_USHORT
    elif subtype == 26:
        s = STRUCT_BIG_ULONG
    elif subtype == 27:
        s = STRUCT_BIG_ULONGLONG
    else:
        raise CBORDecodeError(b'bounds condition checking violation')

    remaining = len(b) - offset
    if remaining >= s.size:
        return True, s.unpack_from(b, offset)[0], s.size

    # Not enough data; report (negatively) how many more bytes we need.
    return False, None, remaining - s.size


class bytestringchunk(bytes):
    """Represents a chunk/segment in an indefinite length bytestring.

    Behaves exactly like ``bytes`` but carries two extra boolean
    attributes, ``isfirst`` and ``islast``, flagging whether this chunk
    is the first or last piece of an indefinite length bytestring.
    """

    def __new__(cls, v, first=False, last=False):
        # bytes is immutable, so extra attributes must be attached in
        # __new__ rather than __init__.
        inst = bytes.__new__(cls, v)
        inst.isfirst = first
        inst.islast = last

        return inst


class sansiodecoder(object):
    """A CBOR decoder that doesn't perform its own I/O.

    To use, construct an instance and feed it segments containing
    CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
    indicates whether a fully-decoded value is available, how many bytes
    were consumed, and offers a hint as to how many bytes should be fed
    in next time to decode the next value.

    The decoder assumes it will decode N discrete CBOR values, not just
    a single value. i.e. if the bytestream contains uints packed one after
    the other, the decoder will decode them all, rather than just the initial
    one.

    When ``decode()`` indicates a value is available, call ``getavailable()``
    to return all fully decoded values.

    ``decode()`` can partially decode input. It is up to the caller to keep
    track of what data was consumed and to pass unconsumed data in on the
    next invocation.

    The decoder decodes atomically at the *item* level. See ``decodeitem()``.
    If an *item* cannot be fully decoded, the decoder won't record it as
    partially consumed. Instead, the caller will be instructed to pass in
    the initial bytes of this item on the next invocation. This does result
    in some redundant parsing. But the overhead should be minimal.

    This decoder only supports a subset of CBOR as required by Mercurial.
    It lacks support for:

    * Indefinite length arrays
    * Indefinite length maps
    * Use of indefinite length bytestrings as keys or values within
      arrays, maps, or sets.
    * Nested arrays, maps, or sets within sets
    * Any semantic tag that isn't a mathematical finite set
    * Floating point numbers
    * Undefined special value

    CBOR types are decoded to Python types as follows:

    uint -> int
    negint -> int
    bytestring -> bytes
    map -> dict
    array -> list
    True -> bool
    False -> bool
    null -> None
    indefinite length bytestring chunk -> [bytestringchunk]

    The only non-obvious mapping here is an indefinite length bytestring
    to the ``bytestringchunk`` type. This is to facilitate streaming
    indefinite length bytestrings out of the decoder and to differentiate
    a regular bytestring from an indefinite length bytestring.
    """

    # States for the decoder's state machine. _STATE_NONE means we're at
    # the root level, between values. The _STATE_WANT_* states record
    # which component of a compound value must be decoded next.
    _STATE_NONE = 0
    _STATE_WANT_MAP_KEY = 1
    _STATE_WANT_MAP_VALUE = 2
    _STATE_WANT_ARRAY_VALUE = 3
    _STATE_WANT_SET_VALUE = 4
    _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
    _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6

    def __init__(self):
        # TODO add support for limiting size of bytestrings
        # TODO add support for limiting number of keys / values in collections
        # TODO add support for limiting size of buffered partial values

        # Running total of input bytes successfully decoded across all
        # calls to ``decode()``.
        self.decodedbytecount = 0

        # Current state machine state; one of the _STATE_* constants.
        self._state = self._STATE_NONE

        # Stack of active nested collections. Each entry is a dict describing
        # the collection.
        self._collectionstack = []

        # Fully decoded key to use for the current map.
        self._currentmapkey = None

        # Fully decoded values available for retrieval.
        self._decodedvalues = []

    @property
    def inprogress(self):
        """Whether the decoder has partially decoded a value."""
        return self._state != self._STATE_NONE

    def decode(self, b, offset=0):
        """Attempt to decode bytes from an input buffer.

        ``b`` is a collection of bytes and ``offset`` is the byte
        offset within that buffer from which to begin reading data.

        ``b`` must support ``len()`` and accessing bytes slices via
        ``__slice__``. Typically ``bytes`` instances are used.

        Returns a tuple with the following fields:

        * Bool indicating whether values are available for retrieval.
        * Integer indicating the number of bytes that were fully consumed,
          starting from ``offset``.
        * Integer indicating the number of bytes that are desired for the
          next call in order to decode an item.
        """
        if not b:
            return bool(self._decodedvalues), 0, 0

        initialoffset = offset

        # We could easily split the body of this loop into a function. But
        # Python performance is sensitive to function calls and collections
        # are composed of many items. So leaving as a while loop could help
        # with performance. One thing that may not help is the use of
        # if..elif versus a lookup/dispatch table. There may be value
        # in switching that.
        while offset < len(b):
            # Attempt to decode an item. This could be a whole value or a
            # special value indicating an event, such as start or end of a
            # collection or indefinite length type.
            complete, value, readcount, special = decodeitem(b, offset)

            if readcount > 0:
                self.decodedbytecount += readcount

            if not complete:
                # An incomplete item reports how many *more* bytes it
                # needs as a negative readcount.
                assert readcount < 0
                return (
                    bool(self._decodedvalues),
                    offset - initialoffset,
                    -readcount,
                )

            # The item was fully decoded; advance past its bytes.
            offset += readcount

            # No nested state. We either have a full value or beginning of a
            # complex value to deal with.
            if self._state == self._STATE_NONE:
                # A normal value.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(value)

                elif special == SPECIAL_START_ARRAY:
                    self._collectionstack.append(
                        {b'remaining': value, b'v': [],}
                    )
                    self._state = self._STATE_WANT_ARRAY_VALUE

                elif special == SPECIAL_START_MAP:
                    self._collectionstack.append(
                        {b'remaining': value, b'v': {},}
                    )
                    self._state = self._STATE_WANT_MAP_KEY

                elif special == SPECIAL_START_SET:
                    self._collectionstack.append(
                        {b'remaining': value, b'v': set(),}
                    )
                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

                else:
                    raise CBORDecodeError(
                        b'unhandled special state: %d' % special
                    )

            # This value becomes an element of the current array.
            elif self._state == self._STATE_WANT_ARRAY_VALUE:
                # Simple values get appended.
                if special == SPECIAL_NONE:
                    c = self._collectionstack[-1]
                    c[b'v'].append(value)
                    c[b'remaining'] -= 1

                    # self._state doesn't need changed.

                # An array nested within an array.
                elif special == SPECIAL_START_ARRAY:
                    lastc = self._collectionstack[-1]
                    newvalue = []

                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue,}
                    )

                    # self._state doesn't need changed.

                # A map nested within an array.
                elif special == SPECIAL_START_MAP:
                    lastc = self._collectionstack[-1]
                    newvalue = {}

                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue}
                    )

                    self._state = self._STATE_WANT_MAP_KEY

                elif special == SPECIAL_START_SET:
                    lastc = self._collectionstack[-1]
                    newvalue = set()

                    lastc[b'v'].append(newvalue)
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue,}
                    )

                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings '
                        b'not allowed as array values'
                    )

                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting array value: %d' % special
                    )

            # This value becomes the key of the current map instance.
            elif self._state == self._STATE_WANT_MAP_KEY:
                if special == SPECIAL_NONE:
                    self._currentmapkey = value
                    self._state = self._STATE_WANT_MAP_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings '
                        b'not allowed as map keys'
                    )

                elif special in (
                    SPECIAL_START_ARRAY,
                    SPECIAL_START_MAP,
                    SPECIAL_START_SET,
                ):
                    raise CBORDecodeError(
                        b'collections not supported as map keys'
                    )

                # We do not allow special values to be used as map keys.
                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting map key: %d' % special
                    )

            # This value becomes the value of the current map key.
            elif self._state == self._STATE_WANT_MAP_VALUE:
                # Simple values simply get inserted into the map.
                if special == SPECIAL_NONE:
                    lastc = self._collectionstack[-1]
                    lastc[b'v'][self._currentmapkey] = value
                    lastc[b'remaining'] -= 1

                    self._state = self._STATE_WANT_MAP_KEY

                # A new array is used as the map value.
                elif special == SPECIAL_START_ARRAY:
                    lastc = self._collectionstack[-1]
                    newvalue = []

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue,}
                    )

                    self._state = self._STATE_WANT_ARRAY_VALUE

                # A new map is used as the map value.
                elif special == SPECIAL_START_MAP:
                    lastc = self._collectionstack[-1]
                    newvalue = {}

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue,}
                    )

                    self._state = self._STATE_WANT_MAP_KEY

                # A new set is used as the map value.
                elif special == SPECIAL_START_SET:
                    lastc = self._collectionstack[-1]
                    newvalue = set()

                    lastc[b'v'][self._currentmapkey] = newvalue
                    lastc[b'remaining'] -= 1

                    self._collectionstack.append(
                        {b'remaining': value, b'v': newvalue,}
                    )

                    self._state = self._STATE_WANT_SET_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings not '
                        b'allowed as map values'
                    )

                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting map value: %d' % special
                    )

                # The key has been consumed (or an error raised); clear it
                # so the next map key starts fresh.
                self._currentmapkey = None

            # This value is added to the current set.
            elif self._state == self._STATE_WANT_SET_VALUE:
                if special == SPECIAL_NONE:
                    lastc = self._collectionstack[-1]
                    lastc[b'v'].add(value)
                    lastc[b'remaining'] -= 1

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError(
                        b'indefinite length bytestrings not '
                        b'allowed as set values'
                    )

                elif special in (
                    SPECIAL_START_ARRAY,
                    SPECIAL_START_MAP,
                    SPECIAL_START_SET,
                ):
                    raise CBORDecodeError(
                        b'collections not allowed as set values'
                    )

                # We don't allow non-trivial types to exist as set values.
                else:
                    raise CBORDecodeError(
                        b'unhandled special item when '
                        b'expecting set value: %d' % special
                    )

            # This value represents the first chunk in an indefinite length
            # bytestring.
            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
                # We received a full chunk.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(
                        bytestringchunk(value, first=True)
                    )

                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

                # The end of stream marker. This means it is an empty
                # indefinite length bytestring.
                elif special == SPECIAL_INDEFINITE_BREAK:
                    # We /could/ convert this to a b''. But we want to preserve
                    # the nature of the underlying data so consumers expecting
                    # an indefinite length bytestring get one.
                    self._decodedvalues.append(
                        bytestringchunk(b'', first=True, last=True)
                    )

                    # Since indefinite length bytestrings can't be used in
                    # collections, we must be at the root level.
                    assert not self._collectionstack
                    self._state = self._STATE_NONE

                else:
                    raise CBORDecodeError(
                        b'unexpected special value when '
                        b'expecting bytestring chunk: %d' % special
                    )

            # This value represents the non-initial chunk in an indefinite
            # length bytestring.
            elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
                # We received a full chunk.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(bytestringchunk(value))

                # The end of stream marker.
                elif special == SPECIAL_INDEFINITE_BREAK:
                    self._decodedvalues.append(bytestringchunk(b'', last=True))

                    # Since indefinite length bytestrings can't be used in
                    # collections, we must be at the root level.
                    assert not self._collectionstack
                    self._state = self._STATE_NONE

                else:
                    raise CBORDecodeError(
                        b'unexpected special value when '
                        b'expecting bytestring chunk: %d' % special
                    )

            else:
                raise CBORDecodeError(
                    b'unhandled decoder state: %d' % self._state
                )

            # We could have just added the final value in a collection. End
            # all complete collections at the top of the stack.
            while True:
                # Bail if we're not waiting on a new collection item.
                if self._state not in (
                    self._STATE_WANT_ARRAY_VALUE,
                    self._STATE_WANT_MAP_KEY,
                    self._STATE_WANT_SET_VALUE,
                ):
                    break

                # Or we are expecting more items for this collection.
                lastc = self._collectionstack[-1]

                if lastc[b'remaining']:
                    break

                # The collection at the top of the stack is complete.

                # Discard it, as it isn't needed for future items.
                self._collectionstack.pop()

                # If this is a nested collection, we don't emit it, since it
                # will be emitted by its parent collection. But we do need to
                # update state to reflect what the new top-most collection
                # on the stack is.
                if self._collectionstack:
                    self._state = {
                        list: self._STATE_WANT_ARRAY_VALUE,
                        dict: self._STATE_WANT_MAP_KEY,
                        set: self._STATE_WANT_SET_VALUE,
                    }[type(self._collectionstack[-1][b'v'])]

                # If this is the root collection, emit it.
                else:
                    self._decodedvalues.append(lastc[b'v'])
                    self._state = self._STATE_NONE

        # All input bytes were consumed. 0 means we have no hint about how
        # many bytes the next item requires.
        return (
            bool(self._decodedvalues),
            offset - initialoffset,
            0,
        )

    def getavailable(self):
        """Returns an iterator over fully decoded values.

        Once values are retrieved, they won't be available on the next call.
        """

        l = list(self._decodedvalues)
        self._decodedvalues = []
        return l


class bufferingdecoder(object):
    """A CBOR decoder that buffers undecoded input.

    This wraps ``sansiodecoder`` with a buffering layer: any input that
    the wrapped decoder does not consume is retained and prepended to
    whatever data arrives on a later call.

    TODO consider adding limits as to the maximum amount of data that can
    be buffered.
    """

    def __init__(self):
        self._decoder = sansiodecoder()
        # Pending, not-yet-decoded input segments, oldest first.
        self._chunks = []
        # Additional byte count the wrapped decoder said it needs.
        self._wanted = 0

    def decode(self, b):
        """Attempt to decode bytes to CBOR values.

        Returns a tuple with the following fields:

        * Bool indicating whether new values are available for retrieval.
        * Integer number of bytes decoded from the new input.
        * Integer number of bytes wanted to decode the next value.
        """
        # We /might/ be able to support passing a bytearray all the
        # way through. For now, let's cheat.
        if isinstance(b, bytearray):
            b = bytes(b)

        # Buffering strategy: accumulate incoming chunks in a list until
        # enough bytes have arrived to decode the next item. Compared with
        # an ``io.BytesIO`` or repeated concatenation, this avoids
        # continuously reallocating backing memory for a growing buffer,
        # which keeps memory thrashing down and is significantly faster -
        # especially when a high percentage of input chunks fail to decode
        # into a complete item.

        # Number of previously buffered bytes folded into ``b`` below.
        bufferedlen = 0

        if self._chunks:
            incominglen = len(b)

            # A previous call told us how many bytes the next item needs.
            # If this chunk still doesn't get us there, stash it without
            # attempting a decode.
            if incominglen < self._wanted:
                self._chunks.append(b)
                self._wanted -= incominglen
                return False, 0, self._wanted

            # Otherwise we may have enough data. Merge everything buffered
            # with the new input and reset the buffer.
            self._chunks.append(b)
            b = b''.join(self._chunks)
            self._chunks = []
            bufferedlen = len(b) - incominglen

        available, readcount, wanted = self._decoder.decode(b)
        self._wanted = wanted

        # Whatever the decoder left unconsumed becomes the new buffer.
        if readcount < len(b):
            self._chunks.append(b[readcount:])

        # Report only bytes consumed from the *new* input, excluding bytes
        # that had already been buffered on earlier calls.
        return available, readcount - bufferedlen, wanted

    def getavailable(self):
        return self._decoder.getavailable()


def decodeall(b):
    """Decode every CBOR item contained in an iterable of bytes.

    Beyond ordinary decode errors, a ``CBORDecodeError`` is raised when the
    buffer does not decode cleanly into complete CBOR values: when no value
    decodes at all, when a collection or indefinite length item is truncated,
    or when extra data trails the final value.
    """
    if not b:
        return []

    decoder = sansiodecoder()
    _havevalues, consumed, _wanted = decoder.decode(b)

    # Every byte of input must have been consumed...
    if consumed != len(b):
        raise CBORDecodeError(b'input data not fully consumed')

    # ... and no value may remain partially decoded.
    if decoder.inprogress:
        raise CBORDecodeError(b'input data not complete')

    return decoder.getavailable()