mercurial-scm/hg: comparison mercurial/utils/cborutil.py

equal deleted inserted replaced

-:0a5fe2a08e82
+:65a23cc8e75b
+# cborutil.py - CBOR extensions
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+import struct
+from ..thirdparty.cbor.cbor2 import (
+decoder as decodermod,
+)
+# Very short very of RFC 7049...
+#
+# Each item begins with a byte. The 3 high bits of that byte denote the
+# "major type." The lower 5 bits denote the "subtype." Each major type
+# has its own encoding mechanism.
+#
+# Most types have lengths. However, bytestring, string, array, and map
+# can be indefinite length. These are denotes by a subtype with value 31.
+# Sub-components of those types then come afterwards and are terminated
+# by a "break" byte.
+MAJOR_TYPE_UINT = 0
+MAJOR_TYPE_NEGINT = 1
+MAJOR_TYPE_BYTESTRING = 2
+MAJOR_TYPE_STRING = 3
+MAJOR_TYPE_ARRAY = 4
+MAJOR_TYPE_MAP = 5
+MAJOR_TYPE_SEMANTIC = 6
+MAJOR_TYPE_SPECIAL = 7
+SUBTYPE_MASK = 0b00011111
+SUBTYPE_HALF_FLOAT = 25
+SUBTYPE_SINGLE_FLOAT = 26
+SUBTYPE_DOUBLE_FLOAT = 27
+SUBTYPE_INDEFINITE = 31
+# Indefinite types begin with their major type ORd with information value 31.
+BEGIN_INDEFINITE_BYTESTRING = struct.pack(
+r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
+BEGIN_INDEFINITE_ARRAY = struct.pack(
+r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE)
+BEGIN_INDEFINITE_MAP = struct.pack(
+r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE)
+ENCODED_LENGTH_1 = struct.Struct(r'>B')
+ENCODED_LENGTH_2 = struct.Struct(r'>BB')
+ENCODED_LENGTH_3 = struct.Struct(r'>BH')
+ENCODED_LENGTH_4 = struct.Struct(r'>BL')
+ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
+# The break ends an indefinite length item.
+BREAK = b'\xff'
+BREAK_INT = 255
+def encodelength(majortype, length):
+"""Obtain a value encoding the major type and its length."""
+if length < 24:
+return ENCODED_LENGTH_1.pack(majortype << 5 | length)
+elif length < 256:
+return ENCODED_LENGTH_2.pack(majortype << 5 | 24, length)
+elif length < 65536:
+return ENCODED_LENGTH_3.pack(majortype << 5 | 25, length)
+elif length < 4294967296:
+return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
+else:
+return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
+def streamencodebytestring(v):
+yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
+yield v
+def streamencodebytestringfromiter(it):
+"""Convert an iterator of chunks to an indefinite bytestring.
+Given an input that is iterable and each element in the iterator is
+representable as bytes, emit an indefinite length bytestring.
+"""
+yield BEGIN_INDEFINITE_BYTESTRING
+for chunk in it:
+yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
+yield chunk
+yield BREAK
+def streamencodeindefinitebytestring(source, chunksize=65536):
+"""Given a large source buffer, emit as an indefinite length bytestring.
+This is a generator of chunks constituting the encoded CBOR data.
+"""
+yield BEGIN_INDEFINITE_BYTESTRING
+i = 0
+l = len(source)
+while True:
+chunk = source[i:i + chunksize]
+i += len(chunk)
+yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
+yield chunk
+if i >= l:
+break
+yield BREAK
+def streamencodeint(v):
+if v >= 18446744073709551616 or v < -18446744073709551616:
+raise ValueError('big integers not supported')
+if v >= 0:
+yield encodelength(MAJOR_TYPE_UINT, v)
+else:
+yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
+def streamencodearray(l):
+"""Encode a known size iterable to an array."""
+yield encodelength(MAJOR_TYPE_ARRAY, len(l))
+for i in l:
+for chunk in streamencode(i):
+yield chunk
+def streamencodearrayfromiter(it):
+"""Encode an iterator of items to an indefinite length array."""
+yield BEGIN_INDEFINITE_ARRAY
+for i in it:
+for chunk in streamencode(i):
+yield chunk
+yield BREAK
+def streamencodeset(s):
+# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
+# semantic tag 258 for finite sets.
+yield encodelength(MAJOR_TYPE_SEMANTIC, 258)
+for chunk in streamencodearray(sorted(s)):
+yield chunk
+def streamencodemap(d):
+"""Encode dictionary to a generator.
+Does not supporting indefinite length dictionaries.
+"""
+yield encodelength(MAJOR_TYPE_MAP, len(d))
+for key, value in sorted(d.iteritems()):
+for chunk in streamencode(key):
+yield chunk
+for chunk in streamencode(value):
+yield chunk
+def streamencodemapfromiter(it):
+"""Given an iterable of (key, value), encode to an indefinite length map."""
+yield BEGIN_INDEFINITE_MAP
+for key, value in it:
+for chunk in streamencode(key):
+yield chunk
+for chunk in streamencode(value):
+yield chunk
+yield BREAK
+def streamencodebool(b):
+# major type 7, simple value 20 and 21.
+yield b'\xf5' if b else b'\xf4'
+def streamencodenone(v):
+# major type 7, simple value 22.
+yield b'\xf6'
+STREAM_ENCODERS = {
+bytes: streamencodebytestring,
+int: streamencodeint,
+list: streamencodearray,
+tuple: streamencodearray,
+dict: streamencodemap,
+set: streamencodeset,
+bool: streamencodebool,
+type(None): streamencodenone,
+}
+def streamencode(v):
+"""Encode a value in a streaming manner.
+Given an input object, encode it to CBOR recursively.
+Returns a generator of CBOR encoded bytes. There is no guarantee
+that each emitted chunk fully decodes to a value or sub-value.
+Encoding is deterministic - unordered collections are sorted.
+"""
+fn = STREAM_ENCODERS.get(v.__class__)
+if not fn:
+raise ValueError('do not know how to encode %s' % type(v))
+return fn(v)
+def readindefinitebytestringtoiter(fh, expectheader=True):
+"""Read an indefinite bytestring to a generator.
+Receives an object with a ``read(X)`` method to read N bytes.
+If ``expectheader`` is True, it is expected that the first byte read
+will represent an indefinite length bytestring. Otherwise, we
+expect the first byte to be part of the first bytestring chunk.
+"""
+read = fh.read
+decodeuint = decodermod.decode_uint
+byteasinteger = decodermod.byte_as_integer
+if expectheader:
+initial = decodermod.byte_as_integer(read(1))
+majortype = initial >> 5
+subtype = initial & SUBTYPE_MASK
+if majortype != MAJOR_TYPE_BYTESTRING:
+raise decodermod.CBORDecodeError(
+'expected major type %d; got %d' % (MAJOR_TYPE_BYTESTRING,
+majortype))
+if subtype != SUBTYPE_INDEFINITE:
+raise decodermod.CBORDecodeError(
+'expected indefinite subtype; got %d' % subtype)
+# The indefinite bytestring is composed of chunks of normal bytestrings.
+# Read chunks until we hit a BREAK byte.
+while True:
+# We need to sniff for the BREAK byte.
+initial = byteasinteger(read(1))
+if initial == BREAK_INT:
+break
+length = decodeuint(fh, initial & SUBTYPE_MASK)
+chunk = read(length)
+if len(chunk) != length:
+raise decodermod.CBORDecodeError(
+'failed to read bytestring chunk: got %d bytes; expected %d' % (
+len(chunk), length))
+yield chunk

changeset 37711	65a23cc8e75b
child 37898	2ae6a3134362