Mercurial > public > mercurial-scm > hg-stable
diff contrib/python-zstandard/zstd.c @ 37495:b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
This was just released. It features a number of goodies. More info at
https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/.
The clang-format ignore list was updated to reflect the new set of
source files.
The project contains a vendored copy of zstandard 1.3.4. The old
version was 1.1.3. One of the changes between those versions is that
zstandard is now dual licensed BSD + GPLv2 and the patent rights grant
has been removed. Good riddance.
The API should be backwards compatible. So no changes in core
should be needed. However, there were a number of changes in the
library that we'll want to adapt to. Those will be addressed in
subsequent commits.
Differential Revision: https://phab.mercurial-scm.org/D3198
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 09 Apr 2018 10:13:29 -0700 |
parents | 39d36c2db68e |
children | 73fef626dae3 |
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd.c Sun Apr 08 01:08:43 2018 +0200 +++ b/contrib/python-zstandard/zstd.c Mon Apr 09 10:13:29 2018 -0700 @@ -20,12 +20,6 @@ PyObject *ZstdError; -PyDoc_STRVAR(estimate_compression_context_size__doc__, -"estimate_compression_context_size(compression_parameters)\n" -"\n" -"Give the amount of memory allocated for a compression context given a\n" -"CompressionParameters instance"); - PyDoc_STRVAR(estimate_decompression_context_size__doc__, "estimate_decompression_context_size()\n" "\n" @@ -36,11 +30,101 @@ return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); } -PyDoc_STRVAR(get_compression_parameters__doc__, -"get_compression_parameters(compression_level[, source_size[, dict_size]])\n" +PyDoc_STRVAR(frame_content_size__doc__, +"frame_content_size(data)\n" "\n" -"Obtains a ``CompressionParameters`` instance from a compression level and\n" -"optional input size and dictionary size"); +"Obtain the decompressed size of a frame." +); + +static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) { + static char* kwlist[] = { + "source", + NULL + }; + + Py_buffer source; + PyObject* result = NULL; + unsigned long long size; + +#if PY_MAJOR_VERSION >= 3 + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", +#else + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size", +#endif + kwlist, &source)) { + return NULL; + } + + if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { + PyErr_SetString(PyExc_ValueError, + "data buffer should be contiguous and have at most one dimension"); + goto finally; + } + + size = ZSTD_getFrameContentSize(source.buf, source.len); + + if (size == ZSTD_CONTENTSIZE_ERROR) { + PyErr_SetString(ZstdError, "error when determining content size"); + } + else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { + result = PyLong_FromLong(-1); + } + else { + result = PyLong_FromUnsignedLongLong(size); + } + +finally: + PyBuffer_Release(&source); + + return result; +} + 
+PyDoc_STRVAR(frame_header_size__doc__, +"frame_header_size(data)\n" +"\n" +"Obtain the size of a frame header.\n" +); + +static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) { + static char* kwlist[] = { + "source", + NULL + }; + + Py_buffer source; + PyObject* result = NULL; + size_t zresult; + +#if PY_MAJOR_VERSION >= 3 + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", +#else + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size", +#endif + kwlist, &source)) { + return NULL; + } + + if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { + PyErr_SetString(PyExc_ValueError, + "data buffer should be contiguous and have at most one dimension"); + goto finally; + } + + zresult = ZSTD_frameHeaderSize(source.buf, source.len); + if (ZSTD_isError(zresult)) { + PyErr_Format(ZstdError, "could not determine frame header size: %s", + ZSTD_getErrorName(zresult)); + } + else { + result = PyLong_FromSize_t(zresult); + } + +finally: + + PyBuffer_Release(&source); + + return result; +} PyDoc_STRVAR(get_frame_parameters__doc__, "get_frame_parameters(data)\n" @@ -48,43 +132,48 @@ "Obtains a ``FrameParameters`` instance by parsing data.\n"); PyDoc_STRVAR(train_dictionary__doc__, -"train_dictionary(dict_size, samples)\n" -"\n" -"Train a dictionary from sample data.\n" -"\n" -"A compression dictionary of size ``dict_size`` will be created from the\n" -"iterable of samples provided by ``samples``.\n" -"\n" -"The raw dictionary content will be returned\n"); - -PyDoc_STRVAR(train_cover_dictionary__doc__, -"train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n" +"train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n" +" threads=None,notifications=0, dict_id=0, level=0)\n" "\n" "Train a dictionary from sample data using the COVER algorithm.\n" "\n" -"This behaves like ``train_dictionary()`` except a different algorithm is\n" -"used to create the 
dictionary. The algorithm has 2 parameters: ``k`` and\n" -"``d``. These control the *segment size* and *dmer size*. A reasonable range\n" -"for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" +"A compression dictionary of size ``dict_size`` will be created from the\n" +"iterable of ``samples``. The raw dictionary bytes will be returned.\n" +"\n" +"The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n" +"*segment size* and *dmer size*. A reasonable range for ``k`` is\n" +"``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" "``d`` must be less than or equal to ``k``.\n" +"\n" +"``steps`` can be specified to control the number of steps through potential\n" +"values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n" +"those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n" +"will be varied in this mode.\n" +"\n" +"``threads`` can specify how many threads to use to test various ``k`` and\n" +"``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n" +"a single thread is used.\n" +"\n" +"When ``k`` and ``d`` are not defined, default values are used and the\n" +"algorithm will perform multiple iterations - or steps - to try to find\n" +"ideal parameters. If both ``k`` and ``d`` are specified, then those values\n" +"will be used. ``steps`` or ``threads`` triggers optimization mode to test\n" +"multiple ``k`` and ``d`` variations.\n" ); static char zstd_doc[] = "Interface to zstandard"; static PyMethodDef zstd_methods[] = { - /* TODO remove since it is a method on CompressionParameters. 
*/ - { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size, - METH_VARARGS, estimate_compression_context_size__doc__ }, { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, METH_NOARGS, estimate_decompression_context_size__doc__ }, - { "get_compression_parameters", (PyCFunction)get_compression_parameters, - METH_VARARGS, get_compression_parameters__doc__ }, + { "frame_content_size", (PyCFunction)frame_content_size, + METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ }, + { "frame_header_size", (PyCFunction)frame_header_size, + METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ }, { "get_frame_parameters", (PyCFunction)get_frame_parameters, - METH_VARARGS, get_frame_parameters__doc__ }, + METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ }, { "train_dictionary", (PyCFunction)train_dictionary, METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, - { "train_cover_dictionary", (PyCFunction)train_cover_dictionary, - METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ }, { NULL, NULL } }; @@ -94,10 +183,12 @@ void compressionparams_module_init(PyObject* mod); void constants_module_init(PyObject* mod); void compressiondict_module_init(PyObject* mod); +void compressionreader_module_init(PyObject* mod); void compressionwriter_module_init(PyObject* mod); void compressoriterator_module_init(PyObject* mod); void decompressor_module_init(PyObject* mod); void decompressobj_module_init(PyObject* mod); +void decompressionreader_module_init(PyObject *mod); void decompressionwriter_module_init(PyObject* mod); void decompressoriterator_module_init(PyObject* mod); void frameparams_module_init(PyObject* mod); @@ -118,7 +209,7 @@ We detect this mismatch here and refuse to load the module if this scenario is detected. 
*/ - if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) { + if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) { PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); return; } @@ -128,16 +219,24 @@ compressiondict_module_init(m); compressobj_module_init(m); compressor_module_init(m); + compressionreader_module_init(m); compressionwriter_module_init(m); compressoriterator_module_init(m); constants_module_init(m); decompressor_module_init(m); decompressobj_module_init(m); + decompressionreader_module_init(m); decompressionwriter_module_init(m); decompressoriterator_module_init(m); frameparams_module_init(m); } +#if defined(__GNUC__) && (__GNUC__ >= 4) +# define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default"))) +#else +# define PYTHON_ZSTD_VISIBILITY +#endif + #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef zstd_module = { PyModuleDef_HEAD_INIT, @@ -147,7 +246,7 @@ zstd_methods }; -PyMODINIT_FUNC PyInit_zstd(void) { +PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { PyObject *m = PyModule_Create(&zstd_module); if (m) { zstd_module_init(m); @@ -159,7 +258,7 @@ return m; } #else -PyMODINIT_FUNC initzstd(void) { +PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) { PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); if (m) { zstd_module_init(m); @@ -211,3 +310,33 @@ return i; } + +/* Safer version of _PyBytes_Resize(). + * + * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, + * we can get an object with a refcount > 1, even if it was just created + * with PyBytes_FromStringAndSize()! That's because (at least) CPython + * pre-allocates PyBytes instances of size 1 for every possible byte value. + * + * If non-0 is returned, obj may or may not be NULL. 
+ */ +int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) { + PyObject* tmp; + + if ((*obj)->ob_refcnt == 1) { + return _PyBytes_Resize(obj, size); + } + + tmp = PyBytes_FromStringAndSize(NULL, size); + if (!tmp) { + return -1; + } + + memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), + PyBytes_GET_SIZE(*obj)); + + Py_DECREF(*obj); + *obj = tmp; + + return 0; +} \ No newline at end of file