diff contrib/python-zstandard/zstd.c @ 37495:b1fb341d8a61

zstandard: vendor python-zstandard 0.9.0 This was just released. It features a number of goodies. More info at https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.4. The old version was 1.1.3. One of the changes between those versions is that zstandard is now dual licensed BSD + GPLv2 and the patent rights grant has been removed. Good riddance. The API should be backwards compatible. So no changes in core should be needed. However, there were a number of changes in the library that we'll want to adapt to. Those will be addressed in subsequent commits. Differential Revision: https://phab.mercurial-scm.org/D3198
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 09 Apr 2018 10:13:29 -0700
parents 39d36c2db68e
children 73fef626dae3
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd.c	Sun Apr 08 01:08:43 2018 +0200
+++ b/contrib/python-zstandard/zstd.c	Mon Apr 09 10:13:29 2018 -0700
@@ -20,12 +20,6 @@
 
 PyObject *ZstdError;
 
-PyDoc_STRVAR(estimate_compression_context_size__doc__,
-"estimate_compression_context_size(compression_parameters)\n"
-"\n"
-"Give the amount of memory allocated for a compression context given a\n"
-"CompressionParameters instance");
-
 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
 "estimate_decompression_context_size()\n"
 "\n"
@@ -36,11 +30,101 @@
 	return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
 }
 
-PyDoc_STRVAR(get_compression_parameters__doc__,
-"get_compression_parameters(compression_level[, source_size[, dict_size]])\n"
+PyDoc_STRVAR(frame_content_size__doc__, /* names the keyword listed in kwlist */
+"frame_content_size(source)\n"
 "\n"
-"Obtains a ``CompressionParameters`` instance from a compression level and\n"
-"optional input size and dictionary size");
+"Obtain the decompressed size of a frame."
+);
+
+/* Python-callable wrapper around ZSTD_getFrameContentSize() for ``source``.
+ * Returns -1 for ZSTD_CONTENTSIZE_UNKNOWN; sets ZstdError and returns NULL for
+ * ZSTD_CONTENTSIZE_ERROR; sets ValueError for an unsuitable buffer. */
+static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = { "source", NULL };
+	/* The buffer format code differs between Python major versions. */
+#if PY_MAJOR_VERSION >= 3
+	const char* format = "y*:frame_content_size";
+#else
+	const char* format = "s*:frame_content_size";
+#endif
+	unsigned long long contentSize;
+	PyObject* value = NULL;
+	Py_buffer source;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, kwlist, &source)) {
+		return NULL;
+	}
+
+	if (source.ndim > 1 || !PyBuffer_IsContiguous(&source, 'C')) {
+		PyErr_SetString(PyExc_ValueError,
+			"data buffer should be contiguous and have at most one dimension");
+		goto done;
+	}
+
+	contentSize = ZSTD_getFrameContentSize(source.buf, source.len);
+	if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
+		value = PyLong_FromLong(-1);
+	}
+	else if (contentSize == ZSTD_CONTENTSIZE_ERROR) {
+		PyErr_SetString(ZstdError, "error when determining content size");
+	}
+	else {
+		value = PyLong_FromUnsignedLongLong(contentSize);
+	}
+
+done:
+	/* Every path past argument parsing must release the buffer. */
+	PyBuffer_Release(&source);
+
+	return value;
+}
+
+PyDoc_STRVAR(frame_header_size__doc__, /* names the keyword listed in kwlist */
+"frame_header_size(source)\n"
+"\n"
+"Obtain the size of a frame header.\n"
+);
+
+/* Python-callable wrapper around ZSTD_frameHeaderSize() for ``source``.
+ * Sets ZstdError and returns NULL when zstd reports an error code; sets
+ * ValueError for an unsuitable buffer. */
+static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = { "source", NULL };
+	/* The buffer format code differs between Python major versions. */
+#if PY_MAJOR_VERSION >= 3
+	const char* format = "y*:frame_header_size";
+#else
+	const char* format = "s*:frame_header_size";
+#endif
+	size_t headerSize;
+	PyObject* value = NULL;
+	Py_buffer source;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, kwlist, &source)) {
+		return NULL;
+	}
+
+	if (source.ndim > 1 || !PyBuffer_IsContiguous(&source, 'C')) {
+		PyErr_SetString(PyExc_ValueError,
+			"data buffer should be contiguous and have at most one dimension");
+		goto done;
+	}
+
+	headerSize = ZSTD_frameHeaderSize(source.buf, source.len);
+	if (!ZSTD_isError(headerSize)) {
+		value = PyLong_FromSize_t(headerSize);
+	}
+	else {
+		PyErr_Format(ZstdError, "could not determine frame header size: %s",
+			ZSTD_getErrorName(headerSize));
+	}
+
+done:
+	/* Every path past argument parsing must release the buffer. */
+	PyBuffer_Release(&source);
+
+	return value;
+}
 
 PyDoc_STRVAR(get_frame_parameters__doc__,
 "get_frame_parameters(data)\n"
@@ -48,43 +132,48 @@
 "Obtains a ``FrameParameters`` instance by parsing data.\n");
 
 PyDoc_STRVAR(train_dictionary__doc__,
-"train_dictionary(dict_size, samples)\n"
-"\n"
-"Train a dictionary from sample data.\n"
-"\n"
-"A compression dictionary of size ``dict_size`` will be created from the\n"
-"iterable of samples provided by ``samples``.\n"
-"\n"
-"The raw dictionary content will be returned\n");
-
-PyDoc_STRVAR(train_cover_dictionary__doc__,
-"train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n"
+"train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
+"                 threads=None, notifications=0, dict_id=0, level=0)\n"
 "\n"
 "Train a dictionary from sample data using the COVER algorithm.\n"
 "\n"
-"This behaves like ``train_dictionary()`` except a different algorithm is\n"
-"used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n"
-"``d``. These control the *segment size* and *dmer size*. A reasonable range\n"
-"for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
+"A compression dictionary of size ``dict_size`` will be created from the\n"
+"iterable of ``samples``. The raw dictionary bytes will be returned.\n"
+"\n"
+"The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
+"*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
+"``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
 "``d`` must be less than or equal to ``k``.\n"
+"\n"
+"``steps`` can be specified to control the number of steps through potential\n"
+"values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
+"those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
+"will be varied in this mode.\n"
+"\n"
+"``threads`` can specify how many threads to use to test various ``k`` and\n"
+"``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
+"a single thread is used.\n"
+"\n"
+"When ``k`` and ``d`` are not defined, default values are used and the\n"
+"algorithm will perform multiple iterations - or steps - to try to find\n"
+"ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
+"will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
+"multiple ``k`` and ``d`` variations.\n"
 );
 
 static char zstd_doc[] = "Interface to zstandard";
 
 static PyMethodDef zstd_methods[] = {
-	/* TODO remove since it is a method on CompressionParameters. */
-	{ "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size,
-	METH_VARARGS, estimate_compression_context_size__doc__ },
 	{ "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
 	METH_NOARGS, estimate_decompression_context_size__doc__ },
-	{ "get_compression_parameters", (PyCFunction)get_compression_parameters,
-	METH_VARARGS, get_compression_parameters__doc__ },
+	{ "frame_content_size", (PyCFunction)frame_content_size,
+	METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
+	{ "frame_header_size", (PyCFunction)frame_header_size,
+	METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
 	{ "get_frame_parameters", (PyCFunction)get_frame_parameters,
-	METH_VARARGS, get_frame_parameters__doc__ },
+	METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
 	{ "train_dictionary", (PyCFunction)train_dictionary,
 	METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
-	{ "train_cover_dictionary", (PyCFunction)train_cover_dictionary,
-	METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ },
 	{ NULL, NULL }
 };
 
@@ -94,10 +183,12 @@
 void compressionparams_module_init(PyObject* mod);
 void constants_module_init(PyObject* mod);
 void compressiondict_module_init(PyObject* mod);
+void compressionreader_module_init(PyObject* mod);
 void compressionwriter_module_init(PyObject* mod);
 void compressoriterator_module_init(PyObject* mod);
 void decompressor_module_init(PyObject* mod);
 void decompressobj_module_init(PyObject* mod);
+void decompressionreader_module_init(PyObject *mod);
 void decompressionwriter_module_init(PyObject* mod);
 void decompressoriterator_module_init(PyObject* mod);
 void frameparams_module_init(PyObject* mod);
@@ -118,7 +209,7 @@
 	   We detect this mismatch here and refuse to load the module if this
 	   scenario is detected.
 	*/
-	if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) {
+	if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
 		PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
 		return;
 	}
@@ -128,16 +219,24 @@
 	compressiondict_module_init(m);
 	compressobj_module_init(m);
 	compressor_module_init(m);
+	compressionreader_module_init(m);
 	compressionwriter_module_init(m);
 	compressoriterator_module_init(m);
 	constants_module_init(m);
 	decompressor_module_init(m);
 	decompressobj_module_init(m);
+	decompressionreader_module_init(m);
 	decompressionwriter_module_init(m);
 	decompressoriterator_module_init(m);
 	frameparams_module_init(m);
 }
 
+#if defined(__GNUC__) && (__GNUC__ >= 4) /* compilers reporting __GNUC__ 4 or newer */
+#  define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
+#else /* anywhere else the macro expands to nothing */
+#  define PYTHON_ZSTD_VISIBILITY
+#endif /* PYTHON_ZSTD_VISIBILITY: prefixed to the module init function definitions */
+
 #if PY_MAJOR_VERSION >= 3
 static struct PyModuleDef zstd_module = {
 	PyModuleDef_HEAD_INIT,
@@ -147,7 +246,7 @@
 	zstd_methods
 };
 
-PyMODINIT_FUNC PyInit_zstd(void) {
+PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
 	PyObject *m = PyModule_Create(&zstd_module);
 	if (m) {
 		zstd_module_init(m);
@@ -159,7 +258,7 @@
 	return m;
 }
 #else
-PyMODINIT_FUNC initzstd(void) {
+PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
 	PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
 	if (m) {
 		zstd_module_init(m);
@@ -211,3 +310,33 @@
 
 	return i;
 }
+
+/* Safer version of _PyBytes_Resize().
+ *
+ * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
+ * we can get an object with a refcount > 1, even if it was just created
+ * with PyBytes_FromStringAndSize()! That's because (at least) CPython
+ * pre-allocates PyBytes instances of size 1 for every possible byte value.
+ * Such objects are swapped for a new ``size``-byte object; old data is cut to fit.
+ * Returns 0 on success. If non-0 is returned, obj may or may not be NULL.
+ */
+int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
+	Py_ssize_t keep;
+	PyObject* tmp;
+
+	if ((*obj)->ob_refcnt == 1) {
+		return _PyBytes_Resize(obj, size);
+	}
+
+	tmp = PyBytes_FromStringAndSize(NULL, size);
+	if (!tmp) {
+		return -1;
+	}
+	/* Never copy more than the new object holds, or shrinking would overflow. */
+	keep = PyBytes_GET_SIZE(*obj) < size ? PyBytes_GET_SIZE(*obj) : size;
+	memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), (size_t)keep);
+
+	Py_DECREF(*obj);
+	*obj = tmp;
+	return 0;
+}
\ No newline at end of file