diff contrib/python-zstandard/c-ext/decompressionreader.c @ 37495:b1fb341d8a61

zstandard: vendor python-zstandard 0.9.0 This was just released. It features a number of goodies. More info at https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.4. The old version was 1.1.3. One of the changes between those versions is that zstandard is now dual licensed BSD + GPLv2 and the patent rights grant has been removed. Good riddance. The API should be backwards compatible. So no changes in core should be needed. However, there were a number of changes in the library that we'll want to adapt to. Those will be addressed in subsequent commits. Differential Revision: https://phab.mercurial-scm.org/D3198
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 09 Apr 2018 10:13:29 -0700
parents
children 73fef626dae3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/c-ext/decompressionreader.c	Mon Apr 09 10:13:29 2018 -0700
@@ -0,0 +1,459 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Set io.UnsupportedOperation as the pending Python exception.
+ *
+ * The exception class is looked up on the "io" module at call time. If the
+ * import or the attribute lookup fails, nothing further is raised here and
+ * whatever error that call left pending stays in place.
+ */
+static void set_unsupported_operation(void) {
+	PyObject* ioModule = PyImport_ImportModule("io");
+	PyObject* unsupported;
+
+	if (!ioModule) {
+		return;
+	}
+
+	unsupported = PyObject_GetAttrString(ioModule, "UnsupportedOperation");
+	if (unsupported) {
+		/* Raise the class with no argument, then drop our reference. */
+		PyErr_SetNone(unsupported);
+		Py_DECREF(unsupported);
+	}
+
+	Py_DECREF(ioModule);
+}
+
+/**
+ * tp_dealloc handler: release everything the reader owns, then free it.
+ */
+static void reader_dealloc(ZstdDecompressionReader* self) {
+	Py_XDECREF(self->decompressor);
+	Py_XDECREF(self->reader);
+	/*
+	 * read() keeps the last chunk obtained from the source in readResult
+	 * for as long as it still holds unconsumed bytes. Release it here so a
+	 * reader that is destroyed mid-stream does not leak that chunk.
+	 */
+	Py_XDECREF(self->readResult);
+
+	/* Only release the buffer view if one was actually acquired. */
+	if (self->buffer.buf) {
+		PyBuffer_Release(&self->buffer);
+	}
+
+	PyObject_Del(self);
+}
+
+/**
+ * __enter__(): mark the reader as being inside an active context manager.
+ *
+ * Registered as METH_NOARGS, so CPython invokes it with two arguments
+ * (self and an always-NULL second argument); the signature declares both.
+ *
+ * Raises ValueError if the reader has already been entered. Returns a new
+ * reference to self, or NULL on failure.
+ */
+static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self, PyObject* unused) {
+	(void)unused;
+
+	if (self->entered) {
+		PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
+		return NULL;
+	}
+
+	/* NOTE(review): ensure_dctx() is defined elsewhere; a non-zero return
+	   is treated as failure and presumably leaves an exception set. */
+	if (ensure_dctx(self->decompressor, 1)) {
+		return NULL;
+	}
+
+	self->entered = 1;
+
+	Py_INCREF(self);
+	return self;
+}
+
+/**
+ * __exit__(exc_type, exc_value, exc_tb): leave the context manager.
+ *
+ * Marks the reader as closed and no longer entered, and drops the source
+ * object, any pending input chunk, the source buffer view and the
+ * decompressor. Always returns False, so an in-flight exception is not
+ * suppressed.
+ */
+static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
+	PyObject* exc_type;
+	PyObject* exc_value;
+	PyObject* exc_tb;
+
+	if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
+		return NULL;
+	}
+
+	self->entered = 0;
+	self->closed = 1;
+
+	/* Release resources. */
+	Py_CLEAR(self->reader);
+
+	/*
+	 * self->input points either into readResult's memory or into
+	 * self->buffer. Forget it before releasing those so no dangling
+	 * pointer is left behind, and drop the pending chunk that read()
+	 * may still be holding.
+	 */
+	memset(&self->input, 0, sizeof(self->input));
+	Py_CLEAR(self->readResult);
+
+	if (self->buffer.buf) {
+		PyBuffer_Release(&self->buffer);
+		memset(&self->buffer, 0, sizeof(self->buffer));
+	}
+
+	Py_CLEAR(self->decompressor);
+
+	Py_RETURN_FALSE;
+}
+
+/**
+ * readable(): always True.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_readable(PyObject* self, PyObject* unused) {
+	(void)unused;
+	Py_RETURN_TRUE;
+}
+
+/**
+ * writable(): always False.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_writable(PyObject* self, PyObject* unused) {
+	(void)unused;
+	Py_RETURN_FALSE;
+}
+
+/**
+ * seekable(): always True. Note that seek() in this file rejects backwards
+ * seeks and SEEK_END.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_seekable(PyObject* self, PyObject* unused) {
+	(void)unused;
+	Py_RETURN_TRUE;
+}
+
+/**
+ * close(): flag the stream as closed and return None.
+ *
+ * Only the flag is set here; no owned references are released.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_close(ZstdDecompressionReader* self, PyObject* unused) {
+	(void)unused;
+
+	self->closed = 1;
+	Py_RETURN_NONE;
+}
+
+/**
+ * closed(): True once the closed flag has been set (by close() or
+ * __exit__()), False otherwise. This is registered as a method, not as an
+ * attribute.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_closed(ZstdDecompressionReader* self, PyObject* unused) {
+	(void)unused;
+
+	if (self->closed) {
+		Py_RETURN_TRUE;
+	}
+
+	Py_RETURN_FALSE;
+}
+
+/**
+ * flush(): no-op that returns None.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_flush(PyObject* self, PyObject* unused) {
+	(void)unused;
+	Py_RETURN_NONE;
+}
+
+/**
+ * isatty(): always False.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_isatty(PyObject* self, PyObject* unused) {
+	(void)unused;
+	Py_RETURN_FALSE;
+}
+
+/**
+ * read(size): decompress up to ``size`` bytes and return them as bytes.
+ *
+ * Preconditions checked here:
+ *   - the reader must be inside an active context manager (ZstdError);
+ *   - the stream must not be closed (ValueError);
+ *   - ``size`` must be >= 1 (ValueError).
+ *
+ * Once the decompressor has returned 0 (finishedOutput), every later call
+ * returns b"" before the arguments are even parsed.
+ *
+ * Input is taken either from self->reader, by calling its read() method
+ * for self->readSize bytes at a time, or from self->buffer, which is fed
+ * to the decompressor in one piece. Input is fed until the output is full
+ * or the input is exhausted, so a short result means the source hit EOF.
+ *
+ * Returns a new bytes object, or NULL with an exception set. The output
+ * object allocated for the call is released on every error path.
+ */
+static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = {
+		"size",
+		NULL
+	};
+
+	Py_ssize_t size = -1;
+	PyObject* result = NULL;
+	char* resultBuffer;
+	Py_ssize_t resultSize;
+	ZSTD_outBuffer output;
+	size_t zresult;
+
+	if (!self->entered) {
+		PyErr_SetString(ZstdError, "read() must be called from an active context manager");
+		return NULL;
+	}
+
+	if (self->closed) {
+		PyErr_SetString(PyExc_ValueError, "stream is closed");
+		return NULL;
+	}
+
+	if (self->finishedOutput) {
+		return PyBytes_FromStringAndSize("", 0);
+	}
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
+		return NULL;
+	}
+
+	if (size < 1) {
+		PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
+		return NULL;
+	}
+
+	/* Allocate the full requested size up front; it is shrunk at EOF. */
+	result = PyBytes_FromStringAndSize(NULL, size);
+	if (NULL == result) {
+		return NULL;
+	}
+
+	PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
+
+	output.dst = resultBuffer;
+	output.size = resultSize;
+	output.pos = 0;
+
+readinput:
+
+	/* Consume input data left over from last time. */
+	if (self->input.pos < self->input.size) {
+		/* The GIL is dropped only around the zstd call itself. */
+		Py_BEGIN_ALLOW_THREADS
+		zresult = ZSTD_decompress_generic(self->decompressor->dctx,
+			&output, &self->input);
+		Py_END_ALLOW_THREADS
+
+		/* Input exhausted. Clear our state tracking. */
+		if (self->input.pos == self->input.size) {
+			memset(&self->input, 0, sizeof(self->input));
+			Py_CLEAR(self->readResult);
+
+			/* A buffer source is fed in one piece, so it is now done. */
+			if (self->buffer.buf) {
+				self->finishedInput = 1;
+			}
+		}
+
+		if (ZSTD_isError(zresult)) {
+			PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
+			goto error;
+		}
+		else if (0 == zresult) {
+			self->finishedOutput = 1;
+		}
+
+		/* We fulfilled the full read request. Emit it. */
+		if (output.pos && output.pos == output.size) {
+			self->bytesDecompressed += output.size;
+			return result;
+		}
+
+		/*
+		 * There is more room in the output. Fall through to try to collect
+		 * more data so we can try to fill the output.
+		 */
+	}
+
+	if (!self->finishedInput) {
+		if (self->reader) {
+			Py_buffer buffer;
+
+			assert(self->readResult == NULL);
+			/* NOTE(review): "k" passes an unsigned long; confirm that the
+			   declared type of readSize matches on all target platforms. */
+			self->readResult = PyObject_CallMethod(self->reader, "read",
+				"k", self->readSize);
+			if (NULL == self->readResult) {
+				goto error;
+			}
+
+			memset(&buffer, 0, sizeof(buffer));
+
+			if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
+				/* Don't keep an object we cannot read from; a later call
+				   expects readResult to be NULL before fetching again. */
+				Py_CLEAR(self->readResult);
+				goto error;
+			}
+
+			/* EOF */
+			if (0 == buffer.len) {
+				self->finishedInput = 1;
+				Py_CLEAR(self->readResult);
+			}
+			else {
+				/* readResult stays referenced while input points into it. */
+				self->input.src = buffer.buf;
+				self->input.size = buffer.len;
+				self->input.pos = 0;
+			}
+
+			PyBuffer_Release(&buffer);
+		}
+		else {
+			assert(self->buffer.buf);
+			/*
+			 * We should only get here once since above block will exhaust
+			 * source buffer until finishedInput is set.
+			 */
+			assert(self->input.src == NULL);
+
+			self->input.src = self->buffer.buf;
+			self->input.size = self->buffer.len;
+			self->input.pos = 0;
+		}
+	}
+
+	if (self->input.size) {
+		goto readinput;
+	}
+
+	/* EOF */
+	self->bytesDecompressed += output.pos;
+
+	/* Trim the result down to the bytes actually produced. */
+	if (safe_pybytes_resize(&result, output.pos)) {
+		Py_XDECREF(result);
+		return NULL;
+	}
+
+	return result;
+
+error:
+	/* An exception is already set; drop the partially filled output. */
+	Py_DECREF(result);
+	return NULL;
+}
+
+/**
+ * readall(): not implemented; always raises NotImplementedError.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_readall(PyObject* self, PyObject* unused) {
+	(void)unused;
+	PyErr_SetNone(PyExc_NotImplementedError);
+	return NULL;
+}
+
+/**
+ * readline(): not implemented; always raises NotImplementedError.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_readline(PyObject* self, PyObject* unused) {
+	(void)unused;
+	PyErr_SetNone(PyExc_NotImplementedError);
+	return NULL;
+}
+
+/**
+ * readlines(): not implemented; always raises NotImplementedError.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_readlines(PyObject* self, PyObject* unused) {
+	(void)unused;
+	PyErr_SetNone(PyExc_NotImplementedError);
+	return NULL;
+}
+
+/**
+ * seek(pos, whence=0): advance the stream by reading and discarding output.
+ *
+ * Only forward movement is supported:
+ *   - SEEK_SET: ``pos`` must be >= 0 and not before the current position;
+ *   - SEEK_CUR: ``pos`` must be >= 0;
+ *   - SEEK_END: always rejected.
+ * Each violation raises ValueError. Any other ``whence`` value leaves the
+ * amount to skip at 0, so the call just reports the current position.
+ *
+ * Requires an active context manager (ZstdError otherwise) and an open
+ * stream (ValueError otherwise).
+ *
+ * Returns the number of bytes decompressed so far as a Python integer, or
+ * NULL with an exception set. Seeking stops early if the stream hits EOF.
+ */
+static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
+	Py_ssize_t pos;
+	int whence = 0;
+	unsigned long long readAmount = 0;
+	size_t defaultOutSize = ZSTD_DStreamOutSize();
+
+	if (!self->entered) {
+		PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
+		return NULL;
+	}
+
+	if (self->closed) {
+		PyErr_SetString(PyExc_ValueError, "stream is closed");
+		return NULL;
+	}
+
+	if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
+		return NULL;
+	}
+
+	if (whence == SEEK_SET) {
+		if (pos < 0) {
+			PyErr_SetString(PyExc_ValueError,
+				"cannot seek to negative position with SEEK_SET");
+			return NULL;
+		}
+
+		if ((unsigned long long)pos < self->bytesDecompressed) {
+			PyErr_SetString(PyExc_ValueError,
+				"cannot seek zstd decompression stream backwards");
+			return NULL;
+		}
+
+		readAmount = pos - self->bytesDecompressed;
+	}
+	else if (whence == SEEK_CUR) {
+		if (pos < 0) {
+			PyErr_SetString(PyExc_ValueError,
+				"cannot seek zstd decompression stream backwards");
+			return NULL;
+		}
+
+		readAmount = pos;
+	}
+	else if (whence == SEEK_END) {
+		/* We /could/ support this with pos==0. But let's not do that until someone
+		   needs it. */
+		PyErr_SetString(PyExc_ValueError,
+			"zstd decompression streams cannot be seeked with SEEK_END");
+		return NULL;
+	}
+
+	/* It is a bit inefficient to do this via the Python API. But since there
+	   is a bit of state tracking involved to read from this type, it is the
+	   easiest to implement. */
+	while (readAmount) {
+		Py_ssize_t readSize;
+		PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
+			readAmount < defaultOutSize ? readAmount : defaultOutSize);
+
+		if (!readResult) {
+			return NULL;
+		}
+
+		readSize = PyBytes_GET_SIZE(readResult);
+
+		/* Only the length matters; release the discarded chunk right away
+		   so skipping ahead does not leak one bytes object per iteration. */
+		Py_DECREF(readResult);
+
+		/* Empty read means EOF. */
+		if (!readSize) {
+			break;
+		}
+
+		readAmount -= readSize;
+	}
+
+	return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
+}
+
+/**
+ * tell(): return the number of bytes decompressed so far as a Python
+ * integer.
+ *
+ * METH_NOARGS handlers are called with (self, NULL), so the second
+ * parameter is declared even though it is unused.
+ */
+static PyObject* reader_tell(ZstdDecompressionReader* self, PyObject* unused) {
+	(void)unused;
+
+	/* TODO should this raise OSError since stream isn't seekable? */
+	return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
+}
+
+/**
+ * write(...): unsupported on a read-only stream.
+ *
+ * The arguments are ignored. Always returns NULL after asking
+ * set_unsupported_operation() to raise io.UnsupportedOperation.
+ */
+static PyObject* reader_write(PyObject* self, PyObject* args) {
+	PyObject* failure = NULL;
+
+	(void)self;
+	(void)args;
+
+	set_unsupported_operation();
+
+	return failure;
+}
+
+/**
+ * writelines(...): unsupported on a read-only stream.
+ *
+ * The arguments are ignored. Always returns NULL after asking
+ * set_unsupported_operation() to raise io.UnsupportedOperation.
+ */
+static PyObject* reader_writelines(PyObject* self, PyObject* args) {
+	PyObject* failure = NULL;
+
+	(void)self;
+	(void)args;
+
+	set_unsupported_operation();
+
+	return failure;
+}
+
+/**
+ * tp_iter handler: iteration is not implemented, so this always raises
+ * NotImplementedError and returns NULL.
+ */
+static PyObject* reader_iter(PyObject* self) {
+	PyObject* failure = NULL;
+
+	(void)self;
+
+	PyErr_SetNone(PyExc_NotImplementedError);
+
+	return failure;
+}
+
+/**
+ * tp_iternext handler: iteration is not implemented, so this always raises
+ * NotImplementedError and returns NULL.
+ */
+static PyObject* reader_iternext(PyObject* self) {
+	PyObject* failure = NULL;
+
+	(void)self;
+
+	PyErr_SetNone(PyExc_NotImplementedError);
+
+	return failure;
+}
+
+/*
+ * Method table for ZstdDecompressionReader.
+ *
+ * The docstrings describe a decompression reader: read() returns
+ * decompressed data and tell() reports decompressed bytes. The table is
+ * terminated by the all-NULL sentinel entry.
+ */
+static PyMethodDef reader_methods[] = {
+	{ "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
+	PyDoc_STR("Enter a decompression context") },
+	{ "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
+	PyDoc_STR("Exit a decompression context") },
+	{ "close", (PyCFunction)reader_close, METH_NOARGS,
+	PyDoc_STR("Close the stream so it cannot perform any more operations") },
+	{ "closed", (PyCFunction)reader_closed, METH_NOARGS,
+	PyDoc_STR("Whether stream is closed") },
+	{ "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
+	{ "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
+	{ "readable", (PyCFunction)reader_readable, METH_NOARGS,
+	PyDoc_STR("Returns True") },
+	{ "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
+	PyDoc_STR("read decompressed data") },
+	{ "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
+	{ "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
+	{ "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
+	{ "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
+	{ "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
+	PyDoc_STR("Returns True") },
+	{ "tell", (PyCFunction)reader_tell, METH_NOARGS,
+	PyDoc_STR("Returns current number of bytes decompressed") },
+	{ "writable", (PyCFunction)reader_writable, METH_NOARGS,
+	PyDoc_STR("Returns False") },
+	{ "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
+	{ "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
+	{ NULL, NULL }
+};
+
+/*
+ * Type object for zstd.ZstdDecompressionReader.
+ *
+ * The initializer is positional: each value is matched to its slot purely
+ * by order, so entries must not be added, removed or reordered without
+ * checking the PyTypeObject layout. Most slots are left at 0; the ones
+ * filled in are the deallocator, the default type flags, the iterator
+ * hooks, the method table and the generic allocator-based tp_new.
+ */
+PyTypeObject ZstdDecompressionReaderType = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"zstd.ZstdDecompressionReader", /* tp_name */
+	sizeof(ZstdDecompressionReader), /* tp_basicsize */
+	0, /* tp_itemsize */
+	(destructor)reader_dealloc, /* tp_dealloc */
+	0, /* tp_print */
+	0, /* tp_getattr */
+	0, /* tp_setattr */
+	0, /* tp_compare */
+	0, /* tp_repr */
+	0, /* tp_as_number */
+	0, /* tp_as_sequence */
+	0, /* tp_as_mapping */
+	0, /* tp_hash */
+	0, /* tp_call */
+	0, /* tp_str */
+	0, /* tp_getattro */
+	0, /* tp_setattro */
+	0, /* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT, /* tp_flags */
+	0, /* tp_doc */
+	0, /* tp_traverse */
+	0, /* tp_clear */
+	0, /* tp_richcompare */
+	0, /* tp_weaklistoffset */
+	/* Both iterator hooks below are stubs that raise NotImplementedError. */
+	reader_iter, /* tp_iter */
+	reader_iternext, /* tp_iternext */
+	reader_methods, /* tp_methods */
+	0, /* tp_members */
+	0, /* tp_getset */
+	0, /* tp_base */
+	0, /* tp_dict */
+	0, /* tp_descr_get */
+	0, /* tp_descr_set */
+	0, /* tp_dictoffset */
+	0, /* tp_init */
+	0, /* tp_alloc */
+	PyType_GenericNew, /* tp_new */
+};
+
+
+/**
+ * Finish initializing ZstdDecompressionReaderType.
+ *
+ * Sets the type's own type to PyType_Type and runs PyType_Ready() on it.
+ * ``mod`` is not used, and a PyType_Ready() failure is not reported to
+ * the caller beyond whatever exception it left pending.
+ */
+void decompressionreader_module_init(PyObject* mod) {
+	/* TODO make reader a sub-class of io.RawIOBase */
+	int status;
+
+	(void)mod;
+
+	Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
+
+	status = PyType_Ready(&ZstdDecompressionReaderType);
+	if (status < 0) {
+		return;
+	}
+}