contrib/python-zstandard/c-ext/decompressoriterator.c
changeset 30435 b86a448a2965
child 31796 e0dc40530c5a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/c-ext/decompressoriterator.c	Thu Nov 10 22:15:58 2016 -0800
@@ -0,0 +1,254 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
+"Represents an iterator of decompressed data.\n"
+);
+
+static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
+	Py_XDECREF(self->decompressor);
+	Py_XDECREF(self->reader);
+
+	if (self->buffer) {
+		PyBuffer_Release(self->buffer);
+		PyMem_FREE(self->buffer);
+		self->buffer = NULL;
+	}
+
+	if (self->dstream) {
+		ZSTD_freeDStream(self->dstream);
+		self->dstream = NULL;
+	}
+
+	if (self->input.src) {
+		PyMem_Free((void*)self->input.src);
+		self->input.src = NULL;
+	}
+
+	PyObject_Del(self);
+}
+
+static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
+	Py_INCREF(self);
+	return self;
+}
+
+static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
+	size_t zresult;
+	PyObject* chunk;
+	DecompressorIteratorResult result;
+	size_t oldInputPos = self->input.pos;
+
+	result.chunk = NULL;
+
+	chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
+	if (!chunk) {
+		result.errored = 1;
+		return result;
+	}
+
+	self->output.dst = PyBytes_AsString(chunk);
+	self->output.size = self->outSize;
+	self->output.pos = 0;
+
+	Py_BEGIN_ALLOW_THREADS
+	zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
+	Py_END_ALLOW_THREADS
+
+	/* We're done with the pointer. Nullify to prevent anyone from getting a
+	handle on a Python object. */
+	self->output.dst = NULL;
+
+	if (ZSTD_isError(zresult)) {
+		Py_DECREF(chunk);
+		PyErr_Format(ZstdError, "zstd decompress error: %s",
+			ZSTD_getErrorName(zresult));
+		result.errored = 1;
+		return result;
+	}
+
+	self->readCount += self->input.pos - oldInputPos;
+
+	/* Frame is fully decoded. Input exhausted and output sitting in buffer. */
+	if (0 == zresult) {
+		self->finishedInput = 1;
+		self->finishedOutput = 1;
+	}
+
+	/* If it produced output data, return it. */
+	if (self->output.pos) {
+		if (self->output.pos < self->outSize) {
+			if (_PyBytes_Resize(&chunk, self->output.pos)) {
+				result.errored = 1;
+				return result;
+			}
+		}
+	}
+	else {
+		Py_DECREF(chunk);
+		chunk = NULL;
+	}
+
+	result.errored = 0;
+	result.chunk = chunk;
+
+	return result;
+}
+
+static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
+	PyObject* readResult = NULL;
+	char* readBuffer;
+	Py_ssize_t readSize;
+	Py_ssize_t bufferRemaining;
+	DecompressorIteratorResult result;
+
+	if (self->finishedOutput) {
+		PyErr_SetString(PyExc_StopIteration, "output flushed");
+		return NULL;
+	}
+
+	/* If we have data left in the input, consume it. */
+	if (self->input.pos < self->input.size) {
+		result = read_decompressor_iterator(self);
+		if (result.chunk || result.errored) {
+			return result.chunk;
+		}
+
+		/* Else fall through to get more data from input. */
+	}
+
+read_from_source:
+
+	if (!self->finishedInput) {
+		if (self->reader) {
+			readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
+			if (!readResult) {
+				return NULL;
+			}
+
+			PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
+		}
+		else {
+			assert(self->buffer && self->buffer->buf);
+
+			/* Only support contiguous C arrays for now */
+			assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
+			assert(self->buffer->itemsize == 1);
+
+			/* TODO avoid memcpy() below */
+			readBuffer = (char *)self->buffer->buf + self->bufferOffset;
+			bufferRemaining = self->buffer->len - self->bufferOffset;
+			readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
+			self->bufferOffset += readSize;
+		}
+
+		if (readSize) {
+			if (!self->readCount && self->skipBytes) {
+				assert(self->skipBytes < self->inSize);
+				if ((Py_ssize_t)self->skipBytes >= readSize) {
+					PyErr_SetString(PyExc_ValueError,
+						"skip_bytes larger than first input chunk; "
+						"this scenario is currently unsupported");
+					Py_DecRef(readResult);
+					return NULL;
+				}
+
+				readBuffer = readBuffer + self->skipBytes;
+				readSize -= self->skipBytes;
+			}
+
+			/* Copy input into previously allocated buffer because it can live longer
+			than a single function call and we don't want to keep a ref to a Python
+			object around. This could be changed... */
+			memcpy((void*)self->input.src, readBuffer, readSize);
+			self->input.size = readSize;
+			self->input.pos = 0;
+		}
+		/* No bytes on first read must mean an empty input stream. */
+		else if (!self->readCount) {
+			self->finishedInput = 1;
+			self->finishedOutput = 1;
+			Py_DecRef(readResult);
+			PyErr_SetString(PyExc_StopIteration, "empty input");
+			return NULL;
+		}
+		else {
+			self->finishedInput = 1;
+		}
+
+		/* We've copied the data managed by memory. Discard the Python object. */
+		Py_DecRef(readResult);
+	}
+
+	result = read_decompressor_iterator(self);
+	if (result.errored || result.chunk) {
+		return result.chunk;
+	}
+
+	/* No new output data. Try again unless we know there is no more data. */
+	if (!self->finishedInput) {
+		goto read_from_source;
+	}
+
+	PyErr_SetString(PyExc_StopIteration, "input exhausted");
+	return NULL;
+}
+
+PyTypeObject ZstdDecompressorIteratorType = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"zstd.ZstdDecompressorIterator",   /* tp_name */
+	sizeof(ZstdDecompressorIterator),  /* tp_basicsize */
+	0,                                 /* tp_itemsize */
+	(destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
+	0,                                 /* tp_print */
+	0,                                 /* tp_getattr */
+	0,                                 /* tp_setattr */
+	0,                                 /* tp_compare */
+	0,                                 /* tp_repr */
+	0,                                 /* tp_as_number */
+	0,                                 /* tp_as_sequence */
+	0,                                 /* tp_as_mapping */
+	0,                                 /* tp_hash */
+	0,                                 /* tp_call */
+	0,                                 /* tp_str */
+	0,                                 /* tp_getattro */
+	0,                                 /* tp_setattro */
+	0,                                 /* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+	ZstdDecompressorIterator__doc__,   /* tp_doc */
+	0,                                 /* tp_traverse */
+	0,                                 /* tp_clear */
+	0,                                 /* tp_richcompare */
+	0,                                 /* tp_weaklistoffset */
+	ZstdDecompressorIterator_iter,     /* tp_iter */
+	(iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
+	0,                                 /* tp_methods */
+	0,                                 /* tp_members */
+	0,                                 /* tp_getset */
+	0,                                 /* tp_base */
+	0,                                 /* tp_dict */
+	0,                                 /* tp_descr_get */
+	0,                                 /* tp_descr_set */
+	0,                                 /* tp_dictoffset */
+	0,                                 /* tp_init */
+	0,                                 /* tp_alloc */
+	PyType_GenericNew,                 /* tp_new */
+};
+
+void decompressoriterator_module_init(PyObject* mod) {
+	Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
+	if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
+		return;
+	}
+}