--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/c-ext/decompressoriterator.c Thu Nov 10 22:15:58 2016 -0800
@@ -0,0 +1,254 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
+"Represents an iterator of decompressed data.\n"
+);
+
+static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
+ Py_XDECREF(self->decompressor);
+ Py_XDECREF(self->reader);
+
+ if (self->buffer) {
+ PyBuffer_Release(self->buffer);
+ PyMem_FREE(self->buffer);
+ self->buffer = NULL;
+ }
+
+ if (self->dstream) {
+ ZSTD_freeDStream(self->dstream);
+ self->dstream = NULL;
+ }
+
+ if (self->input.src) {
+ PyMem_Free((void*)self->input.src);
+ self->input.src = NULL;
+ }
+
+ PyObject_Del(self);
+}
+
+static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
+ Py_INCREF(self);
+ return self;
+}
+
+static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
+ size_t zresult;
+ PyObject* chunk;
+ DecompressorIteratorResult result;
+ size_t oldInputPos = self->input.pos;
+
+ result.chunk = NULL;
+
+ chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
+ if (!chunk) {
+ result.errored = 1;
+ return result;
+ }
+
+ self->output.dst = PyBytes_AsString(chunk);
+ self->output.size = self->outSize;
+ self->output.pos = 0;
+
+ Py_BEGIN_ALLOW_THREADS
+ zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
+ Py_END_ALLOW_THREADS
+
+ /* We're done with the pointer. Nullify to prevent anyone from getting a
+ handle on a Python object. */
+ self->output.dst = NULL;
+
+ if (ZSTD_isError(zresult)) {
+ Py_DECREF(chunk);
+ PyErr_Format(ZstdError, "zstd decompress error: %s",
+ ZSTD_getErrorName(zresult));
+ result.errored = 1;
+ return result;
+ }
+
+ self->readCount += self->input.pos - oldInputPos;
+
+ /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
+ if (0 == zresult) {
+ self->finishedInput = 1;
+ self->finishedOutput = 1;
+ }
+
+ /* If it produced output data, return it. */
+ if (self->output.pos) {
+ if (self->output.pos < self->outSize) {
+ if (_PyBytes_Resize(&chunk, self->output.pos)) {
+ result.errored = 1;
+ return result;
+ }
+ }
+ }
+ else {
+ Py_DECREF(chunk);
+ chunk = NULL;
+ }
+
+ result.errored = 0;
+ result.chunk = chunk;
+
+ return result;
+}
+
+static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
+ PyObject* readResult = NULL;
+ char* readBuffer;
+ Py_ssize_t readSize;
+ Py_ssize_t bufferRemaining;
+ DecompressorIteratorResult result;
+
+ if (self->finishedOutput) {
+ PyErr_SetString(PyExc_StopIteration, "output flushed");
+ return NULL;
+ }
+
+ /* If we have data left in the input, consume it. */
+ if (self->input.pos < self->input.size) {
+ result = read_decompressor_iterator(self);
+ if (result.chunk || result.errored) {
+ return result.chunk;
+ }
+
+ /* Else fall through to get more data from input. */
+ }
+
+read_from_source:
+
+ if (!self->finishedInput) {
+ if (self->reader) {
+ readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
+ if (!readResult) {
+ return NULL;
+ }
+
+ PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
+ }
+ else {
+ assert(self->buffer && self->buffer->buf);
+
+ /* Only support contiguous C arrays for now */
+ assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
+ assert(self->buffer->itemsize == 1);
+
+ /* TODO avoid memcpy() below */
+ readBuffer = (char *)self->buffer->buf + self->bufferOffset;
+ bufferRemaining = self->buffer->len - self->bufferOffset;
+ readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
+ self->bufferOffset += readSize;
+ }
+
+ if (readSize) {
+ if (!self->readCount && self->skipBytes) {
+ assert(self->skipBytes < self->inSize);
+ if ((Py_ssize_t)self->skipBytes >= readSize) {
+ PyErr_SetString(PyExc_ValueError,
+ "skip_bytes larger than first input chunk; "
+ "this scenario is currently unsupported");
+ Py_DecRef(readResult);
+ return NULL;
+ }
+
+ readBuffer = readBuffer + self->skipBytes;
+ readSize -= self->skipBytes;
+ }
+
+ /* Copy input into previously allocated buffer because it can live longer
+ than a single function call and we don't want to keep a ref to a Python
+ object around. This could be changed... */
+ memcpy((void*)self->input.src, readBuffer, readSize);
+ self->input.size = readSize;
+ self->input.pos = 0;
+ }
+ /* No bytes on first read must mean an empty input stream. */
+ else if (!self->readCount) {
+ self->finishedInput = 1;
+ self->finishedOutput = 1;
+ Py_DecRef(readResult);
+ PyErr_SetString(PyExc_StopIteration, "empty input");
+ return NULL;
+ }
+ else {
+ self->finishedInput = 1;
+ }
+
+ /* We've copied the data managed by memory. Discard the Python object. */
+ Py_DecRef(readResult);
+ }
+
+ result = read_decompressor_iterator(self);
+ if (result.errored || result.chunk) {
+ return result.chunk;
+ }
+
+ /* No new output data. Try again unless we know there is no more data. */
+ if (!self->finishedInput) {
+ goto read_from_source;
+ }
+
+ PyErr_SetString(PyExc_StopIteration, "input exhausted");
+ return NULL;
+}
+
+PyTypeObject ZstdDecompressorIteratorType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressorIterator", /* tp_name */
+ sizeof(ZstdDecompressorIterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdDecompressorIterator__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ ZstdDecompressorIterator_iter, /* tp_iter */
+ (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+void decompressoriterator_module_init(PyObject* mod) {
+ Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
+ if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
+ return;
+ }
+}