contrib/python-zstandard/c-ext/decompressoriterator.c
changeset 30435 b86a448a2965
child 31796 e0dc40530c5a
equal deleted inserted replaced
30434:2e484bdea8c4 30435:b86a448a2965
       
     1 /**
       
     2 * Copyright (c) 2016-present, Gregory Szorc
       
     3 * All rights reserved.
       
     4 *
       
     5 * This software may be modified and distributed under the terms
       
     6 * of the BSD license. See the LICENSE file for details.
       
     7 */
       
     8 
       
     9 #include "python-zstandard.h"
       
    10 
       
    /*
     * Smaller of two values.
     *
     * This is a function-like macro, so each argument may be evaluated more
     * than once: do not pass expressions that have side effects.
     */
    #define min(a, b) (((b) > (a)) ? (a) : (b))
       
    12 
       
    13 extern PyObject* ZstdError;
       
    14 
       
    15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
       
    16 "Represents an iterator of decompressed data.\n"
       
    17 );
       
    18 
       
    19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
       
    20 	Py_XDECREF(self->decompressor);
       
    21 	Py_XDECREF(self->reader);
       
    22 
       
    23 	if (self->buffer) {
       
    24 		PyBuffer_Release(self->buffer);
       
    25 		PyMem_FREE(self->buffer);
       
    26 		self->buffer = NULL;
       
    27 	}
       
    28 
       
    29 	if (self->dstream) {
       
    30 		ZSTD_freeDStream(self->dstream);
       
    31 		self->dstream = NULL;
       
    32 	}
       
    33 
       
    34 	if (self->input.src) {
       
    35 		PyMem_Free((void*)self->input.src);
       
    36 		self->input.src = NULL;
       
    37 	}
       
    38 
       
    39 	PyObject_Del(self);
       
    40 }
       
    41 
       
    42 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
       
    43 	Py_INCREF(self);
       
    44 	return self;
       
    45 }
       
    46 
       
    47 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
       
    48 	size_t zresult;
       
    49 	PyObject* chunk;
       
    50 	DecompressorIteratorResult result;
       
    51 	size_t oldInputPos = self->input.pos;
       
    52 
       
    53 	result.chunk = NULL;
       
    54 
       
    55 	chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
       
    56 	if (!chunk) {
       
    57 		result.errored = 1;
       
    58 		return result;
       
    59 	}
       
    60 
       
    61 	self->output.dst = PyBytes_AsString(chunk);
       
    62 	self->output.size = self->outSize;
       
    63 	self->output.pos = 0;
       
    64 
       
    65 	Py_BEGIN_ALLOW_THREADS
       
    66 	zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input);
       
    67 	Py_END_ALLOW_THREADS
       
    68 
       
    69 	/* We're done with the pointer. Nullify to prevent anyone from getting a
       
    70 	handle on a Python object. */
       
    71 	self->output.dst = NULL;
       
    72 
       
    73 	if (ZSTD_isError(zresult)) {
       
    74 		Py_DECREF(chunk);
       
    75 		PyErr_Format(ZstdError, "zstd decompress error: %s",
       
    76 			ZSTD_getErrorName(zresult));
       
    77 		result.errored = 1;
       
    78 		return result;
       
    79 	}
       
    80 
       
    81 	self->readCount += self->input.pos - oldInputPos;
       
    82 
       
    83 	/* Frame is fully decoded. Input exhausted and output sitting in buffer. */
       
    84 	if (0 == zresult) {
       
    85 		self->finishedInput = 1;
       
    86 		self->finishedOutput = 1;
       
    87 	}
       
    88 
       
    89 	/* If it produced output data, return it. */
       
    90 	if (self->output.pos) {
       
    91 		if (self->output.pos < self->outSize) {
       
    92 			if (_PyBytes_Resize(&chunk, self->output.pos)) {
       
    93 				result.errored = 1;
       
    94 				return result;
       
    95 			}
       
    96 		}
       
    97 	}
       
    98 	else {
       
    99 		Py_DECREF(chunk);
       
   100 		chunk = NULL;
       
   101 	}
       
   102 
       
   103 	result.errored = 0;
       
   104 	result.chunk = chunk;
       
   105 
       
   106 	return result;
       
   107 }
       
   108 
       
   109 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
       
   110 	PyObject* readResult = NULL;
       
   111 	char* readBuffer;
       
   112 	Py_ssize_t readSize;
       
   113 	Py_ssize_t bufferRemaining;
       
   114 	DecompressorIteratorResult result;
       
   115 
       
   116 	if (self->finishedOutput) {
       
   117 		PyErr_SetString(PyExc_StopIteration, "output flushed");
       
   118 		return NULL;
       
   119 	}
       
   120 
       
   121 	/* If we have data left in the input, consume it. */
       
   122 	if (self->input.pos < self->input.size) {
       
   123 		result = read_decompressor_iterator(self);
       
   124 		if (result.chunk || result.errored) {
       
   125 			return result.chunk;
       
   126 		}
       
   127 
       
   128 		/* Else fall through to get more data from input. */
       
   129 	}
       
   130 
       
   131 read_from_source:
       
   132 
       
   133 	if (!self->finishedInput) {
       
   134 		if (self->reader) {
       
   135 			readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
       
   136 			if (!readResult) {
       
   137 				return NULL;
       
   138 			}
       
   139 
       
   140 			PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
       
   141 		}
       
   142 		else {
       
   143 			assert(self->buffer && self->buffer->buf);
       
   144 
       
   145 			/* Only support contiguous C arrays for now */
       
   146 			assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
       
   147 			assert(self->buffer->itemsize == 1);
       
   148 
       
   149 			/* TODO avoid memcpy() below */
       
   150 			readBuffer = (char *)self->buffer->buf + self->bufferOffset;
       
   151 			bufferRemaining = self->buffer->len - self->bufferOffset;
       
   152 			readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
       
   153 			self->bufferOffset += readSize;
       
   154 		}
       
   155 
       
   156 		if (readSize) {
       
   157 			if (!self->readCount && self->skipBytes) {
       
   158 				assert(self->skipBytes < self->inSize);
       
   159 				if ((Py_ssize_t)self->skipBytes >= readSize) {
       
   160 					PyErr_SetString(PyExc_ValueError,
       
   161 						"skip_bytes larger than first input chunk; "
       
   162 						"this scenario is currently unsupported");
       
   163 					Py_DecRef(readResult);
       
   164 					return NULL;
       
   165 				}
       
   166 
       
   167 				readBuffer = readBuffer + self->skipBytes;
       
   168 				readSize -= self->skipBytes;
       
   169 			}
       
   170 
       
   171 			/* Copy input into previously allocated buffer because it can live longer
       
   172 			than a single function call and we don't want to keep a ref to a Python
       
   173 			object around. This could be changed... */
       
   174 			memcpy((void*)self->input.src, readBuffer, readSize);
       
   175 			self->input.size = readSize;
       
   176 			self->input.pos = 0;
       
   177 		}
       
   178 		/* No bytes on first read must mean an empty input stream. */
       
   179 		else if (!self->readCount) {
       
   180 			self->finishedInput = 1;
       
   181 			self->finishedOutput = 1;
       
   182 			Py_DecRef(readResult);
       
   183 			PyErr_SetString(PyExc_StopIteration, "empty input");
       
   184 			return NULL;
       
   185 		}
       
   186 		else {
       
   187 			self->finishedInput = 1;
       
   188 		}
       
   189 
       
   190 		/* We've copied the data managed by memory. Discard the Python object. */
       
   191 		Py_DecRef(readResult);
       
   192 	}
       
   193 
       
   194 	result = read_decompressor_iterator(self);
       
   195 	if (result.errored || result.chunk) {
       
   196 		return result.chunk;
       
   197 	}
       
   198 
       
   199 	/* No new output data. Try again unless we know there is no more data. */
       
   200 	if (!self->finishedInput) {
       
   201 		goto read_from_source;
       
   202 	}
       
   203 
       
   204 	PyErr_SetString(PyExc_StopIteration, "input exhausted");
       
   205 	return NULL;
       
   206 }
       
   207 
       
   208 PyTypeObject ZstdDecompressorIteratorType = {
       
   209 	PyVarObject_HEAD_INIT(NULL, 0)
       
   210 	"zstd.ZstdDecompressorIterator",   /* tp_name */
       
   211 	sizeof(ZstdDecompressorIterator),  /* tp_basicsize */
       
   212 	0,                                 /* tp_itemsize */
       
   213 	(destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
       
   214 	0,                                 /* tp_print */
       
   215 	0,                                 /* tp_getattr */
       
   216 	0,                                 /* tp_setattr */
       
   217 	0,                                 /* tp_compare */
       
   218 	0,                                 /* tp_repr */
       
   219 	0,                                 /* tp_as_number */
       
   220 	0,                                 /* tp_as_sequence */
       
   221 	0,                                 /* tp_as_mapping */
       
   222 	0,                                 /* tp_hash */
       
   223 	0,                                 /* tp_call */
       
   224 	0,                                 /* tp_str */
       
   225 	0,                                 /* tp_getattro */
       
   226 	0,                                 /* tp_setattro */
       
   227 	0,                                 /* tp_as_buffer */
       
   228 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
       
   229 	ZstdDecompressorIterator__doc__,   /* tp_doc */
       
   230 	0,                                 /* tp_traverse */
       
   231 	0,                                 /* tp_clear */
       
   232 	0,                                 /* tp_richcompare */
       
   233 	0,                                 /* tp_weaklistoffset */
       
   234 	ZstdDecompressorIterator_iter,     /* tp_iter */
       
   235 	(iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
       
   236 	0,                                 /* tp_methods */
       
   237 	0,                                 /* tp_members */
       
   238 	0,                                 /* tp_getset */
       
   239 	0,                                 /* tp_base */
       
   240 	0,                                 /* tp_dict */
       
   241 	0,                                 /* tp_descr_get */
       
   242 	0,                                 /* tp_descr_set */
       
   243 	0,                                 /* tp_dictoffset */
       
   244 	0,                                 /* tp_init */
       
   245 	0,                                 /* tp_alloc */
       
   246 	PyType_GenericNew,                 /* tp_new */
       
   247 };
       
   248 
       
   249 void decompressoriterator_module_init(PyObject* mod) {
       
   250 	Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
       
   251 	if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
       
   252 		return;
       
   253 	}
       
   254 }