contrib/python-zstandard/c-ext/compressoriterator.c
changeset 30435 b86a448a2965
child 31796 e0dc40530c5a
equal deleted inserted replaced
30434:2e484bdea8c4 30435:b86a448a2965
       
     1 /**
       
     2 * Copyright (c) 2016-present, Gregory Szorc
       
     3 * All rights reserved.
       
     4 *
       
     5 * This software may be modified and distributed under the terms
       
     6 * of the BSD license. See the LICENSE file for details.
       
     7 */
       
     8 
       
     9 #include "python-zstandard.h"
       
    10 
       
    11 #define min(a, b) (((a) < (b)) ? (a) : (b))
       
    12 
       
    13 extern PyObject* ZstdError;
       
    14 
       
    15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
       
    16 "Represents an iterator of compressed data.\n"
       
    17 );
       
    18 
       
    19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
       
    20 	Py_XDECREF(self->readResult);
       
    21 	Py_XDECREF(self->compressor);
       
    22 	Py_XDECREF(self->reader);
       
    23 
       
    24 	if (self->buffer) {
       
    25 		PyBuffer_Release(self->buffer);
       
    26 		PyMem_FREE(self->buffer);
       
    27 		self->buffer = NULL;
       
    28 	}
       
    29 
       
    30 	if (self->cstream) {
       
    31 		ZSTD_freeCStream(self->cstream);
       
    32 		self->cstream = NULL;
       
    33 	}
       
    34 
       
    35 	if (self->output.dst) {
       
    36 		PyMem_Free(self->output.dst);
       
    37 		self->output.dst = NULL;
       
    38 	}
       
    39 
       
    40 	PyObject_Del(self);
       
    41 }
       
    42 
       
    43 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
       
    44 	Py_INCREF(self);
       
    45 	return self;
       
    46 }
       
    47 
       
    48 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
       
    49 	size_t zresult;
       
    50 	PyObject* readResult = NULL;
       
    51 	PyObject* chunk;
       
    52 	char* readBuffer;
       
    53 	Py_ssize_t readSize = 0;
       
    54 	Py_ssize_t bufferRemaining;
       
    55 
       
    56 	if (self->finishedOutput) {
       
    57 		PyErr_SetString(PyExc_StopIteration, "output flushed");
       
    58 		return NULL;
       
    59 	}
       
    60 
       
    61 feedcompressor:
       
    62 
       
    63 	/* If we have data left in the input, consume it. */
       
    64 	if (self->input.pos < self->input.size) {
       
    65 		Py_BEGIN_ALLOW_THREADS
       
    66 		zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
       
    67 		Py_END_ALLOW_THREADS
       
    68 
       
    69 		/* Release the Python object holding the input buffer. */
       
    70 		if (self->input.pos == self->input.size) {
       
    71 			self->input.src = NULL;
       
    72 			self->input.pos = 0;
       
    73 			self->input.size = 0;
       
    74 			Py_DECREF(self->readResult);
       
    75 			self->readResult = NULL;
       
    76 		}
       
    77 
       
    78 		if (ZSTD_isError(zresult)) {
       
    79 			PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
       
    80 			return NULL;
       
    81 		}
       
    82 
       
    83 		/* If it produced output data, emit it. */
       
    84 		if (self->output.pos) {
       
    85 			chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
       
    86 			self->output.pos = 0;
       
    87 			return chunk;
       
    88 		}
       
    89 	}
       
    90 
       
    91 	/* We should never have output data sitting around after a previous call. */
       
    92 	assert(self->output.pos == 0);
       
    93 
       
    94 	/* The code above should have either emitted a chunk and returned or consumed
       
    95 	the entire input buffer. So the state of the input buffer is not
       
    96 	relevant. */
       
    97 	if (!self->finishedInput) {
       
    98 		if (self->reader) {
       
    99 			readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
       
   100 			if (!readResult) {
       
   101 				PyErr_SetString(ZstdError, "could not read() from source");
       
   102 				return NULL;
       
   103 			}
       
   104 
       
   105 			PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
       
   106 		}
       
   107 		else {
       
   108 			assert(self->buffer && self->buffer->buf);
       
   109 
       
   110 			/* Only support contiguous C arrays. */
       
   111 			assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL);
       
   112 			assert(self->buffer->itemsize == 1);
       
   113 
       
   114 			readBuffer = (char*)self->buffer->buf + self->bufferOffset;
       
   115 			bufferRemaining = self->buffer->len - self->bufferOffset;
       
   116 			readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
       
   117 			self->bufferOffset += readSize;
       
   118 		}
       
   119 
       
   120 		if (0 == readSize) {
       
   121 			Py_XDECREF(readResult);
       
   122 			self->finishedInput = 1;
       
   123 		}
       
   124 		else {
       
   125 			self->readResult = readResult;
       
   126 		}
       
   127 	}
       
   128 
       
   129 	/* EOF */
       
   130 	if (0 == readSize) {
       
   131 		zresult = ZSTD_endStream(self->cstream, &self->output);
       
   132 		if (ZSTD_isError(zresult)) {
       
   133 			PyErr_Format(ZstdError, "error ending compression stream: %s",
       
   134 				ZSTD_getErrorName(zresult));
       
   135 			return NULL;
       
   136 		}
       
   137 
       
   138 		assert(self->output.pos);
       
   139 
       
   140 		if (0 == zresult) {
       
   141 			self->finishedOutput = 1;
       
   142 		}
       
   143 
       
   144 		chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
       
   145 		self->output.pos = 0;
       
   146 		return chunk;
       
   147 	}
       
   148 
       
   149 	/* New data from reader. Feed into compressor. */
       
   150 	self->input.src = readBuffer;
       
   151 	self->input.size = readSize;
       
   152 	self->input.pos = 0;
       
   153 
       
   154 	Py_BEGIN_ALLOW_THREADS
       
   155 	zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input);
       
   156 	Py_END_ALLOW_THREADS
       
   157 
       
   158 	/* The input buffer currently points to memory managed by Python
       
   159 	(readBuffer). This object was allocated by this function. If it wasn't
       
   160 	fully consumed, we need to release it in a subsequent function call.
       
   161 	If it is fully consumed, do that now.
       
   162 	*/
       
   163 	if (self->input.pos == self->input.size) {
       
   164 		self->input.src = NULL;
       
   165 		self->input.pos = 0;
       
   166 		self->input.size = 0;
       
   167 		Py_XDECREF(self->readResult);
       
   168 		self->readResult = NULL;
       
   169 	}
       
   170 
       
   171 	if (ZSTD_isError(zresult)) {
       
   172 		PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
       
   173 		return NULL;
       
   174 	}
       
   175 
       
   176 	assert(self->input.pos <= self->input.size);
       
   177 
       
   178 	/* If we didn't write anything, start the process over. */
       
   179 	if (0 == self->output.pos) {
       
   180 		goto feedcompressor;
       
   181 	}
       
   182 
       
   183 	chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
       
   184 	self->output.pos = 0;
       
   185 	return chunk;
       
   186 }
       
   187 
       
   188 PyTypeObject ZstdCompressorIteratorType = {
       
   189 	PyVarObject_HEAD_INIT(NULL, 0)
       
   190 	"zstd.ZstdCompressorIterator",   /* tp_name */
       
   191 	sizeof(ZstdCompressorIterator),  /* tp_basicsize */
       
   192 	0,                               /* tp_itemsize */
       
   193 	(destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
       
   194 	0,                               /* tp_print */
       
   195 	0,                               /* tp_getattr */
       
   196 	0,                               /* tp_setattr */
       
   197 	0,                               /* tp_compare */
       
   198 	0,                               /* tp_repr */
       
   199 	0,                               /* tp_as_number */
       
   200 	0,                               /* tp_as_sequence */
       
   201 	0,                               /* tp_as_mapping */
       
   202 	0,                               /* tp_hash */
       
   203 	0,                               /* tp_call */
       
   204 	0,                               /* tp_str */
       
   205 	0,                               /* tp_getattro */
       
   206 	0,                               /* tp_setattro */
       
   207 	0,                               /* tp_as_buffer */
       
   208 	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
       
   209 	ZstdCompressorIterator__doc__,   /* tp_doc */
       
   210 	0,                               /* tp_traverse */
       
   211 	0,                               /* tp_clear */
       
   212 	0,                               /* tp_richcompare */
       
   213 	0,                               /* tp_weaklistoffset */
       
   214 	ZstdCompressorIterator_iter,     /* tp_iter */
       
   215 	(iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
       
   216 	0,                               /* tp_methods */
       
   217 	0,                               /* tp_members */
       
   218 	0,                               /* tp_getset */
       
   219 	0,                               /* tp_base */
       
   220 	0,                               /* tp_dict */
       
   221 	0,                               /* tp_descr_get */
       
   222 	0,                               /* tp_descr_set */
       
   223 	0,                               /* tp_dictoffset */
       
   224 	0,                               /* tp_init */
       
   225 	0,                               /* tp_alloc */
       
   226 	PyType_GenericNew,              /* tp_new */
       
   227 };
       
   228 
       
   229 void compressoriterator_module_init(PyObject* mod) {
       
   230 	Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
       
   231 	if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
       
   232 		return;
       
   233 	}
       
   234 }