contrib/python-zstandard/c-ext/compressionreader.c
changeset 37495 b1fb341d8a61
child 40121 73fef626dae3
equal deleted inserted replaced
37494:1ce7a55b09d1 37495:b1fb341d8a61
       
     1 /**
       
     2 * Copyright (c) 2017-present, Gregory Szorc
       
     3 * All rights reserved.
       
     4 *
       
     5 * This software may be modified and distributed under the terms
       
     6 * of the BSD license. See the LICENSE file for details.
       
     7 */
       
     8 
       
     9 #include "python-zstandard.h"
       
    10 
       
    11 extern PyObject* ZstdError;
       
    12 
       
    13 static void set_unsupported_operation(void) {
       
    14 	PyObject* iomod;
       
    15 	PyObject* exc;
       
    16 
       
    17 	iomod = PyImport_ImportModule("io");
       
    18 	if (NULL == iomod) {
       
    19 		return;
       
    20 	}
       
    21 
       
    22 	exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
       
    23 	if (NULL == exc) {
       
    24 		Py_DECREF(iomod);
       
    25 		return;
       
    26 	}
       
    27 
       
    28 	PyErr_SetNone(exc);
       
    29 	Py_DECREF(exc);
       
    30 	Py_DECREF(iomod);
       
    31 }
       
    32 
       
    33 static void reader_dealloc(ZstdCompressionReader* self) {
       
    34 	Py_XDECREF(self->compressor);
       
    35 	Py_XDECREF(self->reader);
       
    36 
       
    37 	if (self->buffer.buf) {
       
    38 		PyBuffer_Release(&self->buffer);
       
    39 		memset(&self->buffer, 0, sizeof(self->buffer));
       
    40 	}
       
    41 
       
    42 	PyObject_Del(self);
       
    43 }
       
    44 
       
    45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
       
    46 	size_t zresult;
       
    47 
       
    48 	if (self->entered) {
       
    49 		PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
       
    50 		return NULL;
       
    51 	}
       
    52 
       
    53 	zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
       
    54 	if (ZSTD_isError(zresult)) {
       
    55 		PyErr_Format(ZstdError, "error setting source size: %s",
       
    56 			ZSTD_getErrorName(zresult));
       
    57 		return NULL;
       
    58 	}
       
    59 
       
    60 	self->entered = 1;
       
    61 
       
    62 	Py_INCREF(self);
       
    63 	return self;
       
    64 }
       
    65 
       
    66 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
       
    67 	PyObject* exc_type;
       
    68 	PyObject* exc_value;
       
    69 	PyObject* exc_tb;
       
    70 
       
    71 	if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
       
    72 		return NULL;
       
    73 	}
       
    74 
       
    75 	self->entered = 0;
       
    76 	self->closed = 1;
       
    77 
       
    78 	/* Release resources associated with source. */
       
    79 	Py_CLEAR(self->reader);
       
    80 	if (self->buffer.buf) {
       
    81 		PyBuffer_Release(&self->buffer);
       
    82 		memset(&self->buffer, 0, sizeof(self->buffer));
       
    83 	}
       
    84 
       
    85     Py_CLEAR(self->compressor);
       
    86 
       
    87 	Py_RETURN_FALSE;
       
    88 }
       
    89 
       
    90 static PyObject* reader_readable(ZstdCompressionReader* self) {
       
    91 	Py_RETURN_TRUE;
       
    92 }
       
    93 
       
    94 static PyObject* reader_writable(ZstdCompressionReader* self) {
       
    95 	Py_RETURN_FALSE;
       
    96 }
       
    97 
       
    98 static PyObject* reader_seekable(ZstdCompressionReader* self) {
       
    99 	Py_RETURN_FALSE;
       
   100 }
       
   101 
       
   102 static PyObject* reader_readline(PyObject* self, PyObject* args) {
       
   103 	set_unsupported_operation();
       
   104 	return NULL;
       
   105 }
       
   106 
       
   107 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
       
   108 	set_unsupported_operation();
       
   109 	return NULL;
       
   110 }
       
   111 
       
   112 static PyObject* reader_write(PyObject* self, PyObject* args) {
       
   113 	PyErr_SetString(PyExc_OSError, "stream is not writable");
       
   114 	return NULL;
       
   115 }
       
   116 
       
   117 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
       
   118 	PyErr_SetString(PyExc_OSError, "stream is not writable");
       
   119 	return NULL;
       
   120 }
       
   121 
       
   122 static PyObject* reader_isatty(PyObject* self) {
       
   123 	Py_RETURN_FALSE;
       
   124 }
       
   125 
       
   126 static PyObject* reader_flush(PyObject* self) {
       
   127 	Py_RETURN_NONE;
       
   128 }
       
   129 
       
   130 static PyObject* reader_close(ZstdCompressionReader* self) {
       
   131 	self->closed = 1;
       
   132 	Py_RETURN_NONE;
       
   133 }
       
   134 
       
   135 static PyObject* reader_closed(ZstdCompressionReader* self) {
       
   136 	if (self->closed) {
       
   137 		Py_RETURN_TRUE;
       
   138 	}
       
   139 	else {
       
   140 		Py_RETURN_FALSE;
       
   141 	}
       
   142 }
       
   143 
       
   144 static PyObject* reader_tell(ZstdCompressionReader* self) {
       
   145 	/* TODO should this raise OSError since stream isn't seekable? */
       
   146 	return PyLong_FromUnsignedLongLong(self->bytesCompressed);
       
   147 }
       
   148 
       
   149 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
       
   150 	static char* kwlist[] = {
       
   151 		"size",
       
   152 		NULL
       
   153 	};
       
   154 
       
   155 	Py_ssize_t size = -1;
       
   156 	PyObject* result = NULL;
       
   157 	char* resultBuffer;
       
   158 	Py_ssize_t resultSize;
       
   159 	size_t zresult;
       
   160 	size_t oldPos;
       
   161 
       
   162 	if (!self->entered) {
       
   163 		PyErr_SetString(ZstdError, "read() must be called from an active context manager");
       
   164 		return NULL;
       
   165 	}
       
   166 
       
   167 	if (self->closed) {
       
   168 		PyErr_SetString(PyExc_ValueError, "stream is closed");
       
   169 		return NULL;
       
   170 	}
       
   171 
       
   172 	if (self->finishedOutput) {
       
   173 		return PyBytes_FromStringAndSize("", 0);
       
   174 	}
       
   175 
       
   176 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "n", kwlist, &size)) {
       
   177 		return NULL;
       
   178 	}
       
   179 
       
   180 	if (size < 1) {
       
   181 		PyErr_SetString(PyExc_ValueError, "cannot read negative or size 0 amounts");
       
   182 		return NULL;
       
   183 	}
       
   184 
       
   185 	result = PyBytes_FromStringAndSize(NULL, size);
       
   186 	if (NULL == result) {
       
   187 		return NULL;
       
   188 	}
       
   189 
       
   190 	PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
       
   191 
       
   192 	self->output.dst = resultBuffer;
       
   193 	self->output.size = resultSize;
       
   194 	self->output.pos = 0;
       
   195 
       
   196 readinput:
       
   197 
       
   198 	/* If we have data left over, consume it. */
       
   199 	if (self->input.pos < self->input.size) {
       
   200 		oldPos = self->output.pos;
       
   201 
       
   202 		Py_BEGIN_ALLOW_THREADS
       
   203 		zresult = ZSTD_compress_generic(self->compressor->cctx,
       
   204 			&self->output, &self->input, ZSTD_e_continue);
       
   205 
       
   206 		Py_END_ALLOW_THREADS
       
   207 
       
   208 		self->bytesCompressed += self->output.pos - oldPos;
       
   209 
       
   210 		/* Input exhausted. Clear out state tracking. */
       
   211 		if (self->input.pos == self->input.size) {
       
   212 			memset(&self->input, 0, sizeof(self->input));
       
   213 			Py_CLEAR(self->readResult);
       
   214 
       
   215 			if (self->buffer.buf) {
       
   216 				self->finishedInput = 1;
       
   217 			}
       
   218 		}
       
   219 
       
   220 		if (ZSTD_isError(zresult)) {
       
   221 			PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
       
   222 			return NULL;
       
   223 		}
       
   224 
       
   225 		if (self->output.pos) {
       
   226 			/* If no more room in output, emit it. */
       
   227 			if (self->output.pos == self->output.size) {
       
   228 				memset(&self->output, 0, sizeof(self->output));
       
   229 				return result;
       
   230 			}
       
   231 
       
   232 			/*
       
   233 			 * There is room in the output. We fall through to below, which will either
       
   234 			 * get more input for us or will attempt to end the stream.
       
   235 			 */
       
   236 		}
       
   237 
       
   238 		/* Fall through to gather more input. */
       
   239 	}
       
   240 
       
   241 	if (!self->finishedInput) {
       
   242 		if (self->reader) {
       
   243 			Py_buffer buffer;
       
   244 
       
   245 			assert(self->readResult == NULL);
       
   246 			self->readResult = PyObject_CallMethod(self->reader, "read",
       
   247 				"k", self->readSize);
       
   248 			if (self->readResult == NULL) {
       
   249 				return NULL;
       
   250 			}
       
   251 
       
   252 			memset(&buffer, 0, sizeof(buffer));
       
   253 
       
   254 			if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
       
   255 				return NULL;
       
   256 			}
       
   257 
       
   258 			/* EOF */
       
   259 			if (0 == buffer.len) {
       
   260 				self->finishedInput = 1;
       
   261 				Py_CLEAR(self->readResult);
       
   262 			}
       
   263 			else {
       
   264 				self->input.src = buffer.buf;
       
   265 				self->input.size = buffer.len;
       
   266 				self->input.pos = 0;
       
   267 			}
       
   268 
       
   269 			PyBuffer_Release(&buffer);
       
   270 		}
       
   271 		else {
       
   272 			assert(self->buffer.buf);
       
   273 
       
   274 			self->input.src = self->buffer.buf;
       
   275 			self->input.size = self->buffer.len;
       
   276 			self->input.pos = 0;
       
   277 		}
       
   278 	}
       
   279 
       
   280 	if (self->input.size) {
       
   281 		goto readinput;
       
   282 	}
       
   283 
       
   284 	/* Else EOF */
       
   285 	oldPos = self->output.pos;
       
   286 
       
   287 	zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
       
   288 		&self->input, ZSTD_e_end);
       
   289 
       
   290 	self->bytesCompressed += self->output.pos - oldPos;
       
   291 
       
   292 	if (ZSTD_isError(zresult)) {
       
   293 		PyErr_Format(ZstdError, "error ending compression stream: %s",
       
   294 			ZSTD_getErrorName(zresult));
       
   295 		return NULL;
       
   296 	}
       
   297 
       
   298 	assert(self->output.pos);
       
   299 
       
   300 	if (0 == zresult) {
       
   301 		self->finishedOutput = 1;
       
   302 	}
       
   303 
       
   304 	if (safe_pybytes_resize(&result, self->output.pos)) {
       
   305 		Py_XDECREF(result);
       
   306 		return NULL;
       
   307 	}
       
   308 
       
   309 	memset(&self->output, 0, sizeof(self->output));
       
   310 
       
   311 	return result;
       
   312 }
       
   313 
       
   314 static PyObject* reader_readall(PyObject* self) {
       
   315 	PyErr_SetNone(PyExc_NotImplementedError);
       
   316 	return NULL;
       
   317 }
       
   318 
       
   319 static PyObject* reader_iter(PyObject* self) {
       
   320 	set_unsupported_operation();
       
   321 	return NULL;
       
   322 }
       
   323 
       
   324 static PyObject* reader_iternext(PyObject* self) {
       
   325 	set_unsupported_operation();
       
   326 	return NULL;
       
   327 }
       
   328 
       
   329 static PyMethodDef reader_methods[] = {
       
   330 	{ "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
       
   331 	PyDoc_STR("Enter a compression context") },
       
   332 	{ "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
       
   333 	PyDoc_STR("Exit a compression context") },
       
   334 	{ "close", (PyCFunction)reader_close, METH_NOARGS,
       
   335 	PyDoc_STR("Close the stream so it cannot perform any more operations") },
       
   336 	{ "closed", (PyCFunction)reader_closed, METH_NOARGS,
       
   337 	PyDoc_STR("Whether stream is closed") },
       
   338 	{ "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
       
   339 	{ "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
       
   340 	{ "readable", (PyCFunction)reader_readable, METH_NOARGS,
       
   341 	PyDoc_STR("Returns True") },
       
   342 	{ "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
       
   343 	{ "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
       
   344 	{ "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
       
   345 	{ "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
       
   346 	{ "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
       
   347 	PyDoc_STR("Returns False") },
       
   348 	{ "tell", (PyCFunction)reader_tell, METH_NOARGS,
       
   349 	PyDoc_STR("Returns current number of bytes compressed") },
       
   350 	{ "writable", (PyCFunction)reader_writable, METH_NOARGS,
       
   351 	PyDoc_STR("Returns False") },
       
   352 	{ "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
       
   353 	{ "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
       
   354 	{ NULL, NULL }
       
   355 };
       
   356 
       
   357 PyTypeObject ZstdCompressionReaderType = {
       
   358 	PyVarObject_HEAD_INIT(NULL, 0)
       
   359 	"zstd.ZstdCompressionReader", /* tp_name */
       
   360 	sizeof(ZstdCompressionReader), /* tp_basicsize */
       
   361 	0, /* tp_itemsize */
       
   362 	(destructor)reader_dealloc, /* tp_dealloc */
       
   363 	0, /* tp_print */
       
   364 	0, /* tp_getattr */
       
   365 	0, /* tp_setattr */
       
   366 	0, /* tp_compare */
       
   367 	0, /* tp_repr */
       
   368 	0, /* tp_as_number */
       
   369 	0, /* tp_as_sequence */
       
   370 	0, /* tp_as_mapping */
       
   371 	0, /* tp_hash */
       
   372 	0, /* tp_call */
       
   373 	0, /* tp_str */
       
   374 	0, /* tp_getattro */
       
   375 	0, /* tp_setattro */
       
   376 	0, /* tp_as_buffer */
       
   377 	Py_TPFLAGS_DEFAULT, /* tp_flags */
       
   378 	0, /* tp_doc */
       
   379 	0, /* tp_traverse */
       
   380 	0, /* tp_clear */
       
   381 	0, /* tp_richcompare */
       
   382 	0, /* tp_weaklistoffset */
       
   383 	reader_iter, /* tp_iter */
       
   384 	reader_iternext, /* tp_iternext */
       
   385 	reader_methods, /* tp_methods */
       
   386 	0, /* tp_members */
       
   387 	0, /* tp_getset */
       
   388 	0, /* tp_base */
       
   389 	0, /* tp_dict */
       
   390 	0, /* tp_descr_get */
       
   391 	0, /* tp_descr_set */
       
   392 	0, /* tp_dictoffset */
       
   393 	0, /* tp_init */
       
   394 	0, /* tp_alloc */
       
   395 	PyType_GenericNew, /* tp_new */
       
   396 };
       
   397 
       
   398 void compressionreader_module_init(PyObject* mod) {
       
   399 	/* TODO make reader a sub-class of io.RawIOBase */
       
   400 
       
   401 	Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
       
   402 	if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
       
   403 		return;
       
   404 	}
       
   405 }