diff -r 2e484bdea8c4 -r b86a448a2965 contrib/python-zstandard/c-ext/compressiondict.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/contrib/python-zstandard/c-ext/compressiondict.c Thu Nov 10 22:15:58 2016 -0800 @@ -0,0 +1,247 @@ +/** +* Copyright (c) 2016-present, Gregory Szorc +* All rights reserved. +* +* This software may be modified and distributed under the terms +* of the BSD license. See the LICENSE file for details. +*/ + +#include "python-zstandard.h" + +extern PyObject* ZstdError; + +ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { + static char *kwlist[] = { "dict_size", "samples", "parameters", NULL }; + size_t capacity; + PyObject* samples; + Py_ssize_t samplesLen; + PyObject* parameters = NULL; + ZDICT_params_t zparams; + Py_ssize_t sampleIndex; + Py_ssize_t sampleSize; + PyObject* sampleItem; + size_t zresult; + void* sampleBuffer; + void* sampleOffset; + size_t samplesSize = 0; + size_t* sampleSizes; + void* dict; + ZstdCompressionDict* result; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist, + &capacity, + &PyList_Type, &samples, + (PyObject*)&DictParametersType, ¶meters)) { + return NULL; + } + + /* Validate parameters first since it is easiest. */ + zparams.selectivityLevel = 0; + zparams.compressionLevel = 0; + zparams.notificationLevel = 0; + zparams.dictID = 0; + zparams.reserved[0] = 0; + zparams.reserved[1] = 0; + + if (parameters) { + /* TODO validate data ranges */ + zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0)); + zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1)); + zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2)); + zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3)); + } + + /* Figure out the size of the raw samples */ + samplesLen = PyList_Size(samples); + for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { + sampleItem = PyList_GetItem(samples, sampleIndex); + if (!PyBytes_Check(sampleItem)) { + PyErr_SetString(PyExc_ValueError, "samples must be bytes"); + /* TODO probably need to perform DECREF here */ + return NULL; + } + samplesSize += PyBytes_GET_SIZE(sampleItem); + } + + /* Now that we know the total size of the raw simples, we can allocate + a buffer for the raw data */ + sampleBuffer = malloc(samplesSize); + if (!sampleBuffer) { + PyErr_NoMemory(); + return NULL; + } + sampleSizes = malloc(samplesLen * sizeof(size_t)); + if (!sampleSizes) { + free(sampleBuffer); + PyErr_NoMemory(); + return NULL; + } + + sampleOffset = sampleBuffer; + /* Now iterate again and assemble the samples in the buffer */ + for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { + sampleItem = PyList_GetItem(samples, sampleIndex); + sampleSize = PyBytes_GET_SIZE(sampleItem); + sampleSizes[sampleIndex] = sampleSize; + memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); + sampleOffset = (char*)sampleOffset + sampleSize; + } + + dict = malloc(capacity); + if (!dict) { + free(sampleSizes); + free(sampleBuffer); + PyErr_NoMemory(); + return NULL; + } + + zresult = ZDICT_trainFromBuffer_advanced(dict, capacity, + sampleBuffer, sampleSizes, (unsigned int)samplesLen, + zparams); + if (ZDICT_isError(zresult)) { + PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult)); + free(dict); + free(sampleSizes); + free(sampleBuffer); + return NULL; + } + + result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); + if (!result) { + return NULL; + } + + result->dictData = dict; + result->dictSize = zresult; + return result; +} + + +PyDoc_STRVAR(ZstdCompressionDict__doc__, +"ZstdCompressionDict(data) - Represents a computed compression dictionary\n" +"\n" +"This type holds the results of a computed Zstandard compression dictionary.\n" +"Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n" +"obtained from another source into the constructor.\n" +); + +static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) { + const char* source; + Py_ssize_t sourceSize; + + self->dictData = NULL; + self->dictSize = 0; + +#if PY_MAJOR_VERSION >= 3 + if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { +#else + if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { +#endif + return -1; + } + + self->dictData = malloc(sourceSize); + if (!self->dictData) { + PyErr_NoMemory(); + return -1; + } + + memcpy(self->dictData, source, sourceSize); + self->dictSize = sourceSize; + + return 0; + } + +static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { + if (self->dictData) { + free(self->dictData); + self->dictData = NULL; + } + + PyObject_Del(self); +} + +static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { + unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); + + return PyLong_FromLong(dictID); +} + +static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) { + return PyBytes_FromStringAndSize(self->dictData, self->dictSize); +} + +static PyMethodDef ZstdCompressionDict_methods[] = { + { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS, + PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, + { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, + PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, + { NULL, NULL } +}; + +static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) { + return self->dictSize; +} + +static PySequenceMethods ZstdCompressionDict_sq = { + (lenfunc)ZstdCompressionDict_length, /* sq_length */ + 0, /* sq_concat */ + 0, /* sq_repeat */ + 0, /* sq_item */ + 0, /* sq_ass_item */ + 0, /* sq_contains */ + 0, /* sq_inplace_concat */ + 0 /* sq_inplace_repeat */ +}; + +PyTypeObject ZstdCompressionDictType = { + PyVarObject_HEAD_INIT(NULL, 0) + "zstd.ZstdCompressionDict", /* tp_name */ + sizeof(ZstdCompressionDict), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + &ZstdCompressionDict_sq, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + ZstdCompressionDict__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + ZstdCompressionDict_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)ZstdCompressionDict_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + +void compressiondict_module_init(PyObject* mod) { + Py_TYPE(&ZstdCompressionDictType) = &PyType_Type; + if (PyType_Ready(&ZstdCompressionDictType) < 0) { + return; + } + + Py_INCREF((PyObject*)&ZstdCompressionDictType); + PyModule_AddObject(mod, "ZstdCompressionDict", + (PyObject*)&ZstdCompressionDictType); +}