7 */ |
7 */ |
8 |
8 |
9 #include "python-zstandard.h" |
9 #include "python-zstandard.h" |
10 |
10 |
11 extern PyObject* ZstdError; |
11 extern PyObject* ZstdError; |
|
12 |
|
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) { |
|
14 ZSTD_customMem zmem; |
|
15 assert(!compressor->cdict); |
|
16 Py_BEGIN_ALLOW_THREADS |
|
17 memset(&zmem, 0, sizeof(zmem)); |
|
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, |
|
19 compressor->dict->dictSize, *zparams, zmem); |
|
20 Py_END_ALLOW_THREADS |
|
21 |
|
22 if (!compressor->cdict) { |
|
23 PyErr_SetString(ZstdError, "could not create compression dictionary"); |
|
24 return 1; |
|
25 } |
|
26 |
|
27 return 0; |
|
28 } |
12 |
29 |
13 /** |
30 /** |
14 * Initialize a zstd CStream from a ZstdCompressor instance. |
31 * Initialize a zstd CStream from a ZstdCompressor instance. |
15 * |
32 * |
16 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python |
33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python |
105 CompressionParametersObject* params = NULL; |
121 CompressionParametersObject* params = NULL; |
106 PyObject* writeChecksum = NULL; |
122 PyObject* writeChecksum = NULL; |
107 PyObject* writeContentSize = NULL; |
123 PyObject* writeContentSize = NULL; |
108 PyObject* writeDictID = NULL; |
124 PyObject* writeDictID = NULL; |
109 |
125 |
|
126 self->cctx = NULL; |
110 self->dict = NULL; |
127 self->dict = NULL; |
111 self->cparams = NULL; |
128 self->cparams = NULL; |
112 self->cdict = NULL; |
129 self->cdict = NULL; |
113 |
130 |
114 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist, |
131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist, |
337 |
367 |
338 return res; |
368 return res; |
339 } |
369 } |
340 |
370 |
341 PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
371 PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
342 "compress(data)\n" |
372 "compress(data, allow_empty=False)\n" |
343 "\n" |
373 "\n" |
344 "Compress data in a single operation.\n" |
374 "Compress data in a single operation.\n" |
345 "\n" |
375 "\n" |
346 "This is the simplest mechanism to perform compression: simply pass in a\n" |
376 "This is the simplest mechanism to perform compression: simply pass in a\n" |
347 "value and get a compressed value back. It is almost the most prone to abuse.\n" |
377 "value and get a compressed value back. It is almost the most prone to abuse.\n" |
348 "The input and output values must fit in memory, so passing in very large\n" |
378 "The input and output values must fit in memory, so passing in very large\n" |
349 "values can result in excessive memory usage. For this reason, one of the\n" |
379 "values can result in excessive memory usage. For this reason, one of the\n" |
350 "streaming based APIs is preferred for larger values.\n" |
380 "streaming based APIs is preferred for larger values.\n" |
351 ); |
381 ); |
352 |
382 |
353 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args) { |
383 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
384 static char* kwlist[] = { |
|
385 "data", |
|
386 "allow_empty", |
|
387 NULL |
|
388 }; |
|
389 |
354 const char* source; |
390 const char* source; |
355 Py_ssize_t sourceSize; |
391 Py_ssize_t sourceSize; |
|
392 PyObject* allowEmpty = NULL; |
356 size_t destSize; |
393 size_t destSize; |
357 ZSTD_CCtx* cctx; |
|
358 PyObject* output; |
394 PyObject* output; |
359 char* dest; |
395 char* dest; |
360 void* dictData = NULL; |
396 void* dictData = NULL; |
361 size_t dictSize = 0; |
397 size_t dictSize = 0; |
362 size_t zresult; |
398 size_t zresult; |
363 ZSTD_parameters zparams; |
399 ZSTD_parameters zparams; |
364 ZSTD_customMem zmem; |
|
365 |
400 |
366 #if PY_MAJOR_VERSION >= 3 |
401 #if PY_MAJOR_VERSION >= 3 |
367 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { |
402 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O", |
368 #else |
403 #else |
369 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { |
404 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O", |
370 #endif |
405 #endif |
|
406 kwlist, &source, &sourceSize, &allowEmpty)) { |
|
407 return NULL; |
|
408 } |
|
409 |
|
410 /* Limitation in zstd C API doesn't let decompression side distinguish |
|
411 between content size of 0 and unknown content size. This can make round |
|
412 tripping via Python difficult. Until this is fixed, require a flag |
|
413 to fire the footgun. |
|
414 https://github.com/indygreg/python-zstandard/issues/11 */ |
|
415 if (0 == sourceSize && self->fparams.contentSizeFlag |
|
416 && (!allowEmpty || PyObject_Not(allowEmpty))) { |
|
417 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); |
371 return NULL; |
418 return NULL; |
372 } |
419 } |
373 |
420 |
374 destSize = ZSTD_compressBound(sourceSize); |
421 destSize = ZSTD_compressBound(sourceSize); |
375 output = PyBytes_FromStringAndSize(NULL, destSize); |
422 output = PyBytes_FromStringAndSize(NULL, destSize); |
376 if (!output) { |
423 if (!output) { |
377 return NULL; |
424 return NULL; |
378 } |
425 } |
379 |
426 |
380 dest = PyBytes_AsString(output); |
427 dest = PyBytes_AsString(output); |
381 |
|
382 cctx = ZSTD_createCCtx(); |
|
383 if (!cctx) { |
|
384 Py_DECREF(output); |
|
385 PyErr_SetString(ZstdError, "could not create CCtx"); |
|
386 return NULL; |
|
387 } |
|
388 |
428 |
389 if (self->dict) { |
429 if (self->dict) { |
390 dictData = self->dict->dictData; |
430 dictData = self->dict->dictData; |
391 dictSize = self->dict->dictSize; |
431 dictSize = self->dict->dictSize; |
392 } |
432 } |
404 zparams.fParams = self->fparams; |
444 zparams.fParams = self->fparams; |
405 |
445 |
406 /* The raw dict data has to be processed before it can be used. Since this |
446 /* The raw dict data has to be processed before it can be used. Since this |
407 adds overhead - especially if multiple dictionary compression operations |
447 adds overhead - especially if multiple dictionary compression operations |
408 are performed on the same ZstdCompressor instance - we create a |
448 are performed on the same ZstdCompressor instance - we create a |
409 ZSTD_CDict once and reuse it for all operations. */ |
449 ZSTD_CDict once and reuse it for all operations. |
410 |
450 |
411 /* TODO the zparams (which can be derived from the source data size) used |
451 Note: the compression parameters used for the first invocation (possibly |
412 on first invocation are effectively reused for subsequent operations. This |
452 derived from the source size) will be reused on all subsequent invocations. |
413 may not be appropriate if input sizes vary significantly and could affect |
453 https://github.com/facebook/zstd/issues/358 contains more info. We could |
414 chosen compression parameters. |
454 potentially add an argument somewhere to control this behavior. |
415 https://github.com/facebook/zstd/issues/358 tracks this issue. */ |
455 */ |
416 if (dictData && !self->cdict) { |
456 if (dictData && !self->cdict) { |
417 Py_BEGIN_ALLOW_THREADS |
457 if (populate_cdict(self, dictData, dictSize, &zparams)) { |
418 memset(&zmem, 0, sizeof(zmem)); |
|
419 self->cdict = ZSTD_createCDict_advanced(dictData, dictSize, zparams, zmem); |
|
420 Py_END_ALLOW_THREADS |
|
421 |
|
422 if (!self->cdict) { |
|
423 Py_DECREF(output); |
458 Py_DECREF(output); |
424 ZSTD_freeCCtx(cctx); |
|
425 PyErr_SetString(ZstdError, "could not create compression dictionary"); |
|
426 return NULL; |
459 return NULL; |
427 } |
460 } |
428 } |
461 } |
429 |
462 |
430 Py_BEGIN_ALLOW_THREADS |
463 Py_BEGIN_ALLOW_THREADS |
431 /* By avoiding ZSTD_compress(), we don't necessarily write out content |
464 /* By avoiding ZSTD_compress(), we don't necessarily write out content |
432 size. This means the argument to ZstdCompressor to control frame |
465 size. This means the argument to ZstdCompressor to control frame |
433 parameters is honored. */ |
466 parameters is honored. */ |
434 if (self->cdict) { |
467 if (self->cdict) { |
435 zresult = ZSTD_compress_usingCDict(cctx, dest, destSize, |
468 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
436 source, sourceSize, self->cdict); |
469 source, sourceSize, self->cdict); |
437 } |
470 } |
438 else { |
471 else { |
439 zresult = ZSTD_compress_advanced(cctx, dest, destSize, |
472 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
440 source, sourceSize, dictData, dictSize, zparams); |
473 source, sourceSize, dictData, dictSize, zparams); |
441 } |
474 } |
442 Py_END_ALLOW_THREADS |
475 Py_END_ALLOW_THREADS |
443 |
|
444 ZSTD_freeCCtx(cctx); |
|
445 |
476 |
446 if (ZSTD_isError(zresult)) { |
477 if (ZSTD_isError(zresult)) { |
447 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
478 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
448 Py_CLEAR(output); |
479 Py_CLEAR(output); |
449 return NULL; |
480 return NULL; |
689 |
720 |
690 return result; |
721 return result; |
691 } |
722 } |
692 |
723 |
693 static PyMethodDef ZstdCompressor_methods[] = { |
724 static PyMethodDef ZstdCompressor_methods[] = { |
694 { "compress", (PyCFunction)ZstdCompressor_compress, METH_VARARGS, |
725 { "compress", (PyCFunction)ZstdCompressor_compress, |
695 ZstdCompressor_compress__doc__ }, |
726 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
696 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
727 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
697 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
728 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
698 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
729 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
699 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
730 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
700 { "read_from", (PyCFunction)ZstdCompressor_read_from, |
731 { "read_from", (PyCFunction)ZstdCompressor_read_from, |