comparison contrib/python-zstandard/c-ext/compressor.c @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1. This was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py, which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need to be adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There are a pair of bug fixes in 0.10.0 with regard to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparison
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
9 #include "python-zstandard.h" 9 #include "python-zstandard.h"
10 #include "pool.h" 10 #include "pool.h"
11 11
12 extern PyObject* ZstdError; 12 extern PyObject* ZstdError;
13 13
14 int ensure_cctx(ZstdCompressor* compressor) { 14 int setup_cctx(ZstdCompressor* compressor) {
15 size_t zresult; 15 size_t zresult;
16 16
17 assert(compressor); 17 assert(compressor);
18 assert(compressor->cctx); 18 assert(compressor->cctx);
19 assert(compressor->params); 19 assert(compressor->params);
20
21 ZSTD_CCtx_reset(compressor->cctx);
22 20
23 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params); 21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
24 if (ZSTD_isError(zresult)) { 22 if (ZSTD_isError(zresult)) {
25 PyErr_Format(ZstdError, "could not set compression parameters: %s", 23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
26 ZSTD_getErrorName(zresult)); 24 ZSTD_getErrorName(zresult));
235 if (dict) { 233 if (dict) {
236 self->dict = dict; 234 self->dict = dict;
237 Py_INCREF(dict); 235 Py_INCREF(dict);
238 } 236 }
239 237
240 if (ensure_cctx(self)) { 238 if (setup_cctx(self)) {
241 return -1; 239 return -1;
242 } 240 }
243 241
244 return 0; 242 return 0;
245 } 243 }
246 244
247 static void ZstdCompressor_dealloc(ZstdCompressor* self) { 245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
344 if (!PyObject_HasAttrString(dest, "write")) { 342 if (!PyObject_HasAttrString(dest, "write")) {
345 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); 343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
346 return NULL; 344 return NULL;
347 } 345 }
348 346
349 if (ensure_cctx(self)) { 347 ZSTD_CCtx_reset(self->cctx);
350 return NULL;
351 }
352 348
353 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); 349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
354 if (ZSTD_isError(zresult)) { 350 if (ZSTD_isError(zresult)) {
355 PyErr_Format(ZstdError, "error setting source size: %s", 351 PyErr_Format(ZstdError, "error setting source size: %s",
356 ZSTD_getErrorName(zresult)); 352 ZSTD_getErrorName(zresult));
487 483
488 PyObject* source; 484 PyObject* source;
489 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; 485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
490 size_t readSize = ZSTD_CStreamInSize(); 486 size_t readSize = ZSTD_CStreamInSize();
491 ZstdCompressionReader* result = NULL; 487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
492 489
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist, 490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
494 &source, &sourceSize, &readSize)) { 491 &source, &sourceSize, &readSize)) {
495 return NULL; 492 return NULL;
496 } 493 }
518 PyErr_SetString(PyExc_TypeError, 515 PyErr_SetString(PyExc_TypeError,
519 "must pass an object with a read() method or that conforms to the buffer protocol"); 516 "must pass an object with a read() method or that conforms to the buffer protocol");
520 goto except; 517 goto except;
521 } 518 }
522 519
523 if (ensure_cctx(self)) { 520 ZSTD_CCtx_reset(self->cctx);
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source source: %s",
525 ZSTD_getErrorName(zresult));
524 goto except; 526 goto except;
525 } 527 }
526 528
527 result->compressor = self; 529 result->compressor = self;
528 Py_INCREF(self); 530 Py_INCREF(self);
529 result->sourceSize = sourceSize;
530 531
531 return result; 532 return result;
532 533
533 except: 534 except:
534 Py_CLEAR(result); 535 Py_CLEAR(result);
574 PyErr_SetString(PyExc_ValueError, 575 PyErr_SetString(PyExc_ValueError,
575 "data buffer should be contiguous and have at most one dimension"); 576 "data buffer should be contiguous and have at most one dimension");
576 goto finally; 577 goto finally;
577 } 578 }
578 579
579 if (ensure_cctx(self)) { 580 ZSTD_CCtx_reset(self->cctx);
580 goto finally;
581 }
582 581
583 destSize = ZSTD_compressBound(source.len); 582 destSize = ZSTD_compressBound(source.len);
584 output = PyBytes_FromStringAndSize(NULL, destSize); 583 output = PyBytes_FromStringAndSize(NULL, destSize);
585 if (!output) { 584 if (!output) {
586 goto finally; 585 goto finally;
650 649
651 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) { 650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
652 return NULL; 651 return NULL;
653 } 652 }
654 653
655 if (ensure_cctx(self)) { 654 ZSTD_CCtx_reset(self->cctx);
656 return NULL;
657 }
658 655
659 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize); 656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
660 if (ZSTD_isError(zresult)) { 657 if (ZSTD_isError(zresult)) {
661 PyErr_Format(ZstdError, "error setting source size: %s", 658 PyErr_Format(ZstdError, "error setting source size: %s",
662 ZSTD_getErrorName(zresult)); 659 ZSTD_getErrorName(zresult));
741 PyErr_SetString(PyExc_ValueError, 738 PyErr_SetString(PyExc_ValueError,
742 "must pass an object with a read() method or conforms to buffer protocol"); 739 "must pass an object with a read() method or conforms to buffer protocol");
743 goto except; 740 goto except;
744 } 741 }
745 742
746 if (ensure_cctx(self)) { 743 ZSTD_CCtx_reset(self->cctx);
747 return NULL;
748 }
749 744
750 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); 745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
751 if (ZSTD_isError(zresult)) { 746 if (ZSTD_isError(zresult)) {
752 PyErr_Format(ZstdError, "error setting source size: %s", 747 PyErr_Format(ZstdError, "error setting source size: %s",
753 ZSTD_getErrorName(zresult)); 748 ZSTD_getErrorName(zresult));
815 if (!PyObject_HasAttrString(writer, "write")) { 810 if (!PyObject_HasAttrString(writer, "write")) {
816 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); 811 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
817 return NULL; 812 return NULL;
818 } 813 }
819 814
820 if (ensure_cctx(self)) { 815 ZSTD_CCtx_reset(self->cctx);
821 return NULL;
822 }
823 816
824 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); 817 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
825 if (!result) { 818 if (!result) {
826 return NULL; 819 return NULL;
827 } 820 }
835 result->sourceSize = sourceSize; 828 result->sourceSize = sourceSize;
836 result->outSize = outSize; 829 result->outSize = outSize;
837 result->bytesCompressed = 0; 830 result->bytesCompressed = 0;
838 831
839 return result; 832 return result;
833 }
834
835 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
836 "Create an object for iterative compressing to same-sized chunks.\n"
837 );
838
839 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
840 static char* kwlist[] = {
841 "size",
842 "chunk_size",
843 NULL
844 };
845
846 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
847 size_t chunkSize = ZSTD_CStreamOutSize();
848 ZstdCompressionChunker* chunker;
849 size_t zresult;
850
851 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
852 &sourceSize, &chunkSize)) {
853 return NULL;
854 }
855
856 ZSTD_CCtx_reset(self->cctx);
857
858 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError, "error setting source size: %s",
861 ZSTD_getErrorName(zresult));
862 return NULL;
863 }
864
865 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
866 if (!chunker) {
867 return NULL;
868 }
869
870 chunker->output.dst = PyMem_Malloc(chunkSize);
871 if (!chunker->output.dst) {
872 PyErr_NoMemory();
873 Py_DECREF(chunker);
874 return NULL;
875 }
876 chunker->output.size = chunkSize;
877 chunker->output.pos = 0;
878
879 chunker->compressor = self;
880 Py_INCREF(chunker->compressor);
881
882 chunker->chunkSize = chunkSize;
883
884 return chunker;
840 } 885 }
841 886
842 typedef struct { 887 typedef struct {
843 void* sourceData; 888 void* sourceData;
844 size_t sourceSize; 889 size_t sourceSize;
1522 1567
1523 return result; 1568 return result;
1524 } 1569 }
1525 1570
1526 static PyMethodDef ZstdCompressor_methods[] = { 1571 static PyMethodDef ZstdCompressor_methods[] = {
1572 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1573 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1527 { "compress", (PyCFunction)ZstdCompressor_compress, 1574 { "compress", (PyCFunction)ZstdCompressor_compress,
1528 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, 1575 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1529 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, 1576 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1530 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, 1577 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1531 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, 1578 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,