|
1 /** |
|
2 * Copyright (c) 2016-present, Gregory Szorc |
|
3 * All rights reserved. |
|
4 * |
|
5 * This software may be modified and distributed under the terms |
|
6 * of the BSD license. See the LICENSE file for details. |
|
7 */ |
|
8 |
|
9 #include "python-zstandard.h" |
|
10 |
|
/* Smaller of two values. Function-like macro: each argument may be evaluated
   twice, so only pass expressions without side effects. */
#define min(a, b) ((a) < (b) ? (a) : (b))
|
12 |
|
13 extern PyObject* ZstdError; |
|
14 |
|
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__, |
|
16 "Represents an iterator of compressed data.\n" |
|
17 ); |
|
18 |
|
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) { |
|
20 Py_XDECREF(self->readResult); |
|
21 Py_XDECREF(self->compressor); |
|
22 Py_XDECREF(self->reader); |
|
23 |
|
24 if (self->buffer) { |
|
25 PyBuffer_Release(self->buffer); |
|
26 PyMem_FREE(self->buffer); |
|
27 self->buffer = NULL; |
|
28 } |
|
29 |
|
30 if (self->cstream) { |
|
31 ZSTD_freeCStream(self->cstream); |
|
32 self->cstream = NULL; |
|
33 } |
|
34 |
|
35 if (self->output.dst) { |
|
36 PyMem_Free(self->output.dst); |
|
37 self->output.dst = NULL; |
|
38 } |
|
39 |
|
40 PyObject_Del(self); |
|
41 } |
|
42 |
|
43 static PyObject* ZstdCompressorIterator_iter(PyObject* self) { |
|
44 Py_INCREF(self); |
|
45 return self; |
|
46 } |
|
47 |
|
48 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) { |
|
49 size_t zresult; |
|
50 PyObject* readResult = NULL; |
|
51 PyObject* chunk; |
|
52 char* readBuffer; |
|
53 Py_ssize_t readSize = 0; |
|
54 Py_ssize_t bufferRemaining; |
|
55 |
|
56 if (self->finishedOutput) { |
|
57 PyErr_SetString(PyExc_StopIteration, "output flushed"); |
|
58 return NULL; |
|
59 } |
|
60 |
|
61 feedcompressor: |
|
62 |
|
63 /* If we have data left in the input, consume it. */ |
|
64 if (self->input.pos < self->input.size) { |
|
65 Py_BEGIN_ALLOW_THREADS |
|
66 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input); |
|
67 Py_END_ALLOW_THREADS |
|
68 |
|
69 /* Release the Python object holding the input buffer. */ |
|
70 if (self->input.pos == self->input.size) { |
|
71 self->input.src = NULL; |
|
72 self->input.pos = 0; |
|
73 self->input.size = 0; |
|
74 Py_DECREF(self->readResult); |
|
75 self->readResult = NULL; |
|
76 } |
|
77 |
|
78 if (ZSTD_isError(zresult)) { |
|
79 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
80 return NULL; |
|
81 } |
|
82 |
|
83 /* If it produced output data, emit it. */ |
|
84 if (self->output.pos) { |
|
85 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
86 self->output.pos = 0; |
|
87 return chunk; |
|
88 } |
|
89 } |
|
90 |
|
91 /* We should never have output data sitting around after a previous call. */ |
|
92 assert(self->output.pos == 0); |
|
93 |
|
94 /* The code above should have either emitted a chunk and returned or consumed |
|
95 the entire input buffer. So the state of the input buffer is not |
|
96 relevant. */ |
|
97 if (!self->finishedInput) { |
|
98 if (self->reader) { |
|
99 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); |
|
100 if (!readResult) { |
|
101 PyErr_SetString(ZstdError, "could not read() from source"); |
|
102 return NULL; |
|
103 } |
|
104 |
|
105 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
106 } |
|
107 else { |
|
108 assert(self->buffer && self->buffer->buf); |
|
109 |
|
110 /* Only support contiguous C arrays. */ |
|
111 assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL); |
|
112 assert(self->buffer->itemsize == 1); |
|
113 |
|
114 readBuffer = (char*)self->buffer->buf + self->bufferOffset; |
|
115 bufferRemaining = self->buffer->len - self->bufferOffset; |
|
116 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
117 self->bufferOffset += readSize; |
|
118 } |
|
119 |
|
120 if (0 == readSize) { |
|
121 Py_XDECREF(readResult); |
|
122 self->finishedInput = 1; |
|
123 } |
|
124 else { |
|
125 self->readResult = readResult; |
|
126 } |
|
127 } |
|
128 |
|
129 /* EOF */ |
|
130 if (0 == readSize) { |
|
131 zresult = ZSTD_endStream(self->cstream, &self->output); |
|
132 if (ZSTD_isError(zresult)) { |
|
133 PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
134 ZSTD_getErrorName(zresult)); |
|
135 return NULL; |
|
136 } |
|
137 |
|
138 assert(self->output.pos); |
|
139 |
|
140 if (0 == zresult) { |
|
141 self->finishedOutput = 1; |
|
142 } |
|
143 |
|
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
145 self->output.pos = 0; |
|
146 return chunk; |
|
147 } |
|
148 |
|
149 /* New data from reader. Feed into compressor. */ |
|
150 self->input.src = readBuffer; |
|
151 self->input.size = readSize; |
|
152 self->input.pos = 0; |
|
153 |
|
154 Py_BEGIN_ALLOW_THREADS |
|
155 zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input); |
|
156 Py_END_ALLOW_THREADS |
|
157 |
|
158 /* The input buffer currently points to memory managed by Python |
|
159 (readBuffer). This object was allocated by this function. If it wasn't |
|
160 fully consumed, we need to release it in a subsequent function call. |
|
161 If it is fully consumed, do that now. |
|
162 */ |
|
163 if (self->input.pos == self->input.size) { |
|
164 self->input.src = NULL; |
|
165 self->input.pos = 0; |
|
166 self->input.size = 0; |
|
167 Py_XDECREF(self->readResult); |
|
168 self->readResult = NULL; |
|
169 } |
|
170 |
|
171 if (ZSTD_isError(zresult)) { |
|
172 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
173 return NULL; |
|
174 } |
|
175 |
|
176 assert(self->input.pos <= self->input.size); |
|
177 |
|
178 /* If we didn't write anything, start the process over. */ |
|
179 if (0 == self->output.pos) { |
|
180 goto feedcompressor; |
|
181 } |
|
182 |
|
183 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
184 self->output.pos = 0; |
|
185 return chunk; |
|
186 } |
|
187 |
|
188 PyTypeObject ZstdCompressorIteratorType = { |
|
189 PyVarObject_HEAD_INIT(NULL, 0) |
|
190 "zstd.ZstdCompressorIterator", /* tp_name */ |
|
191 sizeof(ZstdCompressorIterator), /* tp_basicsize */ |
|
192 0, /* tp_itemsize */ |
|
193 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */ |
|
194 0, /* tp_print */ |
|
195 0, /* tp_getattr */ |
|
196 0, /* tp_setattr */ |
|
197 0, /* tp_compare */ |
|
198 0, /* tp_repr */ |
|
199 0, /* tp_as_number */ |
|
200 0, /* tp_as_sequence */ |
|
201 0, /* tp_as_mapping */ |
|
202 0, /* tp_hash */ |
|
203 0, /* tp_call */ |
|
204 0, /* tp_str */ |
|
205 0, /* tp_getattro */ |
|
206 0, /* tp_setattro */ |
|
207 0, /* tp_as_buffer */ |
|
208 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
209 ZstdCompressorIterator__doc__, /* tp_doc */ |
|
210 0, /* tp_traverse */ |
|
211 0, /* tp_clear */ |
|
212 0, /* tp_richcompare */ |
|
213 0, /* tp_weaklistoffset */ |
|
214 ZstdCompressorIterator_iter, /* tp_iter */ |
|
215 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */ |
|
216 0, /* tp_methods */ |
|
217 0, /* tp_members */ |
|
218 0, /* tp_getset */ |
|
219 0, /* tp_base */ |
|
220 0, /* tp_dict */ |
|
221 0, /* tp_descr_get */ |
|
222 0, /* tp_descr_set */ |
|
223 0, /* tp_dictoffset */ |
|
224 0, /* tp_init */ |
|
225 0, /* tp_alloc */ |
|
226 PyType_GenericNew, /* tp_new */ |
|
227 }; |
|
228 |
|
229 void compressoriterator_module_init(PyObject* mod) { |
|
230 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type; |
|
231 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) { |
|
232 return; |
|
233 } |
|
234 } |