Mercurial > public > mercurial-scm > hg
diff contrib/python-zstandard/tests/test_decompressor.py @ 37495:b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
This was just released. It features a number of goodies. More info at
https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/.
The clang-format ignore list was updated to reflect the new source
of files.
The project contains a vendored copy of zstandard 1.3.4. The old
version was 1.1.3. One of the changes between those versions is that
zstandard is now dual licensed BSD + GPLv2 and the patent rights grant
has been removed. Good riddance.
The API should be backwards compatible. So no changes in core
should be needed. However, there were a number of changes in the
library that we'll want to adapt to. Those will be addressed in
subsequent commits.
Differential Revision: https://phab.mercurial-scm.org/D3198
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 09 Apr 2018 10:13:29 -0700 |
parents | e0dc40530c5a |
children | 73fef626dae3 |
line wrap: on
line diff
--- a/contrib/python-zstandard/tests/test_decompressor.py Sun Apr 08 01:08:43 2018 +0200 +++ b/contrib/python-zstandard/tests/test_decompressor.py Mon Apr 09 10:13:29 2018 -0700 @@ -1,16 +1,14 @@ import io +import os import random import struct import sys +import unittest -try: - import unittest2 as unittest -except ImportError: - import unittest - -import zstd +import zstandard as zstd from .common import ( + generate_samples, make_cffi, OpCountingBytesIO, ) @@ -23,35 +21,124 @@ @make_cffi +class TestFrameHeaderSize(unittest.TestCase): + def test_empty(self): + with self.assertRaisesRegexp( + zstd.ZstdError, 'could not determine frame header size: Src size ' + 'is incorrect'): + zstd.frame_header_size(b'') + + def test_too_small(self): + with self.assertRaisesRegexp( + zstd.ZstdError, 'could not determine frame header size: Src size ' + 'is incorrect'): + zstd.frame_header_size(b'foob') + + def test_basic(self): + # It doesn't matter that it isn't a valid frame. + self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6) + + +@make_cffi +class TestFrameContentSize(unittest.TestCase): + def test_empty(self): + with self.assertRaisesRegexp(zstd.ZstdError, + 'error when determining content size'): + zstd.frame_content_size(b'') + + def test_too_small(self): + with self.assertRaisesRegexp(zstd.ZstdError, + 'error when determining content size'): + zstd.frame_content_size(b'foob') + + def test_bad_frame(self): + with self.assertRaisesRegexp(zstd.ZstdError, + 'error when determining content size'): + zstd.frame_content_size(b'invalid frame header') + + def test_unknown(self): + cctx = zstd.ZstdCompressor(write_content_size=False) + frame = cctx.compress(b'foobar') + + self.assertEqual(zstd.frame_content_size(frame), -1) + + def test_empty(self): + cctx = zstd.ZstdCompressor() + frame = cctx.compress(b'') + + self.assertEqual(zstd.frame_content_size(frame), 0) + + def test_basic(self): + cctx = zstd.ZstdCompressor() + frame = cctx.compress(b'foobar') + + self.assertEqual(zstd.frame_content_size(frame), 6) + + +@make_cffi +class TestDecompressor(unittest.TestCase): + def test_memory_size(self): + dctx = zstd.ZstdDecompressor() + + self.assertGreater(dctx.memory_size(), 100) + + +@make_cffi class TestDecompressor_decompress(unittest.TestCase): def test_empty_input(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): + with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): dctx.decompress(b'') def test_invalid_input(self): dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): + with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): dctx.decompress(b'foobar') + def test_input_types(self): + cctx = zstd.ZstdCompressor(level=1) + compressed = cctx.compress(b'foo') + + mutable_array = bytearray(len(compressed)) + mutable_array[:] = compressed + + sources = [ + memoryview(compressed), + bytearray(compressed), + mutable_array, + ] + + dctx = zstd.ZstdDecompressor() + for source in sources: + self.assertEqual(dctx.decompress(source), b'foo') + def test_no_content_size_in_frame(self): cctx = zstd.ZstdCompressor(write_content_size=False) compressed = cctx.compress(b'foobar') dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): + with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): dctx.decompress(compressed) def test_content_size_present(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() compressed = cctx.compress(b'foobar') dctx = zstd.ZstdDecompressor() decompressed = dctx.decompress(compressed) self.assertEqual(decompressed, b'foobar') + def test_empty_roundtrip(self): + cctx = zstd.ZstdCompressor() + compressed = cctx.compress(b'') + + dctx = zstd.ZstdDecompressor() + decompressed = dctx.decompress(compressed) + + self.assertEqual(decompressed, b'') + def test_max_output_size(self): cctx = zstd.ZstdCompressor(write_content_size=False) source = b'foobar' * 256 @@ -63,7 +150,8 @@ self.assertEqual(decompressed, source) # Input size - 1 fails - with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'): + with self.assertRaisesRegexp(zstd.ZstdError, + 'decompression error: did not decompress full frame'): dctx.decompress(compressed, max_output_size=len(source) - 1) # Input size + 1 works @@ -94,7 +182,7 @@ d = zstd.train_dictionary(8192, samples) orig = b'foobar' * 16384 - cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) + cctx = zstd.ZstdCompressor(level=1, dict_data=d) compressed = cctx.compress(orig) dctx = zstd.ZstdDecompressor(dict_data=d) @@ -113,7 +201,7 @@ sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) compressed = [] - cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) + cctx = zstd.ZstdCompressor(level=1, dict_data=d) for source in sources: compressed.append(cctx.compress(source)) @@ -122,6 +210,21 @@ decompressed = dctx.decompress(compressed[i]) self.assertEqual(decompressed, sources[i]) + def test_max_window_size(self): + with open(__file__, 'rb') as fh: + source = fh.read() + + # If we write a content size, the decompressor engages single pass + # mode and the window size doesn't come into play. + cctx = zstd.ZstdCompressor(write_content_size=False) + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor(max_window_size=1) + + with self.assertRaisesRegexp( + zstd.ZstdError, 'decompression error: Frame requires too much memory'): + dctx.decompress(frame, max_output_size=len(source)) + @make_cffi class TestDecompressor_copy_stream(unittest.TestCase): @@ -186,6 +289,211 @@ @make_cffi +class TestDecompressor_stream_reader(unittest.TestCase): + def test_context_manager(self): + dctx = zstd.ZstdDecompressor() + + reader = dctx.stream_reader(b'foo') + with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): + reader.read(1) + + with dctx.stream_reader(b'foo') as reader: + with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): + with reader as reader2: + pass + + def test_not_implemented(self): + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(b'foo') as reader: + with self.assertRaises(NotImplementedError): + reader.readline() + + with self.assertRaises(NotImplementedError): + reader.readlines() + + with self.assertRaises(NotImplementedError): + reader.readall() + + with self.assertRaises(NotImplementedError): + iter(reader) + + with self.assertRaises(NotImplementedError): + next(reader) + + with self.assertRaises(io.UnsupportedOperation): + reader.write(b'foo') + + with self.assertRaises(io.UnsupportedOperation): + reader.writelines([]) + + def test_constant_methods(self): + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(b'foo') as reader: + self.assertTrue(reader.readable()) + self.assertFalse(reader.writable()) + self.assertTrue(reader.seekable()) + self.assertFalse(reader.isatty()) + self.assertIsNone(reader.flush()) + + def test_read_closed(self): + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(b'foo') as reader: + reader.close() + with self.assertRaisesRegexp(ValueError, 'stream is closed'): + reader.read(1) + + def test_bad_read_size(self): + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(b'foo') as reader: + with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): + reader.read(-1) + + with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): + reader.read(0) + + def test_read_buffer(self): + cctx = zstd.ZstdCompressor() + + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(frame) as reader: + self.assertEqual(reader.tell(), 0) + + # We should get entire frame in one read. + result = reader.read(8192) + self.assertEqual(result, source) + self.assertEqual(reader.tell(), len(source)) + + # Read after EOF should return empty bytes. + self.assertEqual(reader.read(), b'') + self.assertEqual(reader.tell(), len(result)) + + self.assertTrue(reader.closed()) + + def test_read_buffer_small_chunks(self): + cctx = zstd.ZstdCompressor() + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor() + chunks = [] + + with dctx.stream_reader(frame, read_size=1) as reader: + while True: + chunk = reader.read(1) + if not chunk: + break + + chunks.append(chunk) + self.assertEqual(reader.tell(), sum(map(len, chunks))) + + self.assertEqual(b''.join(chunks), source) + + def test_read_stream(self): + cctx = zstd.ZstdCompressor() + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor() + with dctx.stream_reader(io.BytesIO(frame)) as reader: + self.assertEqual(reader.tell(), 0) + + chunk = reader.read(8192) + self.assertEqual(chunk, source) + self.assertEqual(reader.tell(), len(source)) + self.assertEqual(reader.read(), b'') + self.assertEqual(reader.tell(), len(source)) + + def test_read_stream_small_chunks(self): + cctx = zstd.ZstdCompressor() + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor() + chunks = [] + + with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: + while True: + chunk = reader.read(1) + if not chunk: + break + + chunks.append(chunk) + self.assertEqual(reader.tell(), sum(map(len, chunks))) + + self.assertEqual(b''.join(chunks), source) + + def test_read_after_exit(self): + cctx = zstd.ZstdCompressor() + frame = cctx.compress(b'foo' * 60) + + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(frame) as reader: + while reader.read(16): + pass + + with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): + reader.read(10) + + def test_illegal_seeks(self): + cctx = zstd.ZstdCompressor() + frame = cctx.compress(b'foo' * 60) + + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(frame) as reader: + with self.assertRaisesRegexp(ValueError, + 'cannot seek to negative position'): + reader.seek(-1, os.SEEK_SET) + + reader.read(1) + + with self.assertRaisesRegexp( + ValueError, 'cannot seek zstd decompression stream backwards'): + reader.seek(0, os.SEEK_SET) + + with self.assertRaisesRegexp( + ValueError, 'cannot seek zstd decompression stream backwards'): + reader.seek(-1, os.SEEK_CUR) + + with self.assertRaisesRegexp( + ValueError, + 'zstd decompression streams cannot be seeked with SEEK_END'): + reader.seek(0, os.SEEK_END) + + reader.close() + + with self.assertRaisesRegexp(ValueError, 'stream is closed'): + reader.seek(4, os.SEEK_SET) + + with self.assertRaisesRegexp( + zstd.ZstdError, 'seek\(\) must be called from an active context'): + reader.seek(0) + + def test_seek(self): + source = b'foobar' * 60 + cctx = zstd.ZstdCompressor() + frame = cctx.compress(source) + + dctx = zstd.ZstdDecompressor() + + with dctx.stream_reader(frame) as reader: + reader.seek(3) + self.assertEqual(reader.read(3), b'bar') + + reader.seek(4, os.SEEK_CUR) + self.assertEqual(reader.read(2), b'ar') + + +@make_cffi class TestDecompressor_decompressobj(unittest.TestCase): def test_simple(self): data = zstd.ZstdCompressor(level=1).compress(b'foobar') @@ -194,6 +502,24 @@ dobj = dctx.decompressobj() self.assertEqual(dobj.decompress(data), b'foobar') + def test_input_types(self): + compressed = zstd.ZstdCompressor(level=1).compress(b'foo') + + dctx = zstd.ZstdDecompressor() + + mutable_array = bytearray(len(compressed)) + mutable_array[:] = compressed + + sources = [ + memoryview(compressed), + bytearray(compressed), + mutable_array, + ] + + for source in sources: + dobj = dctx.decompressobj() + self.assertEqual(dobj.decompress(source), b'foo') + def test_reuse(self): data = zstd.ZstdCompressor(level=1).compress(b'foobar') @@ -204,22 +530,58 @@ with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): dobj.decompress(data) + def test_bad_write_size(self): + dctx = zstd.ZstdDecompressor() + + with self.assertRaisesRegexp(ValueError, 'write_size must be positive'): + dctx.decompressobj(write_size=0) + + def test_write_size(self): + source = b'foo' * 64 + b'bar' * 128 + data = zstd.ZstdCompressor(level=1).compress(source) + + dctx = zstd.ZstdDecompressor() + + for i in range(128): + dobj = dctx.decompressobj(write_size=i + 1) + self.assertEqual(dobj.decompress(data), source) def decompress_via_writer(data): buffer = io.BytesIO() dctx = zstd.ZstdDecompressor() - with dctx.write_to(buffer) as decompressor: + with dctx.stream_writer(buffer) as decompressor: decompressor.write(data) return buffer.getvalue() @make_cffi -class TestDecompressor_write_to(unittest.TestCase): +class TestDecompressor_stream_writer(unittest.TestCase): def test_empty_roundtrip(self): cctx = zstd.ZstdCompressor() empty = cctx.compress(b'') self.assertEqual(decompress_via_writer(empty), b'') + def test_input_types(self): + cctx = zstd.ZstdCompressor(level=1) + compressed = cctx.compress(b'foo') + + mutable_array = bytearray(len(compressed)) + mutable_array[:] = compressed + + sources = [ + memoryview(compressed), + bytearray(compressed), + mutable_array, + ] + + dctx = zstd.ZstdDecompressor() + for source in sources: + buffer = io.BytesIO() + with dctx.stream_writer(buffer) as decompressor: + decompressor.write(source) + + self.assertEqual(buffer.getvalue(), b'foo') + def test_large_roundtrip(self): chunks = [] for i in range(255): @@ -242,7 +604,7 @@ buffer = io.BytesIO() dctx = zstd.ZstdDecompressor() - with dctx.write_to(buffer) as decompressor: + with dctx.stream_writer(buffer) as decompressor: pos = 0 while pos < len(compressed): pos2 = pos + 8192 @@ -262,14 +624,14 @@ orig = b'foobar' * 16384 buffer = io.BytesIO() cctx = zstd.ZstdCompressor(dict_data=d) - with cctx.write_to(buffer) as compressor: - self.assertEqual(compressor.write(orig), 1544) + with cctx.stream_writer(buffer) as compressor: + self.assertEqual(compressor.write(orig), 0) compressed = buffer.getvalue() buffer = io.BytesIO() dctx = zstd.ZstdDecompressor(dict_data=d) - with dctx.write_to(buffer) as decompressor: + with dctx.stream_writer(buffer) as decompressor: self.assertEqual(decompressor.write(compressed), len(orig)) self.assertEqual(buffer.getvalue(), orig) @@ -277,7 +639,7 @@ def test_memory_size(self): dctx = zstd.ZstdDecompressor() buffer = io.BytesIO() - with dctx.write_to(buffer) as decompressor: + with dctx.stream_writer(buffer) as decompressor: size = decompressor.memory_size() self.assertGreater(size, 100000) @@ -286,7 +648,7 @@ source = zstd.ZstdCompressor().compress(b'foobarfoobar') dest = OpCountingBytesIO() dctx = zstd.ZstdDecompressor() - with dctx.write_to(dest, write_size=1) as decompressor: + with dctx.stream_writer(dest, write_size=1) as decompressor: s = struct.Struct('>B') for c in source: if not isinstance(c, str): @@ -298,29 +660,29 @@ @make_cffi -class TestDecompressor_read_from(unittest.TestCase): +class TestDecompressor_read_to_iter(unittest.TestCase): def test_type_validation(self): dctx = zstd.ZstdDecompressor() # Object with read() works. - dctx.read_from(io.BytesIO()) + dctx.read_to_iter(io.BytesIO()) # Buffer protocol works. - dctx.read_from(b'foobar') + dctx.read_to_iter(b'foobar') with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): - b''.join(dctx.read_from(True)) + b''.join(dctx.read_to_iter(True)) def test_empty_input(self): dctx = zstd.ZstdDecompressor() source = io.BytesIO() - it = dctx.read_from(source) + it = dctx.read_to_iter(source) # TODO this is arguably wrong. Should get an error about missing frame foo. with self.assertRaises(StopIteration): next(it) - it = dctx.read_from(b'') + it = dctx.read_to_iter(b'') with self.assertRaises(StopIteration): next(it) @@ -328,11 +690,11 @@ dctx = zstd.ZstdDecompressor() source = io.BytesIO(b'foobar') - it = dctx.read_from(source) + it = dctx.read_to_iter(source) with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): next(it) - it = dctx.read_from(b'foobar') + it = dctx.read_to_iter(b'foobar') with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): next(it) @@ -344,7 +706,7 @@ source.seek(0) dctx = zstd.ZstdDecompressor() - it = dctx.read_from(source) + it = dctx.read_to_iter(source) # No chunks should be emitted since there is no data. with self.assertRaises(StopIteration): @@ -358,17 +720,17 @@ dctx = zstd.ZstdDecompressor() with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): - b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1)) + b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): - b''.join(dctx.read_from(b'foobar', skip_bytes=10)) + b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) def test_skip_bytes(self): cctx = zstd.ZstdCompressor(write_content_size=False) compressed = cctx.compress(b'foobar') dctx = zstd.ZstdDecompressor() - output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3)) + output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3)) self.assertEqual(output, b'foobar') def test_large_output(self): @@ -382,7 +744,7 @@ compressed.seek(0) dctx = zstd.ZstdDecompressor() - it = dctx.read_from(compressed) + it = dctx.read_to_iter(compressed) chunks = [] chunks.append(next(it)) @@ -395,7 +757,7 @@ self.assertEqual(decompressed, source.getvalue()) # And again with buffer protocol. - it = dctx.read_from(compressed.getvalue()) + it = dctx.read_to_iter(compressed.getvalue()) chunks = [] chunks.append(next(it)) chunks.append(next(it)) @@ -406,12 +768,13 @@ decompressed = b''.join(chunks) self.assertEqual(decompressed, source.getvalue()) + @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') def test_large_input(self): bytes = list(struct.Struct('>B').pack(i) for i in range(256)) compressed = io.BytesIO() input_size = 0 cctx = zstd.ZstdCompressor(level=1) - with cctx.write_to(compressed) as compressor: + with cctx.stream_writer(compressed) as compressor: while True: compressor.write(random.choice(bytes)) input_size += 1 @@ -426,7 +789,7 @@ zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) dctx = zstd.ZstdDecompressor() - it = dctx.read_from(compressed) + it = dctx.read_to_iter(compressed) chunks = [] chunks.append(next(it)) @@ -440,7 +803,7 @@ self.assertEqual(len(decompressed), input_size) # And again with buffer protocol. - it = dctx.read_from(compressed.getvalue()) + it = dctx.read_to_iter(compressed.getvalue()) chunks = [] chunks.append(next(it)) @@ -460,7 +823,7 @@ source = io.BytesIO() compressed = io.BytesIO() - with cctx.write_to(compressed) as compressor: + with cctx.stream_writer(compressed) as compressor: for i in range(256): chunk = b'\0' * 1024 compressor.write(chunk) @@ -473,17 +836,34 @@ self.assertEqual(simple, source.getvalue()) compressed.seek(0) - streamed = b''.join(dctx.read_from(compressed)) + streamed = b''.join(dctx.read_to_iter(compressed)) self.assertEqual(streamed, source.getvalue()) def test_read_write_size(self): source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) dctx = zstd.ZstdDecompressor() - for chunk in dctx.read_from(source, read_size=1, write_size=1): + for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): self.assertEqual(len(chunk), 1) self.assertEqual(source._read_count, len(source.getvalue())) + def test_magic_less(self): + params = zstd.CompressionParameters.from_level( + 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) + cctx = zstd.ZstdCompressor(compression_params=params) + frame = cctx.compress(b'foobar') + + self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd') + + dctx = zstd.ZstdDecompressor() + with self.assertRaisesRegexp( + zstd.ZstdError, 'error determining content size from frame header'): + dctx.decompress(frame) + + dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) + res = b''.join(dctx.read_to_iter(frame)) + self.assertEqual(res, b'foobar') + @make_cffi class TestDecompressor_content_dict_chain(unittest.TestCase): @@ -511,19 +891,20 @@ with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): dctx.decompress_content_dict_chain([b'foo' * 8]) - no_size = zstd.ZstdCompressor().compress(b'foo' * 64) + no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): dctx.decompress_content_dict_chain([no_size]) # Corrupt first frame. - frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) + frame = zstd.ZstdCompressor().compress(b'foo' * 64) frame = frame[0:12] + frame[15:] - with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'): + with self.assertRaisesRegexp(zstd.ZstdError, + 'chunk 0 did not decompress full frame'): dctx.decompress_content_dict_chain([frame]) def test_bad_subsequent_input(self): - initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) + initial = zstd.ZstdCompressor().compress(b'foo' * 64) dctx = zstd.ZstdDecompressor() @@ -539,17 +920,17 @@ with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): dctx.decompress_content_dict_chain([initial, b'foo' * 8]) - no_size = zstd.ZstdCompressor().compress(b'foo' * 64) + no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64) with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): dctx.decompress_content_dict_chain([initial, no_size]) # Corrupt second frame. - cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) + cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) frame = cctx.compress(b'bar' * 64) frame = frame[0:12] + frame[15:] - with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'): + with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): dctx.decompress_content_dict_chain([initial, frame]) def test_simple(self): @@ -562,10 +943,10 @@ ] chunks = [] - chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0])) + chunks.append(zstd.ZstdCompressor().compress(original[0])) for i, chunk in enumerate(original[1:]): d = zstd.ZstdCompressionDict(original[i]) - cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True) + cctx = zstd.ZstdCompressor(dict_data=d) chunks.append(cctx.compress(chunk)) for i in range(1, len(original)): @@ -594,7 +975,7 @@ dctx.multi_decompress_to_buffer([b'foobarbaz']) def test_list_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() original = [b'foo' * 4, b'bar' * 6] frames = [cctx.compress(d) for d in original] @@ -614,7 +995,7 @@ self.assertEqual(len(result[1]), 18) def test_list_input_frame_sizes(self): - cctx = zstd.ZstdCompressor(write_content_size=False) + cctx = zstd.ZstdCompressor() original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] frames = [cctx.compress(d) for d in original] @@ -630,7 +1011,7 @@ self.assertEqual(result[i].tobytes(), data) def test_buffer_with_segments_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() original = [b'foo' * 4, b'bar' * 6] frames = [cctx.compress(d) for d in original] @@ -669,7 +1050,7 @@ self.assertEqual(result[i].tobytes(), data) def test_buffer_with_segments_collection_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() original = [ b'foo0' * 2, @@ -711,8 +1092,18 @@ for i in range(5): self.assertEqual(decompressed[i].tobytes(), original[i]) + def test_dict(self): + d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) + + cctx = zstd.ZstdCompressor(dict_data=d, level=1) + frames = [cctx.compress(s) for s in generate_samples()] + + dctx = zstd.ZstdDecompressor(dict_data=d) + result = dctx.multi_decompress_to_buffer(frames) + self.assertEqual([o.tobytes() for o in result], generate_samples()) + def test_multiple_threads(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() frames = [] frames.extend(cctx.compress(b'x' * 64) for i in range(256)) @@ -727,15 +1118,22 @@ self.assertEqual(result[256].tobytes(), b'y' * 64) def test_item_failure(self): - cctx = zstd.ZstdCompressor(write_content_size=True) + cctx = zstd.ZstdCompressor() frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] - frames[1] = frames[1] + b'extra' + frames[1] = frames[1][0:15] + b'extra' + frames[1][15:] dctx = zstd.ZstdDecompressor() - with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'): + with self.assertRaisesRegexp(zstd.ZstdError, + 'error decompressing item 1: (' + 'Corrupted block|' + 'Destination buffer is too small)'): dctx.multi_decompress_to_buffer(frames) - with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'): + with self.assertRaisesRegexp(zstd.ZstdError, + 'error decompressing item 1: (' + 'Corrupted block|' + 'Destination buffer is too small)'): dctx.multi_decompress_to_buffer(frames, threads=2) +