Mercurial > public > mercurial-scm > hg-stable
diff contrib/python-zstandard/tests/test_compressor.py @ 37495:b1fb341d8a61
zstandard: vendor python-zstandard 0.9.0
This was just released. It features a number of goodies. More info at
https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/.
The clang-format ignore list was updated to reflect the new source
of files.
The project contains a vendored copy of zstandard 1.3.4. The old
version was 1.1.3. One of the changes between those versions is that
zstandard is now dual licensed BSD + GPLv2 and the patent rights grant
has been removed. Good riddance.
The API should be backwards compatible. So no changes in core
should be needed. However, there were a number of changes in the
library that we'll want to adapt to. Those will be addressed in
subsequent commits.
Differential Revision: https://phab.mercurial-scm.org/D3198
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 09 Apr 2018 10:13:29 -0700 |
parents | e0dc40530c5a |
children | 73fef626dae3 |
line wrap: on
line diff
--- a/contrib/python-zstandard/tests/test_compressor.py Sun Apr 08 01:08:43 2018 +0200 +++ b/contrib/python-zstandard/tests/test_compressor.py Mon Apr 09 10:13:29 2018 -0700 @@ -2,13 +2,10 @@ import io import struct import sys +import tarfile +import unittest -try: - import unittest2 as unittest -except ImportError: - import unittest - -import zstd +import zstandard as zstd from .common import ( make_cffi, @@ -23,7 +20,8 @@ def multithreaded_chunk_size(level, source_size=0): - params = zstd.get_compression_parameters(level, source_size) + params = zstd.ZstdCompressionParameters.from_level(level, + source_size=source_size) return 1 << (params.window_log + 2) @@ -32,67 +30,82 @@ class TestCompressor(unittest.TestCase): def test_level_bounds(self): with self.assertRaises(ValueError): - zstd.ZstdCompressor(level=0) + zstd.ZstdCompressor(level=23) - with self.assertRaises(ValueError): - zstd.ZstdCompressor(level=23) + def test_memory_size(self): + cctx = zstd.ZstdCompressor(level=1) + self.assertGreater(cctx.memory_size(), 100) @make_cffi class TestCompressor_compress(unittest.TestCase): - def test_multithreaded_unsupported(self): - samples = [] - for i in range(128): - samples.append(b'foo' * 64) - samples.append(b'bar' * 64) - - d = zstd.train_dictionary(8192, samples) - - cctx = zstd.ZstdCompressor(dict_data=d, threads=2) - - with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'): - cctx.compress(b'foo') - - params = zstd.get_compression_parameters(3) - cctx = zstd.ZstdCompressor(compression_params=params, threads=2) - with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'): - cctx.compress(b'foo') - def test_compress_empty(self): - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) result = cctx.compress(b'') self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') params = zstd.get_frame_parameters(result) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 524288) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum, 0) - # TODO should be temporary until https://github.com/facebook/zstd/issues/506 - # is fixed. - cctx = zstd.ZstdCompressor(write_content_size=True) - with self.assertRaises(ValueError): - cctx.compress(b'') + cctx = zstd.ZstdCompressor() + result = cctx.compress(b'') + self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00') + params = zstd.get_frame_parameters(result) + self.assertEqual(params.content_size, 0) + + def test_input_types(self): + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) + expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f' - cctx.compress(b'', allow_empty=True) + mutable_array = bytearray(3) + mutable_array[:] = b'foo' + + sources = [ + memoryview(b'foo'), + bytearray(b'foo'), + mutable_array, + ] + + for source in sources: + self.assertEqual(cctx.compress(source), expected) def test_compress_large(self): chunks = [] for i in range(255): chunks.append(struct.Struct('>B').pack(i) * 16384) - cctx = zstd.ZstdCompressor(level=3) + cctx = zstd.ZstdCompressor(level=3, write_content_size=False) result = cctx.compress(b''.join(chunks)) self.assertEqual(len(result), 999) self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') - # This matches the test for read_from() below. - cctx = zstd.ZstdCompressor(level=1) + # This matches the test for read_to_iter() below. + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' b'\x02\x09\x00\x00\x6f') + def test_negative_level(self): + cctx = zstd.ZstdCompressor(level=-4) + result = cctx.compress(b'foo' * 256) + + def test_no_magic(self): + params = zstd.ZstdCompressionParameters.from_level( + 1, format=zstd.FORMAT_ZSTD1) + cctx = zstd.ZstdCompressor(compression_params=params) + magic = cctx.compress(b'foobar') + + params = zstd.ZstdCompressionParameters.from_level( + 1, format=zstd.FORMAT_ZSTD1_MAGICLESS) + cctx = zstd.ZstdCompressor(compression_params=params) + no_magic = cctx.compress(b'foobar') + + self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd') + self.assertEqual(magic[4:], no_magic) + def test_write_checksum(self): cctx = zstd.ZstdCompressor(level=1) no_checksum = cctx.compress(b'foobar') @@ -109,15 +122,15 @@ def test_write_content_size(self): cctx = zstd.ZstdCompressor(level=1) + with_size = cctx.compress(b'foobar' * 256) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) no_size = cctx.compress(b'foobar' * 256) - cctx = zstd.ZstdCompressor(level=1, write_content_size=True) - with_size = cctx.compress(b'foobar' * 256) self.assertEqual(len(with_size), len(no_size) + 1) no_params = zstd.get_frame_parameters(no_size) with_params = zstd.get_frame_parameters(with_size) - self.assertEqual(no_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(with_params.content_size, 1536) def test_no_dict_id(self): @@ -140,7 +153,7 @@ no_params = zstd.get_frame_parameters(no_dict_id) with_params = zstd.get_frame_parameters(with_dict_id) self.assertEqual(no_params.dict_id, 0) - self.assertEqual(with_params.dict_id, 1584102229) + self.assertEqual(with_params.dict_id, 1387616518) def test_compress_dict_multiple(self): samples = [] @@ -156,6 +169,21 @@ for i in range(32): cctx.compress(b'foo bar foobar foo bar foobar') + def test_dict_precompute(self): + samples = [] + for i in range(128): + samples.append(b'foo' * 64) + samples.append(b'bar' * 64) + samples.append(b'foobar' * 64) + + d = zstd.train_dictionary(8192, samples) + d.precompute_compress(level=1) + + cctx = zstd.ZstdCompressor(level=1, dict_data=d) + + for i in range(32): + cctx.compress(b'foo bar foobar foo bar foobar') + def test_multithreaded(self): chunk_size = multithreaded_chunk_size(1) source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) @@ -171,16 +199,65 @@ dctx = zstd.ZstdDecompressor() self.assertEqual(dctx.decompress(compressed), source) + def test_multithreaded_dict(self): + samples = [] + for i in range(128): + samples.append(b'foo' * 64) + samples.append(b'bar' * 64) + samples.append(b'foobar' * 64) + + d = zstd.train_dictionary(1024, samples) + + cctx = zstd.ZstdCompressor(dict_data=d, threads=2) + + result = cctx.compress(b'foo') + params = zstd.get_frame_parameters(result); + self.assertEqual(params.content_size, 3); + self.assertEqual(params.dict_id, d.dict_id()) + + self.assertEqual(result, + b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00' + b'\x66\x6f\x6f') + + def test_multithreaded_compression_params(self): + params = zstd.ZstdCompressionParameters.from_level(0, threads=2) + cctx = zstd.ZstdCompressor(compression_params=params) + + result = cctx.compress(b'foo') + params = zstd.get_frame_parameters(result); + self.assertEqual(params.content_size, 3); + + self.assertEqual(result, + b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') + @make_cffi class TestCompressor_compressobj(unittest.TestCase): def test_compressobj_empty(self): - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) cobj = cctx.compressobj() self.assertEqual(cobj.compress(b''), b'') self.assertEqual(cobj.flush(), b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') + def test_input_types(self): + expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) + + mutable_array = bytearray(3) + mutable_array[:] = b'foo' + + sources = [ + memoryview(b'foo'), + bytearray(b'foo'), + mutable_array, + ] + + for source in sources: + cobj = cctx.compressobj() + self.assertEqual(cobj.compress(source), b'') + self.assertEqual(cobj.flush(), expected) + def test_compressobj_large(self): chunks = [] for i in range(255): @@ -194,7 +271,7 @@ self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') params = zstd.get_frame_parameters(result) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1048576) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) @@ -209,8 +286,8 @@ no_params = zstd.get_frame_parameters(no_checksum) with_params = zstd.get_frame_parameters(with_checksum) - self.assertEqual(no_params.content_size, 0) - self.assertEqual(with_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) + self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) self.assertFalse(no_params.has_checksum) @@ -221,14 +298,14 @@ def test_write_content_size(self): cctx = zstd.ZstdCompressor(level=1) cobj = cctx.compressobj(size=len(b'foobar' * 256)) + with_size = cobj.compress(b'foobar' * 256) + cobj.flush() + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) + cobj = cctx.compressobj(size=len(b'foobar' * 256)) no_size = cobj.compress(b'foobar' * 256) + cobj.flush() - cctx = zstd.ZstdCompressor(level=1, write_content_size=True) - cobj = cctx.compressobj(size=len(b'foobar' * 256)) - with_size = cobj.compress(b'foobar' * 256) + cobj.flush() no_params = zstd.get_frame_parameters(no_size) with_params = zstd.get_frame_parameters(with_size) - self.assertEqual(no_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(with_params.content_size, 1536) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) @@ -300,6 +377,34 @@ self.assertEqual(len(compressed), 295) + def test_frame_progression(self): + cctx = zstd.ZstdCompressor() + + self.assertEqual(cctx.frame_progression(), (0, 0, 0)) + + cobj = cctx.compressobj() + + cobj.compress(b'foobar') + self.assertEqual(cctx.frame_progression(), (6, 0, 0)) + + cobj.flush() + self.assertEqual(cctx.frame_progression(), (6, 6, 15)) + + def test_bad_size(self): + cctx = zstd.ZstdCompressor() + + cobj = cctx.compressobj(size=2) + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + cobj.compress(b'foo') + + # Try another operation on this instance. + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + cobj.compress(b'aa') + + # Try another operation on the compressor. + cctx.compressobj(size=4) + cctx.compress(b'foobar') + @make_cffi class TestCompressor_copy_stream(unittest.TestCase): @@ -323,7 +428,7 @@ source = io.BytesIO() dest = io.BytesIO() - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) r, w = cctx.copy_stream(source, dest) self.assertEqual(int(r), 0) self.assertEqual(w, 9) @@ -345,7 +450,7 @@ self.assertEqual(w, 999) params = zstd.get_frame_parameters(dest.getvalue()) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1048576) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) @@ -367,8 +472,8 @@ no_params = zstd.get_frame_parameters(no_checksum.getvalue()) with_params = zstd.get_frame_parameters(with_checksum.getvalue()) - self.assertEqual(no_params.content_size, 0) - self.assertEqual(with_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) + self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) self.assertFalse(no_params.has_checksum) @@ -378,12 +483,12 @@ source = io.BytesIO(b'foobar' * 256) no_size = io.BytesIO() - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) cctx.copy_stream(source, no_size) source.seek(0) with_size = io.BytesIO() - cctx = zstd.ZstdCompressor(level=1, write_content_size=True) + cctx = zstd.ZstdCompressor(level=1) cctx.copy_stream(source, with_size) # Source content size is unknown, so no content size written. @@ -400,7 +505,7 @@ no_params = zstd.get_frame_parameters(no_size.getvalue()) with_params = zstd.get_frame_parameters(with_size.getvalue()) - self.assertEqual(no_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(with_params.content_size, 1536) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) @@ -426,19 +531,18 @@ source.seek(0) dest = io.BytesIO() - cctx = zstd.ZstdCompressor(threads=2) + cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) r, w = cctx.copy_stream(source, dest) self.assertEqual(r, 3145728) self.assertEqual(w, 295) params = zstd.get_frame_parameters(dest.getvalue()) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) # Writing content size and checksum works. - cctx = zstd.ZstdCompressor(threads=2, write_content_size=True, - write_checksum=True) + cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) dest = io.BytesIO() source.seek(0) cctx.copy_stream(source, dest, size=len(source.getvalue())) @@ -448,31 +552,227 @@ self.assertEqual(params.dict_id, 0) self.assertTrue(params.has_checksum) + def test_bad_size(self): + source = io.BytesIO() + source.write(b'a' * 32768) + source.write(b'b' * 32768) + source.seek(0) -def compress(data, level): - buffer = io.BytesIO() - cctx = zstd.ZstdCompressor(level=level) - with cctx.write_to(buffer) as compressor: - compressor.write(data) - return buffer.getvalue() + dest = io.BytesIO() + + cctx = zstd.ZstdCompressor() + + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + cctx.copy_stream(source, dest, size=42) + + # Try another operation on this compressor. + source.seek(0) + dest = io.BytesIO() + cctx.copy_stream(source, dest) @make_cffi -class TestCompressor_write_to(unittest.TestCase): +class TestCompressor_stream_reader(unittest.TestCase): + def test_context_manager(self): + cctx = zstd.ZstdCompressor() + + reader = cctx.stream_reader(b'foo' * 60) + with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): + reader.read(10) + + with cctx.stream_reader(b'foo') as reader: + with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): + with reader as reader2: + pass + + def test_not_implemented(self): + cctx = zstd.ZstdCompressor() + + with cctx.stream_reader(b'foo' * 60) as reader: + with self.assertRaises(io.UnsupportedOperation): + reader.readline() + + with self.assertRaises(io.UnsupportedOperation): + reader.readlines() + + # This could probably be implemented someday. + with self.assertRaises(NotImplementedError): + reader.readall() + + with self.assertRaises(io.UnsupportedOperation): + iter(reader) + + with self.assertRaises(io.UnsupportedOperation): + next(reader) + + with self.assertRaises(OSError): + reader.writelines([]) + + with self.assertRaises(OSError): + reader.write(b'foo') + + def test_constant_methods(self): + cctx = zstd.ZstdCompressor() + + with cctx.stream_reader(b'boo') as reader: + self.assertTrue(reader.readable()) + self.assertFalse(reader.writable()) + self.assertFalse(reader.seekable()) + self.assertFalse(reader.isatty()) + self.assertIsNone(reader.flush()) + + def test_read_closed(self): + cctx = zstd.ZstdCompressor() + + with cctx.stream_reader(b'foo' * 60) as reader: + reader.close() + with self.assertRaisesRegexp(ValueError, 'stream is closed'): + reader.read(10) + + def test_read_bad_size(self): + cctx = zstd.ZstdCompressor() + + with cctx.stream_reader(b'foo') as reader: + with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): + reader.read(-1) + + with self.assertRaisesRegexp(ValueError, 'cannot read negative or size 0 amounts'): + reader.read(0) + + def test_read_buffer(self): + cctx = zstd.ZstdCompressor() + + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + with cctx.stream_reader(source) as reader: + self.assertEqual(reader.tell(), 0) + + # We should get entire frame in one read. + result = reader.read(8192) + self.assertEqual(result, frame) + self.assertEqual(reader.tell(), len(result)) + self.assertEqual(reader.read(), b'') + self.assertEqual(reader.tell(), len(result)) + + def test_read_buffer_small_chunks(self): + cctx = zstd.ZstdCompressor() + + source = b'foo' * 60 + chunks = [] + + with cctx.stream_reader(source) as reader: + self.assertEqual(reader.tell(), 0) + + while True: + chunk = reader.read(1) + if not chunk: + break + + chunks.append(chunk) + self.assertEqual(reader.tell(), sum(map(len, chunks))) + + self.assertEqual(b''.join(chunks), cctx.compress(source)) + + def test_read_stream(self): + cctx = zstd.ZstdCompressor() + + source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60]) + frame = cctx.compress(source) + + with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: + self.assertEqual(reader.tell(), 0) + + chunk = reader.read(8192) + self.assertEqual(chunk, frame) + self.assertEqual(reader.tell(), len(chunk)) + self.assertEqual(reader.read(), b'') + self.assertEqual(reader.tell(), len(chunk)) + + def test_read_stream_small_chunks(self): + cctx = zstd.ZstdCompressor() + + source = b'foo' * 60 + chunks = [] + + with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: + self.assertEqual(reader.tell(), 0) + + while True: + chunk = reader.read(1) + if not chunk: + break + + chunks.append(chunk) + self.assertEqual(reader.tell(), sum(map(len, chunks))) + + self.assertEqual(b''.join(chunks), cctx.compress(source)) + + def test_read_after_exit(self): + cctx = zstd.ZstdCompressor() + + with cctx.stream_reader(b'foo' * 60) as reader: + while reader.read(8192): + pass + + with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): + reader.read(10) + + def test_bad_size(self): + cctx = zstd.ZstdCompressor() + + source = io.BytesIO(b'foobar') + + with cctx.stream_reader(source, size=2) as reader: + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + reader.read(10) + + # Try another compression operation. + with cctx.stream_reader(source, size=42): + pass + + +@make_cffi +class TestCompressor_stream_writer(unittest.TestCase): def test_empty(self): - result = compress(b'', 1) + buffer = io.BytesIO() + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) + with cctx.stream_writer(buffer) as compressor: + compressor.write(b'') + + result = buffer.getvalue() self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') params = zstd.get_frame_parameters(result) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 524288) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) + def test_input_types(self): + expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f' + cctx = zstd.ZstdCompressor(level=1) + + mutable_array = bytearray(3) + mutable_array[:] = b'foo' + + sources = [ + memoryview(b'foo'), + bytearray(b'foo'), + mutable_array, + ] + + for source in sources: + buffer = io.BytesIO() + with cctx.stream_writer(buffer) as compressor: + compressor.write(source) + + self.assertEqual(buffer.getvalue(), expected) + def test_multiple_compress(self): buffer = io.BytesIO() cctx = zstd.ZstdCompressor(level=5) - with cctx.write_to(buffer) as compressor: + with cctx.stream_writer(buffer) as compressor: self.assertEqual(compressor.write(b'foo'), 0) self.assertEqual(compressor.write(b'bar'), 0) self.assertEqual(compressor.write(b'x' * 8192), 0) @@ -491,35 +791,40 @@ d = zstd.train_dictionary(8192, samples) + h = hashlib.sha1(d.as_bytes()).hexdigest() + self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') + buffer = io.BytesIO() cctx = zstd.ZstdCompressor(level=9, dict_data=d) - with cctx.write_to(buffer) as compressor: + with cctx.stream_writer(buffer) as compressor: self.assertEqual(compressor.write(b'foo'), 0) self.assertEqual(compressor.write(b'bar'), 0) - self.assertEqual(compressor.write(b'foo' * 16384), 634) + self.assertEqual(compressor.write(b'foo' * 16384), 0) compressed = buffer.getvalue() params = zstd.get_frame_parameters(compressed) - self.assertEqual(params.content_size, 0) - self.assertEqual(params.window_size, 1024) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) + self.assertEqual(params.window_size, 2097152) self.assertEqual(params.dict_id, d.dict_id()) self.assertFalse(params.has_checksum) - - self.assertEqual(compressed[0:32], - b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' - b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' - b'\x00\x00\x18\x6f\x6f\x66\x01\x00') - - h = hashlib.sha1(compressed).hexdigest() - self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') + self.assertEqual(compressed, + b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' + b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') def test_compression_params(self): - params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST) + params = zstd.ZstdCompressionParameters( + window_log=20, + chain_log=6, + hash_log=12, + min_match=5, + search_log=4, + target_length=10, + compression_strategy=zstd.STRATEGY_FAST) buffer = io.BytesIO() cctx = zstd.ZstdCompressor(compression_params=params) - with cctx.write_to(buffer) as compressor: + with cctx.stream_writer(buffer) as compressor: self.assertEqual(compressor.write(b'foo'), 0) self.assertEqual(compressor.write(b'bar'), 0) self.assertEqual(compressor.write(b'foobar' * 16384), 0) @@ -527,29 +832,29 @@ compressed = buffer.getvalue() params = zstd.get_frame_parameters(compressed) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 1048576) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) h = hashlib.sha1(compressed).hexdigest() - self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') + self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') def test_write_checksum(self): no_checksum = io.BytesIO() cctx = zstd.ZstdCompressor(level=1) - with cctx.write_to(no_checksum) as compressor: + with cctx.stream_writer(no_checksum) as compressor: self.assertEqual(compressor.write(b'foobar'), 0) with_checksum = io.BytesIO() cctx = zstd.ZstdCompressor(level=1, write_checksum=True) - with cctx.write_to(with_checksum) as compressor: + with cctx.stream_writer(with_checksum) as compressor: self.assertEqual(compressor.write(b'foobar'), 0) no_params = zstd.get_frame_parameters(no_checksum.getvalue()) with_params = zstd.get_frame_parameters(with_checksum.getvalue()) - self.assertEqual(no_params.content_size, 0) - self.assertEqual(with_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) + self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) self.assertFalse(no_params.has_checksum) @@ -560,13 +865,13 @@ def test_write_content_size(self): no_size = io.BytesIO() - cctx = zstd.ZstdCompressor(level=1) - with cctx.write_to(no_size) as compressor: + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) + with cctx.stream_writer(no_size) as compressor: self.assertEqual(compressor.write(b'foobar' * 256), 0) with_size = io.BytesIO() - cctx = zstd.ZstdCompressor(level=1, write_content_size=True) - with cctx.write_to(with_size) as compressor: + cctx = zstd.ZstdCompressor(level=1) + with cctx.stream_writer(with_size) as compressor: self.assertEqual(compressor.write(b'foobar' * 256), 0) # Source size is not known in streaming mode, so header not @@ -576,12 +881,12 @@ # Declaring size will write the header. with_size = io.BytesIO() - with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor: + with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor: self.assertEqual(compressor.write(b'foobar' * 256), 0) no_params = zstd.get_frame_parameters(no_size.getvalue()) with_params = zstd.get_frame_parameters(with_size.getvalue()) - self.assertEqual(no_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(with_params.content_size, 1536) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, 0) @@ -602,18 +907,22 @@ with_dict_id = io.BytesIO() cctx = zstd.ZstdCompressor(level=1, dict_data=d) - with cctx.write_to(with_dict_id) as compressor: + with cctx.stream_writer(with_dict_id) as compressor: self.assertEqual(compressor.write(b'foobarfoobar'), 0) + self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03') + cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) no_dict_id = io.BytesIO() - with cctx.write_to(no_dict_id) as compressor: + with cctx.stream_writer(no_dict_id) as compressor: self.assertEqual(compressor.write(b'foobarfoobar'), 0) + self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00') + no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) - self.assertEqual(no_params.content_size, 0) - self.assertEqual(with_params.content_size, 0) + self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) + self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(no_params.dict_id, 0) self.assertEqual(with_params.dict_id, d.dict_id()) self.assertFalse(no_params.has_checksum) @@ -625,7 +934,8 @@ def test_memory_size(self): cctx = zstd.ZstdCompressor(level=3) buffer = io.BytesIO() - with cctx.write_to(buffer) as compressor: + with cctx.stream_writer(buffer) as compressor: + compressor.write(b'foo') size = compressor.memory_size() self.assertGreater(size, 100000) @@ -633,7 +943,7 @@ def test_write_size(self): cctx = zstd.ZstdCompressor(level=3) dest = OpCountingBytesIO() - with cctx.write_to(dest, write_size=1) as compressor: + with cctx.stream_writer(dest, write_size=1) as compressor: self.assertEqual(compressor.write(b'foo'), 0) self.assertEqual(compressor.write(b'bar'), 0) self.assertEqual(compressor.write(b'foobar'), 0) @@ -643,7 +953,7 @@ def test_flush_repeated(self): cctx = zstd.ZstdCompressor(level=3) dest = OpCountingBytesIO() - with cctx.write_to(dest) as compressor: + with cctx.stream_writer(dest) as compressor: self.assertEqual(compressor.write(b'foo'), 0) self.assertEqual(dest._write_count, 0) self.assertEqual(compressor.flush(), 12) @@ -659,7 +969,7 @@ def test_flush_empty_block(self): cctx = zstd.ZstdCompressor(level=3, write_checksum=True) dest = OpCountingBytesIO() - with cctx.write_to(dest) as compressor: + with cctx.stream_writer(dest) as compressor: self.assertEqual(compressor.write(b'foobar' * 8192), 0) count = dest._write_count offset = dest.tell() @@ -680,50 +990,89 @@ def test_multithreaded(self): dest = io.BytesIO() cctx = zstd.ZstdCompressor(threads=2) - with cctx.write_to(dest) as compressor: + with cctx.stream_writer(dest) as compressor: compressor.write(b'a' * 1048576) compressor.write(b'b' * 1048576) compressor.write(b'c' * 1048576) self.assertEqual(len(dest.getvalue()), 295) + def test_tell(self): + dest = io.BytesIO() + cctx = zstd.ZstdCompressor() + with cctx.stream_writer(dest) as compressor: + self.assertEqual(compressor.tell(), 0) + + for i in range(256): + compressor.write(b'foo' * (i + 1)) + self.assertEqual(compressor.tell(), dest.tell()) + + def test_bad_size(self): + cctx = zstd.ZstdCompressor() + + dest = io.BytesIO() + + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + with cctx.stream_writer(dest, size=2) as compressor: + compressor.write(b'foo') + + # Test another operation. + with cctx.stream_writer(dest, size=42): + pass + + def test_tarfile_compat(self): + raise unittest.SkipTest('not yet fully working') + + dest = io.BytesIO() + cctx = zstd.ZstdCompressor() + with cctx.stream_writer(dest) as compressor: + with tarfile.open('tf', mode='w', fileobj=compressor) as tf: + tf.add(__file__, 'test_compressor.py') + + dest.seek(0) + + dctx = zstd.ZstdDecompressor() + with dctx.stream_reader(dest) as reader: + with tarfile.open(mode='r:', fileobj=reader) as tf: + for member in tf: + self.assertEqual(member.name, 'test_compressor.py') @make_cffi -class TestCompressor_read_from(unittest.TestCase): +class TestCompressor_read_to_iter(unittest.TestCase): def test_type_validation(self): cctx = zstd.ZstdCompressor() # Object with read() works. - for chunk in cctx.read_from(io.BytesIO()): + for chunk in cctx.read_to_iter(io.BytesIO()): pass # Buffer protocol works. - for chunk in cctx.read_from(b'foobar'): + for chunk in cctx.read_to_iter(b'foobar'): pass with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): - for chunk in cctx.read_from(True): + for chunk in cctx.read_to_iter(True): pass def test_read_empty(self): - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) source = io.BytesIO() - it = cctx.read_from(source) + it = cctx.read_to_iter(source) chunks = list(it) self.assertEqual(len(chunks), 1) compressed = b''.join(chunks) self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') # And again with the buffer protocol. - it = cctx.read_from(b'') + it = cctx.read_to_iter(b'') chunks = list(it) self.assertEqual(len(chunks), 1) compressed2 = b''.join(chunks) self.assertEqual(compressed2, compressed) def test_read_large(self): - cctx = zstd.ZstdCompressor(level=1) + cctx = zstd.ZstdCompressor(level=1, write_content_size=False) source = io.BytesIO() source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) @@ -732,7 +1081,7 @@ # Creating an iterator should not perform any compression until # first read. - it = cctx.read_from(source, size=len(source.getvalue())) + it = cctx.read_to_iter(source, size=len(source.getvalue())) self.assertEqual(source.tell(), 0) # We should have exactly 2 output chunks. @@ -758,21 +1107,28 @@ self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) params = zstd.get_frame_parameters(b''.join(chunks)) - self.assertEqual(params.content_size, 0) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) self.assertEqual(params.window_size, 262144) self.assertEqual(params.dict_id, 0) self.assertFalse(params.has_checksum) # Now check the buffer protocol. - it = cctx.read_from(source.getvalue()) + it = cctx.read_to_iter(source.getvalue()) chunks = list(it) self.assertEqual(len(chunks), 2) + + params = zstd.get_frame_parameters(b''.join(chunks)) + self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) + #self.assertEqual(params.window_size, 262144) + self.assertEqual(params.dict_id, 0) + self.assertFalse(params.has_checksum) + self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) def test_read_write_size(self): source = OpCountingBytesIO(b'foobarfoobar') cctx = zstd.ZstdCompressor(level=3) - for chunk in cctx.read_from(source, read_size=1, write_size=1): + for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): self.assertEqual(len(chunk), 1) self.assertEqual(source._read_count, len(source.getvalue()) + 1) @@ -786,17 +1142,22 @@ cctx = zstd.ZstdCompressor(threads=2) - compressed = b''.join(cctx.read_from(source)) + compressed = b''.join(cctx.read_to_iter(source)) self.assertEqual(len(compressed), 295) + def test_bad_size(self): + cctx = zstd.ZstdCompressor() + + source = io.BytesIO(b'a' * 42) + + with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'): + b''.join(cctx.read_to_iter(source, size=2)) + + # Test another operation on errored compressor. + b''.join(cctx.read_to_iter(source)) + class TestCompressor_multi_compress_to_buffer(unittest.TestCase): - def test_multithreaded_unsupported(self): - cctx = zstd.ZstdCompressor(threads=2) - - with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'): - cctx.multi_compress_to_buffer([b'foo']) - def test_invalid_inputs(self): cctx = zstd.ZstdCompressor() @@ -819,7 +1180,7 @@ cctx.multi_compress_to_buffer([b'', b'', b'']) def test_list_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True) + cctx = zstd.ZstdCompressor(write_checksum=True) original = [b'foo' * 12, b'bar' * 6] frames = [cctx.compress(c) for c in original] @@ -834,7 +1195,7 @@ self.assertEqual(b[1].tobytes(), frames[1]) def test_buffer_with_segments_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True) + cctx = zstd.ZstdCompressor(write_checksum=True) original = [b'foo' * 4, b'bar' * 6] frames = [cctx.compress(c) for c in original] @@ -852,7 +1213,7 @@ self.assertEqual(result[1].tobytes(), frames[1]) def test_buffer_with_segments_collection_input(self): - cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True) + cctx = zstd.ZstdCompressor(write_checksum=True) original = [ b'foo1', @@ -886,10 +1247,10 @@ def test_multiple_threads(self): # threads argument will cause multi-threaded ZSTD APIs to be used, which will # make output different. - refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True) + refcctx = zstd.ZstdCompressor(write_checksum=True) reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)] - cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True) + cctx = zstd.ZstdCompressor(write_checksum=True) frames = [] frames.extend(b'x' * 64 for i in range(256))