Mercurial > public > mercurial-scm > hg
comparison mercurial/util.py @ 30356:c86109eface7
util: add a stream compression API to compression engines
It is a common pattern throughout the code to perform compression
on an iterator of chunks, yielding an iterator of compressed chunks.
Let's formalize that as part of the compression engine API.
The zlib and bzip2 implementations allow an optional "level" option
to control the compression level. The default values are the same as
what the Python modules use. This option will be used in subsequent
patches.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 07 Nov 2016 18:57:07 -0800 |
parents | c52faa621d9f |
children | 673f0fdc1046 |
comparison
equal
deleted
inserted
replaced
30355:c52faa621d9f | 30356:c86109eface7 |
---|---|
2964 the user-facing "bundle spec" compression name and an internal | 2964 the user-facing "bundle spec" compression name and an internal |
2965 identifier used to denote the compression format within bundles. To | 2965 identifier used to denote the compression format within bundles. To |
2966 exclude the name from external usage, set the first element to ``None``. | 2966 exclude the name from external usage, set the first element to ``None``. |
2967 | 2967 |
2968 If bundle compression is supported, the class must also implement | 2968 If bundle compression is supported, the class must also implement |
2969 ``compressorobj`` and ``decompressorreader``. | 2969 ``compressstream``, ``compressorobj`` and ``decompressorreader``. |
2970 """ | 2970 """ |
2971 return None | 2971 return None |
2972 | |
2973 def compressstream(self, it, opts=None): | |
2974 """Compress an iterator of chunks. | |
2975 | |
2976 The method receives an iterator (ideally a generator) of chunks of | |
2977 bytes to be compressed. It returns an iterator (ideally a generator) | |
2978 of chunks of bytes representing the compressed output. | |
2979 | |
2980 Optionally accepts an argument defining how to perform compression. | |
2981 Each engine treats this argument differently. | |
2982 """ | |
2983 raise NotImplementedError() | |
2972 | 2984 |
2973 def compressorobj(self): | 2985 def compressorobj(self): |
2974 """(Temporary) Obtain an object used for compression. | 2986 """(Temporary) Obtain an object used for compression. |
2975 | 2987 |
2976 The returned object has ``compress(data)`` and ``flush()`` methods. | 2988 The returned object has ``compress(data)`` and ``flush()`` methods. |
2995 return 'gzip', 'GZ' | 3007 return 'gzip', 'GZ' |
2996 | 3008 |
2997 def compressorobj(self): | 3009 def compressorobj(self): |
2998 return zlib.compressobj() | 3010 return zlib.compressobj() |
2999 | 3011 |
3012 def compressstream(self, it, opts=None): | |
3013 opts = opts or {} | |
3014 | |
3015 z = zlib.compressobj(opts.get('level', -1)) | |
3016 for chunk in it: | |
3017 data = z.compress(chunk) | |
3018 # Not all calls to compress emit data. It is cheaper to inspect | |
3019 # here than to feed empty chunks through the generator. | |
3020 if data: | |
3021 yield data | |
3022 | |
3023 yield z.flush() | |
3024 | |
3000 def decompressorreader(self, fh): | 3025 def decompressorreader(self, fh): |
3001 def gen(): | 3026 def gen(): |
3002 d = zlib.decompressobj() | 3027 d = zlib.decompressobj() |
3003 for chunk in filechunkiter(fh): | 3028 for chunk in filechunkiter(fh): |
3004 yield d.decompress(chunk) | 3029 yield d.decompress(chunk) |
3014 def bundletype(self): | 3039 def bundletype(self): |
3015 return 'bzip2', 'BZ' | 3040 return 'bzip2', 'BZ' |
3016 | 3041 |
3017 def compressorobj(self): | 3042 def compressorobj(self): |
3018 return bz2.BZ2Compressor() | 3043 return bz2.BZ2Compressor() |
3044 | |
3045 def compressstream(self, it, opts=None): | |
3046 opts = opts or {} | |
3047 z = bz2.BZ2Compressor(opts.get('level', 9)) | |
3048 for chunk in it: | |
3049 data = z.compress(chunk) | |
3050 if data: | |
3051 yield data | |
3052 | |
3053 yield z.flush() | |
3019 | 3054 |
3020 def decompressorreader(self, fh): | 3055 def decompressorreader(self, fh): |
3021 def gen(): | 3056 def gen(): |
3022 d = bz2.BZ2Decompressor() | 3057 d = bz2.BZ2Decompressor() |
3023 for chunk in filechunkiter(fh): | 3058 for chunk in filechunkiter(fh): |
3063 return 'none', 'UN' | 3098 return 'none', 'UN' |
3064 | 3099 |
3065 def compressorobj(self): | 3100 def compressorobj(self): |
3066 return nocompress() | 3101 return nocompress() |
3067 | 3102 |
3103 def compressstream(self, it, opts=None): | |
3104 return it | |
3105 | |
3068 def decompressorreader(self, fh): | 3106 def decompressorreader(self, fh): |
3069 return fh | 3107 return fh |
3070 | 3108 |
3071 compengines.register(_noopengine()) | 3109 compengines.register(_noopengine()) |
3072 | 3110 |