comparison mercurial/util.py @ 30356:c86109eface7

util: add a stream compression API to compression engines. It is a common pattern throughout the code to perform compression on an iterator of chunks, yielding an iterator of compressed chunks. Let's formalize that as part of the compression engine API. The zlib and bzip2 implementations allow an optional "level" option to control the compression level. The default values are the same as what the Python modules use. This option will be used in subsequent patches.
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 07 Nov 2016 18:57:07 -0800
parents c52faa621d9f
children 673f0fdc1046
comparison
equal deleted inserted replaced
30355:c52faa621d9f 30356:c86109eface7
2964 the user-facing "bundle spec" compression name and an internal 2964 the user-facing "bundle spec" compression name and an internal
2965 identifier used to denote the compression format within bundles. To 2965 identifier used to denote the compression format within bundles. To
2966 exclude the name from external usage, set the first element to ``None``. 2966 exclude the name from external usage, set the first element to ``None``.
2967 2967
2968 If bundle compression is supported, the class must also implement 2968 If bundle compression is supported, the class must also implement
2969 ``compressorobj`` and ``decompressorreader``. 2969 ``compressstream``, ``compressorobj`` and ``decompressorreader``.
2970 """ 2970 """
2971 return None 2971 return None
2972
2973 def compressstream(self, it, opts=None):
2974 """Compress an iterator of chunks.
2975
2976 The method receives an iterator (ideally a generator) of chunks of
2977 bytes to be compressed. It returns an iterator (ideally a generator)
2978 of chunks of bytes representing the compressed output.
2979
2980 Optionally accepts an argument defining how to perform compression.
2981 Each engine treats this argument differently.
2982 """
2983 raise NotImplementedError()
2972 2984
2973 def compressorobj(self): 2985 def compressorobj(self):
2974 """(Temporary) Obtain an object used for compression. 2986 """(Temporary) Obtain an object used for compression.
2975 2987
2976 The returned object has ``compress(data)`` and ``flush()`` methods. 2988 The returned object has ``compress(data)`` and ``flush()`` methods.
2995 return 'gzip', 'GZ' 3007 return 'gzip', 'GZ'
2996 3008
2997 def compressorobj(self): 3009 def compressorobj(self):
2998 return zlib.compressobj() 3010 return zlib.compressobj()
2999 3011
3012 def compressstream(self, it, opts=None):
3013 opts = opts or {}
3014
3015 z = zlib.compressobj(opts.get('level', -1))
3016 for chunk in it:
3017 data = z.compress(chunk)
3018 # Not all calls to compress emit data. It is cheaper to inspect
3019 # here than to feed empty chunks through generator.
3020 if data:
3021 yield data
3022
3023 yield z.flush()
3024
3000 def decompressorreader(self, fh): 3025 def decompressorreader(self, fh):
3001 def gen(): 3026 def gen():
3002 d = zlib.decompressobj() 3027 d = zlib.decompressobj()
3003 for chunk in filechunkiter(fh): 3028 for chunk in filechunkiter(fh):
3004 yield d.decompress(chunk) 3029 yield d.decompress(chunk)
3014 def bundletype(self): 3039 def bundletype(self):
3015 return 'bzip2', 'BZ' 3040 return 'bzip2', 'BZ'
3016 3041
3017 def compressorobj(self): 3042 def compressorobj(self):
3018 return bz2.BZ2Compressor() 3043 return bz2.BZ2Compressor()
3044
3045 def compressstream(self, it, opts=None):
3046 opts = opts or {}
3047 z = bz2.BZ2Compressor(opts.get('level', 9))
3048 for chunk in it:
3049 data = z.compress(chunk)
3050 if data:
3051 yield data
3052
3053 yield z.flush()
3019 3054
3020 def decompressorreader(self, fh): 3055 def decompressorreader(self, fh):
3021 def gen(): 3056 def gen():
3022 d = bz2.BZ2Decompressor() 3057 d = bz2.BZ2Decompressor()
3023 for chunk in filechunkiter(fh): 3058 for chunk in filechunkiter(fh):
3063 return 'none', 'UN' 3098 return 'none', 'UN'
3064 3099
3065 def compressorobj(self): 3100 def compressorobj(self):
3066 return nocompress() 3101 return nocompress()
3067 3102
3103 def compressstream(self, it, opts=None):
3104 return it
3105
3068 def decompressorreader(self, fh): 3106 def decompressorreader(self, fh):
3069 return fh 3107 return fh
3070 3108
3071 compengines.register(_noopengine()) 3109 compengines.register(_noopengine())
3072 3110