Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/util.py @ 30360:358cda0af6ee
util: create new abstraction for compression engines
Currently, util.py has "compressors" and "decompressors" dicts
mapping compression algorithms to callables returning objects that
perform well-defined operations. In addition, revlog.py has code
for calling into a compressor or decompressor explicitly. And, there
is code in the wire protocol for performing zlib compression.
The 3rd party lz4revlog extension has demonstrated the utility of
supporting alternative compression formats for revlog storage. But
it stops short of supporting lz4 for bundles and the wire protocol.
There are also plans to support zstd as a general compression
replacement.
So, there appears to be a market for a unified API for registering
compression engines. This commit starts the process of establishing
one.
This commit establishes a base class/interface for defining
compression engines and how they will be used. A collection class
to hold references to registered compression engines has also been
introduced.
The built-in zlib, bz2, truncated bz2, and no-op compression engines
are registered with a singleton instance of the collection class.
The compression engine API will change once consumers are ported
to the new API and some common patterns can be simplified at the
engine API level. So don't get too attached to the API...
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 07 Nov 2016 18:31:39 -0800 |
parents | 318a24b52eeb |
children | 75f5beb54e29 |
comparison
equal
deleted
inserted
replaced
30359:954002426f78 | 30360:358cda0af6ee |
---|---|
2854 del self._atexit | 2854 del self._atexit |
2855 if pending: | 2855 if pending: |
2856 raise exc_val | 2856 raise exc_val |
2857 return received and suppressed | 2857 return received and suppressed |
2858 | 2858 |
2859 # compression utility | 2859 # compression code |
2860 | |
2861 class compressormanager(object): | |
2862 """Holds registrations of various compression engines. | |
2863 | |
2864 This class essentially abstracts the differences between compression | |
2865 engines to allow new compression formats to be added easily, possibly from | |
2866 extensions. | |
2867 | |
2868 Compressors are registered against the global instance by calling its | |
2869 ``register()`` method. | |
2870 """ | |
2871 def __init__(self): | |
2872 self._engines = {} | |
2873 # Bundle spec human name to engine name. | |
2874 self._bundlenames = {} | |
2875 # Internal bundle identifier to engine name. | |
2876 self._bundletypes = {} | |
2877 | |
2878 def __getitem__(self, key): | |
2879 return self._engines[key] | |
2880 | |
2881 def __contains__(self, key): | |
2882 return key in self._engines | |
2883 | |
2884 def __iter__(self): | |
2885 return iter(self._engines.keys()) | |
2886 | |
2887 def register(self, engine): | |
2888 """Register a compression engine with the manager. | |
2889 | |
2890 The argument must be a ``compressionengine`` instance. | |
2891 """ | |
2892 if not isinstance(engine, compressionengine): | |
2893 raise ValueError(_('argument must be a compressionengine')) | |
2894 | |
2895 name = engine.name() | |
2896 | |
2897 if name in self._engines: | |
2898 raise error.Abort(_('compression engine %s already registered') % | |
2899 name) | |
2900 | |
2901 bundleinfo = engine.bundletype() | |
2902 if bundleinfo: | |
2903 bundlename, bundletype = bundleinfo | |
2904 | |
2905 if bundlename in self._bundlenames: | |
2906 raise error.Abort(_('bundle name %s already registered') % | |
2907 bundlename) | |
2908 if bundletype in self._bundletypes: | |
2909 raise error.Abort(_('bundle type %s already registered by %s') % | |
2910 (bundletype, self._bundletypes[bundletype])) | |
2911 | |
2912 # The external facing name is optional; record it only if declared. | |
2913 if bundlename: | |
2914 self._bundlenames[bundlename] = name | |
2915 | |
2916 self._bundletypes[bundletype] = name | |
2917 | |
2918 self._engines[name] = engine | |
2919 | |
2920 @property | |
2921 def supportedbundlenames(self): | |
2922 return set(self._bundlenames.keys()) | |
2923 | |
2924 @property | |
2925 def supportedbundletypes(self): | |
2926 return set(self._bundletypes.keys()) | |
2927 | |
2928 def forbundlename(self, bundlename): | |
2929 """Obtain a compression engine registered to a bundle name. | |
2930 | |
2931 Will raise KeyError if the bundle name isn't registered. | |
2932 """ | |
2933 return self._engines[self._bundlenames[bundlename]] | |
2934 | |
2935 def forbundletype(self, bundletype): | |
2936 """Obtain a compression engine registered to a bundle type. | |
2937 | |
2938 Will raise KeyError if the bundle type isn't registered. | |
2939 """ | |
2940 return self._engines[self._bundletypes[bundletype]] | |
2941 | |
2942 compengines = compressormanager() | |
2943 | |
2944 class compressionengine(object): | |
2945 """Base class for compression engines. | |
2946 | |
2947 Compression engines must implement the interface defined by this class. | |
2948 """ | |
2949 def name(self): | |
2950 """Returns the name of the compression engine. | |
2951 | |
2952 This is the key the engine is registered under. | |
2953 | |
2954 This method must be implemented. | |
2955 """ | |
2956 raise NotImplementedError() | |
2957 | |
2958 def bundletype(self): | |
2959 """Describes bundle identifiers for this engine. | |
2960 | |
2961 If this compression engine isn't supported for bundles, returns None. | |
2962 | |
2963 If this engine can be used for bundles, returns a 2-tuple of strings of | |
2964 the user-facing "bundle spec" compression name and an internal | |
2965 identifier used to denote the compression format within bundles. To | |
2966 exclude the name from external usage, set the first element to ``None``. | |
2967 | |
2968 If bundle compression is supported, the class must also implement | |
2969 ``compressorobj`` and ``decompressorreader``. | |
2970 """ | |
2971 return None | |
2972 | |
2973 def compressorobj(self): | |
2974 """(Temporary) Obtain an object used for compression. | |
2975 | |
2976 The returned object has ``compress(data)`` and ``flush()`` methods. | |
2977 These are used to incrementally feed data chunks into a compressor. | |
2978 """ | |
2979 raise NotImplementedError() | |
2980 | |
2981 def decompressorreader(self, fh): | |
2982 """Perform decompression on a file object. | |
2983 | |
2984 Argument is an object with a ``read(size)`` method that returns | |
2985 compressed data. Return value is an object with a ``read(size)`` that | |
2986 returns uncompressed data. | |
2987 """ | |
2988 raise NotImplementedError() | |
2989 | |
2990 class _zlibengine(compressionengine): | |
2991 def name(self): | |
2992 return 'zlib' | |
2993 | |
2994 def bundletype(self): | |
2995 return 'gzip', 'GZ' | |
2996 | |
2997 def compressorobj(self): | |
2998 return zlib.compressobj() | |
2999 | |
3000 def decompressorreader(self, fh): | |
3001 def gen(): | |
3002 d = zlib.decompressobj() | |
3003 for chunk in filechunkiter(fh): | |
3004 yield d.decompress(chunk) | |
3005 | |
3006 return chunkbuffer(gen()) | |
3007 | |
3008 compengines.register(_zlibengine()) | |
3009 | |
3010 class _bz2engine(compressionengine): | |
3011 def name(self): | |
3012 return 'bz2' | |
3013 | |
3014 def bundletype(self): | |
3015 return 'bzip2', 'BZ' | |
3016 | |
3017 def compressorobj(self): | |
3018 return bz2.BZ2Compressor() | |
3019 | |
3020 def decompressorreader(self, fh): | |
3021 def gen(): | |
3022 d = bz2.BZ2Decompressor() | |
3023 for chunk in filechunkiter(fh): | |
3024 yield d.decompress(chunk) | |
3025 | |
3026 return chunkbuffer(gen()) | |
3027 | |
3028 compengines.register(_bz2engine()) | |
3029 | |
3030 class _truncatedbz2engine(compressionengine): | |
3031 def name(self): | |
3032 return 'bz2truncated' | |
3033 | |
3034 def bundletype(self): | |
3035 return None, '_truncatedBZ' | |
3036 | |
3037 # We don't implement compressorobj because it is hackily handled elsewhere. | |
3038 | |
3039 def decompressorreader(self, fh): | |
3040 def gen(): | |
3041 # The input stream doesn't have the 'BZ' header. So add it back. | |
3042 d = bz2.BZ2Decompressor() | |
3043 d.decompress('BZ') | |
3044 for chunk in filechunkiter(fh): | |
3045 yield d.decompress(chunk) | |
3046 | |
3047 return chunkbuffer(gen()) | |
3048 | |
3049 compengines.register(_truncatedbz2engine()) | |
2860 | 3050 |
2861 class nocompress(object): | 3051 class nocompress(object): |
2862 def compress(self, x): | 3052 def compress(self, x): |
2863 return x | 3053 return x |
3054 | |
2864 def flush(self): | 3055 def flush(self): |
2865 return "" | 3056 return '' |
3057 | |
3058 class _noopengine(compressionengine): | |
3059 def name(self): | |
3060 return 'none' | |
3061 | |
3062 def bundletype(self): | |
3063 return 'none', 'UN' | |
3064 | |
3065 def compressorobj(self): | |
3066 return nocompress() | |
3067 | |
3068 def decompressorreader(self, fh): | |
3069 return fh | |
3070 | |
3071 compengines.register(_noopengine()) | |
2866 | 3072 |
2867 compressors = { | 3073 compressors = { |
2868 None: nocompress, | 3074 None: nocompress, |
2869 # lambda to prevent early import | 3075 # lambda to prevent early import |
2870 'BZ': lambda: bz2.BZ2Compressor(), | 3076 'BZ': lambda: bz2.BZ2Compressor(), |