mercurial-scm/hg: mercurial/util.py comparison

comparison mercurial/util.py @ 30798:f50c0db50025

util: compression APIs to support revlog decompression. Previously, compression engines had APIs for performing revlog compression but no mechanism to perform revlog decompression. This patch changes that. Revlog decompression is slightly more complicated than compression because in the compression case there is (currently) only a single engine that can be used at a time. However, for decompression, a revlog could contain chunks from multiple compression engines. This means decompression needs to map to multiple engines and decompressors. This functionality is outside the scope of this patch. But it drives the decision for engines to declare a byte header sequence that identifies revlog data as belonging to an engine and an API for obtaining an engine from a revlog header.

author	Gregory Szorc <gregory.szorc@gmail.com>
date	Mon, 02 Jan 2017 13:27:20 -0800
parents	31e1f0d4ab44
children	7005c03f7387

comparison

equal deleted inserted replaced

-:0bde7372e4c0
+:f50c0db50025
 self._engines = {}
 # Bundle spec human name to engine name.
 self._bundlenames = {}
 # Internal bundle identifier to engine name.
 self._bundletypes = {}
+# Revlog header to engine name.
+self._revlogheaders = {}
 # Wire proto identifier to engine name.
 self._wiretypes = {}
 def __getitem__(self, key):
 return self._engines[key]
 'registered by %s') %
 (wiretype, self._wiretypes[wiretype]))
 self._wiretypes[wiretype] = name
+revlogheader = engine.revlogheader()
+if revlogheader and revlogheader in self._revlogheaders:
+raise error.Abort(_('revlog header %s already registered by %s') %
+(revlogheader, self._revlogheaders[revlogheader]))
+if revlogheader:
+self._revlogheaders[revlogheader] = name
 self._engines[name] = engine
 @property
 def supportedbundlenames(self):
 return set(self._bundlenames.keys())
 if not engine.available():
 raise error.Abort(_('compression engine %s could not be loaded') %
 engine.name())
 return engine
+def forrevlogheader(self, header):
+"""Obtain a compression engine registered to a revlog header.
+Will raise KeyError if the revlog header value isn't registered.
+"""
+return self._engines[self._revlogheaders[header]]
 compengines = compressormanager()
 class compressionengine(object):
 """Base class for compression engines.
 If wire protocol compression is supported, the class must also implement
 ``compressstream`` and ``decompressorreader``.
 """
 return None
+def revlogheader(self):
+"""Header added to revlog chunks that identifies this engine.
+If this engine can be used to compress revlogs, this method should
+return the bytes used to identify chunks compressed with this engine.
+Else, the method should return ``None`` to indicate it does not
+participate in revlog compression.
+"""
+return None
 def compressstream(self, it, opts=None):
 """Compress an iterator of chunks.
 The method receives an iterator (ideally a generator) of chunks of
 bytes to be compressed. It returns an iterator (ideally a generator)
 The object has a ``compress(data)`` method that compresses binary
 data. This method returns compressed binary data or ``None`` if
 the data could not be compressed (too small, not compressible, etc).
 The returned data should have a header uniquely identifying this
 compression format so decompression can be routed to this engine.
+This header should be identified by the ``revlogheader()`` return
+value.
+The object has a ``decompress(data)`` method that decompresses
+data. The method will only be called if ``data`` begins with
+``revlogheader()``. The method should return the raw, uncompressed
+data or raise a ``RevlogError``.
 The object is reusable but is not thread safe.
 """
 raise NotImplementedError()
 def bundletype(self):
 return 'gzip', 'GZ'
 def wireprotosupport(self):
 return compewireprotosupport('zlib', 20, 20)
+def revlogheader(self):
+return 'x'
 def compressstream(self, it, opts=None):
 opts = opts or {}
 z = zlib.compressobj(opts.get('level', -1))
 if sum(map(len, parts)) < insize:
 return ''.join(parts)
 return None
+def decompress(self, data):
+try:
+return zlib.decompress(data)
+except zlib.error as e:
+raise error.RevlogError(_('revlog decompress error: %s') %
+str(e))
 def revlogcompressor(self, opts=None):
 return self.zlibrevlogcompressor()
 compengines.register(_zlibengine())
 # Clients always support uncompressed payloads. Servers don't because
 # unless you are on a fast network, uncompressed payloads can easily
 # saturate your network pipe.
 def wireprotosupport(self):
 return compewireprotosupport('none', 0, 10)
+# We don't implement revlogheader because it is handled specially
+# in the revlog class.
 def compressstream(self, it, opts=None):
 return it
 def decompressorreader(self, fh):
 return 'zstd', 'ZS'
 def wireprotosupport(self):
 return compewireprotosupport('zstd', 50, 50)
+def revlogheader(self):
+return '\x28'
 def compressstream(self, it, opts=None):
 opts = opts or {}
 # zstd level 3 is almost always significantly faster than zlib
 # while providing no worse compression. It strikes a good balance
 # between speed and compression.
 # Writing the content size adds a few bytes to the output. However,
 # it allows decompression to be more optimal since we can
 # pre-allocate a buffer to hold the result.
 self._cctx = zstd.ZstdCompressor(level=level,
 write_content_size=True)
+self._dctx = zstd.ZstdDecompressor()
 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
+self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
 def compress(self, data):
 insize = len(data)
 # Caller handles empty input case.
 assert insize > 0
 if sum(map(len, chunks)) < insize:
 return ''.join(chunks)
 return None
+def decompress(self, data):
+insize = len(data)
+try:
+# This was measured to be faster than other streaming
+# decompressors.
+dobj = self._dctx.decompressobj()
+chunks = []
+pos = 0
+while pos < insize:
+pos2 = pos + self._decompinsize
+chunk = dobj.decompress(data[pos:pos2])
+if chunk:
+chunks.append(chunk)
+pos = pos2
+# Frame should be exhausted, so no finish() API.
+return ''.join(chunks)
+except Exception as e:
+raise error.RevlogError(_('revlog decompress error: %s') %
+str(e))
 def revlogcompressor(self, opts=None):
 opts = opts or {}
 return self.zstdrevlogcompressor(self._module,
 level=opts.get('level', 3))

Mercurial > public > mercurial-scm > hg

comparison mercurial/util.py @ 30798:f50c0db50025