diff -r 96f811bceb85 -r 2b279126b8f5 mercurial/revlog.py --- a/mercurial/revlog.py Fri Jan 13 10:22:25 2017 +0100 +++ b/mercurial/revlog.py Fri Jan 13 19:58:00 2017 -0800 @@ -39,7 +39,8 @@ _pack = struct.pack _unpack = struct.unpack -_decompress = zlib.decompress +# Aliased for performance. +_zlibdecompress = zlib.decompress # revlog header flags REVLOGV0 = 0 @@ -339,6 +340,8 @@ self._chunkclear() # revnum -> (chain-length, sum-delta-length) self._chaininfocache = {} + # revlog header -> revlog compressor + self._decompressors = {} @util.propertycache def _compressor(self): @@ -1491,17 +1494,52 @@ """ if not data: return data + + # Revlogs are read much more frequently than they are written and many + # chunks only take microseconds to decompress, so performance is + # important here. + # + # We can make a few assumptions about revlogs: + # + # 1) the majority of chunks will be compressed (as opposed to inline + # raw data). + # 2) decompressing *any* data will likely by at least 10x slower than + # returning raw inline data. + # 3) we want to prioritize common and officially supported compression + # engines + # + # It follows that we want to optimize for "decompress compressed data + # when encoded with common and officially supported compression engines" + # case over "raw data" and "data encoded by less common or non-official + # compression engines." That is why we have the inline lookup first + # followed by the compengines lookup. + # + # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib + # compressed chunks. And this matters for changelog and manifest reads. t = data[0] - if t == '\0': - return data + if t == 'x': try: - return _decompress(data) + return _zlibdecompress(data) except zlib.error as e: raise RevlogError(_('revlog decompress error: %s') % str(e)) - if t == 'u': + # '\0' is more common than 'u' so it goes first. + elif t == '\0': + return data + elif t == 'u': return util.buffer(data, 1) - raise RevlogError(_('unknown compression type %r') % t) + + try: + compressor = self._decompressors[t] + except KeyError: + try: + engine = util.compengines.forrevlogheader(t) + compressor = engine.revlogcompressor() + self._decompressors[t] = compressor + except KeyError: + raise RevlogError(_('unknown compression type %r') % t) + + return compressor.decompress(data) def _isgooddelta(self, d, textlen): """Returns True if the given delta is good. Good means that it is within