comparison mercurial/revlog.py @ 47255:ff9fd7107d11

revlog: implement a "default compression" mode The revlog docket is now storing a default compression engine. When a chunk uses that compression, a dedicated mode is used in the revlog entry and we can directly route it to the right decompressor. We should probably make PLAIN and DEFAULT modes the only available modes for revlogv2, but this is something for later. Differential Revision: https://phab.mercurial-scm.org/D10652
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 03 May 2021 21:13:24 +0200
parents eac3591abbf4
children 2b69555e4875
comparison
equal deleted inserted replaced
47254:eac3591abbf4 47255:ff9fd7107d11
33 ) 33 )
34 from .i18n import _ 34 from .i18n import _
35 from .pycompat import getattr 35 from .pycompat import getattr
36 from .revlogutils.constants import ( 36 from .revlogutils.constants import (
37 ALL_KINDS, 37 ALL_KINDS,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_INLINE, 39 COMP_MODE_INLINE,
39 COMP_MODE_PLAIN, 40 COMP_MODE_PLAIN,
40 FEATURES_BY_VERSION, 41 FEATURES_BY_VERSION,
41 FLAG_GENERALDELTA, 42 FLAG_GENERALDELTA,
42 FLAG_INLINE_DATA, 43 FLAG_INLINE_DATA,
705 706
706 @util.propertycache 707 @util.propertycache
707 def _compressor(self): 708 def _compressor(self):
708 engine = util.compengines[self._compengine] 709 engine = util.compengines[self._compengine]
709 return engine.revlogcompressor(self._compengineopts) 710 return engine.revlogcompressor(self._compengineopts)
711
712 @util.propertycache
713 def _decompressor(self):
714 """the default decompressor"""
715 if self._docket is None:
716 return None
717 t = self._docket.default_compression_header
718 c = self._get_decompressor(t)
719 return c.decompress
710 720
711 def _indexfp(self): 721 def _indexfp(self):
712 """file object for the revlog's index file""" 722 """file object for the revlog's index file"""
713 return self.opener(self._indexfile, mode=b"r") 723 return self.opener(self._indexfile, mode=b"r")
714 724
1774 """ 1784 """
1775 compression_mode = self.index[rev][10] 1785 compression_mode = self.index[rev][10]
1776 data = self._getsegmentforrevs(rev, rev, df=df)[1] 1786 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1777 if compression_mode == COMP_MODE_PLAIN: 1787 if compression_mode == COMP_MODE_PLAIN:
1778 return data 1788 return data
1789 elif compression_mode == COMP_MODE_DEFAULT:
1790 return self._decompressor(data)
1779 elif compression_mode == COMP_MODE_INLINE: 1791 elif compression_mode == COMP_MODE_INLINE:
1780 return self.decompress(data) 1792 return self.decompress(data)
1781 else: 1793 else:
1782 msg = 'unknown compression mode %d' 1794 msg = 'unknown compression mode %d'
1783 msg %= compression_mode 1795 msg %= compression_mode
1827 # issue4215 - we can't cache a run of chunks greater than 1839 # issue4215 - we can't cache a run of chunks greater than
1828 # 2G on Windows 1840 # 2G on Windows
1829 return [self._chunk(rev, df=df) for rev in revschunk] 1841 return [self._chunk(rev, df=df) for rev in revschunk]
1830 1842
1831 decomp = self.decompress 1843 decomp = self.decompress
1844 # self._decompressor might be None, but will not be used in that case
1845 def_decomp = self._decompressor
1832 for rev in revschunk: 1846 for rev in revschunk:
1833 chunkstart = start(rev) 1847 chunkstart = start(rev)
1834 if inline: 1848 if inline:
1835 chunkstart += (rev + 1) * iosize 1849 chunkstart += (rev + 1) * iosize
1836 chunklength = length(rev) 1850 chunklength = length(rev)
1838 c = buffer(data, chunkstart - offset, chunklength) 1852 c = buffer(data, chunkstart - offset, chunklength)
1839 if comp_mode == COMP_MODE_PLAIN: 1853 if comp_mode == COMP_MODE_PLAIN:
1840 ladd(c) 1854 ladd(c)
1841 elif comp_mode == COMP_MODE_INLINE: 1855 elif comp_mode == COMP_MODE_INLINE:
1842 ladd(decomp(c)) 1856 ladd(decomp(c))
1857 elif comp_mode == COMP_MODE_DEFAULT:
1858 ladd(def_decomp(c))
1843 else: 1859 else:
1844 msg = 'unknown compression mode %d' 1860 msg = 'unknown compression mode %d'
1845 msg %= comp_mode 1861 msg %= comp_mode
1846 raise error.RevlogError(msg) 1862 raise error.RevlogError(msg)
1847 1863
2487 if self._docket is not None: 2503 if self._docket is not None:
2488 h, d = deltainfo.data 2504 h, d = deltainfo.data
2489 if not h and not d: 2505 if not h and not d:
2490 # not data to store at all... declare them uncompressed 2506 # not data to store at all... declare them uncompressed
2491 compression_mode = COMP_MODE_PLAIN 2507 compression_mode = COMP_MODE_PLAIN
2492 elif not h and d[0:1] == b'\0': 2508 elif not h:
2493 compression_mode = COMP_MODE_PLAIN 2509 t = d[0:1]
2510 if t == b'\0':
2511 compression_mode = COMP_MODE_PLAIN
2512 elif t == self._docket.default_compression_header:
2513 compression_mode = COMP_MODE_DEFAULT
2494 elif h == b'u': 2514 elif h == b'u':
2495 # we have a more efficient way to declare uncompressed 2515 # we have a more efficient way to declare uncompressed
2496 h = b'' 2516 h = b''
2497 compression_mode = COMP_MODE_PLAIN 2517 compression_mode = COMP_MODE_PLAIN
2498 deltainfo = deltautil.drop_u_compression(deltainfo) 2518 deltainfo = deltautil.drop_u_compression(deltainfo)