mercurial/revlog.py
changeset 30817 2b279126b8f5
parent 30795 78ac56aebab6
child 30818 4c0a5a256ae8
equal deleted inserted replaced
30816:96f811bceb85 30817:2b279126b8f5
    37     util,
    37     util,
    38 )
    38 )
    39 
    39 
    40 _pack = struct.pack
    40 _pack = struct.pack
    41 _unpack = struct.unpack
    41 _unpack = struct.unpack
    42 _decompress = zlib.decompress
    42 # Aliased for performance.
       
    43 _zlibdecompress = zlib.decompress
    43 
    44 
    44 # revlog header flags
    45 # revlog header flags
    45 REVLOGV0 = 0
    46 REVLOGV0 = 0
    46 REVLOGNG = 1
    47 REVLOGNG = 1
    47 REVLOGNGINLINEDATA = (1 << 16)
    48 REVLOGNGINLINEDATA = (1 << 16)
   337             self.nodemap = self._nodecache = nodemap
   338             self.nodemap = self._nodecache = nodemap
   338         if not self._chunkcache:
   339         if not self._chunkcache:
   339             self._chunkclear()
   340             self._chunkclear()
   340         # revnum -> (chain-length, sum-delta-length)
   341         # revnum -> (chain-length, sum-delta-length)
   341         self._chaininfocache = {}
   342         self._chaininfocache = {}
       
   343         # revlog header -> revlog compressor
       
   344         self._decompressors = {}
   342 
   345 
   343     @util.propertycache
   346     @util.propertycache
   344     def _compressor(self):
   347     def _compressor(self):
   345         return util.compengines['zlib'].revlogcompressor()
   348         return util.compengines['zlib'].revlogcompressor()
   346 
   349 
  1489         The chunk is expected to begin with a header identifying the
  1492         The chunk is expected to begin with a header identifying the
  1490         format type so it can be routed to an appropriate decompressor.
  1493         format type so it can be routed to an appropriate decompressor.
  1491         """
  1494         """
  1492         if not data:
  1495         if not data:
  1493             return data
  1496             return data
       
  1497 
       
  1498         # Revlogs are read much more frequently than they are written and many
       
  1499         # chunks only take microseconds to decompress, so performance is
       
  1500         # important here.
       
  1501         #
       
  1502         # We can make a few assumptions about revlogs:
       
  1503         #
       
  1504         # 1) the majority of chunks will be compressed (as opposed to inline
       
  1505         #    raw data).
       
  1506         # 2) decompressing *any* data will likely by at least 10x slower than
       
  1507         #    returning raw inline data.
       
  1508         # 3) we want to prioritize common and officially supported compression
       
  1509         #    engines
       
  1510         #
       
  1511         # It follows that we want to optimize for "decompress compressed data
       
  1512         # when encoded with common and officially supported compression engines"
       
  1513         # case over "raw data" and "data encoded by less common or non-official
       
  1514         # compression engines." That is why we have the inline lookup first
       
  1515         # followed by the compengines lookup.
       
  1516         #
       
  1517         # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
       
  1518         # compressed chunks. And this matters for changelog and manifest reads.
  1494         t = data[0]
  1519         t = data[0]
  1495         if t == '\0':
  1520 
  1496             return data
       
  1497         if t == 'x':
  1521         if t == 'x':
  1498             try:
  1522             try:
  1499                 return _decompress(data)
  1523                 return _zlibdecompress(data)
  1500             except zlib.error as e:
  1524             except zlib.error as e:
  1501                 raise RevlogError(_('revlog decompress error: %s') % str(e))
  1525                 raise RevlogError(_('revlog decompress error: %s') % str(e))
  1502         if t == 'u':
  1526         # '\0' is more common than 'u' so it goes first.
       
  1527         elif t == '\0':
       
  1528             return data
       
  1529         elif t == 'u':
  1503             return util.buffer(data, 1)
  1530             return util.buffer(data, 1)
  1504         raise RevlogError(_('unknown compression type %r') % t)
  1531 
       
  1532         try:
       
  1533             compressor = self._decompressors[t]
       
  1534         except KeyError:
       
  1535             try:
       
  1536                 engine = util.compengines.forrevlogheader(t)
       
  1537                 compressor = engine.revlogcompressor()
       
  1538                 self._decompressors[t] = compressor
       
  1539             except KeyError:
       
  1540                 raise RevlogError(_('unknown compression type %r') % t)
       
  1541 
       
  1542         return compressor.decompress(data)
  1505 
  1543 
  1506     def _isgooddelta(self, d, textlen):
  1544     def _isgooddelta(self, d, textlen):
  1507         """Returns True if the given delta is good. Good means that it is within
  1545         """Returns True if the given delta is good. Good means that it is within
  1508         the disk span, disk size, and chain length bounds that we know to be
  1546         the disk span, disk size, and chain length bounds that we know to be
  1509         performant."""
  1547         performant."""