337 self.nodemap = self._nodecache = nodemap |
338 self.nodemap = self._nodecache = nodemap |
338 if not self._chunkcache: |
339 if not self._chunkcache: |
339 self._chunkclear() |
340 self._chunkclear() |
340 # revnum -> (chain-length, sum-delta-length) |
341 # revnum -> (chain-length, sum-delta-length) |
341 self._chaininfocache = {} |
342 self._chaininfocache = {} |
|
343 # revlog header -> revlog compressor |
|
344 self._decompressors = {} |
342 |
345 |
343 @util.propertycache |
346 @util.propertycache |
344 def _compressor(self): |
347 def _compressor(self): |
345 return util.compengines['zlib'].revlogcompressor() |
348 return util.compengines['zlib'].revlogcompressor() |
346 |
349 |
1489 The chunk is expected to begin with a header identifying the |
1492 The chunk is expected to begin with a header identifying the |
1490 format type so it can be routed to an appropriate decompressor. |
1493 format type so it can be routed to an appropriate decompressor. |
1491 """ |
1494 """ |
1492 if not data: |
1495 if not data: |
1493 return data |
1496 return data |
|
1497 |
|
1498 # Revlogs are read much more frequently than they are written and many |
|
1499 # chunks only take microseconds to decompress, so performance is |
|
1500 # important here. |
|
1501 # |
|
1502 # We can make a few assumptions about revlogs: |
|
1503 # |
|
1504 # 1) the majority of chunks will be compressed (as opposed to inline |
|
1505 # raw data). |
|
1506 # 2) decompressing *any* data will likely be at least 10x slower than |
|
1507 # returning raw inline data. |
|
1508 # 3) we want to prioritize common and officially supported compression |
|
1509 # engines |
|
1510 # |
|
1511 # It follows that we want to optimize for "decompress compressed data |
|
1512 # when encoded with common and officially supported compression engines" |
|
1513 # case over "raw data" and "data encoded by less common or non-official |
|
1514 # compression engines." That is why we have the inline lookup first |
|
1515 # followed by the compengines lookup. |
|
1516 # |
|
1517 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib |
|
1518 # compressed chunks. And this matters for changelog and manifest reads. |
1494 t = data[0] |
1519 t = data[0] |
1495 if t == '\0': |
1520 |
1496 return data |
|
1497 if t == 'x': |
1521 if t == 'x': |
1498 try: |
1522 try: |
1499 return _decompress(data) |
1523 return _zlibdecompress(data) |
1500 except zlib.error as e: |
1524 except zlib.error as e: |
1501 raise RevlogError(_('revlog decompress error: %s') % str(e)) |
1525 raise RevlogError(_('revlog decompress error: %s') % str(e)) |
1502 if t == 'u': |
1526 # '\0' is more common than 'u' so it goes first. |
|
1527 elif t == '\0': |
|
1528 return data |
|
1529 elif t == 'u': |
1503 return util.buffer(data, 1) |
1530 return util.buffer(data, 1) |
1504 raise RevlogError(_('unknown compression type %r') % t) |
1531 |
|
1532 try: |
|
1533 compressor = self._decompressors[t] |
|
1534 except KeyError: |
|
1535 try: |
|
1536 engine = util.compengines.forrevlogheader(t) |
|
1537 compressor = engine.revlogcompressor() |
|
1538 self._decompressors[t] = compressor |
|
1539 except KeyError: |
|
1540 raise RevlogError(_('unknown compression type %r') % t) |
|
1541 |
|
1542 return compressor.decompress(data) |
1505 |
1543 |
1506 def _isgooddelta(self, d, textlen): |
1544 def _isgooddelta(self, d, textlen): |
1507 """Returns True if the given delta is good. Good means that it is within |
1545 """Returns True if the given delta is good. Good means that it is within |
1508 the disk span, disk size, and chain length bounds that we know to be |
1546 the disk span, disk size, and chain length bounds that we know to be |
1509 performant.""" |
1547 performant.""" |