Mercurial > public > mercurial-scm > hg-stable
diff contrib/python-zstandard/zstd/decompress/huf_decompress.c @ 40122:73fef626dae3
zstandard: vendor python-zstandard 0.10.1
This was just released.
The upstream source distribution from PyPI was extracted. Unwanted
files were removed.
The clang-format ignore list was updated to reflect the new source
of files.
setup.py was updated to pass a new argument to python-zstandard's
function for returning an Extension instance. Upstream had to change
to use relative paths because Python 3.7's packaging doesn't
seem to like absolute paths when defining sources, includes, etc.
The default relative path calculation is relative to setup_zstd.py
which is different from the directory of Mercurial's setup.py.
The project contains a vendored copy of zstandard 1.3.6. The old
version was 1.3.4.
The API should be backwards compatible and nothing in core should
need adjusted. However, there is a new "chunker" API that we
may find useful in places where we want to emit compressed chunks
of a fixed size.
There are a pair of bug fixes in 0.10.0 with regards to
compressobj() and decompressobj() when block flushing is used. I
actually found these bugs when introducing these APIs in Mercurial!
But existing Mercurial code is not affected because we don't
perform block flushing.
# no-check-commit because 3rd party code has different style guidelines
Differential Revision: https://phab.mercurial-scm.org/D4911
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 08 Oct 2018 16:27:40 -0700 |
parents | b1fb341d8a61 |
children | 675775c33ab6 |
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd/decompress/huf_decompress.c Tue Sep 25 20:55:03 2018 +0900 +++ b/contrib/python-zstandard/zstd/decompress/huf_decompress.c Mon Oct 08 16:27:40 2018 -0700 @@ -1,6 +1,7 @@ /* ****************************************************************** - Huffman decoder, part of New Generation Entropy library - Copyright (C) 2013-2016, Yann Collet. + huff0 huffman decoder, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -29,16 +30,15 @@ You can contact the author at : - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c ****************************************************************** */ /* ************************************************************** * Dependencies ****************************************************************/ #include <string.h> /* memcpy, memset */ +#include "compiler.h" #include "bitstream.h" /* BIT_* */ -#include "compiler.h" -#include "fse.h" /* header compression */ +#include "fse.h" /* to compress headers */ #define HUF_STATIC_LINKING_ONLY #include "huf.h" #include "error_private.h" @@ -48,7 +48,6 @@ * Error Management ****************************************************************/ #define HUF_isError ERR_isError -#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } @@ -75,15 +74,15 @@ /*-***************************/ /* single-symbol decoding */ /*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { U32 tableLog = 0; U32 nbSymbols = 0; size_t iSize; void* const dtPtr = DTable + 1; - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; U32* rankVal; BYTE* huffWeight; @@ -96,7 +95,7 @@ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); @@ -124,7 +123,7 @@ U32 const w = huffWeight[n]; U32 const length = (1 << w) >> 1; U32 u; - HUF_DEltX2 D; + HUF_DEltX1 D; D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); for (u = rankVal[w]; u < rankVal[w] + length; u++) dt[u] = D; @@ -134,17 +133,15 @@ return iSize; } -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, + return HUF_readDTableX1_wksp(DTable, src, srcSize, workSpace, sizeof(workSpace)); } -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ - FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ BYTE const c = dt[val].byte; @@ -152,44 +149,44 @@ return c; } -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) { BYTE* const pStart = p; /* up to 4 symbols at a time */ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); } /* [0-3] symbols remaining */ if (MEM_32bits()) while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); /* no more data to retrieve from bitstream, no need to reload */ while (p < pEnd) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); return pEnd-pStart; } FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( +HUF_decompress1X1_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -197,14 +194,14 @@ BYTE* op = (BYTE*)dst; BYTE* const oend = op + dstSize; const void* dtPtr = DTable + 1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; BIT_DStream_t bitD; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); @@ -212,7 +209,7 @@ } FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( +HUF_decompress4X1_usingDTable_internal_body( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) @@ -224,7 +221,7 @@ BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; const void* const dtPtr = DTable + 1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; /* Init */ BIT_DStream_t bitD1; @@ -260,22 +257,22 @@ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); BIT_reloadDStream(&bitD1); BIT_reloadDStream(&bitD2); BIT_reloadDStream(&bitD3); @@ -291,191 +288,10 @@ /* note : op4 supposed already verified within main loop */ /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } - - /* decoded size */ - return dstSize; - } -} - - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; -} - -#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -HINT_INLINE size_t -HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX4* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_1(p, bitDPtr); - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - } - - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); - - return p-pStart; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X4_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; - - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); - } - - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; -} - - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X4_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; - const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_1(op1, &bitD1); - HUF_DECODE_SYMBOLX4_1(op2, &bitD2); - HUF_DECODE_SYMBOLX4_1(op3, &bitD3); - HUF_DECODE_SYMBOLX4_1(op4, &bitD4); - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_0(op1, &bitD1); - HUF_DECODE_SYMBOLX4_0(op2, &bitD2); - HUF_DECODE_SYMBOLX4_0(op3, &bitD3); - HUF_DECODE_SYMBOLX4_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); /* check */ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); @@ -493,7 +309,7 @@ const HUF_DTable *DTable); #if DYNAMIC_BMI2 -#define X(fn) \ +#define HUF_DGEN(fn) \ \ static size_t fn##_default( \ void* dst, size_t dstSize, \ @@ -522,7 +338,7 @@ #else -#define X(fn) \ +#define HUF_DGEN(fn) \ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ { \ @@ -532,112 +348,114 @@ #endif -X(HUF_decompress1X2_usingDTable_internal) -X(HUF_decompress4X2_usingDTable_internal) -X(HUF_decompress1X4_usingDTable_internal) -X(HUF_decompress4X4_usingDTable_internal) +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) -#undef X -size_t HUF_decompress1X2_usingDTable( +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + +size_t HUF_decompress4X1_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } - -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize, + size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize) { - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); } -size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, sizeof(workSpace)); } -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } /* *************************/ /* double-symbols decoding */ /* *************************/ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -/* HUF_fillDTableX4Level2() : +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, const U32* rankValOrigin, const int minWeight, const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) { - HUF_DEltX4 DElt; + HUF_DEltX2 DElt; U32 rankVal[HUF_TABLELOG_MAX + 1]; /* get pre-calculated rankVal */ @@ -672,10 +490,8 @@ } } } -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; -static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, const U32 sortedListSize, const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) @@ -700,12 +516,12 @@ int minWeight = nbBits + scaleLog; if (minWeight < 1) minWeight = 1; sortedRank = rankStart[minWeight]; - HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList+sortedRank, sortedListSize-sortedRank, nbBitsBaseline, symbol); } else { - HUF_DEltX4 DElt; + HUF_DEltX2 DElt; MEM_writeLE16(&(DElt.sequence), symbol); DElt.nbBits = (BYTE)(nbBits); DElt.length = 1; @@ -717,16 +533,16 @@ } } -size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src, - size_t srcSize, void* workSpace, - size_t wkspSize) +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) { U32 tableLog, maxW, sizeOfSort, nbSymbols; DTableDesc dtd = HUF_getDTableDesc(DTable); U32 const maxTableLog = dtd.maxTableLog; size_t iSize; void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; U32 *rankStart; rankValCol_t* rankVal; @@ -752,7 +568,7 @@ rankStart = rankStart0 + 1; memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); - HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ @@ -806,7 +622,7 @@ rankValPtr[w] = rankVal0[w] >> consumed; } } } } - HUF_fillDTableX4(dt, maxTableLog, + HUF_fillDTableX2(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog+1); @@ -817,112 +633,296 @@ return iSize; } -size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize) +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX4_wksp(DTable, src, srcSize, + return HUF_readDTableX2_wksp(DTable, src, srcSize, workSpace, sizeof(workSpace)); } -size_t HUF_decompress1X4_usingDTable( + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); } -size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, workSpace, sizeof(workSpace)); } -size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } -size_t HUF_decompress4X4_usingDTable( +size_t HUF_decompress4X2_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc dtd = HUF_getDTableDesc(DTable); if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } -static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; - size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } -size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize) { - return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); } -size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, sizeof(workSpace)); } -size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } -/* ********************************/ -/* Generic decompression selector */ -/* ********************************/ +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, @@ -930,8 +930,8 @@ const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); } @@ -960,12 +960,12 @@ /** HUF_selectDecoder() : * Tells which decoder is likely to decode faster, * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . * Assumption : 0 < dstSize <= 128 KB */ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); - assert(dstSize <= 128 KB); + assert(dstSize <= 128*1024); /* decoder timing evaluation */ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ U32 const D256 = (U32)(dstSize >> 8); @@ -980,7 +980,7 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -1002,8 +1002,8 @@ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; } } @@ -1025,8 +1025,8 @@ if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): - HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); } } @@ -1041,9 +1041,9 @@ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): - HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); } } @@ -1060,27 +1060,27 @@ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); } -size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); } size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) @@ -1090,7 +1090,7 @@ if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); } }