Mercurial > public > mercurial-scm > hg
diff contrib/python-zstandard/zstd/compress/zstd_opt.c @ 42070:675775c33ab6
zstandard: vendor python-zstandard 0.11
The upstream source distribution from PyPI was extracted. Unwanted
files were removed.
The clang-format ignore list was updated to reflect the new source
of files.
The project contains a vendored copy of zstandard 1.3.8. The old
version was 1.3.6. This should result in some minor performance wins.
test-check-py3-compat.t was updated to reflect now-passing tests on
Python 3.8.
Some HTTP tests were updated to reflect new zstd compression output.
# no-check-commit because 3rd party code has different style guidelines
Differential Revision: https://phab.mercurial-scm.org/D6199
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Thu, 04 Apr 2019 17:34:43 -0700 |
parents | 73fef626dae3 |
children | 69de49c4e39c |
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd/compress/zstd_opt.c Thu Apr 04 15:24:03 2019 -0700 +++ b/contrib/python-zstandard/zstd/compress/zstd_opt.c Thu Apr 04 17:34:43 2019 -0700 @@ -17,6 +17,8 @@ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ #define ZSTD_MAX_PRICE (1<<30) +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + /*-************************************* * Price functions for optimal parser @@ -52,11 +54,15 @@ return weight; } -/* debugging function, @return price in bytes */ +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ MEM_STATIC double ZSTD_fCost(U32 price) { return (double)price / (BITCOST_MULTIPLIER*8); } +#endif static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) { @@ -67,29 +73,44 @@ } -static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus) +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) { U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); - for (s=0; s<=lastEltIndex; s++) { + for (s=0; s<lastEltIndex+1; s++) { table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); sum += table[s]; } return sum; } -static void ZSTD_rescaleFreqs(optState_t* const optPtr, - const BYTE* const src, size_t const srcSize, - int optLevel) +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) { + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); optPtr->priceType = zop_dynamic; if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= 1024) /* heuristic */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); optPtr->priceType = zop_predef; + } assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ optPtr->priceType = zop_dynamic; assert(optPtr->litFreq != NULL); @@ -208,7 +229,9 @@ /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); - return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel)); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); } } @@ -253,7 +276,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(U32 const offset, U32 const matchLength, - const optState_t* const optPtr, + const optState_t* const optPtr, int const optLevel) { U32 price; @@ -385,7 +408,6 @@ U32* largerPtr = smallerPtr + 1; U32 dummy32; /* to be nullified at the end */ U32 const windowLow = ms->window.lowLimit; - U32 const matchLow = windowLow ? windowLow : 1; U32 matchEndIdx = current+8+1; size_t bestLength = 8; U32 nbCompares = 1U << cParams->searchLog; @@ -401,7 +423,8 @@ assert(ip <= iend-8); /* required for h calculation */ hashTable[h] = current; /* Update Hash Table */ - while (nbCompares-- && (matchIndex >= matchLow)) { + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ assert(matchIndex < current); @@ -479,7 +502,7 @@ const BYTE* const base = ms->window.base; U32 const target = (U32)(ip - base); U32 idx = ms->nextToUpdate; - DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", idx, target, dictMode); while(idx < target) @@ -488,15 +511,18 @@ } void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict); + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); } FORCE_INLINE_TEMPLATE U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_matchState_t* ms, const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], U32 const ll0, - ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) + U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + ZSTD_match_t* matches, + const U32 lengthToBeat, + U32 const mls /* template */) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); @@ -542,6 +568,7 @@ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ { U32 const lastR = ZSTD_REP_NUM + ll0; U32 repCode; for (repCode = ll0; repCode < lastR; repCode++) { @@ -724,7 +751,7 @@ ZSTD_match_t* matches, U32 const lengthToBeat) { const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->searchLength; + U32 const matchLengthSearch = cParams->minMatch; DEBUGLOG(8, "ZSTD_BtGetAllMatches"); if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); @@ -774,12 +801,30 @@ return sol.litlen + sol.mlen; } +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, const ZSTD_dictMode_e dictMode) + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) { optState_t* const optStatePtr = &ms->opt; const BYTE* const istart = (const BYTE*)src; @@ -792,14 +837,15 @@ const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4; + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; ZSTD_optimal_t lastSequence; /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); assert(optLevel <= 2); ms->nextToUpdate3 = ms->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); @@ -999,7 +1045,7 @@ U32 const offCode = opt[storePos].off; U32 const advance = llen + mlen; DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, llen, mlen); + anchor - istart, (unsigned)llen, (unsigned)mlen); if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ assert(storePos == storeEnd); /* must be last sequence */ @@ -1047,11 +1093,11 @@ /* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus) +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) { U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus > 0); - for (s=0; s<=lastEltIndex; s++) { + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; s<lastEltIndex+1; s++) { table[s] <<= ZSTD_FREQ_DIV+bonus; table[s]--; sum += table[s]; @@ -1063,9 +1109,43 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) { optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its constract must be respected. + */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + ms->nextToUpdate3 = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); } size_t ZSTD_compressBlock_btultra( @@ -1073,33 +1153,34 @@ const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); -#if 0 - /* 2-pass strategy (disabled) + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const current = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: * this strategy makes a first pass over first block to collect statistics * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. * The compression ratio gain is generally small (~0.5% on first block), * the cost is 2x cpu time on first block. */ assert(srcSize <= ZSTD_BLOCKSIZE_MAX); if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */ - U32 tmpRep[ZSTD_REP_NUM]; - DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics"); - assert(ms->nextToUpdate >= ms->window.dictLimit - && ms->nextToUpdate <= ms->window.dictLimit + 1); - memcpy(tmpRep, rep, sizeof(tmpRep)); - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ - ZSTD_resetSeqStore(seqStore); - /* invalidate first scan from history */ - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; - ms->window.lowLimit = ms->window.dictLimit; - ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); } -#endif + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); } @@ -1130,3 +1211,7 @@ { return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); } + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */