diff contrib/python-zstandard/zstd/compress/zstd_opt.c @ 42070:675775c33ab6

zstandard: vendor python-zstandard 0.11

The upstream source distribution from PyPI was extracted. Unwanted
files were removed.

The clang-format ignore list was updated to reflect the new source
of files.

The project contains a vendored copy of zstandard 1.3.8. The old
version was 1.3.6. This should result in some minor performance wins.

test-check-py3-compat.t was updated to reflect now-passing tests on
Python 3.8.

Some HTTP tests were updated to reflect new zstd compression output.

# no-check-commit because 3rd party code has different style guidelines

Differential Revision: https://phab.mercurial-scm.org/D6199
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 04 Apr 2019 17:34:43 -0700
parents 73fef626dae3
children 69de49c4e39c
--- a/contrib/python-zstandard/zstd/compress/zstd_opt.c	Thu Apr 04 15:24:03 2019 -0700
+++ b/contrib/python-zstandard/zstd/compress/zstd_opt.c	Thu Apr 04 17:34:43 2019 -0700
@@ -17,6 +17,8 @@
 #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
 #define ZSTD_MAX_PRICE     (1<<30)
 
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
 
 /*-*************************************
 *  Price functions for optimal parser
@@ -52,11 +54,15 @@
     return weight;
 }
 
-/* debugging function, @return price in bytes */
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
 MEM_STATIC double ZSTD_fCost(U32 price)
 {
     return (double)price / (BITCOST_MULTIPLIER*8);
 }
+#endif
 
 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 {
@@ -67,29 +73,44 @@
 }
 
 
-static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
 {
     U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
     assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
-    for (s=0; s<=lastEltIndex; s++) {
+    for (s=0; s<lastEltIndex+1; s++) {
         table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
         sum += table[s];
     }
     return sum;
 }
 
-static void ZSTD_rescaleFreqs(optState_t* const optPtr,
-                              const BYTE* const src, size_t const srcSize,
-                              int optLevel)
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+                  int const optLevel)
 {
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
     optPtr->priceType = zop_dynamic;
 
     if (optPtr->litLengthSum == 0) {  /* first block : init */
-        if (srcSize <= 1024)   /* heuristic */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
             optPtr->priceType = zop_predef;
+        }
 
         assert(optPtr->symbolCosts != NULL);
-        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
             optPtr->priceType = zop_dynamic;
 
             assert(optPtr->litFreq != NULL);
@@ -208,7 +229,9 @@
 
     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-        return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+             + optPtr->litLengthSumBasePrice
+             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
     }
 }
 
@@ -253,7 +276,7 @@
 FORCE_INLINE_TEMPLATE U32
 ZSTD_getMatchPrice(U32 const offset,
                    U32 const matchLength,
-                   const optState_t* const optPtr,
+             const optState_t* const optPtr,
                    int const optLevel)
 {
     U32 price;
@@ -385,7 +408,6 @@
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
     U32 const windowLow = ms->window.lowLimit;
-    U32 const matchLow = windowLow ? windowLow : 1;
     U32 matchEndIdx = current+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -401,7 +423,8 @@
     assert(ip <= iend-8);   /* required for h calculation */
     hashTable[h] = current;   /* Update Hash Table */
 
-    while (nbCompares-- && (matchIndex >= matchLow)) {
+    assert(windowLow > 0);
+    while (nbCompares-- && (matchIndex >= windowLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         assert(matchIndex < current);
@@ -479,7 +502,7 @@
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                 idx, target, dictMode);
 
     while(idx < target)
@@ -488,15 +511,18 @@
 }
 
 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
-    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
+    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
 }
 
 FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
                     ZSTD_matchState_t* ms,
                     const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                    U32 rep[ZSTD_REP_NUM], U32 const ll0,
-                    ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
+                    U32 rep[ZSTD_REP_NUM],
+                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                    ZSTD_match_t* matches,
+                    const U32 lengthToBeat,
+                    U32 const mls /* template */)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -542,6 +568,7 @@
     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
 
     /* check repCode */
+    assert(ll0 <= 1);   /* necessarily 1 or 0 */
     {   U32 const lastR = ZSTD_REP_NUM + ll0;
         U32 repCode;
         for (repCode = ll0; repCode < lastR; repCode++) {
@@ -724,7 +751,7 @@
                         ZSTD_match_t* matches, U32 const lengthToBeat)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32 const matchLengthSearch = cParams->searchLength;
+    U32 const matchLengthSearch = cParams->minMatch;
     DEBUGLOG(8, "ZSTD_BtGetAllMatches");
     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
     ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
@@ -774,12 +801,30 @@
     return sol.litlen + sol.mlen;
 }
 
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+    int const nbElts = lastEltID + 1;
+    int enb;
+    for (enb=0; enb < nbElts; enb++) {
+        (void)table;
+        //RAWLOG(2, "%3i:%3i,  ", enb, table[enb]);
+        RAWLOG(2, "%4i,", table[enb]);
+    }
+    RAWLOG(2, " \n");
+}
+
+#endif
+
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                seqStore_t* seqStore,
                                U32 rep[ZSTD_REP_NUM],
-                               const void* src, size_t srcSize,
-                               const int optLevel, const ZSTD_dictMode_e dictMode)
+                         const void* src, size_t srcSize,
+                         const int optLevel,
+                         const ZSTD_dictMode_e dictMode)
 {
     optState_t* const optStatePtr = &ms->opt;
     const BYTE* const istart = (const BYTE*)src;
@@ -792,14 +837,15 @@
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
 
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
+    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
     ZSTD_optimal_t lastSequence;
 
     /* init */
-    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
     assert(optLevel <= 2);
     ms->nextToUpdate3 = ms->nextToUpdate;
     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
@@ -999,7 +1045,7 @@
                     U32 const offCode = opt[storePos].off;
                     U32 const advance = llen + mlen;
                     DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
-                                anchor - istart, llen, mlen);
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
 
                     if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                         assert(storePos == storeEnd);   /* must be last sequence */
@@ -1047,11 +1093,11 @@
 
 
 /* used in 2-pass strategy */
-static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
 {
     U32 s, sum=0;
-    assert(ZSTD_FREQ_DIV+bonus > 0);
-    for (s=0; s<=lastEltIndex; s++) {
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
         table[s] <<= ZSTD_FREQ_DIV+bonus;
         table[s]--;
         sum += table[s];
@@ -1063,9 +1109,43 @@
 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
 {
     optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
-    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
-    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
-    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+               const void* src, size_t srcSize)
+{
+    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    memcpy(tmpRep, rep, sizeof(tmpRep));
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as two's complement) */
+
+    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+    ms->nextToUpdate3 = ms->window.dictLimit;
+
+    /* reinforce weight of collected statistics */
+    ZSTD_upscaleStats(&ms->opt);
 }
 
 size_t ZSTD_compressBlock_btultra(
@@ -1073,33 +1153,34 @@
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-#if 0
-    /* 2-pass strategy (disabled)
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    U32 const current = (U32)((const BYTE*)src - ms->window.base);
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy:
      * this strategy makes a first pass over first block to collect statistics
      * and seed next round's statistics with it.
+     * After 1st pass, function forgets everything, and starts a new block.
+     * Consequently, this can only work if no data has been previously loaded in tables,
+     * aka, no dictionary, no prefix, no ldm preprocessing.
      * The compression ratio gain is generally small (~0.5% on first block),
      * the cost is 2x cpu time on first block. */
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
     if ( (ms->opt.litLengthSum==0)   /* first block */
-      && (seqStore->sequences == seqStore->sequencesStart)   /* no ldm */
-      && (ms->window.dictLimit == ms->window.lowLimit) ) {   /* no dictionary */
-        U32 tmpRep[ZSTD_REP_NUM];
-        DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
-        assert(ms->nextToUpdate >= ms->window.dictLimit
-            && ms->nextToUpdate <= ms->window.dictLimit + 1);
-        memcpy(tmpRep, rep, sizeof(tmpRep));
-        ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
-        ZSTD_resetSeqStore(seqStore);
-        /* invalidate first scan from history */
-        ms->window.base -= srcSize;
-        ms->window.dictLimit += (U32)srcSize;
-        ms->window.lowLimit = ms->window.dictLimit;
-        ms->nextToUpdate = ms->window.dictLimit;
-        ms->nextToUpdate3 = ms->window.dictLimit;
-        /* re-inforce weight of collected statistics */
-        ZSTD_upscaleStats(&ms->opt);
+      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
+      && (current == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      ) {
+        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
-#endif
+
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
 }
 
@@ -1130,3 +1211,7 @@
 {
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
 }
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and only applies to the first block (no prefix) */
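
As a companion to the rescaling changes above, here is a minimal standalone sketch (not part of the patch) that mirrors the arithmetic of ZSTD_downscaleStat() and ZSTD_upscaleStat() with ZSTD_FREQ_DIV = 4. It shows how per-symbol counts are shrunk when seeding the next block's statistics, and re-amplified after the first btultra2 pass so they outweigh the 1-per-symbol floor. The helper names and the main() driver are illustrative assumptions, not zstd API.

#include <stdio.h>

#define FREQ_DIV 4   /* mirrors ZSTD_FREQ_DIV in the patch */

/* same arithmetic as ZSTD_downscaleStat(): shrink each count by
 * 2^(FREQ_DIV+malus), keeping a floor of 1 so no symbol's price
 * becomes infinite; returns the new sum of elements */
static unsigned downscale(unsigned* table, unsigned lastEltIndex, int malus)
{
    unsigned s, sum = 0;
    for (s = 0; s < lastEltIndex + 1; s++) {
        table[s] = 1 + (table[s] >> (FREQ_DIV + malus));
        sum += table[s];
    }
    return sum;
}

/* same arithmetic as ZSTD_upscaleStat(): re-amplify counts collected
 * during the first pass of the 2-pass strategy */
static unsigned upscale(unsigned* table, unsigned lastEltIndex, int bonus)
{
    unsigned s, sum = 0;
    for (s = 0; s < lastEltIndex + 1; s++) {
        table[s] <<= (FREQ_DIV + bonus);
        table[s]--;
        sum += table[s];
    }
    return sum;
}

int main(void)
{
    unsigned freq[4] = { 100, 17, 3, 0 };
    unsigned sum;

    sum = downscale(freq, 3, 0);
    /* 100 -> 1+(100>>4) = 7, 17 -> 2, 3 -> 1, 0 -> 1 ; sum = 11 */
    printf("downscaled: %u %u %u %u (sum=%u)\n",
           freq[0], freq[1], freq[2], freq[3], sum);

    sum = upscale(freq, 3, 0);
    /* 7 -> (7<<4)-1 = 111, 2 -> 31, 1 -> 15, 1 -> 15 ; sum = 172 */
    printf("upscaled:   %u %u %u %u (sum=%u)\n",
           freq[0], freq[1], freq[2], freq[3], sum);
    return 0;
}

Note how an unseen symbol (count 0) still ends up with a nonzero count after downscaling, which is why ZSTD_upscaleStats() can safely re-amplify every entry.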