diff contrib/python-zstandard/zstd/compress/zstd_compress_internal.h @ 42070:675775c33ab6

zstandard: vendor python-zstandard 0.11 The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.8. The old version was 1.3.6. This should result in some minor performance wins. test-check-py3-compat.t was updated to reflect now-passing tests on Python 3.8. Some HTTP tests were updated to reflect new zstd compression output. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D6199
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 04 Apr 2019 17:34:43 -0700
parents 73fef626dae3
children 69de49c4e39c
line wrap: on
line diff
--- a/contrib/python-zstandard/zstd/compress/zstd_compress_internal.h	Thu Apr 04 15:24:03 2019 -0700
+++ b/contrib/python-zstandard/zstd/compress/zstd_compress_internal.h	Thu Apr 04 17:34:43 2019 -0700
@@ -48,12 +48,6 @@
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
 
-typedef enum {
-    ZSTD_dictDefaultAttach = 0,
-    ZSTD_dictForceAttach = 1,
-    ZSTD_dictForceCopy = -1,
-} ZSTD_dictAttachPref_e;
-
 typedef struct ZSTD_prefixDict_s {
     const void* dict;
     size_t dictSize;
@@ -96,10 +90,10 @@
 
 typedef struct {
     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
-    U32* litFreq;                /* table of literals statistics, of size 256 */
-    U32* litLengthFreq;          /* table of litLength statistics, of size (MaxLL+1) */
-    U32* matchLengthFreq;        /* table of matchLength statistics, of size (MaxML+1) */
-    U32* offCodeFreq;            /* table of offCode statistics, of size (MaxOff+1) */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
     ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
     ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
 
@@ -139,7 +133,7 @@
     U32* hashTable3;
     U32* chainTable;
     optState_t opt;         /* optimal parser state */
-    const ZSTD_matchState_t *dictMatchState;
+    const ZSTD_matchState_t * dictMatchState;
     ZSTD_compressionParameters cParams;
 };
 
@@ -167,7 +161,7 @@
     U32 hashLog;            /* Log size of hashTable */
     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
     U32 minMatchLength;     /* Minimum match length */
-    U32 hashEveryLog;       /* Log number of entries to skip */
+    U32 hashRateLog;       /* Log number of entries to skip */
     U32 windowLog;          /* Window log for the LDM */
 } ldmParams_t;
 
@@ -196,9 +190,10 @@
     ZSTD_dictAttachPref_e attachDictPref;
 
     /* Multithreading: used to pass parameters to mtctx */
-    unsigned nbWorkers;
-    unsigned jobSize;
-    unsigned overlapSizeLog;
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
 
     /* Long distance matching parameters */
     ldmParams_t ldmParams;
@@ -498,6 +493,64 @@
     }
 }
 
+/** ZSTD_ipow() :
+ * Return base^exponent.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 power = 1;
+    while (exponent) {
+      if (exponent & 1) power *= base;
+      exponent >>= 1;
+      base *= base;
+    }
+    return power;
+}
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/** ZSTD_rollingHash_append() :
+ * Add the buffer to the hash value.
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* istart = (BYTE const*)buf;
+    size_t pos;
+    for (pos = 0; pos < size; ++pos) {
+        hash *= prime8bytes;
+        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    }
+    return hash;
+}
+
+/** ZSTD_rollingHash_compute() :
+ * Compute the rolling hash value of the buffer.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    return ZSTD_rollingHash_append(0, buf, size);
+}
+
+/** ZSTD_rollingHash_primePower() :
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
+ * over a window of length bytes.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    return ZSTD_ipow(prime8bytes, length - 1);
+}
+
+/** ZSTD_rollingHash_rotate() :
+ * Rotate the rolling hash by one byte.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    hash *= prime8bytes;
+    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return hash;
+}
+
 /*-*************************************
 *  Round buffer management
 ***************************************/
@@ -626,20 +679,23 @@
  * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
  * is below them. forceWindow and dictMatchState are therefore incompatible.
  */
-MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                                           void const* srcEnd, U32 maxDist,
-                                           U32* loadedDictEndPtr,
-                                           const ZSTD_matchState_t** dictMatchStatePtr)
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                           void const* srcEnd,
+                           U32 maxDist,
+                           U32* loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
 {
-    U32 const current = (U32)((BYTE const*)srcEnd - window->base);
-    U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
-    if (current > maxDist + loadedDictEnd) {
-        U32 const newLowLimit = current - maxDist;
+    U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
+    U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist);
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
         if (window->dictLimit < window->lowLimit) {
             DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
-                        window->dictLimit, window->lowLimit);
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
             window->dictLimit = window->lowLimit;
         }
         if (loadedDictEndPtr)
@@ -690,20 +746,23 @@
 
 
 /* debug functions */
+#if (DEBUGLEVEL>=2)
 
 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
 {
     U32 const fp_accuracy = 8;
     U32 const fp_multiplier = (1 << fp_accuracy);
-    U32 const stat = rawStat + 1;
-    U32 const hb = ZSTD_highbit32(stat);
+    U32 const newStat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(newStat);
     U32 const BWeight = hb * fp_multiplier;
-    U32 const FWeight = (stat << fp_accuracy) >> hb;
+    U32 const FWeight = (newStat << fp_accuracy) >> hb;
     U32 const weight = BWeight + FWeight;
     assert(hb + fp_accuracy < 31);
     return (double)weight / fp_multiplier;
 }
 
+/* display a table content,
+ * listing each element, its frequency, and its predicted bit cost */
 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
 {
     unsigned u, sum;
@@ -715,6 +774,9 @@
     }
 }
 
+#endif
+
+
 #if defined (__cplusplus)
 }
 #endif