contrib/python-zstandard/zstd/compress/zstd_lazy.c
changeset 40121 73fef626dae3
parent 37495 b1fb341d8a61
child 42070 675775c33ab6
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
    14 
    14 
    15 /*-*************************************
    15 /*-*************************************
    16 *  Binary Tree search
    16 *  Binary Tree search
    17 ***************************************/
    17 ***************************************/
    18 
    18 
    19 void ZSTD_updateDUBT(
    19 static void
    20                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
    20 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
    21                 const BYTE* ip, const BYTE* iend,
    21                 const BYTE* ip, const BYTE* iend,
    22                 U32 mls)
    22                 U32 mls)
    23 {
    23 {
       
    24     const ZSTD_compressionParameters* const cParams = &ms->cParams;
    24     U32* const hashTable = ms->hashTable;
    25     U32* const hashTable = ms->hashTable;
    25     U32  const hashLog = cParams->hashLog;
    26     U32  const hashLog = cParams->hashLog;
    26 
    27 
    27     U32* const bt = ms->chainTable;
    28     U32* const bt = ms->chainTable;
    28     U32  const btLog  = cParams->chainLog - 1;
    29     U32  const btLog  = cParams->chainLog - 1;
    57 
    58 
    58 /** ZSTD_insertDUBT1() :
    59 /** ZSTD_insertDUBT1() :
    59  *  sort one already inserted but unsorted position
    60  *  sort one already inserted but unsorted position
    60  *  assumption : current >= btlow == (current - btmask)
    61  *  assumption : current >= btlow == (current - btmask)
    61  *  doesn't fail */
    62  *  doesn't fail */
    62 static void ZSTD_insertDUBT1(
    63 static void
    63                  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
    64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
    64                  U32 current, const BYTE* inputEnd,
    65                  U32 current, const BYTE* inputEnd,
    65                  U32 nbCompares, U32 btLow, int extDict)
    66                  U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
    66 {
    67 {
       
    68     const ZSTD_compressionParameters* const cParams = &ms->cParams;
    67     U32*   const bt = ms->chainTable;
    69     U32*   const bt = ms->chainTable;
    68     U32    const btLog  = cParams->chainLog - 1;
    70     U32    const btLog  = cParams->chainLog - 1;
    69     U32    const btMask = (1 << btLog) - 1;
    71     U32    const btMask = (1 << btLog) - 1;
    70     size_t commonLengthSmaller=0, commonLengthLarger=0;
    72     size_t commonLengthSmaller=0, commonLengthLarger=0;
    71     const BYTE* const base = ms->window.base;
    73     const BYTE* const base = ms->window.base;
    90     while (nbCompares-- && (matchIndex > windowLow)) {
    92     while (nbCompares-- && (matchIndex > windowLow)) {
    91         U32* const nextPtr = bt + 2*(matchIndex & btMask);
    93         U32* const nextPtr = bt + 2*(matchIndex & btMask);
    92         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
    94         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
    93         assert(matchIndex < current);
    95         assert(matchIndex < current);
    94 
    96 
    95         if ( (!extDict)
    97         if ( (dictMode != ZSTD_extDict)
    96           || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
    98           || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
    97           || (current < dictLimit) /* both in extDict */) {
    99           || (current < dictLimit) /* both in extDict */) {
    98             const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
   100             const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
       
   101                                      || (matchIndex+matchLength >= dictLimit)) ?
       
   102                                         base : dictBase;
    99             assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
   103             assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
   100                  || (current < dictLimit) );
   104                  || (current < dictLimit) );
   101             match = mBase + matchIndex;
   105             match = mBase + matchIndex;
   102             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
   106             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
   103         } else {
   107         } else {
   136 
   140 
   137     *smallerPtr = *largerPtr = 0;
   141     *smallerPtr = *largerPtr = 0;
   138 }
   142 }
   139 
   143 
   140 
   144 
   141 static size_t ZSTD_DUBT_findBestMatch (
   145 static size_t
   142                             ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   146 ZSTD_DUBT_findBetterDictMatch (
   143                             const BYTE* const ip, const BYTE* const iend,
   147         ZSTD_matchState_t* ms,
   144                             size_t* offsetPtr,
   148         const BYTE* const ip, const BYTE* const iend,
   145                             U32 const mls,
   149         size_t* offsetPtr,
   146                             U32 const extDict)
   150         U32 nbCompares,
   147 {
   151         U32 const mls,
       
   152         const ZSTD_dictMode_e dictMode)
       
   153 {
       
   154     const ZSTD_matchState_t * const dms = ms->dictMatchState;
       
   155     const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
       
   156     const U32 * const dictHashTable = dms->hashTable;
       
   157     U32         const hashLog = dmsCParams->hashLog;
       
   158     size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
       
   159     U32               dictMatchIndex = dictHashTable[h];
       
   160 
       
   161     const BYTE* const base = ms->window.base;
       
   162     const BYTE* const prefixStart = base + ms->window.dictLimit;
       
   163     U32         const current = (U32)(ip-base);
       
   164     const BYTE* const dictBase = dms->window.base;
       
   165     const BYTE* const dictEnd = dms->window.nextSrc;
       
   166     U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
       
   167     U32         const dictLowLimit = dms->window.lowLimit;
       
   168     U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
       
   169 
       
   170     U32*        const dictBt = dms->chainTable;
       
   171     U32         const btLog  = dmsCParams->chainLog - 1;
       
   172     U32         const btMask = (1 << btLog) - 1;
       
   173     U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
       
   174 
       
   175     size_t commonLengthSmaller=0, commonLengthLarger=0, bestLength=0;
       
   176     U32 matchEndIdx = current+8+1;
       
   177 
       
   178     (void)dictMode;
       
   179     assert(dictMode == ZSTD_dictMatchState);
       
   180 
       
   181     while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
       
   182         U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
       
   183         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
       
   184         const BYTE* match = dictBase + dictMatchIndex;
       
   185         matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
       
   186         if (dictMatchIndex+matchLength >= dictHighLimit)
       
   187             match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
       
   188 
       
   189         if (matchLength > bestLength) {
       
   190             U32 matchIndex = dictMatchIndex + dictIndexDelta;
       
   191             if (matchLength > matchEndIdx - matchIndex)
       
   192                 matchEndIdx = matchIndex + (U32)matchLength;
       
   193             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
       
   194                 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
       
   195                     current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
       
   196                 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
       
   197             }
       
   198             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
       
   199                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
       
   200             }
       
   201         }
       
   202 
       
   203         DEBUGLOG(2, "matchLength:%6zu, match:%p, prefixStart:%p, ip:%p", matchLength, match, prefixStart, ip);
       
   204         if (match[matchLength] < ip[matchLength]) {
       
   205             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
       
   206             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
       
   207             dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
       
   208         } else {
       
   209             /* match is larger than current */
       
   210             if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
       
   211             commonLengthLarger = matchLength;
       
   212             dictMatchIndex = nextPtr[0];
       
   213         }
       
   214     }
       
   215 
       
   216     if (bestLength >= MINMATCH) {
       
   217         U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
       
   218         DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
       
   219                     current, (U32)bestLength, (U32)*offsetPtr, mIndex);
       
   220     }
       
   221     return bestLength;
       
   222 
       
   223 }
       
   224 
       
   225 
       
   226 static size_t
       
   227 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
       
   228                         const BYTE* const ip, const BYTE* const iend,
       
   229                         size_t* offsetPtr,
       
   230                         U32 const mls,
       
   231                         const ZSTD_dictMode_e dictMode)
       
   232 {
       
   233     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   148     U32*   const hashTable = ms->hashTable;
   234     U32*   const hashTable = ms->hashTable;
   149     U32    const hashLog = cParams->hashLog;
   235     U32    const hashLog = cParams->hashLog;
   150     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
   236     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
   151     U32          matchIndex  = hashTable[h];
   237     U32          matchIndex  = hashTable[h];
   152 
   238 
   193     /* batch sort stacked candidates */
   279     /* batch sort stacked candidates */
   194     matchIndex = previousCandidate;
   280     matchIndex = previousCandidate;
   195     while (matchIndex) {  /* will end on matchIndex == 0 */
   281     while (matchIndex) {  /* will end on matchIndex == 0 */
   196         U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
   282         U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
   197         U32 const nextCandidateIdx = *nextCandidateIdxPtr;
   283         U32 const nextCandidateIdx = *nextCandidateIdxPtr;
   198         ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
   284         ZSTD_insertDUBT1(ms, matchIndex, iend,
   199                          nbCandidates, unsortLimit, extDict);
   285                          nbCandidates, unsortLimit, dictMode);
   200         matchIndex = nextCandidateIdx;
   286         matchIndex = nextCandidateIdx;
   201         nbCandidates++;
   287         nbCandidates++;
   202     }
   288     }
   203 
   289 
   204     /* find longest match */
   290     /* find longest match */
   219         while (nbCompares-- && (matchIndex > windowLow)) {
   305         while (nbCompares-- && (matchIndex > windowLow)) {
   220             U32* const nextPtr = bt + 2*(matchIndex & btMask);
   306             U32* const nextPtr = bt + 2*(matchIndex & btMask);
   221             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
   307             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
   222             const BYTE* match;
   308             const BYTE* match;
   223 
   309 
   224             if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
   310             if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
   225                 match = base + matchIndex;
   311                 match = base + matchIndex;
   226                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
   312                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
   227             } else {
   313             } else {
   228                 match = dictBase + matchIndex;
   314                 match = dictBase + matchIndex;
   229                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
   315                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
   257                 matchIndex = nextPtr[0];
   343                 matchIndex = nextPtr[0];
   258         }   }
   344         }   }
   259 
   345 
   260         *smallerPtr = *largerPtr = 0;
   346         *smallerPtr = *largerPtr = 0;
   261 
   347 
       
   348         if (dictMode == ZSTD_dictMatchState && nbCompares) {
       
   349             bestLength = ZSTD_DUBT_findBetterDictMatch(ms, ip, iend, offsetPtr, nbCompares, mls, dictMode);
       
   350         }
       
   351 
   262         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
   352         assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
   263         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
   353         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
   264         if (bestLength >= MINMATCH) {
   354         if (bestLength >= MINMATCH) {
   265             U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
   355             U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
   266             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
   356             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
   270     }
   360     }
   271 }
   361 }
   272 
   362 
   273 
   363 
   274 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
   364 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
   275 static size_t ZSTD_BtFindBestMatch (
   365 FORCE_INLINE_TEMPLATE size_t
   276                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   366 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
   277                         const BYTE* const ip, const BYTE* const iLimit,
   367                 const BYTE* const ip, const BYTE* const iLimit,
   278                         size_t* offsetPtr,
   368                       size_t* offsetPtr,
   279                         const U32 mls /* template */)
   369                 const U32 mls /* template */,
       
   370                 const ZSTD_dictMode_e dictMode)
   280 {
   371 {
   281     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
   372     DEBUGLOG(7, "ZSTD_BtFindBestMatch");
   282     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
   373     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
   283     ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
   374     ZSTD_updateDUBT(ms, ip, iLimit, mls);
   284     return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
   375     return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
   285 }
   376 }
   286 
   377 
   287 
   378 
   288 static size_t ZSTD_BtFindBestMatch_selectMLS (
   379 static size_t
   289                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   380 ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
       
   381                             const BYTE* ip, const BYTE* const iLimit,
       
   382                                   size_t* offsetPtr)
       
   383 {
       
   384     switch(ms->cParams.searchLength)
       
   385     {
       
   386     default : /* includes case 3 */
       
   387     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
       
   388     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
       
   389     case 7 :
       
   390     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
       
   391     }
       
   392 }
       
   393 
       
   394 
       
   395 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
       
   396                         ZSTD_matchState_t* ms,
   290                         const BYTE* ip, const BYTE* const iLimit,
   397                         const BYTE* ip, const BYTE* const iLimit,
   291                         size_t* offsetPtr)
   398                         size_t* offsetPtr)
   292 {
   399 {
   293     switch(cParams->searchLength)
   400     switch(ms->cParams.searchLength)
   294     {
   401     {
   295     default : /* includes case 3 */
   402     default : /* includes case 3 */
   296     case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
   403     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
   297     case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
   404     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
   298     case 7 :
   405     case 7 :
   299     case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
   406     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
   300     }
   407     }
   301 }
   408 }
   302 
   409 
   303 
   410 
   304 /** Tree updater, providing best match */
   411 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
   305 static size_t ZSTD_BtFindBestMatch_extDict (
   412                         ZSTD_matchState_t* ms,
   306                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
       
   307                         const BYTE* const ip, const BYTE* const iLimit,
       
   308                         size_t* offsetPtr,
       
   309                         const U32 mls)
       
   310 {
       
   311     DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
       
   312     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
       
   313     ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
       
   314     return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
       
   315 }
       
   316 
       
   317 
       
   318 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
       
   319                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
       
   320                         const BYTE* ip, const BYTE* const iLimit,
   413                         const BYTE* ip, const BYTE* const iLimit,
   321                         size_t* offsetPtr)
   414                         size_t* offsetPtr)
   322 {
   415 {
   323     switch(cParams->searchLength)
   416     switch(ms->cParams.searchLength)
   324     {
   417     {
   325     default : /* includes case 3 */
   418     default : /* includes case 3 */
   326     case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
   419     case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
   327     case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
   420     case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
   328     case 7 :
   421     case 7 :
   329     case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
   422     case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
   330     }
   423     }
   331 }
   424 }
   332 
   425 
   333 
   426 
   334 
   427 
   338 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
   431 #define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]
   339 
   432 
   340 /* Update chains up to ip (excluded)
   433 /* Update chains up to ip (excluded)
   341    Assumption : always within prefix (i.e. not within extDict) */
   434    Assumption : always within prefix (i.e. not within extDict) */
   342 static U32 ZSTD_insertAndFindFirstIndex_internal(
   435 static U32 ZSTD_insertAndFindFirstIndex_internal(
   343                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   436                         ZSTD_matchState_t* ms,
       
   437                         const ZSTD_compressionParameters* const cParams,
   344                         const BYTE* ip, U32 const mls)
   438                         const BYTE* ip, U32 const mls)
   345 {
   439 {
   346     U32* const hashTable  = ms->hashTable;
   440     U32* const hashTable  = ms->hashTable;
   347     const U32 hashLog = cParams->hashLog;
   441     const U32 hashLog = cParams->hashLog;
   348     U32* const chainTable = ms->chainTable;
   442     U32* const chainTable = ms->chainTable;
   360 
   454 
   361     ms->nextToUpdate = target;
   455     ms->nextToUpdate = target;
   362     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
   456     return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
   363 }
   457 }
   364 
   458 
   365 U32 ZSTD_insertAndFindFirstIndex(
   459 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
   366                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   460     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   367                         const BYTE* ip)
   461     return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.searchLength);
   368 {
       
   369     return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
       
   370 }
   462 }
   371 
   463 
   372 
   464 
   373 /* inlining is important to hardwire a hot branch (template emulation) */
   465 /* inlining is important to hardwire a hot branch (template emulation) */
   374 FORCE_INLINE_TEMPLATE
   466 FORCE_INLINE_TEMPLATE
   375 size_t ZSTD_HcFindBestMatch_generic (
   467 size_t ZSTD_HcFindBestMatch_generic (
   376                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   468                         ZSTD_matchState_t* ms,
   377                         const BYTE* const ip, const BYTE* const iLimit,
   469                         const BYTE* const ip, const BYTE* const iLimit,
   378                         size_t* offsetPtr,
   470                         size_t* offsetPtr,
   379                         const U32 mls, const U32 extDict)
   471                         const U32 mls, const ZSTD_dictMode_e dictMode)
   380 {
   472 {
       
   473     const ZSTD_compressionParameters* const cParams = &ms->cParams;
   381     U32* const chainTable = ms->chainTable;
   474     U32* const chainTable = ms->chainTable;
   382     const U32 chainSize = (1 << cParams->chainLog);
   475     const U32 chainSize = (1 << cParams->chainLog);
   383     const U32 chainMask = chainSize-1;
   476     const U32 chainMask = chainSize-1;
   384     const BYTE* const base = ms->window.base;
   477     const BYTE* const base = ms->window.base;
   385     const BYTE* const dictBase = ms->window.dictBase;
   478     const BYTE* const dictBase = ms->window.dictBase;
   395     /* HC4 match finder */
   488     /* HC4 match finder */
   396     U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
   489     U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
   397 
   490 
   398     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
   491     for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
   399         size_t currentMl=0;
   492         size_t currentMl=0;
   400         if ((!extDict) || matchIndex >= dictLimit) {
   493         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
   401             const BYTE* const match = base + matchIndex;
   494             const BYTE* const match = base + matchIndex;
   402             if (match[ml] == ip[ml])   /* potentially better */
   495             if (match[ml] == ip[ml])   /* potentially better */
   403                 currentMl = ZSTD_count(ip, match, iLimit);
   496                 currentMl = ZSTD_count(ip, match, iLimit);
   404         } else {
   497         } else {
   405             const BYTE* const match = dictBase + matchIndex;
   498             const BYTE* const match = dictBase + matchIndex;
   417 
   510 
   418         if (matchIndex <= minChain) break;
   511         if (matchIndex <= minChain) break;
   419         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
   512         matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
   420     }
   513     }
   421 
   514 
       
   515     if (dictMode == ZSTD_dictMatchState) {
       
   516         const ZSTD_matchState_t* const dms = ms->dictMatchState;
       
   517         const U32* const dmsChainTable = dms->chainTable;
       
   518         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
       
   519         const U32 dmsChainMask         = dmsChainSize - 1;
       
   520         const U32 dmsLowestIndex       = dms->window.dictLimit;
       
   521         const BYTE* const dmsBase      = dms->window.base;
       
   522         const BYTE* const dmsEnd       = dms->window.nextSrc;
       
   523         const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
       
   524         const U32 dmsIndexDelta        = dictLimit - dmsSize;
       
   525         const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
       
   526 
       
   527         matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
       
   528 
       
   529         for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
       
   530             size_t currentMl=0;
       
   531             const BYTE* const match = dmsBase + matchIndex;
       
   532             assert(match+4 <= dmsEnd);
       
   533             if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
       
   534                 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
       
   535 
       
   536             /* save best solution */
       
   537             if (currentMl > ml) {
       
   538                 ml = currentMl;
       
   539                 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
       
   540                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
       
   541             }
       
   542 
       
   543             if (matchIndex <= dmsMinChain) break;
       
   544             matchIndex = dmsChainTable[matchIndex & dmsChainMask];
       
   545         }
       
   546     }
       
   547 
   422     return ml;
   548     return ml;
   423 }
   549 }
   424 
   550 
   425 
   551 
   426 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
   552 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
   427                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   553                         ZSTD_matchState_t* ms,
   428                         const BYTE* ip, const BYTE* const iLimit,
   554                         const BYTE* ip, const BYTE* const iLimit,
   429                         size_t* offsetPtr)
   555                         size_t* offsetPtr)
   430 {
   556 {
   431     switch(cParams->searchLength)
   557     switch(ms->cParams.searchLength)
   432     {
   558     {
   433     default : /* includes case 3 */
   559     default : /* includes case 3 */
   434     case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
   560     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
   435     case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
   561     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
   436     case 7 :
   562     case 7 :
   437     case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
   563     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
   438     }
   564     }
   439 }
   565 }
   440 
   566 
   441 
   567 
   442 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
   568 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
   443                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   569                         ZSTD_matchState_t* ms,
   444                         const BYTE* ip, const BYTE* const iLimit,
   570                         const BYTE* ip, const BYTE* const iLimit,
   445                         size_t* const offsetPtr)
   571                         size_t* offsetPtr)
   446 {
   572 {
   447     switch(cParams->searchLength)
   573     switch(ms->cParams.searchLength)
   448     {
   574     {
   449     default : /* includes case 3 */
   575     default : /* includes case 3 */
   450     case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
   576     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
   451     case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
   577     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
   452     case 7 :
   578     case 7 :
   453     case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
   579     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
       
   580     }
       
   581 }
       
   582 
       
   583 
       
   584 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
       
   585                         ZSTD_matchState_t* ms,
       
   586                         const BYTE* ip, const BYTE* const iLimit,
       
   587                         size_t* offsetPtr)
       
   588 {
       
   589     switch(ms->cParams.searchLength)
       
   590     {
       
   591     default : /* includes case 3 */
       
   592     case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
       
   593     case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
       
   594     case 7 :
       
   595     case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
   454     }
   596     }
   455 }
   597 }
   456 
   598 
   457 
   599 
   458 /* *******************************
   600 /* *******************************
   460 *********************************/
   602 *********************************/
   461 FORCE_INLINE_TEMPLATE
   603 FORCE_INLINE_TEMPLATE
   462 size_t ZSTD_compressBlock_lazy_generic(
   604 size_t ZSTD_compressBlock_lazy_generic(
   463                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
   605                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
   464                         U32 rep[ZSTD_REP_NUM],
   606                         U32 rep[ZSTD_REP_NUM],
   465                         ZSTD_compressionParameters const* cParams,
       
   466                         const void* src, size_t srcSize,
   607                         const void* src, size_t srcSize,
   467                         const U32 searchMethod, const U32 depth)
   608                         const U32 searchMethod, const U32 depth,
       
   609                         ZSTD_dictMode_e const dictMode)
   468 {
   610 {
   469     const BYTE* const istart = (const BYTE*)src;
   611     const BYTE* const istart = (const BYTE*)src;
   470     const BYTE* ip = istart;
   612     const BYTE* ip = istart;
   471     const BYTE* anchor = istart;
   613     const BYTE* anchor = istart;
   472     const BYTE* const iend = istart + srcSize;
   614     const BYTE* const iend = istart + srcSize;
   473     const BYTE* const ilimit = iend - 8;
   615     const BYTE* const ilimit = iend - 8;
   474     const BYTE* const base = ms->window.base + ms->window.dictLimit;
   616     const BYTE* const base = ms->window.base;
       
   617     const U32 prefixLowestIndex = ms->window.dictLimit;
       
   618     const BYTE* const prefixLowest = base + prefixLowestIndex;
   475 
   619 
   476     typedef size_t (*searchMax_f)(
   620     typedef size_t (*searchMax_f)(
   477                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   621                         ZSTD_matchState_t* ms,
   478                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
   622                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
   479     searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
   623     searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
       
   624         (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
       
   625         (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
   480     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
   626     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
   481 
   627 
       
   628     const ZSTD_matchState_t* const dms = ms->dictMatchState;
       
   629     const U32 dictLowestIndex      = dictMode == ZSTD_dictMatchState ?
       
   630                                      dms->window.dictLimit : 0;
       
   631     const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
       
   632                                      dms->window.base : NULL;
       
   633     const BYTE* const dictLowest   = dictMode == ZSTD_dictMatchState ?
       
   634                                      dictBase + dictLowestIndex : NULL;
       
   635     const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
       
   636                                      dms->window.nextSrc : NULL;
       
   637     const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
       
   638                                      prefixLowestIndex - (U32)(dictEnd - dictBase) :
       
   639                                      0;
       
   640     const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
       
   641 
   482     /* init */
   642     /* init */
   483     ip += (ip==base);
   643     ip += (dictAndPrefixLength == 0);
   484     ms->nextToUpdate3 = ms->nextToUpdate;
   644     ms->nextToUpdate3 = ms->nextToUpdate;
   485     {   U32 const maxRep = (U32)(ip-base);
   645     if (dictMode == ZSTD_noDict) {
       
   646         U32 const maxRep = (U32)(ip - prefixLowest);
   486         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
   647         if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
   487         if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
   648         if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
       
   649     }
       
   650     if (dictMode == ZSTD_dictMatchState) {
       
   651         /* dictMatchState repCode checks don't currently handle repCode == 0
       
   652          * disabling. */
       
   653         assert(offset_1 <= dictAndPrefixLength);
       
   654         assert(offset_2 <= dictAndPrefixLength);
   488     }
   655     }
   489 
   656 
   490     /* Match Loop */
   657     /* Match Loop */
   491     while (ip < ilimit) {
   658     while (ip < ilimit) {
   492         size_t matchLength=0;
   659         size_t matchLength=0;
   493         size_t offset=0;
   660         size_t offset=0;
   494         const BYTE* start=ip+1;
   661         const BYTE* start=ip+1;
   495 
   662 
   496         /* check repCode */
   663         /* check repCode */
   497         if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
   664         if (dictMode == ZSTD_dictMatchState) {
   498             /* repcode : we take it */
   665             const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
       
   666             const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
       
   667                                 && repIndex < prefixLowestIndex) ?
       
   668                                    dictBase + (repIndex - dictIndexDelta) :
       
   669                                    base + repIndex;
       
   670             if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
       
   671                 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
       
   672                 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
       
   673                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
       
   674                 if (depth==0) goto _storeSequence;
       
   675             }
       
   676         }
       
   677         if ( dictMode == ZSTD_noDict
       
   678           && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
   499             matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
   679             matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
   500             if (depth==0) goto _storeSequence;
   680             if (depth==0) goto _storeSequence;
   501         }
   681         }
   502 
   682 
   503         /* first search (depth 0) */
   683         /* first search (depth 0) */
   504         {   size_t offsetFound = 99999999;
   684         {   size_t offsetFound = 999999999;
   505             size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
   685             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
   506             if (ml2 > matchLength)
   686             if (ml2 > matchLength)
   507                 matchLength = ml2, start = ip, offset=offsetFound;
   687                 matchLength = ml2, start = ip, offset=offsetFound;
   508         }
   688         }
   509 
   689 
   510         if (matchLength < 4) {
   690         if (matchLength < 4) {
   514 
   694 
   515         /* let's try to find a better solution */
   695         /* let's try to find a better solution */
   516         if (depth>=1)
   696         if (depth>=1)
   517         while (ip<ilimit) {
   697         while (ip<ilimit) {
   518             ip ++;
   698             ip ++;
   519             if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
   699             if ( (dictMode == ZSTD_noDict)
       
   700               && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
   520                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
   701                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
   521                 int const gain2 = (int)(mlRep * 3);
   702                 int const gain2 = (int)(mlRep * 3);
   522                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
   703                 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
   523                 if ((mlRep >= 4) && (gain2 > gain1))
   704                 if ((mlRep >= 4) && (gain2 > gain1))
   524                     matchLength = mlRep, offset = 0, start = ip;
   705                     matchLength = mlRep, offset = 0, start = ip;
   525             }
   706             }
   526             {   size_t offset2=99999999;
   707             if (dictMode == ZSTD_dictMatchState) {
   527                 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
   708                 const U32 repIndex = (U32)(ip - base) - offset_1;
       
   709                 const BYTE* repMatch = repIndex < prefixLowestIndex ?
       
   710                                dictBase + (repIndex - dictIndexDelta) :
       
   711                                base + repIndex;
       
   712                 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
       
   713                     && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
       
   714                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
       
   715                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
       
   716                     int const gain2 = (int)(mlRep * 3);
       
   717                     int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
       
   718                     if ((mlRep >= 4) && (gain2 > gain1))
       
   719                         matchLength = mlRep, offset = 0, start = ip;
       
   720                 }
       
   721             }
       
   722             {   size_t offset2=999999999;
       
   723                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   528                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   724                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   529                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   725                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   530                 if ((ml2 >= 4) && (gain2 > gain1)) {
   726                 if ((ml2 >= 4) && (gain2 > gain1)) {
   531                     matchLength = ml2, offset = offset2, start = ip;
   727                     matchLength = ml2, offset = offset2, start = ip;
   532                     continue;   /* search a better one */
   728                     continue;   /* search a better one */
   533             }   }
   729             }   }
   534 
   730 
   535             /* let's find an even better one */
   731             /* let's find an even better one */
   536             if ((depth==2) && (ip<ilimit)) {
   732             if ((depth==2) && (ip<ilimit)) {
   537                 ip ++;
   733                 ip ++;
   538                 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
   734                 if ( (dictMode == ZSTD_noDict)
   539                     size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
   735                   && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
   540                     int const gain2 = (int)(ml2 * 4);
   736                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
       
   737                     int const gain2 = (int)(mlRep * 4);
   541                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
   738                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
   542                     if ((ml2 >= 4) && (gain2 > gain1))
   739                     if ((mlRep >= 4) && (gain2 > gain1))
   543                         matchLength = ml2, offset = 0, start = ip;
   740                         matchLength = mlRep, offset = 0, start = ip;
   544                 }
   741                 }
   545                 {   size_t offset2=99999999;
   742                 if (dictMode == ZSTD_dictMatchState) {
   546                     size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
   743                     const U32 repIndex = (U32)(ip - base) - offset_1;
       
   744                     const BYTE* repMatch = repIndex < prefixLowestIndex ?
       
   745                                    dictBase + (repIndex - dictIndexDelta) :
       
   746                                    base + repIndex;
       
   747                     if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
       
   748                         && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
       
   749                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
       
   750                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
       
   751                         int const gain2 = (int)(mlRep * 4);
       
   752                         int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
       
   753                         if ((mlRep >= 4) && (gain2 > gain1))
       
   754                             matchLength = mlRep, offset = 0, start = ip;
       
   755                     }
       
   756                 }
       
   757                 {   size_t offset2=999999999;
       
   758                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   547                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   759                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   548                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
   760                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
   549                     if ((ml2 >= 4) && (gain2 > gain1)) {
   761                     if ((ml2 >= 4) && (gain2 > gain1)) {
   550                         matchLength = ml2, offset = offset2, start = ip;
   762                         matchLength = ml2, offset = offset2, start = ip;
   551                         continue;
   763                         continue;
   558          * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
   770          * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
   559          * overflows the pointer, which is undefined behavior.
   771          * overflows the pointer, which is undefined behavior.
   560          */
   772          */
   561         /* catch up */
   773         /* catch up */
   562         if (offset) {
   774         if (offset) {
   563             while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
   775             if (dictMode == ZSTD_noDict) {
   564                  && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
   776                 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
   565                 { start--; matchLength++; }
   777                      && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
       
   778                     { start--; matchLength++; }
       
   779             }
       
   780             if (dictMode == ZSTD_dictMatchState) {
       
   781                 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
       
   782                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
       
   783                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
       
   784                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
       
   785             }
   566             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
   786             offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
   567         }
   787         }
   568         /* store sequence */
   788         /* store sequence */
   569 _storeSequence:
   789 _storeSequence:
   570         {   size_t const litLength = start - anchor;
   790         {   size_t const litLength = start - anchor;
   571             ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
   791             ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
   572             anchor = ip = start + matchLength;
   792             anchor = ip = start + matchLength;
   573         }
   793         }
   574 
   794 
   575         /* check immediate repcode */
   795         /* check immediate repcode */
   576         while ( ((ip <= ilimit) & (offset_2>0))
   796         if (dictMode == ZSTD_dictMatchState) {
   577              && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
   797             while (ip <= ilimit) {
   578             /* store sequence */
   798                 U32 const current2 = (U32)(ip-base);
   579             matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
   799                 U32 const repIndex = current2 - offset_2;
   580             offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
   800                 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
   581             ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
   801                     && repIndex < prefixLowestIndex ?
   582             ip += matchLength;
   802                         dictBase - dictIndexDelta + repIndex :
   583             anchor = ip;
   803                         base + repIndex;
   584             continue;   /* faster when present ... (?) */
   804                 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
   585     }   }
   805                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
       
   806                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
       
   807                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
       
   808                     offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
       
   809                     ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
       
   810                     ip += matchLength;
       
   811                     anchor = ip;
       
   812                     continue;
       
   813                 }
       
   814                 break;
       
   815             }
       
   816         }
       
   817 
       
   818         if (dictMode == ZSTD_noDict) {
       
   819             while ( ((ip <= ilimit) & (offset_2>0))
       
   820                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
       
   821                 /* store sequence */
       
   822                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
       
   823                 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
       
   824                 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
       
   825                 ip += matchLength;
       
   826                 anchor = ip;
       
   827                 continue;   /* faster when present ... (?) */
       
   828     }   }   }
   586 
   829 
   587     /* Save reps for next block */
   830     /* Save reps for next block */
   588     rep[0] = offset_1 ? offset_1 : savedOffset;
   831     rep[0] = offset_1 ? offset_1 : savedOffset;
   589     rep[1] = offset_2 ? offset_2 : savedOffset;
   832     rep[1] = offset_2 ? offset_2 : savedOffset;
   590 
   833 
   593 }
   836 }
   594 
   837 
   595 
   838 
   596 size_t ZSTD_compressBlock_btlazy2(
   839 size_t ZSTD_compressBlock_btlazy2(
   597         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   840         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   598         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
   841         void const* src, size_t srcSize)
   599 {
   842 {
   600     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
   843     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
   601 }
   844 }
   602 
   845 
   603 size_t ZSTD_compressBlock_lazy2(
   846 size_t ZSTD_compressBlock_lazy2(
   604         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   847         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   605         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
   848         void const* src, size_t srcSize)
   606 {
   849 {
   607     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
   850     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
   608 }
   851 }
   609 
   852 
   610 size_t ZSTD_compressBlock_lazy(
   853 size_t ZSTD_compressBlock_lazy(
   611         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   854         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   612         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
   855         void const* src, size_t srcSize)
   613 {
   856 {
   614     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
   857     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
   615 }
   858 }
   616 
   859 
   617 size_t ZSTD_compressBlock_greedy(
   860 size_t ZSTD_compressBlock_greedy(
   618         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   861         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   619         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
   862         void const* src, size_t srcSize)
   620 {
   863 {
   621     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
   864     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
       
   865 }
       
   866 
       
   867 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
       
   868         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
       
   869         void const* src, size_t srcSize)
       
   870 {
       
   871     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
       
   872 }
       
   873 
       
   874 size_t ZSTD_compressBlock_lazy2_dictMatchState(
       
   875         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
       
   876         void const* src, size_t srcSize)
       
   877 {
       
   878     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
       
   879 }
       
   880 
       
   881 size_t ZSTD_compressBlock_lazy_dictMatchState(
       
   882         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
       
   883         void const* src, size_t srcSize)
       
   884 {
       
   885     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
       
   886 }
       
   887 
       
   888 size_t ZSTD_compressBlock_greedy_dictMatchState(
       
   889         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
       
   890         void const* src, size_t srcSize)
       
   891 {
       
   892     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
   622 }
   893 }
   623 
   894 
   624 
   895 
   625 FORCE_INLINE_TEMPLATE
   896 FORCE_INLINE_TEMPLATE
   626 size_t ZSTD_compressBlock_lazy_extDict_generic(
   897 size_t ZSTD_compressBlock_lazy_extDict_generic(
   627                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
   898                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
   628                         U32 rep[ZSTD_REP_NUM],
   899                         U32 rep[ZSTD_REP_NUM],
   629                         ZSTD_compressionParameters const* cParams,
       
   630                         const void* src, size_t srcSize,
   900                         const void* src, size_t srcSize,
   631                         const U32 searchMethod, const U32 depth)
   901                         const U32 searchMethod, const U32 depth)
   632 {
   902 {
   633     const BYTE* const istart = (const BYTE*)src;
   903     const BYTE* const istart = (const BYTE*)src;
   634     const BYTE* ip = istart;
   904     const BYTE* ip = istart;
   642     const BYTE* const dictBase = ms->window.dictBase;
   912     const BYTE* const dictBase = ms->window.dictBase;
   643     const BYTE* const dictEnd  = dictBase + dictLimit;
   913     const BYTE* const dictEnd  = dictBase + dictLimit;
   644     const BYTE* const dictStart  = dictBase + lowestIndex;
   914     const BYTE* const dictStart  = dictBase + lowestIndex;
   645 
   915 
   646     typedef size_t (*searchMax_f)(
   916     typedef size_t (*searchMax_f)(
   647                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
   917                         ZSTD_matchState_t* ms,
   648                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
   918                         const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
   649     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
   919     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
   650 
   920 
   651     U32 offset_1 = rep[0], offset_2 = rep[1];
   921     U32 offset_1 = rep[0], offset_2 = rep[1];
   652 
   922 
   653     /* init */
   923     /* init */
   654     ms->nextToUpdate3 = ms->nextToUpdate;
   924     ms->nextToUpdate3 = ms->nextToUpdate;
   672                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   942                 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
   673                 if (depth==0) goto _storeSequence;
   943                 if (depth==0) goto _storeSequence;
   674         }   }
   944         }   }
   675 
   945 
   676         /* first search (depth 0) */
   946         /* first search (depth 0) */
   677         {   size_t offsetFound = 99999999;
   947         {   size_t offsetFound = 999999999;
   678             size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
   948             size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
   679             if (ml2 > matchLength)
   949             if (ml2 > matchLength)
   680                 matchLength = ml2, start = ip, offset=offsetFound;
   950                 matchLength = ml2, start = ip, offset=offsetFound;
   681         }
   951         }
   682 
   952 
   683          if (matchLength < 4) {
   953          if (matchLength < 4) {
   705                     if ((repLength >= 4) && (gain2 > gain1))
   975                     if ((repLength >= 4) && (gain2 > gain1))
   706                         matchLength = repLength, offset = 0, start = ip;
   976                         matchLength = repLength, offset = 0, start = ip;
   707             }   }
   977             }   }
   708 
   978 
   709             /* search match, depth 1 */
   979             /* search match, depth 1 */
   710             {   size_t offset2=99999999;
   980             {   size_t offset2=999999999;
   711                 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
   981                 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   712                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   982                 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   713                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   983                 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
   714                 if ((ml2 >= 4) && (gain2 > gain1)) {
   984                 if ((ml2 >= 4) && (gain2 > gain1)) {
   715                     matchLength = ml2, offset = offset2, start = ip;
   985                     matchLength = ml2, offset = offset2, start = ip;
   716                     continue;   /* search a better one */
   986                     continue;   /* search a better one */
   735                         if ((repLength >= 4) && (gain2 > gain1))
  1005                         if ((repLength >= 4) && (gain2 > gain1))
   736                             matchLength = repLength, offset = 0, start = ip;
  1006                             matchLength = repLength, offset = 0, start = ip;
   737                 }   }
  1007                 }   }
   738 
  1008 
   739                 /* search match, depth 2 */
  1009                 /* search match, depth 2 */
   740                 {   size_t offset2=99999999;
  1010                 {   size_t offset2=999999999;
   741                     size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
  1011                     size_t const ml2 = searchMax(ms, ip, iend, &offset2);
   742                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
  1012                     int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
   743                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
  1013                     int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
   744                     if ((ml2 >= 4) && (gain2 > gain1)) {
  1014                     if ((ml2 >= 4) && (gain2 > gain1)) {
   745                         matchLength = ml2, offset = offset2, start = ip;
  1015                         matchLength = ml2, offset = offset2, start = ip;
   746                         continue;
  1016                         continue;
   792 }
  1062 }
   793 
  1063 
   794 
  1064 
   795 size_t ZSTD_compressBlock_greedy_extDict(
  1065 size_t ZSTD_compressBlock_greedy_extDict(
   796         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1066         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   797         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  1067         void const* src, size_t srcSize)
   798 {
  1068 {
   799     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
  1069     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
   800 }
  1070 }
   801 
  1071 
   802 size_t ZSTD_compressBlock_lazy_extDict(
  1072 size_t ZSTD_compressBlock_lazy_extDict(
   803         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1073         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   804         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  1074         void const* src, size_t srcSize)
   805 
  1075 
   806 {
  1076 {
   807     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
  1077     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
   808 }
  1078 }
   809 
  1079 
   810 size_t ZSTD_compressBlock_lazy2_extDict(
  1080 size_t ZSTD_compressBlock_lazy2_extDict(
   811         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1081         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   812         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  1082         void const* src, size_t srcSize)
   813 
  1083 
   814 {
  1084 {
   815     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
  1085     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
   816 }
  1086 }
   817 
  1087 
   818 size_t ZSTD_compressBlock_btlazy2_extDict(
  1088 size_t ZSTD_compressBlock_btlazy2_extDict(
   819         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1089         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
   820         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
  1090         void const* src, size_t srcSize)
   821 
  1091 
   822 {
  1092 {
   823     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
  1093     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
   824 }
  1094 }