/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"

#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)

#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level (for tests) */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy (for tests) */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif

/* ZSTD_bitWeight() :
 * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}

/* ZSTD_fracWeight() :
 * provide fractional-bit "cost" of a stat,
 * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    /* Fweight was meant for "Fractional weight"
     * but it's effectively a value between 1 and 2
     * using fixed point arithmetic */
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
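
/* Worked example (editor's note, a sketch under the default BITCOST_ACCURACY==8):
 * for rawStat=5 : stat=6, hb=2, BWeight=2*256=512, FWeight=(6<<8)>>2=384,
 * so the returned weight is 896, i.e. 3.5 "fractional bits".
 * The result is roughly log2(stat) plus a constant 1-bit offset; the offset
 * cancels out in practice, since prices are computed as differences of weights. */
#if 0 /* usage sketch (for tests) */
MEM_STATIC void ZSTD_fracWeight_example(void)
{
    assert(ZSTD_fracWeight(5) == 896);   /* ~3.5 bits : 512 (full bits) + 384 (interpolated fraction) */
    assert(ZSTD_fracWeight(3) == 768);   /* stat=4, an exact power of 2 : 3.0 "bits" */
}
#endif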

#if (DEBUGLEVEL>=2)
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(int price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
#endif

static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
}

static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}


static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    size_t n;
    U32 total = 0;
    for (n=0; n<nbElts; n++) {
        total += table[n];
    }
    return total;
}

typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;

static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
            (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (s=0; s<lastEltIndex+1; s++) {
        unsigned const base = base1 ? 1 : (table[s]>0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

/* ZSTD_scaleStats() :
 * reduce all elt frequencies in table if sum too large
 * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
    U32 const factor = prevsum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (factor <= 1) return prevsum;
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
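
/* Worked example (editor's note, a hypothetical 3-element table for illustration):
 * downscaling {100, 8, 0} by shift=2 with base_1guaranteed yields
 * {1+25, 1+2, 1+0} = {26, 3, 1}, preserving relative magnitudes while keeping
 * every symbol representable (frequency >= 1). */
#if 0 /* usage sketch (for tests) */
MEM_STATIC void ZSTD_downscaleStats_example(void)
{
    unsigned table[3] = { 100, 8, 0 };
    U32 const sum = ZSTD_downscaleStats(table, 2 /*lastEltIndex*/, 2 /*shift*/, base_1guaranteed);
    assert(table[0]==26 && table[1]==3 && table[2]==1);   /* each elt becomes 1 + (elt >> 2) */
    assert(sum == 30);
}
#endif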

/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    and init from zero if there is none,
 *    using src for literals stats, and baseline stats for sequence symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
                  const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */

        /* heuristic: use pre-defined stats for too small inputs */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
            optPtr->priceType = zop_predef;
        }

        assert(optPtr->symbolCosts != NULL);
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {

            /* huffman stats covering the full value set : table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

            if (compressedLiterals) {
                /* generate literals statistics from huffman table */
                unsigned lit;
                assert(optPtr->litFreq != NULL);
                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                    assert(bitCost <= scaleLog);
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litSum += optPtr->litFreq[lit];
            }   }

            {   unsigned ll;
                FSE_CState_t llstate;
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                optPtr->litLengthSum = 0;
                for (ll=0; ll<=MaxLL; ll++) {
                    U32 const scaleLog = 10;   /* scale to 1K */
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                    assert(bitCost < scaleLog);
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
            }   }

            {   unsigned ml;
                FSE_CState_t mlstate;
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                optPtr->matchLengthSum = 0;
                for (ml=0; ml<=MaxML; ml++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                    assert(bitCost < scaleLog);
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
            }   }

            {   unsigned of;
                FSE_CState_t ofstate;
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                optPtr->offCodeSum = 0;
                for (of=0; of<=MaxOff; of++) {
                    U32 const scaleLog = 10;
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                    assert(bitCost < scaleLog);
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
            }   }

        } else {  /* first block, no dictionary */

            assert(optPtr->litFreq != NULL);
            if (compressedLiterals) {
                /* base initial cost of literals on direct frequency within src */
                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
            }

            {   unsigned const baseLLfreqs[MaxLL+1] = {
                    4, 2, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
            }

            {   unsigned ml;
                for (ml=0; ml<=MaxML; ml++)
                    optPtr->matchLengthFreq[ml] = 1;
            }
            optPtr->matchLengthSum = MaxML+1;

            {   unsigned const baseOFCfreqs[MaxOff+1] = {
                    6, 2, 1, 1, 2, 3, 4, 4,
                    4, 3, 2, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1,
                    1, 1, 1, 1, 1, 1, 1, 1
                };
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
            }

        }

    } else {   /* new block : scale down accumulated statistics */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}

/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;   /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;   /* 6 bits per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = optPtr->litSumBasePrice * litLength;
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
            price -= litPrice;
        }
        return price;
    }
}

/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
    if (optPtr->priceType == zop_predef)
        return WEIGHT(litLength, optLevel);

    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
     * because it isn't representable in the zstd format.
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
     * In such a case, the block would be all literals.
     */
    if (litLength == ZSTD_BLOCKSIZE_MAX)
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}

/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence.
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
 */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offBase,
                   U32 const matchLength,
             const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offBase);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
        return WEIGHT(mlBase, optLevel)
             + ((16 + offCode) * BITCOST_MULTIPLIER);   /* emulated offset cost */

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER;   /* handicap for long distance offsets, favor decompression speed */

    /* match Length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly, to favor fewer sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
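
/* Worked example (editor's note): with predefined stats (priceType==zop_predef),
 * a real offset of 1024 is stored as offBase = OFFSET_TO_OFFBASE(1024) = 1027,
 * so offCode = ZSTD_highbit32(1027) = 10, and the emulated offset cost is
 * (16 + 10) * BITCOST_MULTIPLIER, to which the match length weight is added. */
#if 0 /* usage sketch (for tests) */
MEM_STATIC U32 ZSTD_getMatchPrice_example(const optState_t* optPtr)
{
    /* assumes optPtr->priceType == zop_predef */
    return ZSTD_getMatchPrice(OFFSET_TO_OFFBASE(1024), 6 /*matchLength*/, optPtr, 0 /*optLevel*/);
}
#endif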

/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offBase, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal Length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* offset code : follows storeSeq() numeric representation */
    {   U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match Length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}


/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
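
/* Worked example (editor's note): with length==3, the shift discards the 4th byte
 * on both endiannesses, so only the first 3 bytes take part in the comparison. */
#if 0 /* usage sketch (for tests) */
MEM_STATIC void ZSTD_readMINMATCH_example(void)
{
    const BYTE a[4] = { 1, 2, 3, 9 };
    const BYTE b[4] = { 1, 2, 3, 7 };
    assert(ZSTD_readMINMATCH(a, 3) == ZSTD_readMINMATCH(b, 3));   /* 4th byte masked out by the shift */
    assert(ZSTD_readMINMATCH(a, 4) != ZSTD_readMINMATCH(b, 4));   /* full 4-byte compare */
}
#endif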


/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                       U32* nextToUpdate3,
                                       const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 * @param ip assumed <= iend-8 .
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_insertBt1(
                const ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 curr = (U32)(ip-base);
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    /* windowLow is based on target because
     * we only need positions that will be in the window at the end of the tree update.
     */
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

    assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;    /* Update Hash Table */

    assert(windowLow > 0);
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < curr);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;                 /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];                /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop, to guarantee consistency; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {   /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;               /* update smaller idx */
            commonLengthSmaller = matchLength;      /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;                 /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];                /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > curr + 8);
        return MAX(positions, matchEndIdx - (curr + 8));
    }
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) )   /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) )   /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);   /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);   /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {   /* best possible length */
                    ms->nextToUpdate = curr+1;   /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }   /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                            (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength;   /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];         /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }   /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
    return mnum;
}

typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}

/*************************
*  LDM helper functions  *
*************************/

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate */
    U32 endPosInBlock;        /* End position of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}
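
/* Worked example (editor's note, assuming the rawSeq/rawSeqStore_t field layout
 * from zstd_compress_internal.h): a skip either lands inside the current
 * sequence (updating posInSequence) or past it (advancing pos). */
#if 0 /* usage sketch (for tests) */
MEM_STATIC void ZSTD_optLdm_skipRawSeqStoreBytes_example(void)
{
    rawSeq seq = { 1024 /*offset*/, 5 /*litLength*/, 10 /*matchLength*/ };   /* 15 bytes total */
    rawSeqStore_t store = { &seq, 0 /*pos*/, 0 /*posInSequence*/, 1 /*size*/, 1 /*capacity*/ };
    ZSTD_optLdm_skipRawSeqStoreBytes(&store, 7);    /* lands inside the match part */
    assert(store.pos == 0 && store.posInSequence == 7);
    ZSTD_optLdm_skipRawSeqStoreBytes(&store, 8);    /* consumes the rest of the sequence */
    assert(store.pos == 1 && store.posInSequence == 0);
}
#endif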

/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}

/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'.
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}

/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}


/*-*******************************
*  Optimal parser
*********************************/

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        (void)table;
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif

#define LIT_PRICE(_p)      (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
#define LL_PRICE(_l)       (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
#define LL_INCPRICE(_l)    (LL_PRICE(_l) - LL_PRICE(_l-1))

FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
                         const int optLevel,
                         const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastStretch;
    ZSTD_optLdm_t optLdm;

    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));

    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                              (U32)(ip-istart), (U32)(iend-ip));
            if (!nbMatches) {
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
                ip++;
                continue;
            }

            /* Match found: let's store this solution, and eventually find more candidates.
             * During this forward pass, @opt is used to store stretches,
             * defined as "a match followed by N literals".
             * Note how this is different from a Sequence, which is "N literals followed by a match".
             * Storing stretches allows us to store different match predecessors
             * for each literal position part of a literals run. */

            /* initialize opt[0] */
            opt[0].mlen = 0;   /* there are only literals so far */
            opt[0].litlen = litlen;
            /* No need to include the actual price of the literals before the first match
             * because it is static for the duration of the forward pass, and is included
             * in every subsequent price. But, we include the literal length because
             * the cost variation of litlen depends on the value of litlen.
             */
            opt[0].price = LL_PRICE(litlen);
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffBase = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastStretch.litlen = 0;
                    lastStretch.mlen = maxML;
                    lastStretch.off = maxOffBase;
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = maxML;
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;
                    opt[pos].mlen = 0;
                    opt[pos].litlen = litlen + pos;
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offBase = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
                        int const sequencePrice = opt[0].price + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offBase;
                        opt[pos].litlen = 0;   /* end of match */
                        opt[pos].price = sequencePrice + LL_PRICE(0);
                    }
                }
                last_pos = pos-1;
                opt[pos].price = ZSTD_MAX_PRICE;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur <= ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = opt[cur-1].litlen + 1;
                int const price = opt[cur-1].price
                                + LIT_PRICE(ip+cur-1)
                                + LL_INCPRICE(litlen);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    ZSTD_optimal_t const prevMatch = opt[cur];
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur] = opt[cur-1];
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    if ( (optLevel >= 1) /* additional check only for higher modes */
                      && (prevMatch.litlen == 0) /* replace a match */
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
                      && LIKELY(ip + cur < iend)
                    ) {
                        /* check next position, in case it would be cheaper */
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
                        if ( (with1literal < withMoreLiterals)
                          && (with1literal < opt[cur+1].price) ) {
                            /* update offset history - before it disappears */
                            U32 const prev = cur - prevMatch.mlen;
                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
                            assert(cur >= prevMatch.mlen);
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
                            opt[cur+1] = prevMatch;   /* mlen & offbase */
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
                            opt[cur+1].litlen = 1;
                            opt[cur+1].price = with1literal;
                            if (last_pos < cur+1) last_pos = cur+1;
                        }
                    }
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                }
            }

            /* Offset history is not updated during match comparison.
             * Do it here, now that the match is selected and confirmed.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].litlen == 0) {
                /* just finished a match => alter offset history */
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
                continue;   /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].litlen == 0);
                int const previousPrice = opt[cur].price;
                int const basePrice = previousPrice + LL_PRICE(0);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const longestML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
                                inr-istart, cur, nbMatches, longestML);

                    if ( (longestML > sufficient_len)
                      || (cur + longestML >= ZSTD_OPT_NUM)
                      || (ip + cur + longestML >= iend) ) {
                        lastStretch.mlen = longestML;
                        lastStretch.off = matches[nbMatches-1].off;
                        lastStretch.litlen = 0;
                        last_pos = cur + longestML;
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);

                    for (mlen = lastML; mlen >= startML; mlen--) {   /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) {
                                /* fill empty positions, for future comparisons */
                                last_pos++;
                                opt[last_pos].price = ZSTD_MAX_PRICE;
                                opt[last_pos].litlen = !0;   /* just needs to be != 0, to mean "not an end of match" */
                            }
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = 0;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;   /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
        }   /* for (cur = 1; cur <= last_pos; cur++) */

        lastStretch = opt[last_pos];
        assert(cur >= lastStretch.mlen);
        cur = last_pos - lastStretch.mlen;

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);
        assert(last_pos >= lastStretch.mlen);
        assert(cur == last_pos - lastStretch.mlen);

        if (lastStretch.mlen==0) {
            /* no solution : all matches have been converted into literals */
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
            ip += last_pos;
            continue;
        }
        assert(lastStretch.off > 0);

        /* Update offset history */
        if (lastStretch.litlen == 0) {
            /* finishing on a match : update offset history */
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
        } else {
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
            assert(cur >= lastStretch.litlen);
            cur -= lastStretch.litlen;
        }

        /* Let's write the shortest path solution.
         * It is stored in @opt in reverse order,
         * starting from @storeEnd (==cur+2),
         * effectively overwriting part of @opt.
1368 * Content is changed too:
1369 * - So far, @opt stored stretches, aka a match followed by literals
1370 * - Now, it will store sequences, aka literals followed by a match
1371 */
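        /* Illustration : in stretch form, literals trail the match they follow;
         * in sequence form, the same literals lead the *next* match. The loop
         * below therefore shifts each stretch's litlen onto the sequence stored
         * one slot after it. */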
        {   U32 const storeEnd = cur + 2;
            U32 storeStart = storeEnd;
            U32 stretchPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_SIZE);
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
            if (lastStretch.litlen > 0) {
                /* last "sequence" is unfinished: just a bunch of literals */
                opt[storeEnd].litlen = lastStretch.litlen;
                opt[storeEnd].mlen = 0;
                storeStart = storeEnd-1;
                opt[storeStart] = lastStretch;
            } else {
                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
                storeStart = storeEnd;
            }
            while (1) {
                ZSTD_optimal_t nextStretch = opt[stretchPos];
                opt[storeStart].litlen = nextStretch.litlen;
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
                if (nextStretch.mlen == 0) {
                    /* reaching beginning of segment */
                    break;
                }
                storeStart--;
                opt[storeStart] = nextStretch;  /* note: litlen will be fixed */
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offBase = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);  /* must be last sequence */
                        ip = anchor + llen;  /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;  /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
                    anchor += advance;
                    ip = anchor;
            }   }
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);

            /* update all costs */
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
#endif /* build exclusions */

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
#endif

#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}
#endif

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
#endif



#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error out, its narrow contract must be respected.
 */
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                          seqStore_t* seqStore,
                          U32 rep[ZSTD_REP_NUM],
                          const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history, only keep entropy stats */
    ZSTD_resetSeqStore(seqStore);
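    /* Rewind the window so the 2nd pass re-indexes the same bytes from scratch :
     * moving base back while raising dictLimit/lowLimit turns the first-pass
     * region into invalid history (the out-of-range pointer arithmetic is
     * intentional, see ZSTD_ALLOW_POINTER_OVERFLOW_ATTR above). */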
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
}

size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-passes strategy:
     * this strategy makes a first pass over the first block, to collect statistics
     * which are then used to seed the second pass.
     * After the 1st pass, the function forgets all history and starts the block anew.
     * Consequently, this can only work if no data has been previously loaded into the tables:
     * no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on the first block),
     * for a cost of 2x cpu time on that block. */
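    /* Concretely : on the very first block of a frame, ZSTD_initStats_ultra()
     * below runs a throw-away compression pass to fill ms->opt frequency
     * tables, then the same block is compressed again with those seeded
     * statistics. */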
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)                          /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
      && (curr == ms->window.dictLimit)                     /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)                  /* input large enough to not employ default stats */
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
#endif

#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
#endif

/* note : there is no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is specific to the first block of a frame (no prefix) */
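
/* Usage sketch (illustrative, not part of the original file) : these entry
 * points are normally reached through the block-compressor dispatch of
 * zstd_compress.c rather than called directly, roughly :
 *
 *     ZSTD_blockCompressor const bc =
 *         ZSTD_selectBlockCompressor(ZSTD_btultra2, ZSTD_ps_disable, ZSTD_noDict);
 *     size_t const lastLits = bc(ms, seqStore, rep, src, srcSize);
 *
 * where the returned value is the size of the trailing literals run. */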