1 /* ******************************************************************
2 * FSE : Finite State Entropy encoder
3 * Copyright (c) Meta Platforms, Inc. and affiliates.
5 * You can contact the author at :
6 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
9 * This source code is licensed under both the BSD-style license (found in the
10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 * in the COPYING file in the root directory of this source tree).
12 * You may select, at your option, one of the above-listed licenses.
13 ****************************************************************** */
15 /* **************************************************************
17 ****************************************************************/
18 #include "../common/compiler.h"
19 #include "../common/mem.h" /* U32, U16, etc. */
20 #include "../common/debug.h" /* assert, DEBUGLOG */
21 #include "hist.h" /* HIST_count_wksp */
22 #include "../common/bitstream.h"
23 #define FSE_STATIC_LINKING_ONLY
24 #include "../common/fse.h"
25 #include "../common/error_private.h"
26 #define ZSTD_DEPS_NEED_MALLOC
27 #define ZSTD_DEPS_NEED_MATH64
28 #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
29 #include "../common/bits.h" /* ZSTD_highbit32 */
32 /* **************************************************************
34 ****************************************************************/
35 #define FSE_isError ERR_isError
38 /* **************************************************************
40 ****************************************************************/
42 designed to be included
43 for type-specific functions (template emulation in C)
44 Objective is to write these functions only once, for improved maintenance
48 #ifndef FSE_FUNCTION_EXTENSION
49 # error "FSE_FUNCTION_EXTENSION must be defined"
51 #ifndef FSE_FUNCTION_TYPE
52 # error "FSE_FUNCTION_TYPE must be defined"
56 #define FSE_CAT(X,Y) X##Y
57 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
58 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
61 /* Function templates */
/* Review summary of the visible statements :
 * - stores tableLog and maxSymbolValue in the two U16 slots located just before tableU16,
 * - fills tableU16 with next-state values, grouped by symbol,
 * - fills symbolTT[] with deltaNbBits / deltaFindState for each symbol.
 * Returns ERROR(tableLog_tooLarge) when wkspSize is smaller than
 * FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog).
 * NOTE(review): `ptr` is presumably an alias of `ct` -- confirm against its declaration. */
63 /* FSE_buildCTable_wksp() :
64 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
65 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
66 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
68 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
69 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
70 void* workSpace, size_t wkspSize)
72 U32 const tableSize = 1 << tableLog;
73 U32 const tableMask = tableSize - 1;
75 U16* const tableU16 = ( (U16*) ptr) + 2;
76 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
77 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
/* step : stride added to `position` (then masked with tableMask) when spreading symbols */
78 U32 const step = FSE_TABLESTEP(tableSize);
79 U32 const maxSV1 = maxSymbolValue+1;
/* workspace layout : cumul[] first, tableSymbol[] starts (maxSV1+1) U16 entries later */
81 U16* cumul = (U16*)workSpace; /* size = maxSV1 */
82 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
/* highThreshold : last table cell; it moves down each time a -1 count symbol is stored */
84 U32 highThreshold = tableSize-1;
86 assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
87 if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
/* table header : tableLog, then maxSymbolValue */
89 tableU16[-2] = (U16) tableLog;
90 tableU16[-1] = (U16) maxSymbolValue;
91 assert(tableLog < 16); /* required for threshold strategy to work */
93 /* For explanations on how to distribute symbol values over the table :
94 * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
96 #ifdef __clang_analyzer__
97 ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
100 /* symbol start positions */
/* cumul[u] is the running sum of the cells used by symbols below u :
 * a count of -1 uses one cell and the symbol is stored at tableSymbol[highThreshold],
 * any other count (asserted >= 0) uses that many cells. */
103 for (u=1; u <= maxSV1; u++) {
104 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
105 cumul[u] = cumul[u-1] + 1;
106 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
108 assert(normalizedCounter[u-1] >= 0);
109 cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
110 assert(cumul[u] >= cumul[u-1]); /* no overflow */
112 cumul[maxSV1] = (U16)(tableSize+1);
/* highThreshold still equal to tableSize-1 means the loop above met no -1 count */
116 if (highThreshold == tableSize - 1) {
117 /* Case for no low prob count symbols. Lay down 8 bytes at a time
118 * to reduce branch misses since we are operating on a small block
120 BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
121 { U64 const add = 0x0101010101010101ull;
125 for (s=0; s<maxSV1; ++s, sv += add) {
127 int const n = normalizedCounter[s];
128 MEM_write64(spread + pos, sv);
129 for (i = 8; i < n; i += 8) {
130 MEM_write64(spread + pos + i, sv);
136 /* Spread symbols across the table. Lack of lowprob symbols means that
137 * we don't need variable sized inner loop, so we can unroll the loop and
138 * reduce branch misses.
140 { size_t position = 0;
142 size_t const unroll = 2; /* Experimentally determined optimal unroll */
143 assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
144 for (s = 0; s < (size_t)tableSize; s += unroll) {
146 for (u = 0; u < unroll; ++u) {
147 size_t const uPosition = (position + (u * step)) & tableMask;
148 tableSymbol[uPosition] = spread[s + u];
150 position = (position + (unroll * step)) & tableMask;
152 assert(position == 0); /* Must have initialized all positions */
/* Other spreading path : each symbol is written `freq` times; `position` advances by
 * `step` (masked with tableMask) and keeps advancing while it is above highThreshold. */
157 for (symbol=0; symbol<maxSV1; symbol++) {
159 int const freq = normalizedCounter[symbol];
160 for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
161 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
162 position = (position + step) & tableMask;
163 while (position > highThreshold)
164 position = (position + step) & tableMask; /* Low proba area */
166 assert(position==0); /* Must have initialized all positions */
/* Next-state table : for each cell u holding symbol s, the next free slot of s
 * (cumul[s], post-incremented) receives tableSize+u. */
170 { U32 u; for (u=0; u<tableSize; u++) {
171 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
172 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
175 /* Build Symbol Transformation Table */
/* `total` accumulates the counts already handled; deltaFindState is derived from it */
176 { unsigned total = 0;
178 for (s=0; s<=maxSymbolValue; s++) {
179 switch (normalizedCounter[s])
182 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
183 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
188 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
189 assert(total <= INT_MAX);
190 symbolTT[s].deltaFindState = (int)(total - 1);
194 assert(normalizedCounter[s] > 1);
/* counts above 1 : maxBitsOut = tableLog - highbit32(count-1), minStatePlus = count << maxBitsOut */
195 { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
196 U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
197 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
198 symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
199 total += (unsigned)normalizedCounter[s];
/* disabled debug section : logs weight, max bits and fractional bit cost of each symbol */
202 #if 0 /* debug : symbol costs */
203 DEBUGLOG(5, "\n --- table statistics : ");
205 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
206 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
207 symbol, normalizedCounter[symbol],
208 FSE_getMaxNbBits(symbolTT, symbol),
209 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
218 #ifndef FSE_COMMONDEFS_ONLY
220 /*-**************************************************************
221 * FSE NCount encoding
222 ****************************************************************/
/* FSE_NCountWriteBound() :
 * Size bound, in bytes, for a header written with these parameters.
 * Bit budget : (maxSymbolValue+1) * tableLog, plus the extras itemized below,
 * divided by 8, then the listed byte extras are added.
 * @return : that bound, or FSE_NCOUNTBOUND when maxSymbolValue is 0. */
223 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
225 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
226 + 4 /* bitCount initialized at 4 */
227 + 2 /* first two symbols may use one additional bit each */) / 8)
228 + 1 /* round up to whole nb bytes */
229 + 2 /* additional two bytes for bitstream flush */;
230 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
/* FSE_writeNCount_generic() :
 * Packs `normalizedCounter` into `header` through the accumulator `bitStream`,
 * written out two bytes at a time (low byte first).
 * writeIsSafe == 0 : every two-byte write is preceded by a test against oend-2,
 *                    which returns ERROR(dstSize_tooSmall) on failure.
 * writeIsSafe != 0 : those tests are skipped.
 * ERROR(GENERIC) is returned when `remaining` drops below 1. */
234 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
235 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
236 unsigned writeIsSafe)
238 BYTE* const ostart = (BYTE*) header;
240 BYTE* const oend = ostart + headerBufferSize;
242 const int tableSize = 1 << tableLog;
248 unsigned const alphabetSize = maxSymbolValue + 1;
/* first field : tableLog, stored relative to FSE_MIN_TABLELOG */
252 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
256 remaining = tableSize+1; /* +1 for extra accuracy */
257 threshold = tableSize;
260 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
/* run of zero counts : `start` marks its first symbol, `symbol` advances past it */
262 unsigned start = symbol;
263 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
264 if (symbol == alphabetSize) break; /* incorrect distribution */
/* runs of at least 24 : 0xFFFF is added to the stream and two bytes are written */
265 while (symbol >= start+24) {
267 bitStream += 0xFFFFU << bitCount;
268 if ((!writeIsSafe) && (out > oend-2))
269 return ERROR(dstSize_tooSmall); /* Buffer overflow */
270 out[0] = (BYTE) bitStream;
271 out[1] = (BYTE)(bitStream>>8);
/* runs of at least 3 : the value 3 is added; what is left of the run is added afterwards */
275 while (symbol >= start+3) {
277 bitStream += 3 << bitCount;
280 bitStream += (symbol-start) << bitCount;
283 if ((!writeIsSafe) && (out > oend - 2))
284 return ERROR(dstSize_tooSmall); /* Buffer overflow */
285 out[0] = (BYTE)bitStream;
286 out[1] = (BYTE)(bitStream>>8);
/* one count : `remaining` decreases by its absolute value; the stored value is count+1,
 * shifted up by `max` when it reaches `threshold` */
291 { int count = normalizedCounter[symbol++];
292 int const max = (2*threshold-1) - remaining;
293 remaining -= count < 0 ? -count : count;
294 count++; /* +1 for extra accuracy */
295 if (count>=threshold)
296 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
297 bitStream += count << bitCount;
/* values below `max` take one bit less */
299 bitCount -= (count<max);
/* stored value 1 corresponds to an original count of 0 */
300 previousIs0 = (count==1);
301 if (remaining<1) return ERROR(GENERIC);
/* shrink the field width while `remaining` is below `threshold` */
302 while (remaining<threshold) { nbBits--; threshold>>=1; }
305 if ((!writeIsSafe) && (out > oend - 2))
306 return ERROR(dstSize_tooSmall); /* Buffer overflow */
307 out[0] = (BYTE)bitStream;
308 out[1] = (BYTE)(bitStream>>8);
315 return ERROR(GENERIC); /* incorrect normalized distribution */
316 assert(symbol <= alphabetSize);
318 /* flush remaining bitStream */
319 if ((!writeIsSafe) && (out > oend - 2))
320 return ERROR(dstSize_tooSmall); /* Buffer overflow */
321 out[0] = (BYTE)bitStream;
322 out[1] = (BYTE)(bitStream>>8);
/* advance by the number of bytes needed for bitCount bits, rounded up */
323 out+= (bitCount+7) /8;
/* FSE_writeNCount() :
 * Validates tableLog, then forwards to FSE_writeNCount_generic().
 * tableLog > FSE_MAX_TABLELOG : ERROR(tableLog_tooLarge).
 * tableLog < FSE_MIN_TABLELOG : ERROR(GENERIC).
 * bufferSize below FSE_NCountWriteBound() : checked variant (writeIsSafe = 0),
 * otherwise the variant without bound tests (writeIsSafe = 1). */
329 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
330 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
332 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
333 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
335 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
336 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
338 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
342 /*-**************************************************************
343 * FSE Compression Code
344 ****************************************************************/
/* FSE_minTableLog() :
 * minBitsSrc     = highbit32(srcSize) + 1
 * minBitsSymbols = highbit32(maxSymbolValue) + 2
 * minBits is the smaller of the two. srcSize is asserted to be above 1. */
346 /* provides the minimum logSize to safely represent a distribution */
347 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
349 U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
350 U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
351 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
352 assert(srcSize > 1); /* Not supported, RLE should be used instead */
/* FSE_optimalTableLog_internal() :
 * Starts from maxTableLog (FSE_DEFAULT_TABLELOG when it is 0), then in this order :
 * lowers it to highbit32(srcSize-1) - minus, raises it to FSE_minTableLog(),
 * and clamps it to [FSE_MIN_TABLELOG, FSE_MAX_TABLELOG].
 * srcSize is asserted to be above 1. */
356 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
358 U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
359 U32 tableLog = maxTableLog;
360 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
361 assert(srcSize > 1); /* Not supported, RLE should be used instead */
362 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
363 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
364 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
365 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
366 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
/* FSE_optimalTableLog() :
 * Same as FSE_optimalTableLog_internal(), with `minus` fixed to 2. */
370 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
372 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
375 /* Secondary normalization method.
376 To be used when primary method fails. */
/* Visible outline :
 * 1) counts <= lowThreshold (total >> tableLog) receive lowProbCount; other symbols
 *    are compared with lowOne or marked NOT_YET_ASSIGNED,
 * 2) lowOne may be recomputed from ToDistribute and the marked symbols tested again,
 * 3) special cases : every symbol already served, or nothing marked any more,
 * 4) what is left is shared between marked symbols with 62-bit fixed-point steps.
 * ERROR(GENERIC) is returned from the final loop (its condition is not visible here). */
378 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
380 short const NOT_YET_ASSIGNED = -2;
386 U32 const lowThreshold = (U32)(total >> tableLog);
387 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
389 for (s=0; s<=maxSymbolValue; s++) {
394 if (count[s] <= lowThreshold) {
395 norm[s] = lowProbCount;
400 if (count[s] <= lowOne) {
407 norm[s]=NOT_YET_ASSIGNED;
/* table cells still free after the first pass */
409 ToDistribute = (1 << tableLog) - distributed;
411 if (ToDistribute == 0)
414 if ((total / ToDistribute) > lowOne) {
415 /* risk of rounding to zero */
416 lowOne = (U32)((total * 3) / (ToDistribute * 2));
417 for (s=0; s<=maxSymbolValue; s++) {
418 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
424 ToDistribute = (1 << tableLog) - distributed;
427 if (distributed == maxSymbolValue+1) {
428 /* all values are pretty poor;
429 probably incompressible data (should have already been detected);
430 find max, then give all remaining points to max */
431 U32 maxV = 0, maxC = 0;
432 for (s=0; s<=maxSymbolValue; s++)
433 if (count[s] > maxC) { maxV=s; maxC=count[s]; }
434 norm[maxV] += (short)ToDistribute;
439 /* all of the symbols were low enough for the lowOne or lowThreshold */
/* cycles over the symbols, adding one to each positive norm[] until ToDistribute is 0 */
440 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
441 if (norm[s] > 0) { ToDistribute--; norm[s]++; }
/* rStep : (ToDistribute << vStepLog, plus mid) divided by total;
 * each marked symbol gets sEnd - sStart, taken from the running sum tmpTotal */
445 { U64 const vStepLog = 62 - tableLog;
446 U64 const mid = (1ULL << (vStepLog-1)) - 1;
447 U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
449 for (s=0; s<=maxSymbolValue; s++) {
450 if (norm[s]==NOT_YET_ASSIGNED) {
451 U64 const end = tmpTotal + (count[s] * rStep);
452 U32 const sStart = (U32)(tmpTotal >> vStepLog);
453 U32 const sEnd = (U32)(end >> vStepLog);
454 U32 const weight = sEnd - sStart;
456 return ERROR(GENERIC);
457 norm[s] = (short)weight;
/* FSE_normalizeCount() :
 * Scales count[] (sum = total) into normalizedCounter[] for a table of 1<<tableLog cells.
 * tableLog == 0 selects FSE_DEFAULT_TABLELOG.
 * Errors : ERROR(GENERIC) when tableLog < FSE_MIN_TABLELOG or < FSE_minTableLog(total, maxSymbolValue),
 *          ERROR(tableLog_tooLarge) when tableLog > FSE_MAX_TABLELOG,
 *          any error reported by FSE_normalizeM2().
 * Returns 0 as soon as one symbol holds the whole total (rle case).
 * useLowProbCount != 0 : rare symbols receive -1, otherwise they receive 1. */
464 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
465 const unsigned* count, size_t total,
466 unsigned maxSymbolValue, unsigned useLowProbCount)
469 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
470 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
471 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
472 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
/* rtbTable : rounding thresholds indexed by the truncated probability, scaled by vStep below */
474 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
475 short const lowProbCount = useLowProbCount ? -1 : 1;
/* fixed point : step = (1<<62) / total, so (count*step) >> scale is count * (1<<tableLog) / total, truncated */
476 U64 const scale = 62 - tableLog;
477 U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
478 U64 const vStep = 1ULL<<(scale-20);
479 int stillToDistribute = 1<<tableLog;
483 U32 lowThreshold = (U32)(total >> tableLog);
485 for (s=0; s<=maxSymbolValue; s++) {
486 if (count[s] == total) return 0; /* rle special case */
487 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
488 if (count[s] <= lowThreshold) {
489 normalizedCounter[s] = lowProbCount;
492 short proba = (short)((count[s]*step) >> scale);
/* proba is increased by one when the truncated remainder is above restToBeat */
494 U64 restToBeat = vStep * rtbTable[proba];
495 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
/* remember the symbol with the largest normalized value */
497 if (proba > largestP) { largestP=proba; largest=s; }
498 normalizedCounter[s] = proba;
499 stillToDistribute -= proba;
/* an excess of at least half the largest value goes to FSE_normalizeM2();
 * otherwise stillToDistribute is simply added to the largest value */
501 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
502 /* corner case, need another normalization method */
503 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
504 if (FSE_isError(errorCode)) return errorCode;
506 else normalizedCounter[largest] += (short)stillToDistribute;
510 { /* Print Table (debug) */
513 for (s=0; s<=maxSymbolValue; s++)
514 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
515 for (s=0; s<=maxSymbolValue; s++)
516 nTotal += abs(normalizedCounter[s]);
517 if (nTotal != (1U<<tableLog))
518 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
526 /* fake FSE_CTable, for rle input (always same symbol) */
/* Visible writes : header slots receive 0 and symbolValue (tableU16[-2], tableU16[-1]),
 * tableU16[1] is cleared, and the symbolTT entry of symbolValue gets
 * deltaNbBits = 0 and deltaFindState = 0.
 * NOTE(review): `ptr` is presumably an alias of `ct` -- confirm against its declaration. */
527 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
530 U16* tableU16 = ( (U16*) ptr) + 2;
531 void* FSCTptr = (U32*)ptr + 2;
532 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
535 tableU16[-2] = (U16) 0;
536 tableU16[-1] = (U16) symbolValue;
540 tableU16[1] = 0; /* just in case */
542 /* Build Symbol Transformation Table */
543 symbolTT[symbolValue].deltaNbBits = 0;
544 symbolTT[symbolValue].deltaFindState = 0;
/* FSE_compress_usingCTable_generic() :
 * Encodes src with table `ct` into dst, using two states (CState1, CState2).
 * Input is consumed from its end : every read is `*--ip`.
 * `fast` selects BIT_flushBitsFast() instead of BIT_flushBits() (see FSE_FLUSHBITS).
 * @return : 0 when srcSize <= 2 or when BIT_initCStream() reports an error,
 *           otherwise the value returned by BIT_closeCStream(). */
550 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
551 const void* src, size_t srcSize,
552 const FSE_CTable* ct, const unsigned fast)
554 const BYTE* const istart = (const BYTE*) src;
555 const BYTE* const iend = istart + srcSize;
559 FSE_CState_t CState1, CState2;
562 if (srcSize <= 2) return 0;
563 { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
564 if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
566 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
/* state initialization : two visible variants, the first one also encodes one symbol and flushes */
569 FSE_initCState2(&CState1, ct, *--ip);
570 FSE_initCState2(&CState2, ct, *--ip);
571 FSE_encodeSymbol(&bitC, &CState1, *--ip);
572 FSE_FLUSHBITS(&bitC);
574 FSE_initCState2(&CState2, ct, *--ip);
575 FSE_initCState2(&CState1, ct, *--ip);
/* when the bit container is wide enough and bit 1 of srcSize is set, two symbols are encoded ahead of the main loop */
580 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
581 FSE_encodeSymbol(&bitC, &CState2, *--ip);
582 FSE_encodeSymbol(&bitC, &CState1, *--ip);
583 FSE_FLUSHBITS(&bitC);
586 /* 2 or 4 encoding per loop */
587 while ( ip>istart ) {
589 FSE_encodeSymbol(&bitC, &CState2, *--ip);
591 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
592 FSE_FLUSHBITS(&bitC);
594 FSE_encodeSymbol(&bitC, &CState1, *--ip);
596 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
597 FSE_encodeSymbol(&bitC, &CState2, *--ip);
598 FSE_encodeSymbol(&bitC, &CState1, *--ip);
601 FSE_FLUSHBITS(&bitC);
/* final states are flushed, CState2 first, then the stream is closed */
604 FSE_flushCState(&bitC, &CState2);
605 FSE_flushCState(&bitC, &CState1);
606 return BIT_closeCStream(&bitC);
/* FSE_compress_usingCTable() :
 * Computes `fast` = (dstSize >= FSE_BLOCKBOUND(srcSize)) and forwards to
 * FSE_compress_usingCTable_generic() with a literal 1 or 0 as last argument.
 * NOTE(review): presumably the 1 variant is taken when `fast` is set -- the selecting branch is not visible here. */
609 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
610 const void* src, size_t srcSize,
611 const FSE_CTable* ct)
613 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
616 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
618 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
/* FSE_compressBound() :
 * Function form of the FSE_COMPRESSBOUND() macro : returns its value for `size`. */
622 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
624 #endif /* FSE_COMMONDEFS_ONLY */