2 * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
12 /******************************************
14 ******************************************/
15 #include <stddef.h> /* size_t, ptrdiff_t */
17 #include "../common/compiler.h"
18 #include "../common/error_private.h"
21 /******************************************
23 ******************************************/
24 /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
25 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
27 /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
28 #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
29 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
30 unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
33 /******************************************
35 ******************************************/
36 #define FSE_LIST_ERRORS(ITEM) \
37 ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
38 ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
39 ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
40 ITEM(FSE_ERROR_corruptionDetected) \
41 ITEM(FSE_ERROR_maxCode)
43 #define FSE_GENERATE_ENUM(ENUM) ENUM,
44 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
47 /******************************************
48 * FSE symbol compression API
49 ******************************************/
51 This API consists of small unitary functions, which highly benefit from being inlined.
52 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
53 Visual seems to do it automatically.
54 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
55 If none of these solutions is applicable, include "fse.c" directly.
58 typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
59 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
73 const void* stateTable;
81 unsigned bitsConsumed;
89 const void* table; /* precise table may vary, depending on U16 */
92 typedef enum { FSE_DStream_unfinished = 0,
93 FSE_DStream_endOfBuffer = 1,
94 FSE_DStream_completed = 2,
95 FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
96 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
99 /****************************************************************
101 ****************************************************************/
103 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
104 * Increasing memory usage improves compression ratio
105 * Reduced memory usage can improve speed, due to cache effect
106 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
107 #define FSE_MAX_MEMORY_USAGE 14
108 #define FSE_DEFAULT_MEMORY_USAGE 13
110 /* FSE_MAX_SYMBOL_VALUE :
111 * Maximum symbol value authorized.
112 * Required for proper stack allocation */
113 #define FSE_MAX_SYMBOL_VALUE 255
116 /****************************************************************
117 * template functions type & suffix
118 ****************************************************************/
119 #define FSE_FUNCTION_TYPE BYTE
120 #define FSE_FUNCTION_EXTENSION
123 /****************************************************************
125 ****************************************************************/
128 unsigned short newState;
129 unsigned char symbol;
130 unsigned char nbBits;
131 } FSE_decode_t; /* size == U32 */
135 /****************************************************************
137 ****************************************************************/
138 #ifdef _MSC_VER /* Visual Studio */
139 # define FORCE_INLINE static __forceinline
140 # include <intrin.h> /* For Visual 2005 */
141 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
142 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
144 # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
145 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
147 # define FORCE_INLINE static inline __attribute__((always_inline))
149 # define FORCE_INLINE static inline
152 # define FORCE_INLINE static
153 # endif /* __STDC_VERSION__ */
157 /****************************************************************
159 ****************************************************************/
160 #include <stdlib.h> /* malloc, free, qsort */
161 #include <string.h> /* memcpy, memset */
162 #include <stdio.h> /* printf (debug) */
165 #ifndef MEM_ACCESS_MODULE
166 #define MEM_ACCESS_MODULE
167 /****************************************************************
169 *****************************************************************/
170 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
172 typedef uint8_t BYTE;
173 typedef uint16_t U16;
175 typedef uint32_t U32;
177 typedef uint64_t U64;
180 typedef unsigned char BYTE;
181 typedef unsigned short U16;
182 typedef signed short S16;
183 typedef unsigned int U32;
184 typedef signed int S32;
185 typedef unsigned long long U64;
186 typedef signed long long S64;
189 #endif /* MEM_ACCESS_MODULE */
191 /****************************************************************
193 *****************************************************************/
195 static unsigned FSE_32bits(void)
197 return sizeof(void*)==4;
200 static unsigned FSE_isLittleEndian(void)
202 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
206 static U16 FSE_read16(const void* memPtr)
208 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
211 static U32 FSE_read32(const void* memPtr)
213 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
216 static U64 FSE_read64(const void* memPtr)
218 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
221 static U16 FSE_readLE16(const void* memPtr)
223 if (FSE_isLittleEndian())
224 return FSE_read16(memPtr);
227 const BYTE* p = (const BYTE*)memPtr;
228 return (U16)(p[0] + (p[1]<<8));
232 static U32 FSE_readLE32(const void* memPtr)
234 if (FSE_isLittleEndian())
235 return FSE_read32(memPtr);
238 const BYTE* p = (const BYTE*)memPtr;
239 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
244 static U64 FSE_readLE64(const void* memPtr)
246 if (FSE_isLittleEndian())
247 return FSE_read64(memPtr);
250 const BYTE* p = (const BYTE*)memPtr;
251 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
252 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
256 static size_t FSE_readLEST(const void* memPtr)
259 return (size_t)FSE_readLE32(memPtr);
261 return (size_t)FSE_readLE64(memPtr);
266 /****************************************************************
268 *****************************************************************/
269 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
270 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
271 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
272 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
273 #define FSE_MIN_TABLELOG 5
275 #define FSE_TABLELOG_ABSOLUTE_MAX 15
276 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
277 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
281 /****************************************************************
283 ****************************************************************/
284 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
287 /****************************************************************
289 ****************************************************************/
294 } FSE_symbolCompressionTransform; /* total 8 bytes */
296 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
298 /****************************************************************
300 ****************************************************************/
301 FORCE_INLINE unsigned FSE_highbit32 (U32 val)
303 # if defined(_MSC_VER) /* Visual */
305 return _BitScanReverse(&r, val) ? (unsigned)r : 0;
306 # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
307 return __builtin_clz (val) ^ 31;
308 # else /* Software version */
309 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
317 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
323 /****************************************************************
325 ****************************************************************/
327 designed to be included
328 for type-specific functions (template emulation in C)
329 Objective is to write these functions only once, for improved maintenance
333 #ifndef FSE_FUNCTION_EXTENSION
334 # error "FSE_FUNCTION_EXTENSION must be defined"
336 #ifndef FSE_FUNCTION_TYPE
337 # error "FSE_FUNCTION_TYPE must be defined"
341 #define FSE_CAT(X,Y) X##Y
342 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
343 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
347 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
349 #define FSE_DECODE_TYPE FSE_decode_t
355 } FSE_DTableHeader; /* sizeof U32 */
357 static size_t FSE_buildDTable
358 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
361 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
362 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
363 const U32 tableSize = 1 << tableLog;
364 const U32 tableMask = tableSize-1;
365 const U32 step = FSE_tableStep(tableSize);
366 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
368 U32 highThreshold = tableSize-1;
369 const S16 largeLimit= (S16)(1 << (tableLog-1));
374 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
375 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
377 /* Init, lay down lowprob symbols */
378 DTableH[0].tableLog = (U16)tableLog;
379 for (s=0; s<=maxSymbolValue; s++)
381 if (normalizedCounter[s]==-1)
383 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
388 if (normalizedCounter[s] >= largeLimit) noLarge=0;
389 symbolNext[s] = normalizedCounter[s];
394 for (s=0; s<=maxSymbolValue; s++)
397 for (i=0; i<normalizedCounter[s]; i++)
399 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
400 position = (position + step) & tableMask;
401 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
405 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
407 /* Build Decoding table */
410 for (i=0; i<tableSize; i++)
412 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
413 U16 nextState = symbolNext[symbol]++;
414 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
415 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
419 DTableH->fastMode = (U16)noLarge;
424 /******************************************
426 ******************************************/
427 #ifndef FSE_COMMONDEFS_ONLY
429 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
431 static short FSE_abs(short a)
437 /****************************************************************
438 * Header bitstream management
439 ****************************************************************/
440 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
441 const void* headerBuffer, size_t hbSize)
443 const BYTE* const istart = (const BYTE*) headerBuffer;
444 const BYTE* const iend = istart + hbSize;
445 const BYTE* ip = istart;
451 unsigned charnum = 0;
454 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
455 bitStream = FSE_readLE32(ip);
456 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
457 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
460 *tableLogPtr = nbBits;
461 remaining = (1<<nbBits)+1;
462 threshold = 1<<nbBits;
465 while ((remaining>1) && (charnum<=*maxSVPtr))
469 unsigned n0 = charnum;
470 while ((bitStream & 0xFFFF) == 0xFFFF)
476 bitStream = FSE_readLE32(ip) >> bitCount;
484 while ((bitStream & 3) == 3)
492 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
493 while (charnum < n0) normalizedCounter[charnum++] = 0;
494 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
498 bitStream = FSE_readLE32(ip) >> bitCount;
504 const short max = (short)((2*threshold-1)-remaining);
507 if ((bitStream & (threshold-1)) < (U32)max)
509 count = (short)(bitStream & (threshold-1));
510 bitCount += nbBits-1;
514 count = (short)(bitStream & (2*threshold-1));
515 if (count >= threshold) count -= max;
519 count--; /* extra accuracy */
520 remaining -= FSE_abs(count);
521 normalizedCounter[charnum++] = count;
523 while (remaining < threshold)
530 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
537 bitCount -= (int)(8 * (iend - 4 - ip));
540 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
544 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
545 *maxSVPtr = charnum-1;
547 ip += (bitCount+7)>>3;
548 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
553 /*********************************************************
554 * Decompression (Byte symbols)
555 *********************************************************/
556 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
559 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
560 FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
562 DTableH->tableLog = 0;
563 DTableH->fastMode = 0;
566 cell->symbol = symbolValue;
573 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
576 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
577 FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
578 const unsigned tableSize = 1 << nbBits;
579 const unsigned tableMask = tableSize - 1;
580 const unsigned maxSymbolValue = tableMask;
584 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
586 /* Build Decoding Table */
587 DTableH->tableLog = (U16)nbBits;
588 DTableH->fastMode = 1;
589 for (s=0; s<=maxSymbolValue; s++)
591 dinfo[s].newState = 0;
592 dinfo[s].symbol = (BYTE)s;
593 dinfo[s].nbBits = (BYTE)nbBits;
601 * Initialize a FSE_DStream_t.
602 * srcBuffer must point at the beginning of an FSE block.
603 * The function result is the size of the FSE_block (== srcSize).
604 * If srcSize is too small, the function will return an errorCode;
606 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
608 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
610 if (srcSize >= sizeof(size_t))
613 bitD->start = (const char*)srcBuffer;
614 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
615 bitD->bitContainer = FSE_readLEST(bitD->ptr);
616 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
617 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
618 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
623 bitD->start = (const char*)srcBuffer;
624 bitD->ptr = bitD->start;
625 bitD->bitContainer = *(const BYTE*)(bitD->start);
628 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
630 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
632 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
634 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
636 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
638 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
642 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
643 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
644 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
645 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
653 * Provides next n bits from the bitContainer.
654 * bitContainer is not modified (bits are still present for next read/look)
655 * On 32-bits, maxNbBits==25
656 * On 64-bits, maxNbBits==57
657 * return : value extracted.
659 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
661 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
662 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
665 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
667 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
668 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
671 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
673 bitD->bitsConsumed += nbBits;
678 * Read next n bits from the bitContainer.
679 * On 32-bits, don't read more than maxNbBits==25
680 * On 64-bits, don't read more than maxNbBits==57
681 * Use the fast variant *only* if n >= 1.
682 * return : value extracted.
684 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
686 size_t value = FSE_lookBits(bitD, nbBits);
687 FSE_skipBits(bitD, nbBits);
691 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
693 size_t value = FSE_lookBitsFast(bitD, nbBits);
694 FSE_skipBits(bitD, nbBits);
698 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
700 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
701 return FSE_DStream_tooFar;
703 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
705 bitD->ptr -= bitD->bitsConsumed >> 3;
706 bitD->bitsConsumed &= 7;
707 bitD->bitContainer = FSE_readLEST(bitD->ptr);
708 return FSE_DStream_unfinished;
710 if (bitD->ptr == bitD->start)
712 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
713 return FSE_DStream_completed;
716 U32 nbBytes = bitD->bitsConsumed >> 3;
717 U32 result = FSE_DStream_unfinished;
718 if (bitD->ptr - nbBytes < bitD->start)
720 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
721 result = FSE_DStream_endOfBuffer;
723 bitD->ptr -= nbBytes;
724 bitD->bitsConsumed -= nbBytes*8;
725 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
731 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
733 const void* ptr = dt;
734 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
735 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
736 FSE_reloadDStream(bitD);
737 DStatePtr->table = dt + 1;
740 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
742 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
743 const U32 nbBits = DInfo.nbBits;
744 BYTE symbol = DInfo.symbol;
745 size_t lowBits = FSE_readBits(bitD, nbBits);
747 DStatePtr->state = DInfo.newState + lowBits;
751 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
753 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
754 const U32 nbBits = DInfo.nbBits;
755 BYTE symbol = DInfo.symbol;
756 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
758 DStatePtr->state = DInfo.newState + lowBits;
763 Tells if bitD has reached end of bitStream or not */
765 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
767 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
770 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
772 return DStatePtr->state == 0;
776 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
777 void* dst, size_t maxDstSize,
778 const void* cSrc, size_t cSrcSize,
779 const FSE_DTable* dt, const unsigned fast)
781 BYTE* const ostart = (BYTE*) dst;
783 BYTE* const omax = op + maxDstSize;
784 BYTE* const olimit = omax-3;
792 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
793 if (FSE_isError(errorCode)) return errorCode;
795 FSE_initDState(&state1, &bitD, dt);
796 FSE_initDState(&state2, &bitD, dt);
798 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
800 /* 4 symbols per loop */
801 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
803 op[0] = FSE_GETSYMBOL(&state1);
805 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
806 FSE_reloadDStream(&bitD);
808 op[1] = FSE_GETSYMBOL(&state2);
810 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
811 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
813 op[2] = FSE_GETSYMBOL(&state1);
815 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
816 FSE_reloadDStream(&bitD);
818 op[3] = FSE_GETSYMBOL(&state2);
822 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
825 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
828 *op++ = FSE_GETSYMBOL(&state1);
830 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
833 *op++ = FSE_GETSYMBOL(&state2);
837 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
840 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
842 return (size_t)-FSE_ERROR_corruptionDetected;
846 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
847 const void* cSrc, size_t cSrcSize,
848 const FSE_DTable* dt)
850 FSE_DTableHeader DTableH;
851 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
853 /* select fast mode (static) */
854 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
855 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
859 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
861 const BYTE* const istart = (const BYTE*)cSrc;
862 const BYTE* ip = istart;
863 short counting[FSE_MAX_SYMBOL_VALUE+1];
864 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
866 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
869 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
871 /* normal FSE decoding mode */
872 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
873 if (FSE_isError(errorCode)) return errorCode;
874 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
876 cSrcSize -= errorCode;
878 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
879 if (FSE_isError(errorCode)) return errorCode;
881 /* always return, even if it is an error code */
882 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
887 /* *******************************************************
888 * Huff0 : Huffman block compression
889 *********************************************************/
890 #define HUF_MAX_SYMBOL_VALUE 255
891 #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
892 #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
893 #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
894 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
895 # error "HUF_MAX_TABLELOG is too large !"
898 typedef struct HUF_CElt_s {
903 typedef struct nodeElt_s {
911 /* *******************************************************
912 * Huff0 : Huffman block decompression
913 *********************************************************/
919 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
921 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
922 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
925 const BYTE* ip = (const BYTE*) src;
930 void* ptr = DTable+1;
931 HUF_DElt* const dt = (HUF_DElt*)ptr;
933 if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
936 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
937 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
938 if (iSize >= 128) /* special header */
940 if (iSize >= (242)) /* RLE */
942 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
943 oSize = l[iSize-242];
944 memset(huffWeight, 1, sizeof(huffWeight));
947 else /* Incompressible */
950 iSize = ((oSize+1)/2);
951 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
953 for (n=0; n<oSize; n+=2)
955 huffWeight[n] = ip[n/2] >> 4;
956 huffWeight[n+1] = ip[n/2] & 15;
960 else /* header compressed with FSE (normal case) */
962 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
963 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
964 if (FSE_isError(oSize)) return oSize;
967 /* collect weight stats */
968 memset(rankVal, 0, sizeof(rankVal));
970 for (n=0; n<oSize; n++)
972 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
973 rankVal[huffWeight[n]]++;
974 weightTotal += (1 << huffWeight[n]) >> 1;
976 if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
978 /* get last non-null symbol weight (implied, total must be 2^n) */
979 maxBits = FSE_highbit32(weightTotal) + 1;
980 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
981 DTable[0] = (U16)maxBits;
983 U32 total = 1 << maxBits;
984 U32 rest = total - weightTotal;
985 U32 verif = 1 << FSE_highbit32(rest);
986 U32 lastWeight = FSE_highbit32(rest) + 1;
987 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
988 huffWeight[oSize] = (BYTE)lastWeight;
989 rankVal[lastWeight]++;
992 /* check tree construction validity */
993 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
997 for (n=1; n<=maxBits; n++)
999 U32 current = nextRankStart;
1000 nextRankStart += (rankVal[n] << (n-1));
1001 rankVal[n] = current;
1005 for (n=0; n<=oSize; n++)
1007 const U32 w = huffWeight[n];
1008 const U32 length = (1 << w) >> 1;
1011 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1012 for (i = rankVal[w]; i < rankVal[w] + length; i++)
1014 rankVal[w] += length;
1021 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1023 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1024 const BYTE c = dt[val].byte;
1025 FSE_skipBits(Dstream, dt[val].nbBits);
1029 static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
1030 void* dst, size_t maxDstSize,
1031 const void* cSrc, size_t cSrcSize,
1034 if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1036 BYTE* const ostart = (BYTE*) dst;
1038 BYTE* const omax = op + maxDstSize;
1039 BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1041 const void* ptr = DTable;
1042 const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1043 const U32 dtLog = DTable[0];
1049 const U16* jumpTable = (const U16*)cSrc;
1050 const size_t length1 = FSE_readLE16(jumpTable);
1051 const size_t length2 = FSE_readLE16(jumpTable+1);
1052 const size_t length3 = FSE_readLE16(jumpTable+2);
1053 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */
1054 const char* const start1 = (const char*)(cSrc) + 6;
1055 const char* const start2 = start1 + length1;
1056 const char* const start3 = start2 + length2;
1057 const char* const start4 = start3 + length3;
1058 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1060 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1062 errorCode = FSE_initDStream(&bitD1, start1, length1);
1063 if (FSE_isError(errorCode)) return errorCode;
1064 errorCode = FSE_initDStream(&bitD2, start2, length2);
1065 if (FSE_isError(errorCode)) return errorCode;
1066 errorCode = FSE_initDStream(&bitD3, start3, length3);
1067 if (FSE_isError(errorCode)) return errorCode;
1068 errorCode = FSE_initDStream(&bitD4, start4, length4);
1069 if (FSE_isError(errorCode)) return errorCode;
1071 reloadStatus=FSE_reloadDStream(&bitD2);
1073 /* 16 symbols per loop */
1074 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
1075 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1077 #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1078 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1080 #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1081 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1082 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1084 #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1085 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1086 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1088 HUF_DECODE_SYMBOL_1( 0, bitD1);
1089 HUF_DECODE_SYMBOL_1( 1, bitD2);
1090 HUF_DECODE_SYMBOL_1( 2, bitD3);
1091 HUF_DECODE_SYMBOL_1( 3, bitD4);
1092 HUF_DECODE_SYMBOL_2( 4, bitD1);
1093 HUF_DECODE_SYMBOL_2( 5, bitD2);
1094 HUF_DECODE_SYMBOL_2( 6, bitD3);
1095 HUF_DECODE_SYMBOL_2( 7, bitD4);
1096 HUF_DECODE_SYMBOL_1( 8, bitD1);
1097 HUF_DECODE_SYMBOL_1( 9, bitD2);
1098 HUF_DECODE_SYMBOL_1(10, bitD3);
1099 HUF_DECODE_SYMBOL_1(11, bitD4);
1100 HUF_DECODE_SYMBOL_0(12, bitD1);
1101 HUF_DECODE_SYMBOL_0(13, bitD2);
1102 HUF_DECODE_SYMBOL_0(14, bitD3);
1103 HUF_DECODE_SYMBOL_0(15, bitD4);
1106 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
1107 return (size_t)-FSE_ERROR_corruptionDetected;
1111 /* bitTail = bitD1; */ /* *much* slower : -20% !??! */
1112 FSE_DStream_t bitTail;
1113 bitTail.ptr = bitD1.ptr;
1114 bitTail.bitsConsumed = bitD1.bitsConsumed;
1115 bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */
1116 bitTail.start = start1;
1117 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1119 HUF_DECODE_SYMBOL_0(0, bitTail);
1122 if (FSE_endOfDStream(&bitTail))
1126 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1128 return (size_t)-FSE_ERROR_corruptionDetected;
1133 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1135 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1136 const BYTE* ip = (const BYTE*) cSrc;
1139 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1140 if (FSE_isError(errorCode)) return errorCode;
1141 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1143 cSrcSize -= errorCode;
1145 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1149 #endif /* FSE_COMMONDEFS_ONLY */
1152 zstd - standard compression library
1153 Copyright (C) 2014-2015, Yann Collet.
1155 BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
1157 Redistribution and use in source and binary forms, with or without
1158 modification, are permitted provided that the following conditions are
1160 * Redistributions of source code must retain the above copyright
1161 notice, this list of conditions and the following disclaimer.
1162 * Redistributions in binary form must reproduce the above
1163 copyright notice, this list of conditions and the following disclaimer
1164 in the documentation and/or other materials provided with the
1166 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1167 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1168 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1169 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1170 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1171 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1172 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1173 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1174 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1175 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1176 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1178 You can contact the author at :
1179 - zstd source repository : https://github.com/Cyan4973/zstd
1180 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1183 /****************************************************************
1185 *****************************************************************/
1187 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1188 * Increasing memory usage improves compression ratio
1189 * Reduced memory usage can improve speed, due to cache effect */
1190 #define ZSTD_MEMORY_USAGE 17
1193 /**************************************
1194 CPU Feature Detection
1195 **************************************/
1197 * Automated efficient unaligned memory access detection
1198 * Based on known hardware architectures
1199 * This list will be updated thanks to feedbacks
1201 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1202 || defined(__ARM_FEATURE_UNALIGNED) \
1203 || defined(__i386__) || defined(__x86_64__) \
1204 || defined(_M_IX86) || defined(_M_X64) \
1205 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1206 || (defined(_M_ARM) && (_M_ARM >= 7))
1207 # define ZSTD_UNALIGNED_ACCESS 1
1209 # define ZSTD_UNALIGNED_ACCESS 0
1213 /********************************************************
1215 *********************************************************/
1216 #include <stdlib.h> /* calloc */
1217 #include <string.h> /* memcpy, memmove */
1218 #include <stdio.h> /* debug : printf */
1221 /********************************************************
1222 * Compiler specifics
1223 *********************************************************/
1225 # include <immintrin.h> /* AVX2 intrinsics */
1228 #ifdef _MSC_VER /* Visual Studio */
1229 # include <intrin.h> /* For Visual 2005 */
1230 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1231 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
1235 #ifndef MEM_ACCESS_MODULE
1236 #define MEM_ACCESS_MODULE
1237 /********************************************************
1239 *********************************************************/
1240 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
1242 # include <inttypes.h>
1244 # include <stdint.h> /* intptr_t */
1246 typedef uint8_t BYTE;
1247 typedef uint16_t U16;
1248 typedef int16_t S16;
1249 typedef uint32_t U32;
1250 typedef int32_t S32;
1251 typedef uint64_t U64;
1253 typedef unsigned char BYTE;
1254 typedef unsigned short U16;
1255 typedef signed short S16;
1256 typedef unsigned int U32;
1257 typedef signed int S32;
1258 typedef unsigned long long U64;
1261 #endif /* MEM_ACCESS_MODULE */
1264 /********************************************************
1266 *********************************************************/
1267 static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
1269 #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1270 #define HASH_TABLESIZE (1 << HASH_LOG)
1271 #define HASH_MASK (HASH_TABLESIZE - 1)
1273 #define KNUTH 2654435761
1280 #define KB *(1 <<10)
1281 #define MB *(1 <<20)
1282 #define GB *(1U<<30)
1284 #define BLOCKSIZE (128 KB) /* define, for static allocation */
1286 #define WORKPLACESIZE (BLOCKSIZE*3)
1291 #define MaxML ((1<<MLbits )-1)
1292 #define MaxLL ((1<<LLbits )-1)
1293 #define MaxOff ((1<<Offbits)-1)
1294 #define LitFSELog 11
1298 #define MAX(a,b) ((a)<(b)?(b):(a))
1299 #define MaxSeq MAX(MaxLL, MaxML)
1301 #define LITERAL_NOENTROPY 63
1302 #define COMMAND_NOENTROPY 7 /* to remove */
1304 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
1306 static const size_t ZSTD_blockHeaderSize = 3;
1307 static const size_t ZSTD_frameHeaderSize = 4;
1310 /********************************************************
1312 *********************************************************/
1313 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1315 static unsigned ZSTD_isLittleEndian(void)
1317 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
1321 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1323 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1325 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1327 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
1329 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1331 const BYTE* ip = (const BYTE*)src;
1332 BYTE* op = (BYTE*)dst;
1333 BYTE* const oend = op + length;
1334 while (op < oend) COPY8(op, ip);
1337 static U16 ZSTD_readLE16(const void* memPtr)
1339 if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1342 const BYTE* p = (const BYTE*)memPtr;
1343 return (U16)((U16)p[0] + ((U16)p[1]<<8));
1347 static U32 ZSTD_readLE24(const void* memPtr)
1349 return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1352 static U32 ZSTD_readBE32(const void* memPtr)
1354 const BYTE* p = (const BYTE*)memPtr;
1355 return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1359 /**************************************
1361 ***************************************/
1362 typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1364 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1368 blockType_t blockType;
1370 } blockProperties_t;
1380 BYTE* litLengthStart;
1382 BYTE* matchLengthStart;
1389 typedef struct ZSTD_Cctx_s
1394 seqStore_t seqStore;
1396 __m256i hashTable[HASH_TABLESIZE>>3];
1398 U32 hashTable[HASH_TABLESIZE];
1400 BYTE buffer[WORKPLACESIZE];
1406 /**************************************
1408 **************************************/
1409 /* published entry point */
1410 unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1413 /**************************************
1415 **************************************/
1416 #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
1417 #define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
1418 #define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
1419 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1421 /**************************************************************
1422 * Decompression code
1423 **************************************************************/
1425 static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1427 const BYTE* const in = (const BYTE* const)src;
1431 if (srcSize < 3) return ERROR(srcSize_wrong);
1434 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1436 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1437 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1439 if (bpPtr->blockType == bt_end) return 0;
1440 if (bpPtr->blockType == bt_rle) return 1;
1445 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1447 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1449 memcpy(dst, src, srcSize);
1455 static size_t ZSTD_decompressLiterals(void* ctx,
1456 void* dst, size_t maxDstSize,
1457 const void* src, size_t srcSize)
1459 BYTE* op = (BYTE*)dst;
1460 BYTE* const oend = op + maxDstSize;
1461 const BYTE* ip = (const BYTE*)src;
1465 /* check : minimum 2, for litSize, +1, for content */
1466 if (srcSize <= 3) return ERROR(corruption_detected);
1468 litSize = ip[1] + (ip[0]<<8);
1469 litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */
1470 op = oend - litSize;
1473 if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1474 errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1475 if (FSE_isError(errorCode)) return ERROR(GENERIC);
1480 static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1481 void* dst, size_t maxDstSize,
1482 const BYTE** litStart, size_t* litSize,
1483 const void* src, size_t srcSize)
1485 const BYTE* const istart = (const BYTE* const)src;
1486 const BYTE* ip = istart;
1487 BYTE* const ostart = (BYTE* const)dst;
1488 BYTE* const oend = ostart + maxDstSize;
1489 blockProperties_t litbp;
1491 size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1492 if (ZSTDv01_isError(litcSize)) return litcSize;
1493 if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1494 ip += ZSTD_blockHeaderSize;
1496 switch(litbp.blockType)
1501 *litSize = litcSize;
1505 size_t rleSize = litbp.origSize;
1506 if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1507 if (!srcSize) return ERROR(srcSize_wrong);
1509 memset(oend - rleSize, *ip, rleSize);
1511 *litStart = oend - rleSize;
1518 size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1519 if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1520 *litStart = oend - decodedLitSize;
1521 *litSize = decodedLitSize;
1527 return ERROR(GENERIC);
1534 static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1535 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1536 const void* src, size_t srcSize)
1538 const BYTE* const istart = (const BYTE* const)src;
1539 const BYTE* ip = istart;
1540 const BYTE* const iend = istart + srcSize;
1541 U32 LLtype, Offtype, MLtype;
1542 U32 LLlog, Offlog, MLlog;
1546 if (srcSize < 5) return ERROR(srcSize_wrong);
1549 *nbSeq = ZSTD_readLE16(ip); ip+=2;
1551 Offtype = (*ip >> 4) & 3;
1552 MLtype = (*ip >> 2) & 3;
1555 dumpsLength = ip[2];
1556 dumpsLength += ip[1] << 8;
1561 dumpsLength = ip[1];
1562 dumpsLength += (ip[0] & 1) << 8;
1567 *dumpsLengthPtr = dumpsLength;
1570 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1574 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
1582 FSE_buildDTable_rle(DTableLL, *ip++); break;
1585 FSE_buildDTable_raw(DTableLL, LLbits); break;
1588 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1589 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1590 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1592 FSE_buildDTable(DTableLL, norm, max, LLlog);
1599 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1600 FSE_buildDTable_rle(DTableOffb, *ip++); break;
1603 FSE_buildDTable_raw(DTableOffb, Offbits); break;
1606 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1607 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1608 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1610 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1617 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1618 FSE_buildDTable_rle(DTableML, *ip++); break;
1621 FSE_buildDTable_raw(DTableML, MLbits); break;
1624 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1625 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1626 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1628 FSE_buildDTable(DTableML, norm, max, MLlog);
1642 FSE_DStream_t DStream;
1643 FSE_DState_t stateLL;
1644 FSE_DState_t stateOffb;
1645 FSE_DState_t stateML;
1648 const BYTE* dumpsEnd;
1652 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1658 const BYTE* dumps = seqState->dumps;
1659 const BYTE* const de = seqState->dumpsEnd;
1661 /* Literal length */
1662 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1663 prevOffset = litLength ? seq->offset : seqState->prevOffset;
1664 seqState->prevOffset = seq->offset;
1665 if (litLength == MaxLL)
1667 const U32 add = dumps<de ? *dumps++ : 0;
1668 if (add < 255) litLength += add;
1673 litLength = ZSTD_readLE24(dumps);
1681 U32 offsetCode, nbBits;
1682 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1683 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1684 nbBits = offsetCode - 1;
1685 if (offsetCode==0) nbBits = 0; /* cmove */
1686 offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1687 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1688 if (offsetCode==0) offset = prevOffset;
1692 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1693 if (matchLength == MaxML)
1695 const U32 add = dumps<de ? *dumps++ : 0;
1696 if (add < 255) matchLength += add;
1701 matchLength = ZSTD_readLE24(dumps);
1706 matchLength += MINMATCH;
1709 seq->litLength = litLength;
1710 seq->offset = offset;
1711 seq->matchLength = matchLength;
1712 seqState->dumps = dumps;
1716 static size_t ZSTD_execSequence(BYTE* op,
1718 const BYTE** litPtr, const BYTE* const litLimit,
1719 BYTE* const base, BYTE* const oend)
1721 static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1722 static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1723 const BYTE* const ostart = op;
1724 BYTE* const oLitEnd = op + sequence.litLength;
1725 const size_t litLength = sequence.litLength;
1726 BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1727 const BYTE* const litEnd = *litPtr + litLength;
1730 size_t const seqLength = sequence.litLength + sequence.matchLength;
1732 if (seqLength > (size_t)(oend - op)) return ERROR(dstSize_tooSmall);
1733 if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);
1734 /* Now we know there are no overflow in literal nor match lengths, can use pointer checks */
1735 if (sequence.offset > (U32)(oLitEnd - base)) return ERROR(corruption_detected);
1737 if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
1738 if (litEnd > litLimit) return ERROR(corruption_detected); /* overRead beyond lit buffer */
1739 if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
1742 ZSTD_memmove(op, *litPtr, sequence.litLength); /* note : v0.1 seems to allow scenarios where output or input are close to end of buffer */
1745 *litPtr = litEnd; /* update for next sequence */
1747 /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1748 if (oend-op < 8) return ERROR(dstSize_tooSmall);
1752 const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1753 const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1758 if (match < base) return ERROR(corruption_detected);
1759 if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1761 /* save beginning of literal sequence, in case of write overlap */
1764 if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1765 memcpy(saved, endMatch, qutt);
1768 if (sequence.offset < 8)
1770 const int dec64 = dec64table[sequence.offset];
1775 match += dec32table[sequence.offset];
1776 ZSTD_copy4(op+4, match);
1778 } else { ZSTD_copy8(op, match); }
1779 op += 8; match += 8;
1781 if (endMatch > oend-(16-MINMATCH))
1785 ZSTD_wildcopy(op, match, (oend-8) - op);
1786 match += (oend-8) - op;
1789 while (op<endMatch) *op++ = *match++;
1792 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1794 /* restore, in case of overlap */
1795 if (overlapRisk) memcpy(endMatch, saved, qutt);
1798 return endMatch-ostart;
1801 typedef struct ZSTDv01_Dctx_s
1803 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1804 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1805 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1806 void* previousDstEnd;
1814 static size_t ZSTD_decompressSequences(
1816 void* dst, size_t maxDstSize,
1817 const void* seqStart, size_t seqSize,
1818 const BYTE* litStart, size_t litSize)
1820 dctx_t* dctx = (dctx_t*)ctx;
1821 const BYTE* ip = (const BYTE*)seqStart;
1822 const BYTE* const iend = ip + seqSize;
1823 BYTE* const ostart = (BYTE* const)dst;
1825 BYTE* const oend = ostart + maxDstSize;
1826 size_t errorCode, dumpsLength;
1827 const BYTE* litPtr = litStart;
1828 const BYTE* const litEnd = litStart + litSize;
1831 U32* DTableLL = dctx->LLTable;
1832 U32* DTableML = dctx->MLTable;
1833 U32* DTableOffb = dctx->OffTable;
1834 BYTE* const base = (BYTE*) (dctx->base);
1836 /* Build Decoding Tables */
1837 errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1838 DTableLL, DTableML, DTableOffb,
1840 if (ZSTDv01_isError(errorCode)) return errorCode;
1843 /* Regen sequences */
1846 seqState_t seqState;
1848 memset(&sequence, 0, sizeof(sequence));
1849 seqState.dumps = dumps;
1850 seqState.dumpsEnd = dumps + dumpsLength;
1851 seqState.prevOffset = 1;
1852 errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1853 if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1854 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1855 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1856 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1858 for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1862 ZSTD_decodeSequence(&sequence, &seqState);
1863 oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1864 if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1868 /* check if reached exact end */
1869 if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
1870 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
1872 /* last literal segment */
1874 size_t lastLLSize = litEnd - litPtr;
1875 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1876 if (lastLLSize > 0) {
1877 if (op != litPtr) memmove(op, litPtr, lastLLSize);
1887 static size_t ZSTD_decompressBlock(
1889 void* dst, size_t maxDstSize,
1890 const void* src, size_t srcSize)
1892 /* blockType == blockCompressed, srcSize is trusted */
1893 const BYTE* ip = (const BYTE*)src;
1894 const BYTE* litPtr = NULL;
1898 /* Decode literals sub-block */
1899 errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1900 if (ZSTDv01_isError(errorCode)) return errorCode;
1902 srcSize -= errorCode;
1904 return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1908 size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1910 const BYTE* ip = (const BYTE*)src;
1911 const BYTE* iend = ip + srcSize;
1912 BYTE* const ostart = (BYTE* const)dst;
1914 BYTE* const oend = ostart + maxDstSize;
1915 size_t remainingSize = srcSize;
1918 blockProperties_t blockProperties;
1921 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1922 magicNumber = ZSTD_readBE32(src);
1923 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1924 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1926 /* Loop on each block */
1929 size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1930 if (ZSTDv01_isError(blockSize)) return blockSize;
1932 ip += ZSTD_blockHeaderSize;
1933 remainingSize -= ZSTD_blockHeaderSize;
1934 if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1936 switch(blockProperties.blockType)
1939 errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1942 errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1945 return ERROR(GENERIC); /* not yet supported */
1949 if (remainingSize) return ERROR(srcSize_wrong);
1952 return ERROR(GENERIC);
1954 if (blockSize == 0) break; /* bt_end */
1956 if (ZSTDv01_isError(errorCode)) return errorCode;
1959 remainingSize -= blockSize;
1965 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1969 return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1972 /* ZSTD_errorFrameSizeInfoLegacy() :
1973 assumes `cSize` and `dBound` are _not_ NULL */
1974 static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
1977 *dBound = ZSTD_CONTENTSIZE_ERROR;
1980 void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
1982 const BYTE* ip = (const BYTE*)src;
1983 size_t remainingSize = srcSize;
1984 size_t nbBlocks = 0;
1986 blockProperties_t blockProperties;
1989 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
1990 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
1993 magicNumber = ZSTD_readBE32(src);
1994 if (magicNumber != ZSTD_magicNumber) {
1995 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
1998 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2000 /* Loop on each block */
2003 size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2004 if (ZSTDv01_isError(blockSize)) {
2005 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2009 ip += ZSTD_blockHeaderSize;
2010 remainingSize -= ZSTD_blockHeaderSize;
2011 if (blockSize > remainingSize) {
2012 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2016 if (blockSize == 0) break; /* bt_end */
2019 remainingSize -= blockSize;
2023 *cSize = ip - (const BYTE*)src;
2024 *dBound = nbBlocks * BLOCKSIZE;
2027 /*******************************
2028 * Streaming Decompression API
2029 *******************************/
2031 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2033 dctx->expected = ZSTD_frameHeaderSize;
2035 dctx->previousDstEnd = NULL;
2040 ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2042 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2043 if (dctx==NULL) return NULL;
2044 ZSTDv01_resetDCtx(dctx);
2048 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2054 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2056 return ((dctx_t*)dctx)->expected;
2059 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2061 dctx_t* ctx = (dctx_t*)dctx;
2064 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2065 if (dst != ctx->previousDstEnd) /* not contiguous */
2068 /* Decompress : frame header */
2069 if (ctx->phase == 0)
2071 /* Check frame magic header */
2072 U32 magicNumber = ZSTD_readBE32(src);
2073 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2075 ctx->expected = ZSTD_blockHeaderSize;
2079 /* Decompress : block header */
2080 if (ctx->phase == 1)
2082 blockProperties_t bp;
2083 size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2084 if (ZSTDv01_isError(blockSize)) return blockSize;
2085 if (bp.blockType == bt_end)
2092 ctx->expected = blockSize;
2093 ctx->bType = bp.blockType;
2100 /* Decompress : block content */
2106 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2109 rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2112 return ERROR(GENERIC); /* not yet handled */
2114 case bt_end : /* should never happen (filtered at phase 1) */
2118 return ERROR(GENERIC);
2121 ctx->expected = ZSTD_blockHeaderSize;
2122 if (ZSTDv01_isError(rSize)) return rSize;
2123 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);