2 * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
12 /******************************************
14 ******************************************/
15 #include <stddef.h> /* size_t, ptrdiff_t */
17 #include "../common/error_private.h"
20 /******************************************
22 ******************************************/
23 /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
26 /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27 #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
28 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29 unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
32 /******************************************
34 ******************************************/
35 #define FSE_LIST_ERRORS(ITEM) \
36 ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37 ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38 ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39 ITEM(FSE_ERROR_corruptionDetected) \
40 ITEM(FSE_ERROR_maxCode)
42 #define FSE_GENERATE_ENUM(ENUM) ENUM,
43 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
46 /******************************************
47 * FSE symbol compression API
48 ******************************************/
50 This API consists of small unitary functions, which highly benefit from being inlined.
51 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52 Visual seems to do it automatically.
53 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54 If none of these solutions is applicable, include "fse.c" directly.
57 typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
58 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
72 const void* stateTable;
80 unsigned bitsConsumed;
88 const void* table; /* precise table may vary, depending on U16 */
91 typedef enum { FSE_DStream_unfinished = 0,
92 FSE_DStream_endOfBuffer = 1,
93 FSE_DStream_completed = 2,
94 FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
95 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
98 /****************************************************************
100 ****************************************************************/
102 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103 * Increasing memory usage improves compression ratio
104 * Reduced memory usage can improve speed, due to cache effect
105 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106 #define FSE_MAX_MEMORY_USAGE 14
107 #define FSE_DEFAULT_MEMORY_USAGE 13
109 /* FSE_MAX_SYMBOL_VALUE :
110 * Maximum symbol value authorized.
111 * Required for proper stack allocation */
112 #define FSE_MAX_SYMBOL_VALUE 255
115 /****************************************************************
116 * template functions type & suffix
117 ****************************************************************/
118 #define FSE_FUNCTION_TYPE BYTE
119 #define FSE_FUNCTION_EXTENSION
122 /****************************************************************
124 ****************************************************************/
127 unsigned short newState;
128 unsigned char symbol;
129 unsigned char nbBits;
130 } FSE_decode_t; /* size == U32 */
134 /****************************************************************
136 ****************************************************************/
137 #ifdef _MSC_VER /* Visual Studio */
138 # define FORCE_INLINE static __forceinline
139 # include <intrin.h> /* For Visual 2005 */
140 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
141 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
143 # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
146 # define FORCE_INLINE static inline __attribute__((always_inline))
148 # define FORCE_INLINE static inline
151 # define FORCE_INLINE static
152 # endif /* __STDC_VERSION__ */
156 /****************************************************************
158 ****************************************************************/
159 #include <stdlib.h> /* malloc, free, qsort */
160 #include <string.h> /* memcpy, memset */
161 #include <stdio.h> /* printf (debug) */
164 #ifndef MEM_ACCESS_MODULE
165 #define MEM_ACCESS_MODULE
166 /****************************************************************
168 *****************************************************************/
169 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
171 typedef uint8_t BYTE;
172 typedef uint16_t U16;
174 typedef uint32_t U32;
176 typedef uint64_t U64;
179 typedef unsigned char BYTE;
180 typedef unsigned short U16;
181 typedef signed short S16;
182 typedef unsigned int U32;
183 typedef signed int S32;
184 typedef unsigned long long U64;
185 typedef signed long long S64;
188 #endif /* MEM_ACCESS_MODULE */
190 /****************************************************************
192 *****************************************************************/
194 static unsigned FSE_32bits(void)
196 return sizeof(void*)==4;
199 static unsigned FSE_isLittleEndian(void)
201 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
205 static U16 FSE_read16(const void* memPtr)
207 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
210 static U32 FSE_read32(const void* memPtr)
212 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
215 static U64 FSE_read64(const void* memPtr)
217 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
220 static U16 FSE_readLE16(const void* memPtr)
222 if (FSE_isLittleEndian())
223 return FSE_read16(memPtr);
226 const BYTE* p = (const BYTE*)memPtr;
227 return (U16)(p[0] + (p[1]<<8));
231 static U32 FSE_readLE32(const void* memPtr)
233 if (FSE_isLittleEndian())
234 return FSE_read32(memPtr);
237 const BYTE* p = (const BYTE*)memPtr;
238 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
243 static U64 FSE_readLE64(const void* memPtr)
245 if (FSE_isLittleEndian())
246 return FSE_read64(memPtr);
249 const BYTE* p = (const BYTE*)memPtr;
250 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
251 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
255 static size_t FSE_readLEST(const void* memPtr)
258 return (size_t)FSE_readLE32(memPtr);
260 return (size_t)FSE_readLE64(memPtr);
265 /****************************************************************
267 *****************************************************************/
268 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
269 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
270 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
271 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
272 #define FSE_MIN_TABLELOG 5
274 #define FSE_TABLELOG_ABSOLUTE_MAX 15
275 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
276 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
280 /****************************************************************
282 ****************************************************************/
283 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
286 /****************************************************************
288 ****************************************************************/
293 } FSE_symbolCompressionTransform; /* total 8 bytes */
295 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
297 /****************************************************************
299 ****************************************************************/
300 FORCE_INLINE unsigned FSE_highbit32 (U32 val)
302 # if defined(_MSC_VER) /* Visual */
304 return _BitScanReverse(&r, val) ? (unsigned)r : 0;
305 # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
306 return __builtin_clz (val) ^ 31;
307 # else /* Software version */
308 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
316 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
322 /****************************************************************
324 ****************************************************************/
326 designed to be included
327 for type-specific functions (template emulation in C)
328 Objective is to write these functions only once, for improved maintenance
332 #ifndef FSE_FUNCTION_EXTENSION
333 # error "FSE_FUNCTION_EXTENSION must be defined"
335 #ifndef FSE_FUNCTION_TYPE
336 # error "FSE_FUNCTION_TYPE must be defined"
340 #define FSE_CAT(X,Y) X##Y
341 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
342 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
346 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
348 #define FSE_DECODE_TYPE FSE_decode_t
354 } FSE_DTableHeader; /* sizeof U32 */
356 static size_t FSE_buildDTable
357 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
360 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
361 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
362 const U32 tableSize = 1 << tableLog;
363 const U32 tableMask = tableSize-1;
364 const U32 step = FSE_tableStep(tableSize);
365 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
367 U32 highThreshold = tableSize-1;
368 const S16 largeLimit= (S16)(1 << (tableLog-1));
373 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
374 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
376 /* Init, lay down lowprob symbols */
377 DTableH[0].tableLog = (U16)tableLog;
378 for (s=0; s<=maxSymbolValue; s++)
380 if (normalizedCounter[s]==-1)
382 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
387 if (normalizedCounter[s] >= largeLimit) noLarge=0;
388 symbolNext[s] = normalizedCounter[s];
393 for (s=0; s<=maxSymbolValue; s++)
396 for (i=0; i<normalizedCounter[s]; i++)
398 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
399 position = (position + step) & tableMask;
400 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
404 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
406 /* Build Decoding table */
409 for (i=0; i<tableSize; i++)
411 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
412 U16 nextState = symbolNext[symbol]++;
413 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
414 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
418 DTableH->fastMode = (U16)noLarge;
423 /******************************************
425 ******************************************/
426 #ifndef FSE_COMMONDEFS_ONLY
428 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
430 static short FSE_abs(short a)
436 /****************************************************************
437 * Header bitstream management
438 ****************************************************************/
439 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
440 const void* headerBuffer, size_t hbSize)
442 const BYTE* const istart = (const BYTE*) headerBuffer;
443 const BYTE* const iend = istart + hbSize;
444 const BYTE* ip = istart;
450 unsigned charnum = 0;
453 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
454 bitStream = FSE_readLE32(ip);
455 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
456 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
459 *tableLogPtr = nbBits;
460 remaining = (1<<nbBits)+1;
461 threshold = 1<<nbBits;
464 while ((remaining>1) && (charnum<=*maxSVPtr))
468 unsigned n0 = charnum;
469 while ((bitStream & 0xFFFF) == 0xFFFF)
475 bitStream = FSE_readLE32(ip) >> bitCount;
483 while ((bitStream & 3) == 3)
491 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
492 while (charnum < n0) normalizedCounter[charnum++] = 0;
493 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
497 bitStream = FSE_readLE32(ip) >> bitCount;
503 const short max = (short)((2*threshold-1)-remaining);
506 if ((bitStream & (threshold-1)) < (U32)max)
508 count = (short)(bitStream & (threshold-1));
509 bitCount += nbBits-1;
513 count = (short)(bitStream & (2*threshold-1));
514 if (count >= threshold) count -= max;
518 count--; /* extra accuracy */
519 remaining -= FSE_abs(count);
520 normalizedCounter[charnum++] = count;
522 while (remaining < threshold)
529 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
536 bitCount -= (int)(8 * (iend - 4 - ip));
539 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
543 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
544 *maxSVPtr = charnum-1;
546 ip += (bitCount+7)>>3;
547 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
552 /*********************************************************
553 * Decompression (Byte symbols)
554 *********************************************************/
555 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
558 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
559 FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
561 DTableH->tableLog = 0;
562 DTableH->fastMode = 0;
565 cell->symbol = symbolValue;
572 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
575 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
576 FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
577 const unsigned tableSize = 1 << nbBits;
578 const unsigned tableMask = tableSize - 1;
579 const unsigned maxSymbolValue = tableMask;
583 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
585 /* Build Decoding Table */
586 DTableH->tableLog = (U16)nbBits;
587 DTableH->fastMode = 1;
588 for (s=0; s<=maxSymbolValue; s++)
590 dinfo[s].newState = 0;
591 dinfo[s].symbol = (BYTE)s;
592 dinfo[s].nbBits = (BYTE)nbBits;
600 * Initialize a FSE_DStream_t.
601 * srcBuffer must point at the beginning of an FSE block.
602 * The function result is the size of the FSE_block (== srcSize).
603 * If srcSize is too small, the function will return an errorCode;
605 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
607 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
609 if (srcSize >= sizeof(size_t))
612 bitD->start = (const char*)srcBuffer;
613 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
614 bitD->bitContainer = FSE_readLEST(bitD->ptr);
615 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
616 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
617 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
622 bitD->start = (const char*)srcBuffer;
623 bitD->ptr = bitD->start;
624 bitD->bitContainer = *(const BYTE*)(bitD->start);
627 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
629 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
631 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
633 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
635 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
637 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
641 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
642 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
643 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
644 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
652 * Provides next n bits from the bitContainer.
653 * bitContainer is not modified (bits are still present for next read/look)
654 * On 32-bits, maxNbBits==25
655 * On 64-bits, maxNbBits==57
656 * return : value extracted.
658 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
660 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
661 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
664 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
666 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
667 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
670 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
672 bitD->bitsConsumed += nbBits;
677 * Read next n bits from the bitContainer.
678 * On 32-bits, don't read more than maxNbBits==25
679 * On 64-bits, don't read more than maxNbBits==57
680 * Use the fast variant *only* if n >= 1.
681 * return : value extracted.
683 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
685 size_t value = FSE_lookBits(bitD, nbBits);
686 FSE_skipBits(bitD, nbBits);
690 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
692 size_t value = FSE_lookBitsFast(bitD, nbBits);
693 FSE_skipBits(bitD, nbBits);
697 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
699 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
700 return FSE_DStream_tooFar;
702 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
704 bitD->ptr -= bitD->bitsConsumed >> 3;
705 bitD->bitsConsumed &= 7;
706 bitD->bitContainer = FSE_readLEST(bitD->ptr);
707 return FSE_DStream_unfinished;
709 if (bitD->ptr == bitD->start)
711 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
712 return FSE_DStream_completed;
715 U32 nbBytes = bitD->bitsConsumed >> 3;
716 U32 result = FSE_DStream_unfinished;
717 if (bitD->ptr - nbBytes < bitD->start)
719 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
720 result = FSE_DStream_endOfBuffer;
722 bitD->ptr -= nbBytes;
723 bitD->bitsConsumed -= nbBytes*8;
724 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
730 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
732 const void* ptr = dt;
733 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
734 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
735 FSE_reloadDStream(bitD);
736 DStatePtr->table = dt + 1;
739 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
741 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
742 const U32 nbBits = DInfo.nbBits;
743 BYTE symbol = DInfo.symbol;
744 size_t lowBits = FSE_readBits(bitD, nbBits);
746 DStatePtr->state = DInfo.newState + lowBits;
750 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
752 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
753 const U32 nbBits = DInfo.nbBits;
754 BYTE symbol = DInfo.symbol;
755 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
757 DStatePtr->state = DInfo.newState + lowBits;
762 Tells if bitD has reached end of bitStream or not */
764 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
766 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
769 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
771 return DStatePtr->state == 0;
775 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
776 void* dst, size_t maxDstSize,
777 const void* cSrc, size_t cSrcSize,
778 const FSE_DTable* dt, const unsigned fast)
780 BYTE* const ostart = (BYTE*) dst;
782 BYTE* const omax = op + maxDstSize;
783 BYTE* const olimit = omax-3;
791 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
792 if (FSE_isError(errorCode)) return errorCode;
794 FSE_initDState(&state1, &bitD, dt);
795 FSE_initDState(&state2, &bitD, dt);
797 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
799 /* 4 symbols per loop */
800 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
802 op[0] = FSE_GETSYMBOL(&state1);
804 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
805 FSE_reloadDStream(&bitD);
807 op[1] = FSE_GETSYMBOL(&state2);
809 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
810 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
812 op[2] = FSE_GETSYMBOL(&state1);
814 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
815 FSE_reloadDStream(&bitD);
817 op[3] = FSE_GETSYMBOL(&state2);
821 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
824 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
827 *op++ = FSE_GETSYMBOL(&state1);
829 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
832 *op++ = FSE_GETSYMBOL(&state2);
836 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
839 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
841 return (size_t)-FSE_ERROR_corruptionDetected;
845 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
846 const void* cSrc, size_t cSrcSize,
847 const FSE_DTable* dt)
849 FSE_DTableHeader DTableH;
850 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
852 /* select fast mode (static) */
853 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
854 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
858 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
860 const BYTE* const istart = (const BYTE*)cSrc;
861 const BYTE* ip = istart;
862 short counting[FSE_MAX_SYMBOL_VALUE+1];
863 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
865 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
868 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
870 /* normal FSE decoding mode */
871 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
872 if (FSE_isError(errorCode)) return errorCode;
873 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
875 cSrcSize -= errorCode;
877 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
878 if (FSE_isError(errorCode)) return errorCode;
880 /* always return, even if it is an error code */
881 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
886 /* *******************************************************
887 * Huff0 : Huffman block compression
888 *********************************************************/
889 #define HUF_MAX_SYMBOL_VALUE 255
890 #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
891 #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
892 #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
893 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
894 # error "HUF_MAX_TABLELOG is too large !"
897 typedef struct HUF_CElt_s {
902 typedef struct nodeElt_s {
910 /* *******************************************************
911 * Huff0 : Huffman block decompression
912 *********************************************************/
918 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
920 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
921 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
924 const BYTE* ip = (const BYTE*) src;
929 void* ptr = DTable+1;
930 HUF_DElt* const dt = (HUF_DElt*)ptr;
932 if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
935 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
936 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
937 if (iSize >= 128) /* special header */
939 if (iSize >= (242)) /* RLE */
941 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
942 oSize = l[iSize-242];
943 memset(huffWeight, 1, sizeof(huffWeight));
946 else /* Incompressible */
949 iSize = ((oSize+1)/2);
950 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
952 for (n=0; n<oSize; n+=2)
954 huffWeight[n] = ip[n/2] >> 4;
955 huffWeight[n+1] = ip[n/2] & 15;
959 else /* header compressed with FSE (normal case) */
961 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
962 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
963 if (FSE_isError(oSize)) return oSize;
966 /* collect weight stats */
967 memset(rankVal, 0, sizeof(rankVal));
969 for (n=0; n<oSize; n++)
971 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
972 rankVal[huffWeight[n]]++;
973 weightTotal += (1 << huffWeight[n]) >> 1;
975 if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
977 /* get last non-null symbol weight (implied, total must be 2^n) */
978 maxBits = FSE_highbit32(weightTotal) + 1;
979 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
980 DTable[0] = (U16)maxBits;
982 U32 total = 1 << maxBits;
983 U32 rest = total - weightTotal;
984 U32 verif = 1 << FSE_highbit32(rest);
985 U32 lastWeight = FSE_highbit32(rest) + 1;
986 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
987 huffWeight[oSize] = (BYTE)lastWeight;
988 rankVal[lastWeight]++;
991 /* check tree construction validity */
992 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
996 for (n=1; n<=maxBits; n++)
998 U32 current = nextRankStart;
999 nextRankStart += (rankVal[n] << (n-1));
1000 rankVal[n] = current;
1004 for (n=0; n<=oSize; n++)
1006 const U32 w = huffWeight[n];
1007 const U32 length = (1 << w) >> 1;
1010 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1011 for (i = rankVal[w]; i < rankVal[w] + length; i++)
1013 rankVal[w] += length;
1020 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1022 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1023 const BYTE c = dt[val].byte;
1024 FSE_skipBits(Dstream, dt[val].nbBits);
1028 static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
1029 void* dst, size_t maxDstSize,
1030 const void* cSrc, size_t cSrcSize,
1033 if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
1035 BYTE* const ostart = (BYTE*) dst;
1037 BYTE* const omax = op + maxDstSize;
1038 BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
1040 const void* ptr = DTable;
1041 const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
1042 const U32 dtLog = DTable[0];
1048 const U16* jumpTable = (const U16*)cSrc;
1049 const size_t length1 = FSE_readLE16(jumpTable);
1050 const size_t length2 = FSE_readLE16(jumpTable+1);
1051 const size_t length3 = FSE_readLE16(jumpTable+2);
1052 const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; /* check coherency !! */
1053 const char* const start1 = (const char*)(cSrc) + 6;
1054 const char* const start2 = start1 + length1;
1055 const char* const start3 = start2 + length2;
1056 const char* const start4 = start3 + length3;
1057 FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
1059 if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1061 errorCode = FSE_initDStream(&bitD1, start1, length1);
1062 if (FSE_isError(errorCode)) return errorCode;
1063 errorCode = FSE_initDStream(&bitD2, start2, length2);
1064 if (FSE_isError(errorCode)) return errorCode;
1065 errorCode = FSE_initDStream(&bitD3, start3, length3);
1066 if (FSE_isError(errorCode)) return errorCode;
1067 errorCode = FSE_initDStream(&bitD4, start4, length4);
1068 if (FSE_isError(errorCode)) return errorCode;
1070 reloadStatus=FSE_reloadDStream(&bitD2);
1072 /* 16 symbols per loop */
1073 for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
1074 op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
1076 #define HUF_DECODE_SYMBOL_0(n, Dstream) \
1077 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
1079 #define HUF_DECODE_SYMBOL_1(n, Dstream) \
1080 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1081 if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
1083 #define HUF_DECODE_SYMBOL_2(n, Dstream) \
1084 op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
1085 if (FSE_32bits()) FSE_reloadDStream(&Dstream)
1087 HUF_DECODE_SYMBOL_1( 0, bitD1);
1088 HUF_DECODE_SYMBOL_1( 1, bitD2);
1089 HUF_DECODE_SYMBOL_1( 2, bitD3);
1090 HUF_DECODE_SYMBOL_1( 3, bitD4);
1091 HUF_DECODE_SYMBOL_2( 4, bitD1);
1092 HUF_DECODE_SYMBOL_2( 5, bitD2);
1093 HUF_DECODE_SYMBOL_2( 6, bitD3);
1094 HUF_DECODE_SYMBOL_2( 7, bitD4);
1095 HUF_DECODE_SYMBOL_1( 8, bitD1);
1096 HUF_DECODE_SYMBOL_1( 9, bitD2);
1097 HUF_DECODE_SYMBOL_1(10, bitD3);
1098 HUF_DECODE_SYMBOL_1(11, bitD4);
1099 HUF_DECODE_SYMBOL_0(12, bitD1);
1100 HUF_DECODE_SYMBOL_0(13, bitD2);
1101 HUF_DECODE_SYMBOL_0(14, bitD3);
1102 HUF_DECODE_SYMBOL_0(15, bitD4);
1105 if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
1106 return (size_t)-FSE_ERROR_corruptionDetected;
1110 /* bitTail = bitD1; */ /* *much* slower : -20% !??! */
1111 FSE_DStream_t bitTail;
1112 bitTail.ptr = bitD1.ptr;
1113 bitTail.bitsConsumed = bitD1.bitsConsumed;
1114 bitTail.bitContainer = bitD1.bitContainer; /* required in case of FSE_DStream_endOfBuffer */
1115 bitTail.start = start1;
1116 for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
1118 HUF_DECODE_SYMBOL_0(0, bitTail);
1121 if (FSE_endOfDStream(&bitTail))
1125 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1127 return (size_t)-FSE_ERROR_corruptionDetected;
1132 static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
1134 HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
1135 const BYTE* ip = (const BYTE*) cSrc;
1138 errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
1139 if (FSE_isError(errorCode)) return errorCode;
1140 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1142 cSrcSize -= errorCode;
1144 return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1148 #endif /* FSE_COMMONDEFS_ONLY */
1151 zstd - standard compression library
1152 Copyright (C) 2014-2015, Yann Collet.
1154 BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
1156 Redistribution and use in source and binary forms, with or without
1157 modification, are permitted provided that the following conditions are
1159 * Redistributions of source code must retain the above copyright
1160 notice, this list of conditions and the following disclaimer.
1161 * Redistributions in binary form must reproduce the above
1162 copyright notice, this list of conditions and the following disclaimer
1163 in the documentation and/or other materials provided with the
1165 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1166 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1167 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1168 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1169 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1170 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1171 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1172 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1173 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1174 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1175 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1177 You can contact the author at :
1178 - zstd source repository : https://github.com/Cyan4973/zstd
1179 - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
1182 /****************************************************************
1184 *****************************************************************/
1186 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
1187 * Increasing memory usage improves compression ratio
1188 * Reduced memory usage can improve speed, due to cache effect */
1189 #define ZSTD_MEMORY_USAGE 17
1192 /**************************************
1193 CPU Feature Detection
1194 **************************************/
1196 * Automated efficient unaligned memory access detection
1197 * Based on known hardware architectures
1198 * This list will be updated thanks to feedbacks
1200 #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
1201 || defined(__ARM_FEATURE_UNALIGNED) \
1202 || defined(__i386__) || defined(__x86_64__) \
1203 || defined(_M_IX86) || defined(_M_X64) \
1204 || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
1205 || (defined(_M_ARM) && (_M_ARM >= 7))
1206 # define ZSTD_UNALIGNED_ACCESS 1
1208 # define ZSTD_UNALIGNED_ACCESS 0
1212 /********************************************************
1214 *********************************************************/
1215 #include <stdlib.h> /* calloc */
1216 #include <string.h> /* memcpy, memmove */
1217 #include <stdio.h> /* debug : printf */
1220 /********************************************************
1221 * Compiler specifics
1222 *********************************************************/
1224 # include <immintrin.h> /* AVX2 intrinsics */
1227 #ifdef _MSC_VER /* Visual Studio */
1228 # include <intrin.h> /* For Visual 2005 */
1229 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1230 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
1234 #ifndef MEM_ACCESS_MODULE
1235 #define MEM_ACCESS_MODULE
1236 /********************************************************
1238 *********************************************************/
1239 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
1241 # include <inttypes.h>
1243 # include <stdint.h> /* intptr_t */
1245 typedef uint8_t BYTE;
1246 typedef uint16_t U16;
1247 typedef int16_t S16;
1248 typedef uint32_t U32;
1249 typedef int32_t S32;
1250 typedef uint64_t U64;
1252 typedef unsigned char BYTE;
1253 typedef unsigned short U16;
1254 typedef signed short S16;
1255 typedef unsigned int U32;
1256 typedef signed int S32;
1257 typedef unsigned long long U64;
1260 #endif /* MEM_ACCESS_MODULE */
1263 /********************************************************
1265 *********************************************************/
1266 static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
1268 #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
1269 #define HASH_TABLESIZE (1 << HASH_LOG)
1270 #define HASH_MASK (HASH_TABLESIZE - 1)
1272 #define KNUTH 2654435761
1279 #define KB *(1 <<10)
1280 #define MB *(1 <<20)
1281 #define GB *(1U<<30)
1283 #define BLOCKSIZE (128 KB) /* define, for static allocation */
1285 #define WORKPLACESIZE (BLOCKSIZE*3)
1290 #define MaxML ((1<<MLbits )-1)
1291 #define MaxLL ((1<<LLbits )-1)
1292 #define MaxOff ((1<<Offbits)-1)
1293 #define LitFSELog 11
1297 #define MAX(a,b) ((a)<(b)?(b):(a))
1298 #define MaxSeq MAX(MaxLL, MaxML)
1300 #define LITERAL_NOENTROPY 63
1301 #define COMMAND_NOENTROPY 7 /* to remove */
1303 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
1305 static const size_t ZSTD_blockHeaderSize = 3;
1306 static const size_t ZSTD_frameHeaderSize = 4;
1309 /********************************************************
1311 *********************************************************/
1312 static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
1314 static unsigned ZSTD_isLittleEndian(void)
1316 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
1320 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1322 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
1324 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
1326 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
1328 static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
1330 const BYTE* ip = (const BYTE*)src;
1331 BYTE* op = (BYTE*)dst;
1332 BYTE* const oend = op + length;
1333 while (op < oend) COPY8(op, ip);
1336 static U16 ZSTD_readLE16(const void* memPtr)
1338 if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
1341 const BYTE* p = (const BYTE*)memPtr;
1342 return (U16)((U16)p[0] + ((U16)p[1]<<8));
1346 static U32 ZSTD_readLE24(const void* memPtr)
1348 return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
1351 static U32 ZSTD_readBE32(const void* memPtr)
1353 const BYTE* p = (const BYTE*)memPtr;
1354 return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
1358 /**************************************
1360 ***************************************/
1361 typedef struct ZSTD_Cctx_s ZSTD_Cctx;
1363 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
1367 blockType_t blockType;
1369 } blockProperties_t;
1379 BYTE* litLengthStart;
1381 BYTE* matchLengthStart;
1388 typedef struct ZSTD_Cctx_s
1393 seqStore_t seqStore;
1395 __m256i hashTable[HASH_TABLESIZE>>3];
1397 U32 hashTable[HASH_TABLESIZE];
1399 BYTE buffer[WORKPLACESIZE];
1405 /**************************************
1407 **************************************/
1408 /* published entry point */
1409 unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1412 /**************************************
1414 **************************************/
1415 #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
1416 #define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
1417 #define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
1418 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1420 /**************************************************************
1421 * Decompression code
1422 **************************************************************/
1424 static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1426 const BYTE* const in = (const BYTE* const)src;
1430 if (srcSize < 3) return ERROR(srcSize_wrong);
1433 cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1435 bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1436 bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1438 if (bpPtr->blockType == bt_end) return 0;
1439 if (bpPtr->blockType == bt_rle) return 1;
1444 static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1446 if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
1448 memcpy(dst, src, srcSize);
1454 static size_t ZSTD_decompressLiterals(void* ctx,
1455 void* dst, size_t maxDstSize,
1456 const void* src, size_t srcSize)
1458 BYTE* op = (BYTE*)dst;
1459 BYTE* const oend = op + maxDstSize;
1460 const BYTE* ip = (const BYTE*)src;
1464 /* check : minimum 2, for litSize, +1, for content */
1465 if (srcSize <= 3) return ERROR(corruption_detected);
1467 litSize = ip[1] + (ip[0]<<8);
1468 litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */
1469 op = oend - litSize;
1472 if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
1473 errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1474 if (FSE_isError(errorCode)) return ERROR(GENERIC);
1479 static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1480 void* dst, size_t maxDstSize,
1481 const BYTE** litStart, size_t* litSize,
1482 const void* src, size_t srcSize)
1484 const BYTE* const istart = (const BYTE* const)src;
1485 const BYTE* ip = istart;
1486 BYTE* const ostart = (BYTE* const)dst;
1487 BYTE* const oend = ostart + maxDstSize;
1488 blockProperties_t litbp;
1490 size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
1491 if (ZSTDv01_isError(litcSize)) return litcSize;
1492 if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1493 ip += ZSTD_blockHeaderSize;
1495 switch(litbp.blockType)
1500 *litSize = litcSize;
1504 size_t rleSize = litbp.origSize;
1505 if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
1506 if (!srcSize) return ERROR(srcSize_wrong);
1508 memset(oend - rleSize, *ip, rleSize);
1510 *litStart = oend - rleSize;
1517 size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1518 if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
1519 *litStart = oend - decodedLitSize;
1520 *litSize = decodedLitSize;
1526 return ERROR(GENERIC);
1533 static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1534 FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1535 const void* src, size_t srcSize)
1537 const BYTE* const istart = (const BYTE* const)src;
1538 const BYTE* ip = istart;
1539 const BYTE* const iend = istart + srcSize;
1540 U32 LLtype, Offtype, MLtype;
1541 U32 LLlog, Offlog, MLlog;
1545 if (srcSize < 5) return ERROR(srcSize_wrong);
1548 *nbSeq = ZSTD_readLE16(ip); ip+=2;
1550 Offtype = (*ip >> 4) & 3;
1551 MLtype = (*ip >> 2) & 3;
1554 dumpsLength = ip[2];
1555 dumpsLength += ip[1] << 8;
1560 dumpsLength = ip[1];
1561 dumpsLength += (ip[0] & 1) << 8;
1566 *dumpsLengthPtr = dumpsLength;
1569 if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1573 S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
1581 FSE_buildDTable_rle(DTableLL, *ip++); break;
1584 FSE_buildDTable_raw(DTableLL, LLbits); break;
1587 headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1588 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1589 if (LLlog > LLFSELog) return ERROR(corruption_detected);
1591 FSE_buildDTable(DTableLL, norm, max, LLlog);
1598 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1599 FSE_buildDTable_rle(DTableOffb, *ip++); break;
1602 FSE_buildDTable_raw(DTableOffb, Offbits); break;
1605 headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1606 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1607 if (Offlog > OffFSELog) return ERROR(corruption_detected);
1609 FSE_buildDTable(DTableOffb, norm, max, Offlog);
1616 if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */
1617 FSE_buildDTable_rle(DTableML, *ip++); break;
1620 FSE_buildDTable_raw(DTableML, MLbits); break;
1623 headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1624 if (FSE_isError(headerSize)) return ERROR(GENERIC);
1625 if (MLlog > MLFSELog) return ERROR(corruption_detected);
1627 FSE_buildDTable(DTableML, norm, max, MLlog);
1641 FSE_DStream_t DStream;
1642 FSE_DState_t stateLL;
1643 FSE_DState_t stateOffb;
1644 FSE_DState_t stateML;
1647 const BYTE* dumpsEnd;
1651 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1657 const BYTE* dumps = seqState->dumps;
1658 const BYTE* const de = seqState->dumpsEnd;
1660 /* Literal length */
1661 litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1662 prevOffset = litLength ? seq->offset : seqState->prevOffset;
1663 seqState->prevOffset = seq->offset;
1664 if (litLength == MaxLL)
1666 const U32 add = dumps<de ? *dumps++ : 0;
1667 if (add < 255) litLength += add;
1672 litLength = ZSTD_readLE24(dumps);
1680 U32 offsetCode, nbBits;
1681 offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1682 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1683 nbBits = offsetCode - 1;
1684 if (offsetCode==0) nbBits = 0; /* cmove */
1685 offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1686 if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1687 if (offsetCode==0) offset = prevOffset;
1691 matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1692 if (matchLength == MaxML)
1694 const U32 add = dumps<de ? *dumps++ : 0;
1695 if (add < 255) matchLength += add;
1700 matchLength = ZSTD_readLE24(dumps);
1705 matchLength += MINMATCH;
1708 seq->litLength = litLength;
1709 seq->offset = offset;
1710 seq->matchLength = matchLength;
1711 seqState->dumps = dumps;
1715 static size_t ZSTD_execSequence(BYTE* op,
1717 const BYTE** litPtr, const BYTE* const litLimit,
1718 BYTE* const base, BYTE* const oend)
1720 static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1721 static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1722 const BYTE* const ostart = op;
1723 BYTE* const oLitEnd = op + sequence.litLength;
1724 const size_t litLength = sequence.litLength;
1725 BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1726 const BYTE* const litEnd = *litPtr + litLength;
1729 size_t const seqLength = sequence.litLength + sequence.matchLength;
1731 if (seqLength > (size_t)(oend - op)) return ERROR(dstSize_tooSmall);
1732 if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);
1733 /* Now we know there are no overflow in literal nor match lengths, can use pointer checks */
1734 if (sequence.offset > (U32)(oLitEnd - base)) return ERROR(corruption_detected);
1736 if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
1737 if (litEnd > litLimit) return ERROR(corruption_detected); /* overRead beyond lit buffer */
1738 if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
1741 ZSTD_memmove(op, *litPtr, sequence.litLength); /* note : v0.1 seems to allow scenarios where output or input are close to end of buffer */
1744 *litPtr = litEnd; /* update for next sequence */
1746 /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1747 if (oend-op < 8) return ERROR(dstSize_tooSmall);
1751 const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1752 const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1757 if (match < base) return ERROR(corruption_detected);
1758 if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
1760 /* save beginning of literal sequence, in case of write overlap */
1763 if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1764 memcpy(saved, endMatch, qutt);
1767 if (sequence.offset < 8)
1769 const int dec64 = dec64table[sequence.offset];
1774 match += dec32table[sequence.offset];
1775 ZSTD_copy4(op+4, match);
1777 } else { ZSTD_copy8(op, match); }
1778 op += 8; match += 8;
1780 if (endMatch > oend-(16-MINMATCH))
1784 ZSTD_wildcopy(op, match, (oend-8) - op);
1785 match += (oend-8) - op;
1788 while (op<endMatch) *op++ = *match++;
1791 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
1793 /* restore, in case of overlap */
1794 if (overlapRisk) memcpy(endMatch, saved, qutt);
1797 return endMatch-ostart;
1800 typedef struct ZSTDv01_Dctx_s
1802 U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1803 U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1804 U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1805 void* previousDstEnd;
1813 static size_t ZSTD_decompressSequences(
1815 void* dst, size_t maxDstSize,
1816 const void* seqStart, size_t seqSize,
1817 const BYTE* litStart, size_t litSize)
1819 dctx_t* dctx = (dctx_t*)ctx;
1820 const BYTE* ip = (const BYTE*)seqStart;
1821 const BYTE* const iend = ip + seqSize;
1822 BYTE* const ostart = (BYTE* const)dst;
1824 BYTE* const oend = ostart + maxDstSize;
1825 size_t errorCode, dumpsLength;
1826 const BYTE* litPtr = litStart;
1827 const BYTE* const litEnd = litStart + litSize;
1830 U32* DTableLL = dctx->LLTable;
1831 U32* DTableML = dctx->MLTable;
1832 U32* DTableOffb = dctx->OffTable;
1833 BYTE* const base = (BYTE*) (dctx->base);
1835 /* Build Decoding Tables */
1836 errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
1837 DTableLL, DTableML, DTableOffb,
1839 if (ZSTDv01_isError(errorCode)) return errorCode;
1842 /* Regen sequences */
1845 seqState_t seqState;
1847 memset(&sequence, 0, sizeof(sequence));
1848 seqState.dumps = dumps;
1849 seqState.dumpsEnd = dumps + dumpsLength;
1850 seqState.prevOffset = 1;
1851 errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1852 if (FSE_isError(errorCode)) return ERROR(corruption_detected);
1853 FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1854 FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1855 FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1857 for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
1861 ZSTD_decodeSequence(&sequence, &seqState);
1862 oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1863 if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
1867 /* check if reached exact end */
1868 if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected); /* requested too much : data is corrupted */
1869 if (nbSeq<0) return ERROR(corruption_detected); /* requested too many sequences : data is corrupted */
1871 /* last literal segment */
1873 size_t lastLLSize = litEnd - litPtr;
1874 if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
1875 if (lastLLSize > 0) {
1876 if (op != litPtr) memmove(op, litPtr, lastLLSize);
1886 static size_t ZSTD_decompressBlock(
1888 void* dst, size_t maxDstSize,
1889 const void* src, size_t srcSize)
1891 /* blockType == blockCompressed, srcSize is trusted */
1892 const BYTE* ip = (const BYTE*)src;
1893 const BYTE* litPtr = NULL;
1897 /* Decode literals sub-block */
1898 errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1899 if (ZSTDv01_isError(errorCode)) return errorCode;
1901 srcSize -= errorCode;
1903 return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1907 size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1909 const BYTE* ip = (const BYTE*)src;
1910 const BYTE* iend = ip + srcSize;
1911 BYTE* const ostart = (BYTE* const)dst;
1913 BYTE* const oend = ostart + maxDstSize;
1914 size_t remainingSize = srcSize;
1917 blockProperties_t blockProperties;
1920 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
1921 magicNumber = ZSTD_readBE32(src);
1922 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
1923 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1925 /* Loop on each block */
1928 size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
1929 if (ZSTDv01_isError(blockSize)) return blockSize;
1931 ip += ZSTD_blockHeaderSize;
1932 remainingSize -= ZSTD_blockHeaderSize;
1933 if (blockSize > remainingSize) return ERROR(srcSize_wrong);
1935 switch(blockProperties.blockType)
1938 errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1941 errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1944 return ERROR(GENERIC); /* not yet supported */
1948 if (remainingSize) return ERROR(srcSize_wrong);
1951 return ERROR(GENERIC);
1953 if (blockSize == 0) break; /* bt_end */
1955 if (ZSTDv01_isError(errorCode)) return errorCode;
1958 remainingSize -= blockSize;
1964 size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1968 return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1971 /* ZSTD_errorFrameSizeInfoLegacy() :
1972 assumes `cSize` and `dBound` are _not_ NULL */
1973 static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
1976 *dBound = ZSTD_CONTENTSIZE_ERROR;
1979 void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
1981 const BYTE* ip = (const BYTE*)src;
1982 size_t remainingSize = srcSize;
1983 size_t nbBlocks = 0;
1985 blockProperties_t blockProperties;
1988 if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
1989 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
1992 magicNumber = ZSTD_readBE32(src);
1993 if (magicNumber != ZSTD_magicNumber) {
1994 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
1997 ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1999 /* Loop on each block */
2002 size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2003 if (ZSTDv01_isError(blockSize)) {
2004 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2008 ip += ZSTD_blockHeaderSize;
2009 remainingSize -= ZSTD_blockHeaderSize;
2010 if (blockSize > remainingSize) {
2011 ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2015 if (blockSize == 0) break; /* bt_end */
2018 remainingSize -= blockSize;
2022 *cSize = ip - (const BYTE*)src;
2023 *dBound = nbBlocks * BLOCKSIZE;
2026 /*******************************
2027 * Streaming Decompression API
2028 *******************************/
2030 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
2032 dctx->expected = ZSTD_frameHeaderSize;
2034 dctx->previousDstEnd = NULL;
2039 ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
2041 ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
2042 if (dctx==NULL) return NULL;
2043 ZSTDv01_resetDCtx(dctx);
2047 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
2053 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
2055 return ((dctx_t*)dctx)->expected;
2058 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2060 dctx_t* ctx = (dctx_t*)dctx;
2063 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2064 if (dst != ctx->previousDstEnd) /* not contiguous */
2067 /* Decompress : frame header */
2068 if (ctx->phase == 0)
2070 /* Check frame magic header */
2071 U32 magicNumber = ZSTD_readBE32(src);
2072 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2074 ctx->expected = ZSTD_blockHeaderSize;
2078 /* Decompress : block header */
2079 if (ctx->phase == 1)
2081 blockProperties_t bp;
2082 size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2083 if (ZSTDv01_isError(blockSize)) return blockSize;
2084 if (bp.blockType == bt_end)
2091 ctx->expected = blockSize;
2092 ctx->bType = bp.blockType;
2099 /* Decompress : block content */
2105 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2108 rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2111 return ERROR(GENERIC); /* not yet handled */
2113 case bt_end : /* should never happen (filtered at phase 1) */
2117 return ERROR(GENERIC);
2120 ctx->expected = ZSTD_blockHeaderSize;
2121 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);