/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/*-*************************************
*  Dependencies
***************************************/
#include "zstd_compress_literals.h"
/* **************************************************************
*  Debug Traces
****************************************************************/
22 static size_t showHexa(const void* src, size_t srcSize)
24 const BYTE* const ip = (const BYTE*)src;
26 for (u=0; u<srcSize; u++) {
27 RAWLOG(5, " %02X", ip[u]); (void)ip;
/* **************************************************************
*  Literals compression - special cases
****************************************************************/
39 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
41 BYTE* const ostart = (BYTE*)dst;
42 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
44 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
46 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
50 case 1: /* 2 - 1 - 5 */
51 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
53 case 2: /* 2 - 2 - 12 */
54 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
56 case 3: /* 2 - 2 - 20 */
57 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
59 default: /* not necessary : flSize is {1,2,3} */
63 ZSTD_memcpy(ostart + flSize, src, srcSize);
64 DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65 return srcSize + flSize;
68 static int allBytesIdentical(const void* src, size_t srcSize)
72 { const BYTE b = ((const BYTE*)src)[0];
74 for (p=1; p<srcSize; p++) {
75 if (((const BYTE*)src)[p] != b) return 0;
81 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
83 BYTE* const ostart = (BYTE*)dst;
84 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
86 assert(dstCapacity >= 4); (void)dstCapacity;
87 assert(allBytesIdentical(src, srcSize));
91 case 1: /* 2 - 1 - 5 */
92 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
94 case 2: /* 2 - 2 - 12 */
95 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
97 case 3: /* 2 - 2 - 20 */
98 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
100 default: /* not necessary : flSize is {1,2,3} */
104 ostart[flSize] = *(const BYTE*)src;
105 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
109 /* ZSTD_minLiteralsToCompress() :
110 * returns minimal amount of literals
111 * for literal compression to even be attempted.
112 * Minimum is made tighter as compression strategy increases.
115 ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
117 assert((int)strategy >= 0);
118 assert((int)strategy <= 9);
119 /* btultra2 : min 8 bytes;
120 * then 2x larger for each successive compression strategy
121 * max threshold 64 bytes */
122 { int const shift = MIN(9-(int)strategy, 3);
123 size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
129 size_t ZSTD_compressLiterals (
130 void* dst, size_t dstCapacity,
131 const void* src, size_t srcSize,
132 void* entropyWorkspace, size_t entropyWorkspaceSize,
133 const ZSTD_hufCTables_t* prevHuf,
134 ZSTD_hufCTables_t* nextHuf,
135 ZSTD_strategy strategy,
136 int disableLiteralCompression,
137 int suspectUncompressible,
140 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141 BYTE* const ostart = (BYTE*)dst;
142 U32 singleStream = srcSize < 256;
143 symbolEncodingType_e hType = set_compressed;
146 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147 disableLiteralCompression, (U32)srcSize, dstCapacity);
149 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
151 /* Prepare nextEntropy assuming reusing the existing table */
152 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
154 if (disableLiteralCompression)
155 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
157 /* if too small, don't even attempt compression (speed opt) */
158 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
161 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162 { HUF_repeat repeat = prevHuf->repeatMode;
164 | (bmi2 ? HUF_flags_bmi2 : 0)
165 | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166 | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167 | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
169 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170 huf_compress_f huf_compress;
171 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172 huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
175 HUF_SYMBOLVALUE_MAX, LitHufLog,
176 entropyWorkspace, entropyWorkspaceSize,
177 (HUF_CElt*)nextHuf->CTable,
179 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180 if (repeat != HUF_repeat_none) {
181 /* reused the existing table */
182 DEBUGLOG(5, "reusing statistics from previous huffman block");
187 { size_t const minGain = ZSTD_minGain(srcSize, strategy);
188 if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
193 /* A return value of 1 signals that the alphabet consists of a single symbol.
194 * However, in some rare circumstances, it could be the compressed size (a single byte).
195 * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196 * (it's also necessary to not generate statistics).
197 * Therefore, in such a case, actively check that all bytes are identical. */
198 if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
203 if (hType == set_compressed) {
204 /* using a newly constructed table */
205 nextHuf->repeatMode = HUF_repeat_check;
211 case 3: /* 2 - 2 - 10 - 10 */
212 if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214 MEM_writeLE24(ostart, lhc);
217 case 4: /* 2 - 2 - 14 - 14 */
218 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
219 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220 MEM_writeLE32(ostart, lhc);
223 case 5: /* 2 - 2 - 18 - 18 */
224 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
225 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226 MEM_writeLE32(ostart, lhc);
227 ostart[4] = (BYTE)(cLitSize >> 10);
230 default: /* not possible : lhSize is {3,4,5} */
233 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234 return lhSize+cLitSize;