git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / tests / fuzz / sequence_compression_api.c
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
/**
 * This fuzz target performs a zstd round-trip test: it generates an arbitrary
 * array of sequences, builds the associated source buffer, calls
 * ZSTD_compressSequences(), then decompresses the result and compares it with
 * the original generated source buffer.
 */
17
18#define ZSTD_STATIC_LINKING_ONLY
19
20#include <stddef.h>
21#include <stdlib.h>
22#include <stdio.h>
23#include <string.h>
24#include <time.h>
25#include "fuzz_helpers.h"
26#include "zstd_helpers.h"
27#include "fuzz_data_producer.h"
28#include "fuzz_third_party_seq_prod.h"
29
30static ZSTD_CCtx* cctx = NULL;
31static ZSTD_DCtx* dctx = NULL;
32static void* literalsBuffer = NULL;
33static void* generatedSrc = NULL;
34static ZSTD_Sequence* generatedSequences = NULL;
35
36static void* dictBuffer = NULL;
37static ZSTD_CDict* cdict = NULL;
38static ZSTD_DDict* ddict = NULL;
39
/* Allow up to 1MB generated data */
#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20)

/* Fixed size 1MB literals buffer */
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20)

/* Allow up to 256KB matches */
#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18)

/* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32)

/* Maximum of 128K sequences */
#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17)
45
/* Deterministic random number generator */

/* Rotate the 32-bit unsigned value `x` left by `r` bits.
 * `r` must satisfy 0 < r < 32, otherwise one of the shifts is by a full
 * word width. Both arguments are parenthesized so that compound expressions
 * (e.g. `a | b`, `n + 1`) expand correctly. */
#define FUZZ_RDG_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))

/**
 * Advance the generator state and return the next pseudo-random value.
 *
 * @param src  in/out generator state; overwritten with the new state.
 * @return     the new state shifted right by 5 bits (so at most 27 bits wide).
 *
 * The same input state always yields the same output and next state.
 */
static uint32_t FUZZ_RDG_rand(uint32_t* src)
{
    static const uint32_t prime1 = 2654435761U;
    static const uint32_t prime2 = 2246822519U;
    uint32_t rand32 = *src;
    rand32 *= prime1;
    rand32 ^= prime2;
    rand32 = FUZZ_RDG_rotl32(rand32, 13);
    *src = rand32;
    return rand32 >> 5;
}
59
60/* Make a pseudorandom string - this simple function exists to avoid
61 * taking a dependency on datagen.h to have RDG_genBuffer().
62 */
63static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
64 const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
65 uint32_t seed = FUZZ_dataProducer_uint32(producer);
66 if (size) {
67 for (size_t n = 0; n < size; n++) {
68 int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
69 str[n] = charset[key];
70 }
71 }
72 return str;
73}
74
75/* Returns size of source buffer */
76static size_t decodeSequences(void* dst, size_t nbSequences,
77 size_t literalsSize,
78 const void* dict, size_t dictSize,
79 ZSTD_sequenceFormat_e mode)
80{
81 const uint8_t* litPtr = literalsBuffer;
82 const uint8_t* const litBegin = literalsBuffer;
83 const uint8_t* const litEnd = litBegin + literalsSize;
84 const uint8_t* dictPtr = dict;
85 uint8_t* op = dst;
86 const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
87 size_t generatedSrcBufferSize = 0;
88 size_t bytesWritten = 0;
89
90 for (size_t i = 0; i < nbSequences; ++i) {
91 /* block boundary */
92 if (generatedSequences[i].offset == 0)
93 FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
94
95 if (litPtr + generatedSequences[i].litLength > litEnd) {
96 litPtr = litBegin;
97 }
98 memcpy(op, litPtr, generatedSequences[i].litLength);
99 bytesWritten += generatedSequences[i].litLength;
100 op += generatedSequences[i].litLength;
101 litPtr += generatedSequences[i].litLength;
102
103 /* Copy over the match */
104 { size_t matchLength = generatedSequences[i].matchLength;
105 size_t j = 0;
106 size_t k = 0;
107 if (dictSize != 0) {
108 if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
109 size_t dictOffset = generatedSequences[i].offset - bytesWritten;
110 size_t matchInDict = MIN(matchLength, dictOffset);
111 for (; k < matchInDict; ++k) {
112 op[k] = dictPtr[dictSize - dictOffset + k];
113 }
114 matchLength -= matchInDict;
115 op += matchInDict;
116 }
117 }
118 for (; j < matchLength; ++j) {
119 op[j] = op[j - generatedSequences[i].offset];
120 }
121 op += j;
122 FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
123 bytesWritten += generatedSequences[i].matchLength;
124 }
125 }
126 generatedSrcBufferSize = bytesWritten;
127 FUZZ_ASSERT(litPtr <= litEnd);
128 if (mode == ZSTD_sf_noBlockDelimiters) {
129 const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
130 if (lastLLSize <= oend - op) {
131 memcpy(op, litPtr, lastLLSize);
132 generatedSrcBufferSize += lastLLSize;
133 } }
134 return generatedSrcBufferSize;
135}
136
137/* Returns nb sequences generated
138 * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
139 * However, it can fail with ZSTD_sf_explicitBlockDelimiters,
140 * due to potential lack of space in
141 */
142static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
143 size_t literalsSizeLimit, size_t dictSize,
144 size_t windowLog, ZSTD_sequenceFormat_e mode)
145{
146 const uint32_t repCode = 0; /* not used by sequence ingestion api */
147 size_t windowSize = 1ULL << windowLog;
148 size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
149 uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
150 uint32_t bytesGenerated = 0;
151 uint32_t nbSeqGenerated = 0;
152 uint32_t isFirstSequence = 1;
153 uint32_t blockSize = 0;
154
155 if (mode == ZSTD_sf_explicitBlockDelimiters) {
156 /* ensure that no sequence can be larger than one block */
157 literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
158 matchLengthMax = MIN(matchLengthMax, blockSizeMax/2);
159 }
160
161 while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
162 && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
163 && !FUZZ_dataProducer_empty(producer)) {
164 uint32_t matchLength;
165 uint32_t matchBound = matchLengthMax;
166 uint32_t offset;
167 uint32_t offsetBound;
168 const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
169 const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
170 bytesGenerated += litLength;
171 if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
172 break;
173 }
174 offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize;
175 offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
176 if (dictSize > 0 && bytesGenerated <= windowSize) {
177 /* Prevent match length from being such that it would be associated with an offset too large
178 * from the decoder's perspective. If not possible (match would be too small),
179 * then reduce the offset if necessary.
180 */
181 const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
182 if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
183 const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
184 offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
185 } else {
186 matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
187 }
188 }
189 matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
190 bytesGenerated += matchLength;
191 if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
192 break;
193 }
194 { ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
195 const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
196 #define SPLITPROB 6000
197 #define SPLITMARK 5234
198 const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
199 if (mode == ZSTD_sf_explicitBlockDelimiters) {
200 const size_t seqSize = seq.litLength + seq.matchLength;
201 if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */
202 const ZSTD_Sequence endBlock = {0, 0, 0, 0};
203 generatedSequences[nbSeqGenerated++] = endBlock;
204 blockSize = seqSize;
205 }
206 if (split) {
207 const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
208 generatedSequences[nbSeqGenerated++] = endBlock;
209 assert(lastLits <= seq.litLength);
210 seq.litLength -= lastLits;
211 blockSize = seqSize - lastLits;
212 } else {
213 blockSize += seqSize;
214 }
215 }
216 generatedSequences[nbSeqGenerated++] = seq;
217 isFirstSequence = 0;
218 }
219 }
220
221 if (mode == ZSTD_sf_explicitBlockDelimiters) {
222 /* always end sequences with a block delimiter */
223 const ZSTD_Sequence endBlock = {0, 0, 0, 0};
224 assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
225 generatedSequences[nbSeqGenerated++] = endBlock;
226 }
227 return nbSeqGenerated;
228}
229
230static size_t roundTripTest(void* result, size_t resultCapacity,
231 void* compressed, size_t compressedCapacity,
232 const void* src, size_t srcSize,
233 const ZSTD_Sequence* seqs, size_t seqSize,
234 unsigned hasDict,
235 ZSTD_sequenceFormat_e mode)
236{
237 size_t cSize;
238 size_t dSize;
239
240 if (hasDict) {
241 FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
242 FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
243 }
244
245 cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
246 seqs, seqSize,
247 src, srcSize);
248 if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall)
249 && (mode == ZSTD_sf_explicitBlockDelimiters) ) {
250 /* Valid scenario : in explicit delimiter mode,
251 * it might be possible for the compressed size to outgrow dstCapacity.
252 * In which case, it's still a valid fuzzer scenario,
253 * but no roundtrip shall be possible */
254 return 0;
255 }
256 /* round-trip */
257 FUZZ_ZASSERT(cSize);
258 dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
259 FUZZ_ZASSERT(dSize);
260 FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
261 FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
262 return dSize;
263}
264
265int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
266{
267 FUZZ_SEQ_PROD_SETUP();
268
269 void* rBuf;
270 size_t rBufSize;
271 void* cBuf;
272 size_t cBufSize;
273 size_t generatedSrcSize;
274 size_t nbSequences;
275 size_t dictSize = 0;
276 unsigned hasDict;
277 unsigned wLog;
278 int cLevel;
279 ZSTD_sequenceFormat_e mode;
280
281 FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
282 FUZZ_ASSERT(producer);
283
284 if (!cctx) {
285 cctx = ZSTD_createCCtx();
286 FUZZ_ASSERT(cctx);
287 }
288 if (!dctx) {
289 dctx = ZSTD_createDCtx();
290 FUZZ_ASSERT(dctx);
291 }
292
293 /* Generate window log first so we don't generate offsets too large */
294 wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
295 cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
296 mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
297
298 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
299 ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
300 ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
301 ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
302 ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
303 ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
304 ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
305 ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
306
307 if (!literalsBuffer) {
308 literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
309 FUZZ_ASSERT(literalsBuffer);
310 literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
311 }
312
313 if (!dictBuffer) { /* Generate global dictionary buffer */
314 ZSTD_compressionParameters cParams;
315
316 /* Generate a large dictionary buffer */
317 dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
318 FUZZ_ASSERT(dictBuffer);
319
320 /* Create global cdict and ddict */
321 cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
322 cParams.minMatch = ZSTD_MINMATCH_MIN;
323 cParams.hashLog = ZSTD_HASHLOG_MIN;
324 cParams.chainLog = ZSTD_CHAINLOG_MIN;
325
326 cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
327 ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
328 FUZZ_ASSERT(cdict);
329 FUZZ_ASSERT(ddict);
330 }
331
332 FUZZ_ASSERT(cdict);
333 FUZZ_ASSERT(ddict);
334
335 hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
336 if (hasDict) {
337 dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
338 }
339
340 if (!generatedSequences) {
341 generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
342 }
343 if (!generatedSrc) {
344 generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
345 }
346
347 nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
348 generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
349
350 /* Note : in explicit block delimiters mode,
351 * the fuzzer might generate a lot of small blocks.
352 * In which case, the final compressed size might be > ZSTD_compressBound().
353 * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
354 * The test just doesn't roundtrip. */
355 cBufSize = ZSTD_compressBound(generatedSrcSize);
356 cBuf = FUZZ_malloc(cBufSize);
357
358 rBufSize = generatedSrcSize;
359 rBuf = FUZZ_malloc(rBufSize);
360
361 { const size_t result = roundTripTest(rBuf, rBufSize,
362 cBuf, cBufSize,
363 generatedSrc, generatedSrcSize,
364 generatedSequences, nbSequences,
365 hasDict, mode);
366 FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */
367 }
368
369 free(rBuf);
370 free(cBuf);
371 FUZZ_dataProducer_free(producer);
372#ifndef STATEFUL_FUZZING
373 ZSTD_freeCCtx(cctx); cctx = NULL;
374 ZSTD_freeDCtx(dctx); dctx = NULL;
375 free(generatedSequences); generatedSequences = NULL;
376 free(generatedSrc); generatedSrc = NULL;
377 free(literalsBuffer); literalsBuffer = NULL;
378#endif
379 FUZZ_SEQ_PROD_TEARDOWN();
380 return 0;
381}