X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flibchdr%2Fdeps%2Fzstd-1.5.5%2Ftests%2Ffuzz%2Fsequence_compression_api.c;fp=deps%2Flibchdr%2Fdeps%2Fzstd-1.5.5%2Ftests%2Ffuzz%2Fsequence_compression_api.c;h=ede7080e3a25acad0a83db21f85718bb41e9b567;hb=648db22b0750712da893c306efcc8e4b2d3a4e3c;hp=0000000000000000000000000000000000000000;hpb=e2fb1389dc12376acb84e4993ed3b08760257252;p=pcsx_rearmed.git diff --git a/deps/libchdr/deps/zstd-1.5.5/tests/fuzz/sequence_compression_api.c b/deps/libchdr/deps/zstd-1.5.5/tests/fuzz/sequence_compression_api.c new file mode 100644 index 00000000..ede7080e --- /dev/null +++ b/deps/libchdr/deps/zstd-1.5.5/tests/fuzz/sequence_compression_api.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/** + * This fuzz target performs a zstd round-trip test by generating an arbitrary + * array of sequences, generating the associated source buffer, calling + * ZSTD_compressSequences(), and then decompresses and compares the result with + * the original generated source buffer. + */ + +#define ZSTD_STATIC_LINKING_ONLY + +#include +#include +#include +#include +#include +#include "fuzz_helpers.h" +#include "zstd_helpers.h" +#include "fuzz_data_producer.h" +#include "fuzz_third_party_seq_prod.h" + +static ZSTD_CCtx* cctx = NULL; +static ZSTD_DCtx* dctx = NULL; +static void* literalsBuffer = NULL; +static void* generatedSrc = NULL; +static ZSTD_Sequence* generatedSequences = NULL; + +static void* dictBuffer = NULL; +static ZSTD_CDict* cdict = NULL; +static ZSTD_DDict* ddict = NULL; + +#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */ +#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */ +#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ +#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */ +#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ + +/* Deterministic random number generator */ +#define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static uint32_t FUZZ_RDG_rand(uint32_t* src) +{ + static const uint32_t prime1 = 2654435761U; + static const uint32_t prime2 = 2246822519U; + uint32_t rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = FUZZ_RDG_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +/* Make a pseudorandom string - this simple function exists to avoid + * taking a dependency on datagen.h to have RDG_genBuffer(). + */ +static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) { + const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; + uint32_t seed = FUZZ_dataProducer_uint32(producer); + if (size) { + for (size_t n = 0; n < size; n++) { + int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); + str[n] = charset[key]; + } + } + return str; +} + +/* Returns size of source buffer */ +static size_t decodeSequences(void* dst, size_t nbSequences, + size_t literalsSize, + const void* dict, size_t dictSize, + ZSTD_sequenceFormat_e mode) +{ + const uint8_t* litPtr = literalsBuffer; + const uint8_t* const litBegin = literalsBuffer; + const uint8_t* const litEnd = litBegin + literalsSize; + const uint8_t* dictPtr = dict; + uint8_t* op = dst; + const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; + size_t generatedSrcBufferSize = 0; + size_t bytesWritten = 0; + + for (size_t i = 0; i < nbSequences; ++i) { + /* block boundary */ + if (generatedSequences[i].offset == 0) + FUZZ_ASSERT(generatedSequences[i].matchLength == 0); + + if (litPtr + generatedSequences[i].litLength > litEnd) { + litPtr = litBegin; + } + memcpy(op, litPtr, generatedSequences[i].litLength); + bytesWritten += generatedSequences[i].litLength; + op += generatedSequences[i].litLength; + litPtr += generatedSequences[i].litLength; + + /* Copy over the match */ + { size_t matchLength = generatedSequences[i].matchLength; + size_t j = 0; + size_t k = 0; + if (dictSize != 0) { + if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */ + size_t dictOffset = generatedSequences[i].offset - bytesWritten; + size_t matchInDict = MIN(matchLength, dictOffset); + for (; k < matchInDict; ++k) { + op[k] = dictPtr[dictSize - dictOffset + k]; + } + matchLength -= matchInDict; + op += matchInDict; + } + } + for (; j < matchLength; ++j) { + op[j] = op[j - generatedSequences[i].offset]; + } + op += j; + FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); + bytesWritten += generatedSequences[i].matchLength; + } + } + generatedSrcBufferSize = bytesWritten; + FUZZ_ASSERT(litPtr <= litEnd); + if (mode == ZSTD_sf_noBlockDelimiters) { + const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr); + if (lastLLSize <= oend - op) { + memcpy(op, litPtr, lastLLSize); + generatedSrcBufferSize += lastLLSize; + } } + return generatedSrcBufferSize; +} + +/* Returns nb sequences generated + * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode. + * However, it can fail with ZSTD_sf_explicitBlockDelimiters, + * due to potential lack of space in + */ +static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, + size_t literalsSizeLimit, size_t dictSize, + size_t windowLog, ZSTD_sequenceFormat_e mode) +{ + const uint32_t repCode = 0; /* not used by sequence ingestion api */ + size_t windowSize = 1ULL << windowLog; + size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; + uint32_t bytesGenerated = 0; + uint32_t nbSeqGenerated = 0; + uint32_t isFirstSequence = 1; + uint32_t blockSize = 0; + + if (mode == ZSTD_sf_explicitBlockDelimiters) { + /* ensure that no sequence can be larger than one block */ + literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2); + matchLengthMax = MIN(matchLengthMax, blockSizeMax/2); + } + + while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */ + && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE + && !FUZZ_dataProducer_empty(producer)) { + uint32_t matchLength; + uint32_t matchBound = matchLengthMax; + uint32_t offset; + uint32_t offsetBound; + const uint32_t minLitLength = (isFirstSequence && (dictSize == 0)); + const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit); + bytesGenerated += litLength; + if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { + break; + } + offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize; + offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); + if (dictSize > 0 && bytesGenerated <= windowSize) { + /* Prevent match length from being such that it would be associated with an offset too large + * from the decoder's perspective. If not possible (match would be too small), + * then reduce the offset if necessary. + */ + const size_t bytesToReachWindowSize = windowSize - bytesGenerated; + if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { + const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; + offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound); + } else { + matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize); + } + } + matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound); + bytesGenerated += matchLength; + if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { + break; + } + { ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; + const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength); + #define SPLITPROB 6000 + #define SPLITMARK 5234 + const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + const size_t seqSize = seq.litLength + seq.matchLength; + if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */ + const ZSTD_Sequence endBlock = {0, 0, 0, 0}; + generatedSequences[nbSeqGenerated++] = endBlock; + blockSize = seqSize; + } + if (split) { + const ZSTD_Sequence endBlock = {0, lastLits, 0, 0}; + generatedSequences[nbSeqGenerated++] = endBlock; + assert(lastLits <= seq.litLength); + seq.litLength -= lastLits; + blockSize = seqSize - lastLits; + } else { + blockSize += seqSize; + } + } + generatedSequences[nbSeqGenerated++] = seq; + isFirstSequence = 0; + } + } + + if (mode == ZSTD_sf_explicitBlockDelimiters) { + /* always end sequences with a block delimiter */ + const ZSTD_Sequence endBlock = {0, 0, 0, 0}; + assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ); + generatedSequences[nbSeqGenerated++] = endBlock; + } + return nbSeqGenerated; +} + +static size_t roundTripTest(void* result, size_t resultCapacity, + void* compressed, size_t compressedCapacity, + const void* src, size_t srcSize, + const ZSTD_Sequence* seqs, size_t seqSize, + unsigned hasDict, + ZSTD_sequenceFormat_e mode) +{ + size_t cSize; + size_t dSize; + + if (hasDict) { + FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict)); + FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict)); + } + + cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, + seqs, seqSize, + src, srcSize); + if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) + && (mode == ZSTD_sf_explicitBlockDelimiters) ) { + /* Valid scenario : in explicit delimiter mode, + * it might be possible for the compressed size to outgrow dstCapacity. + * In which case, it's still a valid fuzzer scenario, + * but no roundtrip shall be possible */ + return 0; + } + /* round-trip */ + FUZZ_ZASSERT(cSize); + dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); + FUZZ_ZASSERT(dSize); + FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!"); + return dSize; +} + +int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) +{ + FUZZ_SEQ_PROD_SETUP(); + + void* rBuf; + size_t rBufSize; + void* cBuf; + size_t cBufSize; + size_t generatedSrcSize; + size_t nbSequences; + size_t dictSize = 0; + unsigned hasDict; + unsigned wLog; + int cLevel; + ZSTD_sequenceFormat_e mode; + + FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size); + FUZZ_ASSERT(producer); + + if (!cctx) { + cctx = ZSTD_createCCtx(); + FUZZ_ASSERT(cctx); + } + if (!dctx) { + dctx = ZSTD_createDCtx(); + FUZZ_ASSERT(dctx); + } + + /* Generate window log first so we don't generate offsets too large */ + wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); + mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1); + + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach); + + if (!literalsBuffer) { + literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + FUZZ_ASSERT(literalsBuffer); + literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer); + } + + if (!dictBuffer) { /* Generate global dictionary buffer */ + ZSTD_compressionParameters cParams; + + /* Generate a large dictionary buffer */ + dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1); + FUZZ_ASSERT(dictBuffer); + + /* Create global cdict and ddict */ + cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); + cParams.minMatch = ZSTD_MINMATCH_MIN; + cParams.hashLog = ZSTD_HASHLOG_MIN; + cParams.chainLog = ZSTD_CHAINLOG_MIN; + + cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem); + ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem); + FUZZ_ASSERT(cdict); + FUZZ_ASSERT(ddict); + } + + FUZZ_ASSERT(cdict); + FUZZ_ASSERT(ddict); + + hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); + if (hasDict) { + dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE; + } + + if (!generatedSequences) { + generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); + } + if (!generatedSrc) { + generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); + } + + nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode); + generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode); + + /* Note : in explicit block delimiters mode, + * the fuzzer might generate a lot of small blocks. + * In which case, the final compressed size might be > ZSTD_compressBound(). + * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity. + * The test just doesn't roundtrip. */ + cBufSize = ZSTD_compressBound(generatedSrcSize); + cBuf = FUZZ_malloc(cBufSize); + + rBufSize = generatedSrcSize; + rBuf = FUZZ_malloc(rBufSize); + + { const size_t result = roundTripTest(rBuf, rBufSize, + cBuf, cBufSize, + generatedSrc, generatedSrcSize, + generatedSequences, nbSequences, + hasDict, mode); + FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */ + } + + free(rBuf); + free(cBuf); + FUZZ_dataProducer_free(producer); +#ifndef STATEFUL_FUZZING + ZSTD_freeCCtx(cctx); cctx = NULL; + ZSTD_freeDCtx(dctx); dctx = NULL; + free(generatedSequences); generatedSequences = NULL; + free(generatedSrc); generatedSrc = NULL; + free(literalsBuffer); literalsBuffer = NULL; +#endif + FUZZ_SEQ_PROD_TEARDOWN(); + return 0; +}