[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / tests / fuzz / sequence_compression_api.c

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/**
 * This fuzz target performs a zstd round-trip test by generating an arbitrary
 * array of sequences, generating the associated source buffer, calling
 * ZSTD_compressSequences(), and then decompresses and compares the result with
 * the original generated source buffer.
 */

#define ZSTD_STATIC_LINKING_ONLY

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "fuzz_helpers.h"
#include "zstd_helpers.h"
#include "fuzz_data_producer.h"
#include "fuzz_third_party_seq_prod.h"

/* Fuzzer-wide state shared by the helpers below.
 * The contexts and scratch buffers are created on demand by
 * LLVMFuzzerTestOneInput() whenever they are found NULL, and are released at
 * the end of each run unless STATEFUL_FUZZING is defined. */
static ZSTD_CCtx* cctx = NULL;
static ZSTD_DCtx* dctx = NULL;
/* Pool of pseudo-random bytes that decodeSequences() reads literals from */
static void* literalsBuffer = NULL;
/* Source buffer rebuilt from the generated sequences */
static void* generatedSrc = NULL;
/* Sequence array filled by generateRandomSequences() */
static ZSTD_Sequence* generatedSequences = NULL;

/* Dictionary state: created once, the first time LLVMFuzzerTestOneInput()
 * finds dictBuffer NULL; LLVMFuzzerTestOneInput() never frees it. */
static void* dictBuffer = NULL;
static ZSTD_CDict* cdict = NULL;
static ZSTD_DDict* ddict = NULL;

/* Size limits applied to every generated test case */
#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */

/* Deterministic random number generator */

/* Rotates the 32-bit unsigned value `x` left by `r` bits.
 * `r` must be in [1, 31] so that neither shift count reaches the operand
 * width. Every parameter is parenthesized so that compound arguments
 * (e.g. `a | b`) expand with the intended precedence. */
#define FUZZ_RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))

/* Advances the generator state `*src` by one step (multiply, xor, rotate),
 * stores the new state back into `*src`, and returns that state shifted
 * right by 5 bits. The same starting state always yields the same values. */
static uint32_t FUZZ_RDG_rand(uint32_t* src)
{
    static const uint32_t prime1 = 2654435761U;
    static const uint32_t prime2 = 2246822519U;
    uint32_t rand32 = *src;
    rand32 *= prime1;
    rand32 ^= prime2;
    rand32  = FUZZ_RDG_rotl32(rand32, 13);
    *src = rand32;
    return rand32 >> 5;
}

/* Fills `str` with `size` pseudo-random characters picked from a fixed
 * printable alphabet, and returns `str`. No terminating NUL is written.
 * The generator seed is a 32-bit value drawn from `producer`; it is drawn
 * even when `size` is 0.
 * This small helper exists so that the file does not have to depend on
 * datagen.h just to get RDG_genBuffer().
 */
static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
    const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
    const uint32_t nbSymbols = (uint32_t)(sizeof charset - 1);  /* exclude the trailing NUL */
    uint32_t state = FUZZ_dataProducer_uint32(producer);
    size_t remaining = size;
    char* out = str;

    while (remaining != 0) {
        *out = charset[FUZZ_RDG_rand(&state) % nbSymbols];
        ++out;
        --remaining;
    }
    return str;
}

/* Rebuilds into `dst` the source data described by the first `nbSequences`
 * entries of the global `generatedSequences` array, and returns the size of
 * that source buffer in bytes.
 *
 * dst          : output buffer; `oend` below treats it as
 *                ZSTD_FUZZ_GENERATED_SRC_MAXSIZE bytes long
 * nbSequences  : number of entries of `generatedSequences` to replay
 * literalsSize : number of usable bytes in the global `literalsBuffer`;
 *                literals are read from it sequentially, restarting from its
 *                beginning whenever the next run would not fit in what is left
 * dict,dictSize: dictionary content; when dictSize != 0, a match whose offset
 *                reaches before the start of `dst` is (partly) read from the
 *                end of the dictionary
 * mode         : in ZSTD_sf_noBlockDelimiters mode, the unread tail of the
 *                literals buffer is appended as trailing literals when it
 *                fits into the remaining room of `dst`
 *
 * Entries with offset == 0 are block boundaries and must have matchLength == 0.
 */
static size_t decodeSequences(void* dst, size_t nbSequences,
                              size_t literalsSize,
                              const void* dict, size_t dictSize,
                              ZSTD_sequenceFormat_e mode)
{
    const uint8_t* litPtr = literalsBuffer;
    const uint8_t* const litBegin = literalsBuffer;
    const uint8_t* const litEnd = litBegin + literalsSize;
    const uint8_t* dictPtr = dict;
    uint8_t* op = dst;
    const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
    size_t generatedSrcBufferSize = 0;
    size_t bytesWritten = 0;

    for (size_t i = 0; i < nbSequences; ++i) {
        const size_t litLength = generatedSequences[i].litLength;
        const size_t offset = generatedSequences[i].offset;

        /* block boundary */
        if (generatedSequences[i].offset == 0)
            FUZZ_ASSERT(generatedSequences[i].matchLength == 0);

        /* Restart from the beginning of the literals pool when the next run
         * does not fit in what remains. Lengths are compared (rather than
         * pointers) so that no pointer past the end of the pool is formed. */
        if (litLength > (size_t)(litEnd - litPtr)) {
            litPtr = litBegin;
        }
        memcpy(op, litPtr, litLength);
        bytesWritten += litLength;
        op += litLength;
        litPtr += litLength;

        /* Copy over the match */
        {   size_t matchLength = generatedSequences[i].matchLength;
            size_t j = 0;
            size_t k = 0;
            if (dictSize != 0) {
                if (offset > bytesWritten) { /* Offset goes into the dictionary */
                    size_t dictOffset = offset - bytesWritten;
                    size_t matchInDict = MIN(matchLength, dictOffset);
                    for (; k < matchInDict; ++k) {
                        op[k] = dictPtr[dictSize - dictOffset + k];
                    }
                    matchLength -= matchInDict;
                    op += matchInDict;
                }
            }
            if (matchLength > 0) {
                /* Remaining part of the match comes from already generated
                 * data. The match pointer is computed once instead of
                 * indexing op[] with a wrapped-around unsigned difference.
                 * Bytes are copied one at a time, front to back, because the
                 * match may overlap the bytes being written. */
                const uint8_t* const match = op - offset;
                for (; j < matchLength; ++j) {
                    op[j] = match[j];
                }
            }
            op += j;
            FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
            bytesWritten += generatedSequences[i].matchLength;
        }
    }
    generatedSrcBufferSize = bytesWritten;
    FUZZ_ASSERT(litPtr <= litEnd);
    if (mode == ZSTD_sf_noBlockDelimiters) {
        /* Without explicit delimiters, whatever is left of the literals pool
         * is appended as last literals, provided it fits into dst. */
        const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
        if ((ptrdiff_t)lastLLSize <= oend - op) {
            memcpy(op, litPtr, lastLLSize);
            generatedSrcBufferSize += lastLLSize;
    }   }
    return generatedSrcBufferSize;
}

/* Fills the global `generatedSequences` array with sequences whose fields
 * are drawn from `producer`, and returns the number of entries written
 * (block delimiters included).
 *
 * producer          : source of every random decision; generation stops
 *                     once it is empty
 * literalsSizeLimit : upper bound for the literal length of one sequence
 * dictSize          : dictionary size, 0 when no dictionary is used; when
 *                     non-zero, offsets may reach beyond the data generated
 *                     so far
 * windowLog         : log2 of the window size used to bound offsets
 * mode              : in ZSTD_sf_explicitBlockDelimiters mode, entries with
 *                     offset == 0 and matchLength == 0 are inserted to mark
 *                     block ends, and the list always ends with one
 *
 * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
 * However, compression can fail with ZSTD_sf_explicitBlockDelimiters,
 * due to potential lack of space in the destination buffer
 * (see the ZSTD_error_dstSize_tooSmall case handled in roundTripTest()).
 */
static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
                                      size_t literalsSizeLimit, size_t dictSize,
                                      size_t windowLog, ZSTD_sequenceFormat_e mode)
{
    const uint32_t repCode = 0;  /* not used by sequence ingestion api */
    size_t windowSize = 1ULL << windowLog;
    size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
    /* running total of literal + match bytes described so far */
    uint32_t bytesGenerated = 0;
    uint32_t nbSeqGenerated = 0;
    uint32_t isFirstSequence = 1;
    /* size accounted to the current block (explicit delimiters mode only) */
    uint32_t blockSize = 0;

    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        /* ensure that no sequence can be larger than one block */
        literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
        matchLengthMax = MIN(matchLengthMax, blockSizeMax/2);
    }

    /* One iteration appends at most three entries: a forced block end,
     * a split delimiter, and the sequence itself. */
    while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
         && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
         && !FUZZ_dataProducer_empty(producer)) {
        uint32_t matchLength;
        uint32_t matchBound = matchLengthMax;
        uint32_t offset;
        uint32_t offsetBound;
        /* 1 for the very first sequence when there is no dictionary, else 0;
         * presumably so that offsetBound below is at least 1 */
        const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
        const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
        bytesGenerated += litLength;
        /* sequence dropped (not recorded) if its literals exceed the source size limit */
        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
            break;
        }
        /* Past one window of data, offsets are capped at windowSize;
         * before that, they may cover all generated data plus the dictionary. */
        offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize;
        offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
        if (dictSize > 0 && bytesGenerated <= windowSize) {
            /* Prevent match length from being such that it would be associated with an offset too large
             * from the decoder's perspective. If not possible (match would be too small),
             * then reduce the offset if necessary.
             */
            const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
            if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
                /* offset is drawn a second time, now bounded by windowSize */
                const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
                offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
            } else {
                matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
            }
        }
        matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
        bytesGenerated += matchLength;
        /* sequence dropped (not recorded) if its match exceeds the source size limit */
        if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
            break;
        }
        {   ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
            /* lastLits and split are drawn in every mode,
             * but only used in explicit delimiters mode */
            const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
            #define SPLITPROB 6000
            #define SPLITMARK 5234
            /* split is set only when the drawn value equals SPLITMARK */
            const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
            if (mode == ZSTD_sf_explicitBlockDelimiters) {
                const size_t seqSize = seq.litLength + seq.matchLength;
                if (blockSize + seqSize > blockSizeMax) {  /* reaching limit : must end block now */
                    const ZSTD_Sequence endBlock = {0, 0, 0, 0};
                    generatedSequences[nbSeqGenerated++] = endBlock;
                    /* NOTE(review): when no split follows, seqSize is added
                     * again in the else branch below, so blockSize ends up
                     * at 2*seqSize; this only makes later blocks end
                     * earlier - confirm whether intended. */
                    blockSize = seqSize;
                }
                if (split) {
                    /* End the block inside this sequence's literals: the
                     * delimiter carries the first `lastLits` literals and
                     * the sequence keeps the remainder. */
                    const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
                    generatedSequences[nbSeqGenerated++] = endBlock;
                    assert(lastLits <= seq.litLength);
                    seq.litLength -= lastLits;
                    blockSize = seqSize - lastLits;
                } else {
                    blockSize += seqSize;
                }
            }
            generatedSequences[nbSeqGenerated++] = seq;
            isFirstSequence = 0;
        }
    }

    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        /* always end sequences with a block delimiter */
        const ZSTD_Sequence endBlock = {0, 0, 0, 0};
        assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
        generatedSequences[nbSeqGenerated++] = endBlock;
    }
    return nbSeqGenerated;
}

/* Compresses `src` with ZSTD_compressSequences() using the `seqSize`
 * sequences in `seqs`, decompresses the outcome into `result`, and checks
 * that the regenerated bytes are identical to `src`.
 *
 * The global `cctx` and `dctx` are used; when `hasDict` is non-zero, the
 * global `cdict` and `ddict` are referenced on them beforehand.
 *
 * Returns the decompressed size, or 0 when compression reported
 * ZSTD_error_dstSize_tooSmall while in ZSTD_sf_explicitBlockDelimiters mode
 * (accepted outcome : no round-trip is attempted). Every other failure is
 * reported through the FUZZ_ZASSERT / FUZZ_ASSERT_MSG checks.
 */
static size_t roundTripTest(void* result, size_t resultCapacity,
                            void* compressed, size_t compressedCapacity,
                            const void* src, size_t srcSize,
                            const ZSTD_Sequence* seqs, size_t seqSize,
                            unsigned hasDict,
                            ZSTD_sequenceFormat_e mode)
{
    size_t cSize;
    size_t dSize;

    if (hasDict != 0) {
        FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
        FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
    }

    cSize = ZSTD_compressSequences(cctx,
                                   compressed, compressedCapacity,
                                   seqs, seqSize,
                                   src, srcSize);

    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        /* With explicit delimiters, the compressed size may outgrow
         * dstCapacity. That remains a valid fuzzer scenario,
         * there is just nothing to round-trip. */
        const int dstTooSmall = (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall);
        if (dstTooSmall) {
            return 0;
        }
    }

    /* compression must have succeeded from here on : decode and compare */
    FUZZ_ZASSERT(cSize);
    dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
    FUZZ_ZASSERT(dSize);
    FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
    FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
    return dSize;
}

int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
{
    FUZZ_SEQ_PROD_SETUP();

    void* rBuf;
    size_t rBufSize;
    void* cBuf;
    size_t cBufSize;
    size_t generatedSrcSize;
    size_t nbSequences;
    size_t dictSize = 0;
    unsigned hasDict;
    unsigned wLog;
    int cLevel;
    ZSTD_sequenceFormat_e mode;

    FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
    FUZZ_ASSERT(producer);

    if (!cctx) {
        cctx = ZSTD_createCCtx();
        FUZZ_ASSERT(cctx);
    }
    if (!dctx) {
        dctx = ZSTD_createDCtx();
        FUZZ_ASSERT(dctx);
    }

    /* Generate window log first so we don't generate offsets too large */
    wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
    cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
    mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);

    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);

    if (!literalsBuffer) {
        literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
        FUZZ_ASSERT(literalsBuffer);
        literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
    }

    if (!dictBuffer) { /* Generate global dictionary buffer */
        ZSTD_compressionParameters cParams;

        /* Generate a large dictionary buffer */
        dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
        FUZZ_ASSERT(dictBuffer);

        /* Create global cdict and ddict */
        cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
        cParams.minMatch = ZSTD_MINMATCH_MIN;
        cParams.hashLog = ZSTD_HASHLOG_MIN;
        cParams.chainLog = ZSTD_CHAINLOG_MIN;

        cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
        ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
        FUZZ_ASSERT(cdict);
        FUZZ_ASSERT(ddict);
    }

    FUZZ_ASSERT(cdict);
    FUZZ_ASSERT(ddict);

    hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
    if (hasDict) {
        dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
    }

    if (!generatedSequences) {
        generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
    }
    if (!generatedSrc) {
        generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
    }

    nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
    generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);

    /* Note : in explicit block delimiters mode,
     * the fuzzer might generate a lot of small blocks.
     * In which case, the final compressed size might be > ZSTD_compressBound().
     * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
     * The test just doesn't roundtrip. */
    cBufSize = ZSTD_compressBound(generatedSrcSize);
    cBuf = FUZZ_malloc(cBufSize);

    rBufSize = generatedSrcSize;
    rBuf = FUZZ_malloc(rBufSize);

    {   const size_t result = roundTripTest(rBuf, rBufSize,
                                        cBuf, cBufSize,
                                        generatedSrc, generatedSrcSize,
                                        generatedSequences, nbSequences,
                                        hasDict, mode);
        FUZZ_ASSERT(result <= generatedSrcSize);  /* can be 0 when no round-trip */
    }

    free(rBuf);
    free(cBuf);
    FUZZ_dataProducer_free(producer);
#ifndef STATEFUL_FUZZING
    ZSTD_freeCCtx(cctx); cctx = NULL;
    ZSTD_freeDCtx(dctx); dctx = NULL;
    free(generatedSequences); generatedSequences = NULL;
    free(generatedSrc); generatedSrc = NULL;
    free(literalsBuffer); literalsBuffer = NULL;
#endif
    FUZZ_SEQ_PROD_TEARDOWN();
    return 0;
}
Commit	Line	Data
	1	/*
	2	* Copyright (c) Meta Platforms, Inc. and affiliates.
	3	* All rights reserved.
	4	*
	5	* This source code is licensed under both the BSD-style license (found in the
	6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
	7	* in the COPYING file in the root directory of this source tree).
	8	* You may select, at your option, one of the above-listed licenses.
	9	*/
	10
	11	/**
	12	* This fuzz target performs a zstd round-trip test by generating an arbitrary
	13	* array of sequences, generating the associated source buffer, calling
	14	* ZSTD_compressSequences(), and then decompresses and compares the result with
	15	* the original generated source buffer.
	16	*/
	17
	18	#define ZSTD_STATIC_LINKING_ONLY
	19
	20	#include <stddef.h>
	21	#include <stdlib.h>
	22	#include <stdio.h>
	23	#include <string.h>
	24	#include <time.h>
	25	#include "fuzz_helpers.h"
	26	#include "zstd_helpers.h"
	27	#include "fuzz_data_producer.h"
	28	#include "fuzz_third_party_seq_prod.h"
	29
	30	static ZSTD_CCtx* cctx = NULL;
	31	static ZSTD_DCtx* dctx = NULL;
	32	static void* literalsBuffer = NULL;
	33	static void* generatedSrc = NULL;
	34	static ZSTD_Sequence* generatedSequences = NULL;
	35
	36	static void* dictBuffer = NULL;
	37	static ZSTD_CDict* cdict = NULL;
	38	static ZSTD_DDict* ddict = NULL;
	39
	40	#define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
	41	#define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
	42	#define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
	43	#define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
	44	#define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
	45
	46	/* Deterministic random number generator */
	47	#define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
	48	static uint32_t FUZZ_RDG_rand(uint32_t* src)
	49	{
	50	static const uint32_t prime1 = 2654435761U;
	51	static const uint32_t prime2 = 2246822519U;
	52	uint32_t rand32 = *src;
	53	rand32 *= prime1;
	54	rand32 ^= prime2;
	55	rand32 = FUZZ_RDG_rotl32(rand32, 13);
	56	*src = rand32;
	57	return rand32 >> 5;
	58	}
	59
	60	/* Make a pseudorandom string - this simple function exists to avoid
	61	* taking a dependency on datagen.h to have RDG_genBuffer().
	62	*/
	63	static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
	64	const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
	65	uint32_t seed = FUZZ_dataProducer_uint32(producer);
	66	if (size) {
	67	for (size_t n = 0; n < size; n++) {
	68	int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
	69	str[n] = charset[key];
	70	}
	71	}
	72	return str;
	73	}
	74
	75	/* Returns size of source buffer */
	76	static size_t decodeSequences(void* dst, size_t nbSequences,
	77	size_t literalsSize,
	78	const void* dict, size_t dictSize,
	79	ZSTD_sequenceFormat_e mode)
	80	{
	81	const uint8_t* litPtr = literalsBuffer;
	82	const uint8_t* const litBegin = literalsBuffer;
	83	const uint8_t* const litEnd = litBegin + literalsSize;
	84	const uint8_t* dictPtr = dict;
	85	uint8_t* op = dst;
	86	const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
	87	size_t generatedSrcBufferSize = 0;
	88	size_t bytesWritten = 0;
	89
	90	for (size_t i = 0; i < nbSequences; ++i) {
	91	/* block boundary */
	92	if (generatedSequences[i].offset == 0)
	93	FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
	94
	95	if (litPtr + generatedSequences[i].litLength > litEnd) {
	96	litPtr = litBegin;
	97	}
	98	memcpy(op, litPtr, generatedSequences[i].litLength);
	99	bytesWritten += generatedSequences[i].litLength;
	100	op += generatedSequences[i].litLength;
	101	litPtr += generatedSequences[i].litLength;
	102
	103	/* Copy over the match */
	104	{ size_t matchLength = generatedSequences[i].matchLength;
	105	size_t j = 0;
	106	size_t k = 0;
	107	if (dictSize != 0) {
	108	if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
	109	size_t dictOffset = generatedSequences[i].offset - bytesWritten;
	110	size_t matchInDict = MIN(matchLength, dictOffset);
	111	for (; k < matchInDict; ++k) {
	112	op[k] = dictPtr[dictSize - dictOffset + k];
	113	}
	114	matchLength -= matchInDict;
	115	op += matchInDict;
	116	}
	117	}
	118	for (; j < matchLength; ++j) {
	119	op[j] = op[j - generatedSequences[i].offset];
	120	}
	121	op += j;
	122	FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
	123	bytesWritten += generatedSequences[i].matchLength;
	124	}
	125	}
	126	generatedSrcBufferSize = bytesWritten;
	127	FUZZ_ASSERT(litPtr <= litEnd);
	128	if (mode == ZSTD_sf_noBlockDelimiters) {
	129	const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
	130	if (lastLLSize <= oend - op) {
	131	memcpy(op, litPtr, lastLLSize);
	132	generatedSrcBufferSize += lastLLSize;
	133	} }
	134	return generatedSrcBufferSize;
	135	}
	136
	137	/* Returns nb sequences generated
	138	* Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
	139	* However, it can fail with ZSTD_sf_explicitBlockDelimiters,
	140	* due to potential lack of space in the destination buffer.
	141	*/
	142	static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
	143	size_t literalsSizeLimit, size_t dictSize,
	144	size_t windowLog, ZSTD_sequenceFormat_e mode)
	145	{
	146	const uint32_t repCode = 0; /* not used by sequence ingestion api */
	147	size_t windowSize = 1ULL << windowLog;
	148	size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
	149	uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
	150	uint32_t bytesGenerated = 0;
	151	uint32_t nbSeqGenerated = 0;
	152	uint32_t isFirstSequence = 1;
	153	uint32_t blockSize = 0;
	154
	155	if (mode == ZSTD_sf_explicitBlockDelimiters) {
	156	/* ensure that no sequence can be larger than one block */
	157	literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
	158	matchLengthMax = MIN(matchLengthMax, blockSizeMax/2);
	159	}
	160
	161	while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
	162	&& bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
	163	&& !FUZZ_dataProducer_empty(producer)) {
	164	uint32_t matchLength;
	165	uint32_t matchBound = matchLengthMax;
	166	uint32_t offset;
	167	uint32_t offsetBound;
	168	const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
	169	const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
	170	bytesGenerated += litLength;
	171	if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
	172	break;
	173	}
	174	offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize;
	175	offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
	176	if (dictSize > 0 && bytesGenerated <= windowSize) {
	177	/* Prevent match length from being such that it would be associated with an offset too large
	178	* from the decoder's perspective. If not possible (match would be too small),
	179	* then reduce the offset if necessary.
	180	*/
	181	const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
	182	if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
	183	const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
	184	offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
	185	} else {
	186	matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
	187	}
	188	}
	189	matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
	190	bytesGenerated += matchLength;
	191	if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
	192	break;
	193	}
	194	{ ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
	195	const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
	196	#define SPLITPROB 6000
	197	#define SPLITMARK 5234
	198	const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
	199	if (mode == ZSTD_sf_explicitBlockDelimiters) {
	200	const size_t seqSize = seq.litLength + seq.matchLength;
	201	if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */
	202	const ZSTD_Sequence endBlock = {0, 0, 0, 0};
	203	generatedSequences[nbSeqGenerated++] = endBlock;
	204	blockSize = seqSize;
	205	}
	206	if (split) {
	207	const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
	208	generatedSequences[nbSeqGenerated++] = endBlock;
	209	assert(lastLits <= seq.litLength);
	210	seq.litLength -= lastLits;
	211	blockSize = seqSize - lastLits;
	212	} else {
	213	blockSize += seqSize;
	214	}
	215	}
	216	generatedSequences[nbSeqGenerated++] = seq;
	217	isFirstSequence = 0;
	218	}
	219	}
	220
	221	if (mode == ZSTD_sf_explicitBlockDelimiters) {
	222	/* always end sequences with a block delimiter */
	223	const ZSTD_Sequence endBlock = {0, 0, 0, 0};
	224	assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
	225	generatedSequences[nbSeqGenerated++] = endBlock;
	226	}
	227	return nbSeqGenerated;
	228	}
	229
	230	static size_t roundTripTest(void* result, size_t resultCapacity,
	231	void* compressed, size_t compressedCapacity,
	232	const void* src, size_t srcSize,
	233	const ZSTD_Sequence* seqs, size_t seqSize,
	234	unsigned hasDict,
	235	ZSTD_sequenceFormat_e mode)
	236	{
	237	size_t cSize;
	238	size_t dSize;
	239
	240	if (hasDict) {
	241	FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
	242	FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
	243	}
	244
	245	cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
	246	seqs, seqSize,
	247	src, srcSize);
	248	if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall)
	249	&& (mode == ZSTD_sf_explicitBlockDelimiters) ) {
	250	/* Valid scenario : in explicit delimiter mode,
	251	* it might be possible for the compressed size to outgrow dstCapacity.
	252	* In which case, it's still a valid fuzzer scenario,
	253	* but no roundtrip shall be possible */
	254	return 0;
	255	}
	256	/* round-trip */
	257	FUZZ_ZASSERT(cSize);
	258	dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
	259	FUZZ_ZASSERT(dSize);
	260	FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
	261	FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
	262	return dSize;
	263	}
	264
	265	int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
	266	{
	267	FUZZ_SEQ_PROD_SETUP();
	268
	269	void* rBuf;
	270	size_t rBufSize;
	271	void* cBuf;
	272	size_t cBufSize;
	273	size_t generatedSrcSize;
	274	size_t nbSequences;
	275	size_t dictSize = 0;
	276	unsigned hasDict;
	277	unsigned wLog;
	278	int cLevel;
	279	ZSTD_sequenceFormat_e mode;
	280
	281	FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
	282	FUZZ_ASSERT(producer);
	283
	284	if (!cctx) {
	285	cctx = ZSTD_createCCtx();
	286	FUZZ_ASSERT(cctx);
	287	}
	288	if (!dctx) {
	289	dctx = ZSTD_createDCtx();
	290	FUZZ_ASSERT(dctx);
	291	}
	292
	293	/* Generate window log first so we don't generate offsets too large */
	294	wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
	295	cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
	296	mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
	297
	298	ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
	299	ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
	300	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
	301	ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
	302	ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
	303	ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
	304	ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode);
	305	ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
	306
	307	if (!literalsBuffer) {
	308	literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
	309	FUZZ_ASSERT(literalsBuffer);
	310	literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
	311	}
	312
	313	if (!dictBuffer) { /* Generate global dictionary buffer */
	314	ZSTD_compressionParameters cParams;
	315
	316	/* Generate a large dictionary buffer */
	317	dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
	318	FUZZ_ASSERT(dictBuffer);
	319
	320	/* Create global cdict and ddict */
	321	cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
	322	cParams.minMatch = ZSTD_MINMATCH_MIN;
	323	cParams.hashLog = ZSTD_HASHLOG_MIN;
	324	cParams.chainLog = ZSTD_CHAINLOG_MIN;
	325
	326	cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
	327	ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
	328	FUZZ_ASSERT(cdict);
	329	FUZZ_ASSERT(ddict);
	330	}
	331
	332	FUZZ_ASSERT(cdict);
	333	FUZZ_ASSERT(ddict);
	334
	335	hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
	336	if (hasDict) {
	337	dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
	338	}
	339
	340	if (!generatedSequences) {
	341	generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
	342	}
	343	if (!generatedSrc) {
	344	generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
	345	}
	346
	347	nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
	348	generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
	349
	350	/* Note : in explicit block delimiters mode,
	351	* the fuzzer might generate a lot of small blocks.
	352	* In which case, the final compressed size might be > ZSTD_compressBound().
	353	* This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
	354	* The test just doesn't roundtrip. */
	355	cBufSize = ZSTD_compressBound(generatedSrcSize);
	356	cBuf = FUZZ_malloc(cBufSize);
	357
	358	rBufSize = generatedSrcSize;
	359	rBuf = FUZZ_malloc(rBufSize);
	360
	361	{ const size_t result = roundTripTest(rBuf, rBufSize,
	362	cBuf, cBufSize,
	363	generatedSrc, generatedSrcSize,
	364	generatedSequences, nbSequences,
	365	hasDict, mode);
	366	FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */
	367	}
	368
	369	free(rBuf);
	370	free(cBuf);
	371	FUZZ_dataProducer_free(producer);
	372	#ifndef STATEFUL_FUZZING
	373	ZSTD_freeCCtx(cctx); cctx = NULL;
	374	ZSTD_freeDCtx(dctx); dctx = NULL;
	375	free(generatedSequences); generatedSequences = NULL;
	376	free(generatedSrc); generatedSrc = NULL;
	377	free(literalsBuffer); literalsBuffer = NULL;
	378	#endif
	379	FUZZ_SEQ_PROD_TEARDOWN();
	380	return 0;
	381	}