| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
| 11 | #define ZSTD_STATIC_LINKING_ONLY |
| 12 | #define ZDICT_STATIC_LINKING_ONLY |
| 13 | |
#include <limits.h>
#include <string.h>

#include "zstd_helpers.h"
#include "fuzz_helpers.h"
#include "zstd.h"
#include "zdict.h"
#include "sequence_producer.h"
#include "fuzz_third_party_seq_prod.h"
| 22 | |
/* Compression-level constants exported by this file; going by their names
 * they are the lowest and highest levels (their users are outside this file). */
int const kMinClevel = -3;
int const kMaxClevel = 19;

/* State pointer handed to ZSTD_registerSequenceProducer() when
 * FUZZ_THIRD_PARTY_SEQ_PROD is defined. Starts out NULL. */
void *FUZZ_seqProdState = NULL;
| 27 | |
| 28 | static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) |
| 29 | { |
| 30 | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); |
| 31 | } |
| 32 | |
| 33 | static unsigned produceParamValue(unsigned min, unsigned max, |
| 34 | FUZZ_dataProducer_t *producer) { |
| 35 | return FUZZ_dataProducer_uint32Range(producer, min, max); |
| 36 | } |
| 37 | |
| 38 | static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, |
| 39 | unsigned max, FUZZ_dataProducer_t *producer) { |
| 40 | unsigned const value = produceParamValue(min, max, producer); |
| 41 | set(cctx, param, value); |
| 42 | } |
| 43 | |
| 44 | ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) |
| 45 | { |
| 46 | /* Select compression parameters */ |
| 47 | ZSTD_compressionParameters cParams; |
| 48 | cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); |
| 49 | cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); |
| 50 | cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); |
| 51 | cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); |
| 52 | cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, |
| 53 | ZSTD_MINMATCH_MAX); |
| 54 | cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); |
| 55 | cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); |
| 56 | return ZSTD_adjustCParams(cParams, srcSize, 0); |
| 57 | } |
| 58 | |
| 59 | ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) |
| 60 | { |
| 61 | /* Select frame parameters */ |
| 62 | ZSTD_frameParameters fParams; |
| 63 | fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
| 64 | fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
| 65 | fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); |
| 66 | return fParams; |
| 67 | } |
| 68 | |
| 69 | ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) |
| 70 | { |
| 71 | ZSTD_parameters params; |
| 72 | params.cParams = FUZZ_randomCParams(srcSize, producer); |
| 73 | params.fParams = FUZZ_randomFParams(producer); |
| 74 | return params; |
| 75 | } |
| 76 | |
| 77 | static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { |
| 78 | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
| 79 | ZSTD_registerSequenceProducer( |
| 80 | cctx, |
| 81 | FUZZ_seqProdState, |
| 82 | FUZZ_thirdPartySeqProd |
| 83 | ); |
| 84 | #else |
| 85 | ZSTD_registerSequenceProducer( |
| 86 | cctx, |
| 87 | NULL, |
| 88 | simpleSequenceProducer |
| 89 | ); |
| 90 | #endif |
| 91 | |
| 92 | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
| 93 | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1)); |
| 94 | #else |
| 95 | setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer); |
| 96 | #endif |
| 97 | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); |
| 98 | FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); |
| 99 | } |
| 100 | |
| 101 | void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) |
| 102 | { |
| 103 | ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); |
| 104 | set(cctx, ZSTD_c_windowLog, cParams.windowLog); |
| 105 | set(cctx, ZSTD_c_hashLog, cParams.hashLog); |
| 106 | set(cctx, ZSTD_c_chainLog, cParams.chainLog); |
| 107 | set(cctx, ZSTD_c_searchLog, cParams.searchLog); |
| 108 | set(cctx, ZSTD_c_minMatch, cParams.minMatch); |
| 109 | set(cctx, ZSTD_c_targetLength, cParams.targetLength); |
| 110 | set(cctx, ZSTD_c_strategy, cParams.strategy); |
| 111 | /* Select frame parameters */ |
| 112 | setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); |
| 113 | setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); |
| 114 | setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); |
| 115 | /* Select long distance matching parameters */ |
| 116 | setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer); |
| 117 | setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); |
| 118 | setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, |
| 119 | ZSTD_LDM_MINMATCH_MAX, producer); |
| 120 | setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, |
| 121 | producer); |
| 122 | setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, |
| 123 | ZSTD_LDM_HASHRATELOG_MAX, producer); |
| 124 | /* Set misc parameters */ |
| 125 | #ifndef ZSTD_MULTITHREAD |
| 126 | // To reproduce with or without ZSTD_MULTITHREAD, we are going to use |
| 127 | // the same amount of entropy. |
| 128 | unsigned const nbWorkers_value = produceParamValue(0, 2, producer); |
| 129 | unsigned const rsyncable_value = produceParamValue(0, 1, producer); |
| 130 | (void)nbWorkers_value; |
| 131 | (void)rsyncable_value; |
| 132 | set(cctx, ZSTD_c_nbWorkers, 0); |
| 133 | set(cctx, ZSTD_c_rsyncable, 0); |
| 134 | #else |
| 135 | setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); |
| 136 | setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); |
| 137 | #endif |
| 138 | setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); |
| 139 | setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer); |
| 140 | setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); |
| 141 | setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); |
| 142 | setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); |
| 143 | setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer); |
| 144 | setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); |
| 145 | setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); |
| 146 | setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer); |
| 147 | setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer); |
| 148 | setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer); |
| 149 | if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { |
| 150 | setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); |
| 151 | } |
| 152 | if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { |
| 153 | setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); |
| 154 | } |
| 155 | |
| 156 | #ifdef FUZZ_THIRD_PARTY_SEQ_PROD |
| 157 | setSequenceProducerParams(cctx, producer); |
| 158 | #else |
| 159 | if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) { |
| 160 | setSequenceProducerParams(cctx, producer); |
| 161 | } else { |
| 162 | ZSTD_registerSequenceProducer(cctx, NULL, NULL); |
| 163 | } |
| 164 | #endif |
| 165 | } |
| 166 | |
| 167 | FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) |
| 168 | { |
| 169 | size_t const dictSize = MAX(srcSize / 8, 1024); |
| 170 | size_t const totalSampleSize = dictSize * 11; |
| 171 | FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; |
| 172 | char* const samples = (char*)FUZZ_malloc(totalSampleSize); |
| 173 | unsigned nbSamples = 100; |
| 174 | size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); |
| 175 | size_t pos = 0; |
| 176 | size_t sample = 0; |
| 177 | ZDICT_fastCover_params_t params; |
| 178 | |
| 179 | for (sample = 0; sample < nbSamples; ++sample) { |
| 180 | size_t const remaining = totalSampleSize - pos; |
| 181 | size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); |
| 182 | size_t const limit = MIN(srcSize - offset, remaining); |
| 183 | size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); |
| 184 | memcpy(samples + pos, (const char*)src + offset, toCopy); |
| 185 | pos += toCopy; |
| 186 | samplesSizes[sample] = toCopy; |
| 187 | } |
| 188 | memset(samples + pos, 0, totalSampleSize - pos); |
| 189 | |
| 190 | memset(¶ms, 0, sizeof(params)); |
| 191 | params.accel = 5; |
| 192 | params.k = 40; |
| 193 | params.d = 8; |
| 194 | params.f = 14; |
| 195 | params.zParams.compressionLevel = 1; |
| 196 | dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, |
| 197 | samples, samplesSizes, nbSamples, params); |
| 198 | if (ZSTD_isError(dict.size)) { |
| 199 | free(dict.buff); |
| 200 | memset(&dict, 0, sizeof(dict)); |
| 201 | } |
| 202 | |
| 203 | free(samplesSizes); |
| 204 | free(samples); |
| 205 | |
| 206 | return dict; |
| 207 | } |