| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
| 11 | /* This header contains definitions |
| 12 | * that shall **only** be used by modules within lib/compress. |
| 13 | */ |
| 14 | |
| 15 | #ifndef ZSTD_COMPRESS_H |
| 16 | #define ZSTD_COMPRESS_H |
| 17 | |
| 18 | /*-************************************* |
| 19 | * Dependencies |
| 20 | ***************************************/ |
| 21 | #include "../common/zstd_internal.h" |
| 22 | #include "zstd_cwksp.h" |
| 23 | #ifdef ZSTD_MULTITHREAD |
| 24 | # include "zstdmt_compress.h" |
| 25 | #endif |
| 26 | #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ |
| 27 | |
| 28 | #if defined (__cplusplus) |
| 29 | extern "C" { |
| 30 | #endif |
| 31 | |
| 32 | /*-************************************* |
| 33 | * Constants |
| 34 | ***************************************/ |
| 35 | #define kSearchStrength 8 |
| 36 | #define HASH_READ_SIZE 8 |
| 37 | #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". |
| 38 | It could be confused for a real successor at index "1", if sorted as larger than its predecessor. |
| 39 | It's not a big deal though : candidate will just be sorted again. |
| 40 | Additionally, candidate position 1 will be lost. |
| 41 | But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. |
| 42 | The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. |
| 43 | This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ |
| 44 | |
| 45 | |
| 46 | /*-************************************* |
| 47 | * Context memory management |
| 48 | ***************************************/ |
/* Lifecycle of a compression frame : created -> init -> ongoing -> ending */
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
/* Streaming stage : init -> load (accumulate input) -> flush (drain output) */
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

/* Reference to a caller-owned prefix used as a single-use dictionary (not copied) */
typedef struct ZSTD_prefixDict_s {
    const void* dict;                       /* start of prefix content (borrowed) */
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

/* A dictionary owned by the context, materialized lazily as a CDict */
typedef struct {
    void* dictBuffer;                       /* internal copy of the dictionary, when one was made (owner) */
    void const* dict;                       /* points to dictBuffer or to caller memory */
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;                      /* digested form, built on demand */
} ZSTD_localDict;

/* Huffman encoding table for literals + its repeat-mode state */
typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;                  /* whether CTable may be reused for the next block */
} ZSTD_hufCTables_t;

/* FSE encoding tables for the three sequence symbol streams + repeat-mode states */
typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

/* Complete entropy state for one block : literals (huffman) + sequences (fse) */
typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
| 84 | |
| 85 | /*********************************************** |
| 86 | * Entropy buffer statistics structs and funcs * |
| 87 | ***********************************************/ |
| 88 | /** ZSTD_hufCTablesMetadata_t : |
| 89 | * Stores Literals Block Type for a super-block in hType, and |
| 90 | * huffman tree description in hufDesBuffer. |
| 91 | * hufDesSize refers to the size of huffman tree description in bytes. |
| 92 | * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ |
typedef struct {
    symbolEncodingType_e hType;                    /* literals block type chosen for the super-block */
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];   /* serialized huffman tree description */
    size_t hufDesSize;                             /* nb of bytes used within hufDesBuffer */
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;                   /* litLength compression mode */
    symbolEncodingType_e ofType;                   /* offset compression mode */
    symbolEncodingType_e mlType;                   /* matchLength compression mode */
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; /* serialized fse table descriptions */
    size_t fseTablesSize;                          /* nb of bytes used within fseTablesBuffer */
    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

/* Aggregated entropy metadata for one super-block : huffman + fse */
typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;
| 117 | |
/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy statistics for the block described by @seqStorePtr,
 *  filling @nextEntropy and @entropyMetadata.
 *  @prevEntropy : entropy tables from the previous block (read-only).
 *  @workspace / @wkspSize : scratch memory — NOTE(review): required size presumably
 *                           ENTROPY_WORKSPACE_SIZE; confirm at call sites.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(
                    const seqStore_t* seqStorePtr,
              const ZSTD_entropyCTables_t* prevEntropy,
                    ZSTD_entropyCTables_t* nextEntropy,
              const ZSTD_CCtx_params* cctxParams,
                    ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                    void* workspace, size_t wkspSize);
| 128 | |
| 129 | /********************************* |
| 130 | * Compression internals structs * |
| 131 | *********************************/ |
| 132 | |
/* One match candidate found by a matchfinder */
typedef struct {
    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

/* One decoded sequence in "raw" (non-sumtype) representation */
typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
    rawSeq* seq;          /* The start of the sequences */
    size_t pos;           /* The index in seq where reading stopped. pos <= size. */
    size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                             stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
    size_t size;          /* The number of sequences. <= capacity. */
    size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

/* Cursor tracking consumption of a user-provided ZSTD_Sequence array */
typedef struct {
    U32 idx;             /* Index in array of ZSTD_Sequence */
    U32 posInSequence;   /* Position within sequence at idx */
    size_t posInSrc;     /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;

/* Canonical empty rawSeqStore_t, used when no external sequences exist */
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

/* One position tracked by the optimal parser */
typedef struct {
    int price;           /* cumulated cost to reach this position */
    U32 off;             /* offBase of the edge arriving here */
    U32 mlen;            /* match length of the arriving edge */
    U32 litlen;          /* literal run length of the arriving edge */
    U32 rep[ZSTD_REP_NUM]; /* repeat offsets state at this position */
} ZSTD_optimal_t;

/* Price model selection : dynamic statistics vs pre-defined cost tables */
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
| 170 | |
/* State of the optimal parser (btopt/btultra strategies) */
typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32 litSum;                  /* nb of literals */
    U32 litLengthSum;            /* nb of litLength codes */
    U32 matchLengthSum;          /* nb of matchLength codes */
    U32 offCodeSum;              /* nb of offset codes */
    U32 litSumBasePrice;         /* to compare to log2(litfreq) */
    U32 litLengthSumBasePrice;   /* to compare to log2(llfreq) */
    U32 matchLengthSumBasePrice; /* to compare to log2(mlfreq) */
    U32 offCodeSumBasePrice;     /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

/* Entropy tables + repeat offsets, carried from one block to the next */
typedef struct {
    ZSTD_entropyCTables_t entropy;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

/* Sliding-window bookkeeping shared by the matchfinders and LDM */
typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

/* Index at which window content starts — NOTE(review): see ZSTD_window_init() for rationale */
#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t; /* forward declaration : full definition below */

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
| 215 | |
/* Complete matchfinder state : window, search tables, and per-strategy extras */
struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for re-use of tag table */
    U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */

    U32* hashTable;    /* main hash table; contents depend on selected strategy */
    U32* hashTable3;   /* secondary table for len==3 matches (see hashLog3) */
    U32* chainTable;   /* hash-chain / binary-tree links — NOTE(review): layout varies per strategy */

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;  /* matchState of an attached dictionary, when in use */
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;         /* sequences produced by long-distance matching, when enabled */

    /* Controls prefetching in some dictMatchState matchfinders.
     * This behavior is controlled from the cctx ms.
     * This parameter has no effect in the cdict ms. */
    int prefetchCDictTables;

    /* When == 0, lazy match finders insert every position.
     * When != 0, lazy match finders only insert positions they search.
     * This allows them to skip much faster over incompressible data,
     * at a small cost to compression ratio.
     */
    int lazySkipping;
};
| 260 | |
/* Double-buffered block states : prev is read, next is written, then they swap */
typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

/* One entry of the long-distance-matching hash table */
typedef struct {
    U32 offset;    /* position of the hashed window */
    U32 checksum;  /* extra hash bits, used to filter collisions cheaply */
} ldmEntry_t;

/* A candidate split point produced by the LDM hash pass */
typedef struct {
    BYTE const* split;   /* position in source where the rolling hash triggered */
    U32 hash;            /* bucket selector */
    U32 checksum;        /* collision filter, compared against ldmEntry_t.checksum */
    ldmEntry_t* bucket;  /* bucket this candidate hashes into */
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64   /* nb of split candidates processed per batch */

/* Long-distance matching state */
typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

/* User-facing parameters controlling long-distance matching */
typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;                  /* Log size of hashTable */
    U32 bucketSizeLog;            /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;           /* Minimum match length */
    U32 hashRateLog;              /* Log number of entries to skip */
    U32 windowLog;                /* Window log for the LDM */
} ldmParams_t;

/* Sink for ZSTD_getSequences()-style collection of generated sequences */
typedef struct {
    int collectSequences;     /* non-zero when collection is active */
    ZSTD_Sequence* seqStart;  /* output array (caller-owned) */
    size_t seqIndex;          /* next write position within seqStart */
    size_t maxSequences;      /* capacity of seqStart */
} SeqCollector;
| 305 | |
/* Internal mirror of all advanced compression parameters.
 * (typedef'd to ZSTD_CCtx_params within "zstd.h" — see closing brace comment) */
struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;               /* frame format (standard / magicless) */
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;

    /* Controls prefetching in some dictMatchState matchfinders */
    ZSTD_paramSwitch_e prefetchCDictTables;

    /* Controls whether zstd will fall back to an internal matchfinder
     * if the external matchfinder returns an error code. */
    int enableMatchFinderFallback;

    /* Indicates whether an external matchfinder has been referenced.
     * Users can't set this externally.
     * It is set internally in ZSTD_registerSequenceProducer(). */
    int useSequenceProducer;

    /* Adjust the max block size */
    size_t maxBlockSize;

    /* Controls repcode search in external sequence parsing */
    ZSTD_paramSwitch_e searchForExternalRepcodes;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
| 374 | |
/* scratch needed to build sequence statistics : one counter per symbol value, plus slack */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
/* total scratch for entropy work : huffman workspace + sequence counters */
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;   /* sequences of the whole chunk under consideration */
    seqStore_t firstHalfSeqStore;   /* scratch views used while evaluating a split point */
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];   /* chosen split boundaries */
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

/* Context for block-level external matchfinder API */
typedef struct {
    void* mState;                    /* opaque user state, passed back to mFinder */
    ZSTD_sequenceProducer_F* mFinder;/* user-registered sequence producer callback */
    ZSTD_Sequence* seqBuffer;        /* buffer receiving produced sequences */
    size_t seqBufferCapacity;
} ZSTD_externalMatchCtx;
| 411 | |
/* The compression context : all state for one stream/frame of compression.
 * (typedef'd to ZSTD_CCtx in the public API) */
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;   /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;             /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;   /* parameters set by the user, pending application */
    ZSTD_CCtx_params appliedParams;     /* parameters in effect for the current frame */
    ZSTD_CCtx_params simpleApiParams;   /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;             /* running content checksum */
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;                  /* non-zero when the cctx lives in caller-provided static memory */
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;

    /* Workspace for external matchfinder */
    ZSTD_externalMatchCtx externalMatchCtx;
};
| 485 | |
/* How thoroughly dictionary content is indexed into the match tables */
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
/* Whether tables are being filled for a CCtx or for a CDict */
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;

/* Dictionary access mode seen by the block compressors */
typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

/* Context in which compression parameters are being selected/adjusted */
typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

/* Signature shared by every block-level compressor; returns the last literals segment size */
typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
/* Selects the block compressor matching (strategy, rowMatchfinderMode, dictMode) */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
| 520 | |
| 521 | |
| 522 | MEM_STATIC U32 ZSTD_LLcode(U32 litLength) |
| 523 | { |
| 524 | static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, |
| 525 | 8, 9, 10, 11, 12, 13, 14, 15, |
| 526 | 16, 16, 17, 17, 18, 18, 19, 19, |
| 527 | 20, 20, 20, 20, 21, 21, 21, 21, |
| 528 | 22, 22, 22, 22, 22, 22, 22, 22, |
| 529 | 23, 23, 23, 23, 23, 23, 23, 23, |
| 530 | 24, 24, 24, 24, 24, 24, 24, 24, |
| 531 | 24, 24, 24, 24, 24, 24, 24, 24 }; |
| 532 | static const U32 LL_deltaCode = 19; |
| 533 | return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; |
| 534 | } |
| 535 | |
| 536 | /* ZSTD_MLcode() : |
| 537 | * note : mlBase = matchLength - MINMATCH; |
| 538 | * because it's the format it's stored in seqStore->sequences */ |
| 539 | MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) |
| 540 | { |
| 541 | static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| 542 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| 543 | 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, |
| 544 | 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, |
| 545 | 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, |
| 546 | 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, |
| 547 | 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, |
| 548 | 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; |
| 549 | static const U32 ML_deltaCode = 36; |
| 550 | return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; |
| 551 | } |
| 552 | |
| 553 | /* ZSTD_cParam_withinBounds: |
| 554 | * @return 1 if value is within cParam bounds, |
| 555 | * 0 otherwise */ |
| 556 | MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) |
| 557 | { |
| 558 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); |
| 559 | if (ZSTD_isError(bounds.error)) return 0; |
| 560 | if (value < bounds.lowerBound) return 0; |
| 561 | if (value > bounds.upperBound) return 0; |
| 562 | return 1; |
| 563 | } |
| 564 | |
/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block (header + content), or an error code. */
MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    /* 3-byte block header : bit 0 = lastBlock flag, bits 1-2 = block type (raw), remaining bits = size */
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    /* content is copied verbatim right after the header */
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
| 579 | |
| 580 | MEM_STATIC size_t |
| 581 | ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) |
| 582 | { |
| 583 | BYTE* const op = (BYTE*)dst; |
| 584 | U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); |
| 585 | RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); |
| 586 | MEM_writeLE24(op, cBlockHeader); |
| 587 | op[3] = src; |
| 588 | return 4; |
| 589 | } |
| 590 | |
| 591 | |
| 592 | /* ZSTD_minGain() : |
| 593 | * minimum compression required |
| 594 | * to generate a compress block or a compressed literals section. |
| 595 | * note : use same formula for both situations */ |
| 596 | MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) |
| 597 | { |
| 598 | U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; |
| 599 | ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); |
| 600 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); |
| 601 | return (srcSize >> minlog) + 2; |
| 602 | } |
| 603 | |
/* ZSTD_literalsCompressionIsDisabled() :
 * @return 1 when literals must be stored uncompressed, 0 otherwise.
 * In auto mode, literals compression is disabled only for the fast strategy
 * combined with a positive targetLength. */
MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}
| 618 | |
/*! ZSTD_safecopyLiterals() :
 *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        /* fast bulk copy is safe up to ilimit_w */
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    /* tail : byte-by-byte, guaranteed not to over-read past iend */
    while (ip < iend) *op++ = *ip++;
}
| 635 | |
| 636 | |
/* "offBase" sum-type encoding of match offsets :
 * values 1..ZSTD_REP_NUM designate repcode IDs,
 * values > ZSTD_REP_NUM encode a real offset as (offset + ZSTD_REP_NUM). */
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
| 646 | |
/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
 *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
 *  @matchLength : must be >= MINMATCH
 *  Allowed to over-read literals up to litLimit.
 */
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
              size_t litLength, const BYTE* literals, const BYTE* litLimit,
              U32 offBase,
              size_t matchLength)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
               pos, (U32)litLength, (U32)matchLength, (U32)offBase);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        /* near end of input : bounded copy that never over-reads past litLimit */
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        /* litLength field is U16 : overflow recorded out-of-band via longLengthType/Pos */
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offBase = offBase;

    /* match Length */
    assert(matchLength >= MINMATCH);
    {   size_t const mlBase = matchLength - MINMATCH;
        if (mlBase>0xFFFF) {
            /* mlBase field is U16 : overflow recorded out-of-band, same mechanism as litLength */
            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
        }
        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
    }

    seqStorePtr->sequences++;
}
| 712 | |
| 713 | /* ZSTD_updateRep() : |
| 714 | * updates in-place @rep (array of repeat offsets) |
| 715 | * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() |
| 716 | */ |
| 717 | MEM_STATIC void |
| 718 | ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) |
| 719 | { |
| 720 | if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ |
| 721 | rep[2] = rep[1]; |
| 722 | rep[1] = rep[0]; |
| 723 | rep[0] = OFFBASE_TO_OFFSET(offBase); |
| 724 | } else { /* repcode */ |
| 725 | U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; |
| 726 | if (repCode > 0) { /* note : if repCode==0, no change */ |
| 727 | U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; |
| 728 | rep[2] = (repCode >= 2) ? rep[1] : rep[2]; |
| 729 | rep[1] = rep[0]; |
| 730 | rep[0] = currentOffset; |
| 731 | } else { /* repCode == 0 */ |
| 732 | /* nothing to do */ |
| 733 | } |
| 734 | } |
| 735 | } |
| 736 | |
/* Small value-type container for the 3 repeat offsets,
 * so an updated set can be returned by value (see ZSTD_newRep()). */
typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;
| 740 | |
| 741 | MEM_STATIC repcodes_t |
| 742 | ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) |
| 743 | { |
| 744 | repcodes_t newReps; |
| 745 | ZSTD_memcpy(&newReps, rep, sizeof(newReps)); |
| 746 | ZSTD_updateRep(newReps.rep, offBase, ll0); |
| 747 | return newReps; |
| 748 | } |
| 749 | |
| 750 | |
| 751 | /*-************************************* |
| 752 | * Match length counter |
| 753 | ***************************************/ |
/* ZSTD_count() :
 * Returns the length of the common prefix between @pIn and @pMatch,
 * reading no further than @pInLimit on the @pIn side.
 * Compares one machine word at a time, then resolves the first mismatching
 * word byte-precisely with ZSTD_NbCommonBytes(). */
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    /* last position from which a full word can still be read safely */
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        /* first word checked outside the loop : matches are frequently short */
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    } }
    /* tail : fewer than sizeof(size_t) bytes left; compare 4, 2, then 1 byte */
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
| 774 | |
| 775 | /** ZSTD_count_2segments() : |
| 776 | * can count match length with `ip` & `match` in 2 different segments. |
| 777 | * convention : on reaching mEnd, match count continue starting from iStart |
| 778 | */ |
| 779 | MEM_STATIC size_t |
| 780 | ZSTD_count_2segments(const BYTE* ip, const BYTE* match, |
| 781 | const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) |
| 782 | { |
| 783 | const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); |
| 784 | size_t const matchLength = ZSTD_count(ip, match, vEnd); |
| 785 | if (match + matchLength != mEnd) return matchLength; |
| 786 | DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); |
| 787 | DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); |
| 788 | DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); |
| 789 | DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); |
| 790 | DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); |
| 791 | return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); |
| 792 | } |
| 793 | |
| 794 | |
| 795 | /*-************************************* |
| 796 | * Hashes |
| 797 | ***************************************/ |
/* Multiplicative hashing :
 * the input is multiplied by a large prime, then the top @h bits are kept.
 * hash3/5/6/7 first shift the value so that only the lowest 3/5/6/7 bytes contribute.
 * The *S ("salted") variants additionally xor a salt into the product before the final shift. */
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
| 827 | |
| 828 | |
| 829 | MEM_STATIC FORCE_INLINE_ATTR |
| 830 | size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) |
| 831 | { |
| 832 | /* Although some of these hashes do support hBits up to 64, some do not. |
| 833 | * To be on the safe side, always avoid hBits > 32. */ |
| 834 | assert(hBits <= 32); |
| 835 | |
| 836 | switch(mls) |
| 837 | { |
| 838 | default: |
| 839 | case 4: return ZSTD_hash4Ptr(p, hBits); |
| 840 | case 5: return ZSTD_hash5Ptr(p, hBits); |
| 841 | case 6: return ZSTD_hash6Ptr(p, hBits); |
| 842 | case 7: return ZSTD_hash7Ptr(p, hBits); |
| 843 | case 8: return ZSTD_hash8Ptr(p, hBits); |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | MEM_STATIC FORCE_INLINE_ATTR |
| 848 | size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { |
| 849 | /* Although some of these hashes do support hBits up to 64, some do not. |
| 850 | * To be on the safe side, always avoid hBits > 32. */ |
| 851 | assert(hBits <= 32); |
| 852 | |
| 853 | switch(mls) |
| 854 | { |
| 855 | default: |
| 856 | case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); |
| 857 | case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); |
| 858 | case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); |
| 859 | case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); |
| 860 | case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); |
| 861 | } |
| 862 | } |
| 863 | |
| 864 | |
| 865 | /** ZSTD_ipow() : |
| 866 | * Return base^exponent. |
| 867 | */ |
| 868 | static U64 ZSTD_ipow(U64 base, U64 exponent) |
| 869 | { |
| 870 | U64 power = 1; |
| 871 | while (exponent) { |
| 872 | if (exponent & 1) power *= base; |
| 873 | exponent >>= 1; |
| 874 | base *= base; |
| 875 | } |
| 876 | return power; |
| 877 | } |
| 878 | |
/* Added to every input byte before hashing,
 * so that zero bytes still alter the rolling hash state. */
#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
| 880 | |
| 881 | /** ZSTD_rollingHash_append() : |
| 882 | * Add the buffer to the hash value. |
| 883 | */ |
| 884 | static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) |
| 885 | { |
| 886 | BYTE const* istart = (BYTE const*)buf; |
| 887 | size_t pos; |
| 888 | for (pos = 0; pos < size; ++pos) { |
| 889 | hash *= prime8bytes; |
| 890 | hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; |
| 891 | } |
| 892 | return hash; |
| 893 | } |
| 894 | |
/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 * Equivalent to appending the whole buffer to an empty (0) hash state.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}
| 902 | |
| 903 | /** ZSTD_rollingHash_primePower() : |
| 904 | * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash |
| 905 | * over a window of length bytes. |
| 906 | */ |
| 907 | MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) |
| 908 | { |
| 909 | return ZSTD_ipow(prime8bytes, length - 1); |
| 910 | } |
| 911 | |
/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte :
 * removes @toRemove (the byte leaving the window) and appends @toAdd.
 * @primePower must come from ZSTD_rollingHash_primePower() for the window length,
 * so the subtraction cancels exactly the contribution @toRemove made when appended.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
| 922 | |
| 923 | /*-************************************* |
| 924 | * Round buffer management |
| 925 | ***************************************/ |
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed : past this index, overflow correction must run
 * (see ZSTD_window_needOverflowCorrection() / ZSTD_window_correctOverflow()) */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                      \
    ( ((U32)-1)                  /* Maximum ending current index */             \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
| 935 | |
| 936 | /** |
| 937 | * ZSTD_window_clear(): |
| 938 | * Clears the window containing the history by simply setting it to empty. |
| 939 | */ |
| 940 | MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) |
| 941 | { |
| 942 | size_t const endT = (size_t)(window->nextSrc - window->base); |
| 943 | U32 const end = (U32)endT; |
| 944 | |
| 945 | window->lowLimit = end; |
| 946 | window->dictLimit = end; |
| 947 | } |
| 948 | |
| 949 | MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) |
| 950 | { |
| 951 | return window.dictLimit == ZSTD_WINDOW_START_INDEX && |
| 952 | window.lowLimit == ZSTD_WINDOW_START_INDEX && |
| 953 | (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; |
| 954 | } |
| 955 | |
| 956 | /** |
| 957 | * ZSTD_window_hasExtDict(): |
| 958 | * Returns non-zero if the window has a non-empty extDict. |
| 959 | */ |
| 960 | MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) |
| 961 | { |
| 962 | return window.lowLimit < window.dictLimit; |
| 963 | } |
| 964 | |
| 965 | /** |
| 966 | * ZSTD_matchState_dictMode(): |
| 967 | * Inspects the provided matchState and figures out what dictMode should be |
| 968 | * passed to the compressor. |
| 969 | */ |
| 970 | MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) |
| 971 | { |
| 972 | return ZSTD_window_hasExtDict(ms->window) ? |
| 973 | ZSTD_extDict : |
| 974 | ms->dictMatchState != NULL ? |
| 975 | (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : |
| 976 | ZSTD_noDict; |
| 977 | } |
| 978 | |
| 979 | /* Defining this macro to non-zero tells zstd to run the overflow correction |
| 980 | * code much more frequently. This is very inefficient, and should only be |
| 981 | * used for tests and fuzzers. |
| 982 | */ |
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#  define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 /* fuzzing builds exercise the correction path often */
# else
#  define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
# endif
#endif
| 990 | |
| 991 | /** |
| 992 | * ZSTD_window_canOverflowCorrect(): |
| 993 | * Returns non-zero if the indices are large enough for overflow correction |
| 994 | * to work correctly without impacting compression ratio. |
| 995 | */ |
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    /* one full cycle of the round buffer */
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    /* smallest index at which correction leaves maxDist of valid history */
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}
| 1025 | |
| 1026 | /** |
| 1027 | * ZSTD_window_needOverflowCorrection(): |
| 1028 | * Returns non-zero if the indices are getting too large and need overflow |
| 1029 | * protection. |
| 1030 | */ |
| 1031 | MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, |
| 1032 | U32 cycleLog, |
| 1033 | U32 maxDist, |
| 1034 | U32 loadedDictEnd, |
| 1035 | void const* src, |
| 1036 | void const* srcEnd) |
| 1037 | { |
| 1038 | U32 const curr = (U32)((BYTE const*)srcEnd - window.base); |
| 1039 | if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { |
| 1040 | if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { |
| 1041 | return 1; |
| 1042 | } |
| 1043 | } |
| 1044 | return curr > ZSTD_CURRENT_MAX; |
| 1045 | } |
| 1046 | |
| 1047 | /** |
| 1048 | * ZSTD_window_correctOverflow(): |
| 1049 | * Reduces the indices to protect from index overflow. |
| 1050 | * Returns the correction made to the indices, which must be applied to every |
| 1051 | * stored index. |
| 1052 | * |
| 1053 | * The least significant cycleLog bits of the indices must remain the same, |
| 1054 | * which may be 0. Every index up to maxDist in the past must be valid. |
| 1055 | */ |
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    /* Rebase : indices shrink by `correction`, base pointers advance by it,
     * so (ptr - base) stays consistent for all live pointers. */
    window->base += correction;
    window->dictBase += correction;
    /* Clamp limits at ZSTD_WINDOW_START_INDEX instead of underflowing. */
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    /* Used by ZSTD_window_canOverflowCorrect() to back off correction frequency. */
    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
| 1128 | |
| 1129 | /** |
| 1130 | * ZSTD_window_enforceMaxDist(): |
| 1131 | * Updates lowLimit so that: |
| 1132 | * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd |
| 1133 | * |
| 1134 | * It ensures index is valid as long as index >= lowLimit. |
| 1135 | * This must be called before a block compression call. |
| 1136 | * |
| 1137 | * loadedDictEnd is only defined if a dictionary is in use for current compression. |
| 1138 | * As the name implies, loadedDictEnd represents the index at end of dictionary. |
| 1139 | * The value lies within context's referential, it can be directly compared to blockEndIdx. |
| 1140 | * |
| 1141 | * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. |
| 1142 | * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. |
| 1143 | * This is because dictionaries are allowed to be referenced fully |
| 1144 | * as long as the last byte of the dictionary is in the window. |
| 1145 | * Once input has progressed beyond window size, dictionary cannot be referenced anymore. |
| 1146 | * |
| 1147 | * In normal dict mode, the dictionary lies between lowLimit and dictLimit. |
| 1148 | * In dictMatchState mode, lowLimit and dictLimit are the same, |
| 1149 | * and the dictionary is below them. |
| 1150 | * forceWindow and dictMatchState are therefore incompatible. |
| 1151 | */ |
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    /* loadedDictEnd == 0 when no dictionary is in use (NULL pointer) */
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        /* limits only move up, never down */
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}
| 1190 | |
| 1191 | /* Similar to ZSTD_window_enforceMaxDist(), |
| 1192 | * but only invalidates dictionary |
| 1193 | * when input progresses beyond window size. |
| 1194 | * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) |
| 1195 | * loadedDictEnd uses same referential as window->base |
| 1196 | * maxDist is the window size */ |
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    /* contract : both out-pointers must be provided (see header comment above) */
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             *
             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
             * dictMatchState, so setting it to NULL is not a problem.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}
| 1230 | |
/* ZSTD_window_init() :
 * Resets @window to its pristine state. */
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    /* dummy non-NULL bases, so pointer arithmetic stays well-defined before the first segment loads */
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
| 1241 | |
| 1242 | /** |
| 1243 | * ZSTD_window_update(): |
| 1244 | * Updates the window by appending [src, src + srcSize) to the window. |
| 1245 | * If it is not contiguous, the current prefix becomes the extDict, and we |
| 1246 | * forget about the extDict. Handles overlap of the prefix and extDict. |
| 1247 | * Returns non-zero if the segment is contiguous. |
| 1248 | */ |
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous : the current prefix becomes the new extDict,
         * and indices are re-anchored so `ip` keeps the same index. */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        /* rebase so that (ip - base) == distanceFromBase : index continuity preserved */
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
| 1285 | |
| 1286 | /** |
| 1287 | * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. |
| 1288 | */ |
| 1289 | MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) |
| 1290 | { |
| 1291 | U32 const maxDistance = 1U << windowLog; |
| 1292 | U32 const lowestValid = ms->window.lowLimit; |
| 1293 | U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; |
| 1294 | U32 const isDictionary = (ms->loadedDictEnd != 0); |
| 1295 | /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary |
| 1296 | * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't |
| 1297 | * valid for the entire block. So this check is sufficient to find the lowest valid match index. |
| 1298 | */ |
| 1299 | U32 const matchLowest = isDictionary ? lowestValid : withinWindow; |
| 1300 | return matchLowest; |
| 1301 | } |
| 1302 | |
| 1303 | /** |
| 1304 | * Returns the lowest allowed match index in the prefix. |
| 1305 | */ |
| 1306 | MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) |
| 1307 | { |
| 1308 | U32 const maxDistance = 1U << windowLog; |
| 1309 | U32 const lowestValid = ms->window.dictLimit; |
| 1310 | U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; |
| 1311 | U32 const isDictionary = (ms->loadedDictEnd != 0); |
| 1312 | /* When computing the lowest prefix index we need to take the dictionary into account to handle |
| 1313 | * the edge case where the dictionary and the source are contiguous in memory. |
| 1314 | */ |
| 1315 | U32 const matchLowest = isDictionary ? lowestValid : withinWindow; |
| 1316 | return matchLowest; |
| 1317 | } |
| 1318 | |
| 1319 | |
| 1320 | |
| 1321 | /* debug functions */ |
| 1322 | #if (DEBUGLEVEL>=2) |
| 1323 | |
| 1324 | MEM_STATIC double ZSTD_fWeight(U32 rawStat) |
| 1325 | { |
| 1326 | U32 const fp_accuracy = 8; |
| 1327 | U32 const fp_multiplier = (1 << fp_accuracy); |
| 1328 | U32 const newStat = rawStat + 1; |
| 1329 | U32 const hb = ZSTD_highbit32(newStat); |
| 1330 | U32 const BWeight = hb * fp_multiplier; |
| 1331 | U32 const FWeight = (newStat << fp_accuracy) >> hb; |
| 1332 | U32 const weight = BWeight + FWeight; |
| 1333 | assert(hb + fp_accuracy < 31); |
| 1334 | return (double)weight / fp_multiplier; |
| 1335 | } |
| 1336 | |
/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    /* first pass : total nb of occurrences, needed to derive per-symbol cost */
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        /* cost in bits ~ log2(sum) - log2(count), via ZSTD_fWeight() approximation */
        DEBUGLOG(2, "%2u: %5u (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}
| 1349 | |
| 1350 | #endif |
| 1351 | |
/* Short Cache */

/* Normally, zstd matchfinders follow this flow:
 *     1. Compute hash at ip
 *     2. Load index from hashTable[hash]
 *     3. Check if *ip == *(base + index)
 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
 *
 * Short cache is an optimization which allows us to avoid step 3 most of the time
 * when the data doesn't actually match. With short cache, the flow becomes:
 *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
 *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
 *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
 *
 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
 * dictMatchState matchfinders.
 */
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) /* low bits of a packed entry hold the tag */
| 1371 | |
| 1372 | /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. |
| 1373 | * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ |
| 1374 | MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { |
| 1375 | size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; |
| 1376 | U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); |
| 1377 | assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); |
| 1378 | hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; |
| 1379 | } |
| 1380 | |
| 1381 | /* Helper function for short cache matchfinders. |
| 1382 | * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */ |
| 1383 | MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { |
| 1384 | U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; |
| 1385 | U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; |
| 1386 | return tag1 == tag2; |
| 1387 | } |
| 1388 | |
| 1389 | #if defined (__cplusplus) |
| 1390 | } |
| 1391 | #endif |
| 1392 | |
| 1393 | /* =============================================================== |
| 1394 | * Shared internal declarations |
| 1395 | * These prototypes may be called from sources not in lib/compress |
| 1396 | * =============================================================== */ |
| 1397 | |
| 1398 | /* ZSTD_loadCEntropy() : |
| 1399 | * dict : must point at beginning of a valid zstd dictionary. |
| 1400 | * return : size of dictionary header (size of magic number + dict ID + entropy tables) |
| 1401 | * assumptions : magic number supposed already checked |
| 1402 | * and dictSize >= 8 */ |
| 1403 | size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, |
| 1404 | const void* const dict, size_t dictSize); |
| 1405 | |
| 1406 | void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); |
| 1407 | |
| 1408 | /* ============================================================== |
| 1409 | * Private declarations |
| 1410 | * These prototypes shall only be called from within lib/compress |
| 1411 | * ============================================================== */ |
| 1412 | |
| 1413 | /* ZSTD_getCParamsFromCCtxParams() : |
| 1414 | * cParams are built depending on compressionLevel, src size hints, |
| 1415 | * LDM and manually set compression parameters. |
| 1416 | * Note: srcSizeHint == 0 means 0! |
| 1417 | */ |
| 1418 | ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( |
| 1419 | const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); |
| 1420 | |
| 1421 | /*! ZSTD_initCStream_internal() : |
| 1422 | * Private use only. Init streaming operation. |
| 1423 | * expects params to be valid. |
 * must receive dict, or cdict, or neither, but never both.
| 1425 | * @return : 0, or an error code */ |
| 1426 | size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, |
| 1427 | const void* dict, size_t dictSize, |
| 1428 | const ZSTD_CDict* cdict, |
| 1429 | const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); |
| 1430 | |
| 1431 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); |
| 1432 | |
| 1433 | /*! ZSTD_getCParamsFromCDict() : |
| 1434 | * as the name implies */ |
| 1435 | ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); |
| 1436 | |
| 1437 | /* ZSTD_compressBegin_advanced_internal() : |
| 1438 | * Private use only. To be called from zstdmt_compress.c. */ |
| 1439 | size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, |
| 1440 | const void* dict, size_t dictSize, |
| 1441 | ZSTD_dictContentType_e dictContentType, |
| 1442 | ZSTD_dictTableLoadMethod_e dtlm, |
| 1443 | const ZSTD_CDict* cdict, |
| 1444 | const ZSTD_CCtx_params* params, |
| 1445 | unsigned long long pledgedSrcSize); |
| 1446 | |
| 1447 | /* ZSTD_compress_advanced_internal() : |
| 1448 | * Private use only. To be called from zstdmt_compress.c. */ |
| 1449 | size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, |
| 1450 | void* dst, size_t dstCapacity, |
| 1451 | const void* src, size_t srcSize, |
| 1452 | const void* dict,size_t dictSize, |
| 1453 | const ZSTD_CCtx_params* params); |
| 1454 | |
| 1455 | |
| 1456 | /* ZSTD_writeLastEmptyBlock() : |
| 1457 | * output an empty Block with end-of-frame mark to complete a frame |
| 1458 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
| 1459 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) |
| 1460 | */ |
| 1461 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); |
| 1462 | |
| 1463 | |
| 1464 | /* ZSTD_referenceExternalSequences() : |
| 1465 | * Must be called before starting a compression operation. |
| 1466 | * seqs must parse a prefix of the source. |
| 1467 | * This cannot be used when long range matching is enabled. |
| 1468 | * Zstd will use these sequences, and pass the literals to a secondary block |
| 1469 | * compressor. |
| 1470 | * @return : An error code on failure. |
| 1471 | * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory |
| 1472 | * access and data corruption. |
| 1473 | */ |
| 1474 | size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); |
| 1475 | |
| 1476 | /** ZSTD_cycleLog() : |
| 1477 | * condition for correct operation : hashLog > 1 */ |
| 1478 | U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); |
| 1479 | |
| 1480 | /** ZSTD_CCtx_trace() : |
| 1481 | * Trace the end of a compression call. |
| 1482 | */ |
| 1483 | void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); |
| 1484 | |
| 1485 | /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of |
| 1486 | * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. |
| 1487 | * Note that the block delimiter must include the last literals of the block. |
| 1488 | */ |
| 1489 | size_t |
| 1490 | ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, |
| 1491 | ZSTD_sequencePosition* seqPos, |
| 1492 | const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, |
| 1493 | const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); |
| 1494 | |
| 1495 | /* Returns the number of bytes to move the current read position back by. |
| 1496 | * Only non-zero if we ended up splitting a sequence. |
| 1497 | * Otherwise, it may return a ZSTD error if something went wrong. |
| 1498 | * |
| 1499 | * This function will attempt to scan through blockSize bytes |
| 1500 | * represented by the sequences in @inSeqs, |
| 1501 | * storing any (partial) sequences. |
| 1502 | * |
| 1503 | * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to |
| 1504 | * avoid splitting a match, or to avoid splitting a match such that it would produce a match |
| 1505 | * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. |
| 1506 | */ |
| 1507 | size_t |
| 1508 | ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, |
| 1509 | const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, |
| 1510 | const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); |
| 1511 | |
| 1512 | |
| 1513 | /* =============================================================== |
| 1514 | * Deprecated definitions that are still used internally to avoid |
| 1515 | * deprecation warnings. These functions are exactly equivalent to |
| 1516 | * their public variants, but avoid the deprecation warnings. |
| 1517 | * =============================================================== */ |
| 1518 | |
| 1519 | size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); |
| 1520 | |
| 1521 | size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, |
| 1522 | void* dst, size_t dstCapacity, |
| 1523 | const void* src, size_t srcSize); |
| 1524 | |
| 1525 | size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, |
| 1526 | void* dst, size_t dstCapacity, |
| 1527 | const void* src, size_t srcSize); |
| 1528 | |
| 1529 | size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); |
| 1530 | |
| 1531 | |
| 1532 | #endif /* ZSTD_COMPRESS_H */ |