| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
| 11 | #include <limits.h> |
| 12 | #include <math.h> |
| 13 | #include <stddef.h> |
| 14 | #include <stdio.h> |
| 15 | #include <stdlib.h> |
| 16 | #include <string.h> |
| 17 | #include <time.h> /* time(), for seed random initialization */ |
| 18 | |
| 19 | #include "util.h" |
| 20 | #include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */ |
| 21 | #include "zstd.h" |
| 22 | #include "zstd_internal.h" |
| 23 | #include "mem.h" |
| 24 | #define ZDICT_STATIC_LINKING_ONLY |
| 25 | #include "zdict.h" |
| 26 | |
| 27 | /* Direct access to internal compression functions is required */ |
| 28 | #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */ |
| 29 | #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */ |
| 30 | |
| 31 | #define XXH_STATIC_LINKING_ONLY |
| 32 | #include "xxhash.h" /* XXH64 */ |
| 33 | |
/* `inline` is only a keyword in C++ and in C99 or later. For any older C
 * dialect, define it away so that `static inline` declarations still compile. */
#if !defined(__cplusplus) && !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
#  define inline /* disable */
#endif
| 37 | |
| 38 | /*-************************************ |
| 39 | * DISPLAY Macros |
| 40 | **************************************/ |
| 41 | #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) |
| 42 | #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } |
| 43 | static U32 g_displayLevel = 2; |
| 44 | |
| 45 | #define DISPLAYUPDATE(...) \ |
| 46 | do { \ |
| 47 | if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \ |
| 48 | (g_displayLevel >= 4)) { \ |
| 49 | g_displayClock = UTIL_getTime(); \ |
| 50 | DISPLAY(__VA_ARGS__); \ |
| 51 | if (g_displayLevel >= 4) fflush(stderr); \ |
| 52 | } \ |
| 53 | } while (0) |
| 54 | |
| 55 | static const U64 g_refreshRate = SEC_TO_MICRO / 6; |
| 56 | static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; |
| 57 | |
/* Terminate the program with a diagnostic when `code` is a zstd error code.
 * `code` is evaluated exactly once and cached, so it is safe to pass a
 * function call: the call is not repeated when the error message is built. */
#define CHECKERR(code)                                                      \
    do {                                                                    \
        size_t const checkerr_code_ = (code);                               \
        if (ZSTD_isError(checkerr_code_)) {                                 \
            DISPLAY("Error occurred while generating data: %s\n",           \
                    ZSTD_getErrorName(checkerr_code_));                     \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
| 66 | |
| 67 | |
| 68 | /*-******************************************************* |
| 69 | * Random function |
| 70 | *********************************************************/ |
| 71 | static U32 RAND(U32* src) |
| 72 | { |
| 73 | #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
| 74 | static const U32 prime1 = 2654435761U; |
| 75 | static const U32 prime2 = 2246822519U; |
| 76 | U32 rand32 = *src; |
| 77 | rand32 *= prime1; |
| 78 | rand32 += prime2; |
| 79 | rand32 = RAND_rotl32(rand32, 13); |
| 80 | *src = rand32; |
| 81 | return RAND_rotl32(rand32, 27); |
| 82 | #undef RAND_rotl32 |
| 83 | } |
| 84 | |
/* Number of entries (8192) in a symbol distribution table. */
#define DISTSIZE (1 << 13)
| 86 | |
| 87 | /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ |
| 88 | static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) |
| 89 | { |
| 90 | size_t i; |
| 91 | BYTE* op = ptr; |
| 92 | |
| 93 | for (i = 0; i < size; i++) { |
| 94 | op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | /* Write `size` random bytes into `ptr` */ |
| 99 | static void RAND_buffer(U32* seed, void* ptr, size_t size) |
| 100 | { |
| 101 | size_t i; |
| 102 | BYTE* op = ptr; |
| 103 | |
| 104 | for (i = 0; i + 4 <= size; i += 4) { |
| 105 | MEM_writeLE32(op + i, RAND(seed)); |
| 106 | } |
| 107 | for (; i < size; i++) { |
| 108 | op[i] = RAND(seed) & 0xff; |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | /* Write `size` bytes into `ptr` following the distribution `dist` */ |
| 113 | static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) |
| 114 | { |
| 115 | size_t i; |
| 116 | BYTE* op = ptr; |
| 117 | |
| 118 | for (i = 0; i < size; i++) { |
| 119 | op[i] = dist[RAND(seed) % DISTSIZE]; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | /* Generate a random distribution where the frequency of each symbol follows a |
| 124 | * geometric distribution defined by `weight` |
| 125 | * `dist` should have size at least `DISTSIZE` */ |
| 126 | static void RAND_genDist(U32* seed, BYTE* dist, double weight) |
| 127 | { |
| 128 | size_t i = 0; |
| 129 | size_t statesLeft = DISTSIZE; |
| 130 | BYTE symb = (BYTE) (RAND(seed) % 256); |
| 131 | BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ |
| 132 | |
| 133 | while (i < DISTSIZE) { |
| 134 | size_t states = ((size_t)(weight * (double)statesLeft)) + 1; |
| 135 | size_t j; |
| 136 | for (j = 0; j < states && i < DISTSIZE; j++, i++) { |
| 137 | dist[i] = symb; |
| 138 | } |
| 139 | |
| 140 | symb += step; |
| 141 | statesLeft -= states; |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | /* Generates a random number in the range [min, max) */ |
| 146 | static inline U32 RAND_range(U32* seed, U32 min, U32 max) |
| 147 | { |
| 148 | return (RAND(seed) % (max-min)) + min; |
| 149 | } |
| 150 | |
/* Round the non-negative floating-point expression `x` to the nearest U32
 * (halves round up). `x` is parenthesized so that an argument containing
 * low-precedence operators (e.g. `c ? a : b`) is rounded as a whole. */
#define ROUND(x) ((U32)((x) + 0.5))
| 152 | |
| 153 | /* Generates a random number in an exponential distribution with mean `mean` */ |
| 154 | static double RAND_exp(U32* seed, double mean) |
| 155 | { |
| 156 | double const u = RAND(seed) / (double) UINT_MAX; |
| 157 | return log(1-u) * (-mean); |
| 158 | } |
| 159 | |
| 160 | /*-******************************************************* |
| 161 | * Constants and Structs |
| 162 | *********************************************************/ |
/* Printable names of the block kinds.
 * NOTE(review): presumably indexed by block type value; the users of this
 * table are outside the visible part of the file. */
const char* BLOCK_TYPES[] = {
    "raw",
    "rle",
    "compressed"
};
| 164 | |
/* Upper bound on generated content: 2^20 bytes. */
#define MAX_DECOMPRESSED_SIZE_LOG 20
#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)

/* Recommended decoder support is 8MB, so window logs are limited to
 * 4MB + mantissa. */
#define MAX_WINDOW_LOG 22

/* Shortest match a sequence may carry, and the resulting ceiling on the
 * number of sequences that fit in one maximum-size block. */
#define MIN_SEQ_LEN (3)
#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
| 172 | |
/* Longest path length assumed by this tool: reuse the platform's PATH_MAX
 * when it is available, otherwise fall back to 256. */
#if !defined(MAX_PATH)
#  if defined(PATH_MAX)
#    define MAX_PATH PATH_MAX
#  else
#    define MAX_PATH 256
#  endif
#endif
| 180 | |
| 181 | BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; |
| 182 | BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; |
| 183 | BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; |
| 184 | |
| 185 | seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; |
| 186 | BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ |
| 187 | BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; |
| 188 | BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; |
| 189 | BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; |
| 190 | |
| 191 | U64 WKSP[HUF_WORKSPACE_SIZE_U64]; |
| 192 | |
/* Sizes chosen for a generated frame. */
typedef struct {
    /* decompressed size of the frame; 0 means unknown
     * (unless contentSize == windowSize == 0) */
    size_t contentSize;
    /* window size; contentSize >= windowSize means single segment */
    unsigned windowSize;
} frameHeader_t;
| 197 | |
| 198 | /* For repeat modes */ |
| 199 | typedef struct { |
| 200 | U32 rep[ZSTD_REP_NUM]; |
| 201 | |
| 202 | int hufInit; |
| 203 | /* the distribution used in the previous block for repeat mode */ |
| 204 | BYTE hufDist[DISTSIZE]; |
| 205 | HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)]; |
| 206 | |
| 207 | int fseInit; |
| 208 | FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; |
| 209 | FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; |
| 210 | FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; |
| 211 | |
| 212 | /* Symbols that were present in the previous distribution, for use with |
| 213 | * set_repeat */ |
| 214 | BYTE litlengthSymbolSet[36]; |
| 215 | BYTE offsetSymbolSet[29]; |
| 216 | BYTE matchlengthSymbolSet[53]; |
| 217 | } cblockStats_t; |
| 218 | |
| 219 | typedef struct { |
| 220 | void* data; |
| 221 | void* dataStart; |
| 222 | void* dataEnd; |
| 223 | |
| 224 | void* src; |
| 225 | void* srcStart; |
| 226 | void* srcEnd; |
| 227 | |
| 228 | frameHeader_t header; |
| 229 | |
| 230 | cblockStats_t stats; |
| 231 | cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ |
| 232 | } frame_t; |
| 233 | |
| 234 | typedef struct { |
| 235 | int useDict; |
| 236 | U32 dictID; |
| 237 | size_t dictContentSize; |
| 238 | BYTE* dictContent; |
| 239 | } dictInfo; |
| 240 | |
/* What kind of output the generator produces. */
typedef enum {
    gt_frame = 0,   /* generate frames */
    gt_block        /* generate compressed blocks without block/frame headers */
} genType_e;
| 245 | |
/* Smaller of two values, unless a MIN macro is already provided.
 * Beware: the selected argument is evaluated twice. */
#if !defined(MIN)
#  define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
| 249 | |
| 250 | /*-******************************************************* |
| 251 | * Global variables (set from command line) |
| 252 | *********************************************************/ |
| 253 | U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ |
| 254 | U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ |
| 255 | |
| 256 | /*-******************************************************* |
| 257 | * Generator Functions |
| 258 | *********************************************************/ |
| 259 | |
/* Advanced options on generation. */
struct {
    /* when non-zero, the content size is always written in the frame header */
    int contentSize;
} opts;
| 263 | |
| 264 | /* Generate and write a random frame header */ |
| 265 | static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) |
| 266 | { |
| 267 | BYTE* const op = frame->data; |
| 268 | size_t pos = 0; |
| 269 | frameHeader_t fh; |
| 270 | |
| 271 | BYTE windowByte = 0; |
| 272 | |
| 273 | int singleSegment = 0; |
| 274 | int contentSizeFlag = 0; |
| 275 | int fcsCode = 0; |
| 276 | |
| 277 | memset(&fh, 0, sizeof(fh)); |
| 278 | |
| 279 | /* generate window size */ |
| 280 | { |
| 281 | /* Follow window algorithm from specification */ |
| 282 | int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); |
| 283 | int const mantissa = RAND(seed) % 8; |
| 284 | windowByte = (BYTE) ((exponent << 3) | mantissa); |
| 285 | fh.windowSize = (1U << (exponent + 10)); |
| 286 | fh.windowSize += fh.windowSize / 8 * mantissa; |
| 287 | } |
| 288 | |
| 289 | { |
| 290 | /* Generate random content size */ |
| 291 | size_t highBit; |
| 292 | if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { |
| 293 | /* do content of at least 128 bytes */ |
| 294 | highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); |
| 295 | } else if (RAND(seed) & 3) { |
| 296 | /* do small content */ |
| 297 | highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); |
| 298 | } else { |
| 299 | /* 0 size frame */ |
| 300 | highBit = 0; |
| 301 | } |
| 302 | fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; |
| 303 | |
| 304 | /* provide size sometimes */ |
| 305 | contentSizeFlag = opts.contentSize | (RAND(seed) & 1); |
| 306 | |
| 307 | if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { |
| 308 | /* do single segment sometimes */ |
| 309 | fh.windowSize = (U32) fh.contentSize; |
| 310 | singleSegment = 1; |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | if (contentSizeFlag) { |
| 315 | /* Determine how large fcs field has to be */ |
| 316 | int minFcsCode = (fh.contentSize >= 256) + |
| 317 | (fh.contentSize >= 65536 + 256) + |
| 318 | (fh.contentSize > 0xFFFFFFFFU); |
| 319 | if (!singleSegment && !minFcsCode) { |
| 320 | minFcsCode = 1; |
| 321 | } |
| 322 | fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); |
| 323 | if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; |
| 324 | } |
| 325 | |
| 326 | /* write out the header */ |
| 327 | MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); |
| 328 | pos += 4; |
| 329 | |
| 330 | { |
| 331 | /* |
| 332 | * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) |
| 333 | * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5) |
| 334 | * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) |
| 335 | * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) |
| 336 | * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header |
| 337 | */ |
| 338 | int const dictBits = info.useDict ? 3 : 0; |
| 339 | BYTE const frameHeaderDescriptor = |
| 340 | (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); |
| 341 | op[pos++] = frameHeaderDescriptor; |
| 342 | } |
| 343 | |
| 344 | if (!singleSegment) { |
| 345 | op[pos++] = windowByte; |
| 346 | } |
| 347 | if (info.useDict) { |
| 348 | MEM_writeLE32(op + pos, (U32) info.dictID); |
| 349 | pos += 4; |
| 350 | } |
| 351 | if (contentSizeFlag) { |
| 352 | switch (fcsCode) { |
| 353 | default: /* Impossible */ |
| 354 | case 0: op[pos++] = (BYTE) fh.contentSize; break; |
| 355 | case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; |
| 356 | case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; |
| 357 | case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; |
| 358 | } |
| 359 | } |
| 360 | |
| 361 | DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize); |
| 362 | DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); |
| 363 | DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); |
| 364 | DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); |
| 365 | |
| 366 | frame->data = op + pos; |
| 367 | frame->header = fh; |
| 368 | } |
| 369 | |
| 370 | /* Write a literal block in either raw or RLE form, return the literals size */ |
| 371 | static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) |
| 372 | { |
| 373 | BYTE* op = (BYTE*)frame->data; |
| 374 | int const type = RAND(seed) % 2; |
| 375 | int const sizeFormatDesc = RAND(seed) % 8; |
| 376 | size_t litSize; |
| 377 | size_t maxLitSize = MIN(contentSize, g_maxBlockSize); |
| 378 | |
| 379 | if (sizeFormatDesc == 0) { |
| 380 | /* Size_FormatDesc = ?0 */ |
| 381 | maxLitSize = MIN(maxLitSize, 31); |
| 382 | } else if (sizeFormatDesc <= 4) { |
| 383 | /* Size_FormatDesc = 01 */ |
| 384 | maxLitSize = MIN(maxLitSize, 4095); |
| 385 | } else { |
| 386 | /* Size_Format = 11 */ |
| 387 | maxLitSize = MIN(maxLitSize, 1048575); |
| 388 | } |
| 389 | |
| 390 | litSize = RAND(seed) % (maxLitSize + 1); |
| 391 | if (frame->src == frame->srcStart && litSize == 0) { |
| 392 | litSize = 1; /* no empty literals if there's nothing preceding this block */ |
| 393 | } |
| 394 | if (litSize + 3 > contentSize) { |
| 395 | litSize = contentSize; /* no matches shorter than 3 are allowed */ |
| 396 | } |
| 397 | /* use smallest size format that fits */ |
| 398 | if (litSize < 32) { |
| 399 | op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; |
| 400 | op += 1; |
| 401 | } else if (litSize < 4096) { |
| 402 | op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; |
| 403 | op[1] = (litSize >> 4) & 0xff; |
| 404 | op += 2; |
| 405 | } else { |
| 406 | op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; |
| 407 | op[1] = (litSize >> 4) & 0xff; |
| 408 | op[2] = (litSize >> 12) & 0xff; |
| 409 | op += 3; |
| 410 | } |
| 411 | |
| 412 | if (type == 0) { |
| 413 | /* Raw literals */ |
| 414 | DISPLAYLEVEL(4, " raw literals\n"); |
| 415 | |
| 416 | RAND_buffer(seed, LITERAL_BUFFER, litSize); |
| 417 | memcpy(op, LITERAL_BUFFER, litSize); |
| 418 | op += litSize; |
| 419 | } else { |
| 420 | /* RLE literals */ |
| 421 | BYTE const symb = (BYTE) (RAND(seed) % 256); |
| 422 | |
| 423 | DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb); |
| 424 | |
| 425 | memset(LITERAL_BUFFER, symb, litSize); |
| 426 | op[0] = symb; |
| 427 | op++; |
| 428 | } |
| 429 | |
| 430 | frame->data = op; |
| 431 | |
| 432 | return litSize; |
| 433 | } |
| 434 | |
| 435 | /* Generate a Huffman header for the given source */ |
| 436 | static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, |
| 437 | const void* src, size_t srcSize) |
| 438 | { |
| 439 | BYTE* const ostart = (BYTE*)dst; |
| 440 | BYTE* op = ostart; |
| 441 | |
| 442 | unsigned huffLog = 11; |
| 443 | unsigned maxSymbolValue = 255; |
| 444 | |
| 445 | unsigned count[HUF_SYMBOLVALUE_MAX+1]; |
| 446 | |
| 447 | /* Scan input and build symbol stats */ |
| 448 | { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP)); |
| 449 | assert(!HIST_isError(largest)); |
| 450 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ |
| 451 | if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ |
| 452 | } |
| 453 | |
| 454 | /* Build Huffman Tree */ |
| 455 | /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ |
| 456 | huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); |
| 457 | DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); |
| 458 | { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); |
| 459 | CHECKERR(maxBits); |
| 460 | huffLog = (U32)maxBits; |
| 461 | } |
| 462 | |
| 463 | /* Write table description header */ |
| 464 | { size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); |
| 465 | if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ |
| 466 | op += hSize; |
| 467 | } |
| 468 | |
| 469 | return op - ostart; |
| 470 | } |
| 471 | |
| 472 | /* Write a Huffman coded literals block and return the literals size */ |
| 473 | static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) |
| 474 | { |
| 475 | BYTE* origop = (BYTE*)frame->data; |
| 476 | BYTE* opend = (BYTE*)frame->dataEnd; |
| 477 | BYTE* op; |
| 478 | BYTE* const ostart = origop; |
| 479 | int const sizeFormat = RAND(seed) % 4; |
| 480 | size_t litSize; |
| 481 | size_t hufHeaderSize = 0; |
| 482 | size_t compressedSize = 0; |
| 483 | size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); |
| 484 | |
| 485 | symbolEncodingType_e hType; |
| 486 | |
| 487 | if (contentSize < 64) { |
| 488 | /* make sure we get reasonably-sized literals for compression */ |
| 489 | return ERROR(GENERIC); |
| 490 | } |
| 491 | |
| 492 | DISPLAYLEVEL(4, " compressed literals\n"); |
| 493 | |
| 494 | switch (sizeFormat) { |
| 495 | case 0: /* fall through, size is the same as case 1 */ |
| 496 | case 1: |
| 497 | maxLitSize = MIN(maxLitSize, 1023); |
| 498 | origop += 3; |
| 499 | break; |
| 500 | case 2: |
| 501 | maxLitSize = MIN(maxLitSize, 16383); |
| 502 | origop += 4; |
| 503 | break; |
| 504 | case 3: |
| 505 | maxLitSize = MIN(maxLitSize, 262143); |
| 506 | origop += 5; |
| 507 | break; |
| 508 | default:; /* impossible */ |
| 509 | } |
| 510 | |
| 511 | do { |
| 512 | op = origop; |
| 513 | do { |
| 514 | litSize = RAND(seed) % (maxLitSize + 1); |
| 515 | } while (litSize < 32); /* avoid small literal sizes */ |
| 516 | if (litSize + 3 > contentSize) { |
| 517 | litSize = contentSize; /* no matches shorter than 3 are allowed */ |
| 518 | } |
| 519 | |
| 520 | /* most of the time generate a new distribution */ |
| 521 | if ((RAND(seed) & 3) || !frame->stats.hufInit) { |
| 522 | do { |
| 523 | if (RAND(seed) & 3) { |
| 524 | /* add 10 to ensure some compressibility */ |
| 525 | double const weight = ((RAND(seed) % 90) + 10) / 100.0; |
| 526 | |
| 527 | DISPLAYLEVEL(5, " distribution weight: %d%%\n", |
| 528 | (int)(weight * 100)); |
| 529 | |
| 530 | RAND_genDist(seed, frame->stats.hufDist, weight); |
| 531 | } else { |
| 532 | /* sometimes do restricted range literals to force |
| 533 | * non-huffman headers */ |
| 534 | DISPLAYLEVEL(5, " small range literals\n"); |
| 535 | RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, |
| 536 | 15); |
| 537 | } |
| 538 | RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, |
| 539 | litSize); |
| 540 | |
| 541 | /* generate the header from the distribution instead of the |
| 542 | * actual data to avoid bugs with symbols that were in the |
| 543 | * distribution but never showed up in the output */ |
| 544 | hufHeaderSize = writeHufHeader( |
| 545 | seed, frame->stats.hufTable, op, opend - op, |
| 546 | frame->stats.hufDist, DISTSIZE); |
| 547 | CHECKERR(hufHeaderSize); |
| 548 | /* repeat until a valid header is written */ |
| 549 | } while (hufHeaderSize == 0); |
| 550 | op += hufHeaderSize; |
| 551 | hType = set_compressed; |
| 552 | |
| 553 | frame->stats.hufInit = 1; |
| 554 | } else { |
| 555 | /* repeat the distribution/table from last time */ |
| 556 | DISPLAYLEVEL(5, " huffman repeat stats\n"); |
| 557 | RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, |
| 558 | litSize); |
| 559 | hufHeaderSize = 0; |
| 560 | hType = set_repeat; |
| 561 | } |
| 562 | |
| 563 | do { |
| 564 | compressedSize = |
| 565 | sizeFormat == 0 |
| 566 | ? HUF_compress1X_usingCTable( |
| 567 | op, opend - op, LITERAL_BUFFER, litSize, |
| 568 | frame->stats.hufTable, /* flags */ 0) |
| 569 | : HUF_compress4X_usingCTable( |
| 570 | op, opend - op, LITERAL_BUFFER, litSize, |
| 571 | frame->stats.hufTable, /* flags */ 0); |
| 572 | CHECKERR(compressedSize); |
| 573 | /* this only occurs when it could not compress or similar */ |
| 574 | } while (compressedSize <= 0); |
| 575 | |
| 576 | op += compressedSize; |
| 577 | |
| 578 | compressedSize += hufHeaderSize; |
| 579 | DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize); |
| 580 | DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize); |
| 581 | if (compressedSize >= litSize) { |
| 582 | DISPLAYLEVEL(5, " trying again\n"); |
| 583 | /* if we have to try again, reset the stats so we don't accidentally |
| 584 | * try to repeat a distribution we just made */ |
| 585 | frame->stats = frame->oldStats; |
| 586 | } else { |
| 587 | break; |
| 588 | } |
| 589 | } while (1); |
| 590 | |
| 591 | /* write header */ |
| 592 | switch (sizeFormat) { |
| 593 | case 0: /* fall through, size is the same as case 1 */ |
| 594 | case 1: { |
| 595 | U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | |
| 596 | ((U32)compressedSize << 14); |
| 597 | MEM_writeLE24(ostart, header); |
| 598 | break; |
| 599 | } |
| 600 | case 2: { |
| 601 | U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | |
| 602 | ((U32)compressedSize << 18); |
| 603 | MEM_writeLE32(ostart, header); |
| 604 | break; |
| 605 | } |
| 606 | case 3: { |
| 607 | U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | |
| 608 | ((U32)compressedSize << 22); |
| 609 | MEM_writeLE32(ostart, header); |
| 610 | ostart[4] = (BYTE)(compressedSize >> 10); |
| 611 | break; |
| 612 | } |
| 613 | default:; /* impossible */ |
| 614 | } |
| 615 | |
| 616 | frame->data = op; |
| 617 | return litSize; |
| 618 | } |
| 619 | |
| 620 | static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) |
| 621 | { |
| 622 | /* only do compressed for larger segments to avoid compressibility issues */ |
| 623 | if (RAND(seed) & 7 && contentSize >= 64) { |
| 624 | return writeLiteralsBlockCompressed(seed, frame, contentSize); |
| 625 | } else { |
| 626 | return writeLiteralsBlockSimple(seed, frame, contentSize); |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | static inline void initSeqStore(seqStore_t *seqStore) { |
| 631 | seqStore->maxNbSeq = MAX_NB_SEQ; |
| 632 | seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX; |
| 633 | seqStore->sequencesStart = SEQUENCE_BUFFER; |
| 634 | seqStore->litStart = SEQUENCE_LITERAL_BUFFER; |
| 635 | seqStore->llCode = SEQUENCE_LLCODE; |
| 636 | seqStore->mlCode = SEQUENCE_MLCODE; |
| 637 | seqStore->ofCode = SEQUENCE_OFCODE; |
| 638 | |
| 639 | ZSTD_resetSeqStore(seqStore); |
| 640 | } |
| 641 | |
| 642 | /* Randomly generate sequence commands */ |
| 643 | static U32 |
| 644 | generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, |
| 645 | size_t contentSize, size_t literalsSize, dictInfo info) |
| 646 | { |
| 647 | /* The total length of all the matches */ |
| 648 | size_t const remainingMatch = contentSize - literalsSize; |
| 649 | size_t excessMatch = 0; |
| 650 | U32 numSequences = 0; |
| 651 | U32 i; |
| 652 | |
| 653 | const BYTE* literals = LITERAL_BUFFER; |
| 654 | BYTE* srcPtr = frame->src; |
| 655 | |
| 656 | if (literalsSize != contentSize) { |
| 657 | /* each match must be at least MIN_SEQ_LEN, so this is the maximum |
| 658 | * number of sequences we can have */ |
| 659 | U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; |
| 660 | numSequences = (RAND(seed) % maxSequences) + 1; |
| 661 | |
| 662 | /* the extra match lengths we have to allocate to each sequence */ |
| 663 | excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; |
| 664 | } |
| 665 | |
| 666 | DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch); |
| 667 | for (i = 0; i < numSequences; i++) { |
| 668 | /* Generate match and literal lengths by exponential distribution to |
| 669 | * ensure nice numbers */ |
| 670 | U32 matchLen = |
| 671 | MIN_SEQ_LEN + |
| 672 | ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i))); |
| 673 | U32 literalLen = |
| 674 | (RAND(seed) & 7) |
| 675 | ? ROUND(RAND_exp(seed, |
| 676 | (double)literalsSize / |
| 677 | (double)(numSequences - i))) |
| 678 | : 0; |
| 679 | /* actual offset, code to send, and point to copy up to when shifting |
| 680 | * codes in the repeat offsets history */ |
| 681 | U32 offset, offBase, repIndex; |
| 682 | |
| 683 | /* bounds checks */ |
| 684 | matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); |
| 685 | literalLen = MIN(literalLen, (U32) literalsSize); |
| 686 | if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; |
| 687 | if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; |
| 688 | |
| 689 | memcpy(srcPtr, literals, literalLen); |
| 690 | srcPtr += literalLen; |
| 691 | do { |
| 692 | if (RAND(seed) & 7) { |
| 693 | /* do a normal offset */ |
| 694 | U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); |
| 695 | offset = (RAND(seed) % |
| 696 | MIN(frame->header.windowSize, |
| 697 | (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + |
| 698 | 1; |
| 699 | if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { |
| 700 | /* need to occasionally generate offsets that go past the start */ |
| 701 | /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ |
| 702 | U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; |
| 703 | offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; |
| 704 | if (offset > frame->header.windowSize) { |
| 705 | if (lenPastStart < MIN_SEQ_LEN) { |
| 706 | /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ |
| 707 | /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ |
| 708 | /* make sure lenPastStart does not go past dictionary start though */ |
| 709 | lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); |
| 710 | offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; |
| 711 | } |
| 712 | { U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); |
| 713 | matchLen = MIN(matchLen, matchLenBound); |
| 714 | } |
| 715 | } |
| 716 | } |
| 717 | offBase = OFFSET_TO_OFFBASE(offset); |
| 718 | repIndex = 2; |
| 719 | } else { |
| 720 | /* do a repeat offset */ |
| 721 | U32 const randomRepIndex = RAND(seed) % 3; |
| 722 | offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1); /* expects values between 1 & 3 */ |
| 723 | if (literalLen > 0) { |
| 724 | offset = frame->stats.rep[randomRepIndex]; |
| 725 | repIndex = randomRepIndex; |
| 726 | } else { |
| 727 | /* special case : literalLen == 0 */ |
| 728 | offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1 |
| 729 | : frame->stats.rep[randomRepIndex + 1]; |
| 730 | repIndex = MIN(2, randomRepIndex + 1); |
| 731 | } |
| 732 | } |
| 733 | } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); |
| 734 | |
| 735 | { BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize); |
| 736 | size_t j; |
| 737 | for (j = 0; j < matchLen; j++) { |
| 738 | if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { |
| 739 | /* copy from dictionary instead of literals */ |
| 740 | size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); |
| 741 | *srcPtr = *(dictEnd - dictOffset); |
| 742 | } |
| 743 | else { |
| 744 | *srcPtr = *(srcPtr-offset); |
| 745 | } |
| 746 | srcPtr++; |
| 747 | } } |
| 748 | |
| 749 | { int r; |
| 750 | for (r = repIndex; r > 0; r--) { |
| 751 | frame->stats.rep[r] = frame->stats.rep[r - 1]; |
| 752 | } |
| 753 | frame->stats.rep[0] = offset; |
| 754 | } |
| 755 | |
| 756 | DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", |
| 757 | (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen); |
| 758 | DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", |
| 759 | (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i); |
| 760 | DISPLAYLEVEL(6, "\n"); |
| 761 | if (OFFBASE_IS_REPCODE(offBase)) { /* expects sumtype numeric representation of ZSTD_storeSeq() */ |
| 762 | DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex); |
| 763 | } |
| 764 | /* use libzstd sequence handling */ |
| 765 | ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen, |
| 766 | offBase, matchLen); |
| 767 | |
| 768 | literalsSize -= literalLen; |
| 769 | excessMatch -= (matchLen - MIN_SEQ_LEN); |
| 770 | literals += literalLen; |
| 771 | } |
| 772 | |
| 773 | memcpy(srcPtr, literals, literalsSize); |
| 774 | srcPtr += literalsSize; |
| 775 | DISPLAYLEVEL(6, " excess literals: %5u ", (unsigned)literalsSize); |
| 776 | DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); |
| 777 | DISPLAYLEVEL(6, "\n"); |
| 778 | |
| 779 | return numSequences; |
| 780 | } |
| 781 | |
| 782 | static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) |
| 783 | { |
| 784 | size_t i; |
| 785 | |
| 786 | memset(set, 0, (size_t)maxSymbolValue+1); |
| 787 | |
| 788 | for (i = 0; i < len; i++) { |
| 789 | set[symbols[i]] = 1; |
| 790 | } |
| 791 | } |
| 792 | |
| 793 | static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) |
| 794 | { |
| 795 | size_t i; |
| 796 | |
| 797 | for (i = 0; i < len; i++) { |
| 798 | if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { |
| 799 | return 0; |
| 800 | } |
| 801 | } |
| 802 | return 1; |
| 803 | } |
| 804 | |
| 805 | static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, |
| 806 | size_t nbSeq) |
| 807 | { |
| 808 | /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ |
| 809 | unsigned count[MaxSeq+1]; |
| 810 | S16 norm[MaxSeq+1]; |
| 811 | FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; |
| 812 | FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; |
| 813 | FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; |
| 814 | U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ |
| 815 | const seqDef* const sequences = seqStorePtr->sequencesStart; |
| 816 | const BYTE* const ofCodeTable = seqStorePtr->ofCode; |
| 817 | const BYTE* const llCodeTable = seqStorePtr->llCode; |
| 818 | const BYTE* const mlCodeTable = seqStorePtr->mlCode; |
| 819 | BYTE* const oend = (BYTE*)frame->dataEnd; |
| 820 | BYTE* op = (BYTE*)frame->data; |
| 821 | BYTE* seqHead; |
| 822 | BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)]; |
| 823 | |
| 824 | /* literals compressing block removed so that can be done separately */ |
| 825 | |
| 826 | /* Sequences Header */ |
| 827 | if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); |
| 828 | if (nbSeq < 128) *op++ = (BYTE)nbSeq; |
| 829 | else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; |
| 830 | else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; |
| 831 | |
| 832 | if (nbSeq==0) { |
| 833 | frame->data = op; |
| 834 | return 0; |
| 835 | } |
| 836 | |
| 837 | /* seqHead : flags for FSE encoding type */ |
| 838 | seqHead = op++; |
| 839 | |
| 840 | /* convert length/distances into codes */ |
| 841 | ZSTD_seqToCodes(seqStorePtr); |
| 842 | |
| 843 | /* CTable for Literal Lengths */ |
| 844 | { unsigned max = MaxLL; |
| 845 | size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ |
| 846 | assert(!HIST_isError(mostFrequent)); |
| 847 | if (frame->stats.fseInit && !(RAND(seed) & 3) && |
| 848 | isSymbolSubset(llCodeTable, nbSeq, |
| 849 | frame->stats.litlengthSymbolSet, 35)) { |
| 850 | /* maybe do repeat mode if we're allowed to */ |
| 851 | LLtype = set_repeat; |
| 852 | } else if (mostFrequent == nbSeq) { |
| 853 | /* do RLE if we have the chance */ |
| 854 | *op++ = llCodeTable[0]; |
| 855 | FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); |
| 856 | LLtype = set_rle; |
| 857 | } else if (!(RAND(seed) & 3)) { |
| 858 | /* maybe use the default distribution */ |
| 859 | CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer))); |
| 860 | LLtype = set_basic; |
| 861 | } else { |
| 862 | /* fall back on a full table */ |
| 863 | size_t nbSeq_1 = nbSeq; |
| 864 | const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); |
| 865 | if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } |
| 866 | FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); |
| 867 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
| 868 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
| 869 | op += NCountSize; } |
| 870 | CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer))); |
| 871 | LLtype = set_compressed; |
| 872 | } } |
| 873 | |
| 874 | /* CTable for Offsets */ |
| 875 | /* see Literal Lengths for descriptions of mode choices */ |
| 876 | { unsigned max = MaxOff; |
| 877 | size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ |
| 878 | assert(!HIST_isError(mostFrequent)); |
| 879 | if (frame->stats.fseInit && !(RAND(seed) & 3) && |
| 880 | isSymbolSubset(ofCodeTable, nbSeq, |
| 881 | frame->stats.offsetSymbolSet, 28)) { |
| 882 | Offtype = set_repeat; |
| 883 | } else if (mostFrequent == nbSeq) { |
| 884 | *op++ = ofCodeTable[0]; |
| 885 | FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); |
| 886 | Offtype = set_rle; |
| 887 | } else if (!(RAND(seed) & 3)) { |
| 888 | FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); |
| 889 | Offtype = set_basic; |
| 890 | } else { |
| 891 | size_t nbSeq_1 = nbSeq; |
| 892 | const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); |
| 893 | if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } |
| 894 | FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); |
| 895 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
| 896 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
| 897 | op += NCountSize; } |
| 898 | FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); |
| 899 | Offtype = set_compressed; |
| 900 | } } |
| 901 | |
| 902 | /* CTable for MatchLengths */ |
| 903 | /* see Literal Lengths for descriptions of mode choices */ |
| 904 | { unsigned max = MaxML; |
| 905 | size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ |
| 906 | assert(!HIST_isError(mostFrequent)); |
| 907 | if (frame->stats.fseInit && !(RAND(seed) & 3) && |
| 908 | isSymbolSubset(mlCodeTable, nbSeq, |
| 909 | frame->stats.matchlengthSymbolSet, 52)) { |
| 910 | MLtype = set_repeat; |
| 911 | } else if (mostFrequent == nbSeq) { |
| 912 | *op++ = *mlCodeTable; |
| 913 | FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); |
| 914 | MLtype = set_rle; |
| 915 | } else if (!(RAND(seed) & 3)) { |
| 916 | /* sometimes do default distribution */ |
| 917 | FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); |
| 918 | MLtype = set_basic; |
| 919 | } else { |
| 920 | /* fall back on table */ |
| 921 | size_t nbSeq_1 = nbSeq; |
| 922 | const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); |
| 923 | if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } |
| 924 | FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); |
| 925 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
| 926 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
| 927 | op += NCountSize; } |
| 928 | FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); |
| 929 | MLtype = set_compressed; |
| 930 | } } |
| 931 | frame->stats.fseInit = 1; |
| 932 | initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); |
| 933 | initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); |
| 934 | initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); |
| 935 | |
| 936 | DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype); |
| 937 | |
| 938 | *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); |
| 939 | |
| 940 | /* Encoding Sequences */ |
| 941 | { BIT_CStream_t blockStream; |
| 942 | FSE_CState_t stateMatchLength; |
| 943 | FSE_CState_t stateOffsetBits; |
| 944 | FSE_CState_t stateLitLength; |
| 945 | |
| 946 | RETURN_ERROR_IF( |
| 947 | ERR_isError(BIT_initCStream(&blockStream, op, oend-op)), |
| 948 | dstSize_tooSmall, "not enough space remaining"); |
| 949 | |
| 950 | /* first symbols */ |
| 951 | FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); |
| 952 | FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); |
| 953 | FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); |
| 954 | BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); |
| 955 | if (MEM_32bits()) BIT_flushBits(&blockStream); |
| 956 | BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); |
| 957 | if (MEM_32bits()) BIT_flushBits(&blockStream); |
| 958 | BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); |
| 959 | BIT_flushBits(&blockStream); |
| 960 | |
| 961 | { size_t n; |
| 962 | for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ |
| 963 | BYTE const llCode = llCodeTable[n]; |
| 964 | BYTE const ofCode = ofCodeTable[n]; |
| 965 | BYTE const mlCode = mlCodeTable[n]; |
| 966 | U32 const llBits = LL_bits[llCode]; |
| 967 | U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ |
| 968 | U32 const mlBits = ML_bits[mlCode]; |
| 969 | /* (7)*/ /* (7)*/ |
| 970 | FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ |
| 971 | FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ |
| 972 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ |
| 973 | FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ |
| 974 | if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) |
| 975 | BIT_flushBits(&blockStream); /* (7)*/ |
| 976 | BIT_addBits(&blockStream, sequences[n].litLength, llBits); |
| 977 | if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); |
| 978 | BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); |
| 979 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ |
| 980 | BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ |
| 981 | BIT_flushBits(&blockStream); /* (7)*/ |
| 982 | } } |
| 983 | |
| 984 | FSE_flushCState(&blockStream, &stateMatchLength); |
| 985 | FSE_flushCState(&blockStream, &stateOffsetBits); |
| 986 | FSE_flushCState(&blockStream, &stateLitLength); |
| 987 | |
| 988 | { size_t const streamSize = BIT_closeCStream(&blockStream); |
| 989 | if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ |
| 990 | op += streamSize; |
| 991 | } } |
| 992 | |
| 993 | frame->data = op; |
| 994 | |
| 995 | return 0; |
| 996 | } |
| 997 | |
| 998 | static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, |
| 999 | size_t literalsSize, dictInfo info) |
| 1000 | { |
| 1001 | seqStore_t seqStore; |
| 1002 | size_t numSequences; |
| 1003 | |
| 1004 | |
| 1005 | initSeqStore(&seqStore); |
| 1006 | |
| 1007 | /* randomly generate sequences */ |
| 1008 | numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); |
| 1009 | /* write them out to the frame data */ |
| 1010 | CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); |
| 1011 | |
| 1012 | return numSequences; |
| 1013 | } |
| 1014 | |
| 1015 | static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) |
| 1016 | { |
| 1017 | BYTE* const blockStart = (BYTE*)frame->data; |
| 1018 | size_t literalsSize; |
| 1019 | size_t nbSeq; |
| 1020 | |
| 1021 | DISPLAYLEVEL(4, " compressed block:\n"); |
| 1022 | |
| 1023 | literalsSize = writeLiteralsBlock(seed, frame, contentSize); |
| 1024 | |
| 1025 | DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize); |
| 1026 | |
| 1027 | nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); |
| 1028 | |
| 1029 | DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq); |
| 1030 | |
| 1031 | return (BYTE*)frame->data - blockStart; |
| 1032 | } |
| 1033 | |
| 1034 | static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, |
| 1035 | int lastBlock, dictInfo info) |
| 1036 | { |
| 1037 | int const blockTypeDesc = RAND(seed) % 8; |
| 1038 | size_t blockSize; |
| 1039 | int blockType; |
| 1040 | |
| 1041 | BYTE *const header = (BYTE*)frame->data; |
| 1042 | BYTE *op = header + 3; |
| 1043 | |
| 1044 | DISPLAYLEVEL(4, " block:\n"); |
| 1045 | DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize); |
| 1046 | DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); |
| 1047 | |
| 1048 | if (blockTypeDesc == 0) { |
| 1049 | /* Raw data frame */ |
| 1050 | |
| 1051 | RAND_buffer(seed, frame->src, contentSize); |
| 1052 | memcpy(op, frame->src, contentSize); |
| 1053 | |
| 1054 | op += contentSize; |
| 1055 | blockType = 0; |
| 1056 | blockSize = contentSize; |
| 1057 | } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) { |
| 1058 | /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/ |
| 1059 | BYTE const symbol = RAND(seed) & 0xff; |
| 1060 | |
| 1061 | op[0] = symbol; |
| 1062 | memset(frame->src, symbol, contentSize); |
| 1063 | |
| 1064 | op++; |
| 1065 | blockType = 1; |
| 1066 | blockSize = contentSize; |
| 1067 | } else { |
| 1068 | /* compressed, most common */ |
| 1069 | size_t compressedSize; |
| 1070 | blockType = 2; |
| 1071 | |
| 1072 | frame->oldStats = frame->stats; |
| 1073 | |
| 1074 | frame->data = op; |
| 1075 | compressedSize = writeCompressedBlock(seed, frame, contentSize, info); |
| 1076 | if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ |
| 1077 | blockType = 0; |
| 1078 | memcpy(op, frame->src, contentSize); |
| 1079 | |
| 1080 | op += contentSize; |
| 1081 | blockSize = contentSize; /* fall back on raw block if data doesn't |
| 1082 | compress */ |
| 1083 | |
| 1084 | frame->stats = frame->oldStats; /* don't update the stats */ |
| 1085 | } else { |
| 1086 | op += compressedSize; |
| 1087 | blockSize = compressedSize; |
| 1088 | } |
| 1089 | } |
| 1090 | frame->src = (BYTE*)frame->src + contentSize; |
| 1091 | |
| 1092 | DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); |
| 1093 | DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize); |
| 1094 | |
| 1095 | header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); |
| 1096 | MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); |
| 1097 | |
| 1098 | frame->data = op; |
| 1099 | } |
| 1100 | |
| 1101 | static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) |
| 1102 | { |
| 1103 | size_t contentLeft = frame->header.contentSize; |
| 1104 | size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); |
| 1105 | while (1) { |
| 1106 | /* 1 in 4 chance of ending frame */ |
| 1107 | int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3); |
| 1108 | size_t blockContentSize; |
| 1109 | if (lastBlock) { |
| 1110 | blockContentSize = contentLeft; |
| 1111 | } else { |
| 1112 | if (contentLeft > 0 && (RAND(seed) & 7)) { |
| 1113 | /* some variable size block */ |
| 1114 | blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); |
| 1115 | } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { |
| 1116 | /* some full size block */ |
| 1117 | blockContentSize = maxBlockSize; |
| 1118 | } else { |
| 1119 | /* some empty block */ |
| 1120 | blockContentSize = 0; |
| 1121 | } |
| 1122 | } |
| 1123 | |
| 1124 | writeBlock(seed, frame, blockContentSize, lastBlock, info); |
| 1125 | |
| 1126 | contentLeft -= blockContentSize; |
| 1127 | if (lastBlock) break; |
| 1128 | } |
| 1129 | } |
| 1130 | |
| 1131 | static void writeChecksum(frame_t* frame) |
| 1132 | { |
| 1133 | /* write checksum so implementations can verify their output */ |
| 1134 | U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); |
| 1135 | DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest); |
| 1136 | MEM_writeLE32(frame->data, (U32)digest); |
| 1137 | frame->data = (BYTE*)frame->data + 4; |
| 1138 | } |
| 1139 | |
/* outputBuffer() :
 * Writes the `size` bytes at `buf` into the file at `path`, opened in "wb"
 * mode. When `path` is NULL, the bytes are sent to stdout instead, which is
 * left open.
 * Any failure to open, write or close the destination prints a message on
 * stderr and ends the program with exit(1). */
static void outputBuffer(const void* buf, size_t size, const char* const path)
{
    const unsigned char* const ip = (const unsigned char*)buf;
    /* name used in messages : never hand a NULL pointer to a %s conversion */
    const char* const displayName = path ? path : "(stdout)";
    FILE* const out = path ? fopen(path, "wb") : stdout;
    size_t written = 0;

    if (!out) {
        fprintf(stderr, "Failed to open file at %s: ", displayName);
        perror(NULL);
        exit(1);
    }

    while (written < size) {
        size_t const n = fwrite(ip + written, 1, size - written, out);
        written += n;
        /* n == 0 : no progress was made, retrying would loop forever */
        if (ferror(out) || n == 0) {
            fprintf(stderr, "Failed to write to file at %s: ", displayName);
            perror(NULL);
            exit(1);
        }
    }

    /* fclose() flushes what is still buffered : a failure means missing data */
    if (path && fclose(out) != 0) {
        fprintf(stderr, "Failed to close file at %s: ", displayName);
        perror(NULL);
        exit(1);
    }
}
| 1172 | |
| 1173 | static void initFrame(frame_t* fr) |
| 1174 | { |
| 1175 | memset(fr, 0, sizeof(*fr)); |
| 1176 | fr->data = fr->dataStart = FRAME_BUFFER; |
| 1177 | fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); |
| 1178 | fr->src = fr->srcStart = CONTENT_BUFFER; |
| 1179 | fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); |
| 1180 | |
| 1181 | /* init repeat codes */ |
| 1182 | fr->stats.rep[0] = 1; |
| 1183 | fr->stats.rep[1] = 4; |
| 1184 | fr->stats.rep[2] = 8; |
| 1185 | } |
| 1186 | |
| 1187 | /** |
| 1188 | * Generated a single zstd compressed block with no block/frame header. |
| 1189 | * Returns the final seed. |
| 1190 | */ |
| 1191 | static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info) |
| 1192 | { |
| 1193 | size_t blockContentSize; |
| 1194 | int blockWritten = 0; |
| 1195 | BYTE* op; |
| 1196 | DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed); |
| 1197 | initFrame(frame); |
| 1198 | op = (BYTE*)frame->data; |
| 1199 | |
| 1200 | while (!blockWritten) { |
| 1201 | size_t cSize; |
| 1202 | /* generate window size */ |
| 1203 | { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10); |
| 1204 | int const mantissa = RAND(&seed) % 8; |
| 1205 | frame->header.windowSize = (1U << (exponent + 10)); |
| 1206 | frame->header.windowSize += (frame->header.windowSize / 8) * mantissa; |
| 1207 | } |
| 1208 | |
| 1209 | /* generate content size */ |
| 1210 | { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); |
| 1211 | if (RAND(&seed) & 15) { |
| 1212 | /* some full size blocks */ |
| 1213 | blockContentSize = maxBlockSize; |
| 1214 | } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) { |
| 1215 | /* some small blocks <= 128 bytes*/ |
| 1216 | blockContentSize = RAND(&seed) % (1U << 7); |
| 1217 | } else { |
| 1218 | /* some variable size blocks */ |
| 1219 | blockContentSize = RAND(&seed) % maxBlockSize; |
| 1220 | } |
| 1221 | } |
| 1222 | |
| 1223 | /* try generating a compressed block */ |
| 1224 | frame->oldStats = frame->stats; |
| 1225 | frame->data = op; |
| 1226 | cSize = writeCompressedBlock(&seed, frame, blockContentSize, info); |
| 1227 | if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */ |
| 1228 | /* data doesn't compress -- try again */ |
| 1229 | frame->stats = frame->oldStats; /* don't update the stats */ |
| 1230 | DISPLAYLEVEL(5, " can't compress block : try again \n"); |
| 1231 | } else { |
| 1232 | blockWritten = 1; |
| 1233 | DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize); |
| 1234 | frame->src = (BYTE*)frame->src + blockContentSize; |
| 1235 | } |
| 1236 | } |
| 1237 | return seed; |
| 1238 | } |
| 1239 | |
| 1240 | /* Return the final seed */ |
| 1241 | static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info) |
| 1242 | { |
| 1243 | /* generate a complete frame */ |
| 1244 | DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed); |
| 1245 | initFrame(fr); |
| 1246 | |
| 1247 | writeFrameHeader(&seed, fr, info); |
| 1248 | writeBlocks(&seed, fr, info); |
| 1249 | writeChecksum(fr); |
| 1250 | |
| 1251 | return seed; |
| 1252 | } |
| 1253 | |
| 1254 | /*_******************************************************* |
| 1255 | * Dictionary Helper Functions |
| 1256 | *********************************************************/ |
| 1257 | /* returns 0 if successful, otherwise returns 1 upon error */ |
| 1258 | static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) |
| 1259 | { |
| 1260 | /* allocate space for samples */ |
| 1261 | int ret = 0; |
| 1262 | unsigned const numSamples = 4; |
| 1263 | size_t sampleSizes[4]; |
| 1264 | BYTE* const samples = malloc(5000*sizeof(BYTE)); |
| 1265 | if (samples == NULL) { |
| 1266 | DISPLAY("Error: could not allocate space for samples\n"); |
| 1267 | return 1; |
| 1268 | } |
| 1269 | |
| 1270 | /* generate samples */ |
| 1271 | { unsigned literalValue = 1; |
| 1272 | unsigned samplesPos = 0; |
| 1273 | size_t currSize = 1; |
| 1274 | while (literalValue <= 4) { |
| 1275 | sampleSizes[literalValue - 1] = currSize; |
| 1276 | { size_t k; |
| 1277 | for (k = 0; k < currSize; k++) { |
| 1278 | *(samples + (samplesPos++)) = (BYTE)literalValue; |
| 1279 | } } |
| 1280 | literalValue++; |
| 1281 | currSize *= 16; |
| 1282 | } } |
| 1283 | |
| 1284 | { size_t dictWriteSize = 0; |
| 1285 | ZDICT_params_t zdictParams; |
| 1286 | size_t const headerSize = MAX(dictSize/4, 256); |
| 1287 | size_t const dictContentSize = dictSize - headerSize; |
| 1288 | BYTE* const dictContent = fullDict + headerSize; |
| 1289 | if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { |
| 1290 | DISPLAY("Error: dictionary size is too small\n"); |
| 1291 | ret = 1; |
| 1292 | goto exitGenRandomDict; |
| 1293 | } |
| 1294 | |
| 1295 | /* init dictionary params */ |
| 1296 | memset(&zdictParams, 0, sizeof(zdictParams)); |
| 1297 | zdictParams.dictID = dictID; |
| 1298 | zdictParams.notificationLevel = 1; |
| 1299 | |
| 1300 | /* fill in dictionary content */ |
| 1301 | RAND_buffer(&seed, (void*)dictContent, dictContentSize); |
| 1302 | |
| 1303 | /* finalize dictionary with random samples */ |
| 1304 | dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, |
| 1305 | dictContent, dictContentSize, |
| 1306 | samples, sampleSizes, numSamples, |
| 1307 | zdictParams); |
| 1308 | |
| 1309 | if (ZDICT_isError(dictWriteSize)) { |
| 1310 | DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); |
| 1311 | ret = 1; |
| 1312 | } |
| 1313 | } |
| 1314 | |
| 1315 | exitGenRandomDict: |
| 1316 | free(samples); |
| 1317 | return ret; |
| 1318 | } |
| 1319 | |
| 1320 | static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ |
| 1321 | /* allocate space statically */ |
| 1322 | dictInfo dictOp; |
| 1323 | memset(&dictOp, 0, sizeof(dictOp)); |
| 1324 | dictOp.useDict = useDict; |
| 1325 | dictOp.dictContentSize = dictContentSize; |
| 1326 | dictOp.dictContent = dictContent; |
| 1327 | dictOp.dictID = dictID; |
| 1328 | return dictOp; |
| 1329 | } |
| 1330 | |
| 1331 | /*-******************************************************* |
| 1332 | * Test Mode |
| 1333 | *********************************************************/ |
| 1334 | |
/* Destination buffer used by the testDecode*() functions of this section :
 * decoders regenerate content here, and it is then compared against the
 * content that was generated alongside the frame or block. */
BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
| 1336 | |
| 1337 | static size_t testDecodeSimple(frame_t* fr) |
| 1338 | { |
| 1339 | /* test decoding the generated data with the simple API */ |
| 1340 | size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, |
| 1341 | fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); |
| 1342 | |
| 1343 | if (ZSTD_isError(ret)) return ret; |
| 1344 | |
| 1345 | if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, |
| 1346 | (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { |
| 1347 | return ERROR(corruption_detected); |
| 1348 | } |
| 1349 | |
| 1350 | return ret; |
| 1351 | } |
| 1352 | |
| 1353 | static size_t testDecodeStreaming(frame_t* fr) |
| 1354 | { |
| 1355 | /* test decoding the generated data with the streaming API */ |
| 1356 | ZSTD_DStream* zd = ZSTD_createDStream(); |
| 1357 | ZSTD_inBuffer in; |
| 1358 | ZSTD_outBuffer out; |
| 1359 | size_t ret; |
| 1360 | |
| 1361 | if (!zd) return ERROR(memory_allocation); |
| 1362 | |
| 1363 | in.src = fr->dataStart; |
| 1364 | in.pos = 0; |
| 1365 | in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; |
| 1366 | |
| 1367 | out.dst = DECOMPRESSED_BUFFER; |
| 1368 | out.pos = 0; |
| 1369 | out.size = ZSTD_DStreamOutSize(); |
| 1370 | |
| 1371 | ZSTD_initDStream(zd); |
| 1372 | while (1) { |
| 1373 | ret = ZSTD_decompressStream(zd, &out, &in); |
| 1374 | if (ZSTD_isError(ret)) goto cleanup; /* error */ |
| 1375 | if (ret == 0) break; /* frame is done */ |
| 1376 | |
| 1377 | /* force decoding to be done in chunks */ |
| 1378 | out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); |
| 1379 | } |
| 1380 | |
| 1381 | ret = out.pos; |
| 1382 | |
| 1383 | if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { |
| 1384 | return ERROR(corruption_detected); |
| 1385 | } |
| 1386 | |
| 1387 | cleanup: |
| 1388 | ZSTD_freeDStream(zd); |
| 1389 | return ret; |
| 1390 | } |
| 1391 | |
| 1392 | static size_t testDecodeWithDict(U32 seed, genType_e genType) |
| 1393 | { |
| 1394 | /* create variables */ |
| 1395 | size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; |
| 1396 | U32 const dictID = RAND(&seed); |
| 1397 | size_t errorDetected = 0; |
| 1398 | BYTE* const fullDict = malloc(dictSize); |
| 1399 | if (fullDict == NULL) { |
| 1400 | return ERROR(GENERIC); |
| 1401 | } |
| 1402 | |
| 1403 | /* generate random dictionary */ |
| 1404 | if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ |
| 1405 | errorDetected = ERROR(GENERIC); |
| 1406 | goto dictTestCleanup; |
| 1407 | } |
| 1408 | |
| 1409 | |
| 1410 | { frame_t fr; |
| 1411 | dictInfo info; |
| 1412 | ZSTD_DCtx* const dctx = ZSTD_createDCtx(); |
| 1413 | size_t ret; |
| 1414 | |
| 1415 | /* get dict info */ |
| 1416 | { size_t const headerSize = MAX(dictSize/4, 256); |
| 1417 | size_t const dictContentSize = dictSize-headerSize; |
| 1418 | BYTE* const dictContent = fullDict+headerSize; |
| 1419 | info = initDictInfo(1, dictContentSize, dictContent, dictID); |
| 1420 | } |
| 1421 | |
| 1422 | /* manually decompress and check difference */ |
| 1423 | if (genType == gt_frame) { |
| 1424 | /* Test frame */ |
| 1425 | generateFrame(seed, &fr, info); |
| 1426 | ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, |
| 1427 | fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, |
| 1428 | fullDict, dictSize); |
| 1429 | } else { |
| 1430 | /* Test block */ |
| 1431 | generateCompressedBlock(seed, &fr, info); |
| 1432 | ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); |
| 1433 | if (ZSTD_isError(ret)) { |
| 1434 | errorDetected = ret; |
| 1435 | ZSTD_freeDCtx(dctx); |
| 1436 | goto dictTestCleanup; |
| 1437 | } |
| 1438 | ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, |
| 1439 | fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); |
| 1440 | } |
| 1441 | ZSTD_freeDCtx(dctx); |
| 1442 | |
| 1443 | if (ZSTD_isError(ret)) { |
| 1444 | errorDetected = ret; |
| 1445 | goto dictTestCleanup; |
| 1446 | } |
| 1447 | |
| 1448 | if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { |
| 1449 | errorDetected = ERROR(corruption_detected); |
| 1450 | goto dictTestCleanup; |
| 1451 | } |
| 1452 | } |
| 1453 | |
| 1454 | dictTestCleanup: |
| 1455 | free(fullDict); |
| 1456 | return errorDetected; |
| 1457 | } |
| 1458 | |
| 1459 | static size_t testDecodeRawBlock(frame_t* fr) |
| 1460 | { |
| 1461 | ZSTD_DCtx* dctx = ZSTD_createDCtx(); |
| 1462 | size_t ret = ZSTD_decompressBegin(dctx); |
| 1463 | if (ZSTD_isError(ret)) return ret; |
| 1464 | |
| 1465 | ret = ZSTD_decompressBlock_deprecated( |
| 1466 | dctx, |
| 1467 | DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, |
| 1468 | fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); |
| 1469 | ZSTD_freeDCtx(dctx); |
| 1470 | if (ZSTD_isError(ret)) return ret; |
| 1471 | |
| 1472 | if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, |
| 1473 | (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { |
| 1474 | return ERROR(corruption_detected); |
| 1475 | } |
| 1476 | |
| 1477 | return ret; |
| 1478 | } |
| 1479 | |
| 1480 | static int runBlockTest(U32* seed) |
| 1481 | { |
| 1482 | frame_t fr; |
| 1483 | U32 const seedCopy = *seed; |
| 1484 | { dictInfo const info = initDictInfo(0, 0, NULL, 0); |
| 1485 | *seed = generateCompressedBlock(*seed, &fr, info); |
| 1486 | } |
| 1487 | |
| 1488 | { size_t const r = testDecodeRawBlock(&fr); |
| 1489 | if (ZSTD_isError(r)) { |
| 1490 | DISPLAY("Error in block mode on test seed %u: %s\n", |
| 1491 | (unsigned)seedCopy, ZSTD_getErrorName(r)); |
| 1492 | return 1; |
| 1493 | } |
| 1494 | } |
| 1495 | |
| 1496 | { size_t const r = testDecodeWithDict(*seed, gt_block); |
| 1497 | if (ZSTD_isError(r)) { |
| 1498 | DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", |
| 1499 | (unsigned)seedCopy, ZSTD_getErrorName(r)); |
| 1500 | return 1; |
| 1501 | } |
| 1502 | } |
| 1503 | return 0; |
| 1504 | } |
| 1505 | |
| 1506 | static int runFrameTest(U32* seed) |
| 1507 | { |
| 1508 | frame_t fr; |
| 1509 | U32 const seedCopy = *seed; |
| 1510 | { dictInfo const info = initDictInfo(0, 0, NULL, 0); |
| 1511 | *seed = generateFrame(*seed, &fr, info); |
| 1512 | } |
| 1513 | |
| 1514 | { size_t const r = testDecodeSimple(&fr); |
| 1515 | if (ZSTD_isError(r)) { |
| 1516 | DISPLAY("Error in simple mode on test seed %u: %s\n", |
| 1517 | (unsigned)seedCopy, ZSTD_getErrorName(r)); |
| 1518 | return 1; |
| 1519 | } |
| 1520 | } |
| 1521 | { size_t const r = testDecodeStreaming(&fr); |
| 1522 | if (ZSTD_isError(r)) { |
| 1523 | DISPLAY("Error in streaming mode on test seed %u: %s\n", |
| 1524 | (unsigned)seedCopy, ZSTD_getErrorName(r)); |
| 1525 | return 1; |
| 1526 | } |
| 1527 | } |
| 1528 | { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ |
| 1529 | if (ZSTD_isError(r)) { |
| 1530 | DISPLAY("Error in dictionary mode on test seed %u: %s\n", |
| 1531 | (unsigned)seedCopy, ZSTD_getErrorName(r)); |
| 1532 | return 1; |
| 1533 | } |
| 1534 | } |
| 1535 | return 0; |
| 1536 | } |
| 1537 | |
| 1538 | static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, |
| 1539 | genType_e genType) |
| 1540 | { |
| 1541 | unsigned fnum; |
| 1542 | |
| 1543 | UTIL_time_t const startClock = UTIL_getTime(); |
| 1544 | U64 const maxClockSpan = testDurationS * SEC_TO_MICRO; |
| 1545 | |
| 1546 | if (numFiles == 0 && !testDurationS) numFiles = 1; |
| 1547 | |
| 1548 | DISPLAY("seed: %u\n", (unsigned)seed); |
| 1549 | |
| 1550 | for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) { |
| 1551 | if (fnum < numFiles) |
| 1552 | DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); |
| 1553 | else |
| 1554 | DISPLAYUPDATE("\r%u ", fnum); |
| 1555 | |
| 1556 | { int const ret = (genType == gt_frame) ? |
| 1557 | runFrameTest(&seed) : |
| 1558 | runBlockTest(&seed); |
| 1559 | if (ret) return ret; |
| 1560 | } |
| 1561 | } |
| 1562 | |
| 1563 | DISPLAY("\r%u tests completed: ", fnum); |
| 1564 | DISPLAY("OK\n"); |
| 1565 | |
| 1566 | return 0; |
| 1567 | } |
| 1568 | |
| 1569 | /*-******************************************************* |
| 1570 | * File I/O |
| 1571 | *********************************************************/ |
| 1572 | |
| 1573 | static int generateFile(U32 seed, const char* const path, |
| 1574 | const char* const origPath, genType_e genType) |
| 1575 | { |
| 1576 | frame_t fr; |
| 1577 | |
| 1578 | DISPLAY("seed: %u\n", (unsigned)seed); |
| 1579 | |
| 1580 | { dictInfo const info = initDictInfo(0, 0, NULL, 0); |
| 1581 | if (genType == gt_frame) { |
| 1582 | generateFrame(seed, &fr, info); |
| 1583 | } else { |
| 1584 | generateCompressedBlock(seed, &fr, info); |
| 1585 | } |
| 1586 | } |
| 1587 | outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); |
| 1588 | if (origPath) { |
| 1589 | outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); |
| 1590 | } |
| 1591 | return 0; |
| 1592 | } |
| 1593 | |
| 1594 | static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, |
| 1595 | const char* const origPath, genType_e genType) |
| 1596 | { |
| 1597 | char outPath[MAX_PATH]; |
| 1598 | unsigned fnum; |
| 1599 | |
| 1600 | DISPLAY("seed: %u\n", (unsigned)seed); |
| 1601 | |
| 1602 | for (fnum = 0; fnum < numFiles; fnum++) { |
| 1603 | frame_t fr; |
| 1604 | |
| 1605 | DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); |
| 1606 | |
| 1607 | { dictInfo const info = initDictInfo(0, 0, NULL, 0); |
| 1608 | if (genType == gt_frame) { |
| 1609 | seed = generateFrame(seed, &fr, info); |
| 1610 | } else { |
| 1611 | seed = generateCompressedBlock(seed, &fr, info); |
| 1612 | } |
| 1613 | } |
| 1614 | |
| 1615 | if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { |
| 1616 | DISPLAY("Error: path too long\n"); |
| 1617 | return 1; |
| 1618 | } |
| 1619 | outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); |
| 1620 | |
| 1621 | if (origPath) { |
| 1622 | if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { |
| 1623 | DISPLAY("Error: path too long\n"); |
| 1624 | return 1; |
| 1625 | } |
| 1626 | outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); |
| 1627 | } |
| 1628 | } |
| 1629 | |
| 1630 | DISPLAY("\r%u/%u \n", fnum, numFiles); |
| 1631 | |
| 1632 | return 0; |
| 1633 | } |
| 1634 | |
| 1635 | static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, |
| 1636 | const char* const origPath, const size_t dictSize, |
| 1637 | genType_e genType) |
| 1638 | { |
| 1639 | char outPath[MAX_PATH]; |
| 1640 | BYTE* fullDict; |
| 1641 | U32 const dictID = RAND(&seed); |
| 1642 | int errorDetected = 0; |
| 1643 | |
| 1644 | if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { |
| 1645 | DISPLAY("Error: path too long\n"); |
| 1646 | return 1; |
| 1647 | } |
| 1648 | |
| 1649 | /* allocate space for the dictionary */ |
| 1650 | fullDict = malloc(dictSize); |
| 1651 | if (fullDict == NULL) { |
| 1652 | DISPLAY("Error: could not allocate space for full dictionary.\n"); |
| 1653 | return 1; |
| 1654 | } |
| 1655 | |
| 1656 | /* randomly generate the dictionary */ |
| 1657 | { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); |
| 1658 | if (ret != 0) { |
| 1659 | errorDetected = ret; |
| 1660 | goto dictCleanup; |
| 1661 | } |
| 1662 | } |
| 1663 | |
| 1664 | /* write out dictionary */ |
| 1665 | if (numFiles != 0) { |
| 1666 | if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { |
| 1667 | DISPLAY("Error: dictionary path too long\n"); |
| 1668 | errorDetected = 1; |
| 1669 | goto dictCleanup; |
| 1670 | } |
| 1671 | outputBuffer(fullDict, dictSize, outPath); |
| 1672 | } |
| 1673 | else { |
| 1674 | outputBuffer(fullDict, dictSize, "dictionary"); |
| 1675 | } |
| 1676 | |
| 1677 | /* generate random compressed/decompressed files */ |
| 1678 | { unsigned fnum; |
| 1679 | for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { |
| 1680 | frame_t fr; |
| 1681 | DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); |
| 1682 | { |
| 1683 | size_t const headerSize = MAX(dictSize/4, 256); |
| 1684 | size_t const dictContentSize = dictSize-headerSize; |
| 1685 | BYTE* const dictContent = fullDict+headerSize; |
| 1686 | dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); |
| 1687 | if (genType == gt_frame) { |
| 1688 | seed = generateFrame(seed, &fr, info); |
| 1689 | } else { |
| 1690 | seed = generateCompressedBlock(seed, &fr, info); |
| 1691 | } |
| 1692 | } |
| 1693 | |
| 1694 | if (numFiles != 0) { |
| 1695 | if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { |
| 1696 | DISPLAY("Error: path too long\n"); |
| 1697 | errorDetected = 1; |
| 1698 | goto dictCleanup; |
| 1699 | } |
| 1700 | outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); |
| 1701 | |
| 1702 | if (origPath) { |
| 1703 | if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { |
| 1704 | DISPLAY("Error: path too long\n"); |
| 1705 | errorDetected = 1; |
| 1706 | goto dictCleanup; |
| 1707 | } |
| 1708 | outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); |
| 1709 | } |
| 1710 | } |
| 1711 | else { |
| 1712 | outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); |
| 1713 | if (origPath) { |
| 1714 | outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); |
| 1715 | } |
| 1716 | } |
| 1717 | } |
| 1718 | } |
| 1719 | |
| 1720 | dictCleanup: |
| 1721 | free(fullDict); |
| 1722 | return errorDetected; |
| 1723 | } |
| 1724 | |
| 1725 | |
| 1726 | /*_******************************************************* |
| 1727 | * Command line |
| 1728 | *********************************************************/ |
| 1729 | static U32 makeSeed(void) |
| 1730 | { |
| 1731 | U32 t = (U32) time(NULL); |
| 1732 | return XXH32(&t, sizeof(t), 0) % 65536; |
| 1733 | } |
| 1734 | |
/*! readInt() :
 *  Reads the run of decimal digits found at `*argument` and advances
 *  `*argument` to the first character which is not a digit.
 * @return : the decimal value read, or 0 when no digit is present.
 *  Note : no overflow detection; the result wraps like any unsigned arithmetic */
static unsigned readInt(const char** argument)
{
    const char* cursor = *argument;
    unsigned total = 0;

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        total = total * 10 + (unsigned)(*cursor - '0');
    }

    *argument = cursor;
    return total;
}
| 1745 | |
/*! usage() :
 *  Prints the basic command line help on stderr (through DISPLAY).
 *  `programName` is inserted in the synopsis line. */
static void usage(const char* programName)
{
    /* single call : the only conversion in the whole text is the %s of the synopsis */
    DISPLAY( "Usage :\n"
             " %s [args]\n"
             "\n"
             "Arguments :\n"
             " -p<path> : select output path (default:stdout)\n"
             " in multiple files mode this should be a directory\n"
             " -o<path> : select path to output original file (default:no output)\n"
             " in multiple files mode this should be a directory\n"
             " -s# : select seed (default:random based on time)\n"
             " -n# : number of files to generate (default:1)\n"
             " -t : activate test mode (test files against libzstd instead of outputting them)\n"
             " -T# : length of time to run tests for\n"
             " -v : increase verbosity level (default:0, max:7)\n"
             " -h/H : display help/long help and exit\n",
             programName);
}
| 1763 | |
/*! advancedUsage() :
 *  Prints the basic help (see usage()), followed by the list of long
 *  `--option` arguments, on stderr (through DISPLAY). */
static void advancedUsage(const char* programName)
{
    static const char* const advancedLines[] = {
        "\n",
        "Advanced arguments :\n",
        " --content-size : always include the content size in the frame header\n",
        " --use-dict=# : include a dictionary used to decompress the corpus\n",
        " --gen-blocks : generate raw compressed blocks without block/frame headers\n",
        " --max-block-size-log=# : max block size log, must be in range [2, 17]\n",
        " --max-content-size-log=# : max content size log, must be <= 20\n",
        " (this is ignored with gen-blocks)\n"
    };
    size_t const nbLines = sizeof(advancedLines) / sizeof(advancedLines[0]);
    size_t lineNb;

    usage(programName);

    for (lineNb = 0; lineNb < nbLines; lineNb++) {
        DISPLAY("%s", advancedLines[lineNb]);
    }
}
| 1776 | |
/*! readU32FromChar() :
 *  Parses an unsigned decimal number at `*stringPtr`, optionally followed by
 *  a size suffix : K, KB, KiB (value * 1024) or M, MB, MiB (value * 1024 * 1024).
 *  `*stringPtr` is advanced to the first character that was not consumed.
 * @return : the parsed value (0 when there is no leading digit).
 *  Note : no overflow detection : the result wraps if it does not fit in `unsigned` */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    unsigned value = 0;

    /* decimal digits */
    while ((*cursor >= '0') && (*cursor <= '9')) {
        value = value * 10 + (unsigned)(*cursor - '0');
        cursor++;
    }

    /* optional multiplier, itself optionally followed by 'i' and/or 'B' */
    if ((*cursor == 'K') || (*cursor == 'M')) {
        value <<= (*cursor == 'M') ? 20 : 10;
        cursor++;
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value;
}
| 1796 | |
/** longCommandWArg() :
 *  Tests whether the string at *stringPtr starts with `longCommand`.
 *  On a match, *stringPtr is moved just past that prefix and @return is 1.
 *  Otherwise *stringPtr is left untouched and @return is 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }

    *stringPtr += prefixLen;
    return 1;
}
| 1809 | |
| 1810 | int main(int argc, char** argv) |
| 1811 | { |
| 1812 | U32 seed = 0; |
| 1813 | int seedset = 0; |
| 1814 | unsigned numFiles = 0; |
| 1815 | unsigned testDuration = 0; |
| 1816 | int testMode = 0; |
| 1817 | const char* path = NULL; |
| 1818 | const char* origPath = NULL; |
| 1819 | int useDict = 0; |
| 1820 | unsigned dictSize = (10 << 10); /* 10 kB default */ |
| 1821 | genType_e genType = gt_frame; |
| 1822 | |
| 1823 | int argNb; |
| 1824 | |
| 1825 | /* Check command line */ |
| 1826 | for (argNb=1; argNb<argc; argNb++) { |
| 1827 | const char* argument = argv[argNb]; |
| 1828 | if(!argument) continue; /* Protection if argument empty */ |
| 1829 | |
| 1830 | /* Handle commands. Aggregated commands are allowed */ |
| 1831 | if (argument[0]=='-') { |
| 1832 | argument++; |
| 1833 | while (*argument!=0) { |
| 1834 | switch(*argument) |
| 1835 | { |
| 1836 | case 'h': |
| 1837 | usage(argv[0]); |
| 1838 | return 0; |
| 1839 | case 'H': |
| 1840 | advancedUsage(argv[0]); |
| 1841 | return 0; |
| 1842 | case 'v': |
| 1843 | argument++; |
| 1844 | g_displayLevel++; |
| 1845 | break; |
| 1846 | case 's': |
| 1847 | argument++; |
| 1848 | seedset=1; |
| 1849 | seed = readInt(&argument); |
| 1850 | break; |
| 1851 | case 'n': |
| 1852 | argument++; |
| 1853 | numFiles = readInt(&argument); |
| 1854 | break; |
| 1855 | case 'T': |
| 1856 | argument++; |
| 1857 | testDuration = readInt(&argument); |
| 1858 | if (*argument == 'm') { |
| 1859 | testDuration *= 60; |
| 1860 | argument++; |
| 1861 | if (*argument == 'n') argument++; |
| 1862 | } |
| 1863 | break; |
| 1864 | case 'o': |
| 1865 | argument++; |
| 1866 | origPath = argument; |
| 1867 | argument += strlen(argument); |
| 1868 | break; |
| 1869 | case 'p': |
| 1870 | argument++; |
| 1871 | path = argument; |
| 1872 | argument += strlen(argument); |
| 1873 | break; |
| 1874 | case 't': |
| 1875 | argument++; |
| 1876 | testMode = 1; |
| 1877 | break; |
| 1878 | case '-': |
| 1879 | argument++; |
| 1880 | if (strcmp(argument, "content-size") == 0) { |
| 1881 | opts.contentSize = 1; |
| 1882 | } else if (longCommandWArg(&argument, "use-dict=")) { |
| 1883 | dictSize = readU32FromChar(&argument); |
| 1884 | useDict = 1; |
| 1885 | } else if (strcmp(argument, "gen-blocks") == 0) { |
| 1886 | genType = gt_block; |
| 1887 | } else if (longCommandWArg(&argument, "max-block-size-log=")) { |
| 1888 | U32 value = readU32FromChar(&argument); |
| 1889 | if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) { |
| 1890 | g_maxBlockSize = 1U << value; |
| 1891 | } |
| 1892 | } else if (longCommandWArg(&argument, "max-content-size-log=")) { |
| 1893 | U32 value = readU32FromChar(&argument); |
| 1894 | g_maxDecompressedSizeLog = |
| 1895 | MIN(MAX_DECOMPRESSED_SIZE_LOG, value); |
| 1896 | } else { |
| 1897 | advancedUsage(argv[0]); |
| 1898 | return 1; |
| 1899 | } |
| 1900 | argument += strlen(argument); |
| 1901 | break; |
| 1902 | default: |
| 1903 | usage(argv[0]); |
| 1904 | return 1; |
| 1905 | } } } } /* for (argNb=1; argNb<argc; argNb++) */ |
| 1906 | |
| 1907 | if (!seedset) { |
| 1908 | seed = makeSeed(); |
| 1909 | } |
| 1910 | |
| 1911 | if (testMode) { |
| 1912 | return runTestMode(seed, numFiles, testDuration, genType); |
| 1913 | } else { |
| 1914 | if (testDuration) { |
| 1915 | DISPLAY("Error: -T requires test mode (-t)\n\n"); |
| 1916 | usage(argv[0]); |
| 1917 | return 1; |
| 1918 | } |
| 1919 | } |
| 1920 | |
| 1921 | if (!path) { |
| 1922 | DISPLAY("Error: path is required in file generation mode\n"); |
| 1923 | usage(argv[0]); |
| 1924 | return 1; |
| 1925 | } |
| 1926 | |
| 1927 | if (numFiles == 0 && useDict == 0) { |
| 1928 | return generateFile(seed, path, origPath, genType); |
| 1929 | } else if (useDict == 0){ |
| 1930 | return generateCorpus(seed, numFiles, path, origPath, genType); |
| 1931 | } else { |
| 1932 | /* should generate files with a dictionary */ |
| 1933 | return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); |
| 1934 | } |
| 1935 | |
| 1936 | } |