git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / tests / decodecorpus.c
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#include <limits.h>
12#include <math.h>
13#include <stddef.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <time.h> /* time(), for seed random initialization */
18
19#include "util.h"
20#include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */
21#include "zstd.h"
22#include "zstd_internal.h"
23#include "mem.h"
24#define ZDICT_STATIC_LINKING_ONLY
25#include "zdict.h"
26
27/* Direct access to internal compression functions is required */
28#include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */
29#include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */
30
31#define XXH_STATIC_LINKING_ONLY
32#include "xxhash.h" /* XXH64 */
33
34#if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
35# define inline /* disable */
36#endif
37
38/*-************************************
39* DISPLAY Macros
40**************************************/
41#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
42#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
43static U32 g_displayLevel = 2;
44
45#define DISPLAYUPDATE(...) \
46 do { \
47 if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \
48 (g_displayLevel >= 4)) { \
49 g_displayClock = UTIL_getTime(); \
50 DISPLAY(__VA_ARGS__); \
51 if (g_displayLevel >= 4) fflush(stderr); \
52 } \
53 } while (0)
54
55static const U64 g_refreshRate = SEC_TO_MICRO / 6;
56static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
57
58#define CHECKERR(code) \
59 do { \
60 if (ZSTD_isError(code)) { \
61 DISPLAY("Error occurred while generating data: %s\n", \
62 ZSTD_getErrorName(code)); \
63 exit(1); \
64 } \
65 } while (0)
66
67
68/*-*******************************************************
69* Random function
70*********************************************************/
71static U32 RAND(U32* src)
72{
73#define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
74 static const U32 prime1 = 2654435761U;
75 static const U32 prime2 = 2246822519U;
76 U32 rand32 = *src;
77 rand32 *= prime1;
78 rand32 += prime2;
79 rand32 = RAND_rotl32(rand32, 13);
80 *src = rand32;
81 return RAND_rotl32(rand32, 27);
82#undef RAND_rotl32
83}
84
85#define DISTSIZE (8192)
86
87/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
88static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
89{
90 size_t i;
91 BYTE* op = ptr;
92
93 for (i = 0; i < size; i++) {
94 op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
95 }
96}
97
98/* Write `size` random bytes into `ptr` */
99static void RAND_buffer(U32* seed, void* ptr, size_t size)
100{
101 size_t i;
102 BYTE* op = ptr;
103
104 for (i = 0; i + 4 <= size; i += 4) {
105 MEM_writeLE32(op + i, RAND(seed));
106 }
107 for (; i < size; i++) {
108 op[i] = RAND(seed) & 0xff;
109 }
110}
111
112/* Write `size` bytes into `ptr` following the distribution `dist` */
113static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
114{
115 size_t i;
116 BYTE* op = ptr;
117
118 for (i = 0; i < size; i++) {
119 op[i] = dist[RAND(seed) % DISTSIZE];
120 }
121}
122
123/* Generate a random distribution where the frequency of each symbol follows a
124 * geometric distribution defined by `weight`
125 * `dist` should have size at least `DISTSIZE` */
126static void RAND_genDist(U32* seed, BYTE* dist, double weight)
127{
128 size_t i = 0;
129 size_t statesLeft = DISTSIZE;
130 BYTE symb = (BYTE) (RAND(seed) % 256);
131 BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
132
133 while (i < DISTSIZE) {
134 size_t states = ((size_t)(weight * (double)statesLeft)) + 1;
135 size_t j;
136 for (j = 0; j < states && i < DISTSIZE; j++, i++) {
137 dist[i] = symb;
138 }
139
140 symb += step;
141 statesLeft -= states;
142 }
143}
144
145/* Generates a random number in the range [min, max) */
146static inline U32 RAND_range(U32* seed, U32 min, U32 max)
147{
148 return (RAND(seed) % (max-min)) + min;
149}
150
151#define ROUND(x) ((U32)(x + 0.5))
152
153/* Generates a random number in an exponential distribution with mean `mean` */
154static double RAND_exp(U32* seed, double mean)
155{
156 double const u = RAND(seed) / (double) UINT_MAX;
157 return log(1-u) * (-mean);
158}
159
160/*-*******************************************************
161* Constants and Structs
162*********************************************************/
163const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"};
164
165#define MAX_DECOMPRESSED_SIZE_LOG 20
166#define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
167
168#define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
169
170#define MIN_SEQ_LEN (3)
171#define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
172
173#ifndef MAX_PATH
174 #ifdef PATH_MAX
175 #define MAX_PATH PATH_MAX
176 #else
177 #define MAX_PATH 256
178 #endif
179#endif
180
181BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
182BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
183BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX];
184
185seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
186BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */
187BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
188BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
189BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
190
191U64 WKSP[HUF_WORKSPACE_SIZE_U64];
192
193typedef struct {
194 size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
195 unsigned windowSize; /* contentSize >= windowSize means single segment */
196} frameHeader_t;
197
198/* For repeat modes */
199typedef struct {
200 U32 rep[ZSTD_REP_NUM];
201
202 int hufInit;
203 /* the distribution used in the previous block for repeat mode */
204 BYTE hufDist[DISTSIZE];
205 HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)];
206
207 int fseInit;
208 FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
209 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
210 FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
211
212 /* Symbols that were present in the previous distribution, for use with
213 * set_repeat */
214 BYTE litlengthSymbolSet[36];
215 BYTE offsetSymbolSet[29];
216 BYTE matchlengthSymbolSet[53];
217} cblockStats_t;
218
219typedef struct {
220 void* data;
221 void* dataStart;
222 void* dataEnd;
223
224 void* src;
225 void* srcStart;
226 void* srcEnd;
227
228 frameHeader_t header;
229
230 cblockStats_t stats;
231 cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
232} frame_t;
233
234typedef struct {
235 int useDict;
236 U32 dictID;
237 size_t dictContentSize;
238 BYTE* dictContent;
239} dictInfo;
240
241typedef enum {
242 gt_frame = 0, /* generate frames */
243 gt_block, /* generate compressed blocks without block/frame headers */
244} genType_e;
245
246#ifndef MIN
247 #define MIN(a, b) ((a) < (b) ? (a) : (b))
248#endif
249
250/*-*******************************************************
251* Global variables (set from command line)
252*********************************************************/
253U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */
254U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */
255
256/*-*******************************************************
257* Generator Functions
258*********************************************************/
259
260struct {
261 int contentSize; /* force the content size to be present */
262} opts; /* advanced options on generation */
263
264/* Generate and write a random frame header */
265static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
266{
267 BYTE* const op = frame->data;
268 size_t pos = 0;
269 frameHeader_t fh;
270
271 BYTE windowByte = 0;
272
273 int singleSegment = 0;
274 int contentSizeFlag = 0;
275 int fcsCode = 0;
276
277 memset(&fh, 0, sizeof(fh));
278
279 /* generate window size */
280 {
281 /* Follow window algorithm from specification */
282 int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
283 int const mantissa = RAND(seed) % 8;
284 windowByte = (BYTE) ((exponent << 3) | mantissa);
285 fh.windowSize = (1U << (exponent + 10));
286 fh.windowSize += fh.windowSize / 8 * mantissa;
287 }
288
289 {
290 /* Generate random content size */
291 size_t highBit;
292 if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) {
293 /* do content of at least 128 bytes */
294 highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog);
295 } else if (RAND(seed) & 3) {
296 /* do small content */
297 highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog));
298 } else {
299 /* 0 size frame */
300 highBit = 0;
301 }
302 fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
303
304 /* provide size sometimes */
305 contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
306
307 if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
308 /* do single segment sometimes */
309 fh.windowSize = (U32) fh.contentSize;
310 singleSegment = 1;
311 }
312 }
313
314 if (contentSizeFlag) {
315 /* Determine how large fcs field has to be */
316 int minFcsCode = (fh.contentSize >= 256) +
317 (fh.contentSize >= 65536 + 256) +
318 (fh.contentSize > 0xFFFFFFFFU);
319 if (!singleSegment && !minFcsCode) {
320 minFcsCode = 1;
321 }
322 fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
323 if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
324 }
325
326 /* write out the header */
327 MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
328 pos += 4;
329
330 {
331 /*
332 * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
333 * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
334 * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
335 * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
336 * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
337 */
338 int const dictBits = info.useDict ? 3 : 0;
339 BYTE const frameHeaderDescriptor =
340 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
341 op[pos++] = frameHeaderDescriptor;
342 }
343
344 if (!singleSegment) {
345 op[pos++] = windowByte;
346 }
347 if (info.useDict) {
348 MEM_writeLE32(op + pos, (U32) info.dictID);
349 pos += 4;
350 }
351 if (contentSizeFlag) {
352 switch (fcsCode) {
353 default: /* Impossible */
354 case 0: op[pos++] = (BYTE) fh.contentSize; break;
355 case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
356 case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
357 case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
358 }
359 }
360
361 DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize);
362 DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize);
363 DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag);
364 DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment);
365
366 frame->data = op + pos;
367 frame->header = fh;
368}
369
370/* Write a literal block in either raw or RLE form, return the literals size */
371static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
372{
373 BYTE* op = (BYTE*)frame->data;
374 int const type = RAND(seed) % 2;
375 int const sizeFormatDesc = RAND(seed) % 8;
376 size_t litSize;
377 size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
378
379 if (sizeFormatDesc == 0) {
380 /* Size_FormatDesc = ?0 */
381 maxLitSize = MIN(maxLitSize, 31);
382 } else if (sizeFormatDesc <= 4) {
383 /* Size_FormatDesc = 01 */
384 maxLitSize = MIN(maxLitSize, 4095);
385 } else {
386 /* Size_Format = 11 */
387 maxLitSize = MIN(maxLitSize, 1048575);
388 }
389
390 litSize = RAND(seed) % (maxLitSize + 1);
391 if (frame->src == frame->srcStart && litSize == 0) {
392 litSize = 1; /* no empty literals if there's nothing preceding this block */
393 }
394 if (litSize + 3 > contentSize) {
395 litSize = contentSize; /* no matches shorter than 3 are allowed */
396 }
397 /* use smallest size format that fits */
398 if (litSize < 32) {
399 op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
400 op += 1;
401 } else if (litSize < 4096) {
402 op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
403 op[1] = (litSize >> 4) & 0xff;
404 op += 2;
405 } else {
406 op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
407 op[1] = (litSize >> 4) & 0xff;
408 op[2] = (litSize >> 12) & 0xff;
409 op += 3;
410 }
411
412 if (type == 0) {
413 /* Raw literals */
414 DISPLAYLEVEL(4, " raw literals\n");
415
416 RAND_buffer(seed, LITERAL_BUFFER, litSize);
417 memcpy(op, LITERAL_BUFFER, litSize);
418 op += litSize;
419 } else {
420 /* RLE literals */
421 BYTE const symb = (BYTE) (RAND(seed) % 256);
422
423 DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb);
424
425 memset(LITERAL_BUFFER, symb, litSize);
426 op[0] = symb;
427 op++;
428 }
429
430 frame->data = op;
431
432 return litSize;
433}
434
435/* Generate a Huffman header for the given source */
436static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
437 const void* src, size_t srcSize)
438{
439 BYTE* const ostart = (BYTE*)dst;
440 BYTE* op = ostart;
441
442 unsigned huffLog = 11;
443 unsigned maxSymbolValue = 255;
444
445 unsigned count[HUF_SYMBOLVALUE_MAX+1];
446
447 /* Scan input and build symbol stats */
448 { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP));
449 assert(!HIST_isError(largest));
450 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */
451 if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
452 }
453
454 /* Build Huffman Tree */
455 /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
456 huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
457 DISPLAYLEVEL(6, " huffman log: %u\n", huffLog);
458 { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
459 CHECKERR(maxBits);
460 huffLog = (U32)maxBits;
461 }
462
463 /* Write table description header */
464 { size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
465 if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
466 op += hSize;
467 }
468
469 return op - ostart;
470}
471
472/* Write a Huffman coded literals block and return the literals size */
473static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
474{
475 BYTE* origop = (BYTE*)frame->data;
476 BYTE* opend = (BYTE*)frame->dataEnd;
477 BYTE* op;
478 BYTE* const ostart = origop;
479 int const sizeFormat = RAND(seed) % 4;
480 size_t litSize;
481 size_t hufHeaderSize = 0;
482 size_t compressedSize = 0;
483 size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize);
484
485 symbolEncodingType_e hType;
486
487 if (contentSize < 64) {
488 /* make sure we get reasonably-sized literals for compression */
489 return ERROR(GENERIC);
490 }
491
492 DISPLAYLEVEL(4, " compressed literals\n");
493
494 switch (sizeFormat) {
495 case 0: /* fall through, size is the same as case 1 */
496 case 1:
497 maxLitSize = MIN(maxLitSize, 1023);
498 origop += 3;
499 break;
500 case 2:
501 maxLitSize = MIN(maxLitSize, 16383);
502 origop += 4;
503 break;
504 case 3:
505 maxLitSize = MIN(maxLitSize, 262143);
506 origop += 5;
507 break;
508 default:; /* impossible */
509 }
510
511 do {
512 op = origop;
513 do {
514 litSize = RAND(seed) % (maxLitSize + 1);
515 } while (litSize < 32); /* avoid small literal sizes */
516 if (litSize + 3 > contentSize) {
517 litSize = contentSize; /* no matches shorter than 3 are allowed */
518 }
519
520 /* most of the time generate a new distribution */
521 if ((RAND(seed) & 3) || !frame->stats.hufInit) {
522 do {
523 if (RAND(seed) & 3) {
524 /* add 10 to ensure some compressibility */
525 double const weight = ((RAND(seed) % 90) + 10) / 100.0;
526
527 DISPLAYLEVEL(5, " distribution weight: %d%%\n",
528 (int)(weight * 100));
529
530 RAND_genDist(seed, frame->stats.hufDist, weight);
531 } else {
532 /* sometimes do restricted range literals to force
533 * non-huffman headers */
534 DISPLAYLEVEL(5, " small range literals\n");
535 RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
536 15);
537 }
538 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
539 litSize);
540
541 /* generate the header from the distribution instead of the
542 * actual data to avoid bugs with symbols that were in the
543 * distribution but never showed up in the output */
544 hufHeaderSize = writeHufHeader(
545 seed, frame->stats.hufTable, op, opend - op,
546 frame->stats.hufDist, DISTSIZE);
547 CHECKERR(hufHeaderSize);
548 /* repeat until a valid header is written */
549 } while (hufHeaderSize == 0);
550 op += hufHeaderSize;
551 hType = set_compressed;
552
553 frame->stats.hufInit = 1;
554 } else {
555 /* repeat the distribution/table from last time */
556 DISPLAYLEVEL(5, " huffman repeat stats\n");
557 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
558 litSize);
559 hufHeaderSize = 0;
560 hType = set_repeat;
561 }
562
563 do {
564 compressedSize =
565 sizeFormat == 0
566 ? HUF_compress1X_usingCTable(
567 op, opend - op, LITERAL_BUFFER, litSize,
568 frame->stats.hufTable, /* flags */ 0)
569 : HUF_compress4X_usingCTable(
570 op, opend - op, LITERAL_BUFFER, litSize,
571 frame->stats.hufTable, /* flags */ 0);
572 CHECKERR(compressedSize);
573 /* this only occurs when it could not compress or similar */
574 } while (compressedSize <= 0);
575
576 op += compressedSize;
577
578 compressedSize += hufHeaderSize;
579 DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize);
580 DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize);
581 if (compressedSize >= litSize) {
582 DISPLAYLEVEL(5, " trying again\n");
583 /* if we have to try again, reset the stats so we don't accidentally
584 * try to repeat a distribution we just made */
585 frame->stats = frame->oldStats;
586 } else {
587 break;
588 }
589 } while (1);
590
591 /* write header */
592 switch (sizeFormat) {
593 case 0: /* fall through, size is the same as case 1 */
594 case 1: {
595 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
596 ((U32)compressedSize << 14);
597 MEM_writeLE24(ostart, header);
598 break;
599 }
600 case 2: {
601 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
602 ((U32)compressedSize << 18);
603 MEM_writeLE32(ostart, header);
604 break;
605 }
606 case 3: {
607 U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
608 ((U32)compressedSize << 22);
609 MEM_writeLE32(ostart, header);
610 ostart[4] = (BYTE)(compressedSize >> 10);
611 break;
612 }
613 default:; /* impossible */
614 }
615
616 frame->data = op;
617 return litSize;
618}
619
620static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
621{
622 /* only do compressed for larger segments to avoid compressibility issues */
623 if (RAND(seed) & 7 && contentSize >= 64) {
624 return writeLiteralsBlockCompressed(seed, frame, contentSize);
625 } else {
626 return writeLiteralsBlockSimple(seed, frame, contentSize);
627 }
628}
629
630static inline void initSeqStore(seqStore_t *seqStore) {
631 seqStore->maxNbSeq = MAX_NB_SEQ;
632 seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX;
633 seqStore->sequencesStart = SEQUENCE_BUFFER;
634 seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
635 seqStore->llCode = SEQUENCE_LLCODE;
636 seqStore->mlCode = SEQUENCE_MLCODE;
637 seqStore->ofCode = SEQUENCE_OFCODE;
638
639 ZSTD_resetSeqStore(seqStore);
640}
641
642/* Randomly generate sequence commands */
643static U32
644generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
645 size_t contentSize, size_t literalsSize, dictInfo info)
646{
647 /* The total length of all the matches */
648 size_t const remainingMatch = contentSize - literalsSize;
649 size_t excessMatch = 0;
650 U32 numSequences = 0;
651 U32 i;
652
653 const BYTE* literals = LITERAL_BUFFER;
654 BYTE* srcPtr = frame->src;
655
656 if (literalsSize != contentSize) {
657 /* each match must be at least MIN_SEQ_LEN, so this is the maximum
658 * number of sequences we can have */
659 U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
660 numSequences = (RAND(seed) % maxSequences) + 1;
661
662 /* the extra match lengths we have to allocate to each sequence */
663 excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
664 }
665
666 DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch);
667 for (i = 0; i < numSequences; i++) {
668 /* Generate match and literal lengths by exponential distribution to
669 * ensure nice numbers */
670 U32 matchLen =
671 MIN_SEQ_LEN +
672 ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i)));
673 U32 literalLen =
674 (RAND(seed) & 7)
675 ? ROUND(RAND_exp(seed,
676 (double)literalsSize /
677 (double)(numSequences - i)))
678 : 0;
679 /* actual offset, code to send, and point to copy up to when shifting
680 * codes in the repeat offsets history */
681 U32 offset, offBase, repIndex;
682
683 /* bounds checks */
684 matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
685 literalLen = MIN(literalLen, (U32) literalsSize);
686 if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
687 if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
688
689 memcpy(srcPtr, literals, literalLen);
690 srcPtr += literalLen;
691 do {
692 if (RAND(seed) & 7) {
693 /* do a normal offset */
694 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
695 offset = (RAND(seed) %
696 MIN(frame->header.windowSize,
697 (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
698 1;
699 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
700 /* need to occasionally generate offsets that go past the start */
701 /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
702 U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
703 offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
704 if (offset > frame->header.windowSize) {
705 if (lenPastStart < MIN_SEQ_LEN) {
706 /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
707 /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
708 /* make sure lenPastStart does not go past dictionary start though */
709 lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
710 offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
711 }
712 { U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
713 matchLen = MIN(matchLen, matchLenBound);
714 }
715 }
716 }
717 offBase = OFFSET_TO_OFFBASE(offset);
718 repIndex = 2;
719 } else {
720 /* do a repeat offset */
721 U32 const randomRepIndex = RAND(seed) % 3;
722 offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1); /* expects values between 1 & 3 */
723 if (literalLen > 0) {
724 offset = frame->stats.rep[randomRepIndex];
725 repIndex = randomRepIndex;
726 } else {
727 /* special case : literalLen == 0 */
728 offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1
729 : frame->stats.rep[randomRepIndex + 1];
730 repIndex = MIN(2, randomRepIndex + 1);
731 }
732 }
733 } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
734
735 { BYTE* const dictEnd = info.dictContent + info.dictContentSize;
736 size_t j;
737 for (j = 0; j < matchLen; j++) {
738 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
739 /* copy from dictionary instead of literals */
740 size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
741 *srcPtr = *(dictEnd - dictOffset);
742 }
743 else {
744 *srcPtr = *(srcPtr-offset);
745 }
746 srcPtr++;
747 } }
748
749 { int r;
750 for (r = repIndex; r > 0; r--) {
751 frame->stats.rep[r] = frame->stats.rep[r - 1];
752 }
753 frame->stats.rep[0] = offset;
754 }
755
756 DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u",
757 (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen);
758 DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
759 (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i);
760 DISPLAYLEVEL(6, "\n");
761 if (OFFBASE_IS_REPCODE(offBase)) { /* expects sumtype numeric representation of ZSTD_storeSeq() */
762 DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex);
763 }
764 /* use libzstd sequence handling */
765 ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen,
766 offBase, matchLen);
767
768 literalsSize -= literalLen;
769 excessMatch -= (matchLen - MIN_SEQ_LEN);
770 literals += literalLen;
771 }
772
773 memcpy(srcPtr, literals, literalsSize);
774 srcPtr += literalsSize;
775 DISPLAYLEVEL(6, " excess literals: %5u ", (unsigned)literalsSize);
776 DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
777 DISPLAYLEVEL(6, "\n");
778
779 return numSequences;
780}
781
782static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
783{
784 size_t i;
785
786 memset(set, 0, (size_t)maxSymbolValue+1);
787
788 for (i = 0; i < len; i++) {
789 set[symbols[i]] = 1;
790 }
791}
792
793static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
794{
795 size_t i;
796
797 for (i = 0; i < len; i++) {
798 if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
799 return 0;
800 }
801 }
802 return 1;
803}
804
805static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
806 size_t nbSeq)
807{
808 /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
809 unsigned count[MaxSeq+1];
810 S16 norm[MaxSeq+1];
811 FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
812 FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
813 FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
814 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
815 const seqDef* const sequences = seqStorePtr->sequencesStart;
816 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
817 const BYTE* const llCodeTable = seqStorePtr->llCode;
818 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
819 BYTE* const oend = (BYTE*)frame->dataEnd;
820 BYTE* op = (BYTE*)frame->data;
821 BYTE* seqHead;
822 BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)];
823
824 /* literals compressing block removed so that can be done separately */
825
826 /* Sequences Header */
827 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
828 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
829 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
830 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
831
832 if (nbSeq==0) {
833 frame->data = op;
834 return 0;
835 }
836
837 /* seqHead : flags for FSE encoding type */
838 seqHead = op++;
839
840 /* convert length/distances into codes */
841 ZSTD_seqToCodes(seqStorePtr);
842
843 /* CTable for Literal Lengths */
844 { unsigned max = MaxLL;
845 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
846 assert(!HIST_isError(mostFrequent));
847 if (frame->stats.fseInit && !(RAND(seed) & 3) &&
848 isSymbolSubset(llCodeTable, nbSeq,
849 frame->stats.litlengthSymbolSet, 35)) {
850 /* maybe do repeat mode if we're allowed to */
851 LLtype = set_repeat;
852 } else if (mostFrequent == nbSeq) {
853 /* do RLE if we have the chance */
854 *op++ = llCodeTable[0];
855 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
856 LLtype = set_rle;
857 } else if (!(RAND(seed) & 3)) {
858 /* maybe use the default distribution */
859 CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)));
860 LLtype = set_basic;
861 } else {
862 /* fall back on a full table */
863 size_t nbSeq_1 = nbSeq;
864 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
865 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
866 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
867 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
868 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
869 op += NCountSize; }
870 CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)));
871 LLtype = set_compressed;
872 } }
873
874 /* CTable for Offsets */
875 /* see Literal Lengths for descriptions of mode choices */
876 { unsigned max = MaxOff;
877 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
878 assert(!HIST_isError(mostFrequent));
879 if (frame->stats.fseInit && !(RAND(seed) & 3) &&
880 isSymbolSubset(ofCodeTable, nbSeq,
881 frame->stats.offsetSymbolSet, 28)) {
882 Offtype = set_repeat;
883 } else if (mostFrequent == nbSeq) {
884 *op++ = ofCodeTable[0];
885 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
886 Offtype = set_rle;
887 } else if (!(RAND(seed) & 3)) {
888 FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
889 Offtype = set_basic;
890 } else {
891 size_t nbSeq_1 = nbSeq;
892 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
893 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
894 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
895 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
896 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
897 op += NCountSize; }
898 FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
899 Offtype = set_compressed;
900 } }
901
902 /* CTable for MatchLengths */
903 /* see Literal Lengths for descriptions of mode choices */
904 { unsigned max = MaxML;
905 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */
906 assert(!HIST_isError(mostFrequent));
907 if (frame->stats.fseInit && !(RAND(seed) & 3) &&
908 isSymbolSubset(mlCodeTable, nbSeq,
909 frame->stats.matchlengthSymbolSet, 52)) {
910 MLtype = set_repeat;
911 } else if (mostFrequent == nbSeq) {
912 *op++ = *mlCodeTable;
913 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
914 MLtype = set_rle;
915 } else if (!(RAND(seed) & 3)) {
916 /* sometimes do default distribution */
917 FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
918 MLtype = set_basic;
919 } else {
920 /* fall back on table */
921 size_t nbSeq_1 = nbSeq;
922 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
923 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
924 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048);
925 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
926 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
927 op += NCountSize; }
928 FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
929 MLtype = set_compressed;
930 } }
931 frame->stats.fseInit = 1;
932 initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
933 initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
934 initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
935
936 DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype);
937
938 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
939
940 /* Encoding Sequences */
941 { BIT_CStream_t blockStream;
942 FSE_CState_t stateMatchLength;
943 FSE_CState_t stateOffsetBits;
944 FSE_CState_t stateLitLength;
945
946 RETURN_ERROR_IF(
947 ERR_isError(BIT_initCStream(&blockStream, op, oend-op)),
948 dstSize_tooSmall, "not enough space remaining");
949
950 /* first symbols */
951 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
952 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
953 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
954 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
955 if (MEM_32bits()) BIT_flushBits(&blockStream);
956 BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
957 if (MEM_32bits()) BIT_flushBits(&blockStream);
958 BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
959 BIT_flushBits(&blockStream);
960
961 { size_t n;
962 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
963 BYTE const llCode = llCodeTable[n];
964 BYTE const ofCode = ofCodeTable[n];
965 BYTE const mlCode = mlCodeTable[n];
966 U32 const llBits = LL_bits[llCode];
967 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
968 U32 const mlBits = ML_bits[mlCode];
969 /* (7)*/ /* (7)*/
970 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
971 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
972 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
973 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
974 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
975 BIT_flushBits(&blockStream); /* (7)*/
976 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
977 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
978 BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
979 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
980 BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
981 BIT_flushBits(&blockStream); /* (7)*/
982 } }
983
984 FSE_flushCState(&blockStream, &stateMatchLength);
985 FSE_flushCState(&blockStream, &stateOffsetBits);
986 FSE_flushCState(&blockStream, &stateLitLength);
987
988 { size_t const streamSize = BIT_closeCStream(&blockStream);
989 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
990 op += streamSize;
991 } }
992
993 frame->data = op;
994
995 return 0;
996}
997
998static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
999 size_t literalsSize, dictInfo info)
1000{
1001 seqStore_t seqStore;
1002 size_t numSequences;
1003
1004
1005 initSeqStore(&seqStore);
1006
1007 /* randomly generate sequences */
1008 numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
1009 /* write them out to the frame data */
1010 CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
1011
1012 return numSequences;
1013}
1014
1015static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
1016{
1017 BYTE* const blockStart = (BYTE*)frame->data;
1018 size_t literalsSize;
1019 size_t nbSeq;
1020
1021 DISPLAYLEVEL(4, " compressed block:\n");
1022
1023 literalsSize = writeLiteralsBlock(seed, frame, contentSize);
1024
1025 DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize);
1026
1027 nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
1028
1029 DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq);
1030
1031 return (BYTE*)frame->data - blockStart;
1032}
1033
1034static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
1035 int lastBlock, dictInfo info)
1036{
1037 int const blockTypeDesc = RAND(seed) % 8;
1038 size_t blockSize;
1039 int blockType;
1040
1041 BYTE *const header = (BYTE*)frame->data;
1042 BYTE *op = header + 3;
1043
1044 DISPLAYLEVEL(4, " block:\n");
1045 DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize);
1046 DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no");
1047
1048 if (blockTypeDesc == 0) {
1049 /* Raw data frame */
1050
1051 RAND_buffer(seed, frame->src, contentSize);
1052 memcpy(op, frame->src, contentSize);
1053
1054 op += contentSize;
1055 blockType = 0;
1056 blockSize = contentSize;
1057 } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) {
1058 /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/
1059 BYTE const symbol = RAND(seed) & 0xff;
1060
1061 op[0] = symbol;
1062 memset(frame->src, symbol, contentSize);
1063
1064 op++;
1065 blockType = 1;
1066 blockSize = contentSize;
1067 } else {
1068 /* compressed, most common */
1069 size_t compressedSize;
1070 blockType = 2;
1071
1072 frame->oldStats = frame->stats;
1073
1074 frame->data = op;
1075 compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
1076 if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */
1077 blockType = 0;
1078 memcpy(op, frame->src, contentSize);
1079
1080 op += contentSize;
1081 blockSize = contentSize; /* fall back on raw block if data doesn't
1082 compress */
1083
1084 frame->stats = frame->oldStats; /* don't update the stats */
1085 } else {
1086 op += compressedSize;
1087 blockSize = compressedSize;
1088 }
1089 }
1090 frame->src = (BYTE*)frame->src + contentSize;
1091
1092 DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]);
1093 DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize);
1094
1095 header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
1096 MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
1097
1098 frame->data = op;
1099}
1100
1101static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
1102{
1103 size_t contentLeft = frame->header.contentSize;
1104 size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1105 while (1) {
1106 /* 1 in 4 chance of ending frame */
1107 int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
1108 size_t blockContentSize;
1109 if (lastBlock) {
1110 blockContentSize = contentLeft;
1111 } else {
1112 if (contentLeft > 0 && (RAND(seed) & 7)) {
1113 /* some variable size block */
1114 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
1115 } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
1116 /* some full size block */
1117 blockContentSize = maxBlockSize;
1118 } else {
1119 /* some empty block */
1120 blockContentSize = 0;
1121 }
1122 }
1123
1124 writeBlock(seed, frame, blockContentSize, lastBlock, info);
1125
1126 contentLeft -= blockContentSize;
1127 if (lastBlock) break;
1128 }
1129}
1130
1131static void writeChecksum(frame_t* frame)
1132{
1133 /* write checksum so implementations can verify their output */
1134 U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
1135 DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest);
1136 MEM_writeLE32(frame->data, (U32)digest);
1137 frame->data = (BYTE*)frame->data + 4;
1138}
1139
1140static void outputBuffer(const void* buf, size_t size, const char* const path)
1141{
1142 /* write data out to file */
1143 const BYTE* ip = (const BYTE*)buf;
1144 FILE* out;
1145 if (path) {
1146 out = fopen(path, "wb");
1147 } else {
1148 out = stdout;
1149 }
1150 if (!out) {
1151 fprintf(stderr, "Failed to open file at %s: ", path);
1152 perror(NULL);
1153 exit(1);
1154 }
1155
1156 { size_t fsize = size;
1157 size_t written = 0;
1158 while (written < fsize) {
1159 written += fwrite(ip + written, 1, fsize - written, out);
1160 if (ferror(out)) {
1161 fprintf(stderr, "Failed to write to file at %s: ", path);
1162 perror(NULL);
1163 exit(1);
1164 }
1165 }
1166 }
1167
1168 if (path) {
1169 fclose(out);
1170 }
1171}
1172
1173static void initFrame(frame_t* fr)
1174{
1175 memset(fr, 0, sizeof(*fr));
1176 fr->data = fr->dataStart = FRAME_BUFFER;
1177 fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
1178 fr->src = fr->srcStart = CONTENT_BUFFER;
1179 fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
1180
1181 /* init repeat codes */
1182 fr->stats.rep[0] = 1;
1183 fr->stats.rep[1] = 4;
1184 fr->stats.rep[2] = 8;
1185}
1186
1187/**
1188 * Generated a single zstd compressed block with no block/frame header.
1189 * Returns the final seed.
1190 */
1191static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info)
1192{
1193 size_t blockContentSize;
1194 int blockWritten = 0;
1195 BYTE* op;
1196 DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed);
1197 initFrame(frame);
1198 op = (BYTE*)frame->data;
1199
1200 while (!blockWritten) {
1201 size_t cSize;
1202 /* generate window size */
1203 { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10);
1204 int const mantissa = RAND(&seed) % 8;
1205 frame->header.windowSize = (1U << (exponent + 10));
1206 frame->header.windowSize += (frame->header.windowSize / 8) * mantissa;
1207 }
1208
1209 /* generate content size */
1210 { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize);
1211 if (RAND(&seed) & 15) {
1212 /* some full size blocks */
1213 blockContentSize = maxBlockSize;
1214 } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) {
1215 /* some small blocks <= 128 bytes*/
1216 blockContentSize = RAND(&seed) % (1U << 7);
1217 } else {
1218 /* some variable size blocks */
1219 blockContentSize = RAND(&seed) % maxBlockSize;
1220 }
1221 }
1222
1223 /* try generating a compressed block */
1224 frame->oldStats = frame->stats;
1225 frame->data = op;
1226 cSize = writeCompressedBlock(&seed, frame, blockContentSize, info);
1227 if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
1228 /* data doesn't compress -- try again */
1229 frame->stats = frame->oldStats; /* don't update the stats */
1230 DISPLAYLEVEL(5, " can't compress block : try again \n");
1231 } else {
1232 blockWritten = 1;
1233 DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize);
1234 frame->src = (BYTE*)frame->src + blockContentSize;
1235 }
1236 }
1237 return seed;
1238}
1239
1240/* Return the final seed */
1241static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
1242{
1243 /* generate a complete frame */
1244 DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
1245 initFrame(fr);
1246
1247 writeFrameHeader(&seed, fr, info);
1248 writeBlocks(&seed, fr, info);
1249 writeChecksum(fr);
1250
1251 return seed;
1252}
1253
1254/*_*******************************************************
1255* Dictionary Helper Functions
1256*********************************************************/
1257/* returns 0 if successful, otherwise returns 1 upon error */
1258static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict)
1259{
1260 /* allocate space for samples */
1261 int ret = 0;
1262 unsigned const numSamples = 4;
1263 size_t sampleSizes[4];
1264 BYTE* const samples = malloc(5000*sizeof(BYTE));
1265 if (samples == NULL) {
1266 DISPLAY("Error: could not allocate space for samples\n");
1267 return 1;
1268 }
1269
1270 /* generate samples */
1271 { unsigned literalValue = 1;
1272 unsigned samplesPos = 0;
1273 size_t currSize = 1;
1274 while (literalValue <= 4) {
1275 sampleSizes[literalValue - 1] = currSize;
1276 { size_t k;
1277 for (k = 0; k < currSize; k++) {
1278 *(samples + (samplesPos++)) = (BYTE)literalValue;
1279 } }
1280 literalValue++;
1281 currSize *= 16;
1282 } }
1283
1284 { size_t dictWriteSize = 0;
1285 ZDICT_params_t zdictParams;
1286 size_t const headerSize = MAX(dictSize/4, 256);
1287 size_t const dictContentSize = dictSize - headerSize;
1288 BYTE* const dictContent = fullDict + headerSize;
1289 if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
1290 DISPLAY("Error: dictionary size is too small\n");
1291 ret = 1;
1292 goto exitGenRandomDict;
1293 }
1294
1295 /* init dictionary params */
1296 memset(&zdictParams, 0, sizeof(zdictParams));
1297 zdictParams.dictID = dictID;
1298 zdictParams.notificationLevel = 1;
1299
1300 /* fill in dictionary content */
1301 RAND_buffer(&seed, (void*)dictContent, dictContentSize);
1302
1303 /* finalize dictionary with random samples */
1304 dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
1305 dictContent, dictContentSize,
1306 samples, sampleSizes, numSamples,
1307 zdictParams);
1308
1309 if (ZDICT_isError(dictWriteSize)) {
1310 DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
1311 ret = 1;
1312 }
1313 }
1314
1315exitGenRandomDict:
1316 free(samples);
1317 return ret;
1318}
1319
1320static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
1321 /* allocate space statically */
1322 dictInfo dictOp;
1323 memset(&dictOp, 0, sizeof(dictOp));
1324 dictOp.useDict = useDict;
1325 dictOp.dictContentSize = dictContentSize;
1326 dictOp.dictContent = dictContent;
1327 dictOp.dictID = dictID;
1328 return dictOp;
1329}
1330
1331/*-*******************************************************
1332* Test Mode
1333*********************************************************/
1334
1335BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
1336
1337static size_t testDecodeSimple(frame_t* fr)
1338{
1339 /* test decoding the generated data with the simple API */
1340 size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1341 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1342
1343 if (ZSTD_isError(ret)) return ret;
1344
1345 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1346 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1347 return ERROR(corruption_detected);
1348 }
1349
1350 return ret;
1351}
1352
1353static size_t testDecodeStreaming(frame_t* fr)
1354{
1355 /* test decoding the generated data with the streaming API */
1356 ZSTD_DStream* zd = ZSTD_createDStream();
1357 ZSTD_inBuffer in;
1358 ZSTD_outBuffer out;
1359 size_t ret;
1360
1361 if (!zd) return ERROR(memory_allocation);
1362
1363 in.src = fr->dataStart;
1364 in.pos = 0;
1365 in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
1366
1367 out.dst = DECOMPRESSED_BUFFER;
1368 out.pos = 0;
1369 out.size = ZSTD_DStreamOutSize();
1370
1371 ZSTD_initDStream(zd);
1372 while (1) {
1373 ret = ZSTD_decompressStream(zd, &out, &in);
1374 if (ZSTD_isError(ret)) goto cleanup; /* error */
1375 if (ret == 0) break; /* frame is done */
1376
1377 /* force decoding to be done in chunks */
1378 out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
1379 }
1380
1381 ret = out.pos;
1382
1383 if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
1384 return ERROR(corruption_detected);
1385 }
1386
1387cleanup:
1388 ZSTD_freeDStream(zd);
1389 return ret;
1390}
1391
1392static size_t testDecodeWithDict(U32 seed, genType_e genType)
1393{
1394 /* create variables */
1395 size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
1396 U32 const dictID = RAND(&seed);
1397 size_t errorDetected = 0;
1398 BYTE* const fullDict = malloc(dictSize);
1399 if (fullDict == NULL) {
1400 return ERROR(GENERIC);
1401 }
1402
1403 /* generate random dictionary */
1404 if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */
1405 errorDetected = ERROR(GENERIC);
1406 goto dictTestCleanup;
1407 }
1408
1409
1410 { frame_t fr;
1411 dictInfo info;
1412 ZSTD_DCtx* const dctx = ZSTD_createDCtx();
1413 size_t ret;
1414
1415 /* get dict info */
1416 { size_t const headerSize = MAX(dictSize/4, 256);
1417 size_t const dictContentSize = dictSize-headerSize;
1418 BYTE* const dictContent = fullDict+headerSize;
1419 info = initDictInfo(1, dictContentSize, dictContent, dictID);
1420 }
1421
1422 /* manually decompress and check difference */
1423 if (genType == gt_frame) {
1424 /* Test frame */
1425 generateFrame(seed, &fr, info);
1426 ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1427 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
1428 fullDict, dictSize);
1429 } else {
1430 /* Test block */
1431 generateCompressedBlock(seed, &fr, info);
1432 ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize);
1433 if (ZSTD_isError(ret)) {
1434 errorDetected = ret;
1435 ZSTD_freeDCtx(dctx);
1436 goto dictTestCleanup;
1437 }
1438 ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1439 fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart);
1440 }
1441 ZSTD_freeDCtx(dctx);
1442
1443 if (ZSTD_isError(ret)) {
1444 errorDetected = ret;
1445 goto dictTestCleanup;
1446 }
1447
1448 if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
1449 errorDetected = ERROR(corruption_detected);
1450 goto dictTestCleanup;
1451 }
1452 }
1453
1454dictTestCleanup:
1455 free(fullDict);
1456 return errorDetected;
1457}
1458
1459static size_t testDecodeRawBlock(frame_t* fr)
1460{
1461 ZSTD_DCtx* dctx = ZSTD_createDCtx();
1462 size_t ret = ZSTD_decompressBegin(dctx);
1463 if (ZSTD_isError(ret)) return ret;
1464
1465 ret = ZSTD_decompressBlock_deprecated(
1466 dctx,
1467 DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1468 fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1469 ZSTD_freeDCtx(dctx);
1470 if (ZSTD_isError(ret)) return ret;
1471
1472 if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1473 (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1474 return ERROR(corruption_detected);
1475 }
1476
1477 return ret;
1478}
1479
1480static int runBlockTest(U32* seed)
1481{
1482 frame_t fr;
1483 U32 const seedCopy = *seed;
1484 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1485 *seed = generateCompressedBlock(*seed, &fr, info);
1486 }
1487
1488 { size_t const r = testDecodeRawBlock(&fr);
1489 if (ZSTD_isError(r)) {
1490 DISPLAY("Error in block mode on test seed %u: %s\n",
1491 (unsigned)seedCopy, ZSTD_getErrorName(r));
1492 return 1;
1493 }
1494 }
1495
1496 { size_t const r = testDecodeWithDict(*seed, gt_block);
1497 if (ZSTD_isError(r)) {
1498 DISPLAY("Error in block mode with dictionary on test seed %u: %s\n",
1499 (unsigned)seedCopy, ZSTD_getErrorName(r));
1500 return 1;
1501 }
1502 }
1503 return 0;
1504}
1505
1506static int runFrameTest(U32* seed)
1507{
1508 frame_t fr;
1509 U32 const seedCopy = *seed;
1510 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1511 *seed = generateFrame(*seed, &fr, info);
1512 }
1513
1514 { size_t const r = testDecodeSimple(&fr);
1515 if (ZSTD_isError(r)) {
1516 DISPLAY("Error in simple mode on test seed %u: %s\n",
1517 (unsigned)seedCopy, ZSTD_getErrorName(r));
1518 return 1;
1519 }
1520 }
1521 { size_t const r = testDecodeStreaming(&fr);
1522 if (ZSTD_isError(r)) {
1523 DISPLAY("Error in streaming mode on test seed %u: %s\n",
1524 (unsigned)seedCopy, ZSTD_getErrorName(r));
1525 return 1;
1526 }
1527 }
1528 { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */
1529 if (ZSTD_isError(r)) {
1530 DISPLAY("Error in dictionary mode on test seed %u: %s\n",
1531 (unsigned)seedCopy, ZSTD_getErrorName(r));
1532 return 1;
1533 }
1534 }
1535 return 0;
1536}
1537
1538static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS,
1539 genType_e genType)
1540{
1541 unsigned fnum;
1542
1543 UTIL_time_t const startClock = UTIL_getTime();
1544 U64 const maxClockSpan = testDurationS * SEC_TO_MICRO;
1545
1546 if (numFiles == 0 && !testDurationS) numFiles = 1;
1547
1548 DISPLAY("seed: %u\n", (unsigned)seed);
1549
1550 for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) {
1551 if (fnum < numFiles)
1552 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1553 else
1554 DISPLAYUPDATE("\r%u ", fnum);
1555
1556 { int const ret = (genType == gt_frame) ?
1557 runFrameTest(&seed) :
1558 runBlockTest(&seed);
1559 if (ret) return ret;
1560 }
1561 }
1562
1563 DISPLAY("\r%u tests completed: ", fnum);
1564 DISPLAY("OK\n");
1565
1566 return 0;
1567}
1568
1569/*-*******************************************************
1570* File I/O
1571*********************************************************/
1572
1573static int generateFile(U32 seed, const char* const path,
1574 const char* const origPath, genType_e genType)
1575{
1576 frame_t fr;
1577
1578 DISPLAY("seed: %u\n", (unsigned)seed);
1579
1580 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1581 if (genType == gt_frame) {
1582 generateFrame(seed, &fr, info);
1583 } else {
1584 generateCompressedBlock(seed, &fr, info);
1585 }
1586 }
1587 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1588 if (origPath) {
1589 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1590 }
1591 return 0;
1592}
1593
1594static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
1595 const char* const origPath, genType_e genType)
1596{
1597 char outPath[MAX_PATH];
1598 unsigned fnum;
1599
1600 DISPLAY("seed: %u\n", (unsigned)seed);
1601
1602 for (fnum = 0; fnum < numFiles; fnum++) {
1603 frame_t fr;
1604
1605 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1606
1607 { dictInfo const info = initDictInfo(0, 0, NULL, 0);
1608 if (genType == gt_frame) {
1609 seed = generateFrame(seed, &fr, info);
1610 } else {
1611 seed = generateCompressedBlock(seed, &fr, info);
1612 }
1613 }
1614
1615 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1616 DISPLAY("Error: path too long\n");
1617 return 1;
1618 }
1619 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1620
1621 if (origPath) {
1622 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1623 DISPLAY("Error: path too long\n");
1624 return 1;
1625 }
1626 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1627 }
1628 }
1629
1630 DISPLAY("\r%u/%u \n", fnum, numFiles);
1631
1632 return 0;
1633}
1634
1635static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
1636 const char* const origPath, const size_t dictSize,
1637 genType_e genType)
1638{
1639 char outPath[MAX_PATH];
1640 BYTE* fullDict;
1641 U32 const dictID = RAND(&seed);
1642 int errorDetected = 0;
1643
1644 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1645 DISPLAY("Error: path too long\n");
1646 return 1;
1647 }
1648
1649 /* allocate space for the dictionary */
1650 fullDict = malloc(dictSize);
1651 if (fullDict == NULL) {
1652 DISPLAY("Error: could not allocate space for full dictionary.\n");
1653 return 1;
1654 }
1655
1656 /* randomly generate the dictionary */
1657 { int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
1658 if (ret != 0) {
1659 errorDetected = ret;
1660 goto dictCleanup;
1661 }
1662 }
1663
1664 /* write out dictionary */
1665 if (numFiles != 0) {
1666 if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1667 DISPLAY("Error: dictionary path too long\n");
1668 errorDetected = 1;
1669 goto dictCleanup;
1670 }
1671 outputBuffer(fullDict, dictSize, outPath);
1672 }
1673 else {
1674 outputBuffer(fullDict, dictSize, "dictionary");
1675 }
1676
1677 /* generate random compressed/decompressed files */
1678 { unsigned fnum;
1679 for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
1680 frame_t fr;
1681 DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
1682 {
1683 size_t const headerSize = MAX(dictSize/4, 256);
1684 size_t const dictContentSize = dictSize-headerSize;
1685 BYTE* const dictContent = fullDict+headerSize;
1686 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
1687 if (genType == gt_frame) {
1688 seed = generateFrame(seed, &fr, info);
1689 } else {
1690 seed = generateCompressedBlock(seed, &fr, info);
1691 }
1692 }
1693
1694 if (numFiles != 0) {
1695 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1696 DISPLAY("Error: path too long\n");
1697 errorDetected = 1;
1698 goto dictCleanup;
1699 }
1700 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1701
1702 if (origPath) {
1703 if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1704 DISPLAY("Error: path too long\n");
1705 errorDetected = 1;
1706 goto dictCleanup;
1707 }
1708 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1709 }
1710 }
1711 else {
1712 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1713 if (origPath) {
1714 outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1715 }
1716 }
1717 }
1718 }
1719
1720dictCleanup:
1721 free(fullDict);
1722 return errorDetected;
1723}
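/* Note on the layout used above: the first MAX(dictSize/4, 256) bytes of
 * fullDict are reserved for the dictionary header (presumably the magic, dictID
 * and entropy tables written by genRandomDict()), and only the remainder is
 * handed to the generators as referenceable content. For the default 10 KiB
 * (10240-byte) dictionary:
 *
 *   fullDict: [ header: 2560 bytes | content: 7680 bytes ]
 *               ^fullDict            ^dictContent = fullDict + headerSize
 */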
1724
1725
1726/*_*******************************************************
1727* Command line
1728*********************************************************/
1729static U32 makeSeed(void)
1730{
1731 U32 t = (U32) time(NULL);
1732 return XXH32(&t, sizeof(t), 0) % 65536;
1733}
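/* Note: the modulo 65536 keeps seeds small, presumably so that the value shown
 * by the "seed: %u" message is easy to feed back through -s# when reproducing
 * a run. */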
1734
1735static unsigned readInt(const char** argument)
1736{
1737 unsigned val = 0;
1738 while ((**argument>='0') && (**argument<='9')) {
1739 val *= 10;
1740 val += **argument - '0';
1741 (*argument)++;
1742 }
1743 return val;
1744}
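/* Example: for "123m", readInt() returns 123 and leaves *argument pointing at
 * "m", which is how the -T handler in main() detects the minutes suffix; input
 * starting with a non-digit simply yields 0. */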
1745
1746static void usage(const char* programName)
1747{
1748 DISPLAY( "Usage :\n");
1749 DISPLAY( " %s [args]\n", programName);
1750 DISPLAY( "\n");
1751 DISPLAY( "Arguments :\n");
1752 DISPLAY( " -p<path> : select output path (default:stdout)\n");
1753 DISPLAY( " in multiple files mode this should be a directory\n");
1754 DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
1755 DISPLAY( " in multiple files mode this should be a directory\n");
1756 DISPLAY( " -s# : select seed (default:random based on time)\n");
1757 DISPLAY( " -n# : number of files to generate (default:1)\n");
1758 DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n");
1759 DISPLAY( " -T# : length of time to run tests, in seconds (append 'm' or 'mn' for minutes)\n");
1760 DISPLAY( " -v : increase verbosity level (default:2, max:7)\n");
1761 DISPLAY( " -h/H : display help/long help and exit\n");
1762}
1763
1764static void advancedUsage(const char* programName)
1765{
1766 usage(programName);
1767 DISPLAY( "\n");
1768 DISPLAY( "Advanced arguments :\n");
1769 DISPLAY( " --content-size : always include the content size in the frame header\n");
1770 DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n");
1771 DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n");
1772 DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n");
1773 DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n");
1774 DISPLAY( " (this is ignored with gen-blocks)\n");
1775}
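/* Illustrative invocation (binary name assumed to be `decodecorpus`):
 *
 *   decodecorpus -s42 -n100 -ptestdir -ooriginals --use-dict=16K
 *
 * writes 100 seeded .zst files into testdir/, their uncompressed counterparts
 * into originals/, plus a 16 KiB dictionary the generated frames depend on
 * (the --use-dict size goes through readU32FromChar() below, so K/M suffixes
 * are accepted). */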
1776
1777/*! readU32FromChar() :
1778 @return : unsigned integer value read from the input string.
1779 Allows and interprets the K, KB, KiB, M, MB and MiB suffixes.
1780 Will also modify `*stringPtr`, advancing it to the position where it stopped reading.
1781 Note : the result overflows silently if the digit string represents a value larger than UINT_MAX */
1782static unsigned readU32FromChar(const char** stringPtr)
1783{
1784 unsigned result = 0;
1785 while ((**stringPtr >='0') && (**stringPtr <='9'))
1786 result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
1787 if ((**stringPtr=='K') || (**stringPtr=='M')) {
1788 result <<= 10;
1789 if (**stringPtr=='M') result <<= 10;
1790 (*stringPtr)++ ;
1791 if (**stringPtr=='i') (*stringPtr)++;
1792 if (**stringPtr=='B') (*stringPtr)++;
1793 }
1794 return result;
1795}
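/* Examples of the suffix handling above:
 *   "10K", "10KB", "10KiB"  -> 10 << 10 = 10240
 *   "1M",  "1MB",  "1MiB"   -> 1 << 20  = 1048576
 * Digit strings larger than UINT_MAX wrap around silently. */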
1796
1797/** longCommandWArg() :
1798 * check if *stringPtr is the same as longCommand.
1799 * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
1800 * @return 0 and doesn't modify *stringPtr otherwise.
1801 */
1802static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
1803{
1804 size_t const comSize = strlen(longCommand);
1805 int const result = !strncmp(*stringPtr, longCommand, comSize);
1806 if (result) *stringPtr += comSize;
1807 return result;
1808}
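/* Sketch (illustrative only) of how the two helpers combine in the option
 * parser below:
 *
 *   const char* arg = "use-dict=16K";
 *   if (longCommandWArg(&arg, "use-dict="))   // arg now points at "16K"
 *       dictSize = readU32FromChar(&arg);     // -> 16384
 */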
1809
1810int main(int argc, char** argv)
1811{
1812 U32 seed = 0;
1813 int seedset = 0;
1814 unsigned numFiles = 0;
1815 unsigned testDuration = 0;
1816 int testMode = 0;
1817 const char* path = NULL;
1818 const char* origPath = NULL;
1819 int useDict = 0;
1820 unsigned dictSize = (10 << 10); /* 10 KiB (10240 bytes) default */
1821 genType_e genType = gt_frame;
1822
1823 int argNb;
1824
1825 /* Check command line */
1826 for (argNb=1; argNb<argc; argNb++) {
1827 const char* argument = argv[argNb];
1828 if(!argument) continue; /* Protection if argument is empty */
1829
1830 /* Handle commands. Aggregated commands are allowed */
1831 if (argument[0]=='-') {
1832 argument++;
1833 while (*argument!=0) {
1834 switch(*argument)
1835 {
1836 case 'h':
1837 usage(argv[0]);
1838 return 0;
1839 case 'H':
1840 advancedUsage(argv[0]);
1841 return 0;
1842 case 'v':
1843 argument++;
1844 g_displayLevel++;
1845 break;
1846 case 's':
1847 argument++;
1848 seedset=1;
1849 seed = readInt(&argument);
1850 break;
1851 case 'n':
1852 argument++;
1853 numFiles = readInt(&argument);
1854 break;
1855 case 'T':
1856 argument++;
1857 testDuration = readInt(&argument);
1858 if (*argument == 'm') {
1859 testDuration *= 60;
1860 argument++;
1861 if (*argument == 'n') argument++;
1862 }
1863 break;
1864 case 'o':
1865 argument++;
1866 origPath = argument;
1867 argument += strlen(argument);
1868 break;
1869 case 'p':
1870 argument++;
1871 path = argument;
1872 argument += strlen(argument);
1873 break;
1874 case 't':
1875 argument++;
1876 testMode = 1;
1877 break;
1878 case '-':
1879 argument++;
1880 if (strcmp(argument, "content-size") == 0) {
1881 opts.contentSize = 1;
1882 } else if (longCommandWArg(&argument, "use-dict=")) {
1883 dictSize = readU32FromChar(&argument);
1884 useDict = 1;
1885 } else if (strcmp(argument, "gen-blocks") == 0) {
1886 genType = gt_block;
1887 } else if (longCommandWArg(&argument, "max-block-size-log=")) {
1888 U32 value = readU32FromChar(&argument);
1889 if (value >= 2 && value <= ZSTD_BLOCKSIZELOG_MAX) { /* keep the log within the documented [2, 17] range */
1890 g_maxBlockSize = 1U << value;
1891 }
1892 } else if (longCommandWArg(&argument, "max-content-size-log=")) {
1893 U32 value = readU32FromChar(&argument);
1894 g_maxDecompressedSizeLog =
1895 MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
1896 } else {
1897 advancedUsage(argv[0]);
1898 return 1;
1899 }
1900 argument += strlen(argument);
1901 break;
1902 default:
1903 usage(argv[0]);
1904 return 1;
1905 } } } } /* for (argNb=1; argNb<argc; argNb++) */
1906
1907 if (!seedset) {
1908 seed = makeSeed();
1909 }
1910
1911 if (testMode) {
1912 return runTestMode(seed, numFiles, testDuration, genType);
1913 } else {
1914 if (testDuration) {
1915 DISPLAY("Error: -T requires test mode (-t)\n\n");
1916 usage(argv[0]);
1917 return 1;
1918 }
1919 }
1920
1921 if (!path) {
1922 DISPLAY("Error: path is required in file generation mode\n");
1923 usage(argv[0]);
1924 return 1;
1925 }
1926
1927 if (numFiles == 0 && useDict == 0) {
1928 return generateFile(seed, path, origPath, genType);
1929 } else if (useDict == 0) {
1930 return generateCorpus(seed, numFiles, path, origPath, genType);
1931 } else {
1932 /* should generate files with a dictionary */
1933 return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType);
1934 }
1935
1936}
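/* Dispatch summary (mirrors the branches above):
 *   -t                         -> runTestMode(); -T# optionally bounds its duration
 *   -p<path> only              -> generateFile(): a single output written to path
 *   -p<dir> -n#                -> generateCorpus(): numFiles outputs named z%06u.zst
 *   -p<dir> [-n#] --use-dict=# -> generateCorpusWithDict(): as above, plus the
 *                                 dictionary the outputs depend on
 * --gen-blocks switches every mode from full frames to raw compressed blocks. */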