git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / contrib / largeNbDicts / largeNbDicts.c
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11/* largeNbDicts
12 * This is a benchmark test tool
13 * dedicated to the specific case of dictionary decompression
14 * using a very large nb of dictionaries
15 * thus suffering latency from lots of cache misses.
16 * It's created in a bid to investigate performance and find optimizations. */
17
18
19/*--- Dependencies ---*/
20
21#include <stddef.h> /* size_t */
22#include <stdlib.h> /* malloc, free, abort, qsort*/
23#include <stdio.h> /* fprintf */
24#include <limits.h> /* UINT_MAX */
25#include <assert.h> /* assert */
26
27#include "util.h"
28#include "benchfn.h"
29#define ZSTD_STATIC_LINKING_ONLY
30#include "zstd.h"
31#include "zdict.h"
32
33
34/*--- Constants --- */
35
36#define KB *(1<<10)
37#define MB *(1<<20)
38
39#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
40#define DICTSIZE (4 KB)
41#define CLEVEL_DEFAULT 3
42#define DICT_LOAD_METHOD ZSTD_dlm_byCopy
43
44#define BENCH_TIME_DEFAULT_S 6
45#define RUN_TIME_DEFAULT_MS 1000
46#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
47
48#define DISPLAY_LEVEL_DEFAULT 3
49
50#define BENCH_SIZE_MAX (1200 MB)
51
52
53/*--- Macros ---*/
54
55#define CONTROL(c) { if (!(c)) abort(); }
56#undef MIN
57#define MIN(a,b) ((a) < (b) ? (a) : (b))
58
59
60/*--- Display Macros ---*/
61
62#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
63#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
64static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
65
66
67/*--- buffer_t ---*/
68
69typedef struct {
70 void* ptr;
71 size_t size;
72 size_t capacity;
73} buffer_t;
74
75static const buffer_t kBuffNull = { NULL, 0, 0 };
76
77/* @return : kBuffNull if any error */
78static buffer_t createBuffer(size_t capacity)
79{
80 assert(capacity > 0);
81 void* const ptr = malloc(capacity);
82 if (ptr==NULL) return kBuffNull;
83
84 buffer_t buffer;
85 buffer.ptr = ptr;
86 buffer.capacity = capacity;
87 buffer.size = 0;
88 return buffer;
89}
90
91static void freeBuffer(buffer_t buff)
92{
93 free(buff.ptr);
94}
95
96
97static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
98{
99 size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
100 buff->size = readSize;
101}
102
103
104/* @return : kBuffNull if any error */
105static buffer_t createBuffer_fromFile(const char* fileName)
106{
107 U64 const fileSize = UTIL_getFileSize(fileName);
108 size_t const bufferSize = (size_t) fileSize;
109
110 if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
111 assert((U64)bufferSize == fileSize); /* check overflow */
112
113 { FILE* const f = fopen(fileName, "rb");
114 if (f == NULL) return kBuffNull;
115
116 buffer_t buff = createBuffer(bufferSize);
117 CONTROL(buff.ptr != NULL);
118
119 fillBuffer_fromHandle(&buff, f);
120 CONTROL(buff.size == buff.capacity);
121
122 fclose(f); /* do nothing specific if fclose() fails */
123 return buff;
124 }
125}
126
127
128/* @return : kBuffNull if any error */
129static buffer_t
130createDictionaryBuffer(const char* dictionaryName,
131 const void* srcBuffer,
132 const size_t* srcBlockSizes, size_t nbBlocks,
133 size_t requestedDictSize)
134{
135 if (dictionaryName) {
136 DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
137 return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
138
139 } else {
140
141 DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
142 (unsigned)requestedDictSize);
143 void* const dictBuffer = malloc(requestedDictSize);
144 CONTROL(dictBuffer != NULL);
145
146 assert(nbBlocks <= UINT_MAX);
147 size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
148 srcBuffer,
149 srcBlockSizes, (unsigned)nbBlocks);
150 CONTROL(!ZSTD_isError(dictSize));
151
152 buffer_t result;
153 result.ptr = dictBuffer;
154 result.capacity = requestedDictSize;
155 result.size = dictSize;
156 return result;
157 }
158}
159
160/*! BMK_loadFiles() :
161 * Loads `buffer`, with content from files listed within `fileNamesTable`.
162 * Fills `buffer` entirely.
163 * @return : 0 on success, !=0 on error */
164static int loadFiles(void* buffer, size_t bufferSize,
165 size_t* fileSizes,
166 const char* const * fileNamesTable, unsigned nbFiles)
167{
168 size_t pos = 0, totalSize = 0;
169
170 for (unsigned n=0; n<nbFiles; n++) {
171 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
172 if (UTIL_isDirectory(fileNamesTable[n])) {
173 fileSizes[n] = 0;
174 continue;
175 }
176 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
177 fileSizes[n] = 0;
178 continue;
179 }
180
181 FILE* const f = fopen(fileNamesTable[n], "rb");
182 assert(f!=NULL);
183
184 assert(pos <= bufferSize);
185 assert(fileSize <= bufferSize - pos);
186
187 { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
188 assert(readSize == fileSize);
189 pos += readSize;
190 }
191 fileSizes[n] = (size_t)fileSize;
192 totalSize += (size_t)fileSize;
193 fclose(f);
194 }
195
196 assert(totalSize == bufferSize);
197 return 0;
198}
199
200
201
202/*--- slice_collection_t ---*/
203
204typedef struct {
205 void** slicePtrs;
206 size_t* capacities;
207 size_t nbSlices;
208} slice_collection_t;
209
210static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
211
212static void freeSliceCollection(slice_collection_t collection)
213{
214 free(collection.slicePtrs);
215 free(collection.capacities);
216}
217
218/* shrinkSizes() :
219 * downsizes sizes of slices within collection, according to `newSizes`.
220 * every `newSizes` entry must be <= than its corresponding collection size */
221void shrinkSizes(slice_collection_t collection,
222 const size_t* newSizes) /* presumed same size as collection */
223{
224 size_t const nbSlices = collection.nbSlices;
225 for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
226 assert(newSizes[blockNb] <= collection.capacities[blockNb]);
227 collection.capacities[blockNb] = newSizes[blockNb];
228 }
229}
230
231
232/* splitSlices() :
233 * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
234 * otherwise, creates exactly nbSlices slices,
235 * by either truncating input (when smaller)
236 * or repeating input from beginning */
237static slice_collection_t
238splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
239{
240 if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
241 size_t nbSrcBlocks = 0;
242 for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
243 size_t pos = 0;
244 while (pos <= srcSlices.capacities[ssnb]) {
245 nbSrcBlocks++;
246 pos += blockSize;
247 }
248 }
249
250 if (nbSlices == 0) nbSlices = nbSrcBlocks;
251
252 void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
253 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
254 if (sliceTable == NULL || capacities == NULL) {
255 free(sliceTable);
256 free(capacities);
257 return kNullCollection;
258 }
259
260 size_t ssnb = 0;
261 for (size_t sliceNb=0; sliceNb < nbSlices; ) {
262 ssnb = (ssnb + 1) % srcSlices.nbSlices;
263 size_t pos = 0;
264 char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
265 while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
266 size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
267 sliceTable[sliceNb] = ptr + pos;
268 capacities[sliceNb] = size;
269 sliceNb++;
270 pos += blockSize;
271 }
272 }
273
274 slice_collection_t result;
275 result.nbSlices = nbSlices;
276 result.slicePtrs = sliceTable;
277 result.capacities = capacities;
278 return result;
279}
280
281
282static size_t sliceCollection_totalCapacity(slice_collection_t sc)
283{
284 size_t totalSize = 0;
285 for (size_t n=0; n<sc.nbSlices; n++)
286 totalSize += sc.capacities[n];
287 return totalSize;
288}
289
290
291/* --- buffer collection --- */
292
293typedef struct {
294 buffer_t buffer;
295 slice_collection_t slices;
296} buffer_collection_t;
297
298
299static void freeBufferCollection(buffer_collection_t bc)
300{
301 freeBuffer(bc.buffer);
302 freeSliceCollection(bc.slices);
303}
304
305
306static buffer_collection_t
307createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
308{
309 size_t const bufferSize = sliceCollection_totalCapacity(sc);
310
311 buffer_t buffer = createBuffer(bufferSize);
312 CONTROL(buffer.ptr != NULL);
313
314 size_t const nbSlices = sc.nbSlices;
315 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
316 CONTROL(slices != NULL);
317
318 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
319 CONTROL(capacities != NULL);
320
321 char* const ptr = (char*)buffer.ptr;
322 size_t pos = 0;
323 for (size_t n=0; n < nbSlices; n++) {
324 capacities[n] = sc.capacities[n];
325 slices[n] = ptr + pos;
326 pos += capacities[n];
327 }
328
329 buffer_collection_t result;
330 result.buffer = buffer;
331 result.slices.nbSlices = nbSlices;
332 result.slices.capacities = capacities;
333 result.slices.slicePtrs = slices;
334 return result;
335}
336
337static buffer_collection_t
338createBufferCollection_fromSliceCollection(slice_collection_t sc)
339{
340 size_t const bufferSize = sliceCollection_totalCapacity(sc);
341
342 buffer_t buffer = createBuffer(bufferSize);
343 CONTROL(buffer.ptr != NULL);
344
345 size_t const nbSlices = sc.nbSlices;
346 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
347 CONTROL(slices != NULL);
348
349 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
350 CONTROL(capacities != NULL);
351
352 char* const ptr = (char*)buffer.ptr;
353 size_t pos = 0;
354 for (size_t n=0; n < nbSlices; n++) {
355 capacities[n] = sc.capacities[n];
356 slices[n] = ptr + pos;
357 pos += capacities[n];
358 }
359
360 for (size_t i = 0; i < nbSlices; i++) {
361 memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]);
362 capacities[i] = sc.capacities[i];
363 }
364
365 buffer_collection_t result;
366 result.buffer = buffer;
367 result.slices.nbSlices = nbSlices;
368 result.slices.capacities = capacities;
369 result.slices.slicePtrs = slices;
370
371 return result;
372}
373
374/* @return : kBuffNull if any error */
375static buffer_collection_t
376createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
377{
378 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
379 assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
380 assert(totalSizeToLoad <= BENCH_SIZE_MAX);
381 size_t const loadedSize = (size_t)totalSizeToLoad;
382 assert(loadedSize > 0);
383 void* const srcBuffer = malloc(loadedSize);
384 assert(srcBuffer != NULL);
385
386 assert(nbFiles > 0);
387 size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
388 assert(fileSizes != NULL);
389
390 /* Load input buffer */
391 int const errorCode = loadFiles(srcBuffer, loadedSize,
392 fileSizes,
393 fileNamesTable, nbFiles);
394 assert(errorCode == 0);
395
396 void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
397 assert(sliceTable != NULL);
398
399 char* const ptr = (char*)srcBuffer;
400 size_t pos = 0;
401 unsigned fileNb = 0;
402 for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
403 sliceTable[fileNb] = ptr + pos;
404 pos += fileSizes[fileNb];
405 }
406 assert(pos == loadedSize);
407 assert(fileNb == nbFiles);
408
409
410 buffer_t buffer;
411 buffer.ptr = srcBuffer;
412 buffer.capacity = loadedSize;
413 buffer.size = loadedSize;
414
415 slice_collection_t slices;
416 slices.slicePtrs = sliceTable;
417 slices.capacities = fileSizes;
418 slices.nbSlices = nbFiles;
419
420 buffer_collection_t bc;
421 bc.buffer = buffer;
422 bc.slices = slices;
423 return bc;
424}
425
426
427
428
429/*--- ddict_collection_t ---*/
430
431typedef struct {
432 ZSTD_DDict** ddicts;
433 size_t nbDDict;
434} ddict_collection_t;
435
436typedef struct {
437 ZSTD_CDict** cdicts;
438 size_t nbCDict;
439} cdict_collection_t;
440
441static const cdict_collection_t kNullCDictCollection = { NULL, 0 };
442
443static void freeCDictCollection(cdict_collection_t cdictc)
444{
445 for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
446 ZSTD_freeCDict(cdictc.cdicts[dictNb]);
447 }
448 free(cdictc.cdicts);
449}
450
451/* returns .buffers=NULL if operation fails */
452static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams)
453{
454 ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
455 if (cdicts==NULL) return kNullCDictCollection;
456 for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
457 cdicts[dictNb] = ZSTD_createCDict_advanced2(dictBuffer, dictSize, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem);
458 CONTROL(cdicts[dictNb] != NULL);
459 }
460 cdict_collection_t cdictc;
461 cdictc.cdicts = cdicts;
462 cdictc.nbCDict = nbCDict;
463 return cdictc;
464}
465
466static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
467
468static void freeDDictCollection(ddict_collection_t ddictc)
469{
470 for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
471 ZSTD_freeDDict(ddictc.ddicts[dictNb]);
472 }
473 free(ddictc.ddicts);
474}
475
476/* returns .buffers=NULL if operation fails */
477static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
478{
479 ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
480 assert(ddicts != NULL);
481 if (ddicts==NULL) return kNullDDictCollection;
482 for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
483 ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
484 assert(ddicts[dictNb] != NULL);
485 }
486 ddict_collection_t ddictc;
487 ddictc.ddicts = ddicts;
488 ddictc.nbDDict = nbDDict;
489 return ddictc;
490}
491
492
493/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
494void shuffleCDictionaries(cdict_collection_t dicts)
495{
496 size_t const nbDicts = dicts.nbCDict;
497 for (size_t r=0; r<nbDicts; r++) {
498 size_t const d = (size_t)rand() % nbDicts;
499 ZSTD_CDict* tmpd = dicts.cdicts[d];
500 dicts.cdicts[d] = dicts.cdicts[r];
501 dicts.cdicts[r] = tmpd;
502 }
503 for (size_t r=0; r<nbDicts; r++) {
504 size_t const d1 = (size_t)rand() % nbDicts;
505 size_t const d2 = (size_t)rand() % nbDicts;
506 ZSTD_CDict* tmpd = dicts.cdicts[d1];
507 dicts.cdicts[d1] = dicts.cdicts[d2];
508 dicts.cdicts[d2] = tmpd;
509 }
510}
511
512/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
513void shuffleDDictionaries(ddict_collection_t dicts)
514{
515 size_t const nbDicts = dicts.nbDDict;
516 for (size_t r=0; r<nbDicts; r++) {
517 size_t const d = (size_t)rand() % nbDicts;
518 ZSTD_DDict* tmpd = dicts.ddicts[d];
519 dicts.ddicts[d] = dicts.ddicts[r];
520 dicts.ddicts[r] = tmpd;
521 }
522 for (size_t r=0; r<nbDicts; r++) {
523 size_t const d1 = (size_t)rand() % nbDicts;
524 size_t const d2 = (size_t)rand() % nbDicts;
525 ZSTD_DDict* tmpd = dicts.ddicts[d1];
526 dicts.ddicts[d1] = dicts.ddicts[d2];
527 dicts.ddicts[d2] = tmpd;
528 }
529}
530
531
532/* --- Compression --- */
533
534/* compressBlocks() :
535 * @return : total compressed size of all blocks,
536 * or 0 if error.
537 */
538static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
539 slice_collection_t dstBlockBuffers,
540 slice_collection_t srcBlockBuffers,
541 ZSTD_CDict* cdict, int cLevel)
542{
543 size_t const nbBlocks = srcBlockBuffers.nbSlices;
544 assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
545
546 ZSTD_CCtx* const cctx = ZSTD_createCCtx();
547 assert(cctx != NULL);
548
549 size_t totalCSize = 0;
550 for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
551 size_t cBlockSize;
552 if (cdict == NULL) {
553 cBlockSize = ZSTD_compressCCtx(cctx,
554 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
555 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
556 cLevel);
557 } else {
558 cBlockSize = ZSTD_compress_usingCDict(cctx,
559 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
560 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
561 cdict);
562 }
563 CONTROL(!ZSTD_isError(cBlockSize));
564 if (cSizes) cSizes[blockNb] = cBlockSize;
565 totalCSize += cBlockSize;
566 }
567 return totalCSize;
568}
569
570
571/* --- Benchmark --- */
572
573typedef struct {
574 ZSTD_CCtx* cctx;
575 size_t nbDicts;
576 size_t dictNb;
577 cdict_collection_t dictionaries;
578} compressInstructions;
579
580compressInstructions createCompressInstructions(cdict_collection_t dictionaries, ZSTD_CCtx_params* cctxParams)
581{
582 compressInstructions ci;
583 ci.cctx = ZSTD_createCCtx();
584 CONTROL(ci.cctx != NULL);
585 if (cctxParams)
586 ZSTD_CCtx_setParametersUsingCCtxParams(ci.cctx, cctxParams);
587 ci.nbDicts = dictionaries.nbCDict;
588 ci.dictNb = 0;
589 ci.dictionaries = dictionaries;
590 return ci;
591}
592
593void freeCompressInstructions(compressInstructions ci)
594{
595 ZSTD_freeCCtx(ci.cctx);
596}
597
598typedef struct {
599 ZSTD_DCtx* dctx;
600 size_t nbDicts;
601 size_t dictNb;
602 ddict_collection_t dictionaries;
603} decompressInstructions;
604
605decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
606{
607 decompressInstructions di;
608 di.dctx = ZSTD_createDCtx();
609 assert(di.dctx != NULL);
610 di.nbDicts = dictionaries.nbDDict;
611 di.dictNb = 0;
612 di.dictionaries = dictionaries;
613 return di;
614}
615
616void freeDecompressInstructions(decompressInstructions di)
617{
618 ZSTD_freeDCtx(di.dctx);
619}
620
621/* benched function */
622size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
623{
624 compressInstructions* const ci = (compressInstructions*) payload;
625 (void)dstCapacity;
626
627 ZSTD_CCtx_refCDict(ci->cctx, ci->dictionaries.cdicts[ci->dictNb]);
628 ZSTD_compress2(ci->cctx,
629 dst, srcSize,
630 src, srcSize);
631
632 ci->dictNb = ci->dictNb + 1;
633 if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
634
635 return srcSize;
636}
637
638/* benched function */
639size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
640{
641 decompressInstructions* const di = (decompressInstructions*) payload;
642
643 size_t const result = ZSTD_decompress_usingDDict(di->dctx,
644 dst, dstCapacity,
645 src, srcSize,
646 di->dictionaries.ddicts[di->dictNb]);
647
648 di->dictNb = di->dictNb + 1;
649 if (di->dictNb >= di->nbDicts) di->dictNb = 0;
650
651 return result;
652}
653
654typedef enum {
655 fastest = 0,
656 median = 1,
657} metricAggregatePref_e;
658
659/* compareFunction() :
660 * Sort input in decreasing order when used with qsort() */
661int compareFunction(const void *a, const void *b)
662{
663 double x = *(const double *)a;
664 double y = *(const double *)b;
665 if (x < y)
666 return 1;
667 else if (x > y)
668 return -1;
669 return 0;
670}
671
672double aggregateData(double *data, size_t size,
673 metricAggregatePref_e metricAggregatePref)
674{
675 qsort(data, size, sizeof(*data), compareFunction);
676 if (metricAggregatePref == fastest)
677 return data[0];
678 else /* median */
679 return (data[(size - 1) / 2] + data[size / 2]) / 2;
680}
681
682static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks,
683 ddict_collection_t ddictionaries,
684 cdict_collection_t cdictionaries, unsigned nbRounds,
685 int benchCompression, const char *exeName,
686 ZSTD_CCtx_params *cctxParams,
687 metricAggregatePref_e metricAggregatePref)
688{
689 assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
690 if (benchCompression) assert(cctxParams);
691
692 unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
693 unsigned const total_time_ms = nbRounds * ms_per_round;
694
695 double *const speedPerRound = (double *)malloc(nbRounds * sizeof(double));
696
697 BMK_timedFnState_t* const benchState =
698 BMK_createTimedFnState(total_time_ms, ms_per_round);
699
700 decompressInstructions di = createDecompressInstructions(ddictionaries);
701 compressInstructions ci =
702 createCompressInstructions(cdictionaries, cctxParams);
703 void* payload = benchCompression ? (void*)&ci : (void*)&di;
704 BMK_benchParams_t const bp = {
705 .benchFn = benchCompression ? compress : decompress,
706 .benchPayload = payload,
707 .initFn = NULL,
708 .initPayload = NULL,
709 .errorFn = ZSTD_isError,
710 .blockCount = dstBlocks.nbSlices,
711 .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
712 .srcSizes = srcBlocks.capacities,
713 .dstBuffers = dstBlocks.slicePtrs,
714 .dstCapacities = dstBlocks.capacities,
715 .blockResults = NULL
716 };
717
718 size_t roundNb = 0;
719 for (;;) {
720 BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
721 CONTROL(BMK_isSuccessful_runOutcome(outcome));
722
723 BMK_runTime_t const result = BMK_extract_runTime(outcome);
724 double const dTime_ns = result.nanoSecPerRun;
725 double const dTime_sec = (double)dTime_ns / 1000000000;
726 size_t const srcSize = result.sumOfReturn;
727 double const speed_MBps = (double)srcSize / dTime_sec / (1 MB);
728 speedPerRound[roundNb] = speed_MBps;
729 if (benchCompression)
730 DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps);
731 else
732 DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps);
733
734 fflush(stdout);
735 if (BMK_isCompleted_TimedFn(benchState)) break;
736 roundNb++;
737 }
738 DISPLAY("\n");
739 /* BMK_benchTimedFn may not run exactly nbRounds iterations */
740 double speedAggregated =
741 aggregateData(speedPerRound, roundNb + 1, metricAggregatePref);
742 if (metricAggregatePref == fastest)
743 DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated);
744 else
745 DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated);
746
747 char* csvFileName = malloc(strlen(exeName) + 5);
748 strcpy(csvFileName, exeName);
749 strcat(csvFileName, ".csv");
750 FILE* csvFile = fopen(csvFileName, "r");
751 if (!csvFile) {
752 csvFile = fopen(csvFileName, "wt");
753 assert(csvFile);
754 fprintf(csvFile, "%s\n", exeName);
755 /* Print table headers */
756 fprintf(
757 csvFile,
758 "Compression/Decompression,Level,nbDicts,dictAttachPref,metricAggregatePref,Speed\n");
759 } else {
760 fclose(csvFile);
761 csvFile = fopen(csvFileName, "at");
762 assert(csvFile);
763 }
764
765 int cLevel = -1;
766 int dictAttachPref = -1;
767 if (benchCompression) {
768 ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_compressionLevel,
769 &cLevel);
770 ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_forceAttachDict,
771 &dictAttachPref);
772 }
773 fprintf(csvFile, "%s,%d,%ld,%d,%d,%.1f\n",
774 benchCompression ? "Compression" : "Decompression", cLevel,
775 benchCompression ? ci.nbDicts : di.nbDicts, dictAttachPref,
776 metricAggregatePref, speedAggregated);
777 fclose(csvFile);
778 free(csvFileName);
779
780 freeDecompressInstructions(di);
781 freeCompressInstructions(ci);
782 BMK_freeTimedFnState(benchState);
783
784 return 0; /* success */
785}
786
787
788/*! bench() :
789 * fileName : file to load for benchmarking purpose
790 * dictionary : optional (can be NULL), file to load as dictionary,
791 * if none provided : will be calculated on the fly by the program.
792 * @return : 0 is success, 1+ otherwise */
793int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary,
794 size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks,
795 unsigned nbRounds, int benchCompression,
796 ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params *cctxParams,
797 const char *exeName, metricAggregatePref_e metricAggregatePref)
798{
799 int result = 0;
800
801 DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
802 buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
803 CONTROL(srcs.buffer.ptr != NULL);
804 buffer_t srcBuffer = srcs.buffer;
805 size_t const srcSize = srcBuffer.size;
806 DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
807 (double)srcSize / (1 MB));
808
809 slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
810 nbBlocks = (unsigned)(srcSlices.nbSlices);
811 DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
812 if (blockSize)
813 DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
814 DISPLAYLEVEL(3, "\n");
815 size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
816
817
818 size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
819 CONTROL(dstCapacities != NULL);
820 size_t dstBufferCapacity = 0;
821 for (size_t bnb=0; bnb<nbBlocks; bnb++) {
822 dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
823 dstBufferCapacity += dstCapacities[bnb];
824 }
825
826 buffer_t dstBuffer = createBuffer(dstBufferCapacity);
827 CONTROL(dstBuffer.ptr != NULL);
828
829 void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
830 CONTROL(sliceTable != NULL);
831
832 { char* const ptr = dstBuffer.ptr;
833 size_t pos = 0;
834 for (size_t snb=0; snb < nbBlocks; snb++) {
835 sliceTable[snb] = ptr + pos;
836 pos += dstCapacities[snb];
837 } }
838
839 slice_collection_t dstSlices;
840 dstSlices.capacities = dstCapacities;
841 dstSlices.slicePtrs = sliceTable;
842 dstSlices.nbSlices = nbBlocks;
843
844
845 /* dictionary determination */
846 buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
847 srcs.buffer.ptr,
848 srcSlices.capacities, srcSlices.nbSlices,
849 DICTSIZE);
850 CONTROL(dictBuffer.ptr != NULL);
851
852 ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuffer.ptr, dictBuffer.size, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem);
853 CONTROL(cdict != NULL);
854
855 size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
856 CONTROL(cTotalSizeNoDict != 0);
857 DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
858 clevel,
859 (double)totalSrcSlicesSize / (double)cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
860
861 size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
862 CONTROL(cSizes != NULL);
863
864 size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
865 CONTROL(cTotalSize != 0);
866 DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
867 (unsigned)dictBuffer.size,
868 (double)totalSrcSlicesSize / (double)cTotalSize, (unsigned)cTotalSize);
869
870 /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
871 shrinkSizes(dstSlices, cSizes);
872
873 unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
874
875 cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, dictContentType, cctxParams);
876 CONTROL(cdictionaries.cdicts != NULL);
877
878 ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
879 CONTROL(ddictionaries.ddicts != NULL);
880
881 if (benchCompression) {
882 size_t const dictMem = ZSTD_sizeof_CDict(cdictionaries.cdicts[0]);
883 size_t const allDictMem = dictMem * nbDicts;
884 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
885 nbDicts, (double)allDictMem / (1 MB));
886
887 shuffleCDictionaries(cdictionaries);
888
889 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
890 CONTROL(resultCollection.buffer.ptr != NULL);
891
892 result = benchMem(dstSlices, resultCollection.slices, ddictionaries,
893 cdictionaries, nbRounds, benchCompression, exeName,
894 cctxParams, metricAggregatePref);
895
896 freeBufferCollection(resultCollection);
897 } else {
898 size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, DICT_LOAD_METHOD);
899 size_t const allDictMem = dictMem * nbDicts;
900 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
901 nbDicts, (double)allDictMem / (1 MB));
902
903 shuffleDDictionaries(ddictionaries);
904
905 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
906 CONTROL(resultCollection.buffer.ptr != NULL);
907
908 result = benchMem(resultCollection.slices, dstSlices, ddictionaries,
909 cdictionaries, nbRounds, benchCompression, exeName,
910 NULL, metricAggregatePref);
911
912 freeBufferCollection(resultCollection);
913 }
914
915 /* free all heap objects in reverse order */
916 freeCDictCollection(cdictionaries);
917 freeDDictCollection(ddictionaries);
918 free(cSizes);
919 ZSTD_freeCDict(cdict);
920 freeBuffer(dictBuffer);
921 freeSliceCollection(dstSlices);
922 freeBuffer(dstBuffer);
923 freeSliceCollection(srcSlices);
924 freeBufferCollection(srcs);
925
926 return result;
927}
928
929
930
931/* --- Command Line --- */
932
933/*! readU32FromChar() :
934 * @return : unsigned integer value read from input in `char` format.
935 * allows and interprets K, KB, KiB, M, MB and MiB suffix.
936 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
937 * Note : function will exit() program if digit sequence overflows */
938static unsigned readU32FromChar(const char** stringPtr)
939{
940 unsigned result = 0;
941 while ((**stringPtr >='0') && (**stringPtr <='9')) {
942 unsigned const max = (((unsigned)(-1)) / 10) - 1;
943 assert(result <= max); /* check overflow */
944 result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
945 }
946 if ((**stringPtr=='K') || (**stringPtr=='M')) {
947 unsigned const maxK = ((unsigned)(-1)) >> 10;
948 assert(result <= maxK); /* check overflow */
949 result <<= 10;
950 if (**stringPtr=='M') {
951 assert(result <= maxK); /* check overflow */
952 result <<= 10;
953 }
954 (*stringPtr)++; /* skip `K` or `M` */
955 if (**stringPtr=='i') (*stringPtr)++;
956 if (**stringPtr=='B') (*stringPtr)++;
957 }
958 return result;
959}
960
961/** longCommandWArg() :
962 * check if *stringPtr is the same as longCommand.
963 * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
964 * @return 0 and doesn't modify *stringPtr otherwise.
965 */
966static int longCommandWArg(const char** stringPtr, const char* longCommand)
967{
968 size_t const comSize = strlen(longCommand);
969 int const result = !strncmp(*stringPtr, longCommand, comSize);
970 if (result) *stringPtr += comSize;
971 return result;
972}
973
974
975int usage(const char* exeName)
976{
977 DISPLAY (" \n");
978 DISPLAY (" %s [Options] filename(s) \n", exeName);
979 DISPLAY (" \n");
980 DISPLAY ("Options : \n");
981 DISPLAY ("-z : benchmark compression (default) \n");
982 DISPLAY ("-d : benchmark decompression \n");
983 DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
984 DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
985 DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
986 DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
987 DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
988 DISPLAY ("-p# : print speed for all rounds 0=fastest 1=median (default: 0) \n");
989 DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
990 DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
991 DISPLAY ("-h : help (this text) \n");
992 DISPLAY (" \n");
993 DISPLAY ("Advanced Options (see zstd.h for documentation) : \n");
994 DISPLAY ("--dedicated-dict-search\n");
995 DISPLAY ("--dict-content-type=#\n");
996 DISPLAY ("--dict-attach-pref=#\n");
997 return 0;
998}
999
1000int bad_usage(const char* exeName)
1001{
1002 DISPLAY (" bad usage : \n");
1003 usage(exeName);
1004 return 1;
1005}
1006
1007int main (int argc, const char** argv)
1008{
1009 int recursiveMode = 0;
1010 int benchCompression = 1;
1011 int dedicatedDictSearch = 0;
1012 unsigned nbRounds = BENCH_TIME_DEFAULT_S;
1013 const char* const exeName = argv[0];
1014
1015 if (argc < 2) return bad_usage(exeName);
1016
1017 const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
1018 assert(nameTable != NULL);
1019 unsigned nameIdx = 0;
1020
1021 const char* dictionary = NULL;
1022 int cLevel = CLEVEL_DEFAULT;
1023 size_t blockSize = BLOCKSIZE_DEFAULT;
1024 unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
1025 unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
1026 ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
1027 ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
1028 ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto;
1029 metricAggregatePref_e metricAggregatePref = fastest;
1030
1031 for (int argNb = 1; argNb < argc ; argNb++) {
1032 const char* argument = argv[argNb];
1033 if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
1034 if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
1035 if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
1036 if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
1037 if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
1038 if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
1039 if (longCommandWArg(&argument, "-p")) { metricAggregatePref = (int)readU32FromChar(&argument); continue;}
1040 if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
1041 if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
1042 if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
1043 if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
1044 if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
1045 if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
1046 if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
1047 if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
1048 if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
1049 if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; }
1050 if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
1051 /* anything that's not a command is a filename */
1052 nameTable[nameIdx++] = argument;
1053 }
1054
1055 FileNamesTable* filenameTable;
1056
1057 if (recursiveMode) {
1058#ifndef UTIL_HAS_CREATEFILELIST
1059 assert(0); /* missing capability, do not run */
1060#endif
1061 filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
1062 } else {
1063 filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
1064 nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */
1065 }
1066
1067 ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
1068 ZSTD_CCtxParams_init(cctxParams, cLevel);
1069 ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
1070 ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
1071 ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
1072 ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables);
1073
1074 int result =
1075 bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize,
1076 dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds,
1077 benchCompression, dictContentType, cctxParams, exeName,
1078 metricAggregatePref);
1079
1080 UTIL_freeFileNamesTable(filenameTable);
1081 free(nameTable);
1082 ZSTD_freeCCtxParams(cctxParams);
1083
1084 return result;
1085}