eeaaf7182e55045a88e22a430c3a7a1b96e0cc29
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / contrib / largeNbDicts / largeNbDicts.c
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10
11 /* largeNbDicts
12  * This is a benchmark test tool
13  * dedicated to the specific case of dictionary decompression
14  * using a very large nb of dictionaries
15  * thus suffering latency from lots of cache misses.
16  * It's created in a bid to investigate performance and find optimizations. */
17
18
19 /*---  Dependencies  ---*/
20
21 #include <stddef.h>   /* size_t */
22 #include <stdlib.h>   /* malloc, free, abort, qsort*/
23 #include <stdio.h>    /* fprintf */
24 #include <limits.h>   /* UINT_MAX */
25 #include <assert.h>   /* assert */
26
27 #include "util.h"
28 #include "benchfn.h"
29 #define ZSTD_STATIC_LINKING_ONLY
30 #include "zstd.h"
31 #include "zdict.h"
32
33
34 /*---  Constants  --- */
35
36 #define KB  *(1<<10)
37 #define MB  *(1<<20)
38
39 #define BLOCKSIZE_DEFAULT 0  /* no slicing into blocks */
40 #define DICTSIZE  (4 KB)
41 #define CLEVEL_DEFAULT 3
42 #define DICT_LOAD_METHOD ZSTD_dlm_byCopy
43
44 #define BENCH_TIME_DEFAULT_S   6
45 #define RUN_TIME_DEFAULT_MS    1000
46 #define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
47
48 #define DISPLAY_LEVEL_DEFAULT 3
49
50 #define BENCH_SIZE_MAX (1200 MB)
51
52
53 /*---  Macros  ---*/
54
55 #define CONTROL(c)   { if (!(c)) abort(); }
56 #undef MIN
57 #define MIN(a,b)     ((a) < (b) ? (a) : (b))
58
59
60 /*---  Display Macros  ---*/
61
62 #define DISPLAY(...)         fprintf(stdout, __VA_ARGS__)
63 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
64 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
65
66
67 /*---  buffer_t  ---*/
68
69 typedef struct {
70     void* ptr;
71     size_t size;
72     size_t capacity;
73 } buffer_t;
74
75 static const buffer_t kBuffNull = { NULL, 0, 0 };
76
77 /* @return : kBuffNull if any error */
78 static buffer_t createBuffer(size_t capacity)
79 {
80     assert(capacity > 0);
81     void* const ptr = malloc(capacity);
82     if (ptr==NULL) return kBuffNull;
83
84     buffer_t buffer;
85     buffer.ptr = ptr;
86     buffer.capacity = capacity;
87     buffer.size = 0;
88     return buffer;
89 }
90
91 static void freeBuffer(buffer_t buff)
92 {
93     free(buff.ptr);
94 }
95
96
97 static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
98 {
99     size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
100     buff->size = readSize;
101 }
102
103
104 /* @return : kBuffNull if any error */
105 static buffer_t createBuffer_fromFile(const char* fileName)
106 {
107     U64 const fileSize = UTIL_getFileSize(fileName);
108     size_t const bufferSize = (size_t) fileSize;
109
110     if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
111     assert((U64)bufferSize == fileSize);   /* check overflow */
112
113     {   FILE* const f = fopen(fileName, "rb");
114         if (f == NULL) return kBuffNull;
115
116         buffer_t buff = createBuffer(bufferSize);
117         CONTROL(buff.ptr != NULL);
118
119         fillBuffer_fromHandle(&buff, f);
120         CONTROL(buff.size == buff.capacity);
121
122         fclose(f);   /* do nothing specific if fclose() fails */
123         return buff;
124     }
125 }
126
127
128 /* @return : kBuffNull if any error */
129 static buffer_t
130 createDictionaryBuffer(const char* dictionaryName,
131                        const void* srcBuffer,
132                        const size_t* srcBlockSizes, size_t nbBlocks,
133                        size_t requestedDictSize)
134 {
135     if (dictionaryName) {
136         DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
137         return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */
138
139     } else {
140
141         DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
142                         (unsigned)requestedDictSize);
143         void* const dictBuffer = malloc(requestedDictSize);
144         CONTROL(dictBuffer != NULL);
145
146         assert(nbBlocks <= UINT_MAX);
147         size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
148                                                       srcBuffer,
149                                                       srcBlockSizes, (unsigned)nbBlocks);
150         CONTROL(!ZSTD_isError(dictSize));
151
152         buffer_t result;
153         result.ptr = dictBuffer;
154         result.capacity = requestedDictSize;
155         result.size = dictSize;
156         return result;
157     }
158 }
159
160 /*! BMK_loadFiles() :
161  *  Loads `buffer`, with content from files listed within `fileNamesTable`.
162  *  Fills `buffer` entirely.
163  * @return : 0 on success, !=0 on error */
164 static int loadFiles(void* buffer, size_t bufferSize,
165                      size_t* fileSizes,
166                      const char* const * fileNamesTable, unsigned nbFiles)
167 {
168     size_t pos = 0, totalSize = 0;
169
170     for (unsigned n=0; n<nbFiles; n++) {
171         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
172         if (UTIL_isDirectory(fileNamesTable[n])) {
173             fileSizes[n] = 0;
174             continue;
175         }
176         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
177             fileSizes[n] = 0;
178             continue;
179         }
180
181         FILE* const f = fopen(fileNamesTable[n], "rb");
182         assert(f!=NULL);
183
184         assert(pos <= bufferSize);
185         assert(fileSize <= bufferSize - pos);
186
187         {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
188             assert(readSize == fileSize);
189             pos += readSize;
190         }
191         fileSizes[n] = (size_t)fileSize;
192         totalSize += (size_t)fileSize;
193         fclose(f);
194     }
195
196     assert(totalSize == bufferSize);
197     return 0;
198 }
199
200
201
202 /*---  slice_collection_t  ---*/
203
204 typedef struct {
205     void** slicePtrs;
206     size_t* capacities;
207     size_t nbSlices;
208 } slice_collection_t;
209
210 static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
211
212 static void freeSliceCollection(slice_collection_t collection)
213 {
214     free(collection.slicePtrs);
215     free(collection.capacities);
216 }
217
218 /* shrinkSizes() :
219  * downsizes sizes of slices within collection, according to `newSizes`.
220  * every `newSizes` entry must be <= than its corresponding collection size */
221 void shrinkSizes(slice_collection_t collection,
222                  const size_t* newSizes)  /* presumed same size as collection */
223 {
224     size_t const nbSlices = collection.nbSlices;
225     for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
226         assert(newSizes[blockNb] <= collection.capacities[blockNb]);
227         collection.capacities[blockNb] = newSizes[blockNb];
228     }
229 }
230
231
232 /* splitSlices() :
233  * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
234  *            otherwise, creates exactly nbSlices slices,
235  *            by either truncating input (when smaller)
236  *            or repeating input from beginning */
237 static slice_collection_t
238 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
239 {
240     if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */
241     size_t nbSrcBlocks = 0;
242     for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
243         size_t pos = 0;
244         while (pos <= srcSlices.capacities[ssnb]) {
245             nbSrcBlocks++;
246             pos += blockSize;
247         }
248     }
249
250     if (nbSlices == 0) nbSlices = nbSrcBlocks;
251
252     void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
253     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
254     if (sliceTable == NULL || capacities == NULL) {
255         free(sliceTable);
256         free(capacities);
257         return kNullCollection;
258     }
259
260     size_t ssnb = 0;
261     for (size_t sliceNb=0; sliceNb < nbSlices; ) {
262         ssnb = (ssnb + 1) % srcSlices.nbSlices;
263         size_t pos = 0;
264         char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
265         while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
266             size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
267             sliceTable[sliceNb] = ptr + pos;
268             capacities[sliceNb] = size;
269             sliceNb++;
270             pos += blockSize;
271         }
272     }
273
274     slice_collection_t result;
275     result.nbSlices = nbSlices;
276     result.slicePtrs = sliceTable;
277     result.capacities = capacities;
278     return result;
279 }
280
281
282 static size_t sliceCollection_totalCapacity(slice_collection_t sc)
283 {
284     size_t totalSize = 0;
285     for (size_t n=0; n<sc.nbSlices; n++)
286         totalSize += sc.capacities[n];
287     return totalSize;
288 }
289
290
291 /* ---  buffer collection  --- */
292
293 typedef struct {
294     buffer_t buffer;
295     slice_collection_t slices;
296 } buffer_collection_t;
297
298
299 static void freeBufferCollection(buffer_collection_t bc)
300 {
301     freeBuffer(bc.buffer);
302     freeSliceCollection(bc.slices);
303 }
304
305
306 static buffer_collection_t
307 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
308 {
309     size_t const bufferSize = sliceCollection_totalCapacity(sc);
310
311     buffer_t buffer = createBuffer(bufferSize);
312     CONTROL(buffer.ptr != NULL);
313
314     size_t const nbSlices = sc.nbSlices;
315     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
316     CONTROL(slices != NULL);
317
318     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
319     CONTROL(capacities != NULL);
320
321     char* const ptr = (char*)buffer.ptr;
322     size_t pos = 0;
323     for (size_t n=0; n < nbSlices; n++) {
324         capacities[n] = sc.capacities[n];
325         slices[n] = ptr + pos;
326         pos += capacities[n];
327     }
328
329     buffer_collection_t result;
330     result.buffer = buffer;
331     result.slices.nbSlices = nbSlices;
332     result.slices.capacities = capacities;
333     result.slices.slicePtrs = slices;
334     return result;
335 }
336
337 static buffer_collection_t
338 createBufferCollection_fromSliceCollection(slice_collection_t sc)
339 {
340     size_t const bufferSize = sliceCollection_totalCapacity(sc);
341
342     buffer_t buffer = createBuffer(bufferSize);
343     CONTROL(buffer.ptr != NULL);
344
345     size_t const nbSlices = sc.nbSlices;
346     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
347     CONTROL(slices != NULL);
348
349     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
350     CONTROL(capacities != NULL);
351
352     char* const ptr = (char*)buffer.ptr;
353     size_t pos = 0;
354     for (size_t n=0; n < nbSlices; n++) {
355         capacities[n] = sc.capacities[n];
356         slices[n] = ptr + pos;
357         pos += capacities[n];
358     }
359
360     for (size_t i = 0; i < nbSlices; i++) {
361         memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]);
362         capacities[i] = sc.capacities[i];
363     }
364
365     buffer_collection_t result;
366     result.buffer = buffer;
367     result.slices.nbSlices = nbSlices;
368     result.slices.capacities = capacities;
369     result.slices.slicePtrs = slices;
370
371     return result;
372 }
373
374 /* @return : kBuffNull if any error */
375 static buffer_collection_t
376 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
377 {
378     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
379     assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
380     assert(totalSizeToLoad <= BENCH_SIZE_MAX);
381     size_t const loadedSize = (size_t)totalSizeToLoad;
382     assert(loadedSize > 0);
383     void* const srcBuffer = malloc(loadedSize);
384     assert(srcBuffer != NULL);
385
386     assert(nbFiles > 0);
387     size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
388     assert(fileSizes != NULL);
389
390     /* Load input buffer */
391     int const errorCode = loadFiles(srcBuffer, loadedSize,
392                                     fileSizes,
393                                     fileNamesTable, nbFiles);
394     assert(errorCode == 0);
395
396     void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
397     assert(sliceTable != NULL);
398
399     char* const ptr = (char*)srcBuffer;
400     size_t pos = 0;
401     unsigned fileNb = 0;
402     for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
403         sliceTable[fileNb] = ptr + pos;
404         pos += fileSizes[fileNb];
405     }
406     assert(pos == loadedSize);
407     assert(fileNb == nbFiles);
408
409
410     buffer_t buffer;
411     buffer.ptr = srcBuffer;
412     buffer.capacity = loadedSize;
413     buffer.size = loadedSize;
414
415     slice_collection_t slices;
416     slices.slicePtrs = sliceTable;
417     slices.capacities = fileSizes;
418     slices.nbSlices = nbFiles;
419
420     buffer_collection_t bc;
421     bc.buffer = buffer;
422     bc.slices = slices;
423     return bc;
424 }
425
426
427
428
429 /*---  ddict_collection_t  ---*/
430
431 typedef struct {
432     ZSTD_DDict** ddicts;
433     size_t nbDDict;
434 } ddict_collection_t;
435
436 typedef struct {
437     ZSTD_CDict** cdicts;
438     size_t nbCDict;
439 } cdict_collection_t;
440
441 static const cdict_collection_t kNullCDictCollection = { NULL, 0 };
442
443 static void freeCDictCollection(cdict_collection_t cdictc)
444 {
445     for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
446         ZSTD_freeCDict(cdictc.cdicts[dictNb]);
447     }
448     free(cdictc.cdicts);
449 }
450
451 /* returns .buffers=NULL if operation fails */
452 static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams)
453 {
454     ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
455     if (cdicts==NULL) return kNullCDictCollection;
456     for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
457         cdicts[dictNb] = ZSTD_createCDict_advanced2(dictBuffer, dictSize, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem);
458         CONTROL(cdicts[dictNb] != NULL);
459     }
460     cdict_collection_t cdictc;
461     cdictc.cdicts = cdicts;
462     cdictc.nbCDict = nbCDict;
463     return cdictc;
464 }
465
466 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
467
468 static void freeDDictCollection(ddict_collection_t ddictc)
469 {
470     for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
471         ZSTD_freeDDict(ddictc.ddicts[dictNb]);
472     }
473     free(ddictc.ddicts);
474 }
475
476 /* returns .buffers=NULL if operation fails */
477 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
478 {
479     ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
480     assert(ddicts != NULL);
481     if (ddicts==NULL) return kNullDDictCollection;
482     for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
483         ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
484         assert(ddicts[dictNb] != NULL);
485     }
486     ddict_collection_t ddictc;
487     ddictc.ddicts = ddicts;
488     ddictc.nbDDict = nbDDict;
489     return ddictc;
490 }
491
492
493 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
494 void shuffleCDictionaries(cdict_collection_t dicts)
495 {
496     size_t const nbDicts = dicts.nbCDict;
497     for (size_t r=0; r<nbDicts; r++) {
498         size_t const d = (size_t)rand() % nbDicts;
499         ZSTD_CDict* tmpd = dicts.cdicts[d];
500         dicts.cdicts[d] = dicts.cdicts[r];
501         dicts.cdicts[r] = tmpd;
502     }
503     for (size_t r=0; r<nbDicts; r++) {
504         size_t const d1 = (size_t)rand() % nbDicts;
505         size_t const d2 = (size_t)rand() % nbDicts;
506         ZSTD_CDict* tmpd = dicts.cdicts[d1];
507         dicts.cdicts[d1] = dicts.cdicts[d2];
508         dicts.cdicts[d2] = tmpd;
509     }
510 }
511
512 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
513 void shuffleDDictionaries(ddict_collection_t dicts)
514 {
515     size_t const nbDicts = dicts.nbDDict;
516     for (size_t r=0; r<nbDicts; r++) {
517         size_t const d = (size_t)rand() % nbDicts;
518         ZSTD_DDict* tmpd = dicts.ddicts[d];
519         dicts.ddicts[d] = dicts.ddicts[r];
520         dicts.ddicts[r] = tmpd;
521     }
522     for (size_t r=0; r<nbDicts; r++) {
523         size_t const d1 = (size_t)rand() % nbDicts;
524         size_t const d2 = (size_t)rand() % nbDicts;
525         ZSTD_DDict* tmpd = dicts.ddicts[d1];
526         dicts.ddicts[d1] = dicts.ddicts[d2];
527         dicts.ddicts[d2] = tmpd;
528     }
529 }
530
531
532 /* ---   Compression  --- */
533
534 /* compressBlocks() :
535  * @return : total compressed size of all blocks,
536  *        or 0 if error.
537  */
538 static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If present, must contain at least nbBlocks fields */
539                              slice_collection_t dstBlockBuffers,
540                              slice_collection_t srcBlockBuffers,
541                              ZSTD_CDict* cdict, int cLevel)
542 {
543     size_t const nbBlocks = srcBlockBuffers.nbSlices;
544     assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
545
546     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
547     assert(cctx != NULL);
548
549     size_t totalCSize = 0;
550     for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
551         size_t cBlockSize;
552         if (cdict == NULL) {
553             cBlockSize = ZSTD_compressCCtx(cctx,
554                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
555                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
556                             cLevel);
557         } else {
558             cBlockSize = ZSTD_compress_usingCDict(cctx,
559                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
560                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
561                             cdict);
562         }
563         CONTROL(!ZSTD_isError(cBlockSize));
564         if (cSizes) cSizes[blockNb] = cBlockSize;
565         totalCSize += cBlockSize;
566     }
567     return totalCSize;
568 }
569
570
571 /* ---  Benchmark  --- */
572
573 typedef struct {
574     ZSTD_CCtx* cctx;
575     size_t nbDicts;
576     size_t dictNb;
577     cdict_collection_t dictionaries;
578 } compressInstructions;
579
580 compressInstructions createCompressInstructions(cdict_collection_t dictionaries, ZSTD_CCtx_params* cctxParams)
581 {
582     compressInstructions ci;
583     ci.cctx = ZSTD_createCCtx();
584     CONTROL(ci.cctx != NULL);
585     if (cctxParams)
586       ZSTD_CCtx_setParametersUsingCCtxParams(ci.cctx, cctxParams);
587     ci.nbDicts = dictionaries.nbCDict;
588     ci.dictNb = 0;
589     ci.dictionaries = dictionaries;
590     return ci;
591 }
592
593 void freeCompressInstructions(compressInstructions ci)
594 {
595     ZSTD_freeCCtx(ci.cctx);
596 }
597
598 typedef struct {
599     ZSTD_DCtx* dctx;
600     size_t nbDicts;
601     size_t dictNb;
602     ddict_collection_t dictionaries;
603 } decompressInstructions;
604
605 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
606 {
607     decompressInstructions di;
608     di.dctx = ZSTD_createDCtx();
609     assert(di.dctx != NULL);
610     di.nbDicts = dictionaries.nbDDict;
611     di.dictNb = 0;
612     di.dictionaries = dictionaries;
613     return di;
614 }
615
616 void freeDecompressInstructions(decompressInstructions di)
617 {
618     ZSTD_freeDCtx(di.dctx);
619 }
620
621 /* benched function */
622 size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
623 {
624     compressInstructions* const ci = (compressInstructions*) payload;
625     (void)dstCapacity;
626
627     ZSTD_CCtx_refCDict(ci->cctx, ci->dictionaries.cdicts[ci->dictNb]);
628     ZSTD_compress2(ci->cctx,
629             dst, srcSize,
630             src, srcSize);
631
632     ci->dictNb = ci->dictNb + 1;
633     if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
634
635     return srcSize;
636 }
637
638 /* benched function */
639 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
640 {
641     decompressInstructions* const di = (decompressInstructions*) payload;
642
643     size_t const result = ZSTD_decompress_usingDDict(di->dctx,
644                                         dst, dstCapacity,
645                                         src, srcSize,
646                                         di->dictionaries.ddicts[di->dictNb]);
647
648     di->dictNb = di->dictNb + 1;
649     if (di->dictNb >= di->nbDicts) di->dictNb = 0;
650
651     return result;
652 }
653
654 typedef enum {
655   fastest = 0,
656   median = 1,
657 } metricAggregatePref_e;
658
659 /* compareFunction() :
660  * Sort input in decreasing order when used with qsort() */
661 int compareFunction(const void *a, const void *b)
662 {
663   double x = *(const double *)a;
664   double y = *(const double *)b;
665   if (x < y)
666     return 1;
667   else if (x > y)
668     return -1;
669   return 0;
670 }
671
672 double aggregateData(double *data, size_t size,
673                      metricAggregatePref_e metricAggregatePref)
674 {
675   qsort(data, size, sizeof(*data), compareFunction);
676   if (metricAggregatePref == fastest)
677     return data[0];
678   else /* median */
679     return (data[(size - 1) / 2] + data[size / 2]) / 2;
680 }
681
682 static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks,
683                     ddict_collection_t ddictionaries,
684                     cdict_collection_t cdictionaries, unsigned nbRounds,
685                     int benchCompression, const char *exeName,
686                     ZSTD_CCtx_params *cctxParams,
687                     metricAggregatePref_e metricAggregatePref)
688 {
689     assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
690     if (benchCompression) assert(cctxParams);
691
692     unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
693     unsigned const total_time_ms = nbRounds * ms_per_round;
694
695     double *const speedPerRound = (double *)malloc(nbRounds * sizeof(double));
696
697     BMK_timedFnState_t* const benchState =
698             BMK_createTimedFnState(total_time_ms, ms_per_round);
699
700     decompressInstructions di = createDecompressInstructions(ddictionaries);
701     compressInstructions ci =
702         createCompressInstructions(cdictionaries, cctxParams);
703     void* payload = benchCompression ? (void*)&ci : (void*)&di;
704     BMK_benchParams_t const bp = {
705         .benchFn = benchCompression ? compress : decompress,
706         .benchPayload = payload,
707         .initFn = NULL,
708         .initPayload = NULL,
709         .errorFn = ZSTD_isError,
710         .blockCount = dstBlocks.nbSlices,
711         .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
712         .srcSizes = srcBlocks.capacities,
713         .dstBuffers = dstBlocks.slicePtrs,
714         .dstCapacities = dstBlocks.capacities,
715         .blockResults = NULL
716     };
717
718     size_t roundNb = 0;
719     for (;;) {
720         BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
721         CONTROL(BMK_isSuccessful_runOutcome(outcome));
722
723         BMK_runTime_t const result = BMK_extract_runTime(outcome);
724         double const dTime_ns = result.nanoSecPerRun;
725         double const dTime_sec = (double)dTime_ns / 1000000000;
726         size_t const srcSize = result.sumOfReturn;
727         double const speed_MBps = (double)srcSize / dTime_sec / (1 MB);
728         speedPerRound[roundNb] = speed_MBps;
729         if (benchCompression)
730             DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps);
731         else
732             DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps);
733
734         fflush(stdout);
735         if (BMK_isCompleted_TimedFn(benchState)) break;
736         roundNb++;
737     }
738     DISPLAY("\n");
739     /* BMK_benchTimedFn may not run exactly nbRounds iterations */
740     double speedAggregated =
741         aggregateData(speedPerRound, roundNb + 1, metricAggregatePref);
742     if (metricAggregatePref == fastest)
743       DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated);
744     else
745       DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated);
746
747     char* csvFileName = malloc(strlen(exeName) + 5);
748     strcpy(csvFileName, exeName);
749     strcat(csvFileName, ".csv");
750     FILE* csvFile = fopen(csvFileName, "r");
751     if (!csvFile) {
752         csvFile = fopen(csvFileName, "wt");
753         assert(csvFile);
754         fprintf(csvFile, "%s\n", exeName);
755         /* Print table headers */
756         fprintf(
757             csvFile,
758             "Compression/Decompression,Level,nbDicts,dictAttachPref,metricAggregatePref,Speed\n");
759     } else {
760         fclose(csvFile);
761         csvFile = fopen(csvFileName, "at");
762         assert(csvFile);
763     }
764
765     int cLevel = -1;
766     int dictAttachPref = -1;
767     if (benchCompression) {
768       ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_compressionLevel,
769                                    &cLevel);
770       ZSTD_CCtxParams_getParameter(cctxParams, ZSTD_c_forceAttachDict,
771                                    &dictAttachPref);
772     }
773     fprintf(csvFile, "%s,%d,%ld,%d,%d,%.1f\n",
774             benchCompression ? "Compression" : "Decompression", cLevel,
775             benchCompression ? ci.nbDicts : di.nbDicts, dictAttachPref,
776             metricAggregatePref, speedAggregated);
777     fclose(csvFile);
778     free(csvFileName);
779
780     freeDecompressInstructions(di);
781     freeCompressInstructions(ci);
782     BMK_freeTimedFnState(benchState);
783
784     return 0;   /* success */
785 }
786
787
788 /*! bench() :
789  *  fileName : file to load for benchmarking purpose
790  *  dictionary : optional (can be NULL), file to load as dictionary,
791  *              if none provided : will be calculated on the fly by the program.
792  * @return : 0 is success, 1+ otherwise */
793 int bench(const char **fileNameTable, unsigned nbFiles, const char *dictionary,
794           size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks,
795           unsigned nbRounds, int benchCompression,
796           ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params *cctxParams,
797           const char *exeName, metricAggregatePref_e metricAggregatePref)
798 {
799     int result = 0;
800
801     DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
802     buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
803     CONTROL(srcs.buffer.ptr != NULL);
804     buffer_t srcBuffer = srcs.buffer;
805     size_t const srcSize = srcBuffer.size;
806     DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
807                     (double)srcSize / (1 MB));
808
809     slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
810     nbBlocks = (unsigned)(srcSlices.nbSlices);
811     DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
812     if (blockSize)
813         DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
814     DISPLAYLEVEL(3, "\n");
815     size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
816
817
818     size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
819     CONTROL(dstCapacities != NULL);
820     size_t dstBufferCapacity = 0;
821     for (size_t bnb=0; bnb<nbBlocks; bnb++) {
822         dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
823         dstBufferCapacity += dstCapacities[bnb];
824     }
825
826     buffer_t dstBuffer = createBuffer(dstBufferCapacity);
827     CONTROL(dstBuffer.ptr != NULL);
828
829     void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
830     CONTROL(sliceTable != NULL);
831
832     {   char* const ptr = dstBuffer.ptr;
833         size_t pos = 0;
834         for (size_t snb=0; snb < nbBlocks; snb++) {
835             sliceTable[snb] = ptr + pos;
836             pos += dstCapacities[snb];
837     }   }
838
839     slice_collection_t dstSlices;
840     dstSlices.capacities = dstCapacities;
841     dstSlices.slicePtrs = sliceTable;
842     dstSlices.nbSlices = nbBlocks;
843
844
845     /* dictionary determination */
846     buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
847                                 srcs.buffer.ptr,
848                                 srcSlices.capacities, srcSlices.nbSlices,
849                                 DICTSIZE);
850     CONTROL(dictBuffer.ptr != NULL);
851
852     ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuffer.ptr, dictBuffer.size, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem);
853     CONTROL(cdict != NULL);
854
855     size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
856     CONTROL(cTotalSizeNoDict != 0);
857     DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f  (%u bytes) \n",
858                     clevel,
859                     (double)totalSrcSlicesSize / (double)cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
860
861     size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
862     CONTROL(cSizes != NULL);
863
864     size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
865     CONTROL(cTotalSize != 0);
866     DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f  (%u bytes) \n",
867                     (unsigned)dictBuffer.size,
868                     (double)totalSrcSlicesSize / (double)cTotalSize, (unsigned)cTotalSize);
869
870     /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
871     shrinkSizes(dstSlices, cSizes);
872
873     unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
874
875     cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, dictContentType, cctxParams);
876     CONTROL(cdictionaries.cdicts != NULL);
877
878     ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
879     CONTROL(ddictionaries.ddicts != NULL);
880
881     if (benchCompression) {
882         size_t const dictMem = ZSTD_sizeof_CDict(cdictionaries.cdicts[0]);
883         size_t const allDictMem = dictMem * nbDicts;
884         DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
885                         nbDicts, (double)allDictMem / (1 MB));
886
887         shuffleCDictionaries(cdictionaries);
888
889         buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
890         CONTROL(resultCollection.buffer.ptr != NULL);
891
892         result = benchMem(dstSlices, resultCollection.slices, ddictionaries,
893                           cdictionaries, nbRounds, benchCompression, exeName,
894                           cctxParams, metricAggregatePref);
895
896         freeBufferCollection(resultCollection);
897     } else {
898         size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, DICT_LOAD_METHOD);
899         size_t const allDictMem = dictMem * nbDicts;
900         DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
901                         nbDicts, (double)allDictMem / (1 MB));
902
903         shuffleDDictionaries(ddictionaries);
904
905         buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
906         CONTROL(resultCollection.buffer.ptr != NULL);
907
908         result = benchMem(resultCollection.slices, dstSlices, ddictionaries,
909                           cdictionaries, nbRounds, benchCompression, exeName,
910                           NULL, metricAggregatePref);
911
912         freeBufferCollection(resultCollection);
913     }
914
915     /* free all heap objects in reverse order */
916     freeCDictCollection(cdictionaries);
917     freeDDictCollection(ddictionaries);
918     free(cSizes);
919     ZSTD_freeCDict(cdict);
920     freeBuffer(dictBuffer);
921     freeSliceCollection(dstSlices);
922     freeBuffer(dstBuffer);
923     freeSliceCollection(srcSlices);
924     freeBufferCollection(srcs);
925
926     return result;
927 }
928
929
930
931 /* ---  Command Line  --- */
932
933 /*! readU32FromChar() :
934  * @return : unsigned integer value read from input in `char` format.
935  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
936  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
937  *  Note : function will exit() program if digit sequence overflows */
938 static unsigned readU32FromChar(const char** stringPtr)
939 {
940     unsigned result = 0;
941     while ((**stringPtr >='0') && (**stringPtr <='9')) {
942         unsigned const max = (((unsigned)(-1)) / 10) - 1;
943         assert(result <= max);   /* check overflow */
944         result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
945     }
946     if ((**stringPtr=='K') || (**stringPtr=='M')) {
947         unsigned const maxK = ((unsigned)(-1)) >> 10;
948         assert(result <= maxK);   /* check overflow */
949         result <<= 10;
950         if (**stringPtr=='M') {
951             assert(result <= maxK);   /* check overflow */
952             result <<= 10;
953         }
954         (*stringPtr)++;  /* skip `K` or `M` */
955         if (**stringPtr=='i') (*stringPtr)++;
956         if (**stringPtr=='B') (*stringPtr)++;
957     }
958     return result;
959 }
960
961 /** longCommandWArg() :
962  *  check if *stringPtr is the same as longCommand.
963  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
964  * @return 0 and doesn't modify *stringPtr otherwise.
965  */
966 static int longCommandWArg(const char** stringPtr, const char* longCommand)
967 {
968     size_t const comSize = strlen(longCommand);
969     int const result = !strncmp(*stringPtr, longCommand, comSize);
970     if (result) *stringPtr += comSize;
971     return result;
972 }
973
974
975 int usage(const char* exeName)
976 {
977     DISPLAY (" \n");
978     DISPLAY (" %s [Options] filename(s) \n", exeName);
979     DISPLAY (" \n");
980     DISPLAY ("Options : \n");
981     DISPLAY ("-z          : benchmark compression (default) \n");
982     DISPLAY ("-d          : benchmark decompression \n");
983     DISPLAY ("-r          : recursively load all files in subdirectories (default: off) \n");
984     DISPLAY ("-B#         : split input into blocks of size # (default: no split) \n");
985     DISPLAY ("-#          : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
986     DISPLAY ("-D #        : use # as a dictionary (default: create one) \n");
987     DISPLAY ("-i#         : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
988     DISPLAY ("-p#         : print speed for all rounds 0=fastest 1=median (default: 0) \n");
989     DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
990     DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
991     DISPLAY ("-h          : help (this text) \n");
992     DISPLAY (" \n");
993     DISPLAY ("Advanced Options (see zstd.h for documentation) : \n");
994     DISPLAY ("--dedicated-dict-search\n");
995     DISPLAY ("--dict-content-type=#\n");
996     DISPLAY ("--dict-attach-pref=#\n");
997     return 0;
998 }
999
1000 int bad_usage(const char* exeName)
1001 {
1002     DISPLAY (" bad usage : \n");
1003     usage(exeName);
1004     return 1;
1005 }
1006
1007 int main (int argc, const char** argv)
1008 {
1009     int recursiveMode = 0;
1010     int benchCompression = 1;
1011     int dedicatedDictSearch = 0;
1012     unsigned nbRounds = BENCH_TIME_DEFAULT_S;
1013     const char* const exeName = argv[0];
1014
1015     if (argc < 2) return bad_usage(exeName);
1016
1017     const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
1018     assert(nameTable != NULL);
1019     unsigned nameIdx = 0;
1020
1021     const char* dictionary = NULL;
1022     int cLevel = CLEVEL_DEFAULT;
1023     size_t blockSize = BLOCKSIZE_DEFAULT;
1024     unsigned nbDicts = 0;  /* determine nbDicts automatically: 1 dictionary per block */
1025     unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
1026     ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto;
1027     ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach;
1028     ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto;
1029     metricAggregatePref_e metricAggregatePref = fastest;
1030
1031     for (int argNb = 1; argNb < argc ; argNb++) {
1032         const char* argument = argv[argNb];
1033         if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
1034         if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
1035         if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
1036         if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
1037         if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
1038         if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
1039         if (longCommandWArg(&argument, "-p")) { metricAggregatePref = (int)readU32FromChar(&argument); continue;}
1040         if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
1041         if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
1042         if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
1043         if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
1044         if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
1045         if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
1046         if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
1047         if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; }
1048         if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; }
1049         if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; }
1050         if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
1051         /* anything that's not a command is a filename */
1052         nameTable[nameIdx++] = argument;
1053     }
1054
1055     FileNamesTable* filenameTable;
1056
1057     if (recursiveMode) {
1058 #ifndef UTIL_HAS_CREATEFILELIST
1059         assert(0);   /* missing capability, do not run */
1060 #endif
1061         filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
1062     } else {
1063         filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
1064         nameTable = NULL;  /* UTIL_createFileNamesTable() takes ownership of nameTable */
1065     }
1066
1067     ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
1068     ZSTD_CCtxParams_init(cctxParams, cLevel);
1069     ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch);
1070     ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0);
1071     ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref);
1072     ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables);
1073
1074     int result =
1075         bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize,
1076               dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds,
1077               benchCompression, dictContentType, cctxParams, exeName,
1078               metricAggregatePref);
1079
1080     UTIL_freeFileNamesTable(filenameTable);
1081     free(nameTable);
1082     ZSTD_freeCCtxParams(cctxParams);
1083
1084     return result;
1085 }