git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / programs / benchzstd.c
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10
11 /* **************************************
12  *  Tuning parameters
13  ****************************************/
/* Default minimum duration of each test, in seconds.
 * Used as the default `nbSeconds` benchmark parameter.
 * Can be overridden at build time by pre-defining the macro. */
#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
#    define BMK_TIMETEST_DEFAULT_S 3
#endif
17
18 /* *************************************
19  *  Includes
20  ***************************************/
21 /* this must be included first */
22 #include "platform.h" /* Large Files support, compiler specifics */
23
24 /* then following system includes */
25 #include <assert.h> /* assert */
26 #include <errno.h>
27 #include <stdio.h>    /* fprintf, fopen */
28 #include <stdlib.h>   /* malloc, free */
29 #include <string.h>   /* memset, strerror */
30 #include "util.h"     /* UTIL_getFileSize, UTIL_sleep */
31 #include "../lib/common/mem.h"
32 #include "benchfn.h"
33 #include "timefn.h" /* UTIL_time_t */
34 #ifndef ZSTD_STATIC_LINKING_ONLY
35 #    define ZSTD_STATIC_LINKING_ONLY
36 #endif
37 #include "../lib/zstd.h"
38 #include "datagen.h" /* RDG_genBuffer */
39 #include "lorem.h"   /* LOREM_genBuffer */
40 #ifndef XXH_INLINE_ALL
41 #    define XXH_INLINE_ALL
42 #endif
43 #include "../lib/common/xxhash.h"
44 #include "../lib/zstd_errors.h"
45 #include "benchzstd.h"
46
47 /* *************************************
48  *  Constants
49  ***************************************/
/* ZSTD_GIT_COMMIT_STRING :
 * empty string unless the build defines ZSTD_GIT_COMMIT, in which case it is
 * derived from that value through ZSTD_EXPAND_AND_QUOTE() (defined elsewhere). */
#ifndef ZSTD_GIT_COMMIT
#    define ZSTD_GIT_COMMIT_STRING ""
#else
#    define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif

/* Time constants. TIMELOOP_* both express one second, in different units. */
#define TIMELOOP_MICROSEC (1 * 1000000ULL)             /* 1 second */
#define TIMELOOP_NANOSEC (1 * 1000000000ULL)           /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10

/* Size suffixes, meant to be written right after a number :
 * `64 MB` expands to `64 *(1 << 20)`.
 * GB uses an unsigned constant : `2 GB` would overflow a 32-bit signed int. */
#define KB *(1 << 10)
#define MB *(1 << 20)
#define GB *(1U << 30)

/* Second argument given to BMK_createTimedFnState() ;
 * presumably the duration of a single run, in milliseconds - confirm in benchfn.h */
#define BMK_RUNTEST_DEFAULT_MS 1000
66
/* maxMemory :
 * memory ceiling, selected from the width of size_t :
 * - 4-byte size_t : 2 GB - 64 MB
 * - otherwise     : 1 << (bits in size_t - 31), i.e. 8 GB when size_t is 64-bit.
 * NOTE(review): its users are not visible in this part of the file. */
static const size_t maxMemory = (sizeof(size_t) == 4)
        ?
        /* 32-bit */ (2 GB - 64 MB)
        :
        /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31));
72
73 /* *************************************
74  *  console display
75  ***************************************/
/* DISPLAY() : printf-style message sent to stderr, then flushes all streams.
 * DISPLAYLEVEL() : same, but only when `displayLevel >= l` ;
 *                  a variable named `displayLevel` must be in scope where it is used.
 * Note : these macros expand to a bare block / a bare `if`, not `do { } while (0)`.
 *        Some call sites in this file omit the trailing semicolon,
 *        so the shape cannot be changed without auditing every use.
 *        Avoid using the *LEVEL variants as the unbraced body of an if/else. */
#define DISPLAY(...)                  \
    {                                 \
        fprintf(stderr, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define DISPLAYLEVEL(l, ...)  \
    if (displayLevel >= l) {  \
        DISPLAY(__VA_ARGS__); \
    }
/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : +
 * progression;   4 : + information */
/* OUTPUT() / OUTPUTLEVEL() : same as DISPLAY() / DISPLAYLEVEL(),
 * but the message is sent to stdout instead of stderr. */
#define OUTPUT(...)                   \
    {                                 \
        fprintf(stdout, __VA_ARGS__); \
        fflush(NULL);                 \
    }
#define OUTPUTLEVEL(l, ...)  \
    if (displayLevel >= l) { \
        OUTPUT(__VA_ARGS__); \
    }
96
97 /* *************************************
98  *  Exceptions
99  ***************************************/
/* DEBUG : set to non-zero at build time to enable DEBUGOUTPUT() traces. */
#ifndef DEBUG
#    define DEBUG 0
#endif
/* DEBUGOUTPUT() : DISPLAY() which only prints when DEBUG is non-zero. */
#define DEBUGOUTPUT(...)          \
    {                             \
        if (DEBUG)                \
            DISPLAY(__VA_ARGS__); \
    }

/* RETURN_ERROR_INT() :
 * reports "Error <errorNum> : <message>" at display level 1,
 * then makes the enclosing function return `errorNum`.
 * Requires `displayLevel` in scope (through DISPLAYLEVEL). */
#define RETURN_ERROR_INT(errorNum, ...)                \
    {                                                  \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        return errorNum;                               \
    }

/* CHECK_Z() :
 * evaluates `zf` once ; if the result is a zstd error code,
 * prints the failing expression (stringified, so its source text is part of
 * the message) with the zstd error name, then terminates the process with exit(1).
 * The message is printed unconditionally, whatever the display level. */
#define CHECK_Z(zf)                                                  \
    {                                                                \
        size_t const zerr = zf;                                      \
        if (ZSTD_isError(zerr)) {                                    \
            DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);           \
            DISPLAY("Error : ");                                     \
            DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
            DISPLAY(" \n");                                          \
            exit(1);                                                 \
        }                                                            \
    }

/* RETURN_ERROR() :
 * same reporting as RETURN_ERROR_INT(), for functions returning a struct :
 * builds a zero-filled `retType`, stores `errorNum` into its `tag` field,
 * and makes the enclosing function return it.
 * `retType` must therefore have a member named `tag`. */
#define RETURN_ERROR(errorNum, retType, ...)           \
    {                                                  \
        retType r;                                     \
        memset(&r, 0, sizeof(retType));                \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        r.tag = errorNum;                              \
        return r;                                      \
    }
141
/* formatString_u() :
 * minimal replacement for snprintf(), which is not available in C89
 * (sprintf() is available, but gets flagged as unsafe by static analyzers).
 * Only supports format strings containing plain text, `%u` and `%%` ;
 * every `%u` is replaced by the decimal representation of `value`.
 *
 * @buffer       : destination, receives a NUL-terminated string
 * @buffer_size  : capacity of `buffer`, in bytes, terminator included
 * @formatString : NUL-terminated format string
 * @value        : number substituted for each `%u` (expected <= 100)
 *
 * @return : nb of characters written, terminator excluded,
 *           or -1 when the buffer is missing or empty, when `value` does not
 *           fit in the remaining space, or when an unsupported conversion is
 *           met. On a -1 return with a non-empty buffer, `buffer` still holds
 *           a valid (partial) NUL-terminated string.
 * Note : plain text which does not fit is silently truncated (not an error).
 */
static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
{
    size_t written = 0;
    size_t i;
    assert(value <= 100);

    /* nothing can be written, not even the terminator */
    if (buffer == NULL || buffer_size == 0) {
        return -1;
    }

    /* invariant : written < buffer_size, so the terminator always fits.
     * `written + 1 < buffer_size` is the overflow-free spelling of
     * `written < buffer_size - 1`. */
    for (i = 0; formatString[i] != '\0' && written + 1 < buffer_size; ++i) {
        if (formatString[i] != '%') {
            buffer[written++] = formatString[i];
            continue;
        }

        ++i; /* look at the conversion character */
        if (formatString[i] == 'u') {
            char digits[3 * sizeof(unsigned int)]; /* large enough for any unsigned */
            size_t nbDigits = 0;
            unsigned int remainder = value;
            /* generate digits, least significant first */
            do {
                digits[nbDigits++] = (char)('0' + (int)(remainder % 10));
                remainder /= 10;
            } while (remainder != 0);
            /* room left once the terminator is accounted for ; cannot underflow */
            if (nbDigits > buffer_size - 1 - written) {
                buffer[written] = '\0';
                return -1; /* buffer overflow */
            }
            while (nbDigits > 0) {
                buffer[written++] = digits[--nbDigits];
            }
        } else if (formatString[i] == '%') { /* escaped percent sign */
            buffer[written++] = '%';
        } else {
            buffer[written] = '\0';
            return -1; /* unsupported format (includes a trailing lone '%') */
        }
    }

    buffer[written] = '\0';
    return (int)written;
}
192
193 /* *************************************
194  *  Benchmark Parameters
195  ***************************************/
196
197 BMK_advancedParams_t BMK_initAdvancedParams(void)
198 {
199     BMK_advancedParams_t const res = {
200         BMK_both,               /* mode */
201         BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
202         0,                      /* blockSize */
203         0,               /* targetCBlockSize */
204         0,                      /* nbWorkers */
205         0,                      /* realTime */
206         0,                      /* additionalParam */
207         0,                      /* ldmFlag */
208         0,                      /* ldmMinMatch */
209         0,                      /* ldmHashLog */
210         0,                      /* ldmBuckSizeLog */
211         0,                      /* ldmHashRateLog */
212         ZSTD_ps_auto,           /* literalCompressionMode */
213         0                       /* useRowMatchFinder */
214     };
215     return res;
216 }
217
218 /* ********************************************************
219  *  Bench functions
220  **********************************************************/
/* blockParam_t :
 * bookkeeping record grouping the buffers attached to one block.
 * NOTE(review): no use of this type is visible in this part of the file ;
 * the field descriptions below are inferred from the names - confirm. */
typedef struct {
    const void* srcPtr; /* presumably : start of the block's input */
    size_t srcSize;     /* presumably : size of the block's input */
    void* cPtr;         /* presumably : where the compressed block is stored */
    size_t cRoom;       /* presumably : capacity available at cPtr */
    size_t cSize;       /* presumably : compressed size actually produced */
    void* resPtr;       /* presumably : where the regenerated data is stored */
    size_t resSize;     /* presumably : size of the regenerated data */
} blockParam_t;
230
/* MIN() / MAX() : any previous definition is discarded first.
 * Warning : each argument can be evaluated twice,
 * do not pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
235
236 static void BMK_initCCtx(
237         ZSTD_CCtx* ctx,
238         const void* dictBuffer,
239         size_t dictBufferSize,
240         int cLevel,
241         const ZSTD_compressionParameters* comprParams,
242         const BMK_advancedParams_t* adv)
243 {
244     ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
245     if (adv->nbWorkers == 1) {
246         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
247     } else {
248         CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
249     }
250     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
251     CHECK_Z(ZSTD_CCtx_setParameter(
252             ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
253     CHECK_Z(ZSTD_CCtx_setParameter(
254             ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
255     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
256     CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
257     CHECK_Z(ZSTD_CCtx_setParameter(
258             ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
259     CHECK_Z(ZSTD_CCtx_setParameter(
260             ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
261     CHECK_Z(ZSTD_CCtx_setParameter(
262             ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
263     CHECK_Z(ZSTD_CCtx_setParameter(
264             ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
265     CHECK_Z(ZSTD_CCtx_setParameter(
266             ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
267     CHECK_Z(ZSTD_CCtx_setParameter(
268             ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
269     CHECK_Z(ZSTD_CCtx_setParameter(
270             ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
271     CHECK_Z(ZSTD_CCtx_setParameter(
272             ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
273     CHECK_Z(ZSTD_CCtx_setParameter(
274             ctx,
275             ZSTD_c_literalCompressionMode,
276             (int)adv->literalCompressionMode));
277     CHECK_Z(ZSTD_CCtx_setParameter(
278             ctx, ZSTD_c_strategy, (int)comprParams->strategy));
279     CHECK_Z(ZSTD_CCtx_setParameter(
280             ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
281     CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
282 }
283
/* BMK_initDCtx() :
 * resets `dctx` (session and parameters),
 * then loads the dictionary `dictBuffer` (`dictBufferSize` bytes) into it.
 * Any zstd error terminates the process, through CHECK_Z().
 * Note : CHECK_Z() prints the source text of its argument,
 *        so rewording these calls changes the error messages. */
static void
BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
{
    CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
}
290
/* BMK_initCCtxArgs :
 * bundles the arguments of BMK_initCCtx() into a single object,
 * so they can travel through the `void* payload` of local_initCCtx(). */
typedef struct {
    ZSTD_CCtx* cctx;                               /* context to (re)initialize */
    const void* dictBuffer;                        /* dictionary content */
    size_t dictBufferSize;                         /* dictionary size, in bytes */
    int cLevel;                                    /* compression level */
    const ZSTD_compressionParameters* comprParams; /* low-level compression parameters */
    const BMK_advancedParams_t* adv;               /* advanced benchmark parameters */
} BMK_initCCtxArgs;
299
300 static size_t local_initCCtx(void* payload)
301 {
302     BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
303     BMK_initCCtx(
304             ag->cctx,
305             ag->dictBuffer,
306             ag->dictBufferSize,
307             ag->cLevel,
308             ag->comprParams,
309             ag->adv);
310     return 0;
311 }
312
/* BMK_initDCtxArgs :
 * bundles the arguments of BMK_initDCtx() into a single object,
 * so they can travel through the `void* payload` of local_initDCtx(). */
typedef struct {
    ZSTD_DCtx* dctx;        /* context to (re)initialize */
    const void* dictBuffer; /* dictionary content */
    size_t dictBufferSize;  /* dictionary size, in bytes */
} BMK_initDCtxArgs;
318
319 static size_t local_initDCtx(void* payload)
320 {
321     BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
322     BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
323     return 0;
324 }
325
326 /* `addArgs` is the context */
327 static size_t local_defaultCompress(
328         const void* srcBuffer,
329         size_t srcSize,
330         void* dstBuffer,
331         size_t dstSize,
332         void* addArgs)
333 {
334     ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
335     return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
336 }
337
338 /* `addArgs` is the context */
339 static size_t local_defaultDecompress(
340         const void* srcBuffer,
341         size_t srcSize,
342         void* dstBuffer,
343         size_t dstCapacity,
344         void* addArgs)
345 {
346     size_t moreToFlush    = 1;
347     ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
348     ZSTD_inBuffer in;
349     ZSTD_outBuffer out;
350     in.src   = srcBuffer;
351     in.size  = srcSize;
352     in.pos   = 0;
353     out.dst  = dstBuffer;
354     out.size = dstCapacity;
355     out.pos  = 0;
356     while (moreToFlush) {
357         if (out.pos == out.size) {
358             return (size_t)-ZSTD_error_dstSize_tooSmall;
359         }
360         moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
361         if (ZSTD_isError(moreToFlush)) {
362             return moreToFlush;
363         }
364     }
365     return out.pos;
366 }
367
368 /* ================================================================= */
369 /*      Benchmark Zstandard, mem-to-mem scenarios                    */
370 /* ================================================================= */
371
372 int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
373 {
374     return outcome.tag == 0;
375 }
376
377 BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
378 {
379     assert(outcome.tag == 0);
380     return outcome.internal_never_use_directly;
381 }
382
383 static BMK_benchOutcome_t BMK_benchOutcome_error(void)
384 {
385     BMK_benchOutcome_t b;
386     memset(&b, 0, sizeof(b));
387     b.tag = 1;
388     return b;
389 }
390
391 static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
392         BMK_benchResult_t result)
393 {
394     BMK_benchOutcome_t b;
395     b.tag                         = 0;
396     b.internal_never_use_directly = result;
397     return b;
398 }
399
400 /* benchMem with no allocation */
401 static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
402         const void** srcPtrs,
403         size_t* srcSizes,
404         void** cPtrs,
405         size_t* cCapacities,
406         size_t* cSizes,
407         void** resPtrs,
408         size_t* resSizes,
409         void** resultBufferPtr,
410         void* compressedBuffer,
411         size_t maxCompressedSize,
412         BMK_timedFnState_t* timeStateCompress,
413         BMK_timedFnState_t* timeStateDecompress,
414
415         const void* srcBuffer,
416         size_t srcSize,
417         const size_t* fileSizes,
418         unsigned nbFiles,
419         const int cLevel,
420         const ZSTD_compressionParameters* comprParams,
421         const void* dictBuffer,
422         size_t dictBufferSize,
423         ZSTD_CCtx* cctx,
424         ZSTD_DCtx* dctx,
425         int displayLevel,
426         const char* displayName,
427         const BMK_advancedParams_t* adv)
428 {
429     size_t const blockSize =
430             ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
431                      ? adv->blockSize
432                      : srcSize)
433             + (!srcSize); /* avoid div by 0 */
434     BMK_benchResult_t benchResult;
435     size_t const loadedCompressedSize = srcSize;
436     size_t cSize                      = 0;
437     double ratio                      = 0.;
438     U32 nbBlocks;
439
440     assert(cctx != NULL);
441     assert(dctx != NULL);
442
443     /* init */
444     memset(&benchResult, 0, sizeof(benchResult));
445     if (strlen(displayName) > 17)
446         displayName +=
447                 strlen(displayName) - 17; /* display last 17 characters */
448     if (adv->mode == BMK_decodeOnly) {
449         /* benchmark only decompression : source must be already compressed */
450         const char* srcPtr = (const char*)srcBuffer;
451         U64 totalDSize64   = 0;
452         U32 fileNb;
453         for (fileNb = 0; fileNb < nbFiles; fileNb++) {
454             U64 const fSize64 =
455                     ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
456             if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
457                 RETURN_ERROR(
458                         32,
459                         BMK_benchOutcome_t,
460                         "Decompressed size cannot be determined: cannot benchmark");
461             }
462             if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
463                 RETURN_ERROR(
464                         32,
465                         BMK_benchOutcome_t,
466                         "Error while trying to assess decompressed size: data may be invalid");
467             }
468             totalDSize64 += fSize64;
469             srcPtr += fileSizes[fileNb];
470         }
471         {
472             size_t const decodedSize = (size_t)totalDSize64;
473             assert((U64)decodedSize == totalDSize64); /* check overflow */
474             free(*resultBufferPtr);
475             if (totalDSize64 > decodedSize) { /* size_t overflow */
476                 RETURN_ERROR(
477                         32,
478                         BMK_benchOutcome_t,
479                         "decompressed size is too large for local system");
480             }
481             *resultBufferPtr = malloc(decodedSize);
482             if (!(*resultBufferPtr)) {
483                 RETURN_ERROR(
484                         33,
485                         BMK_benchOutcome_t,
486                         "allocation error: not enough memory");
487             }
488             cSize   = srcSize;
489             srcSize = decodedSize;
490             ratio   = (double)srcSize / (double)cSize;
491         }
492     }
493
494     /* Init data blocks  */
495     {
496         const char* srcPtr = (const char*)srcBuffer;
497         char* cPtr         = (char*)compressedBuffer;
498         char* resPtr       = (char*)(*resultBufferPtr);
499         U32 fileNb;
500         for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) {
501             size_t remaining              = fileSizes[fileNb];
502             U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly)
503                     ? 1
504                     : (U32)((remaining + (blockSize - 1)) / blockSize);
505             U32 const blockEnd            = nbBlocks + nbBlocksforThisFile;
506             for (; nbBlocks < blockEnd; nbBlocks++) {
507                 size_t const thisBlockSize = MIN(remaining, blockSize);
508                 srcPtrs[nbBlocks]          = srcPtr;
509                 srcSizes[nbBlocks]         = thisBlockSize;
510                 cPtrs[nbBlocks]            = cPtr;
511                 cCapacities[nbBlocks]      = (adv->mode == BMK_decodeOnly)
512                              ? thisBlockSize
513                              : ZSTD_compressBound(thisBlockSize);
514                 resPtrs[nbBlocks]          = resPtr;
515                 resSizes[nbBlocks]         = (adv->mode == BMK_decodeOnly)
516                                 ? (size_t)ZSTD_findDecompressedSize(
517                                 srcPtr, thisBlockSize)
518                                 : thisBlockSize;
519                 srcPtr += thisBlockSize;
520                 cPtr += cCapacities[nbBlocks];
521                 resPtr += thisBlockSize;
522                 remaining -= thisBlockSize;
523                 if (adv->mode == BMK_decodeOnly) {
524                     cSizes[nbBlocks]  = thisBlockSize;
525                     benchResult.cSize = thisBlockSize;
526                 }
527             }
528         }
529     }
530
531     /* warming up `compressedBuffer` */
532     if (adv->mode == BMK_decodeOnly) {
533         memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
534     } else {
535         RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
536     }
537
538     if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
539         OUTPUTLEVEL(
540                 2,
541                 "Warning : time measurements may be incorrect in multithreading mode... \n")
542     }
543
544     /* Bench */
545     {
546         U64 const crcOrig = (adv->mode == BMK_decodeOnly)
547                 ? 0
548                 : XXH64(srcBuffer, srcSize, 0);
549 #define NB_MARKS 4
550         const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
551         U32 markNb                  = 0;
552         int compressionCompleted    = (adv->mode == BMK_decodeOnly);
553         int decompressionCompleted  = (adv->mode == BMK_compressOnly);
554         BMK_benchParams_t cbp, dbp;
555         BMK_initCCtxArgs cctxprep;
556         BMK_initDCtxArgs dctxprep;
557
558         cbp.benchFn       = local_defaultCompress; /* ZSTD_compress2 */
559         cbp.benchPayload  = cctx;
560         cbp.initFn        = local_initCCtx; /* BMK_initCCtx */
561         cbp.initPayload   = &cctxprep;
562         cbp.errorFn       = ZSTD_isError;
563         cbp.blockCount    = nbBlocks;
564         cbp.srcBuffers    = srcPtrs;
565         cbp.srcSizes      = srcSizes;
566         cbp.dstBuffers    = cPtrs;
567         cbp.dstCapacities = cCapacities;
568         cbp.blockResults  = cSizes;
569
570         cctxprep.cctx           = cctx;
571         cctxprep.dictBuffer     = dictBuffer;
572         cctxprep.dictBufferSize = dictBufferSize;
573         cctxprep.cLevel         = cLevel;
574         cctxprep.comprParams    = comprParams;
575         cctxprep.adv            = adv;
576
577         dbp.benchFn       = local_defaultDecompress;
578         dbp.benchPayload  = dctx;
579         dbp.initFn        = local_initDCtx;
580         dbp.initPayload   = &dctxprep;
581         dbp.errorFn       = ZSTD_isError;
582         dbp.blockCount    = nbBlocks;
583         dbp.srcBuffers    = (const void* const*)cPtrs;
584         dbp.srcSizes      = cSizes;
585         dbp.dstBuffers    = resPtrs;
586         dbp.dstCapacities = resSizes;
587         dbp.blockResults  = NULL;
588
589         dctxprep.dctx           = dctx;
590         dctxprep.dictBuffer     = dictBuffer;
591         dctxprep.dictBufferSize = dictBufferSize;
592
593         OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
594         assert(srcSize < UINT_MAX);
595         OUTPUTLEVEL(
596                 2,
597                 "%2s-%-17.17s :%10u -> \r",
598                 marks[markNb],
599                 displayName,
600                 (unsigned)srcSize);
601
602         while (!(compressionCompleted && decompressionCompleted)) {
603             if (!compressionCompleted) {
604                 BMK_runOutcome_t const cOutcome =
605                         BMK_benchTimedFn(timeStateCompress, cbp);
606
607                 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
608                     RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
609                 }
610
611                 {
612                     BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
613                     cSize                       = cResult.sumOfReturn;
614                     ratio = (double)srcSize / (double)cSize;
615                     {
616                         BMK_benchResult_t newResult;
617                         newResult.cSpeed =
618                                 (U64)((double)srcSize * TIMELOOP_NANOSEC
619                                       / cResult.nanoSecPerRun);
620                         benchResult.cSize = cSize;
621                         if (newResult.cSpeed > benchResult.cSpeed)
622                             benchResult.cSpeed = newResult.cSpeed;
623                     }
624                 }
625
626                 {
627                     int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
628                     assert(cSize < UINT_MAX);
629                     OUTPUTLEVEL(
630                             2,
631                             "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
632                             marks[markNb],
633                             displayName,
634                             (unsigned)srcSize,
635                             (unsigned)cSize,
636                             ratioAccuracy,
637                             ratio,
638                             benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
639                             (double)benchResult.cSpeed / MB_UNIT);
640                 }
641                 compressionCompleted =
642                         BMK_isCompleted_TimedFn(timeStateCompress);
643             }
644
645             if (!decompressionCompleted) {
646                 BMK_runOutcome_t const dOutcome =
647                         BMK_benchTimedFn(timeStateDecompress, dbp);
648
649                 if (!BMK_isSuccessful_runOutcome(dOutcome)) {
650                     RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
651                 }
652
653                 {
654                     BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
655                     U64 const newDSpeed =
656                             (U64)((double)srcSize * TIMELOOP_NANOSEC
657                                   / dResult.nanoSecPerRun);
658                     if (newDSpeed > benchResult.dSpeed)
659                         benchResult.dSpeed = newDSpeed;
660                 }
661
662                 {
663                     int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
664                     OUTPUTLEVEL(
665                             2,
666                             "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
667                             marks[markNb],
668                             displayName,
669                             (unsigned)srcSize,
670                             (unsigned)cSize,
671                             ratioAccuracy,
672                             ratio,
673                             benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
674                             (double)benchResult.cSpeed / MB_UNIT,
675                             (double)benchResult.dSpeed / MB_UNIT);
676                 }
677                 decompressionCompleted =
678                         BMK_isCompleted_TimedFn(timeStateDecompress);
679             }
680             markNb = (markNb + 1) % NB_MARKS;
681         } /* while (!(compressionCompleted && decompressionCompleted)) */
682
683         /* CRC Checking */
684         {
685             const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
686             U64 const crcCheck       = XXH64(resultBuffer, srcSize, 0);
687             if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
688                 size_t u;
689                 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
690                         displayName,
691                         (unsigned)crcOrig,
692                         (unsigned)crcCheck);
693                 for (u = 0; u < srcSize; u++) {
694                     if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
695                         unsigned segNb, bNb, pos;
696                         size_t bacc = 0;
697                         DISPLAY("Decoding error at pos %u ", (unsigned)u);
698                         for (segNb = 0; segNb < nbBlocks; segNb++) {
699                             if (bacc + srcSizes[segNb] > u)
700                                 break;
701                             bacc += srcSizes[segNb];
702                         }
703                         pos = (U32)(u - bacc);
704                         bNb = pos / (128 KB);
705                         DISPLAY("(sample %u, block %u, pos %u) \n",
706                                 segNb,
707                                 bNb,
708                                 pos);
709                         {
710                             size_t const lowest = (u > 5) ? 5 : u;
711                             size_t n;
712                             DISPLAY("origin: ");
713                             for (n = lowest; n > 0; n--)
714                                 DISPLAY("%02X ",
715                                         ((const BYTE*)srcBuffer)[u - n]);
716                             DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
717                             for (n = 1; n < 3; n++)
718                                 DISPLAY("%02X ",
719                                         ((const BYTE*)srcBuffer)[u + n]);
720                             DISPLAY(" \n");
721                             DISPLAY("decode: ");
722                             for (n = lowest; n > 0; n--)
723                                 DISPLAY("%02X ", resultBuffer[u - n]);
724                             DISPLAY(" :%02X:  ", resultBuffer[u]);
725                             for (n = 1; n < 3; n++)
726                                 DISPLAY("%02X ", resultBuffer[u + n]);
727                             DISPLAY(" \n");
728                         }
729                         break;
730                     }
731                     if (u == srcSize - 1) { /* should never happen */
732                         DISPLAY("no difference detected\n");
733                     }
734                 } /* for (u=0; u<srcSize; u++) */
735             }     /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
736         }         /* CRC Checking */
737
738         if (displayLevel
739             == 1) { /* hidden display mode -q, used by python speed benchmark */
740             double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
741             double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
742             if (adv->additionalParam) {
743                 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n",
744                        cLevel,
745                        (int)cSize,
746                        ratio,
747                        cSpeed,
748                        dSpeed,
749                        displayName,
750                        adv->additionalParam);
751             } else {
752                 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n",
753                        cLevel,
754                        (int)cSize,
755                        ratio,
756                        cSpeed,
757                        dSpeed,
758                        displayName);
759             }
760         }
761
762         OUTPUTLEVEL(2, "%2i#\n", cLevel);
763     } /* Bench */
764
765     benchResult.cMem =
766             (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
767     return BMK_benchOutcome_setValidResult(benchResult);
768 }
769
770 BMK_benchOutcome_t BMK_benchMemAdvanced(
771         const void* srcBuffer,
772         size_t srcSize,
773         void* dstBuffer,
774         size_t dstCapacity,
775         const size_t* fileSizes,
776         unsigned nbFiles,
777         int cLevel,
778         const ZSTD_compressionParameters* comprParams,
779         const void* dictBuffer,
780         size_t dictBufferSize,
781         int displayLevel,
782         const char* displayName,
783         const BMK_advancedParams_t* adv)
784
785 {
786     int const dstParamsError =
787             !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
788
789     size_t const blockSize =
790             ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
791                      ? adv->blockSize
792                      : srcSize)
793             + (!srcSize) /* avoid div by 0 */;
794     U32 const maxNbBlocks =
795             (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;
796
797     /* these are the blockTable parameters, just split up */
798     const void** const srcPtrs =
799             (const void**)malloc(maxNbBlocks * sizeof(void*));
800     size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
801
802     void** const cPtrs        = (void**)malloc(maxNbBlocks * sizeof(void*));
803     size_t* const cSizes      = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
804     size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
805
806     void** const resPtrs   = (void**)malloc(maxNbBlocks * sizeof(void*));
807     size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
808
809     BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
810             adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
811     BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
812             adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
813
814     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
815     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
816
817     const size_t maxCompressedSize = dstCapacity
818             ? dstCapacity
819             : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
820
821     void* const internalDstBuffer =
822             dstBuffer ? NULL : malloc(maxCompressedSize);
823     void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
824
825     BMK_benchOutcome_t outcome =
826             BMK_benchOutcome_error(); /* error by default */
827
828     void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
829
830     int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
831             || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
832             || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
833             || !resultBuffer;
834
835     if (!allocationincomplete && !dstParamsError) {
836         outcome = BMK_benchMemAdvancedNoAlloc(
837                 srcPtrs,
838                 srcSizes,
839                 cPtrs,
840                 cCapacities,
841                 cSizes,
842                 resPtrs,
843                 resSizes,
844                 &resultBuffer,
845                 compressedBuffer,
846                 maxCompressedSize,
847                 timeStateCompress,
848                 timeStateDecompress,
849                 srcBuffer,
850                 srcSize,
851                 fileSizes,
852                 nbFiles,
853                 cLevel,
854                 comprParams,
855                 dictBuffer,
856                 dictBufferSize,
857                 cctx,
858                 dctx,
859                 displayLevel,
860                 displayName,
861                 adv);
862     }
863
864     /* clean up */
865     BMK_freeTimedFnState(timeStateCompress);
866     BMK_freeTimedFnState(timeStateDecompress);
867
868     ZSTD_freeCCtx(cctx);
869     ZSTD_freeDCtx(dctx);
870
871     free(internalDstBuffer);
872     free(resultBuffer);
873
874     free((void*)srcPtrs);
875     free(srcSizes);
876     free(cPtrs);
877     free(cSizes);
878     free(cCapacities);
879     free(resPtrs);
880     free(resSizes);
881
882     if (allocationincomplete) {
883         RETURN_ERROR(
884                 31, BMK_benchOutcome_t, "allocation error : not enough memory");
885     }
886
887     if (dstParamsError) {
888         RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
889     }
890     return outcome;
891 }
892
893 BMK_benchOutcome_t BMK_benchMem(
894         const void* srcBuffer,
895         size_t srcSize,
896         const size_t* fileSizes,
897         unsigned nbFiles,
898         int cLevel,
899         const ZSTD_compressionParameters* comprParams,
900         const void* dictBuffer,
901         size_t dictBufferSize,
902         int displayLevel,
903         const char* displayName)
904 {
905     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
906     return BMK_benchMemAdvanced(
907             srcBuffer,
908             srcSize,
909             NULL,
910             0,
911             fileSizes,
912             nbFiles,
913             cLevel,
914             comprParams,
915             dictBuffer,
916             dictBufferSize,
917             displayLevel,
918             displayName,
919             &adv);
920 }
921
922 static BMK_benchOutcome_t BMK_benchCLevel(
923         const void* srcBuffer,
924         size_t benchedSize,
925         const size_t* fileSizes,
926         unsigned nbFiles,
927         int cLevel,
928         const ZSTD_compressionParameters* comprParams,
929         const void* dictBuffer,
930         size_t dictBufferSize,
931         int displayLevel,
932         const char* displayName,
933         BMK_advancedParams_t const* const adv)
934 {
935     const char* pch = strrchr(displayName, '\\'); /* Windows */
936     if (!pch)
937         pch = strrchr(displayName, '/'); /* Linux */
938     if (pch)
939         displayName = pch + 1;
940
941     if (adv->realTime) {
942         DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
943         SET_REALTIME_PRIORITY;
944     }
945
946     if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
947         OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
948                ZSTD_VERSION_STRING,
949                ZSTD_GIT_COMMIT_STRING,
950                (unsigned)benchedSize,
951                adv->nbSeconds,
952                (unsigned)(adv->blockSize >> 10));
953
954     return BMK_benchMemAdvanced(
955             srcBuffer,
956             benchedSize,
957             NULL,
958             0,
959             fileSizes,
960             nbFiles,
961             cLevel,
962             comprParams,
963             dictBuffer,
964             dictBufferSize,
965             displayLevel,
966             displayName,
967             adv);
968 }
969
970 int BMK_syntheticTest(
971         int cLevel,
972         double compressibility,
973         const ZSTD_compressionParameters* compressionParams,
974         int displayLevel,
975         const BMK_advancedParams_t* adv)
976 {
977     char nameBuff[20]        = { 0 };
978     const char* name         = nameBuff;
979     size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
980     void* srcBuffer;
981     BMK_benchOutcome_t res;
982
983     if (cLevel > ZSTD_maxCLevel()) {
984         DISPLAYLEVEL(1, "Invalid Compression Level");
985         return 15;
986     }
987
988     /* Memory allocation */
989     srcBuffer = malloc(benchedSize);
990     if (!srcBuffer) {
991         DISPLAYLEVEL(1, "allocation error : not enough memory");
992         return 16;
993     }
994
995     /* Fill input buffer */
996     if (compressibility < 0.0) {
997         LOREM_genBuffer(srcBuffer, benchedSize, 0);
998         name = "Lorem ipsum";
999     } else {
1000         RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
1001         formatString_u(
1002                 nameBuff,
1003                 sizeof(nameBuff),
1004                 "Synthetic %u%%",
1005                 (unsigned)(compressibility * 100));
1006     }
1007
1008     /* Bench */
1009     res = BMK_benchCLevel(
1010             srcBuffer,
1011             benchedSize,
1012             &benchedSize /* ? */,
1013             1 /* ? */,
1014             cLevel,
1015             compressionParams,
1016             NULL,
1017             0, /* dictionary */
1018             displayLevel,
1019             name,
1020             adv);
1021
1022     /* clean up */
1023     free(srcBuffer);
1024
1025     return !BMK_isSuccessful_benchOutcome(res);
1026 }
1027
1028 static size_t BMK_findMaxMem(U64 requiredMem)
1029 {
1030     size_t const step = 64 MB;
1031     BYTE* testmem     = NULL;
1032
1033     requiredMem = (((requiredMem >> 26) + 1) << 26);
1034     requiredMem += step;
1035     if (requiredMem > maxMemory)
1036         requiredMem = maxMemory;
1037
1038     do {
1039         testmem = (BYTE*)malloc((size_t)requiredMem);
1040         requiredMem -= step;
1041     } while (!testmem && requiredMem > 0);
1042
1043     free(testmem);
1044     return (size_t)(requiredMem);
1045 }
1046
1047 /*! BMK_loadFiles() :
1048  *  Loads `buffer` with content of files listed within `fileNamesTable`.
1049  *  At most, fills `buffer` entirely. */
1050 static int BMK_loadFiles(
1051         void* buffer,
1052         size_t bufferSize,
1053         size_t* fileSizes,
1054         const char* const* fileNamesTable,
1055         unsigned nbFiles,
1056         int displayLevel)
1057 {
1058     size_t pos = 0, totalSize = 0;
1059     unsigned n;
1060     for (n = 0; n < nbFiles; n++) {
1061         U64 fileSize = UTIL_getFileSize(
1062                 fileNamesTable[n]); /* last file may be shortened */
1063         if (UTIL_isDirectory(fileNamesTable[n])) {
1064             DISPLAYLEVEL(
1065                     2, "Ignoring %s directory...       \n", fileNamesTable[n]);
1066             fileSizes[n] = 0;
1067             continue;
1068         }
1069         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1070             DISPLAYLEVEL(
1071                     2,
1072                     "Cannot evaluate size of %s, ignoring ... \n",
1073                     fileNamesTable[n]);
1074             fileSizes[n] = 0;
1075             continue;
1076         }
1077         {
1078             FILE* const f = fopen(fileNamesTable[n], "rb");
1079             if (f == NULL)
1080                 RETURN_ERROR_INT(
1081                         10, "impossible to open file %s", fileNamesTable[n]);
1082             OUTPUTLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
1083             if (fileSize > bufferSize - pos)
1084                 fileSize = bufferSize - pos,
1085                 nbFiles  = n; /* buffer too small - stop after this file */
1086             {
1087                 size_t const readSize =
1088                         fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
1089                 if (readSize != (size_t)fileSize)
1090                     RETURN_ERROR_INT(
1091                             11, "could not read %s", fileNamesTable[n]);
1092                 pos += readSize;
1093             }
1094             fileSizes[n] = (size_t)fileSize;
1095             totalSize += (size_t)fileSize;
1096             fclose(f);
1097         }
1098     }
1099
1100     if (totalSize == 0)
1101         RETURN_ERROR_INT(12, "no data to bench");
1102     return 0;
1103 }
1104
1105 int BMK_benchFilesAdvanced(
1106         const char* const* fileNamesTable,
1107         unsigned nbFiles,
1108         const char* dictFileName,
1109         int cLevel,
1110         const ZSTD_compressionParameters* compressionParams,
1111         int displayLevel,
1112         const BMK_advancedParams_t* adv)
1113 {
1114     void* srcBuffer = NULL;
1115     size_t benchedSize;
1116     void* dictBuffer      = NULL;
1117     size_t dictBufferSize = 0;
1118     size_t* fileSizes     = NULL;
1119     BMK_benchOutcome_t res;
1120     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
1121
1122     if (!nbFiles) {
1123         DISPLAYLEVEL(1, "No Files to Benchmark");
1124         return 13;
1125     }
1126
1127     if (cLevel > ZSTD_maxCLevel()) {
1128         DISPLAYLEVEL(1, "Invalid Compression Level");
1129         return 14;
1130     }
1131
1132     if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
1133         DISPLAYLEVEL(1, "Error loading files");
1134         return 15;
1135     }
1136
1137     fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
1138     if (!fileSizes) {
1139         DISPLAYLEVEL(1, "not enough memory for fileSizes");
1140         return 16;
1141     }
1142
1143     /* Load dictionary */
1144     if (dictFileName != NULL) {
1145         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1146         if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
1147             DISPLAYLEVEL(
1148                     1,
1149                     "error loading %s : %s \n",
1150                     dictFileName,
1151                     strerror(errno));
1152             free(fileSizes);
1153             DISPLAYLEVEL(1, "benchmark aborted");
1154             return 17;
1155         }
1156         if (dictFileSize > 64 MB) {
1157             free(fileSizes);
1158             DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
1159             return 18;
1160         }
1161         dictBufferSize = (size_t)dictFileSize;
1162         dictBuffer     = malloc(dictBufferSize);
1163         if (dictBuffer == NULL) {
1164             free(fileSizes);
1165             DISPLAYLEVEL(
1166                     1,
1167                     "not enough memory for dictionary (%u bytes)",
1168                     (unsigned)dictBufferSize);
1169             return 19;
1170         }
1171
1172         {
1173             int const errorCode = BMK_loadFiles(
1174                     dictBuffer,
1175                     dictBufferSize,
1176                     fileSizes,
1177                     &dictFileName /*?*/,
1178                     1 /*?*/,
1179                     displayLevel);
1180             if (errorCode) {
1181                 res = BMK_benchOutcome_error();
1182                 goto _cleanUp;
1183             }
1184         }
1185     }
1186
1187     /* Memory allocation & restrictions */
1188     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
1189     if ((U64)benchedSize > totalSizeToLoad)
1190         benchedSize = (size_t)totalSizeToLoad;
1191     if (benchedSize < totalSizeToLoad)
1192         DISPLAY("Not enough memory; testing %u MB only...\n",
1193                 (unsigned)(benchedSize >> 20));
1194
1195     srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
1196     if (!srcBuffer) {
1197         free(dictBuffer);
1198         free(fileSizes);
1199         DISPLAYLEVEL(1, "not enough memory for srcBuffer");
1200         return 20;
1201     }
1202
1203     /* Load input buffer */
1204     {
1205         int const errorCode = BMK_loadFiles(
1206                 srcBuffer,
1207                 benchedSize,
1208                 fileSizes,
1209                 fileNamesTable,
1210                 nbFiles,
1211                 displayLevel);
1212         if (errorCode) {
1213             res = BMK_benchOutcome_error();
1214             goto _cleanUp;
1215         }
1216     }
1217
1218     /* Bench */
1219     {
1220         char mfName[20] = { 0 };
1221         formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
1222         {
1223             const char* const displayName =
1224                     (nbFiles > 1) ? mfName : fileNamesTable[0];
1225             res = BMK_benchCLevel(
1226                     srcBuffer,
1227                     benchedSize,
1228                     fileSizes,
1229                     nbFiles,
1230                     cLevel,
1231                     compressionParams,
1232                     dictBuffer,
1233                     dictBufferSize,
1234                     displayLevel,
1235                     displayName,
1236                     adv);
1237         }
1238     }
1239
1240 _cleanUp:
1241     free(srcBuffer);
1242     free(dictBuffer);
1243     free(fileSizes);
1244     return !BMK_isSuccessful_benchOutcome(res);
1245 }
1246
1247 int BMK_benchFiles(
1248         const char* const* fileNamesTable,
1249         unsigned nbFiles,
1250         const char* dictFileName,
1251         int cLevel,
1252         const ZSTD_compressionParameters* compressionParams,
1253         int displayLevel)
1254 {
1255     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
1256     return BMK_benchFilesAdvanced(
1257             fileNamesTable,
1258             nbFiles,
1259             dictFileName,
1260             cLevel,
1261             compressionParams,
1262             displayLevel,
1263             &adv);
1264 }