git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / programs / benchzstd.c
CommitLineData
f535537f 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11/* **************************************
12 * Tuning parameters
13 ****************************************/
14#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
15# define BMK_TIMETEST_DEFAULT_S 3
16#endif
17
18/* *************************************
19 * Includes
20 ***************************************/
21/* this must be included first */
22#include "platform.h" /* Large Files support, compiler specifics */
23
24/* then following system includes */
25#include <assert.h> /* assert */
26#include <errno.h>
27#include <stdio.h> /* fprintf, fopen */
28#include <stdlib.h> /* malloc, free */
29#include <string.h> /* memset, strerror */
30#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
31#include "../lib/common/mem.h"
32#include "benchfn.h"
33#include "timefn.h" /* UTIL_time_t */
34#ifndef ZSTD_STATIC_LINKING_ONLY
35# define ZSTD_STATIC_LINKING_ONLY
36#endif
37#include "../lib/zstd.h"
38#include "datagen.h" /* RDG_genBuffer */
39#include "lorem.h" /* LOREM_genBuffer */
40#ifndef XXH_INLINE_ALL
41# define XXH_INLINE_ALL
42#endif
43#include "../lib/common/xxhash.h"
44#include "../lib/zstd_errors.h"
45#include "benchzstd.h"
46
47/* *************************************
48 * Constants
49 ***************************************/
/* ZSTD_GIT_COMMIT_STRING : quoted form of the optional ZSTD_GIT_COMMIT build
 * macro, or an empty string when that macro is not defined. */
#ifndef ZSTD_GIT_COMMIT
#    define ZSTD_GIT_COMMIT_STRING ""
#else
#    define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif

#define TIMELOOP_MICROSEC (1 * 1000000ULL)             /* 1 second */
#define TIMELOOP_NANOSEC (1 * 1000000000ULL)           /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10

/* Postfix size units : the macro body starts with `*`, so they are written
 * after a number, e.g. `64 MB` expands to `64 *(1 << 20)`. */
#define KB *(1 << 10)
#define MB *(1 << 20)
#define GB *(1U << 30)

/* second argument given to BMK_createTimedFnState() in this file */
#define BMK_RUNTEST_DEFAULT_MS 1000

/* maxMemory : upper bound applied by BMK_findMaxMem() to the amount it probes.
 * When size_t is 4 bytes : 2 GB - 64 MB.
 * Otherwise : 1 << (bits in size_t - 31), i.e. 1 << 33 when size_t is 64-bit. */
static const size_t maxMemory = (sizeof(size_t) == 4)
        ?
        /* 32-bit */ (2 GB - 64 MB)
        :
        /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t) * 8) - 31));
72
73/* *************************************
74 * console display
75 ***************************************/
/* DISPLAY : printf-style message sent to stderr, followed by fflush(NULL).
 * Note : expands to a bare { } block, not do { } while (0). */
#define DISPLAY(...)                  \
    {                                 \
        fprintf(stderr, __VA_ARGS__); \
        fflush(NULL);                 \
    }
/* DISPLAYLEVEL : DISPLAY() only when the `displayLevel` variable in scope at
 * the expansion site is >= l.
 * Note : expands to an unguarded `if`; an `else` written right after the macro
 * would attach to this hidden `if`. */
#define DISPLAYLEVEL(l, ...)  \
    if (displayLevel >= l) {  \
        DISPLAY(__VA_ARGS__); \
    }
/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : +
 * progression; 4 : + information */
/* OUTPUT : same as DISPLAY, but the message goes to stdout. */
#define OUTPUT(...)                   \
    {                                 \
        fprintf(stdout, __VA_ARGS__); \
        fflush(NULL);                 \
    }
/* OUTPUTLEVEL : OUTPUT() only when `displayLevel` >= l (same caveats as
 * DISPLAYLEVEL). */
#define OUTPUTLEVEL(l, ...)  \
    if (displayLevel >= l) { \
        OUTPUT(__VA_ARGS__); \
    }
96
97/* *************************************
98 * Exceptions
99 ***************************************/
/* DEBUG defaults to 0, which silences DEBUGOUTPUT(). */
#ifndef DEBUG
#    define DEBUG 0
#endif
/* DEBUGOUTPUT : DISPLAY() that only fires when DEBUG is non-zero. */
#define DEBUGOUTPUT(...)          \
    {                             \
        if (DEBUG)                \
            DISPLAY(__VA_ARGS__); \
    }

/* RETURN_ERROR_INT : for functions returning int.
 * Prints "Error <errorNum> : <message>" at display level 1, then makes the
 * enclosing function return errorNum.
 * Requires a `displayLevel` variable in scope; errorNum is expanded several
 * times, so it should be a side-effect-free expression. */
#define RETURN_ERROR_INT(errorNum, ...)                \
    {                                                  \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        return errorNum;                               \
    }

/* CHECK_Z : evaluates the zstd call `zf` once; when ZSTD_isError() reports a
 * failure, prints the stringified call with ZSTD_getErrorName() and terminates
 * the process with exit(1). Does not depend on displayLevel. */
#define CHECK_Z(zf)                                                  \
    {                                                                \
        size_t const zerr = zf;                                      \
        if (ZSTD_isError(zerr)) {                                    \
            DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);           \
            DISPLAY("Error : ");                                     \
            DISPLAY("%s failed : %s", #zf, ZSTD_getErrorName(zerr)); \
            DISPLAY(" \n");                                          \
            exit(1);                                                 \
        }                                                            \
    }

/* RETURN_ERROR : for functions returning a tagged outcome struct `retType`.
 * Prints the message like RETURN_ERROR_INT, then returns a zero-filled
 * retType whose `tag` field is set to errorNum. */
#define RETURN_ERROR(errorNum, retType, ...)           \
    {                                                  \
        retType r;                                     \
        memset(&r, 0, sizeof(retType));                \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
        DISPLAYLEVEL(1, "Error %i : ", errorNum);      \
        DISPLAYLEVEL(1, __VA_ARGS__);                  \
        DISPLAYLEVEL(1, " \n");                        \
        r.tag = errorNum;                              \
        return r;                                      \
    }
141
/* replacement for snprintf(), which is not supported by C89.
 * sprintf() would be the supported one, but it's labelled unsafe,
 * so some modern static analyzers will flag it as such, making it unusable.
 * formatString_u() replaces snprintf() for the specific case where the format
 * contains only literal characters, `%u` and `%%`.
 *
 * @buffer       : destination, receives a NUL-terminated string
 * @buffer_size  : capacity of @buffer in bytes, terminator included
 * @formatString : format; every `%u` is replaced by @value printed in decimal
 * @value        : any unsigned value (no upper limit)
 * @return       : number of characters written (terminator excluded),
 *                 or -1 when the arguments are unusable, when a `%u` does not
 *                 fit in the remaining space, or when the format contains an
 *                 unsupported conversion.
 * Literal characters which do not fit are silently dropped (truncation).
 * Whenever @buffer_size > 0, @buffer is NUL-terminated on return, including on
 * the -1 paths. */
static int formatString_u(char* buffer, size_t buffer_size, const char* formatString, unsigned int value)
{
    /* 3 decimal digits per byte is a safe over-estimate for any unsigned */
    char digits[3 * sizeof(unsigned int) + 1];
    size_t nbDigits = 0;
    size_t written  = 0;
    size_t i;

    if (buffer == NULL || buffer_size == 0 || formatString == NULL)
        return -1; /* nothing can be written, not even a terminator */

    /* decimal digits of value, least significant first */
    do {
        digits[nbDigits++] = (char)('0' + (value % 10));
        value /= 10;
    } while (value != 0);

    /* always keep one byte available for the terminator */
    for (i = 0; formatString[i] != '\0' && written < buffer_size - 1; ++i) {
        if (formatString[i] != '%') {
            buffer[written++] = formatString[i];
            continue;
        }

        ++i; /* look at the conversion character */
        if (formatString[i] == 'u') {
            size_t d;
            if (nbDigits > (buffer_size - 1) - written) {
                buffer[written] = '\0';
                return -1; /* number does not fit */
            }
            for (d = nbDigits; d > 0; --d)
                buffer[written++] = digits[d - 1];
        } else if (formatString[i] == '%') { /* escaped percent sign */
            buffer[written++] = '%';
        } else {
            buffer[written] = '\0';
            return -1; /* unsupported format (or lone '%' at end of string) */
        }
    }

    buffer[written] = '\0';
    return (int)written;
}
192
193/* *************************************
194 * Benchmark Parameters
195 ***************************************/
196
197BMK_advancedParams_t BMK_initAdvancedParams(void)
198{
199 BMK_advancedParams_t const res = {
200 BMK_both, /* mode */
201 BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
202 0, /* blockSize */
203 0, /* targetCBlockSize */
204 0, /* nbWorkers */
205 0, /* realTime */
206 0, /* additionalParam */
207 0, /* ldmFlag */
208 0, /* ldmMinMatch */
209 0, /* ldmHashLog */
210 0, /* ldmBuckSizeLog */
211 0, /* ldmHashRateLog */
212 ZSTD_ps_auto, /* literalCompressionMode */
213 0 /* useRowMatchFinder */
214 };
215 return res;
216}
217
218/* ********************************************************
219 * Bench functions
220 **********************************************************/
/* blockParam_t : groups the source / compressed / regenerated views of one block.
 * NOTE(review): not referenced anywhere else in the visible part of this file,
 * where the same data is carried by parallel arrays instead
 * (srcPtrs, srcSizes, cPtrs, cCapacities, cSizes, resPtrs, resSizes);
 * field meanings below are inferred from those arrays - confirm before relying on them. */
typedef struct {
    const void* srcPtr; /* presumably : start of the block's input */
    size_t srcSize;     /* presumably : input size */
    void* cPtr;         /* presumably : where the compressed block is stored */
    size_t cRoom;       /* presumably : capacity available at cPtr */
    size_t cSize;       /* presumably : compressed size actually produced */
    void* resPtr;       /* presumably : where the block is regenerated */
    size_t resSize;     /* presumably : regenerated size */
} blockParam_t;
230
/* Local MIN / MAX : any previous definition is dropped first.
 * Classic macro caveat : each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
235
236static void BMK_initCCtx(
237 ZSTD_CCtx* ctx,
238 const void* dictBuffer,
239 size_t dictBufferSize,
240 int cLevel,
241 const ZSTD_compressionParameters* comprParams,
242 const BMK_advancedParams_t* adv)
243{
244 ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
245 if (adv->nbWorkers == 1) {
246 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
247 } else {
248 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
249 }
250 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
251 CHECK_Z(ZSTD_CCtx_setParameter(
252 ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
253 CHECK_Z(ZSTD_CCtx_setParameter(
254 ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
255 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
256 CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
257 CHECK_Z(ZSTD_CCtx_setParameter(
258 ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
259 CHECK_Z(ZSTD_CCtx_setParameter(
260 ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
261 CHECK_Z(ZSTD_CCtx_setParameter(
262 ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
263 CHECK_Z(ZSTD_CCtx_setParameter(
264 ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
265 CHECK_Z(ZSTD_CCtx_setParameter(
266 ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
267 CHECK_Z(ZSTD_CCtx_setParameter(
268 ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
269 CHECK_Z(ZSTD_CCtx_setParameter(
270 ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
271 CHECK_Z(ZSTD_CCtx_setParameter(
272 ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
273 CHECK_Z(ZSTD_CCtx_setParameter(
274 ctx,
275 ZSTD_c_literalCompressionMode,
276 (int)adv->literalCompressionMode));
277 CHECK_Z(ZSTD_CCtx_setParameter(
278 ctx, ZSTD_c_strategy, (int)comprParams->strategy));
279 CHECK_Z(ZSTD_CCtx_setParameter(
280 ctx, ZSTD_c_targetCBlockSize, (int)adv->targetCBlockSize));
281 CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
282}
283
284static void
285BMK_initDCtx(ZSTD_DCtx* dctx, const void* dictBuffer, size_t dictBufferSize)
286{
287 CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
288 CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
289}
290
291typedef struct {
292 ZSTD_CCtx* cctx;
293 const void* dictBuffer;
294 size_t dictBufferSize;
295 int cLevel;
296 const ZSTD_compressionParameters* comprParams;
297 const BMK_advancedParams_t* adv;
298} BMK_initCCtxArgs;
299
300static size_t local_initCCtx(void* payload)
301{
302 BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
303 BMK_initCCtx(
304 ag->cctx,
305 ag->dictBuffer,
306 ag->dictBufferSize,
307 ag->cLevel,
308 ag->comprParams,
309 ag->adv);
310 return 0;
311}
312
313typedef struct {
314 ZSTD_DCtx* dctx;
315 const void* dictBuffer;
316 size_t dictBufferSize;
317} BMK_initDCtxArgs;
318
319static size_t local_initDCtx(void* payload)
320{
321 BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
322 BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
323 return 0;
324}
325
326/* `addArgs` is the context */
327static size_t local_defaultCompress(
328 const void* srcBuffer,
329 size_t srcSize,
330 void* dstBuffer,
331 size_t dstSize,
332 void* addArgs)
333{
334 ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
335 return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
336}
337
338/* `addArgs` is the context */
339static size_t local_defaultDecompress(
340 const void* srcBuffer,
341 size_t srcSize,
342 void* dstBuffer,
343 size_t dstCapacity,
344 void* addArgs)
345{
346 size_t moreToFlush = 1;
347 ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
348 ZSTD_inBuffer in;
349 ZSTD_outBuffer out;
350 in.src = srcBuffer;
351 in.size = srcSize;
352 in.pos = 0;
353 out.dst = dstBuffer;
354 out.size = dstCapacity;
355 out.pos = 0;
356 while (moreToFlush) {
357 if (out.pos == out.size) {
358 return (size_t)-ZSTD_error_dstSize_tooSmall;
359 }
360 moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
361 if (ZSTD_isError(moreToFlush)) {
362 return moreToFlush;
363 }
364 }
365 return out.pos;
366}
367
368/* ================================================================= */
369/* Benchmark Zstandard, mem-to-mem scenarios */
370/* ================================================================= */
371
372int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
373{
374 return outcome.tag == 0;
375}
376
377BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
378{
379 assert(outcome.tag == 0);
380 return outcome.internal_never_use_directly;
381}
382
383static BMK_benchOutcome_t BMK_benchOutcome_error(void)
384{
385 BMK_benchOutcome_t b;
386 memset(&b, 0, sizeof(b));
387 b.tag = 1;
388 return b;
389}
390
391static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(
392 BMK_benchResult_t result)
393{
394 BMK_benchOutcome_t b;
395 b.tag = 0;
396 b.internal_never_use_directly = result;
397 return b;
398}
399
400/* benchMem with no allocation */
401static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
402 const void** srcPtrs,
403 size_t* srcSizes,
404 void** cPtrs,
405 size_t* cCapacities,
406 size_t* cSizes,
407 void** resPtrs,
408 size_t* resSizes,
409 void** resultBufferPtr,
410 void* compressedBuffer,
411 size_t maxCompressedSize,
412 BMK_timedFnState_t* timeStateCompress,
413 BMK_timedFnState_t* timeStateDecompress,
414
415 const void* srcBuffer,
416 size_t srcSize,
417 const size_t* fileSizes,
418 unsigned nbFiles,
419 const int cLevel,
420 const ZSTD_compressionParameters* comprParams,
421 const void* dictBuffer,
422 size_t dictBufferSize,
423 ZSTD_CCtx* cctx,
424 ZSTD_DCtx* dctx,
425 int displayLevel,
426 const char* displayName,
427 const BMK_advancedParams_t* adv)
428{
429 size_t const blockSize =
430 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
431 ? adv->blockSize
432 : srcSize)
433 + (!srcSize); /* avoid div by 0 */
434 BMK_benchResult_t benchResult;
435 size_t const loadedCompressedSize = srcSize;
436 size_t cSize = 0;
437 double ratio = 0.;
438 U32 nbBlocks;
439
440 assert(cctx != NULL);
441 assert(dctx != NULL);
442
443 /* init */
444 memset(&benchResult, 0, sizeof(benchResult));
445 if (strlen(displayName) > 17)
446 displayName +=
447 strlen(displayName) - 17; /* display last 17 characters */
448 if (adv->mode == BMK_decodeOnly) {
449 /* benchmark only decompression : source must be already compressed */
450 const char* srcPtr = (const char*)srcBuffer;
451 U64 totalDSize64 = 0;
452 U32 fileNb;
453 for (fileNb = 0; fileNb < nbFiles; fileNb++) {
454 U64 const fSize64 =
455 ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
456 if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) {
457 RETURN_ERROR(
458 32,
459 BMK_benchOutcome_t,
460 "Decompressed size cannot be determined: cannot benchmark");
461 }
462 if (fSize64 == ZSTD_CONTENTSIZE_ERROR) {
463 RETURN_ERROR(
464 32,
465 BMK_benchOutcome_t,
466 "Error while trying to assess decompressed size: data may be invalid");
467 }
468 totalDSize64 += fSize64;
469 srcPtr += fileSizes[fileNb];
470 }
471 {
472 size_t const decodedSize = (size_t)totalDSize64;
473 assert((U64)decodedSize == totalDSize64); /* check overflow */
474 free(*resultBufferPtr);
475 if (totalDSize64 > decodedSize) { /* size_t overflow */
476 RETURN_ERROR(
477 32,
478 BMK_benchOutcome_t,
479 "decompressed size is too large for local system");
480 }
481 *resultBufferPtr = malloc(decodedSize);
482 if (!(*resultBufferPtr)) {
483 RETURN_ERROR(
484 33,
485 BMK_benchOutcome_t,
486 "allocation error: not enough memory");
487 }
488 cSize = srcSize;
489 srcSize = decodedSize;
490 ratio = (double)srcSize / (double)cSize;
491 }
492 }
493
494 /* Init data blocks */
495 {
496 const char* srcPtr = (const char*)srcBuffer;
497 char* cPtr = (char*)compressedBuffer;
498 char* resPtr = (char*)(*resultBufferPtr);
499 U32 fileNb;
500 for (nbBlocks = 0, fileNb = 0; fileNb < nbFiles; fileNb++) {
501 size_t remaining = fileSizes[fileNb];
502 U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly)
503 ? 1
504 : (U32)((remaining + (blockSize - 1)) / blockSize);
505 U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
506 for (; nbBlocks < blockEnd; nbBlocks++) {
507 size_t const thisBlockSize = MIN(remaining, blockSize);
508 srcPtrs[nbBlocks] = srcPtr;
509 srcSizes[nbBlocks] = thisBlockSize;
510 cPtrs[nbBlocks] = cPtr;
511 cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly)
512 ? thisBlockSize
513 : ZSTD_compressBound(thisBlockSize);
514 resPtrs[nbBlocks] = resPtr;
515 resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly)
516 ? (size_t)ZSTD_findDecompressedSize(
517 srcPtr, thisBlockSize)
518 : thisBlockSize;
519 srcPtr += thisBlockSize;
520 cPtr += cCapacities[nbBlocks];
521 resPtr += thisBlockSize;
522 remaining -= thisBlockSize;
523 if (adv->mode == BMK_decodeOnly) {
524 cSizes[nbBlocks] = thisBlockSize;
525 benchResult.cSize = thisBlockSize;
526 }
527 }
528 }
529 }
530
531 /* warming up `compressedBuffer` */
532 if (adv->mode == BMK_decodeOnly) {
533 memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
534 } else {
535 RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
536 }
537
538 if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) {
539 OUTPUTLEVEL(
540 2,
541 "Warning : time measurements may be incorrect in multithreading mode... \n")
542 }
543
544 /* Bench */
545 {
546 U64 const crcOrig = (adv->mode == BMK_decodeOnly)
547 ? 0
548 : XXH64(srcBuffer, srcSize, 0);
549#define NB_MARKS 4
550 const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
551 U32 markNb = 0;
552 int compressionCompleted = (adv->mode == BMK_decodeOnly);
553 int decompressionCompleted = (adv->mode == BMK_compressOnly);
554 BMK_benchParams_t cbp, dbp;
555 BMK_initCCtxArgs cctxprep;
556 BMK_initDCtxArgs dctxprep;
557
558 cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */
559 cbp.benchPayload = cctx;
560 cbp.initFn = local_initCCtx; /* BMK_initCCtx */
561 cbp.initPayload = &cctxprep;
562 cbp.errorFn = ZSTD_isError;
563 cbp.blockCount = nbBlocks;
564 cbp.srcBuffers = srcPtrs;
565 cbp.srcSizes = srcSizes;
566 cbp.dstBuffers = cPtrs;
567 cbp.dstCapacities = cCapacities;
568 cbp.blockResults = cSizes;
569
570 cctxprep.cctx = cctx;
571 cctxprep.dictBuffer = dictBuffer;
572 cctxprep.dictBufferSize = dictBufferSize;
573 cctxprep.cLevel = cLevel;
574 cctxprep.comprParams = comprParams;
575 cctxprep.adv = adv;
576
577 dbp.benchFn = local_defaultDecompress;
578 dbp.benchPayload = dctx;
579 dbp.initFn = local_initDCtx;
580 dbp.initPayload = &dctxprep;
581 dbp.errorFn = ZSTD_isError;
582 dbp.blockCount = nbBlocks;
583 dbp.srcBuffers = (const void* const*)cPtrs;
584 dbp.srcSizes = cSizes;
585 dbp.dstBuffers = resPtrs;
586 dbp.dstCapacities = resSizes;
587 dbp.blockResults = NULL;
588
589 dctxprep.dctx = dctx;
590 dctxprep.dictBuffer = dictBuffer;
591 dctxprep.dictBufferSize = dictBufferSize;
592
593 OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
594 assert(srcSize < UINT_MAX);
595 OUTPUTLEVEL(
596 2,
597 "%2s-%-17.17s :%10u -> \r",
598 marks[markNb],
599 displayName,
600 (unsigned)srcSize);
601
602 while (!(compressionCompleted && decompressionCompleted)) {
603 if (!compressionCompleted) {
604 BMK_runOutcome_t const cOutcome =
605 BMK_benchTimedFn(timeStateCompress, cbp);
606
607 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
608 RETURN_ERROR(30, BMK_benchOutcome_t, "compression error");
609 }
610
611 {
612 BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
613 cSize = cResult.sumOfReturn;
614 ratio = (double)srcSize / (double)cSize;
615 {
616 BMK_benchResult_t newResult;
617 newResult.cSpeed =
618 (U64)((double)srcSize * TIMELOOP_NANOSEC
619 / cResult.nanoSecPerRun);
620 benchResult.cSize = cSize;
621 if (newResult.cSpeed > benchResult.cSpeed)
622 benchResult.cSpeed = newResult.cSpeed;
623 }
624 }
625
626 {
627 int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
628 assert(cSize < UINT_MAX);
629 OUTPUTLEVEL(
630 2,
631 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r",
632 marks[markNb],
633 displayName,
634 (unsigned)srcSize,
635 (unsigned)cSize,
636 ratioAccuracy,
637 ratio,
638 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
639 (double)benchResult.cSpeed / MB_UNIT);
640 }
641 compressionCompleted =
642 BMK_isCompleted_TimedFn(timeStateCompress);
643 }
644
645 if (!decompressionCompleted) {
646 BMK_runOutcome_t const dOutcome =
647 BMK_benchTimedFn(timeStateDecompress, dbp);
648
649 if (!BMK_isSuccessful_runOutcome(dOutcome)) {
650 RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error");
651 }
652
653 {
654 BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
655 U64 const newDSpeed =
656 (U64)((double)srcSize * TIMELOOP_NANOSEC
657 / dResult.nanoSecPerRun);
658 if (newDSpeed > benchResult.dSpeed)
659 benchResult.dSpeed = newDSpeed;
660 }
661
662 {
663 int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
664 OUTPUTLEVEL(
665 2,
666 "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r",
667 marks[markNb],
668 displayName,
669 (unsigned)srcSize,
670 (unsigned)cSize,
671 ratioAccuracy,
672 ratio,
673 benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1,
674 (double)benchResult.cSpeed / MB_UNIT,
675 (double)benchResult.dSpeed / MB_UNIT);
676 }
677 decompressionCompleted =
678 BMK_isCompleted_TimedFn(timeStateDecompress);
679 }
680 markNb = (markNb + 1) % NB_MARKS;
681 } /* while (!(compressionCompleted && decompressionCompleted)) */
682
683 /* CRC Checking */
684 {
685 const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
686 U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
687 if ((adv->mode == BMK_both) && (crcOrig != crcCheck)) {
688 size_t u;
689 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n",
690 displayName,
691 (unsigned)crcOrig,
692 (unsigned)crcCheck);
693 for (u = 0; u < srcSize; u++) {
694 if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
695 unsigned segNb, bNb, pos;
696 size_t bacc = 0;
697 DISPLAY("Decoding error at pos %u ", (unsigned)u);
698 for (segNb = 0; segNb < nbBlocks; segNb++) {
699 if (bacc + srcSizes[segNb] > u)
700 break;
701 bacc += srcSizes[segNb];
702 }
703 pos = (U32)(u - bacc);
704 bNb = pos / (128 KB);
705 DISPLAY("(sample %u, block %u, pos %u) \n",
706 segNb,
707 bNb,
708 pos);
709 {
710 size_t const lowest = (u > 5) ? 5 : u;
711 size_t n;
712 DISPLAY("origin: ");
713 for (n = lowest; n > 0; n--)
714 DISPLAY("%02X ",
715 ((const BYTE*)srcBuffer)[u - n]);
716 DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
717 for (n = 1; n < 3; n++)
718 DISPLAY("%02X ",
719 ((const BYTE*)srcBuffer)[u + n]);
720 DISPLAY(" \n");
721 DISPLAY("decode: ");
722 for (n = lowest; n > 0; n--)
723 DISPLAY("%02X ", resultBuffer[u - n]);
724 DISPLAY(" :%02X: ", resultBuffer[u]);
725 for (n = 1; n < 3; n++)
726 DISPLAY("%02X ", resultBuffer[u + n]);
727 DISPLAY(" \n");
728 }
729 break;
730 }
731 if (u == srcSize - 1) { /* should never happen */
732 DISPLAY("no difference detected\n");
733 }
734 } /* for (u=0; u<srcSize; u++) */
735 } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
736 } /* CRC Checking */
737
738 if (displayLevel
739 == 1) { /* hidden display mode -q, used by python speed benchmark */
740 double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
741 double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
742 if (adv->additionalParam) {
743 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n",
744 cLevel,
745 (int)cSize,
746 ratio,
747 cSpeed,
748 dSpeed,
749 displayName,
750 adv->additionalParam);
751 } else {
752 OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n",
753 cLevel,
754 (int)cSize,
755 ratio,
756 cSpeed,
757 dSpeed,
758 displayName);
759 }
760 }
761
762 OUTPUTLEVEL(2, "%2i#\n", cLevel);
763 } /* Bench */
764
765 benchResult.cMem =
766 (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
767 return BMK_benchOutcome_setValidResult(benchResult);
768}
769
770BMK_benchOutcome_t BMK_benchMemAdvanced(
771 const void* srcBuffer,
772 size_t srcSize,
773 void* dstBuffer,
774 size_t dstCapacity,
775 const size_t* fileSizes,
776 unsigned nbFiles,
777 int cLevel,
778 const ZSTD_compressionParameters* comprParams,
779 const void* dictBuffer,
780 size_t dictBufferSize,
781 int displayLevel,
782 const char* displayName,
783 const BMK_advancedParams_t* adv)
784
785{
786 int const dstParamsError =
787 !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
788
789 size_t const blockSize =
790 ((adv->blockSize >= 32 && (adv->mode != BMK_decodeOnly))
791 ? adv->blockSize
792 : srcSize)
793 + (!srcSize) /* avoid div by 0 */;
794 U32 const maxNbBlocks =
795 (U32)((srcSize + (blockSize - 1)) / blockSize) + nbFiles;
796
797 /* these are the blockTable parameters, just split up */
798 const void** const srcPtrs =
799 (const void**)malloc(maxNbBlocks * sizeof(void*));
800 size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
801
802 void** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
803 size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
804 size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
805
806 void** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
807 size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
808
809 BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(
810 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
811 BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(
812 adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
813
814 ZSTD_CCtx* const cctx = ZSTD_createCCtx();
815 ZSTD_DCtx* const dctx = ZSTD_createDCtx();
816
817 const size_t maxCompressedSize = dstCapacity
818 ? dstCapacity
819 : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
820
821 void* const internalDstBuffer =
822 dstBuffer ? NULL : malloc(maxCompressedSize);
823 void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
824
825 BMK_benchOutcome_t outcome =
826 BMK_benchOutcome_error(); /* error by default */
827
828 void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
829
830 int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes
831 || !cCapacities || !resPtrs || !resSizes || !timeStateCompress
832 || !timeStateDecompress || !cctx || !dctx || !compressedBuffer
833 || !resultBuffer;
834
835 if (!allocationincomplete && !dstParamsError) {
836 outcome = BMK_benchMemAdvancedNoAlloc(
837 srcPtrs,
838 srcSizes,
839 cPtrs,
840 cCapacities,
841 cSizes,
842 resPtrs,
843 resSizes,
844 &resultBuffer,
845 compressedBuffer,
846 maxCompressedSize,
847 timeStateCompress,
848 timeStateDecompress,
849 srcBuffer,
850 srcSize,
851 fileSizes,
852 nbFiles,
853 cLevel,
854 comprParams,
855 dictBuffer,
856 dictBufferSize,
857 cctx,
858 dctx,
859 displayLevel,
860 displayName,
861 adv);
862 }
863
864 /* clean up */
865 BMK_freeTimedFnState(timeStateCompress);
866 BMK_freeTimedFnState(timeStateDecompress);
867
868 ZSTD_freeCCtx(cctx);
869 ZSTD_freeDCtx(dctx);
870
871 free(internalDstBuffer);
872 free(resultBuffer);
873
874 free((void*)srcPtrs);
875 free(srcSizes);
876 free(cPtrs);
877 free(cSizes);
878 free(cCapacities);
879 free(resPtrs);
880 free(resSizes);
881
882 if (allocationincomplete) {
883 RETURN_ERROR(
884 31, BMK_benchOutcome_t, "allocation error : not enough memory");
885 }
886
887 if (dstParamsError) {
888 RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
889 }
890 return outcome;
891}
892
893BMK_benchOutcome_t BMK_benchMem(
894 const void* srcBuffer,
895 size_t srcSize,
896 const size_t* fileSizes,
897 unsigned nbFiles,
898 int cLevel,
899 const ZSTD_compressionParameters* comprParams,
900 const void* dictBuffer,
901 size_t dictBufferSize,
902 int displayLevel,
903 const char* displayName)
904{
905 BMK_advancedParams_t const adv = BMK_initAdvancedParams();
906 return BMK_benchMemAdvanced(
907 srcBuffer,
908 srcSize,
909 NULL,
910 0,
911 fileSizes,
912 nbFiles,
913 cLevel,
914 comprParams,
915 dictBuffer,
916 dictBufferSize,
917 displayLevel,
918 displayName,
919 &adv);
920}
921
922static BMK_benchOutcome_t BMK_benchCLevel(
923 const void* srcBuffer,
924 size_t benchedSize,
925 const size_t* fileSizes,
926 unsigned nbFiles,
927 int cLevel,
928 const ZSTD_compressionParameters* comprParams,
929 const void* dictBuffer,
930 size_t dictBufferSize,
931 int displayLevel,
932 const char* displayName,
933 BMK_advancedParams_t const* const adv)
934{
935 const char* pch = strrchr(displayName, '\\'); /* Windows */
936 if (!pch)
937 pch = strrchr(displayName, '/'); /* Linux */
938 if (pch)
939 displayName = pch + 1;
940
941 if (adv->realTime) {
942 DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
943 SET_REALTIME_PRIORITY;
944 }
945
946 if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
947 OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
948 ZSTD_VERSION_STRING,
949 ZSTD_GIT_COMMIT_STRING,
950 (unsigned)benchedSize,
951 adv->nbSeconds,
952 (unsigned)(adv->blockSize >> 10));
953
954 return BMK_benchMemAdvanced(
955 srcBuffer,
956 benchedSize,
957 NULL,
958 0,
959 fileSizes,
960 nbFiles,
961 cLevel,
962 comprParams,
963 dictBuffer,
964 dictBufferSize,
965 displayLevel,
966 displayName,
967 adv);
968}
969
970int BMK_syntheticTest(
971 int cLevel,
972 double compressibility,
973 const ZSTD_compressionParameters* compressionParams,
974 int displayLevel,
975 const BMK_advancedParams_t* adv)
976{
977 char nameBuff[20] = { 0 };
978 const char* name = nameBuff;
979 size_t const benchedSize = adv->blockSize ? adv->blockSize : 10000000;
980 void* srcBuffer;
981 BMK_benchOutcome_t res;
982
983 if (cLevel > ZSTD_maxCLevel()) {
984 DISPLAYLEVEL(1, "Invalid Compression Level");
985 return 15;
986 }
987
988 /* Memory allocation */
989 srcBuffer = malloc(benchedSize);
990 if (!srcBuffer) {
991 DISPLAYLEVEL(1, "allocation error : not enough memory");
992 return 16;
993 }
994
995 /* Fill input buffer */
996 if (compressibility < 0.0) {
997 LOREM_genBuffer(srcBuffer, benchedSize, 0);
998 name = "Lorem ipsum";
999 } else {
1000 RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
1001 formatString_u(
1002 nameBuff,
1003 sizeof(nameBuff),
1004 "Synthetic %u%%",
1005 (unsigned)(compressibility * 100));
1006 }
1007
1008 /* Bench */
1009 res = BMK_benchCLevel(
1010 srcBuffer,
1011 benchedSize,
1012 &benchedSize /* ? */,
1013 1 /* ? */,
1014 cLevel,
1015 compressionParams,
1016 NULL,
1017 0, /* dictionary */
1018 displayLevel,
1019 name,
1020 adv);
1021
1022 /* clean up */
1023 free(srcBuffer);
1024
1025 return !BMK_isSuccessful_benchOutcome(res);
1026}
1027
1028static size_t BMK_findMaxMem(U64 requiredMem)
1029{
1030 size_t const step = 64 MB;
1031 BYTE* testmem = NULL;
1032
1033 requiredMem = (((requiredMem >> 26) + 1) << 26);
1034 requiredMem += step;
1035 if (requiredMem > maxMemory)
1036 requiredMem = maxMemory;
1037
1038 do {
1039 testmem = (BYTE*)malloc((size_t)requiredMem);
1040 requiredMem -= step;
1041 } while (!testmem && requiredMem > 0);
1042
1043 free(testmem);
1044 return (size_t)(requiredMem);
1045}
1046
1047/*! BMK_loadFiles() :
1048 * Loads `buffer` with content of files listed within `fileNamesTable`.
1049 * At most, fills `buffer` entirely. */
1050static int BMK_loadFiles(
1051 void* buffer,
1052 size_t bufferSize,
1053 size_t* fileSizes,
1054 const char* const* fileNamesTable,
1055 unsigned nbFiles,
1056 int displayLevel)
1057{
1058 size_t pos = 0, totalSize = 0;
1059 unsigned n;
1060 for (n = 0; n < nbFiles; n++) {
1061 U64 fileSize = UTIL_getFileSize(
1062 fileNamesTable[n]); /* last file may be shortened */
1063 if (UTIL_isDirectory(fileNamesTable[n])) {
1064 DISPLAYLEVEL(
1065 2, "Ignoring %s directory... \n", fileNamesTable[n]);
1066 fileSizes[n] = 0;
1067 continue;
1068 }
1069 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1070 DISPLAYLEVEL(
1071 2,
1072 "Cannot evaluate size of %s, ignoring ... \n",
1073 fileNamesTable[n]);
1074 fileSizes[n] = 0;
1075 continue;
1076 }
1077 {
1078 FILE* const f = fopen(fileNamesTable[n], "rb");
1079 if (f == NULL)
1080 RETURN_ERROR_INT(
1081 10, "impossible to open file %s", fileNamesTable[n]);
1082 OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
1083 if (fileSize > bufferSize - pos)
1084 fileSize = bufferSize - pos,
1085 nbFiles = n; /* buffer too small - stop after this file */
1086 {
1087 size_t const readSize =
1088 fread(((char*)buffer) + pos, 1, (size_t)fileSize, f);
1089 if (readSize != (size_t)fileSize)
1090 RETURN_ERROR_INT(
1091 11, "could not read %s", fileNamesTable[n]);
1092 pos += readSize;
1093 }
1094 fileSizes[n] = (size_t)fileSize;
1095 totalSize += (size_t)fileSize;
1096 fclose(f);
1097 }
1098 }
1099
1100 if (totalSize == 0)
1101 RETURN_ERROR_INT(12, "no data to bench");
1102 return 0;
1103}
1104
1105int BMK_benchFilesAdvanced(
1106 const char* const* fileNamesTable,
1107 unsigned nbFiles,
1108 const char* dictFileName,
1109 int cLevel,
1110 const ZSTD_compressionParameters* compressionParams,
1111 int displayLevel,
1112 const BMK_advancedParams_t* adv)
1113{
1114 void* srcBuffer = NULL;
1115 size_t benchedSize;
1116 void* dictBuffer = NULL;
1117 size_t dictBufferSize = 0;
1118 size_t* fileSizes = NULL;
1119 BMK_benchOutcome_t res;
1120 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
1121
1122 if (!nbFiles) {
1123 DISPLAYLEVEL(1, "No Files to Benchmark");
1124 return 13;
1125 }
1126
1127 if (cLevel > ZSTD_maxCLevel()) {
1128 DISPLAYLEVEL(1, "Invalid Compression Level");
1129 return 14;
1130 }
1131
1132 if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) {
1133 DISPLAYLEVEL(1, "Error loading files");
1134 return 15;
1135 }
1136
1137 fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
1138 if (!fileSizes) {
1139 DISPLAYLEVEL(1, "not enough memory for fileSizes");
1140 return 16;
1141 }
1142
1143 /* Load dictionary */
1144 if (dictFileName != NULL) {
1145 U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1146 if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
1147 DISPLAYLEVEL(
1148 1,
1149 "error loading %s : %s \n",
1150 dictFileName,
1151 strerror(errno));
1152 free(fileSizes);
1153 DISPLAYLEVEL(1, "benchmark aborted");
1154 return 17;
1155 }
1156 if (dictFileSize > 64 MB) {
1157 free(fileSizes);
1158 DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName);
1159 return 18;
1160 }
1161 dictBufferSize = (size_t)dictFileSize;
1162 dictBuffer = malloc(dictBufferSize);
1163 if (dictBuffer == NULL) {
1164 free(fileSizes);
1165 DISPLAYLEVEL(
1166 1,
1167 "not enough memory for dictionary (%u bytes)",
1168 (unsigned)dictBufferSize);
1169 return 19;
1170 }
1171
1172 {
1173 int const errorCode = BMK_loadFiles(
1174 dictBuffer,
1175 dictBufferSize,
1176 fileSizes,
1177 &dictFileName /*?*/,
1178 1 /*?*/,
1179 displayLevel);
1180 if (errorCode) {
1181 res = BMK_benchOutcome_error();
1182 goto _cleanUp;
1183 }
1184 }
1185 }
1186
1187 /* Memory allocation & restrictions */
1188 benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
1189 if ((U64)benchedSize > totalSizeToLoad)
1190 benchedSize = (size_t)totalSizeToLoad;
1191 if (benchedSize < totalSizeToLoad)
1192 DISPLAY("Not enough memory; testing %u MB only...\n",
1193 (unsigned)(benchedSize >> 20));
1194
1195 srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
1196 if (!srcBuffer) {
1197 free(dictBuffer);
1198 free(fileSizes);
1199 DISPLAYLEVEL(1, "not enough memory for srcBuffer");
1200 return 20;
1201 }
1202
1203 /* Load input buffer */
1204 {
1205 int const errorCode = BMK_loadFiles(
1206 srcBuffer,
1207 benchedSize,
1208 fileSizes,
1209 fileNamesTable,
1210 nbFiles,
1211 displayLevel);
1212 if (errorCode) {
1213 res = BMK_benchOutcome_error();
1214 goto _cleanUp;
1215 }
1216 }
1217
1218 /* Bench */
1219 {
1220 char mfName[20] = { 0 };
1221 formatString_u(mfName, sizeof(mfName), " %u files", nbFiles);
1222 {
1223 const char* const displayName =
1224 (nbFiles > 1) ? mfName : fileNamesTable[0];
1225 res = BMK_benchCLevel(
1226 srcBuffer,
1227 benchedSize,
1228 fileSizes,
1229 nbFiles,
1230 cLevel,
1231 compressionParams,
1232 dictBuffer,
1233 dictBufferSize,
1234 displayLevel,
1235 displayName,
1236 adv);
1237 }
1238 }
1239
1240_cleanUp:
1241 free(srcBuffer);
1242 free(dictBuffer);
1243 free(fileSizes);
1244 return !BMK_isSuccessful_benchOutcome(res);
1245}
1246
1247int BMK_benchFiles(
1248 const char* const* fileNamesTable,
1249 unsigned nbFiles,
1250 const char* dictFileName,
1251 int cLevel,
1252 const ZSTD_compressionParameters* compressionParams,
1253 int displayLevel)
1254{
1255 BMK_advancedParams_t const adv = BMK_initAdvancedParams();
1256 return BMK_benchFilesAdvanced(
1257 fileNamesTable,
1258 nbFiles,
1259 dictFileName,
1260 cLevel,
1261 compressionParams,
1262 displayLevel,
1263 &adv);
1264}