git subrepo pull (merge) --force deps/libchdr
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.5 / tests / paramgrill.c
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12/*-************************************
13* Dependencies
14**************************************/
15#include "util.h" /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */
16#include <stdlib.h> /* malloc */
17#include <stdio.h> /* fprintf, fopen, ftello64 */
18#include <string.h> /* strcmp */
19#include <math.h> /* log */
20#include <assert.h>
21
22#include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
23#include "mem.h"
24#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */
25#include "zstd.h"
26#include "datagen.h"
27#include "xxhash.h"
28#include "benchfn.h"
29#include "benchzstd.h"
30#include "zstd_errors.h"
31#include "zstd_internal.h" /* should not be needed */
32
33
34/*-************************************
35* Constants
36**************************************/
/* Identification strings consumed by WELCOME_MESSAGE below. */
#define PROGRAM_DESCRIPTION "ZSTD parameters tester"
#define AUTHOR "Yann Collet"
/* Expands to a printf-style format string FOLLOWED BY its four arguments
 * (description, zstd version string, pointer width in bits, author).
 * It is therefore only usable as the full argument list of a printf-like call or macro. */
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR

#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
#define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */

/* Ceiling applied by BMK_findMaxMem() before it starts probing malloc() :
 * (2 GB - 64 MB) when size_t is 4 bytes wide,
 * otherwise 1 << (number of bits in size_t - 31), i.e. 8 GB for a 64-bit size_t.
 * GB and MB are unit macros defined outside this file section. */
static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));

#define COMPRESSIBILITY_DEFAULT 0.50

/* NOTE(review): both limits are consumed outside this section of the file.
 * g_maxVariationTime is presumably 60 seconds expressed in microseconds
 * (SEC_TO_MICRO comes from timefn.h) - confirm against its user. */
static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
static const int g_maxNbVariations = 64;
50
51
52/*-************************************
53* Macros
54**************************************/
/* DISPLAY() : unconditional printf-style output on stderr */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : same, but only when g_displayLevel >= n.
 * NOTE : expands to a bare `if` statement, so it must not be used
 * as the unbraced body of an if/else (dangling-else hazard). */
#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
/* DEBUGOUTPUT() : DISPLAY() which is only active when DEBUG is non-zero */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

/* TIMED : when non-zero, BMK_printWinner() prefixes each result with the elapsed time */
#define TIMED 0
/* DEBUG can be provided by the build; defaults to disabled */
#ifndef DEBUG
# define DEBUG 0
#endif

/* local MIN/MAX replace any previous definition.
 * NOTE : arguments are evaluated twice, avoid side effects. */
#undef MIN
#undef MAX
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
/* pseudo compression level : BMK_printWinner() labels it "Custom Level" */
#define CUSTOM_LEVEL 99
#define BASE_CLEVEL 1

/* bounds of the forceAttachDict parameter, as stored in a U32 ((U32)-1 stands for -1) */
#define FADT_MIN 0
#define FADT_MAX ((U32)-1)

/* number of distinct values each parameter can take (see rangetable[]) */
#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
#define MML_RANGE (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
#define TLEN_RANGE 17 /* must match the number of entries of tlen_table[] */
#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
#define FADT_RANGE 3 /* rangeMap() maps indices 0, 1, 2 to -1, 0, 1 */

/* CHECKTIME() : makes the enclosing function return r once more than g_timeLimit_s seconds elapsed since g_time */
#define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } }
/* CHECKTIMEGT() : same test, but stores val into ret and jumps to label _gototag instead of returning */
#define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } }

#define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */
87
/* printable strategy names, indexed by strategy value
 * (displayParamVal() reads g_stratName[value] for strt_ind); index 0 means "no strategy" */
static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
 "(none) ", "ZSTD_fast ", "ZSTD_dfast ",
 "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
 "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra ",
 "ZSTD_btultra2"};

/* candidate targetLength values, in ascending order (invRangeMap() binary-searches this table).
 * rangeMap() converts an index into this table into a targetLength. */
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
95
96
97/*-************************************
98* Setup for Adding new params
99**************************************/
100
/* indices for each of the variables */
/* Every table below is indexed by a varInds_t, so all of them must keep the same ordering.
 * The first seven entries (wlog_ind .. strt_ind) mirror the fields of ZSTD_compressionParameters
 * (see pvalsToCParams()); entries after strt_ind are extra parameters. */
typedef enum {
    wlog_ind = 0,
    clog_ind = 1,
    hlog_ind = 2,
    slog_ind = 3,
    mml_ind = 4,
    tlen_ind = 5,
    strt_ind = 6,
    fadt_ind = 7, /* forceAttachDict */
    NUM_PARAMS = 8
} varInds_t;

/* one value per parameter; a slot equal to PARAM_UNSET means "not specified" */
typedef struct {
    U32 vals[NUM_PARAMS];
} paramValues_t;

/* minimum value of parameters */
static const U32 mintable[NUM_PARAMS] =
    { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN };

/* maximum value of parameters */
static const U32 maxtable[NUM_PARAMS] =
    { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX };

/* # of values parameters can take on */
static const U32 rangetable[NUM_PARAMS] =
    { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE };

/* ZSTD_cctxSetParameter() index to set */
static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] =
    { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict };

/* names of parameters */
/* (these are the names written by BMK_paramValues_into_commandLine()) */
static const char* g_paramNames[NUM_PARAMS] =
    { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" };

/* shortened names of parameters */
static const char* g_shortParamNames[NUM_PARAMS] =
    { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" };
141
142/* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */
143static U32 rangeMap(varInds_t param, int ind)
144{
145 U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0);
146 switch(param) {
147 case wlog_ind: /* using default: triggers -Wswitch-enum */
148 case clog_ind:
149 case hlog_ind:
150 case slog_ind:
151 case mml_ind:
152 case strt_ind:
153 return mintable[param] + uind;
154 case tlen_ind:
155 return tlen_table[uind];
156 case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */
157 return uind - 1;
158 case NUM_PARAMS:
159 default:;
160 }
161 DISPLAY("Error, not a valid param\n ");
162 assert(0);
163 return (U32)-1;
164}
165
166/* inverse of rangeMap */
167static int invRangeMap(varInds_t param, U32 value)
168{
169 value = MIN(MAX(mintable[param], value), maxtable[param]);
170 switch(param) {
171 case wlog_ind:
172 case clog_ind:
173 case hlog_ind:
174 case slog_ind:
175 case mml_ind:
176 case strt_ind:
177 return (int)(value - mintable[param]);
178 case tlen_ind: /* bin search */
179 {
180 int lo = 0;
181 int hi = TLEN_RANGE;
182 while(lo < hi) {
183 int mid = (lo + hi) / 2;
184 if(tlen_table[mid] < value) {
185 lo = mid + 1;
186 } if(tlen_table[mid] == value) {
187 return mid;
188 } else {
189 hi = mid;
190 }
191 }
192 return lo;
193 }
194 case fadt_ind:
195 return (int)value + 1;
196 case NUM_PARAMS:
197 default:;
198 }
199 DISPLAY("Error, not a valid param\n ");
200 assert(0);
201 return -2;
202}
203
204/* display of params */
205static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width)
206{
207 switch(param) {
208 case wlog_ind:
209 case clog_ind:
210 case hlog_ind:
211 case slog_ind:
212 case mml_ind:
213 case tlen_ind:
214 if(width) {
215 fprintf(f, "%*u", width, value);
216 } else {
217 fprintf(f, "%u", value);
218 }
219 break;
220 case strt_ind:
221 if(width) {
222 fprintf(f, "%*s", width, g_stratName[value]);
223 } else {
224 fprintf(f, "%s", g_stratName[value]);
225 }
226 break;
227 case fadt_ind: /* force attach dict */
228 if(width) {
229 fprintf(f, "%*d", width, (int)value);
230 } else {
231 fprintf(f, "%d", (int)value);
232 }
233 break;
234 case NUM_PARAMS:
235 default:
236 DISPLAY("Error, not a valid param\n ");
237 assert(0);
238 break;
239 }
240}
241
242
243/*-************************************
244* Benchmark Parameters/Global Variables
245**************************************/
246
/* General Utility */
static U32 g_timeLimit_s = 99999; /* about 27 hours */
static UTIL_time_t g_time; /* reference start time : CHECKTIME() and BMK_printWinner() measure elapsed time from it */
static U32 g_blockSize = 0;
static U32 g_rand = 1; /* state of the FUZ_rand() generator used to pick parameter variations */

/* Display */
static int g_displayLevel = 3;
static BYTE g_silenceParams[NUM_PARAMS]; /* can selectively silence some params when displaying them */

/* Mode Selection */
static U32 g_singleRun = 0;
static U32 g_optimizer = 0;
static int g_optmode = 0; /* when set, results are compared against g_lvltarget (see feasible(), compareResultLT()) */

/* For cLevel Table generation */
static U32 g_target = 0;
static U32 g_noSeed = 0;

/* For optimizer */
static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */
static double g_ratioMultiplier = 5.; /* weight of the ratio gain versus the speed gain in resultDistLvl() */
static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict */
static BMK_benchResult_t g_lvltarget; /* reference result used when g_optmode is set */

/* kind of memoization table */
typedef enum {
    directMap,
    xxhashMap,
    noMemo
} memoTableType_t;

/* memoization table; paramVariation() picks the parameter to modify among varArray[0 .. varLen-1] */
typedef struct {
    memoTableType_t tableType;
    BYTE* table;
    size_t tableLen;
    varInds_t varArray[NUM_PARAMS];
    size_t varLen;
} memoTable_t;

/* a benchmark result, together with the parameters which produced it */
typedef struct {
    BMK_benchResult_t result;
    paramValues_t params;
} winnerInfo_t;

/* constraints that a result must satisfy to be feasible (see feasible()) */
typedef struct {
    U32 cSpeed; /* bytes / sec */
    U32 dSpeed;
    U32 cMem; /* bytes */
} constraint_t;

/* node of the singly-linked list of winners, maintained by insertWinner() */
typedef struct winner_ll_node winner_ll_node;
struct winner_ll_node {
    winnerInfo_t res;
    winner_ll_node* next;
};

static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */
304
305/*
306 * Additional Global Variables (Defined Above Use)
307 * g_level_constraint
308 * g_alreadyTested
309 * g_maxTries
310 * g_clockGranularity
311 */
312
313
314/*-*******************************************************
315* General Util Functions
316*********************************************************/
317
318/* nullified useless params, to ensure count stats */
319/* cleans up params for memoizing / display */
320static paramValues_t sanitizeParams(paramValues_t params)
321{
322 if (params.vals[strt_ind] == ZSTD_fast)
323 params.vals[clog_ind] = 0, params.vals[slog_ind] = 0;
324 if (params.vals[strt_ind] == ZSTD_dfast)
325 params.vals[slog_ind] = 0;
326 if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) )
327 params.vals[tlen_ind] = 0;
328
329 return params;
330}
331
332static ZSTD_compressionParameters pvalsToCParams(paramValues_t p)
333{
334 ZSTD_compressionParameters c;
335 memset(&c, 0, sizeof(ZSTD_compressionParameters));
336 c.windowLog = p.vals[wlog_ind];
337 c.chainLog = p.vals[clog_ind];
338 c.hashLog = p.vals[hlog_ind];
339 c.searchLog = p.vals[slog_ind];
340 c.minMatch = p.vals[mml_ind];
341 c.targetLength = p.vals[tlen_ind];
342 c.strategy = p.vals[strt_ind];
343 /* no forceAttachDict */
344 return c;
345}
346
347static paramValues_t cParamsToPVals(ZSTD_compressionParameters c)
348{
349 paramValues_t p;
350 varInds_t i;
351 p.vals[wlog_ind] = c.windowLog;
352 p.vals[clog_ind] = c.chainLog;
353 p.vals[hlog_ind] = c.hashLog;
354 p.vals[slog_ind] = c.searchLog;
355 p.vals[mml_ind] = c.minMatch;
356 p.vals[tlen_ind] = c.targetLength;
357 p.vals[strt_ind] = c.strategy;
358
359 /* set all other params to their minimum value */
360 for (i = strt_ind + 1; i < NUM_PARAMS; i++) {
361 p.vals[i] = mintable[i];
362 }
363 return p;
364}
365
366/* equivalent of ZSTD_adjustCParams for paramValues_t */
367static paramValues_t
368adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize)
369{
370 paramValues_t ot = p;
371 varInds_t i;
372 p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize));
373 if (!dictSize) { p.vals[fadt_ind] = 0; }
374 /* retain value of all other parameters */
375 for(i = strt_ind + 1; i < NUM_PARAMS; i++) {
376 p.vals[i] = ot.vals[i];
377 }
378 return p;
379}
380
381static size_t BMK_findMaxMem(U64 requiredMem)
382{
383 size_t const step = 64 MB;
384 void* testmem = NULL;
385
386 requiredMem = (((requiredMem >> 26) + 1) << 26);
387 if (requiredMem > maxMemory) requiredMem = maxMemory;
388
389 requiredMem += 2 * step;
390 while (!testmem && requiredMem > 0) {
391 testmem = malloc ((size_t)requiredMem);
392 requiredMem -= step;
393 }
394
395 free (testmem);
396 return (size_t) requiredMem;
397}
398
399/* accuracy in seconds only, span can be multiple years */
400static U32 BMK_timeSpan_s(const UTIL_time_t tStart)
401{
402 return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL);
403}
404
405static U32 FUZ_rotl32(U32 x, U32 r)
406{
407 return ((x << r) | (x >> (32 - r)));
408}
409
410static U32 FUZ_rand(U32* src)
411{
412 const U32 prime1 = 2654435761U;
413 const U32 prime2 = 2246822519U;
414 U32 rand32 = *src;
415 rand32 *= prime1;
416 rand32 += prime2;
417 rand32 = FUZ_rotl32(rand32, 13);
418 *src = rand32;
419 return rand32 >> 5;
420}
421
422#define BOUNDCHECK(val,min,max) { \
423 if (((val)<(min)) | ((val)>(max))) { \
424 DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \
425 return 0; \
426} }
427
428static int paramValid(const paramValues_t paramTarget)
429{
430 U32 i;
431 for(i = 0; i < NUM_PARAMS; i++) {
432 BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]);
433 }
434 return 1;
435}
436
437/* cParamUnsetMin() :
438 * if any parameter in paramTarget is not yet set,
439 * it will receive its corresponding minimal value.
440 * This function never fails */
441static paramValues_t cParamUnsetMin(paramValues_t paramTarget)
442{
443 varInds_t vi;
444 for (vi = 0; vi < NUM_PARAMS; vi++) {
445 if (paramTarget.vals[vi] == PARAM_UNSET) {
446 paramTarget.vals[vi] = mintable[vi];
447 }
448 }
449 return paramTarget;
450}
451
452static paramValues_t emptyParams(void)
453{
454 U32 i;
455 paramValues_t p;
456 for(i = 0; i < NUM_PARAMS; i++) {
457 p.vals[i] = PARAM_UNSET;
458 }
459 return p;
460}
461
462static winnerInfo_t initWinnerInfo(const paramValues_t p)
463{
464 winnerInfo_t w1;
465 w1.result.cSpeed = 0;
466 w1.result.dSpeed = 0;
467 w1.result.cMem = (size_t)-1;
468 w1.result.cSize = (size_t)-1;
469 w1.params = p;
470 return w1;
471}
472
473static paramValues_t
474overwriteParams(paramValues_t base, const paramValues_t mask)
475{
476 U32 i;
477 for(i = 0; i < NUM_PARAMS; i++) {
478 if(mask.vals[i] != PARAM_UNSET) {
479 base.vals[i] = mask.vals[i];
480 }
481 }
482 return base;
483}
484
485static void
486paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr)
487{
488 ptr->vals[paramIndex] = rangeMap(paramIndex,
489 invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt);
490}
491
492/* varies ptr by nbChanges respecting varyParams*/
493static void
494paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges)
495{
496 paramValues_t p;
497 int validated = 0;
498 while (!validated) {
499 U32 i;
500 p = *ptr;
501 for (i = 0 ; i < nbChanges ; i++) {
502 const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1);
503 paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1],
504 (int)((changeID & 1) << 1) - 1,
505 &p);
506 }
507 validated = paramValid(p);
508 }
509 *ptr = p;
510}
511
512/* Completely random parameter selection */
513static paramValues_t randomParams(void)
514{
515 varInds_t v; paramValues_t p;
516 for(v = 0; v < NUM_PARAMS; v++) {
517 p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v]));
518 }
519 return p;
520}
521
522static U64 g_clockGranularity = 100000000ULL;
523
524static void init_clockGranularity(void)
525{
526 UTIL_time_t const clockStart = UTIL_getTime();
527 U64 el1 = 0, el2 = 0;
528 int i = 0;
529 do {
530 el1 = el2;
531 el2 = UTIL_clockSpanNano(clockStart);
532 if(el1 < el2) {
533 U64 iv = el2 - el1;
534 if(g_clockGranularity > iv) {
535 g_clockGranularity = iv;
536 i = 0;
537 } else {
538 i++;
539 }
540 }
541 } while(i < 10);
542 DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity);
543}
544
545/*-************************************
546* Optimizer Util Functions
547**************************************/
548
549/* checks results are feasible */
550static int feasible(const BMK_benchResult_t results, const constraint_t target) {
551 return (results.cSpeed >= target.cSpeed)
552 && (results.dSpeed >= target.dSpeed)
553 && (results.cMem <= target.cMem)
554 && (!g_optmode || results.cSize <= g_lvltarget.cSize);
555}
556
557/* hill climbing value for part 1 */
558/* Scoring here is a linear reward for all set constraints normalized between 0 and 1
559 * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic
560 * bonus to exceeding the constraint value. We also give linear ratio for compression ratio.
561 * The constant factors are experimental.
562 */
563static double
564resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target)
565{
566 double cs = 0., ds = 0., rt, cm = 0.;
567 const double r1 = 1, r2 = 0.1, rtr = 0.5;
568 double ret;
569 if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; }
570 if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; }
571 if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; }
572 rt = ((double)srcSize / (double)res.cSize);
573
574 ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr +
575 (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2;
576
577 return ret;
578}
579
580/* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */
581static double
582resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes)
583{
584 double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1;
585 double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1;
586 if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) {
587 return 0.0;
588 }
589 return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1;
590}
591
592/* return true if r2 strictly better than r1 */
593static int
594compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize)
595{
596 if(feasible(result1, target) && feasible(result2, target)) {
597 if(g_optmode) {
598 return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget);
599 } else {
600 return (result1.cSize > result2.cSize)
601 || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
602 || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
603 }
604 }
605 return feasible(result2, target)
606 || (!feasible(result1, target)
607 && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
608}
609
610static constraint_t relaxTarget(constraint_t target) {
611 target.cMem = (U32)-1;
612 target.cSpeed = (target.cSpeed * g_strictness) / 100;
613 target.dSpeed = (target.dSpeed * g_strictness) / 100;
614 return target;
615}
616
617static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize)
618{
619 varInds_t v;
620 for(v = 0; v < NUM_PARAMS; v++) {
621 if(pc->vals[v] != PARAM_UNSET) {
622 U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]);
623 if(newval != pc->vals[v]) {
624 pc->vals[v] = newval;
625 DISPLAY("Warning: parameter %s not in valid range, adjusting to ",
626 g_paramNames[v]);
627 displayParamVal(stderr, v, newval, 0); DISPLAY("\n");
628 }
629 }
630 }
631
632 if(pc->vals[wlog_ind] != PARAM_UNSET) {
633
634 U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1;
635 /* edge case of highBit not working for 0 */
636
637 if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) {
638 U32 adjust = MAX(mintable[wlog_ind], sshb);
639 if(adjust != pc->vals[wlog_ind]) {
640 pc->vals[wlog_ind] = adjust;
641 DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n",
642 (unsigned)pc->vals[wlog_ind]);
643 }
644 }
645 }
646
647 if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
648 U32 maxclog;
649 if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) {
650 maxclog = pc->vals[wlog_ind] + 1;
651 } else {
652 maxclog = pc->vals[wlog_ind];
653 }
654
655 if(pc->vals[clog_ind] > maxclog) {
656 pc->vals[clog_ind] = maxclog;
657 DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n",
658 (unsigned)pc->vals[clog_ind]);
659 }
660 }
661
662 if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) {
663 if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) {
664 pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1;
665 DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n",
666 (unsigned)pc->vals[hlog_ind]);
667 }
668 }
669
670 if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
671 if(pc->vals[slog_ind] > pc->vals[clog_ind]) {
672 pc->vals[clog_ind] = pc->vals[slog_ind];
673 DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n",
674 (unsigned)pc->vals[slog_ind]);
675 }
676 }
677}
678
679static int
680redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize)
681{
682 return
683 (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */
684 || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */
685 || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/
686 || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */
687 || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */
688}
689
690
691/*-************************************
692* Display Functions
693**************************************/
694
695/* BMK_paramValues_into_commandLine() :
696 * transform a set of parameters paramValues_t
697 * into a command line compatible with `zstd` syntax
698 * and writes it into FILE* f.
699 * f must be already opened and writable */
700static void
701BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params)
702{
703 varInds_t v;
704 int first = 1;
705 fprintf(f,"--zstd=");
706 for (v = 0; v < NUM_PARAMS; v++) {
707 if (g_silenceParams[v]) { continue; }
708 if (!first) { fprintf(f, ","); }
709 fprintf(f,"%s=", g_paramNames[v]);
710
711 if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); }
712 else { displayParamVal(f, v, params.vals[v], 0); }
713 first = 0;
714 }
715 fprintf(f, "\n");
716}
717
718
719/* comparison function: */
720/* strictly better, strictly worse, equal, speed-side adv, size-side adv */
721#define WORSE_RESULT 0
722#define BETTER_RESULT 1
723#define ERROR_RESULT 2
724
725#define SPEED_RESULT 4
726#define SIZE_RESULT 5
727/* maybe have epsilon-eq to limit table size? */
728static int
729speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2)
730{
731 if(r1.cSpeed < r2.cSpeed) {
732 if(r1.cSize >= r2.cSize) {
733 return BETTER_RESULT;
734 }
735 return SPEED_RESULT; /* r2 is smaller but not faster. */
736 } else {
737 if(r1.cSize <= r2.cSize) {
738 return WORSE_RESULT;
739 }
740 return SIZE_RESULT; /* r2 is faster but not smaller */
741 }
742}
743
744/* 0 for insertion, 1 for no insert */
745/* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */
746static int
747insertWinner(const winnerInfo_t w, const constraint_t targetConstraints)
748{
749 BMK_benchResult_t r = w.result;
750 winner_ll_node* cur_node = g_winners;
751 /* first node to insert */
752 if(!feasible(r, targetConstraints)) {
753 return 1;
754 }
755
756 if(g_winners == NULL) {
757 winner_ll_node* first_node = malloc(sizeof(winner_ll_node));
758 if(first_node == NULL) {
759 return 1;
760 }
761 first_node->next = NULL;
762 first_node->res = w;
763 g_winners = first_node;
764 return 0;
765 }
766
767 while(cur_node->next != NULL) {
768 switch(speedSizeCompare(cur_node->res.result, r)) {
769 case WORSE_RESULT:
770 {
771 return 1; /* never insert if better */
772 }
773 case BETTER_RESULT:
774 {
775 winner_ll_node* tmp;
776 cur_node->res = cur_node->next->res;
777 tmp = cur_node->next;
778 cur_node->next = cur_node->next->next;
779 free(tmp);
780 break;
781 }
782 case SIZE_RESULT:
783 {
784 cur_node = cur_node->next;
785 break;
786 }
787 case SPEED_RESULT: /* insert after first size result, then return */
788 {
789 winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
790 if(newnode == NULL) {
791 return 1;
792 }
793 newnode->res = cur_node->res;
794 cur_node->res = w;
795 newnode->next = cur_node->next;
796 cur_node->next = newnode;
797 return 0;
798 }
799 }
800
801 }
802
803 assert(cur_node->next == NULL);
804 switch(speedSizeCompare(cur_node->res.result, r)) {
805 case WORSE_RESULT:
806 {
807 return 1; /* never insert if better */
808 }
809 case BETTER_RESULT:
810 {
811 cur_node->res = w;
812 return 0;
813 }
814 case SIZE_RESULT:
815 {
816 winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
817 if(newnode == NULL) {
818 return 1;
819 }
820 newnode->res = w;
821 newnode->next = NULL;
822 cur_node->next = newnode;
823 return 0;
824 }
825 case SPEED_RESULT: /* insert before first size result, then return */
826 {
827 winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
828 if(newnode == NULL) {
829 return 1;
830 }
831 newnode->res = cur_node->res;
832 cur_node->res = w;
833 newnode->next = cur_node->next;
834 cur_node->next = newnode;
835 return 0;
836 }
837 default:
838 return 1;
839 }
840}
841
842static void
843BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize)
844{
845 varInds_t v;
846 int first = 1;
847 res.params = cParamUnsetMin(res.params);
848 fprintf(f, " {");
849 for (v = 0; v < NUM_PARAMS; v++) {
850 if (g_silenceParams[v]) { continue; }
851 if (!first) { fprintf(f, ","); }
852 displayParamVal(f, v, res.params.vals[v], 3);
853 first = 0;
854 }
855
856 { double const ratio = res.result.cSize ?
857 (double)srcSize / (double)res.result.cSize : 0;
858 double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
859 double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
860
861 fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
862 ratio, cSpeedMBps, dSpeedMBps);
863 }
864}
865
866/* Writes to f the results of a parameter benchmark */
867/* when used with --optimize, will only print results better than previously discovered */
868static void
869BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize)
870{
871 char lvlstr[15] = "Custom Level";
872 winnerInfo_t w;
873 w.params = params;
874 w.result = result;
875
876 fprintf(f, "\r%79s\r", "");
877
878 if(cLevel != CUSTOM_LEVEL) {
879 snprintf(lvlstr, 15, " Level %2d ", cLevel);
880 }
881
882 if(TIMED) {
883 const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC;
884 const U64 time_ns = UTIL_clockSpanNano(g_time);
885 const U64 minutes = time_ns / mn_in_ns;
886 fprintf(f, "%1lu:%2lu:%05.2f - ",
887 (unsigned long) minutes / 60,
888 (unsigned long) minutes % 60,
889 (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC );
890 }
891
892 fprintf(f, "/* %s */ ", lvlstr);
893 BMK_displayOneResult(f, w, srcSize);
894}
895
896static void
897BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize)
898{
899 /* global winner used for constraints */
900 /* cSize, cSpeed, dSpeed, cMem */
901 static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL },
902 { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } }
903 };
904 if ( DEBUG
905 || compareResultLT(g_winner.result, result, targetConstraints, srcSize)
906 || g_displayLevel >= 4) {
907 if ( DEBUG
908 && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
909 DISPLAY("New Winner: \n");
910 }
911
912 if(g_displayLevel >= 2) {
913 BMK_printWinner(f, cLevel, result, params, srcSize);
914 }
915
916 if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
917 if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); }
918 g_winner.result = result;
919 g_winner.params = params;
920 }
921 }
922
923 if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) {
924 winnerInfo_t w;
925 winner_ll_node* n;
926 w.result = result;
927 w.params = params;
928 insertWinner(w, targetConstraints);
929
930 if(!DEBUG) { fprintf(f, "\033c"); }
931 fprintf(f, "\n");
932
933 /* the table */
934 fprintf(f, "================================\n");
935 for(n = g_winners; n != NULL; n = n->next) {
936 BMK_displayOneResult(f, n->res, srcSize);
937 }
938 fprintf(f, "================================\n");
939 fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
940 (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
941
942
943 fprintf(f, "Overall Winner: \n");
944 BMK_displayOneResult(f, g_winner, srcSize);
945 BMK_paramValues_into_commandLine(f, g_winner.params);
946
947 fprintf(f, "Latest BMK: \n");\
948 BMK_displayOneResult(f, w, srcSize);
949 }
950}
951
952
953/* BMK_print_cLevelEntry() :
954 * Writes one cLevelTable entry, for one level.
955 * f must exist, be already opened, and be seekable.
956 * this function cannot error.
957 */
958static void
959BMK_print_cLevelEntry(FILE* f, const int cLevel,
960 paramValues_t params,
961 const BMK_benchResult_t result, const size_t srcSize)
962{
963 varInds_t v;
964 int first = 1;
965
966 assert(cLevel >= 0);
967 assert(cLevel <= NB_LEVELS_TRACKED);
968 params = cParamUnsetMin(params);
969
970 fprintf(f, " {");
971 /* print cParams.
972 * assumption : all cParams are present and in order in the following range */
973 for (v = 0; v <= strt_ind; v++) {
974 if (!first) { fprintf(f, ","); }
975 displayParamVal(f, v, params.vals[v], 3);
976 first = 0;
977 }
978 /* print comment */
979 { double const ratio = result.cSize ?
980 (double)srcSize / (double)result.cSize : 0;
981 double const cSpeedMBps = (double)result.cSpeed / MB_UNIT;
982 double const dSpeedMBps = (double)result.dSpeed / MB_UNIT;
983
984 fprintf(f, " }, /* level %2i: R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
985 cLevel, ratio, cSpeedMBps, dSpeedMBps);
986 }
987}
988
989
990/* BMK_print_cLevelTable() :
991 * print candidate compression table into proposed FILE* f.
992 * f must exist, be already opened, and be seekable.
993 * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
994 * this function cannot error.
995 */
996static void
997BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize)
998{
999 int cLevel;
1000
1001 fprintf(f, "\n /* Proposed configurations : */ \n");
1002 fprintf(f, " /* W, C, H, S, L, T, strat */ \n");
1003
1004 for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++)
1005 BMK_print_cLevelEntry(f,
1006 cLevel, winners[cLevel].params,
1007 winners[cLevel].result, srcSize);
1008}
1009
1010
1011/* BMK_saveAndPrint_cLevelTable() :
1012 * save candidate compression table into FILE* f,
1013 * and then to stdout.
1014 * f must exist, be already opened, and be seekable.
1015 * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
1016 * this function cannot error.
1017 */
1018static void
1019BMK_saveAndPrint_cLevelTable(FILE* const f,
1020 const winnerInfo_t* winners,
1021 const size_t srcSize)
1022{
1023 fseek(f, 0, SEEK_SET);
1024 BMK_print_cLevelTable(f, winners, srcSize);
1025 fflush(f);
1026 BMK_print_cLevelTable(stdout, winners, srcSize);
1027}
1028
1029
1030/*-*******************************************************
1031* Functions to Benchmark
1032*********************************************************/
1033
1034typedef struct {
1035 ZSTD_CCtx* cctx;
1036 const void* dictBuffer;
1037 size_t dictBufferSize;
1038 int cLevel;
1039 const paramValues_t* comprParams;
1040} BMK_initCCtxArgs;
1041
1042static size_t local_initCCtx(void* payload) {
1043 const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload;
1044 varInds_t i;
1045 ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters);
1046 ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel);
1047
1048 for(i = 0; i < NUM_PARAMS; i++) {
1049 if(ag->comprParams->vals[i] != PARAM_UNSET)
1050 ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]);
1051 }
1052 ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize);
1053
1054 return 0;
1055}
1056
1057typedef struct {
1058 ZSTD_DCtx* dctx;
1059 const void* dictBuffer;
1060 size_t dictBufferSize;
1061} BMK_initDCtxArgs;
1062
1063static size_t local_initDCtx(void* payload) {
1064 const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload;
1065 ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters);
1066 ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
1067 return 0;
1068}
1069
1070/* additional argument is just the context */
1071static size_t local_defaultCompress(
1072 const void* srcBuffer, size_t srcSize,
1073 void* dstBuffer, size_t dstSize,
1074 void* addArgs)
1075{
1076 ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs;
1077 assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */
1078 return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
1079}
1080
1081/* additional argument is just the context */
1082static size_t local_defaultDecompress(
1083 const void* srcBuffer, size_t srcSize,
1084 void* dstBuffer, size_t dstSize,
1085 void* addArgs) {
1086 size_t moreToFlush = 1;
1087 ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs;
1088 ZSTD_inBuffer in;
1089 ZSTD_outBuffer out;
1090 in.src = srcBuffer;
1091 in.size = srcSize;
1092 in.pos = 0;
1093 out.dst = dstBuffer;
1094 out.size = dstSize;
1095 out.pos = 0;
1096 while (moreToFlush) {
1097 if(out.pos == out.size) {
1098 return (size_t)-ZSTD_error_dstSize_tooSmall;
1099 }
1100 moreToFlush = ZSTD_decompressStream(dctx,
1101 &out, &in);
1102 if (ZSTD_isError(moreToFlush)) {
1103 return moreToFlush;
1104 }
1105 }
1106 return out.pos;
1107
1108}
1109
1110/*-************************************
1111* Data Initialization Functions
1112**************************************/
1113
/* Benchmark sample, pre-split into blocks (filled by createBuffersFromMemory()) */
typedef struct {
    void* srcBuffer;          /* whole source content, contiguous */
    size_t srcSize;           /* total size of source content */
    const void** srcPtrs;     /* start of each source block, inside srcBuffer */
    size_t* srcSizes;         /* size of each source block */
    void** dstPtrs;           /* compressed destination of each block; dstPtrs[0] is the start of the single allocation shared by all blocks */
    size_t* dstCapacities;    /* capacity of each destination block : ZSTD_compressBound(srcSizes[n]) */
    size_t* dstSizes;         /* per-block result sizes; initialized to dstCapacities */
    void** resPtrs;           /* regenerated (decompressed) destination of each block; resPtrs[0] is the start of the single allocation shared by all blocks */
    size_t* resSizes;         /* capacity of each regenerated block : srcSizes[n] */
    size_t nbBlocks;          /* nb of blocks effectively defined */
    size_t maxBlockSize;      /* largest value within srcSizes */
} buffers_t;
1127
1128typedef struct {
1129 size_t dictSize;
1130 void* dictBuffer;
1131 ZSTD_CCtx* cctx;
1132 ZSTD_DCtx* dctx;
1133} contexts_t;
1134
1135static void freeNonSrcBuffers(const buffers_t b) {
1136 free((void*)b.srcPtrs);
1137 free(b.srcSizes);
1138
1139 if(b.dstPtrs != NULL) {
1140 free(b.dstPtrs[0]);
1141 }
1142 free(b.dstPtrs);
1143 free(b.dstCapacities);
1144 free(b.dstSizes);
1145
1146 if(b.resPtrs != NULL) {
1147 free(b.resPtrs[0]);
1148 }
1149 free(b.resPtrs);
1150 free(b.resSizes);
1151}
1152
1153static void freeBuffers(const buffers_t b) {
1154 if(b.srcPtrs != NULL) {
1155 free(b.srcBuffer);
1156 }
1157 freeNonSrcBuffers(b);
1158}
1159
1160/* srcBuffer will be freed by freeBuffers now */
1161static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles,
1162 const size_t* fileSizes)
1163{
1164 size_t pos = 0, n, blockSize;
1165 U32 maxNbBlocks, blockNb = 0;
1166 buff->srcSize = 0;
1167 for(n = 0; n < nbFiles; n++) {
1168 buff->srcSize += fileSizes[n];
1169 }
1170
1171 if(buff->srcSize == 0) {
1172 DISPLAY("No data to bench\n");
1173 return 1;
1174 }
1175
1176 blockSize = g_blockSize ? g_blockSize : buff->srcSize;
1177 maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles;
1178
1179 buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*));
1180 buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1181
1182 buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1183 buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1184 buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1185
1186 buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1187 buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1188
1189 if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) {
1190 DISPLAY("alloc error\n");
1191 freeNonSrcBuffers(*buff);
1192 return 1;
1193 }
1194
1195 buff->srcBuffer = srcBuffer;
1196 buff->srcPtrs[0] = (const void*)buff->srcBuffer;
1197 buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024));
1198 buff->resPtrs[0] = malloc(buff->srcSize);
1199
1200 if(!buff->dstPtrs[0] || !buff->resPtrs[0]) {
1201 DISPLAY("alloc error\n");
1202 freeNonSrcBuffers(*buff);
1203 return 1;
1204 }
1205
1206 for(n = 0; n < nbFiles; n++) {
1207 size_t pos_end = pos + fileSizes[n];
1208 for(; pos < pos_end; blockNb++) {
1209 buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos);
1210 buff->srcSizes[blockNb] = blockSize;
1211 pos += blockSize;
1212 }
1213
1214 if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; }
1215 pos = pos_end;
1216 }
1217
1218 buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]);
1219 buff->dstSizes[0] = buff->dstCapacities[0];
1220 buff->resSizes[0] = buff->srcSizes[0];
1221 buff->maxBlockSize = buff->srcSizes[0];
1222
1223 for(n = 1; n < blockNb; n++) {
1224 buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1];
1225 buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1];
1226 buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]);
1227 buff->dstSizes[n] = buff->dstCapacities[n];
1228 buff->resSizes[n] = buff->srcSizes[n];
1229
1230 buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]);
1231 }
1232
1233 buff->nbBlocks = blockNb;
1234
1235 return 0;
1236}
1237
1238/* allocates buffer's arguments. returns success / failure */
1239static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable,
1240 size_t nbFiles) {
1241 size_t pos = 0;
1242 size_t n;
1243 size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
1244 size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad);
1245 size_t* fileSizes = calloc(sizeof(size_t), nbFiles);
1246 void* srcBuffer = NULL;
1247 int ret = 0;
1248
1249 if(!totalSizeToLoad || !benchedSize) {
1250 ret = 1;
1251 DISPLAY("Nothing to Bench\n");
1252 goto _cleanUp;
1253 }
1254
1255 srcBuffer = malloc(benchedSize);
1256
1257 if(!fileSizes || !srcBuffer) {
1258 ret = 1;
1259 goto _cleanUp;
1260 }
1261
1262 for(n = 0; n < nbFiles; n++) {
1263 FILE* f;
1264 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
1265 if (UTIL_isDirectory(fileNamesTable[n])) {
1266 DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]);
1267 continue;
1268 }
1269 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1270 DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
1271 continue;
1272 }
1273 f = fopen(fileNamesTable[n], "rb");
1274 if (f==NULL) {
1275 DISPLAY("impossible to open file %s\n", fileNamesTable[n]);
1276 fclose(f);
1277 ret = 10;
1278 goto _cleanUp;
1279 }
1280
1281 DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
1282
1283 if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */
1284 {
1285 char* buffer = (char*)(srcBuffer);
1286 size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f);
1287 fclose(f);
1288 if (readSize != (size_t)fileSize) {
1289 DISPLAY("could not read %s", fileNamesTable[n]);
1290 ret = 1;
1291 goto _cleanUp;
1292 }
1293
1294 fileSizes[n] = readSize;
1295 pos += readSize;
1296 }
1297 }
1298
1299 ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes);
1300
1301_cleanUp:
1302 if(ret) { free(srcBuffer); }
1303 free(fileSizes);
1304 return ret;
1305}
1306
1307static void freeContexts(const contexts_t ctx) {
1308 free(ctx.dictBuffer);
1309 ZSTD_freeCCtx(ctx.cctx);
1310 ZSTD_freeDCtx(ctx.dctx);
1311}
1312
1313static int createContexts(contexts_t* ctx, const char* dictFileName) {
1314 FILE* f;
1315 size_t readSize;
1316 ctx->cctx = ZSTD_createCCtx();
1317 ctx->dctx = ZSTD_createDCtx();
1318 assert(ctx->cctx != NULL);
1319 assert(ctx->dctx != NULL);
1320
1321 if(dictFileName == NULL) {
1322 ctx->dictSize = 0;
1323 ctx->dictBuffer = NULL;
1324 return 0;
1325 }
1326 { U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1327 assert(dictFileSize != UTIL_FILESIZE_UNKNOWN);
1328 ctx->dictSize = (size_t)dictFileSize;
1329 assert((U64)ctx->dictSize == dictFileSize); /* check overflow */
1330 }
1331 ctx->dictBuffer = malloc(ctx->dictSize);
1332
1333 f = fopen(dictFileName, "rb");
1334
1335 if (f==NULL) {
1336 DISPLAY("unable to open file\n");
1337 freeContexts(*ctx);
1338 return 1;
1339 }
1340
1341 if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) {
1342 DISPLAY("dictionary too large\n");
1343 fclose(f);
1344 freeContexts(*ctx);
1345 return 1;
1346 }
1347 readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f);
1348 fclose(f);
1349 if (readSize != ctx->dictSize) {
1350 DISPLAY("unable to read file\n");
1351 freeContexts(*ctx);
1352 return 1;
1353 }
1354 return 0;
1355}
1356
1357/*-************************************
1358* Optimizer Memoization Functions
1359**************************************/
1360
1361/* return: new length */
1362/* keep old array, will need if iter over strategy. */
1363/* prunes useless params */
1364static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) {
1365 size_t i, j = 0;
1366 for(i = 0; i < varLength; i++) {
1367 if( !((varArray[i] == clog_ind && strat == ZSTD_fast)
1368 || (varArray[i] == slog_ind && strat == ZSTD_fast)
1369 || (varArray[i] == slog_ind && strat == ZSTD_dfast)
1370 || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) {
1371 varNew[j] = varArray[i];
1372 j++;
1373 }
1374 }
1375 return j;
1376}
1377
1378/* res should be NUM_PARAMS size */
1379/* constructs varArray from paramValues_t style parameter */
1380/* pass in using dict. */
1381static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) {
1382 varInds_t i;
1383 size_t j = 0;
1384 for(i = 0; i < NUM_PARAMS; i++) {
1385 if(paramConstraints.vals[i] == PARAM_UNSET) {
1386 if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */
1387 res[j] = i; j++;
1388 }
1389 }
1390 return j;
1391}
1392
1393/* length of memo table given free variables */
1394static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) {
1395 size_t arrayLen = 1;
1396 size_t i;
1397 for(i = 0; i < varyLen; i++) {
1398 if(varyParams[i] == strt_ind) continue; /* strategy separated by table */
1399 arrayLen *= rangetable[varyParams[i]];
1400 }
1401 return arrayLen;
1402}
1403
1404/* returns unique index in memotable of compression parameters */
1405static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) {
1406 size_t i;
1407 unsigned ind = 0;
1408 for(i = 0; i < varyLen; i++) {
1409 varInds_t v = varyParams[i];
1410 if(v == strt_ind) continue; /* exclude strategy from memotable */
1411 ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]);
1412 }
1413 return ind;
1414}
1415
1416static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) {
1417 const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1418 switch(mt.tableType) {
1419 case directMap:
1420 return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)];
1421 case xxhashMap:
1422 return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen];
1423 case noMemo:
1424 return 0;
1425 }
1426 return 0; /* should never happen, stop compiler warnings */
1427}
1428
1429static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) {
1430 const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1431 switch(mt.tableType) {
1432 case directMap:
1433 mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break;
1434 case xxhashMap:
1435 mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break;
1436 case noMemo:
1437 break;
1438 }
1439}
1440
1441/* frees all allocated memotables */
1442/* secret contract :
1443 * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */
1444static void freeMemoTableArray(memoTable_t* const mtAll) {
1445 int i;
1446 if(mtAll == NULL) { return; }
1447 for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1448 free(mtAll[i].table);
1449 }
1450 free(mtAll);
1451}
1452
1453/* inits memotables for all (including mallocs), all strategies */
1454/* takes unsanitized varyParams */
1455static memoTable_t*
1456createMemoTableArray(const paramValues_t p,
1457 const varInds_t* const varyParams,
1458 const size_t varyLen,
1459 const U32 memoTableLog)
1460{
1461 memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1));
1462 ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX;
1463
1464 if(mtAll == NULL) {
1465 return NULL;
1466 }
1467
1468 for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1469 mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i);
1470 }
1471
1472 /* no memoization */
1473 if(memoTableLog == 0) {
1474 for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1475 mtAll[i].tableType = noMemo;
1476 mtAll[i].table = NULL;
1477 mtAll[i].tableLen = 0;
1478 }
1479 return mtAll;
1480 }
1481
1482
1483 if(p.vals[strt_ind] != PARAM_UNSET) {
1484 stratMin = p.vals[strt_ind];
1485 stratMax = p.vals[strt_ind];
1486 }
1487
1488
1489 for(i = stratMin; i <= stratMax; i++) {
1490 size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen);
1491 mtAll[i].tableType = directMap;
1492
1493 if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */
1494 mtAll[i].tableType = xxhashMap;
1495 mtl = ((size_t)1 << memoTableLog);
1496 }
1497
1498 mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl);
1499 mtAll[i].tableLen = mtl;
1500
1501 if(mtAll[i].table == NULL) {
1502 freeMemoTableArray(mtAll);
1503 return NULL;
1504 }
1505 }
1506
1507 return mtAll;
1508}
1509
1510/* Sets pc to random unmeasured set of parameters */
1511/* specify strategy */
1512static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st)
1513{
1514 size_t j;
1515 const memoTable_t mt = memoTableArray[st];
1516 pc->vals[strt_ind] = st;
1517 for(j = 0; j < mt.tableLen; j++) {
1518 int i;
1519 for(i = 0; i < NUM_PARAMS; i++) {
1520 varInds_t v = mt.varArray[i];
1521 if(v == strt_ind) continue;
1522 pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
1523 }
1524
1525 if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */
1526 }
1527}
1528
1529/*-************************************
1530* Benchmarking Functions
1531**************************************/
1532
1533static void display_params_tested(paramValues_t cParams)
1534{
1535 varInds_t vi;
1536 DISPLAYLEVEL(3, "\r testing :");
1537 for (vi=0; vi < NUM_PARAMS; vi++) {
1538 DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]);
1539 }
1540 DISPLAYLEVEL(3, "\b \r");
1541}
1542
1543/* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */
1544/* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */
1545/* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */
1546/* nbSeconds used in same way as in BMK_advancedParams_t */
1547/* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */
1548/* dictionary nullable, nothing else though. */
1549/* note : it would be a lot better if this function was present in benchzstd.c,
1550 * sharing code with benchMemAdvanced(), since it's technically a part of it */
1551static BMK_benchOutcome_t
1552BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
1553 int cLevel, const paramValues_t* comprParams,
1554 BMK_mode_t mode, unsigned nbSeconds)
1555{
1556 U32 i;
1557 BMK_benchResult_t bResult;
1558 const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs;
1559 size_t const *const srcSizes = buf.srcSizes;
1560 void** const dstPtrs = buf.dstPtrs;
1561 size_t const *const dstCapacities = buf.dstCapacities;
1562 size_t* const dstSizes = buf.dstSizes;
1563 void** const resPtrs = buf.resPtrs;
1564 size_t const *const resSizes = buf.resSizes;
1565 const void* dictBuffer = ctx.dictBuffer;
1566 const size_t dictBufferSize = ctx.dictSize;
1567 const size_t nbBlocks = buf.nbBlocks;
1568 const size_t srcSize = buf.srcSize;
1569 ZSTD_CCtx* cctx = ctx.cctx;
1570 ZSTD_DCtx* dctx = ctx.dctx;
1571
1572 /* init */
1573 display_params_tested(*comprParams);
1574 memset(&bResult, 0, sizeof(bResult));
1575
1576 /* warming up memory */
1577 for (i = 0; i < buf.nbBlocks; i++) {
1578 if (mode != BMK_decodeOnly) {
1579 RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1);
1580 } else {
1581 RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1);
1582 }
1583 }
1584
1585 /* Bench */
1586 {
1587 /* init args */
1588 int compressionCompleted = (mode == BMK_decodeOnly);
1589 int decompressionCompleted = (mode == BMK_compressOnly);
1590 BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1591 BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1592 BMK_benchParams_t cbp, dbp;
1593 BMK_initCCtxArgs cctxprep;
1594 BMK_initDCtxArgs dctxprep;
1595
1596 cbp.benchFn = local_defaultCompress;
1597 cbp.benchPayload = cctx;
1598 cbp.initFn = local_initCCtx;
1599 cbp.initPayload = &cctxprep;
1600 cbp.errorFn = ZSTD_isError;
1601 cbp.blockCount = nbBlocks;
1602 cbp.srcBuffers = srcPtrs;
1603 cbp.srcSizes = srcSizes;
1604 cbp.dstBuffers = dstPtrs;
1605 cbp.dstCapacities = dstCapacities;
1606 cbp.blockResults = dstSizes;
1607
1608 cctxprep.cctx = cctx;
1609 cctxprep.dictBuffer = dictBuffer;
1610 cctxprep.dictBufferSize = dictBufferSize;
1611 cctxprep.cLevel = cLevel;
1612 cctxprep.comprParams = comprParams;
1613
1614 dbp.benchFn = local_defaultDecompress;
1615 dbp.benchPayload = dctx;
1616 dbp.initFn = local_initDCtx;
1617 dbp.initPayload = &dctxprep;
1618 dbp.errorFn = ZSTD_isError;
1619 dbp.blockCount = nbBlocks;
1620 dbp.srcBuffers = (const void* const *) dstPtrs;
1621 dbp.srcSizes = dstCapacities;
1622 dbp.dstBuffers = resPtrs;
1623 dbp.dstCapacities = resSizes;
1624 dbp.blockResults = NULL;
1625
1626 dctxprep.dctx = dctx;
1627 dctxprep.dictBuffer = dictBuffer;
1628 dctxprep.dictBufferSize = dictBufferSize;
1629
1630 assert(timeStateCompress != NULL);
1631 assert(timeStateDecompress != NULL);
1632 while(!compressionCompleted) {
1633 BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);
1634
1635 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
1636 BMK_benchOutcome_t bOut;
1637 memset(&bOut, 0, sizeof(bOut));
1638 bOut.tag = 1; /* should rather be a function or a constant */
1639 BMK_freeTimedFnState(timeStateCompress);
1640 BMK_freeTimedFnState(timeStateDecompress);
1641 return bOut;
1642 }
1643 { BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
1644 bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1645 bResult.cSize = rResult.sumOfReturn;
1646 }
1647 compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
1648 }
1649
1650 while (!decompressionCompleted) {
1651 BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
1652
1653 if (!BMK_isSuccessful_runOutcome(dOutcome)) {
1654 BMK_benchOutcome_t bOut;
1655 memset(&bOut, 0, sizeof(bOut));
1656 bOut.tag = 1; /* should rather be a function or a constant */
1657 BMK_freeTimedFnState(timeStateCompress);
1658 BMK_freeTimedFnState(timeStateDecompress);
1659 return bOut;
1660 }
1661 { BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
1662 bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1663 }
1664 decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
1665 }
1666
1667 BMK_freeTimedFnState(timeStateCompress);
1668 BMK_freeTimedFnState(timeStateDecompress);
1669 }
1670
1671 /* Bench */
1672 bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
1673
1674 { BMK_benchOutcome_t bOut;
1675 bOut.tag = 0;
1676 bOut.internal_never_use_directly = bResult; /* should be a function */
1677 return bOut;
1678 }
1679}
1680
1681/* BMK_benchParam() :
1682 * benchmark a set of `cParams` over sample `buf`,
1683 * store the result in `resultPtr`.
1684 * @return : 0 if success, 1 if error */
1685static int BMK_benchParam ( BMK_benchResult_t* resultPtr,
1686 buffers_t buf, contexts_t ctx,
1687 paramValues_t cParams)
1688{
1689 BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx,
1690 BASE_CLEVEL, &cParams,
1691 BMK_both, 3);
1692 if (!BMK_isSuccessful_benchOutcome(outcome)) return 1;
1693 *resultPtr = BMK_extract_benchResult(outcome);
1694 return 0;
1695}
1696
1697
1698/* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */
1699#define VARIANCE 1.2
1700static int allBench(BMK_benchResult_t* resultPtr,
1701 const buffers_t buf, const contexts_t ctx,
1702 const paramValues_t cParams,
1703 const constraint_t target,
1704 BMK_benchResult_t* winnerResult, int feas)
1705{
1706 BMK_benchResult_t benchres;
1707 double uncertaintyConstantC = 3., uncertaintyConstantD = 3.;
1708 double winnerRS;
1709
1710 BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2);
1711 if (!BMK_isSuccessful_benchOutcome(outcome)) {
1712 DEBUGOUTPUT("Benchmarking failed \n");
1713 return ERROR_RESULT;
1714 }
1715 benchres = BMK_extract_benchResult(outcome);
1716
1717 winnerRS = resultScore(*winnerResult, buf.srcSize, target);
1718 DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS);
1719
1720 *resultPtr = benchres;
1721
1722 /* anything with worse ratio in feas is definitely worse, discard */
1723 if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) {
1724 return WORSE_RESULT;
1725 }
1726
1727 /* calculate uncertainty in compression / decompression runs */
1728 if (benchres.cSpeed) {
1729 double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
1730 uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
1731 }
1732
1733 if (benchres.dSpeed) {
1734 double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
1735 uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
1736 }
1737
1738 /* optimistic assumption of benchres */
1739 { BMK_benchResult_t resultMax = benchres;
1740 resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
1741 resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
1742
1743 /* disregard infeasible results in feas mode */
1744 /* disregard if resultMax < winner in infeas mode */
1745 if((feas && !feasible(resultMax, target)) ||
1746 (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
1747 return WORSE_RESULT;
1748 }
1749 }
1750
1751 /* compare by resultScore when in infeas */
1752 /* compare by compareResultLT when in feas */
1753 if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) ||
1754 (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) ) {
1755 return BETTER_RESULT;
1756 } else {
1757 return WORSE_RESULT;
1758 }
1759}
1760
1761
1762#define INFEASIBLE_THRESHOLD 200
1763/* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */
1764static int benchMemo(BMK_benchResult_t* resultPtr,
1765 const buffers_t buf, const contexts_t ctx,
1766 const paramValues_t cParams,
1767 const constraint_t target,
1768 BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray,
1769 const int feas) {
1770 static int bmcount = 0;
1771 int res;
1772
1773 if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD
1774 || redundantParams(cParams, target, buf.maxBlockSize) ) {
1775 return WORSE_RESULT;
1776 }
1777
1778 res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas);
1779
1780 if(DEBUG && !(bmcount % 250)) {
1781 DISPLAY("Count: %d\n", bmcount);
1782 bmcount++;
1783 }
1784 BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize);
1785
1786 if(res == BETTER_RESULT || feas) {
1787 memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */
1788 }
1789 return res;
1790}
1791
1792
1793typedef struct {
1794 U64 cSpeed_min;
1795 U64 dSpeed_min;
1796 U32 windowLog_max;
1797 ZSTD_strategy strategy_max;
1798} level_constraints_t;
1799
1800static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1];
1801
1802static void BMK_init_level_constraints(int bytePerSec_level1)
1803{
1804 assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel());
1805 memset(g_level_constraint, 0, sizeof(g_level_constraint));
1806 g_level_constraint[1].cSpeed_min = bytePerSec_level1;
1807 g_level_constraint[1].dSpeed_min = 0;
1808 g_level_constraint[1].windowLog_max = 19;
1809 g_level_constraint[1].strategy_max = ZSTD_fast;
1810
1811 /* establish speed objectives (relative to level 1) */
1812 { int l;
1813 for (l=2; l<=NB_LEVELS_TRACKED; l++) {
1814 g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64;
1815 g_level_constraint[l].dSpeed_min = 0;
1816 g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */
1817 g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX;
1818 } }
1819}
1820
1821static int BMK_seed(winnerInfo_t* winners,
1822 const paramValues_t params,
1823 const buffers_t buf,
1824 const contexts_t ctx)
1825{
1826 BMK_benchResult_t testResult;
1827 int better = 0;
1828 int cLevel;
1829
1830 BMK_benchParam(&testResult, buf, ctx, params);
1831
1832 for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) {
1833
1834 if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min)
1835 continue; /* not fast enough for this level */
1836 if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min)
1837 continue; /* not fast enough for this level */
1838 if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max)
1839 continue; /* too much memory for this level */
1840 if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max)
1841 continue; /* forbidden strategy for this level */
1842 if (winners[cLevel].result.cSize==0) {
1843 /* first solution for this cLevel */
1844 winners[cLevel].result = testResult;
1845 winners[cLevel].params = params;
1846 BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1847 better = 1;
1848 continue;
1849 }
1850
1851 if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
1852 /* Validate solution is "good enough" */
1853 double W_ratio = (double)buf.srcSize / (double)testResult.cSize;
1854 double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize;
1855 double W_ratioNote = log (W_ratio);
1856 double O_ratioNote = log (O_ratio);
1857 size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB);
1858 size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB);
1859 double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
1860 double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
1861
1862 size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
1863 size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
1864 double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
1865 double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
1866
1867 double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed);
1868 double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);
1869
1870 double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed);
1871 double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
1872
1873 if (W_DMemUsed_note < O_DMemUsed_note) {
1874 /* uses too much Decompression memory for too little benefit */
1875 if (W_ratio > O_ratio)
1876 DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
1877 W_ratio, (double)(W_DMemUsed) / 1024 / 1024,
1878 O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel);
1879 continue;
1880 }
1881 if (W_CMemUsed_note < O_CMemUsed_note) {
1882 /* uses too much memory for compression for too little benefit */
1883 if (W_ratio > O_ratio)
1884 DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n",
1885 W_ratio, (double)(W_CMemUsed) / 1024 / 1024,
1886 O_ratio, (double)(O_CMemUsed) / 1024 / 1024,
1887 cLevel);
1888 continue;
1889 }
1890 if (W_CSpeed_note < O_CSpeed_note ) {
1891 /* too large compression speed difference for the compression benefit */
1892 if (W_ratio > O_ratio)
1893 DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
1894 W_ratio, (double)testResult.cSpeed / MB_UNIT,
1895 O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT,
1896 cLevel);
1897 continue;
1898 }
1899 if (W_DSpeed_note < O_DSpeed_note ) {
1900 /* too large decompression speed difference for the compression benefit */
1901 if (W_ratio > O_ratio)
1902 DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n",
1903 W_ratio, (double)testResult.dSpeed / MB_UNIT,
1904 O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT,
1905 cLevel);
1906 continue;
1907 }
1908
1909 if (W_ratio < O_ratio)
1910 DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n",
1911 W_ratio, O_ratio, cLevel);
1912
1913 winners[cLevel].result = testResult;
1914 winners[cLevel].params = params;
1915 BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1916
1917 better = 1;
1918 } }
1919
1920 return better;
1921}
1922
1923/*-************************************
1924* Compression Level Table Generation Functions
1925**************************************/
1926
1927#define PARAMTABLELOG 25
1928#define PARAMTABLESIZE (1<<PARAMTABLELOG)
1929#define PARAMTABLEMASK (PARAMTABLESIZE-1)
1930static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */
1931
1932static BYTE* NB_TESTS_PLAYED(paramValues_t p)
1933{
1934 ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p));
1935 unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0);
1936 return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK];
1937}
1938
1939static void playAround(FILE* f,
1940 winnerInfo_t* winners,
1941 paramValues_t p,
1942 const buffers_t buf, const contexts_t ctx)
1943{
1944 int nbVariations = 0;
1945 UTIL_time_t const clockStart = UTIL_getTime();
1946
1947 while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) {
1948 if (nbVariations++ > g_maxNbVariations) break;
1949
1950 do {
1951 int i;
1952 for(i = 0; i < 4; i++) {
1953 paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1),
1954 ((FUZ_rand(&g_rand) & 1) << 1) - 1,
1955 &p);
1956 }
1957 } while (!paramValid(p));
1958
1959 /* exclude faster if already played params */
1960 if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1))
1961 continue;
1962
1963 /* test */
1964 { BYTE* const b = NB_TESTS_PLAYED(p);
1965 (*b)++;
1966 }
1967 if (!BMK_seed(winners, p, buf, ctx)) continue;
1968
1969 /* improvement found => search more */
1970 BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
1971 playAround(f, winners, p, buf, ctx);
1972 }
1973
1974}
1975
1976static void
1977BMK_selectRandomStart( FILE* f,
1978 winnerInfo_t* winners,
1979 const buffers_t buf, const contexts_t ctx)
1980{
1981 U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1);
1982 if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) {
1983 /* use some random entry */
1984 paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */
1985 buf.srcSize, 0);
1986 playAround(f, winners, p, buf, ctx);
1987 } else {
1988 playAround(f, winners, winners[id].params, buf, ctx);
1989 }
1990}
1991
1992
1993/* BMK_generate_cLevelTable() :
1994 * test a large number of configurations
1995 * and distribute them across compression levels according to speed conditions.
1996 * display and save all intermediate results into rfName = "grillResults.txt".
1997 * the function automatically stops after g_timeLimit_s.
1998 * this function cannot error, it directly exit() in case of problem.
1999 */
2000static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx)
2001{
2002 paramValues_t params;
2003 winnerInfo_t winners[NB_LEVELS_TRACKED+1];
2004 const char* const rfName = "grillResults.txt";
2005 FILE* const f = fopen(rfName, "w");
2006
2007 /* init */
2008 assert(g_singleRun==0);
2009 memset(winners, 0, sizeof(winners));
2010 if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
2011
2012 if (g_target) {
2013 BMK_init_level_constraints(g_target * MB_UNIT);
2014 } else {
2015 /* baseline config for level 1 */
2016 paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));
2017 BMK_benchResult_t testResult;
2018 BMK_benchParam(&testResult, buf, ctx, l1params);
2019 BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32));
2020 }
2021
2022 /* populate initial solution */
2023 { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2024 int i;
2025 for (i=0; i<=maxSeeds; i++) {
2026 params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0));
2027 BMK_seed(winners, params, buf, ctx);
2028 } }
2029 BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2030
2031 /* start tests */
2032 { const UTIL_time_t grillStart = UTIL_getTime();
2033 do {
2034 BMK_selectRandomStart(f, winners, buf, ctx);
2035 } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s);
2036 }
2037
2038 /* end summary */
2039 BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2040 DISPLAY("grillParams operations completed \n");
2041
2042 /* clean up*/
2043 fclose(f);
2044}
2045
2046
2047/*-************************************
2048* Single Benchmark Functions
2049**************************************/
2050
2051static int
2052benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel)
2053{
2054 BMK_benchResult_t testResult;
2055 g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2056
2057 if (BMK_benchParam(&testResult, buf, ctx, g_params)) {
2058 DISPLAY("Error during benchmarking\n");
2059 return 1;
2060 }
2061
2062 BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize);
2063
2064 return 0;
2065}
2066
2067static int benchSample(double compressibility, int cLevel)
2068{
2069 const char* const name = "Sample 10MB";
2070 size_t const benchedSize = 10 MB;
2071 void* const srcBuffer = malloc(benchedSize);
2072 int ret = 0;
2073
2074 buffers_t buf;
2075 contexts_t ctx;
2076
2077 if(srcBuffer == NULL) {
2078 DISPLAY("Out of Memory\n");
2079 return 2;
2080 }
2081
2082 RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
2083
2084 if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) {
2085 DISPLAY("Buffer Creation Error\n");
2086 free(srcBuffer);
2087 return 3;
2088 }
2089
2090 if(createContexts(&ctx, NULL)) {
2091 DISPLAY("Context Creation Error\n");
2092 freeBuffers(buf);
2093 return 1;
2094 }
2095
2096 /* bench */
2097 DISPLAY("\r%79s\r", "");
2098 DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100));
2099
2100 if(g_singleRun) {
2101 ret = benchOnce(buf, ctx, cLevel);
2102 } else {
2103 BMK_generate_cLevelTable(buf, ctx);
2104 }
2105
2106 freeBuffers(buf);
2107 freeContexts(ctx);
2108
2109 return ret;
2110}
2111
2112/* benchFiles() :
2113 * note: while this function takes a table of filenames,
2114 * in practice, only the first filename will be used */
2115static int benchFiles(const char** fileNamesTable, int nbFiles,
2116 const char* dictFileName, int cLevel)
2117{
2118 buffers_t buf;
2119 contexts_t ctx;
2120 int ret = 0;
2121
2122 if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2123 DISPLAY("unable to load files\n");
2124 return 1;
2125 }
2126
2127 if (createContexts(&ctx, dictFileName)) {
2128 DISPLAY("unable to load dictionary\n");
2129 freeBuffers(buf);
2130 return 2;
2131 }
2132
2133 DISPLAY("\r%79s\r", "");
2134 if (nbFiles == 1) {
2135 DISPLAY("using %s : \n", fileNamesTable[0]);
2136 } else {
2137 DISPLAY("using %d Files : \n", nbFiles);
2138 }
2139
2140 if (g_singleRun) {
2141 ret = benchOnce(buf, ctx, cLevel);
2142 } else {
2143 BMK_generate_cLevelTable(buf, ctx);
2144 }
2145
2146 freeBuffers(buf);
2147 freeContexts(ctx);
2148 return ret;
2149}
2150
2151
2152/*-************************************
2153* Local Optimization Functions
2154**************************************/
2155
2156/* One iteration of hill climbing. Specifically, it first tries all
2157 * valid parameter configurations w/ manhattan distance 1 and picks the best one
2158 * failing that, it progressively tries candidates further and further away (up to #dim + 2)
2159 * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the
2160 * current stage of hill climbing.
2161 * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to
2162 * the resultScore function, which is effectively a linear increase in reward until it reaches
2163 * the constraint-satisfying value, it which point any excess results in only logarithmic reward.
2164 * This aims to find some constraint-satisfying point.
2165 * Phase 2 optimizes in accordance with what the original function sets out to maximize, with
2166 * all feasible solutions valued over all infeasible solutions.
2167 */
2168
2169/* sanitize all params here.
2170 * all generation after random should be sanitized. (maybe sanitize random)
2171 */
2172static winnerInfo_t climbOnce(const constraint_t target,
2173 memoTable_t* mtAll,
2174 const buffers_t buf, const contexts_t ctx,
2175 const paramValues_t init)
2176{
2177 /*
2178 * cparam - currently considered 'center'
2179 * candidate - params to benchmark/results
2180 * winner - best option found so far.
2181 */
2182 paramValues_t cparam = init;
2183 winnerInfo_t candidateInfo, winnerInfo;
2184 int better = 1;
2185 int feas = 0;
2186
2187 winnerInfo = initWinnerInfo(init);
2188 candidateInfo = winnerInfo;
2189
2190 { winnerInfo_t bestFeasible1 = initWinnerInfo(cparam);
2191 DEBUGOUTPUT("Climb Part 1\n");
2192 while(better) {
2193 int offset;
2194 size_t i, dist;
2195 const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen;
2196 better = 0;
2197 DEBUGOUTPUT("Start\n");
2198 cparam = winnerInfo.params;
2199 candidateInfo.params = cparam;
2200 /* all dist-1 candidates */
2201 for (i = 0; i < varLen; i++) {
2202 for (offset = -1; offset <= 1; offset += 2) {
2203 CHECKTIME(winnerInfo);
2204 candidateInfo.params = cparam;
2205 paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
2206 offset,
2207 &candidateInfo.params);
2208
2209 if(paramValid(candidateInfo.params)) {
2210 int res;
2211 res = benchMemo(&candidateInfo.result, buf, ctx,
2212 sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas);
2213 DEBUGOUTPUT("Res: %d\n", res);
2214 if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */
2215 winnerInfo = candidateInfo;
2216 better = 1;
2217 if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2218 bestFeasible1 = winnerInfo;
2219 }
2220 }
2221 }
2222 } /* for (offset = -1; offset <= 1; offset += 2) */
2223 } /* for (i = 0; i < varLen; i++) */
2224
2225 if(better) {
2226 continue;
2227 }
2228
2229 for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */
2230 for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) {
2231 int res;
2232 CHECKTIME(winnerInfo);
2233 candidateInfo.params = cparam;
2234 /* param error checking already done here */
2235 paramVariation(&candidateInfo.params, mtAll, (U32)dist);
2236
2237 res = benchMemo(&candidateInfo.result,
2238 buf, ctx,
2239 sanitizeParams(candidateInfo.params), target,
2240 &winnerInfo.result, mtAll, feas);
2241 DEBUGOUTPUT("Res: %d\n", res);
2242 if (res == BETTER_RESULT) { /* synonymous with better in this case*/
2243 winnerInfo = candidateInfo;
2244 better = 1;
2245 if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2246 bestFeasible1 = winnerInfo;
2247 }
2248 break;
2249 }
2250 }
2251
2252 if (better) {
2253 break;
2254 }
2255 } /* for(dist = 2; dist < varLen + 2; dist++) */
2256
2257 if (!better) { /* infeas -> feas -> stop */
2258 if (feas) return winnerInfo;
2259 feas = 1;
2260 better = 1;
2261 winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */
2262 DEBUGOUTPUT("Climb Part 2\n");
2263 }
2264 }
2265 winnerInfo = bestFeasible1;
2266 }
2267
2268 return winnerInfo;
2269}
2270
2271/* Optimizes for a fixed strategy */
2272
2273/* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited)
2274 weight more on visit for bad results, less on good results/more on later results / ones with more failures.
2275 allocate memoTable here.
2276 */
2277static winnerInfo_t
2278optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx,
2279 const constraint_t target, paramValues_t paramTarget,
2280 const ZSTD_strategy strat,
2281 memoTable_t* memoTableArray, const int tries)
2282{
2283 int i = 0;
2284
2285 paramValues_t init;
2286 winnerInfo_t winnerInfo, candidateInfo;
2287 winnerInfo = initWinnerInfo(emptyParams());
2288 /* so climb is given the right fixed strategy */
2289 paramTarget.vals[strt_ind] = strat;
2290 /* to pass ZSTD_checkCParams */
2291 paramTarget = cParamUnsetMin(paramTarget);
2292
2293 init = paramTarget;
2294
2295 for(i = 0; i < tries; i++) {
2296 DEBUGOUTPUT("Restart\n");
2297 do {
2298 randomConstrainedParams(&init, memoTableArray, strat);
2299 } while(redundantParams(init, target, buf.maxBlockSize));
2300 candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init);
2301 if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) {
2302 winnerInfo = candidateInfo;
2303 BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize);
2304 i = 0;
2305 continue;
2306 }
2307 CHECKTIME(winnerInfo);
2308 i++;
2309 }
2310 return winnerInfo;
2311}
2312
2313/* goes best, best-1, best+1, best-2, ... */
2314/* return 0 if nothing remaining */
2315static int nextStrategy(const int currentStrategy, const int bestStrategy)
2316{
2317 if(bestStrategy <= currentStrategy) {
2318 int candidate = 2 * bestStrategy - currentStrategy - 1;
2319 if(candidate < 1) {
2320 candidate = currentStrategy + 1;
2321 if(candidate > (int)ZSTD_STRATEGY_MAX) {
2322 return 0;
2323 } else {
2324 return candidate;
2325 }
2326 } else {
2327 return candidate;
2328 }
2329 } else { /* bestStrategy >= currentStrategy */
2330 int candidate = 2 * bestStrategy - currentStrategy;
2331 if(candidate > (int)ZSTD_STRATEGY_MAX) {
2332 candidate = currentStrategy - 1;
2333 if(candidate < 1) {
2334 return 0;
2335 } else {
2336 return candidate;
2337 }
2338 } else {
2339 return candidate;
2340 }
2341 }
2342}
2343
2344/* experiment with playing with this and decay value */
2345
2346/* main fn called when using --optimize */
2347/* Does strategy selection by benchmarking default compression levels
2348 * then optimizes by strategy, starting with the best one and moving
2349 * progressively moving further away by number
2350 * args:
2351 * fileNamesTable - list of files to benchmark
2352 * nbFiles - length of fileNamesTable
2353 * dictFileName - name of dictionary file if one, else NULL
2354 * target - performance constraints (cSpeed, dSpeed, cMem)
2355 * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast)
2356 * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio)
2357 */
2358
2359static unsigned g_maxTries = 5;
2360#define TRY_DECAY 1
2361
2362static int
2363optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
2364 const char* dictFileName,
2365 constraint_t target, paramValues_t paramTarget,
2366 const int cLevelOpt, const int cLevelRun,
2367 const U32 memoTableLog)
2368{
2369 varInds_t varArray [NUM_PARAMS];
2370 int ret = 0;
2371 const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL);
2372 winnerInfo_t winner = initWinnerInfo(emptyParams());
2373 memoTable_t* allMT = NULL;
2374 paramValues_t paramBase;
2375 contexts_t ctx;
2376 buffers_t buf;
2377 g_time = UTIL_getTime();
2378
2379 if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2380 DISPLAY("unable to load files\n");
2381 return 1;
2382 }
2383
2384 if (createContexts(&ctx, dictFileName)) {
2385 DISPLAY("unable to load dictionary\n");
2386 freeBuffers(buf);
2387 return 2;
2388 }
2389
2390 if (nbFiles == 1) {
2391 DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[0]);
2392 } else {
2393 DISPLAYLEVEL(2, "Loading %lu Files... \r", (unsigned long)nbFiles);
2394 }
2395
2396 /* sanitize paramTarget */
2397 optimizerAdjustInput(&paramTarget, buf.maxBlockSize);
2398 paramBase = cParamUnsetMin(paramTarget);
2399
2400 allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog);
2401
2402 if (!allMT) {
2403 DISPLAY("MemoTable Init Error\n");
2404 ret = 2;
2405 goto _cleanUp;
2406 }
2407
2408 /* default strictnesses */
2409 if (g_strictness == PARAM_UNSET) {
2410 if(g_optmode) {
2411 g_strictness = 100;
2412 } else {
2413 g_strictness = 90;
2414 }
2415 } else {
2416 if(0 >= g_strictness || g_strictness > 100) {
2417 DISPLAY("Strictness Outside of Bounds\n");
2418 ret = 4;
2419 goto _cleanUp;
2420 }
2421 }
2422
2423 /* use level'ing mode instead of normal target mode */
2424 if (g_optmode) {
2425 winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize));
2426 if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) {
2427 ret = 3;
2428 goto _cleanUp;
2429 }
2430
2431 g_lvltarget = winner.result;
2432 g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
2433 g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
2434 g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
2435
2436 target.cSpeed = (U32)g_lvltarget.cSpeed;
2437 target.dSpeed = (U32)g_lvltarget.dSpeed;
2438
2439 BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize);
2440 }
2441
2442 /* Don't want it to return anything worse than the best known result */
2443 if (g_singleRun) {
2444 BMK_benchResult_t res;
2445 g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2446 if (BMK_benchParam(&res, buf, ctx, g_params)) {
2447 ret = 45;
2448 goto _cleanUp;
2449 }
2450 if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) {
2451 winner.result = res;
2452 winner.params = g_params;
2453 }
2454 }
2455
2456 /* bench */
2457 DISPLAYLEVEL(2, "\r%79s\r", "");
2458 if(nbFiles == 1) {
2459 DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]);
2460 } else {
2461 DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles);
2462 }
2463
2464 if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); }
2465 if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); }
2466 if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); }
2467
2468 DISPLAYLEVEL(2, "\n");
2469 init_clockGranularity();
2470
2471 { paramValues_t CParams;
2472
2473 /* find best solution from default params */
2474 { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2475 DEBUGOUTPUT("Strategy Selection\n");
2476 if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2477 BMK_benchResult_t candidate;
2478 int i;
2479 for (i=1; i<=maxSeeds; i++) {
2480 int ec;
2481 CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget);
2482 ec = BMK_benchParam(&candidate, buf, ctx, CParams);
2483 BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize);
2484
2485 if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) {
2486 winner.result = candidate;
2487 winner.params = CParams;
2488 }
2489
2490 CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */
2491 /* if the current params are too slow, just stop. */
2492 if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; }
2493 }
2494
2495 BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize);
2496 }
2497 }
2498
2499 DEBUGOUTPUT("Real Opt\n");
2500 /* start 'real' optimization */
2501 { int bestStrategy = (int)winner.params.vals[strt_ind];
2502 if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2503 int st = bestStrategy;
2504 int tries = g_maxTries;
2505
2506 /* one iterations of hill climbing with the level-defined parameters. */
2507 { winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params);
2508 if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) {
2509 winner = w1;
2510 }
2511 CHECKTIMEGT(ret, 0, _displayCleanUp);
2512 }
2513
2514 while(st && tries > 0) {
2515 winnerInfo_t wc;
2516 DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]);
2517
2518 wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries);
2519
2520 if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) {
2521 winner = wc;
2522 tries = g_maxTries;
2523 bestStrategy = st;
2524 } else {
2525 st = nextStrategy(st, bestStrategy);
2526 tries -= TRY_DECAY;
2527 }
2528 CHECKTIMEGT(ret, 0, _displayCleanUp);
2529 }
2530 } else {
2531 winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries);
2532 }
2533
2534 }
2535
2536 /* no solution found */
2537 if(winner.result.cSize == (size_t)-1) {
2538 ret = 1;
2539 DISPLAY("No feasible solution found\n");
2540 goto _cleanUp;
2541 }
2542
2543 /* end summary */
2544_displayCleanUp:
2545 if (g_displayLevel >= 0) {
2546 BMK_displayOneResult(stdout, winner, buf.srcSize);
2547 }
2548 BMK_paramValues_into_commandLine(stdout, winner.params);
2549 DISPLAYLEVEL(1, "grillParams size - optimizer completed \n");
2550 }
2551
2552_cleanUp:
2553 freeContexts(ctx);
2554 freeBuffers(buf);
2555 freeMemoTableArray(allMT);
2556 return ret;
2557}
2558
2559/*-************************************
2560* CLI parsing functions
2561**************************************/
2562
/** longCommandWArg() :
 *  test whether *stringPtr starts with longCommand.
 *  On a match, *stringPtr is moved just past longCommand and the function returns 1.
 *  Otherwise *stringPtr is left untouched and the function returns 0.
 *  from zstdcli.c
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;
    }
    *stringPtr += prefixLen;
    return 1;
}
2576
/* errorOut() :
 * display `msg` then terminate the program with exit code 1. Does not return. */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
2581
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  A leading '-' is accepted : the value is then negated in unsigned arithmetic.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Note : function will exit() program if digit sequence overflows */
static unsigned readU32FromChar(const char** stringPtr)
{
    const char errorMsg[] = "error: numeric value too large";
    /* largest accumulator values still accepted before appending a digit / applying a <<10 */
    unsigned const maxBeforeDigit = (((unsigned)(-1)) / 10) - 1;
    unsigned const maxBeforeShift = ((unsigned)(-1)) >> 10;
    const char* cursor = *stringPtr;
    unsigned multiplier = 1;
    unsigned value = 0;

    if (*cursor == '-') {
        multiplier = (unsigned)-1;
        cursor++;
    }

    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        if (value > maxBeforeDigit) errorOut(errorMsg);
        value *= 10;
        assert(*cursor >= '0');
        value += (unsigned)(*cursor - '0');
    }

    if ((*cursor == 'K') || (*cursor == 'M')) {
        if (value > maxBeforeShift) errorOut(errorMsg);
        value <<= 10;
        if (*cursor == 'M') {
            if (value > maxBeforeShift) errorOut(errorMsg);
            value <<= 10;
        }
        cursor++;   /* skip `K` or `M` */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value * multiplier;
}
2615
/* readDoubleFromChar() :
 * read an unsigned decimal number, with optional fractional part ("12", "12.5", "3."),
 * from *stringPtr.
 * @return : the value read (0 when no digit is present).
 * *stringPtr is advanced to the first character which was not consumed.
 * No sign and no exponent are recognized. */
static double readDoubleFromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    double value = 0;
    double scale = 10;

    /* integer part */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value *= 10;
        value += *cursor - '0';
    }

    /* optional fractional part */
    if (*cursor == '.') {
        cursor++;
        for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
            value += (double)(*cursor - '0') / scale;
            scale *= 10;
        }
    }

    *stringPtr = cursor;
    return value;
}
2631
/* usage() :
 * display the short help text, with `exename` as the program name.
 * @return : 0, always */
static int usage(const char* exename)
{
    /* command line synopsis */
    DISPLAY( "Usage :\n");
    DISPLAY( "      %s [arg] file\n", exename);

    /* basic arguments */
    DISPLAY( "Arguments :\n");
    DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n");
    DISPLAY( " -H/-h  : Help (this text + advanced options)\n");

    return 0;
}
2641
2642static int usage_advanced(void)
2643{
2644 DISPLAY( "\nAdvanced options :\n");
2645 DISPLAY( " -T# : set level 1 speed objective \n");
2646 DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n");
2647 DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n");
2648 DISPLAY( " -S : Single run \n");
2649 DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n");
2650 DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
2651 DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n",
2652 (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600);
2653 DISPLAY( " -v : Prints Benchmarking output\n");
2654 DISPLAY( " -D : Next argument dictionary file\n");
2655 DISPLAY( " -s : Separate Files\n");
2656 return 0;
2657}
2658
/* badusage() :
 * report invalid command line parameters, followed by the short help text.
 * @return : 1, always (suitable as a process exit code) */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n");
    (void)usage(exename);   /* usage() result carries no information */
    return 1;
}
2665
/* PARSE_SUB_ARGS() :
 * helper for parsing one `name=value` item of a comma-separated option list.
 * Must be expanded inside a loop, with a `const char* argument` cursor in scope :
 * when `argument` starts with stringLong or stringShort, the numeric value which follows
 * is stored into `variable`, then the macro either `continue`s the enclosing loop
 * (a ',' follows, it is skipped) or `break`s out of it (anything else follows).
 * When neither name matches, execution simply falls through. */
#define PARSE_SUB_ARGS(stringLong, stringShort, variable) {   \
    if ( longCommandWArg(&argument, stringLong)               \
      || longCommandWArg(&argument, stringShort) ) {          \
        variable = readU32FromChar(&argument);                \
        if (argument[0] != ',') break;                        \
        argument++;                                           \
        continue;                                             \
    }                                                         \
}
2674
2675/* 1 if successful parse, 0 otherwise */
2676static int parse_params(const char** argptr, paramValues_t* pv) {
2677 int matched = 0;
2678 const char* argOrig = *argptr;
2679 varInds_t v;
2680 for(v = 0; v < NUM_PARAMS; v++) {
2681 if ( longCommandWArg(argptr,g_shortParamNames[v])
2682 || longCommandWArg(argptr, g_paramNames[v]) ) {
2683 if(**argptr == '=') {
2684 (*argptr)++;
2685 pv->vals[v] = readU32FromChar(argptr);
2686 matched = 1;
2687 break;
2688 }
2689 }
2690 /* reset and try again */
2691 *argptr = argOrig;
2692 }
2693 return matched;
2694}
2695
2696/*-************************************
2697* Main
2698**************************************/
2699
2700int main(int argc, const char** argv)
2701{
2702 int i,
2703 filenamesStart=0,
2704 result;
2705 const char* exename=argv[0];
2706 const char* input_filename = NULL;
2707 const char* dictFileName = NULL;
2708 U32 main_pause = 0;
2709 int cLevelOpt = 0, cLevelRun = 0;
2710 int separateFiles = 0;
2711 double compressibility = COMPRESSIBILITY_DEFAULT;
2712 U32 memoTableLog = PARAM_UNSET;
2713 constraint_t target = { 0, 0, (U32)-1 };
2714
2715 paramValues_t paramTarget = emptyParams();
2716 g_params = emptyParams();
2717
2718 assert(argc>=1); /* for exename */
2719
2720 for(i=1; i<argc; i++) {
2721 const char* argument = argv[i];
2722 DEBUGOUTPUT("%d: %s\n", i, argument);
2723 assert(argument != NULL);
2724
2725 if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
2726
2727 if (longCommandWArg(&argument, "--optimize=")) {
2728 g_optimizer = 1;
2729 for ( ; ;) {
2730 if(parse_params(&argument, &paramTarget)) { if(argument[0] == ',') { argument++; continue; } else break; }
2731 PARSE_SUB_ARGS("compressionSpeed=" , "cSpeed=", target.cSpeed);
2732 PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed);
2733 PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem);
2734 PARSE_SUB_ARGS("strict=", "stc=", g_strictness);
2735 PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries);
2736 PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog);
2737 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
2738 if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; }
2739
2740 DISPLAY("invalid optimization parameter \n");
2741 return 1;
2742 }
2743
2744 if (argument[0] != 0) {
2745 DISPLAY("invalid --optimize= format\n");
2746 return 1; /* check the end of string */
2747 }
2748 continue;
2749 } else if (longCommandWArg(&argument, "--zstd=")) {
2750 /* Decode command (note : aggregated commands are allowed) */
2751 g_singleRun = 1;
2752 for ( ; ;) {
2753 if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; }
2754 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
2755
2756 DISPLAY("invalid compression parameter \n");
2757 return 1;
2758 }
2759
2760 if (argument[0] != 0) {
2761 DISPLAY("invalid --zstd= format\n");
2762 return 1; /* check the end of string */
2763 }
2764 continue;
2765 /* if not return, success */
2766
2767 } else if (longCommandWArg(&argument, "--display=")) {
2768 /* Decode command (note : aggregated commands are allowed) */
2769 memset(g_silenceParams, 1, sizeof(g_silenceParams));
2770 for ( ; ;) {
2771 int found = 0;
2772 varInds_t v;
2773 for(v = 0; v < NUM_PARAMS; v++) {
2774 if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) {
2775 g_silenceParams[v] = 0;
2776 found = 1;
2777 }
2778 }
2779 if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) {
2780 for(v = 0; v <= strt_ind; v++) {
2781 g_silenceParams[v] = 0;
2782 }
2783 found = 1;
2784 }
2785
2786
2787 if(found) {
2788 if(argument[0]==',') {
2789 continue;
2790 } else {
2791 break;
2792 }
2793 }
2794 DISPLAY("invalid parameter name parameter \n");
2795 return 1;
2796 }
2797
2798 if (argument[0] != 0) {
2799 DISPLAY("invalid --display format\n");
2800 return 1; /* check the end of string */
2801 }
2802 continue;
2803 } else if (argument[0]=='-') {
2804 argument++;
2805
2806 while (argument[0]!=0) {
2807
2808 switch(argument[0])
2809 {
2810 /* Display help on usage */
2811 case 'h' :
2812 case 'H': usage(exename); usage_advanced(); return 0;
2813
2814 /* Pause at the end (hidden option) */
2815 case 'p': main_pause = 1; argument++; break;
2816
2817 /* Sample compressibility (when no file provided) */
2818 case 'P':
2819 argument++;
2820 { U32 const proba32 = readU32FromChar(&argument);
2821 compressibility = (double)proba32 / 100.;
2822 }
2823 break;
2824
2825 /* Run Single conf */
2826 case 'S':
2827 g_singleRun = 1;
2828 argument++;
2829 for ( ; ; ) {
2830 switch(*argument)
2831 {
2832 case 'w':
2833 argument++;
2834 g_params.vals[wlog_ind] = readU32FromChar(&argument);
2835 continue;
2836 case 'c':
2837 argument++;
2838 g_params.vals[clog_ind] = readU32FromChar(&argument);
2839 continue;
2840 case 'h':
2841 argument++;
2842 g_params.vals[hlog_ind] = readU32FromChar(&argument);
2843 continue;
2844 case 's':
2845 argument++;
2846 g_params.vals[slog_ind] = readU32FromChar(&argument);
2847 continue;
2848 case 'l': /* search length */
2849 argument++;
2850 g_params.vals[mml_ind] = readU32FromChar(&argument);
2851 continue;
2852 case 't': /* target length */
2853 argument++;
2854 g_params.vals[tlen_ind] = readU32FromChar(&argument);
2855 continue;
2856 case 'S': /* strategy */
2857 argument++;
2858 g_params.vals[strt_ind] = readU32FromChar(&argument);
2859 continue;
2860 case 'f': /* forceAttachDict */
2861 argument++;
2862 g_params.vals[fadt_ind] = readU32FromChar(&argument);
2863 continue;
2864 case 'L':
2865 { argument++;
2866 cLevelRun = (int)readU32FromChar(&argument);
2867 g_params = emptyParams();
2868 continue;
2869 }
2870 default : ;
2871 }
2872 break;
2873 }
2874
2875 break;
2876
2877 /* target level1 speed objective, in MB/s */
2878 case 'T':
2879 argument++;
2880 g_target = readU32FromChar(&argument);
2881 break;
2882
2883 /* cut input into blocks */
2884 case 'B':
2885 argument++;
2886 g_blockSize = readU32FromChar(&argument);
2887 DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10));
2888 break;
2889
2890 /* caps runtime (in seconds) */
2891 case 't':
2892 argument++;
2893 g_timeLimit_s = readU32FromChar(&argument);
2894 break;
2895
2896 case 's':
2897 argument++;
2898 separateFiles = 1;
2899 break;
2900
2901 case 'q':
2902 while (argument[0] == 'q') { argument++; g_displayLevel--; }
2903 break;
2904
2905 case 'v':
2906 while (argument[0] == 'v') { argument++; g_displayLevel++; }
2907 break;
2908
2909 /* load dictionary file (only applicable for optimizer rn) */
2910 case 'D':
2911 if(i == argc - 1) { /* last argument, return error. */
2912 DISPLAY("Dictionary file expected but not given : %d\n", i);
2913 return 1;
2914 } else {
2915 i++;
2916 dictFileName = argv[i];
2917 argument += strlen(argument);
2918 }
2919 break;
2920
2921 /* Unknown command */
2922 default : return badusage(exename);
2923 }
2924 }
2925 continue;
2926 } /* if (argument[0]=='-') */
2927
2928 /* first provided filename is input */
2929 if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
2930 }
2931
2932 /* Welcome message */
2933 DISPLAYLEVEL(2, WELCOME_MESSAGE);
2934
2935 if (filenamesStart==0) {
2936 if (g_optimizer) {
2937 DISPLAY("Optimizer Expects File\n");
2938 return 1;
2939 } else {
2940 result = benchSample(compressibility, cLevelRun);
2941 }
2942 } else {
2943 if(separateFiles) {
2944 for(i = 0; i < argc - filenamesStart; i++) {
2945 if (g_optimizer) {
2946 result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2947 if(result) { DISPLAY("Error on File %d", i); return result; }
2948 } else {
2949 result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun);
2950 if(result) { DISPLAY("Error on File %d", i); return result; }
2951 }
2952 }
2953 } else {
2954 if (g_optimizer) {
2955 assert(filenamesStart < argc);
2956 result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2957 } else {
2958 result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun);
2959 }
2960 }
2961 }
2962
2963 if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
2964
2965 return result;
2966}