| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | * You may select, at your option, one of the above-listed licenses. |
| 9 | */ |
| 10 | |
| 11 | |
| 12 | /*-************************************ |
| 13 | * Dependencies |
| 14 | **************************************/ |
| 15 | #include "util.h" /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */ |
| 16 | #include <stdlib.h> /* malloc */ |
| 17 | #include <stdio.h> /* fprintf, fopen, ftello64 */ |
| 18 | #include <string.h> /* strcmp */ |
| 19 | #include <math.h> /* log */ |
| 20 | #include <assert.h> |
| 21 | |
| 22 | #include "timefn.h" /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */ |
| 23 | #include "mem.h" |
| 24 | #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_estimateCCtxSize */ |
| 25 | #include "zstd.h" |
| 26 | #include "datagen.h" |
| 27 | #include "xxhash.h" |
| 28 | #include "benchfn.h" |
| 29 | #include "benchzstd.h" |
| 30 | #include "zstd_errors.h" |
| 31 | #include "zstd_internal.h" /* should not be needed */ |
| 32 | |
| 33 | |
| 34 | /*-************************************ |
| 35 | * Constants |
| 36 | **************************************/ |
/* Strings used to build the start-up banner. */
#define PROGRAM_DESCRIPTION "ZSTD parameters tester"
#define AUTHOR "Yann Collet"
/* Expands to a complete printf-style argument list (format string + 4 arguments),
 * so it is meant to be passed as the variadic part of a display macro. */
#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR

#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
#define NB_LEVELS_TRACKED 22 /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */

/* Upper bound applied by BMK_findMaxMem() to the amount of memory it tries to reserve :
 * (2 GB - 64 MB) when size_t is 4 bytes wide, otherwise 1ULL << (bits in size_t - 31),
 * i.e. 1ULL << 33 when size_t is 8 bytes wide.
 * NOTE(review): `GB` and `MB` are not defined in this part of the file,
 * presumably they are suffix macros provided by one of the included headers. */
static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));

#define COMPRESSIBILITY_DEFAULT 0.50

/* Limits for the variation search; their use is not visible in this part of the file.
 * NOTE(review): presumably 60 seconds expressed in microseconds, confirm against SEC_TO_MICRO in timefn.h. */
static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
static const int g_maxNbVariations = 64;
| 50 | |
| 51 | |
| 52 | /*-************************************ |
| 53 | * Macros |
| 54 | **************************************/ |
/* DISPLAY() : unconditional printf-style message on stderr. */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : same as DISPLAY(), but only when g_displayLevel >= n.
 * Note : expands to a bare `if (...) { ... }` statement (not wrapped in do/while),
 * so it must not be used as the unbraced body of an if/else. */
#define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
/* DEBUGOUTPUT() : same as DISPLAY(), but only when DEBUG is non-zero. */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

/* TIMED : when non-zero, BMK_printWinner() prefixes each line with the time elapsed since g_time. */
#define TIMED 0
/* DEBUG can be provided on the compiler command line; it defaults to disabled. */
#ifndef DEBUG
# define DEBUG 0
#endif

/* Local MIN/MAX replace any previous definition.
 * Both arguments may be evaluated twice : do not pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
/* CUSTOM_LEVEL : level value for which BMK_printWinner() keeps the "Custom Level" label. */
#define CUSTOM_LEVEL 99
#define BASE_CLEVEL 1

/* Bounds of forceAttachDict, stored as U32 : FADT_MAX is the unsigned image of -1 */
#define FADT_MIN 0
#define FADT_MAX ((U32)-1)

/* Number of distinct values each parameter can take (see rangetable[]) */
#define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
#define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
#define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
#define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
#define MML_RANGE (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
/* must match the number of entries of tlen_table[] */
#define TLEN_RANGE 17
#define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
/* the 3 positions are mapped to -1, 0, 1 by rangeMap() */
#define FADT_RANGE 3

/* CHECKTIME() : makes the enclosing function return `r`
 * once more than g_timeLimit_s seconds have elapsed since g_time. */
#define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } }
/* CHECKTIMEGT() : same test, but stores `val` into `ret` and jumps to label `_gototag` instead of returning. */
#define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } }

/* Marker stored in paramValues_t.vals[] for "no value selected yet" */
#define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */
| 87 | |
/* Display names of the strategies, indexed directly by the strategy value
 * (displayParamVal() reads g_stratName[value]); entry 0 is a placeholder. */
static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
    "(none) ", "ZSTD_fast ", "ZSTD_dfast ",
    "ZSTD_greedy ", "ZSTD_lazy ", "ZSTD_lazy2 ",
    "ZSTD_btlazy2 ", "ZSTD_btopt ", "ZSTD_btultra ",
    "ZSTD_btultra2"};

/* targetLength values explored by the search, in increasing order.
 * rangeMap() indexes this table, invRangeMap() binary-searches it. */
static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
| 95 | |
| 96 | |
| 97 | /*-************************************ |
| 98 | * Setup for Adding new params |
| 99 | **************************************/ |
| 100 | |
/* indices for each of the variables */
/* Each enumerator is the slot of one parameter inside paramValues_t.vals[]
 * and inside every per-parameter table below (mintable, maxtable, rangetable, ...).
 * NUM_PARAMS is the number of parameters, not a parameter itself. */
typedef enum {
    wlog_ind = 0,
    clog_ind = 1,
    hlog_ind = 2,
    slog_ind = 3,
    mml_ind = 4,
    tlen_ind = 5,
    strt_ind = 6,
    fadt_ind = 7, /* forceAttachDict */
    NUM_PARAMS = 8
} varInds_t;

/* One complete parameter set : vals[] is indexed by varInds_t.
 * A slot holding PARAM_UNSET means the parameter has not been chosen. */
typedef struct {
    U32 vals[NUM_PARAMS];
} paramValues_t;
| 117 | |
/* All tables below are indexed by varInds_t and list their entries in enum order :
 * wlog, clog, hlog, slog, mml, tlen, strt, fadt. */

/* minimum value of parameters */
static const U32 mintable[NUM_PARAMS] =
        { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN };

/* maximum value of parameters */
static const U32 maxtable[NUM_PARAMS] =
        { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX };

/* # of values parameters can take on */
/* (this is the number of positions accepted by rangeMap() for each parameter) */
static const U32 rangetable[NUM_PARAMS] =
        { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE };

/* ZSTD_cctxSetParameter() index to set */
static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] =
        { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict };

/* names of parameters */
/* (printed by BMK_paramValues_into_commandLine() and by range warnings) */
static const char* g_paramNames[NUM_PARAMS] =
        { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" };

/* shortened names of parameters */
static const char* g_shortParamNames[NUM_PARAMS] =
        { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" };
| 141 | |
| 142 | /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */ |
| 143 | static U32 rangeMap(varInds_t param, int ind) |
| 144 | { |
| 145 | U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0); |
| 146 | switch(param) { |
| 147 | case wlog_ind: /* using default: triggers -Wswitch-enum */ |
| 148 | case clog_ind: |
| 149 | case hlog_ind: |
| 150 | case slog_ind: |
| 151 | case mml_ind: |
| 152 | case strt_ind: |
| 153 | return mintable[param] + uind; |
| 154 | case tlen_ind: |
| 155 | return tlen_table[uind]; |
| 156 | case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */ |
| 157 | return uind - 1; |
| 158 | case NUM_PARAMS: |
| 159 | default:; |
| 160 | } |
| 161 | DISPLAY("Error, not a valid param\n "); |
| 162 | assert(0); |
| 163 | return (U32)-1; |
| 164 | } |
| 165 | |
| 166 | /* inverse of rangeMap */ |
| 167 | static int invRangeMap(varInds_t param, U32 value) |
| 168 | { |
| 169 | value = MIN(MAX(mintable[param], value), maxtable[param]); |
| 170 | switch(param) { |
| 171 | case wlog_ind: |
| 172 | case clog_ind: |
| 173 | case hlog_ind: |
| 174 | case slog_ind: |
| 175 | case mml_ind: |
| 176 | case strt_ind: |
| 177 | return (int)(value - mintable[param]); |
| 178 | case tlen_ind: /* bin search */ |
| 179 | { |
| 180 | int lo = 0; |
| 181 | int hi = TLEN_RANGE; |
| 182 | while(lo < hi) { |
| 183 | int mid = (lo + hi) / 2; |
| 184 | if(tlen_table[mid] < value) { |
| 185 | lo = mid + 1; |
| 186 | } if(tlen_table[mid] == value) { |
| 187 | return mid; |
| 188 | } else { |
| 189 | hi = mid; |
| 190 | } |
| 191 | } |
| 192 | return lo; |
| 193 | } |
| 194 | case fadt_ind: |
| 195 | return (int)value + 1; |
| 196 | case NUM_PARAMS: |
| 197 | default:; |
| 198 | } |
| 199 | DISPLAY("Error, not a valid param\n "); |
| 200 | assert(0); |
| 201 | return -2; |
| 202 | } |
| 203 | |
| 204 | /* display of params */ |
| 205 | static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width) |
| 206 | { |
| 207 | switch(param) { |
| 208 | case wlog_ind: |
| 209 | case clog_ind: |
| 210 | case hlog_ind: |
| 211 | case slog_ind: |
| 212 | case mml_ind: |
| 213 | case tlen_ind: |
| 214 | if(width) { |
| 215 | fprintf(f, "%*u", width, value); |
| 216 | } else { |
| 217 | fprintf(f, "%u", value); |
| 218 | } |
| 219 | break; |
| 220 | case strt_ind: |
| 221 | if(width) { |
| 222 | fprintf(f, "%*s", width, g_stratName[value]); |
| 223 | } else { |
| 224 | fprintf(f, "%s", g_stratName[value]); |
| 225 | } |
| 226 | break; |
| 227 | case fadt_ind: /* force attach dict */ |
| 228 | if(width) { |
| 229 | fprintf(f, "%*d", width, (int)value); |
| 230 | } else { |
| 231 | fprintf(f, "%d", (int)value); |
| 232 | } |
| 233 | break; |
| 234 | case NUM_PARAMS: |
| 235 | default: |
| 236 | DISPLAY("Error, not a valid param\n "); |
| 237 | assert(0); |
| 238 | break; |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | |
| 243 | /*-************************************ |
| 244 | * Benchmark Parameters/Global Variables |
| 245 | **************************************/ |
| 246 | |
/* General Utility */
/* g_timeLimit_s : CHECKTIME()/CHECKTIMEGT() trigger once BMK_timeSpan_s(g_time) exceeds it */
static U32 g_timeLimit_s = 99999; /* about 27 hours */
static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */
static U32 g_blockSize = 0; /* not referenced in this part of the file */
static U32 g_rand = 1; /* state of the FUZ_rand() generator used by paramVariation() and randomParams() */

/* Display */
static int g_displayLevel = 3; /* threshold tested by DISPLAYLEVEL() and by the winner printers */
static BYTE g_silenceParams[NUM_PARAMS]; /* can selectively silence some params when displaying them */

/* Mode Selection */
static U32 g_singleRun = 0; /* not referenced in this part of the file */
static U32 g_optimizer = 0; /* not referenced in this part of the file */
/* g_optmode : when non-zero, feasible() also requires cSize <= g_lvltarget.cSize,
 * and compareResultLT() ranks feasible results with resultDistLvl() */
static int g_optmode = 0;

/* For cLevel Table generation */
static U32 g_target = 0; /* not referenced in this part of the file */
static U32 g_noSeed = 0; /* not referenced in this part of the file */

/* For optimizer */
static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */
static double g_ratioMultiplier = 5.; /* weight of the ratio gain relative to the speed gain in resultDistLvl() */
/* g_strictness : percentage applied to the speed targets by relaxTarget() */
static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict */
static BMK_benchResult_t g_lvltarget; /* reference result used when g_optmode is set */
| 271 | |
/* Kind of memoization table described by a memoTable_t.
 * The code that interprets these values is not in this part of the file. */
typedef enum {
    directMap,
    xxhashMap,
    noMemo
} memoTableType_t;

/* Memoization table plus the list of parameters allowed to vary.
 * paramVariation() indexes an array of these by strategy value
 * and picks the parameter to change from varArray[0 .. varLen-1]. */
typedef struct {
    memoTableType_t tableType;
    BYTE* table;
    size_t tableLen;
    varInds_t varArray[NUM_PARAMS];
    size_t varLen;
} memoTable_t;

/* A benchmark result together with the parameters that produced it */
typedef struct {
    BMK_benchResult_t result;
    paramValues_t params;
} winnerInfo_t;

/* Requirements a result must satisfy, see feasible() :
 * speeds are minimums, cMem is a maximum.
 * cMem == (U32)-1 is treated as "no memory constraint" by resultScore(). */
typedef struct {
    U32 cSpeed; /* bytes / sec */
    U32 dSpeed;
    U32 cMem; /* bytes */
} constraint_t;

/* Node of the singly linked list of winners maintained by insertWinner() */
typedef struct winner_ll_node winner_ll_node;
struct winner_ll_node {
    winnerInfo_t res;
    winner_ll_node* next;
};

static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */
| 304 | |
| 305 | /* |
| 306 | * Additional Global Variables (Defined Above Use) |
| 307 | * g_level_constraint |
| 308 | * g_alreadyTested |
| 309 | * g_maxTries |
| 310 | * g_clockGranularity |
| 311 | */ |
| 312 | |
| 313 | |
| 314 | /*-******************************************************* |
| 315 | * General Util Functions |
| 316 | *********************************************************/ |
| 317 | |
| 318 | /* nullified useless params, to ensure count stats */ |
| 319 | /* cleans up params for memoizing / display */ |
| 320 | static paramValues_t sanitizeParams(paramValues_t params) |
| 321 | { |
| 322 | if (params.vals[strt_ind] == ZSTD_fast) |
| 323 | params.vals[clog_ind] = 0, params.vals[slog_ind] = 0; |
| 324 | if (params.vals[strt_ind] == ZSTD_dfast) |
| 325 | params.vals[slog_ind] = 0; |
| 326 | if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) ) |
| 327 | params.vals[tlen_ind] = 0; |
| 328 | |
| 329 | return params; |
| 330 | } |
| 331 | |
| 332 | static ZSTD_compressionParameters pvalsToCParams(paramValues_t p) |
| 333 | { |
| 334 | ZSTD_compressionParameters c; |
| 335 | memset(&c, 0, sizeof(ZSTD_compressionParameters)); |
| 336 | c.windowLog = p.vals[wlog_ind]; |
| 337 | c.chainLog = p.vals[clog_ind]; |
| 338 | c.hashLog = p.vals[hlog_ind]; |
| 339 | c.searchLog = p.vals[slog_ind]; |
| 340 | c.minMatch = p.vals[mml_ind]; |
| 341 | c.targetLength = p.vals[tlen_ind]; |
| 342 | c.strategy = p.vals[strt_ind]; |
| 343 | /* no forceAttachDict */ |
| 344 | return c; |
| 345 | } |
| 346 | |
| 347 | static paramValues_t cParamsToPVals(ZSTD_compressionParameters c) |
| 348 | { |
| 349 | paramValues_t p; |
| 350 | varInds_t i; |
| 351 | p.vals[wlog_ind] = c.windowLog; |
| 352 | p.vals[clog_ind] = c.chainLog; |
| 353 | p.vals[hlog_ind] = c.hashLog; |
| 354 | p.vals[slog_ind] = c.searchLog; |
| 355 | p.vals[mml_ind] = c.minMatch; |
| 356 | p.vals[tlen_ind] = c.targetLength; |
| 357 | p.vals[strt_ind] = c.strategy; |
| 358 | |
| 359 | /* set all other params to their minimum value */ |
| 360 | for (i = strt_ind + 1; i < NUM_PARAMS; i++) { |
| 361 | p.vals[i] = mintable[i]; |
| 362 | } |
| 363 | return p; |
| 364 | } |
| 365 | |
| 366 | /* equivalent of ZSTD_adjustCParams for paramValues_t */ |
| 367 | static paramValues_t |
| 368 | adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize) |
| 369 | { |
| 370 | paramValues_t ot = p; |
| 371 | varInds_t i; |
| 372 | p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize)); |
| 373 | if (!dictSize) { p.vals[fadt_ind] = 0; } |
| 374 | /* retain value of all other parameters */ |
| 375 | for(i = strt_ind + 1; i < NUM_PARAMS; i++) { |
| 376 | p.vals[i] = ot.vals[i]; |
| 377 | } |
| 378 | return p; |
| 379 | } |
| 380 | |
| 381 | static size_t BMK_findMaxMem(U64 requiredMem) |
| 382 | { |
| 383 | size_t const step = 64 MB; |
| 384 | void* testmem = NULL; |
| 385 | |
| 386 | requiredMem = (((requiredMem >> 26) + 1) << 26); |
| 387 | if (requiredMem > maxMemory) requiredMem = maxMemory; |
| 388 | |
| 389 | requiredMem += 2 * step; |
| 390 | while (!testmem && requiredMem > 0) { |
| 391 | testmem = malloc ((size_t)requiredMem); |
| 392 | requiredMem -= step; |
| 393 | } |
| 394 | |
| 395 | free (testmem); |
| 396 | return (size_t) requiredMem; |
| 397 | } |
| 398 | |
| 399 | /* accuracy in seconds only, span can be multiple years */ |
| 400 | static U32 BMK_timeSpan_s(const UTIL_time_t tStart) |
| 401 | { |
| 402 | return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL); |
| 403 | } |
| 404 | |
| 405 | static U32 FUZ_rotl32(U32 x, U32 r) |
| 406 | { |
| 407 | return ((x << r) | (x >> (32 - r))); |
| 408 | } |
| 409 | |
| 410 | static U32 FUZ_rand(U32* src) |
| 411 | { |
| 412 | const U32 prime1 = 2654435761U; |
| 413 | const U32 prime2 = 2246822519U; |
| 414 | U32 rand32 = *src; |
| 415 | rand32 *= prime1; |
| 416 | rand32 += prime2; |
| 417 | rand32 = FUZ_rotl32(rand32, 13); |
| 418 | *src = rand32; |
| 419 | return rand32 >> 5; |
| 420 | } |
| 421 | |
| 422 | #define BOUNDCHECK(val,min,max) { \ |
| 423 | if (((val)<(min)) | ((val)>(max))) { \ |
| 424 | DISPLAY("INVALID PARAMETER CONSTRAINTS\n"); \ |
| 425 | return 0; \ |
| 426 | } } |
| 427 | |
| 428 | static int paramValid(const paramValues_t paramTarget) |
| 429 | { |
| 430 | U32 i; |
| 431 | for(i = 0; i < NUM_PARAMS; i++) { |
| 432 | BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]); |
| 433 | } |
| 434 | return 1; |
| 435 | } |
| 436 | |
| 437 | /* cParamUnsetMin() : |
| 438 | * if any parameter in paramTarget is not yet set, |
| 439 | * it will receive its corresponding minimal value. |
| 440 | * This function never fails */ |
| 441 | static paramValues_t cParamUnsetMin(paramValues_t paramTarget) |
| 442 | { |
| 443 | varInds_t vi; |
| 444 | for (vi = 0; vi < NUM_PARAMS; vi++) { |
| 445 | if (paramTarget.vals[vi] == PARAM_UNSET) { |
| 446 | paramTarget.vals[vi] = mintable[vi]; |
| 447 | } |
| 448 | } |
| 449 | return paramTarget; |
| 450 | } |
| 451 | |
| 452 | static paramValues_t emptyParams(void) |
| 453 | { |
| 454 | U32 i; |
| 455 | paramValues_t p; |
| 456 | for(i = 0; i < NUM_PARAMS; i++) { |
| 457 | p.vals[i] = PARAM_UNSET; |
| 458 | } |
| 459 | return p; |
| 460 | } |
| 461 | |
| 462 | static winnerInfo_t initWinnerInfo(const paramValues_t p) |
| 463 | { |
| 464 | winnerInfo_t w1; |
| 465 | w1.result.cSpeed = 0; |
| 466 | w1.result.dSpeed = 0; |
| 467 | w1.result.cMem = (size_t)-1; |
| 468 | w1.result.cSize = (size_t)-1; |
| 469 | w1.params = p; |
| 470 | return w1; |
| 471 | } |
| 472 | |
| 473 | static paramValues_t |
| 474 | overwriteParams(paramValues_t base, const paramValues_t mask) |
| 475 | { |
| 476 | U32 i; |
| 477 | for(i = 0; i < NUM_PARAMS; i++) { |
| 478 | if(mask.vals[i] != PARAM_UNSET) { |
| 479 | base.vals[i] = mask.vals[i]; |
| 480 | } |
| 481 | } |
| 482 | return base; |
| 483 | } |
| 484 | |
| 485 | static void |
| 486 | paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr) |
| 487 | { |
| 488 | ptr->vals[paramIndex] = rangeMap(paramIndex, |
| 489 | invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt); |
| 490 | } |
| 491 | |
| 492 | /* varies ptr by nbChanges respecting varyParams*/ |
| 493 | static void |
| 494 | paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges) |
| 495 | { |
| 496 | paramValues_t p; |
| 497 | int validated = 0; |
| 498 | while (!validated) { |
| 499 | U32 i; |
| 500 | p = *ptr; |
| 501 | for (i = 0 ; i < nbChanges ; i++) { |
| 502 | const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1); |
| 503 | paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1], |
| 504 | (int)((changeID & 1) << 1) - 1, |
| 505 | &p); |
| 506 | } |
| 507 | validated = paramValid(p); |
| 508 | } |
| 509 | *ptr = p; |
| 510 | } |
| 511 | |
| 512 | /* Completely random parameter selection */ |
| 513 | static paramValues_t randomParams(void) |
| 514 | { |
| 515 | varInds_t v; paramValues_t p; |
| 516 | for(v = 0; v < NUM_PARAMS; v++) { |
| 517 | p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v])); |
| 518 | } |
| 519 | return p; |
| 520 | } |
| 521 | |
| 522 | static U64 g_clockGranularity = 100000000ULL; |
| 523 | |
| 524 | static void init_clockGranularity(void) |
| 525 | { |
| 526 | UTIL_time_t const clockStart = UTIL_getTime(); |
| 527 | U64 el1 = 0, el2 = 0; |
| 528 | int i = 0; |
| 529 | do { |
| 530 | el1 = el2; |
| 531 | el2 = UTIL_clockSpanNano(clockStart); |
| 532 | if(el1 < el2) { |
| 533 | U64 iv = el2 - el1; |
| 534 | if(g_clockGranularity > iv) { |
| 535 | g_clockGranularity = iv; |
| 536 | i = 0; |
| 537 | } else { |
| 538 | i++; |
| 539 | } |
| 540 | } |
| 541 | } while(i < 10); |
| 542 | DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity); |
| 543 | } |
| 544 | |
| 545 | /*-************************************ |
| 546 | * Optimizer Util Functions |
| 547 | **************************************/ |
| 548 | |
| 549 | /* checks results are feasible */ |
| 550 | static int feasible(const BMK_benchResult_t results, const constraint_t target) { |
| 551 | return (results.cSpeed >= target.cSpeed) |
| 552 | && (results.dSpeed >= target.dSpeed) |
| 553 | && (results.cMem <= target.cMem) |
| 554 | && (!g_optmode || results.cSize <= g_lvltarget.cSize); |
| 555 | } |
| 556 | |
| 557 | /* hill climbing value for part 1 */ |
| 558 | /* Scoring here is a linear reward for all set constraints normalized between 0 and 1 |
| 559 | * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic |
| 560 | * bonus to exceeding the constraint value. We also give linear ratio for compression ratio. |
| 561 | * The constant factors are experimental. |
| 562 | */ |
| 563 | static double |
| 564 | resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target) |
| 565 | { |
| 566 | double cs = 0., ds = 0., rt, cm = 0.; |
| 567 | const double r1 = 1, r2 = 0.1, rtr = 0.5; |
| 568 | double ret; |
| 569 | if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; } |
| 570 | if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; } |
| 571 | if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; } |
| 572 | rt = ((double)srcSize / (double)res.cSize); |
| 573 | |
| 574 | ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr + |
| 575 | (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2; |
| 576 | |
| 577 | return ret; |
| 578 | } |
| 579 | |
| 580 | /* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */ |
| 581 | static double |
| 582 | resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes) |
| 583 | { |
| 584 | double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1; |
| 585 | double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1; |
| 586 | if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) { |
| 587 | return 0.0; |
| 588 | } |
| 589 | return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1; |
| 590 | } |
| 591 | |
| 592 | /* return true if r2 strictly better than r1 */ |
| 593 | static int |
| 594 | compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize) |
| 595 | { |
| 596 | if(feasible(result1, target) && feasible(result2, target)) { |
| 597 | if(g_optmode) { |
| 598 | return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget); |
| 599 | } else { |
| 600 | return (result1.cSize > result2.cSize) |
| 601 | || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed) |
| 602 | || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed); |
| 603 | } |
| 604 | } |
| 605 | return feasible(result2, target) |
| 606 | || (!feasible(result1, target) |
| 607 | && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target))); |
| 608 | } |
| 609 | |
| 610 | static constraint_t relaxTarget(constraint_t target) { |
| 611 | target.cMem = (U32)-1; |
| 612 | target.cSpeed = (target.cSpeed * g_strictness) / 100; |
| 613 | target.dSpeed = (target.dSpeed * g_strictness) / 100; |
| 614 | return target; |
| 615 | } |
| 616 | |
| 617 | static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize) |
| 618 | { |
| 619 | varInds_t v; |
| 620 | for(v = 0; v < NUM_PARAMS; v++) { |
| 621 | if(pc->vals[v] != PARAM_UNSET) { |
| 622 | U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]); |
| 623 | if(newval != pc->vals[v]) { |
| 624 | pc->vals[v] = newval; |
| 625 | DISPLAY("Warning: parameter %s not in valid range, adjusting to ", |
| 626 | g_paramNames[v]); |
| 627 | displayParamVal(stderr, v, newval, 0); DISPLAY("\n"); |
| 628 | } |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | if(pc->vals[wlog_ind] != PARAM_UNSET) { |
| 633 | |
| 634 | U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1; |
| 635 | /* edge case of highBit not working for 0 */ |
| 636 | |
| 637 | if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) { |
| 638 | U32 adjust = MAX(mintable[wlog_ind], sshb); |
| 639 | if(adjust != pc->vals[wlog_ind]) { |
| 640 | pc->vals[wlog_ind] = adjust; |
| 641 | DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n", |
| 642 | (unsigned)pc->vals[wlog_ind]); |
| 643 | } |
| 644 | } |
| 645 | } |
| 646 | |
| 647 | if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { |
| 648 | U32 maxclog; |
| 649 | if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) { |
| 650 | maxclog = pc->vals[wlog_ind] + 1; |
| 651 | } else { |
| 652 | maxclog = pc->vals[wlog_ind]; |
| 653 | } |
| 654 | |
| 655 | if(pc->vals[clog_ind] > maxclog) { |
| 656 | pc->vals[clog_ind] = maxclog; |
| 657 | DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n", |
| 658 | (unsigned)pc->vals[clog_ind]); |
| 659 | } |
| 660 | } |
| 661 | |
| 662 | if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) { |
| 663 | if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) { |
| 664 | pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1; |
| 665 | DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n", |
| 666 | (unsigned)pc->vals[hlog_ind]); |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) { |
| 671 | if(pc->vals[slog_ind] > pc->vals[clog_ind]) { |
| 672 | pc->vals[clog_ind] = pc->vals[slog_ind]; |
| 673 | DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n", |
| 674 | (unsigned)pc->vals[slog_ind]); |
| 675 | } |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | static int |
| 680 | redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize) |
| 681 | { |
| 682 | return |
| 683 | (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */ |
| 684 | || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */ |
| 685 | || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/ |
| 686 | || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */ |
| 687 | || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */ |
| 688 | } |
| 689 | |
| 690 | |
| 691 | /*-************************************ |
| 692 | * Display Functions |
| 693 | **************************************/ |
| 694 | |
| 695 | /* BMK_paramValues_into_commandLine() : |
| 696 | * transform a set of parameters paramValues_t |
| 697 | * into a command line compatible with `zstd` syntax |
| 698 | * and writes it into FILE* f. |
| 699 | * f must be already opened and writable */ |
| 700 | static void |
| 701 | BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params) |
| 702 | { |
| 703 | varInds_t v; |
| 704 | int first = 1; |
| 705 | fprintf(f,"--zstd="); |
| 706 | for (v = 0; v < NUM_PARAMS; v++) { |
| 707 | if (g_silenceParams[v]) { continue; } |
| 708 | if (!first) { fprintf(f, ","); } |
| 709 | fprintf(f,"%s=", g_paramNames[v]); |
| 710 | |
| 711 | if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); } |
| 712 | else { displayParamVal(f, v, params.vals[v], 0); } |
| 713 | first = 0; |
| 714 | } |
| 715 | fprintf(f, "\n"); |
| 716 | } |
| 717 | |
| 718 | |
| 719 | /* comparison function: */ |
| 720 | /* strictly better, strictly worse, equal, speed-side adv, size-side adv */ |
| 721 | #define WORSE_RESULT 0 |
| 722 | #define BETTER_RESULT 1 |
| 723 | #define ERROR_RESULT 2 |
| 724 | |
| 725 | #define SPEED_RESULT 4 |
| 726 | #define SIZE_RESULT 5 |
| 727 | /* maybe have epsilon-eq to limit table size? */ |
| 728 | static int |
| 729 | speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2) |
| 730 | { |
| 731 | if(r1.cSpeed < r2.cSpeed) { |
| 732 | if(r1.cSize >= r2.cSize) { |
| 733 | return BETTER_RESULT; |
| 734 | } |
| 735 | return SPEED_RESULT; /* r2 is smaller but not faster. */ |
| 736 | } else { |
| 737 | if(r1.cSize <= r2.cSize) { |
| 738 | return WORSE_RESULT; |
| 739 | } |
| 740 | return SIZE_RESULT; /* r2 is faster but not smaller */ |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | /* 0 for insertion, 1 for no insert */ |
| 745 | /* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */ |
| 746 | static int |
| 747 | insertWinner(const winnerInfo_t w, const constraint_t targetConstraints) |
| 748 | { |
| 749 | BMK_benchResult_t r = w.result; |
| 750 | winner_ll_node* cur_node = g_winners; |
| 751 | /* first node to insert */ |
| 752 | if(!feasible(r, targetConstraints)) { |
| 753 | return 1; |
| 754 | } |
| 755 | |
| 756 | if(g_winners == NULL) { |
| 757 | winner_ll_node* first_node = malloc(sizeof(winner_ll_node)); |
| 758 | if(first_node == NULL) { |
| 759 | return 1; |
| 760 | } |
| 761 | first_node->next = NULL; |
| 762 | first_node->res = w; |
| 763 | g_winners = first_node; |
| 764 | return 0; |
| 765 | } |
| 766 | |
| 767 | while(cur_node->next != NULL) { |
| 768 | switch(speedSizeCompare(cur_node->res.result, r)) { |
| 769 | case WORSE_RESULT: |
| 770 | { |
| 771 | return 1; /* never insert if better */ |
| 772 | } |
| 773 | case BETTER_RESULT: |
| 774 | { |
| 775 | winner_ll_node* tmp; |
| 776 | cur_node->res = cur_node->next->res; |
| 777 | tmp = cur_node->next; |
| 778 | cur_node->next = cur_node->next->next; |
| 779 | free(tmp); |
| 780 | break; |
| 781 | } |
| 782 | case SIZE_RESULT: |
| 783 | { |
| 784 | cur_node = cur_node->next; |
| 785 | break; |
| 786 | } |
| 787 | case SPEED_RESULT: /* insert after first size result, then return */ |
| 788 | { |
| 789 | winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); |
| 790 | if(newnode == NULL) { |
| 791 | return 1; |
| 792 | } |
| 793 | newnode->res = cur_node->res; |
| 794 | cur_node->res = w; |
| 795 | newnode->next = cur_node->next; |
| 796 | cur_node->next = newnode; |
| 797 | return 0; |
| 798 | } |
| 799 | } |
| 800 | |
| 801 | } |
| 802 | |
| 803 | assert(cur_node->next == NULL); |
| 804 | switch(speedSizeCompare(cur_node->res.result, r)) { |
| 805 | case WORSE_RESULT: |
| 806 | { |
| 807 | return 1; /* never insert if better */ |
| 808 | } |
| 809 | case BETTER_RESULT: |
| 810 | { |
| 811 | cur_node->res = w; |
| 812 | return 0; |
| 813 | } |
| 814 | case SIZE_RESULT: |
| 815 | { |
| 816 | winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); |
| 817 | if(newnode == NULL) { |
| 818 | return 1; |
| 819 | } |
| 820 | newnode->res = w; |
| 821 | newnode->next = NULL; |
| 822 | cur_node->next = newnode; |
| 823 | return 0; |
| 824 | } |
| 825 | case SPEED_RESULT: /* insert before first size result, then return */ |
| 826 | { |
| 827 | winner_ll_node* newnode = malloc(sizeof(winner_ll_node)); |
| 828 | if(newnode == NULL) { |
| 829 | return 1; |
| 830 | } |
| 831 | newnode->res = cur_node->res; |
| 832 | cur_node->res = w; |
| 833 | newnode->next = cur_node->next; |
| 834 | cur_node->next = newnode; |
| 835 | return 0; |
| 836 | } |
| 837 | default: |
| 838 | return 1; |
| 839 | } |
| 840 | } |
| 841 | |
| 842 | static void |
| 843 | BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize) |
| 844 | { |
| 845 | varInds_t v; |
| 846 | int first = 1; |
| 847 | res.params = cParamUnsetMin(res.params); |
| 848 | fprintf(f, " {"); |
| 849 | for (v = 0; v < NUM_PARAMS; v++) { |
| 850 | if (g_silenceParams[v]) { continue; } |
| 851 | if (!first) { fprintf(f, ","); } |
| 852 | displayParamVal(f, v, res.params.vals[v], 3); |
| 853 | first = 0; |
| 854 | } |
| 855 | |
| 856 | { double const ratio = res.result.cSize ? |
| 857 | (double)srcSize / (double)res.result.cSize : 0; |
| 858 | double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT; |
| 859 | double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT; |
| 860 | |
| 861 | fprintf(f, " }, /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n", |
| 862 | ratio, cSpeedMBps, dSpeedMBps); |
| 863 | } |
| 864 | } |
| 865 | |
| 866 | /* Writes to f the results of a parameter benchmark */ |
| 867 | /* when used with --optimize, will only print results better than previously discovered */ |
| 868 | static void |
| 869 | BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize) |
| 870 | { |
| 871 | char lvlstr[15] = "Custom Level"; |
| 872 | winnerInfo_t w; |
| 873 | w.params = params; |
| 874 | w.result = result; |
| 875 | |
| 876 | fprintf(f, "\r%79s\r", ""); |
| 877 | |
| 878 | if(cLevel != CUSTOM_LEVEL) { |
| 879 | snprintf(lvlstr, 15, " Level %2d ", cLevel); |
| 880 | } |
| 881 | |
| 882 | if(TIMED) { |
| 883 | const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC; |
| 884 | const U64 time_ns = UTIL_clockSpanNano(g_time); |
| 885 | const U64 minutes = time_ns / mn_in_ns; |
| 886 | fprintf(f, "%1lu:%2lu:%05.2f - ", |
| 887 | (unsigned long) minutes / 60, |
| 888 | (unsigned long) minutes % 60, |
| 889 | (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC ); |
| 890 | } |
| 891 | |
| 892 | fprintf(f, "/* %s */ ", lvlstr); |
| 893 | BMK_displayOneResult(f, w, srcSize); |
| 894 | } |
| 895 | |
| 896 | static void |
| 897 | BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize) |
| 898 | { |
| 899 | /* global winner used for constraints */ |
| 900 | /* cSize, cSpeed, dSpeed, cMem */ |
| 901 | static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL }, |
| 902 | { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } } |
| 903 | }; |
| 904 | if ( DEBUG |
| 905 | || compareResultLT(g_winner.result, result, targetConstraints, srcSize) |
| 906 | || g_displayLevel >= 4) { |
| 907 | if ( DEBUG |
| 908 | && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { |
| 909 | DISPLAY("New Winner: \n"); |
| 910 | } |
| 911 | |
| 912 | if(g_displayLevel >= 2) { |
| 913 | BMK_printWinner(f, cLevel, result, params, srcSize); |
| 914 | } |
| 915 | |
| 916 | if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) { |
| 917 | if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); } |
| 918 | g_winner.result = result; |
| 919 | g_winner.params = params; |
| 920 | } |
| 921 | } |
| 922 | |
| 923 | if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) { |
| 924 | winnerInfo_t w; |
| 925 | winner_ll_node* n; |
| 926 | w.result = result; |
| 927 | w.params = params; |
| 928 | insertWinner(w, targetConstraints); |
| 929 | |
| 930 | if(!DEBUG) { fprintf(f, "\033c"); } |
| 931 | fprintf(f, "\n"); |
| 932 | |
| 933 | /* the table */ |
| 934 | fprintf(f, "================================\n"); |
| 935 | for(n = g_winners; n != NULL; n = n->next) { |
| 936 | BMK_displayOneResult(f, n->res, srcSize); |
| 937 | } |
| 938 | fprintf(f, "================================\n"); |
| 939 | fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n", |
| 940 | (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT); |
| 941 | |
| 942 | |
| 943 | fprintf(f, "Overall Winner: \n"); |
| 944 | BMK_displayOneResult(f, g_winner, srcSize); |
| 945 | BMK_paramValues_into_commandLine(f, g_winner.params); |
| 946 | |
| 947 | fprintf(f, "Latest BMK: \n");\ |
| 948 | BMK_displayOneResult(f, w, srcSize); |
| 949 | } |
| 950 | } |
| 951 | |
| 952 | |
| 953 | /* BMK_print_cLevelEntry() : |
| 954 | * Writes one cLevelTable entry, for one level. |
| 955 | * f must exist, be already opened, and be seekable. |
| 956 | * this function cannot error. |
| 957 | */ |
| 958 | static void |
| 959 | BMK_print_cLevelEntry(FILE* f, const int cLevel, |
| 960 | paramValues_t params, |
| 961 | const BMK_benchResult_t result, const size_t srcSize) |
| 962 | { |
| 963 | varInds_t v; |
| 964 | int first = 1; |
| 965 | |
| 966 | assert(cLevel >= 0); |
| 967 | assert(cLevel <= NB_LEVELS_TRACKED); |
| 968 | params = cParamUnsetMin(params); |
| 969 | |
| 970 | fprintf(f, " {"); |
| 971 | /* print cParams. |
| 972 | * assumption : all cParams are present and in order in the following range */ |
| 973 | for (v = 0; v <= strt_ind; v++) { |
| 974 | if (!first) { fprintf(f, ","); } |
| 975 | displayParamVal(f, v, params.vals[v], 3); |
| 976 | first = 0; |
| 977 | } |
| 978 | /* print comment */ |
| 979 | { double const ratio = result.cSize ? |
| 980 | (double)srcSize / (double)result.cSize : 0; |
| 981 | double const cSpeedMBps = (double)result.cSpeed / MB_UNIT; |
| 982 | double const dSpeedMBps = (double)result.dSpeed / MB_UNIT; |
| 983 | |
| 984 | fprintf(f, " }, /* level %2i: R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n", |
| 985 | cLevel, ratio, cSpeedMBps, dSpeedMBps); |
| 986 | } |
| 987 | } |
| 988 | |
| 989 | |
| 990 | /* BMK_print_cLevelTable() : |
| 991 | * print candidate compression table into proposed FILE* f. |
| 992 | * f must exist, be already opened, and be seekable. |
| 993 | * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized |
| 994 | * this function cannot error. |
| 995 | */ |
| 996 | static void |
| 997 | BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize) |
| 998 | { |
| 999 | int cLevel; |
| 1000 | |
| 1001 | fprintf(f, "\n /* Proposed configurations : */ \n"); |
| 1002 | fprintf(f, " /* W, C, H, S, L, T, strat */ \n"); |
| 1003 | |
| 1004 | for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++) |
| 1005 | BMK_print_cLevelEntry(f, |
| 1006 | cLevel, winners[cLevel].params, |
| 1007 | winners[cLevel].result, srcSize); |
| 1008 | } |
| 1009 | |
| 1010 | |
| 1011 | /* BMK_saveAndPrint_cLevelTable() : |
| 1012 | * save candidate compression table into FILE* f, |
| 1013 | * and then to stdout. |
| 1014 | * f must exist, be already opened, and be seekable. |
| 1015 | * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized |
| 1016 | * this function cannot error. |
| 1017 | */ |
| 1018 | static void |
| 1019 | BMK_saveAndPrint_cLevelTable(FILE* const f, |
| 1020 | const winnerInfo_t* winners, |
| 1021 | const size_t srcSize) |
| 1022 | { |
| 1023 | fseek(f, 0, SEEK_SET); |
| 1024 | BMK_print_cLevelTable(f, winners, srcSize); |
| 1025 | fflush(f); |
| 1026 | BMK_print_cLevelTable(stdout, winners, srcSize); |
| 1027 | } |
| 1028 | |
| 1029 | |
| 1030 | /*-******************************************************* |
| 1031 | * Functions to Benchmark |
| 1032 | *********************************************************/ |
| 1033 | |
/* Payload consumed by local_initCCtx() to (re)configure a compression context */
typedef struct {
    ZSTD_CCtx* cctx;                   /* context to reset and configure */
    const void* dictBuffer;            /* dictionary content, passed to ZSTD_CCtx_loadDictionary() */
    size_t dictBufferSize;             /* size of dictBuffer, in bytes */
    int cLevel;                        /* value set for ZSTD_c_compressionLevel */
    const paramValues_t* comprParams;  /* per-parameter values; entries equal to PARAM_UNSET are not applied */
} BMK_initCCtxArgs;
| 1041 | |
| 1042 | static size_t local_initCCtx(void* payload) { |
| 1043 | const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload; |
| 1044 | varInds_t i; |
| 1045 | ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters); |
| 1046 | ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel); |
| 1047 | |
| 1048 | for(i = 0; i < NUM_PARAMS; i++) { |
| 1049 | if(ag->comprParams->vals[i] != PARAM_UNSET) |
| 1050 | ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]); |
| 1051 | } |
| 1052 | ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize); |
| 1053 | |
| 1054 | return 0; |
| 1055 | } |
| 1056 | |
/* Payload consumed by local_initDCtx() to (re)configure a decompression context */
typedef struct {
    ZSTD_DCtx* dctx;          /* context to reset */
    const void* dictBuffer;   /* dictionary content, passed to ZSTD_DCtx_loadDictionary() */
    size_t dictBufferSize;    /* size of dictBuffer, in bytes */
} BMK_initDCtxArgs;
| 1062 | |
| 1063 | static size_t local_initDCtx(void* payload) { |
| 1064 | const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload; |
| 1065 | ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters); |
| 1066 | ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize); |
| 1067 | return 0; |
| 1068 | } |
| 1069 | |
| 1070 | /* additional argument is just the context */ |
| 1071 | static size_t local_defaultCompress( |
| 1072 | const void* srcBuffer, size_t srcSize, |
| 1073 | void* dstBuffer, size_t dstSize, |
| 1074 | void* addArgs) |
| 1075 | { |
| 1076 | ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs; |
| 1077 | assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */ |
| 1078 | return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); |
| 1079 | } |
| 1080 | |
| 1081 | /* additional argument is just the context */ |
| 1082 | static size_t local_defaultDecompress( |
| 1083 | const void* srcBuffer, size_t srcSize, |
| 1084 | void* dstBuffer, size_t dstSize, |
| 1085 | void* addArgs) { |
| 1086 | size_t moreToFlush = 1; |
| 1087 | ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs; |
| 1088 | ZSTD_inBuffer in; |
| 1089 | ZSTD_outBuffer out; |
| 1090 | in.src = srcBuffer; |
| 1091 | in.size = srcSize; |
| 1092 | in.pos = 0; |
| 1093 | out.dst = dstBuffer; |
| 1094 | out.size = dstSize; |
| 1095 | out.pos = 0; |
| 1096 | while (moreToFlush) { |
| 1097 | if(out.pos == out.size) { |
| 1098 | return (size_t)-ZSTD_error_dstSize_tooSmall; |
| 1099 | } |
| 1100 | moreToFlush = ZSTD_decompressStream(dctx, |
| 1101 | &out, &in); |
| 1102 | if (ZSTD_isError(moreToFlush)) { |
| 1103 | return moreToFlush; |
| 1104 | } |
| 1105 | } |
| 1106 | return out.pos; |
| 1107 | |
| 1108 | } |
| 1109 | |
| 1110 | /*-************************************ |
| 1111 | * Data Initialization Functions |
| 1112 | **************************************/ |
| 1113 | |
/* Set of buffers describing a sample split into blocks,
 * as filled by createBuffersFromMemory() */
typedef struct {
    void* srcBuffer;          /* start of the source data */
    size_t srcSize;           /* total source size (sum of all file sizes) */
    const void** srcPtrs;     /* start of each source block, inside srcBuffer */
    size_t* srcSizes;         /* size of each source block */
    void** dstPtrs;           /* destination of each block; all carved from one allocation starting at dstPtrs[0] */
    size_t* dstCapacities;    /* capacity of each destination : ZSTD_compressBound(srcSizes[n]) */
    size_t* dstSizes;         /* initialized to dstCapacities[n] */
    void** resPtrs;           /* result area of each block; all carved from one allocation starting at resPtrs[0] */
    size_t* resSizes;         /* size of each result area : same as srcSizes[n] */
    size_t nbBlocks;          /* number of blocks actually in use */
    size_t maxBlockSize;      /* largest value found in srcSizes[] */
} buffers_t;
| 1127 | |
/* Contexts and dictionary shared by benchmark runs,
 * created by createContexts() and released by freeContexts() */
typedef struct {
    size_t dictSize;     /* size of dictBuffer in bytes; 0 when no dictionary is used */
    void* dictBuffer;    /* dictionary content (released with free()); NULL when no dictionary is used */
    ZSTD_CCtx* cctx;     /* compression context */
    ZSTD_DCtx* dctx;     /* decompression context */
} contexts_t;
| 1134 | |
| 1135 | static void freeNonSrcBuffers(const buffers_t b) { |
| 1136 | free((void*)b.srcPtrs); |
| 1137 | free(b.srcSizes); |
| 1138 | |
| 1139 | if(b.dstPtrs != NULL) { |
| 1140 | free(b.dstPtrs[0]); |
| 1141 | } |
| 1142 | free(b.dstPtrs); |
| 1143 | free(b.dstCapacities); |
| 1144 | free(b.dstSizes); |
| 1145 | |
| 1146 | if(b.resPtrs != NULL) { |
| 1147 | free(b.resPtrs[0]); |
| 1148 | } |
| 1149 | free(b.resPtrs); |
| 1150 | free(b.resSizes); |
| 1151 | } |
| 1152 | |
| 1153 | static void freeBuffers(const buffers_t b) { |
| 1154 | if(b.srcPtrs != NULL) { |
| 1155 | free(b.srcBuffer); |
| 1156 | } |
| 1157 | freeNonSrcBuffers(b); |
| 1158 | } |
| 1159 | |
| 1160 | /* srcBuffer will be freed by freeBuffers now */ |
| 1161 | static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles, |
| 1162 | const size_t* fileSizes) |
| 1163 | { |
| 1164 | size_t pos = 0, n, blockSize; |
| 1165 | U32 maxNbBlocks, blockNb = 0; |
| 1166 | buff->srcSize = 0; |
| 1167 | for(n = 0; n < nbFiles; n++) { |
| 1168 | buff->srcSize += fileSizes[n]; |
| 1169 | } |
| 1170 | |
| 1171 | if(buff->srcSize == 0) { |
| 1172 | DISPLAY("No data to bench\n"); |
| 1173 | return 1; |
| 1174 | } |
| 1175 | |
| 1176 | blockSize = g_blockSize ? g_blockSize : buff->srcSize; |
| 1177 | maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles; |
| 1178 | |
| 1179 | buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*)); |
| 1180 | buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
| 1181 | |
| 1182 | buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); |
| 1183 | buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
| 1184 | buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
| 1185 | |
| 1186 | buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*)); |
| 1187 | buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
| 1188 | |
| 1189 | if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) { |
| 1190 | DISPLAY("alloc error\n"); |
| 1191 | freeNonSrcBuffers(*buff); |
| 1192 | return 1; |
| 1193 | } |
| 1194 | |
| 1195 | buff->srcBuffer = srcBuffer; |
| 1196 | buff->srcPtrs[0] = (const void*)buff->srcBuffer; |
| 1197 | buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024)); |
| 1198 | buff->resPtrs[0] = malloc(buff->srcSize); |
| 1199 | |
| 1200 | if(!buff->dstPtrs[0] || !buff->resPtrs[0]) { |
| 1201 | DISPLAY("alloc error\n"); |
| 1202 | freeNonSrcBuffers(*buff); |
| 1203 | return 1; |
| 1204 | } |
| 1205 | |
| 1206 | for(n = 0; n < nbFiles; n++) { |
| 1207 | size_t pos_end = pos + fileSizes[n]; |
| 1208 | for(; pos < pos_end; blockNb++) { |
| 1209 | buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos); |
| 1210 | buff->srcSizes[blockNb] = blockSize; |
| 1211 | pos += blockSize; |
| 1212 | } |
| 1213 | |
| 1214 | if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; } |
| 1215 | pos = pos_end; |
| 1216 | } |
| 1217 | |
| 1218 | buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]); |
| 1219 | buff->dstSizes[0] = buff->dstCapacities[0]; |
| 1220 | buff->resSizes[0] = buff->srcSizes[0]; |
| 1221 | buff->maxBlockSize = buff->srcSizes[0]; |
| 1222 | |
| 1223 | for(n = 1; n < blockNb; n++) { |
| 1224 | buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1]; |
| 1225 | buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1]; |
| 1226 | buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]); |
| 1227 | buff->dstSizes[n] = buff->dstCapacities[n]; |
| 1228 | buff->resSizes[n] = buff->srcSizes[n]; |
| 1229 | |
| 1230 | buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]); |
| 1231 | } |
| 1232 | |
| 1233 | buff->nbBlocks = blockNb; |
| 1234 | |
| 1235 | return 0; |
| 1236 | } |
| 1237 | |
| 1238 | /* allocates buffer's arguments. returns success / failure */ |
| 1239 | static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable, |
| 1240 | size_t nbFiles) { |
| 1241 | size_t pos = 0; |
| 1242 | size_t n; |
| 1243 | size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles); |
| 1244 | size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad); |
| 1245 | size_t* fileSizes = calloc(sizeof(size_t), nbFiles); |
| 1246 | void* srcBuffer = NULL; |
| 1247 | int ret = 0; |
| 1248 | |
| 1249 | if(!totalSizeToLoad || !benchedSize) { |
| 1250 | ret = 1; |
| 1251 | DISPLAY("Nothing to Bench\n"); |
| 1252 | goto _cleanUp; |
| 1253 | } |
| 1254 | |
| 1255 | srcBuffer = malloc(benchedSize); |
| 1256 | |
| 1257 | if(!fileSizes || !srcBuffer) { |
| 1258 | ret = 1; |
| 1259 | goto _cleanUp; |
| 1260 | } |
| 1261 | |
| 1262 | for(n = 0; n < nbFiles; n++) { |
| 1263 | FILE* f; |
| 1264 | U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); |
| 1265 | if (UTIL_isDirectory(fileNamesTable[n])) { |
| 1266 | DISPLAY("Ignoring %s directory... \n", fileNamesTable[n]); |
| 1267 | continue; |
| 1268 | } |
| 1269 | if (fileSize == UTIL_FILESIZE_UNKNOWN) { |
| 1270 | DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); |
| 1271 | continue; |
| 1272 | } |
| 1273 | f = fopen(fileNamesTable[n], "rb"); |
| 1274 | if (f==NULL) { |
| 1275 | DISPLAY("impossible to open file %s\n", fileNamesTable[n]); |
| 1276 | fclose(f); |
| 1277 | ret = 10; |
| 1278 | goto _cleanUp; |
| 1279 | } |
| 1280 | |
| 1281 | DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]); |
| 1282 | |
| 1283 | if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n; /* buffer too small - stop after this file */ |
| 1284 | { |
| 1285 | char* buffer = (char*)(srcBuffer); |
| 1286 | size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f); |
| 1287 | fclose(f); |
| 1288 | if (readSize != (size_t)fileSize) { |
| 1289 | DISPLAY("could not read %s", fileNamesTable[n]); |
| 1290 | ret = 1; |
| 1291 | goto _cleanUp; |
| 1292 | } |
| 1293 | |
| 1294 | fileSizes[n] = readSize; |
| 1295 | pos += readSize; |
| 1296 | } |
| 1297 | } |
| 1298 | |
| 1299 | ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes); |
| 1300 | |
| 1301 | _cleanUp: |
| 1302 | if(ret) { free(srcBuffer); } |
| 1303 | free(fileSizes); |
| 1304 | return ret; |
| 1305 | } |
| 1306 | |
| 1307 | static void freeContexts(const contexts_t ctx) { |
| 1308 | free(ctx.dictBuffer); |
| 1309 | ZSTD_freeCCtx(ctx.cctx); |
| 1310 | ZSTD_freeDCtx(ctx.dctx); |
| 1311 | } |
| 1312 | |
| 1313 | static int createContexts(contexts_t* ctx, const char* dictFileName) { |
| 1314 | FILE* f; |
| 1315 | size_t readSize; |
| 1316 | ctx->cctx = ZSTD_createCCtx(); |
| 1317 | ctx->dctx = ZSTD_createDCtx(); |
| 1318 | assert(ctx->cctx != NULL); |
| 1319 | assert(ctx->dctx != NULL); |
| 1320 | |
| 1321 | if(dictFileName == NULL) { |
| 1322 | ctx->dictSize = 0; |
| 1323 | ctx->dictBuffer = NULL; |
| 1324 | return 0; |
| 1325 | } |
| 1326 | { U64 const dictFileSize = UTIL_getFileSize(dictFileName); |
| 1327 | assert(dictFileSize != UTIL_FILESIZE_UNKNOWN); |
| 1328 | ctx->dictSize = (size_t)dictFileSize; |
| 1329 | assert((U64)ctx->dictSize == dictFileSize); /* check overflow */ |
| 1330 | } |
| 1331 | ctx->dictBuffer = malloc(ctx->dictSize); |
| 1332 | |
| 1333 | f = fopen(dictFileName, "rb"); |
| 1334 | |
| 1335 | if (f==NULL) { |
| 1336 | DISPLAY("unable to open file\n"); |
| 1337 | freeContexts(*ctx); |
| 1338 | return 1; |
| 1339 | } |
| 1340 | |
| 1341 | if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) { |
| 1342 | DISPLAY("dictionary too large\n"); |
| 1343 | fclose(f); |
| 1344 | freeContexts(*ctx); |
| 1345 | return 1; |
| 1346 | } |
| 1347 | readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f); |
| 1348 | fclose(f); |
| 1349 | if (readSize != ctx->dictSize) { |
| 1350 | DISPLAY("unable to read file\n"); |
| 1351 | freeContexts(*ctx); |
| 1352 | return 1; |
| 1353 | } |
| 1354 | return 0; |
| 1355 | } |
| 1356 | |
| 1357 | /*-************************************ |
| 1358 | * Optimizer Memoization Functions |
| 1359 | **************************************/ |
| 1360 | |
| 1361 | /* return: new length */ |
| 1362 | /* keep old array, will need if iter over strategy. */ |
| 1363 | /* prunes useless params */ |
| 1364 | static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) { |
| 1365 | size_t i, j = 0; |
| 1366 | for(i = 0; i < varLength; i++) { |
| 1367 | if( !((varArray[i] == clog_ind && strat == ZSTD_fast) |
| 1368 | || (varArray[i] == slog_ind && strat == ZSTD_fast) |
| 1369 | || (varArray[i] == slog_ind && strat == ZSTD_dfast) |
| 1370 | || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) { |
| 1371 | varNew[j] = varArray[i]; |
| 1372 | j++; |
| 1373 | } |
| 1374 | } |
| 1375 | return j; |
| 1376 | } |
| 1377 | |
| 1378 | /* res should be NUM_PARAMS size */ |
| 1379 | /* constructs varArray from paramValues_t style parameter */ |
| 1380 | /* pass in using dict. */ |
| 1381 | static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) { |
| 1382 | varInds_t i; |
| 1383 | size_t j = 0; |
| 1384 | for(i = 0; i < NUM_PARAMS; i++) { |
| 1385 | if(paramConstraints.vals[i] == PARAM_UNSET) { |
| 1386 | if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */ |
| 1387 | res[j] = i; j++; |
| 1388 | } |
| 1389 | } |
| 1390 | return j; |
| 1391 | } |
| 1392 | |
| 1393 | /* length of memo table given free variables */ |
| 1394 | static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) { |
| 1395 | size_t arrayLen = 1; |
| 1396 | size_t i; |
| 1397 | for(i = 0; i < varyLen; i++) { |
| 1398 | if(varyParams[i] == strt_ind) continue; /* strategy separated by table */ |
| 1399 | arrayLen *= rangetable[varyParams[i]]; |
| 1400 | } |
| 1401 | return arrayLen; |
| 1402 | } |
| 1403 | |
| 1404 | /* returns unique index in memotable of compression parameters */ |
| 1405 | static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) { |
| 1406 | size_t i; |
| 1407 | unsigned ind = 0; |
| 1408 | for(i = 0; i < varyLen; i++) { |
| 1409 | varInds_t v = varyParams[i]; |
| 1410 | if(v == strt_ind) continue; /* exclude strategy from memotable */ |
| 1411 | ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]); |
| 1412 | } |
| 1413 | return ind; |
| 1414 | } |
| 1415 | |
| 1416 | static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) { |
| 1417 | const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; |
| 1418 | switch(mt.tableType) { |
| 1419 | case directMap: |
| 1420 | return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)]; |
| 1421 | case xxhashMap: |
| 1422 | return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen]; |
| 1423 | case noMemo: |
| 1424 | return 0; |
| 1425 | } |
| 1426 | return 0; /* should never happen, stop compiler warnings */ |
| 1427 | } |
| 1428 | |
| 1429 | static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) { |
| 1430 | const memoTable_t mt = memoTableArray[p.vals[strt_ind]]; |
| 1431 | switch(mt.tableType) { |
| 1432 | case directMap: |
| 1433 | mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break; |
| 1434 | case xxhashMap: |
| 1435 | mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break; |
| 1436 | case noMemo: |
| 1437 | break; |
| 1438 | } |
| 1439 | } |
| 1440 | |
| 1441 | /* frees all allocated memotables */ |
| 1442 | /* secret contract : |
| 1443 | * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */ |
| 1444 | static void freeMemoTableArray(memoTable_t* const mtAll) { |
| 1445 | int i; |
| 1446 | if(mtAll == NULL) { return; } |
| 1447 | for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { |
| 1448 | free(mtAll[i].table); |
| 1449 | } |
| 1450 | free(mtAll); |
| 1451 | } |
| 1452 | |
| 1453 | /* inits memotables for all (including mallocs), all strategies */ |
| 1454 | /* takes unsanitized varyParams */ |
| 1455 | static memoTable_t* |
| 1456 | createMemoTableArray(const paramValues_t p, |
| 1457 | const varInds_t* const varyParams, |
| 1458 | const size_t varyLen, |
| 1459 | const U32 memoTableLog) |
| 1460 | { |
| 1461 | memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1)); |
| 1462 | ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX; |
| 1463 | |
| 1464 | if(mtAll == NULL) { |
| 1465 | return NULL; |
| 1466 | } |
| 1467 | |
| 1468 | for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { |
| 1469 | mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i); |
| 1470 | } |
| 1471 | |
| 1472 | /* no memoization */ |
| 1473 | if(memoTableLog == 0) { |
| 1474 | for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) { |
| 1475 | mtAll[i].tableType = noMemo; |
| 1476 | mtAll[i].table = NULL; |
| 1477 | mtAll[i].tableLen = 0; |
| 1478 | } |
| 1479 | return mtAll; |
| 1480 | } |
| 1481 | |
| 1482 | |
| 1483 | if(p.vals[strt_ind] != PARAM_UNSET) { |
| 1484 | stratMin = p.vals[strt_ind]; |
| 1485 | stratMax = p.vals[strt_ind]; |
| 1486 | } |
| 1487 | |
| 1488 | |
| 1489 | for(i = stratMin; i <= stratMax; i++) { |
| 1490 | size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen); |
| 1491 | mtAll[i].tableType = directMap; |
| 1492 | |
| 1493 | if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */ |
| 1494 | mtAll[i].tableType = xxhashMap; |
| 1495 | mtl = ((size_t)1 << memoTableLog); |
| 1496 | } |
| 1497 | |
| 1498 | mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl); |
| 1499 | mtAll[i].tableLen = mtl; |
| 1500 | |
| 1501 | if(mtAll[i].table == NULL) { |
| 1502 | freeMemoTableArray(mtAll); |
| 1503 | return NULL; |
| 1504 | } |
| 1505 | } |
| 1506 | |
| 1507 | return mtAll; |
| 1508 | } |
| 1509 | |
| 1510 | /* Sets pc to random unmeasured set of parameters */ |
| 1511 | /* specify strategy */ |
| 1512 | static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st) |
| 1513 | { |
| 1514 | size_t j; |
| 1515 | const memoTable_t mt = memoTableArray[st]; |
| 1516 | pc->vals[strt_ind] = st; |
| 1517 | for(j = 0; j < mt.tableLen; j++) { |
| 1518 | int i; |
| 1519 | for(i = 0; i < NUM_PARAMS; i++) { |
| 1520 | varInds_t v = mt.varArray[i]; |
| 1521 | if(v == strt_ind) continue; |
| 1522 | pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]); |
| 1523 | } |
| 1524 | |
| 1525 | if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */ |
| 1526 | } |
| 1527 | } |
| 1528 | |
| 1529 | /*-************************************ |
| 1530 | * Benchmarking Functions |
| 1531 | **************************************/ |
| 1532 | |
| 1533 | static void display_params_tested(paramValues_t cParams) |
| 1534 | { |
| 1535 | varInds_t vi; |
| 1536 | DISPLAYLEVEL(3, "\r testing :"); |
| 1537 | for (vi=0; vi < NUM_PARAMS; vi++) { |
| 1538 | DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]); |
| 1539 | } |
| 1540 | DISPLAYLEVEL(3, "\b \r"); |
| 1541 | } |
| 1542 | |
/* BMK_benchMemInvertible() :
 * Replicates the functionality of benchMemAdvanced, but with pre-split src / dst buffers.
 * The purpose is that sufficient information is returned so that
 * a decompression call to benchMemInvertible is possible.
 *
 * Compression phase (skipped when mode == BMK_decodeOnly) :
 *   reads buf.srcPtrs, writes buf.dstPtrs, per-block results go into buf.dstSizes.
 * Decompression phase (skipped when mode == BMK_compressOnly) :
 *   reads buf.dstPtrs (sizes passed : buf.dstCapacities), writes buf.resPtrs.
 *
 * cLevel and comprParams are applied to the compression context before each run
 * (see local_initCCtx()).
 * nbSeconds is passed, multiplied by 1000, to BMK_createTimedFnState() for each phase.
 * The dictionary in `ctx` is nullable, nothing else though.
 * @return : an outcome carrying cSize, cSpeed, dSpeed and cMem on success (tag 0),
 *           or a failed outcome (tag 1) as soon as one run fails.
 * note : it would be a lot better if this function was present in benchzstd.c,
 *        sharing code with benchMemAdvanced(), since it's technically a part of it */
static BMK_benchOutcome_t
BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
                        int cLevel, const paramValues_t* comprParams,
                        BMK_mode_t mode, unsigned nbSeconds)
{
    U32 i;
    BMK_benchResult_t bResult;
    const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs;
    size_t const *const srcSizes = buf.srcSizes;
    void** const dstPtrs = buf.dstPtrs;
    size_t const *const dstCapacities = buf.dstCapacities;
    size_t* const dstSizes = buf.dstSizes;
    void** const resPtrs = buf.resPtrs;
    size_t const *const resSizes = buf.resSizes;
    const void* dictBuffer = ctx.dictBuffer;
    const size_t dictBufferSize = ctx.dictSize;
    const size_t nbBlocks = buf.nbBlocks;
    const size_t srcSize = buf.srcSize;
    ZSTD_CCtx* cctx = ctx.cctx;
    ZSTD_DCtx* dctx = ctx.dctx;

    /* init */
    display_params_tested(*comprParams);
    memset(&bResult, 0, sizeof(bResult));

    /* warming up memory :
     * fill the output area of the first phase to run with generated data */
    for (i = 0; i < buf.nbBlocks; i++) {
        if (mode != BMK_decodeOnly) {
            RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1);
        } else {
            RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1);
        }
    }

    /* Bench */
    {
        /* init args */
        /* a phase which is not requested is flagged as already completed */
        int compressionCompleted = (mode == BMK_decodeOnly);
        int decompressionCompleted = (mode == BMK_compressOnly);
        BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
        BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
        BMK_benchParams_t cbp, dbp;
        BMK_initCCtxArgs cctxprep;
        BMK_initDCtxArgs dctxprep;

        /* compression : srcPtrs -> dstPtrs, compressed sizes stored into dstSizes */
        cbp.benchFn = local_defaultCompress;
        cbp.benchPayload = cctx;
        cbp.initFn = local_initCCtx;
        cbp.initPayload = &cctxprep;
        cbp.errorFn = ZSTD_isError;
        cbp.blockCount = nbBlocks;
        cbp.srcBuffers = srcPtrs;
        cbp.srcSizes = srcSizes;
        cbp.dstBuffers = dstPtrs;
        cbp.dstCapacities = dstCapacities;
        cbp.blockResults = dstSizes;

        cctxprep.cctx = cctx;
        cctxprep.dictBuffer = dictBuffer;
        cctxprep.dictBufferSize = dictBufferSize;
        cctxprep.cLevel = cLevel;
        cctxprep.comprParams = comprParams;

        /* decompression : dstPtrs -> resPtrs, per-block results are not collected.
         * note : input sizes are the destination capacities, not dstSizes */
        dbp.benchFn = local_defaultDecompress;
        dbp.benchPayload = dctx;
        dbp.initFn = local_initDCtx;
        dbp.initPayload = &dctxprep;
        dbp.errorFn = ZSTD_isError;
        dbp.blockCount = nbBlocks;
        dbp.srcBuffers = (const void* const *) dstPtrs;
        dbp.srcSizes = dstCapacities;
        dbp.dstBuffers = resPtrs;
        dbp.dstCapacities = resSizes;
        dbp.blockResults = NULL;

        dctxprep.dctx = dctx;
        dctxprep.dictBuffer = dictBuffer;
        dctxprep.dictBufferSize = dictBufferSize;

        assert(timeStateCompress != NULL);
        assert(timeStateDecompress != NULL);
        /* run compression until the timed state reports completion */
        while(!compressionCompleted) {
            BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);

            if (!BMK_isSuccessful_runOutcome(cOutcome)) {
                BMK_benchOutcome_t bOut;
                memset(&bOut, 0, sizeof(bOut));
                bOut.tag = 1; /* should rather be a function or a constant */
                BMK_freeTimedFnState(timeStateCompress);
                BMK_freeTimedFnState(timeStateDecompress);
                return bOut;
            }
            {   BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
                /* speed : source bytes processed per TIMELOOP_NANOSEC (1 second) */
                bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
                bResult.cSize = rResult.sumOfReturn;
            }
            compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
        }

        /* run decompression until the timed state reports completion */
        while (!decompressionCompleted) {
            BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);

            if (!BMK_isSuccessful_runOutcome(dOutcome)) {
                BMK_benchOutcome_t bOut;
                memset(&bOut, 0, sizeof(bOut));
                bOut.tag = 1; /* should rather be a function or a constant */
                BMK_freeTimedFnState(timeStateCompress);
                BMK_freeTimedFnState(timeStateDecompress);
                return bOut;
            }
            {   BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
                bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
            }
            decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
        }

        BMK_freeTimedFnState(timeStateCompress);
        BMK_freeTimedFnState(timeStateDecompress);
    }

    /* memory usage : (1 << windowLog) + current size of the compression context */
    bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);

    /* success : wrap the result into an outcome */
    {   BMK_benchOutcome_t bOut;
        bOut.tag = 0;
        bOut.internal_never_use_directly = bResult; /* should be a function */
        return bOut;
    }
}
| 1680 | |
| 1681 | /* BMK_benchParam() : |
| 1682 | * benchmark a set of `cParams` over sample `buf`, |
| 1683 | * store the result in `resultPtr`. |
| 1684 | * @return : 0 if success, 1 if error */ |
| 1685 | static int BMK_benchParam ( BMK_benchResult_t* resultPtr, |
| 1686 | buffers_t buf, contexts_t ctx, |
| 1687 | paramValues_t cParams) |
| 1688 | { |
| 1689 | BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, |
| 1690 | BASE_CLEVEL, &cParams, |
| 1691 | BMK_both, 3); |
| 1692 | if (!BMK_isSuccessful_benchOutcome(outcome)) return 1; |
| 1693 | *resultPtr = BMK_extract_benchResult(outcome); |
| 1694 | return 0; |
| 1695 | } |
| 1696 | |
| 1697 | |
| 1698 | /* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */ |
| 1699 | #define VARIANCE 1.2 |
| 1700 | static int allBench(BMK_benchResult_t* resultPtr, |
| 1701 | const buffers_t buf, const contexts_t ctx, |
| 1702 | const paramValues_t cParams, |
| 1703 | const constraint_t target, |
| 1704 | BMK_benchResult_t* winnerResult, int feas) |
| 1705 | { |
| 1706 | BMK_benchResult_t benchres; |
| 1707 | double uncertaintyConstantC = 3., uncertaintyConstantD = 3.; |
| 1708 | double winnerRS; |
| 1709 | |
| 1710 | BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2); |
| 1711 | if (!BMK_isSuccessful_benchOutcome(outcome)) { |
| 1712 | DEBUGOUTPUT("Benchmarking failed \n"); |
| 1713 | return ERROR_RESULT; |
| 1714 | } |
| 1715 | benchres = BMK_extract_benchResult(outcome); |
| 1716 | |
| 1717 | winnerRS = resultScore(*winnerResult, buf.srcSize, target); |
| 1718 | DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS); |
| 1719 | |
| 1720 | *resultPtr = benchres; |
| 1721 | |
| 1722 | /* anything with worse ratio in feas is definitely worse, discard */ |
| 1723 | if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) { |
| 1724 | return WORSE_RESULT; |
| 1725 | } |
| 1726 | |
| 1727 | /* calculate uncertainty in compression / decompression runs */ |
| 1728 | if (benchres.cSpeed) { |
| 1729 | double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed); |
| 1730 | uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC); |
| 1731 | } |
| 1732 | |
| 1733 | if (benchres.dSpeed) { |
| 1734 | double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed); |
| 1735 | uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD); |
| 1736 | } |
| 1737 | |
| 1738 | /* optimistic assumption of benchres */ |
| 1739 | { BMK_benchResult_t resultMax = benchres; |
| 1740 | resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE); |
| 1741 | resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE); |
| 1742 | |
| 1743 | /* disregard infeasible results in feas mode */ |
| 1744 | /* disregard if resultMax < winner in infeas mode */ |
| 1745 | if((feas && !feasible(resultMax, target)) || |
| 1746 | (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) { |
| 1747 | return WORSE_RESULT; |
| 1748 | } |
| 1749 | } |
| 1750 | |
| 1751 | /* compare by resultScore when in infeas */ |
| 1752 | /* compare by compareResultLT when in feas */ |
| 1753 | if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) || |
| 1754 | (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) ) { |
| 1755 | return BETTER_RESULT; |
| 1756 | } else { |
| 1757 | return WORSE_RESULT; |
| 1758 | } |
| 1759 | } |
| 1760 | |
| 1761 | |
| 1762 | #define INFEASIBLE_THRESHOLD 200 |
| 1763 | /* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */ |
| 1764 | static int benchMemo(BMK_benchResult_t* resultPtr, |
| 1765 | const buffers_t buf, const contexts_t ctx, |
| 1766 | const paramValues_t cParams, |
| 1767 | const constraint_t target, |
| 1768 | BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray, |
| 1769 | const int feas) { |
| 1770 | static int bmcount = 0; |
| 1771 | int res; |
| 1772 | |
| 1773 | if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD |
| 1774 | || redundantParams(cParams, target, buf.maxBlockSize) ) { |
| 1775 | return WORSE_RESULT; |
| 1776 | } |
| 1777 | |
| 1778 | res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas); |
| 1779 | |
| 1780 | if(DEBUG && !(bmcount % 250)) { |
| 1781 | DISPLAY("Count: %d\n", bmcount); |
| 1782 | bmcount++; |
| 1783 | } |
| 1784 | BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize); |
| 1785 | |
| 1786 | if(res == BETTER_RESULT || feas) { |
| 1787 | memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */ |
| 1788 | } |
| 1789 | return res; |
| 1790 | } |
| 1791 | |
| 1792 | |
/* Admission criteria of one compression level.
 * BMK_seed() skips a level for a candidate as soon as one criterion is not met. */
typedef struct {
    U64 cSpeed_min;               /* candidate is skipped when its measured cSpeed is below this value */
    U64 dSpeed_min;               /* candidate is skipped when its measured dSpeed is below this value */
    U32 windowLog_max;            /* candidate is skipped when its windowLog is above this value */
    ZSTD_strategy strategy_max;   /* candidate is skipped when its strategy is above this value */
} level_constraints_t;

/* One entry per level, indexed by level number.
 * BMK_init_level_constraints() zeroes the table then fills entries 1..NB_LEVELS_TRACKED;
 * entry 0 stays zeroed. */
static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1];
| 1801 | |
| 1802 | static void BMK_init_level_constraints(int bytePerSec_level1) |
| 1803 | { |
| 1804 | assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel()); |
| 1805 | memset(g_level_constraint, 0, sizeof(g_level_constraint)); |
| 1806 | g_level_constraint[1].cSpeed_min = bytePerSec_level1; |
| 1807 | g_level_constraint[1].dSpeed_min = 0; |
| 1808 | g_level_constraint[1].windowLog_max = 19; |
| 1809 | g_level_constraint[1].strategy_max = ZSTD_fast; |
| 1810 | |
| 1811 | /* establish speed objectives (relative to level 1) */ |
| 1812 | { int l; |
| 1813 | for (l=2; l<=NB_LEVELS_TRACKED; l++) { |
| 1814 | g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64; |
| 1815 | g_level_constraint[l].dSpeed_min = 0; |
| 1816 | g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5; /* only --ultra levels >= 20 can use windowlog > 23 */ |
| 1817 | g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX; |
| 1818 | } } |
| 1819 | } |
| 1820 | |
| 1821 | static int BMK_seed(winnerInfo_t* winners, |
| 1822 | const paramValues_t params, |
| 1823 | const buffers_t buf, |
| 1824 | const contexts_t ctx) |
| 1825 | { |
| 1826 | BMK_benchResult_t testResult; |
| 1827 | int better = 0; |
| 1828 | int cLevel; |
| 1829 | |
| 1830 | BMK_benchParam(&testResult, buf, ctx, params); |
| 1831 | |
| 1832 | for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) { |
| 1833 | |
| 1834 | if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min) |
| 1835 | continue; /* not fast enough for this level */ |
| 1836 | if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min) |
| 1837 | continue; /* not fast enough for this level */ |
| 1838 | if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max) |
| 1839 | continue; /* too much memory for this level */ |
| 1840 | if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max) |
| 1841 | continue; /* forbidden strategy for this level */ |
| 1842 | if (winners[cLevel].result.cSize==0) { |
| 1843 | /* first solution for this cLevel */ |
| 1844 | winners[cLevel].result = testResult; |
| 1845 | winners[cLevel].params = params; |
| 1846 | BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); |
| 1847 | better = 1; |
| 1848 | continue; |
| 1849 | } |
| 1850 | |
| 1851 | if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) { |
| 1852 | /* Validate solution is "good enough" */ |
| 1853 | double W_ratio = (double)buf.srcSize / (double)testResult.cSize; |
| 1854 | double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize; |
| 1855 | double W_ratioNote = log (W_ratio); |
| 1856 | double O_ratioNote = log (O_ratio); |
| 1857 | size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB); |
| 1858 | size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB); |
| 1859 | double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed); |
| 1860 | double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed); |
| 1861 | |
| 1862 | size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params)); |
| 1863 | size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params)); |
| 1864 | double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed); |
| 1865 | double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed); |
| 1866 | |
| 1867 | double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed); |
| 1868 | double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed); |
| 1869 | |
| 1870 | double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed); |
| 1871 | double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed); |
| 1872 | |
| 1873 | if (W_DMemUsed_note < O_DMemUsed_note) { |
| 1874 | /* uses too much Decompression memory for too little benefit */ |
| 1875 | if (W_ratio > O_ratio) |
| 1876 | DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", |
| 1877 | W_ratio, (double)(W_DMemUsed) / 1024 / 1024, |
| 1878 | O_ratio, (double)(O_DMemUsed) / 1024 / 1024, cLevel); |
| 1879 | continue; |
| 1880 | } |
| 1881 | if (W_CMemUsed_note < O_CMemUsed_note) { |
| 1882 | /* uses too much memory for compression for too little benefit */ |
| 1883 | if (W_ratio > O_ratio) |
| 1884 | DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB vs %5.3f @ %4.1f MB : not enough for level %i\n", |
| 1885 | W_ratio, (double)(W_CMemUsed) / 1024 / 1024, |
| 1886 | O_ratio, (double)(O_CMemUsed) / 1024 / 1024, |
| 1887 | cLevel); |
| 1888 | continue; |
| 1889 | } |
| 1890 | if (W_CSpeed_note < O_CSpeed_note ) { |
| 1891 | /* too large compression speed difference for the compression benefit */ |
| 1892 | if (W_ratio > O_ratio) |
| 1893 | DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", |
| 1894 | W_ratio, (double)testResult.cSpeed / MB_UNIT, |
| 1895 | O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT, |
| 1896 | cLevel); |
| 1897 | continue; |
| 1898 | } |
| 1899 | if (W_DSpeed_note < O_DSpeed_note ) { |
| 1900 | /* too large decompression speed difference for the compression benefit */ |
| 1901 | if (W_ratio > O_ratio) |
| 1902 | DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s vs %5.3f @ %4.1f MB/s : not enough for level %i\n", |
| 1903 | W_ratio, (double)testResult.dSpeed / MB_UNIT, |
| 1904 | O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT, |
| 1905 | cLevel); |
| 1906 | continue; |
| 1907 | } |
| 1908 | |
| 1909 | if (W_ratio < O_ratio) |
| 1910 | DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n", |
| 1911 | W_ratio, O_ratio, cLevel); |
| 1912 | |
| 1913 | winners[cLevel].result = testResult; |
| 1914 | winners[cLevel].params = params; |
| 1915 | BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize); |
| 1916 | |
| 1917 | better = 1; |
| 1918 | } } |
| 1919 | |
| 1920 | return better; |
| 1921 | } |
| 1922 | |
| 1923 | /*-************************************ |
| 1924 | * Compression Level Table Generation Functions |
| 1925 | **************************************/ |
| 1926 | |
| 1927 | #define PARAMTABLELOG 25 |
| 1928 | #define PARAMTABLESIZE (1<<PARAMTABLELOG) |
| 1929 | #define PARAMTABLEMASK (PARAMTABLESIZE-1) |
| 1930 | static BYTE g_alreadyTested[PARAMTABLESIZE] = {0}; /* init to zero */ |
| 1931 | |
| 1932 | static BYTE* NB_TESTS_PLAYED(paramValues_t p) |
| 1933 | { |
| 1934 | ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p)); |
| 1935 | unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0); |
| 1936 | return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK]; |
| 1937 | } |
| 1938 | |
| 1939 | static void playAround(FILE* f, |
| 1940 | winnerInfo_t* winners, |
| 1941 | paramValues_t p, |
| 1942 | const buffers_t buf, const contexts_t ctx) |
| 1943 | { |
| 1944 | int nbVariations = 0; |
| 1945 | UTIL_time_t const clockStart = UTIL_getTime(); |
| 1946 | |
| 1947 | while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) { |
| 1948 | if (nbVariations++ > g_maxNbVariations) break; |
| 1949 | |
| 1950 | do { |
| 1951 | int i; |
| 1952 | for(i = 0; i < 4; i++) { |
| 1953 | paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1), |
| 1954 | ((FUZ_rand(&g_rand) & 1) << 1) - 1, |
| 1955 | &p); |
| 1956 | } |
| 1957 | } while (!paramValid(p)); |
| 1958 | |
| 1959 | /* exclude faster if already played params */ |
| 1960 | if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1)) |
| 1961 | continue; |
| 1962 | |
| 1963 | /* test */ |
| 1964 | { BYTE* const b = NB_TESTS_PLAYED(p); |
| 1965 | (*b)++; |
| 1966 | } |
| 1967 | if (!BMK_seed(winners, p, buf, ctx)) continue; |
| 1968 | |
| 1969 | /* improvement found => search more */ |
| 1970 | BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); |
| 1971 | playAround(f, winners, p, buf, ctx); |
| 1972 | } |
| 1973 | |
| 1974 | } |
| 1975 | |
| 1976 | static void |
| 1977 | BMK_selectRandomStart( FILE* f, |
| 1978 | winnerInfo_t* winners, |
| 1979 | const buffers_t buf, const contexts_t ctx) |
| 1980 | { |
| 1981 | U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1); |
| 1982 | if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) { |
| 1983 | /* use some random entry */ |
| 1984 | paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */ |
| 1985 | buf.srcSize, 0); |
| 1986 | playAround(f, winners, p, buf, ctx); |
| 1987 | } else { |
| 1988 | playAround(f, winners, winners[id].params, buf, ctx); |
| 1989 | } |
| 1990 | } |
| 1991 | |
| 1992 | |
| 1993 | /* BMK_generate_cLevelTable() : |
| 1994 | * test a large number of configurations |
| 1995 | * and distribute them across compression levels according to speed conditions. |
| 1996 | * display and save all intermediate results into rfName = "grillResults.txt". |
| 1997 | * the function automatically stops after g_timeLimit_s. |
| 1998 | * this function cannot error, it directly exit() in case of problem. |
| 1999 | */ |
| 2000 | static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx) |
| 2001 | { |
| 2002 | paramValues_t params; |
| 2003 | winnerInfo_t winners[NB_LEVELS_TRACKED+1]; |
| 2004 | const char* const rfName = "grillResults.txt"; |
| 2005 | FILE* const f = fopen(rfName, "w"); |
| 2006 | |
| 2007 | /* init */ |
| 2008 | assert(g_singleRun==0); |
| 2009 | memset(winners, 0, sizeof(winners)); |
| 2010 | if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); } |
| 2011 | |
| 2012 | if (g_target) { |
| 2013 | BMK_init_level_constraints(g_target * MB_UNIT); |
| 2014 | } else { |
| 2015 | /* baseline config for level 1 */ |
| 2016 | paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize)); |
| 2017 | BMK_benchResult_t testResult; |
| 2018 | BMK_benchParam(&testResult, buf, ctx, l1params); |
| 2019 | BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32)); |
| 2020 | } |
| 2021 | |
| 2022 | /* populate initial solution */ |
| 2023 | { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); |
| 2024 | int i; |
| 2025 | for (i=0; i<=maxSeeds; i++) { |
| 2026 | params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0)); |
| 2027 | BMK_seed(winners, params, buf, ctx); |
| 2028 | } } |
| 2029 | BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); |
| 2030 | |
| 2031 | /* start tests */ |
| 2032 | { const UTIL_time_t grillStart = UTIL_getTime(); |
| 2033 | do { |
| 2034 | BMK_selectRandomStart(f, winners, buf, ctx); |
| 2035 | } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s); |
| 2036 | } |
| 2037 | |
| 2038 | /* end summary */ |
| 2039 | BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize); |
| 2040 | DISPLAY("grillParams operations completed \n"); |
| 2041 | |
| 2042 | /* clean up*/ |
| 2043 | fclose(f); |
| 2044 | } |
| 2045 | |
| 2046 | |
| 2047 | /*-************************************ |
| 2048 | * Single Benchmark Functions |
| 2049 | **************************************/ |
| 2050 | |
| 2051 | static int |
| 2052 | benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel) |
| 2053 | { |
| 2054 | BMK_benchResult_t testResult; |
| 2055 | g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); |
| 2056 | |
| 2057 | if (BMK_benchParam(&testResult, buf, ctx, g_params)) { |
| 2058 | DISPLAY("Error during benchmarking\n"); |
| 2059 | return 1; |
| 2060 | } |
| 2061 | |
| 2062 | BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize); |
| 2063 | |
| 2064 | return 0; |
| 2065 | } |
| 2066 | |
| 2067 | static int benchSample(double compressibility, int cLevel) |
| 2068 | { |
| 2069 | const char* const name = "Sample 10MB"; |
| 2070 | size_t const benchedSize = 10 MB; |
| 2071 | void* const srcBuffer = malloc(benchedSize); |
| 2072 | int ret = 0; |
| 2073 | |
| 2074 | buffers_t buf; |
| 2075 | contexts_t ctx; |
| 2076 | |
| 2077 | if(srcBuffer == NULL) { |
| 2078 | DISPLAY("Out of Memory\n"); |
| 2079 | return 2; |
| 2080 | } |
| 2081 | |
| 2082 | RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); |
| 2083 | |
| 2084 | if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) { |
| 2085 | DISPLAY("Buffer Creation Error\n"); |
| 2086 | free(srcBuffer); |
| 2087 | return 3; |
| 2088 | } |
| 2089 | |
| 2090 | if(createContexts(&ctx, NULL)) { |
| 2091 | DISPLAY("Context Creation Error\n"); |
| 2092 | freeBuffers(buf); |
| 2093 | return 1; |
| 2094 | } |
| 2095 | |
| 2096 | /* bench */ |
| 2097 | DISPLAY("\r%79s\r", ""); |
| 2098 | DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100)); |
| 2099 | |
| 2100 | if(g_singleRun) { |
| 2101 | ret = benchOnce(buf, ctx, cLevel); |
| 2102 | } else { |
| 2103 | BMK_generate_cLevelTable(buf, ctx); |
| 2104 | } |
| 2105 | |
| 2106 | freeBuffers(buf); |
| 2107 | freeContexts(ctx); |
| 2108 | |
| 2109 | return ret; |
| 2110 | } |
| 2111 | |
| 2112 | /* benchFiles() : |
| 2113 | * note: while this function takes a table of filenames, |
| 2114 | * in practice, only the first filename will be used */ |
| 2115 | static int benchFiles(const char** fileNamesTable, int nbFiles, |
| 2116 | const char* dictFileName, int cLevel) |
| 2117 | { |
| 2118 | buffers_t buf; |
| 2119 | contexts_t ctx; |
| 2120 | int ret = 0; |
| 2121 | |
| 2122 | if (createBuffers(&buf, fileNamesTable, nbFiles)) { |
| 2123 | DISPLAY("unable to load files\n"); |
| 2124 | return 1; |
| 2125 | } |
| 2126 | |
| 2127 | if (createContexts(&ctx, dictFileName)) { |
| 2128 | DISPLAY("unable to load dictionary\n"); |
| 2129 | freeBuffers(buf); |
| 2130 | return 2; |
| 2131 | } |
| 2132 | |
| 2133 | DISPLAY("\r%79s\r", ""); |
| 2134 | if (nbFiles == 1) { |
| 2135 | DISPLAY("using %s : \n", fileNamesTable[0]); |
| 2136 | } else { |
| 2137 | DISPLAY("using %d Files : \n", nbFiles); |
| 2138 | } |
| 2139 | |
| 2140 | if (g_singleRun) { |
| 2141 | ret = benchOnce(buf, ctx, cLevel); |
| 2142 | } else { |
| 2143 | BMK_generate_cLevelTable(buf, ctx); |
| 2144 | } |
| 2145 | |
| 2146 | freeBuffers(buf); |
| 2147 | freeContexts(ctx); |
| 2148 | return ret; |
| 2149 | } |
| 2150 | |
| 2151 | |
| 2152 | /*-************************************ |
| 2153 | * Local Optimization Functions |
| 2154 | **************************************/ |
| 2155 | |
/* One iteration of hill climbing. Specifically, it first tries all
 * valid parameter configurations w/ manhattan distance 1 and picks the best one;
 * failing that, it progressively tries candidates further and further away (up to #dim + 2).
 * If it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the
 * current stage of hill climbing.
 * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to
 * the resultScore function, which is effectively a linear increase in reward until it reaches
 * the constraint-satisfying value, at which point any excess results in only logarithmic reward.
 * This aims to find some constraint-satisfying point.
 * Phase 2 optimizes in accordance with what the original function sets out to maximize, with
 * all feasible solutions valued over all infeasible solutions.
 */
| 2168 | |
| 2169 | /* sanitize all params here. |
| 2170 | * all generation after random should be sanitized. (maybe sanitize random) |
| 2171 | */ |
/* climbOnce() :
 * Hill climbing starting from `init`, in two phases driven by `feas`
 * (0 during phase 1, 1 during phase 2), which is forwarded to benchMemo().
 *  target : constraints forwarded to benchMemo() and compareResultLT()
 *  mtAll  : memo tables, indexed by strategy; also provide the list of variable parameters
 * @return : the winner at the point where phase 2 finds no further improvement
 * NOTE(review): CHECKTIME() is a macro defined outside this view; it presumably returns
 * its argument once the time budget is spent - confirm. */
static winnerInfo_t climbOnce(const constraint_t target,
                memoTable_t* mtAll,
                const buffers_t buf, const contexts_t ctx,
                const paramValues_t init)
{
    /*
     * cparam - currently considered 'center'
     * candidate - params to benchmark/results
     * winner - best option found so far.
     */
    paramValues_t cparam = init;
    winnerInfo_t candidateInfo, winnerInfo;
    int better = 1;
    int feas = 0;

    winnerInfo = initWinnerInfo(init);
    candidateInfo = winnerInfo;

    /* bestFeasible1 tracks the best result seen according to compareResultLT();
     * it becomes the starting winner of phase 2 */
    {   winnerInfo_t bestFeasible1 = initWinnerInfo(cparam);
        DEBUGOUTPUT("Climb Part 1\n");
        while(better) {
            int offset;
            size_t i, dist;
            /* number of variable parameters for the strategy of the current center */
            const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen;
            better = 0;
            DEBUGOUTPUT("Start\n");
            /* re-center on the latest winner */
            cparam = winnerInfo.params;
            candidateInfo.params = cparam;
            /* all dist-1 candidates : each variable parameter moved by -1, then by +1 */
            for (i = 0; i < varLen; i++) {
                for (offset = -1; offset <= 1; offset += 2) {
                    CHECKTIME(winnerInfo);
                    candidateInfo.params = cparam;
                    paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
                                  offset,
                                  &candidateInfo.params);

                    if(paramValid(candidateInfo.params)) {
                        int res;
                        /* the benchmark runs on sanitized params,
                         * while candidateInfo keeps the unsanitized ones */
                        res = benchMemo(&candidateInfo.result, buf, ctx,
                            sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas);
                        DEBUGOUTPUT("Res: %d\n", res);
                        if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */
                            winnerInfo = candidateInfo;
                            better = 1;
                            if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
                                bestFeasible1 = winnerInfo;
                            }
                        }
                    }
                }  /* for (offset = -1; offset <= 1; offset += 2) */
            }   /* for (i = 0; i < varLen; i++) */

            /* a dist-1 neighbour improved : restart from the new winner */
            if(better) {
                continue;
            }

            /* no dist-1 improvement : try variations at growing distance,
             * stopping at the first improvement.
             * When varLen == 0 this loop does not run, so the division below is never evaluated. */
            for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */
                for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) {
                    int res;
                    CHECKTIME(winnerInfo);
                    candidateInfo.params = cparam;
                    /* param error checking already done here */
                    paramVariation(&candidateInfo.params, mtAll, (U32)dist);

                    res = benchMemo(&candidateInfo.result,
                                    buf, ctx,
                                    sanitizeParams(candidateInfo.params), target,
                                    &winnerInfo.result, mtAll, feas);
                    DEBUGOUTPUT("Res: %d\n", res);
                    if (res == BETTER_RESULT) { /* synonymous with better in this case*/
                        winnerInfo = candidateInfo;
                        better = 1;
                        if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
                            bestFeasible1 = winnerInfo;
                        }
                        break;
                    }
                }

                if (better) {
                    break;
                }
            }   /* for(dist = 2; dist < varLen + 2; dist++) */

            /* nothing better at any distance :
             * phase 1 hands over to phase 2, phase 2 ends the climb */
            if (!better) { /* infeas -> feas -> stop */
                if (feas) return winnerInfo;
                feas = 1;
                better = 1;
                winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */
                DEBUGOUTPUT("Climb Part 2\n");
            }
        }
        /* NOTE(review): `better` is always 1 when the loop condition is evaluated,
         * so the statements below look unreachable - confirm. */
        winnerInfo = bestFeasible1;
    }

    return winnerInfo;
}
| 2270 | |
| 2271 | /* Optimizes for a fixed strategy */ |
| 2272 | |
| 2273 | /* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited) |
| 2274 | weight more on visit for bad results, less on good results/more on later results / ones with more failures. |
| 2275 | allocate memoTable here. |
| 2276 | */ |
| 2277 | static winnerInfo_t |
| 2278 | optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx, |
| 2279 | const constraint_t target, paramValues_t paramTarget, |
| 2280 | const ZSTD_strategy strat, |
| 2281 | memoTable_t* memoTableArray, const int tries) |
| 2282 | { |
| 2283 | int i = 0; |
| 2284 | |
| 2285 | paramValues_t init; |
| 2286 | winnerInfo_t winnerInfo, candidateInfo; |
| 2287 | winnerInfo = initWinnerInfo(emptyParams()); |
| 2288 | /* so climb is given the right fixed strategy */ |
| 2289 | paramTarget.vals[strt_ind] = strat; |
| 2290 | /* to pass ZSTD_checkCParams */ |
| 2291 | paramTarget = cParamUnsetMin(paramTarget); |
| 2292 | |
| 2293 | init = paramTarget; |
| 2294 | |
| 2295 | for(i = 0; i < tries; i++) { |
| 2296 | DEBUGOUTPUT("Restart\n"); |
| 2297 | do { |
| 2298 | randomConstrainedParams(&init, memoTableArray, strat); |
| 2299 | } while(redundantParams(init, target, buf.maxBlockSize)); |
| 2300 | candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init); |
| 2301 | if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) { |
| 2302 | winnerInfo = candidateInfo; |
| 2303 | BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize); |
| 2304 | i = 0; |
| 2305 | continue; |
| 2306 | } |
| 2307 | CHECKTIME(winnerInfo); |
| 2308 | i++; |
| 2309 | } |
| 2310 | return winnerInfo; |
| 2311 | } |
| 2312 | |
| 2313 | /* goes best, best-1, best+1, best-2, ... */ |
| 2314 | /* return 0 if nothing remaining */ |
| 2315 | static int nextStrategy(const int currentStrategy, const int bestStrategy) |
| 2316 | { |
| 2317 | if(bestStrategy <= currentStrategy) { |
| 2318 | int candidate = 2 * bestStrategy - currentStrategy - 1; |
| 2319 | if(candidate < 1) { |
| 2320 | candidate = currentStrategy + 1; |
| 2321 | if(candidate > (int)ZSTD_STRATEGY_MAX) { |
| 2322 | return 0; |
| 2323 | } else { |
| 2324 | return candidate; |
| 2325 | } |
| 2326 | } else { |
| 2327 | return candidate; |
| 2328 | } |
| 2329 | } else { /* bestStrategy >= currentStrategy */ |
| 2330 | int candidate = 2 * bestStrategy - currentStrategy; |
| 2331 | if(candidate > (int)ZSTD_STRATEGY_MAX) { |
| 2332 | candidate = currentStrategy - 1; |
| 2333 | if(candidate < 1) { |
| 2334 | return 0; |
| 2335 | } else { |
| 2336 | return candidate; |
| 2337 | } |
| 2338 | } else { |
| 2339 | return candidate; |
| 2340 | } |
| 2341 | } |
| 2342 | } |
| 2343 | |
| 2344 | /* experiment with playing with this and decay value */ |
| 2345 | |
| 2346 | /* main fn called when using --optimize */ |
/* Does strategy selection by benchmarking default compression levels,
 * then optimizes by strategy, starting with the best one and
 * moving progressively further away by number.
| 2350 | * args: |
| 2351 | * fileNamesTable - list of files to benchmark |
| 2352 | * nbFiles - length of fileNamesTable |
| 2353 | * dictFileName - name of dictionary file if one, else NULL |
| 2354 | * target - performance constraints (cSpeed, dSpeed, cMem) |
| 2355 | * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast) |
| 2356 | * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio) |
| 2357 | */ |
| 2358 | |
/* Initial try budget : optimizeForSize() copies it into its local `tries`. */
static unsigned g_maxTries = 5;
/* NOTE(review): presumably the amount by which the try budget shrinks as strategies
 * further from the best one are explored; its use is outside this view - confirm. */
#define TRY_DECAY 1
| 2361 | |
| 2362 | static int |
| 2363 | optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles, |
| 2364 | const char* dictFileName, |
| 2365 | constraint_t target, paramValues_t paramTarget, |
| 2366 | const int cLevelOpt, const int cLevelRun, |
| 2367 | const U32 memoTableLog) |
| 2368 | { |
| 2369 | varInds_t varArray [NUM_PARAMS]; |
| 2370 | int ret = 0; |
| 2371 | const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL); |
| 2372 | winnerInfo_t winner = initWinnerInfo(emptyParams()); |
| 2373 | memoTable_t* allMT = NULL; |
| 2374 | paramValues_t paramBase; |
| 2375 | contexts_t ctx; |
| 2376 | buffers_t buf; |
| 2377 | g_time = UTIL_getTime(); |
| 2378 | |
| 2379 | if (createBuffers(&buf, fileNamesTable, nbFiles)) { |
| 2380 | DISPLAY("unable to load files\n"); |
| 2381 | return 1; |
| 2382 | } |
| 2383 | |
| 2384 | if (createContexts(&ctx, dictFileName)) { |
| 2385 | DISPLAY("unable to load dictionary\n"); |
| 2386 | freeBuffers(buf); |
| 2387 | return 2; |
| 2388 | } |
| 2389 | |
| 2390 | if (nbFiles == 1) { |
| 2391 | DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[0]); |
| 2392 | } else { |
| 2393 | DISPLAYLEVEL(2, "Loading %lu Files... \r", (unsigned long)nbFiles); |
| 2394 | } |
| 2395 | |
| 2396 | /* sanitize paramTarget */ |
| 2397 | optimizerAdjustInput(¶mTarget, buf.maxBlockSize); |
| 2398 | paramBase = cParamUnsetMin(paramTarget); |
| 2399 | |
| 2400 | allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog); |
| 2401 | |
| 2402 | if (!allMT) { |
| 2403 | DISPLAY("MemoTable Init Error\n"); |
| 2404 | ret = 2; |
| 2405 | goto _cleanUp; |
| 2406 | } |
| 2407 | |
| 2408 | /* default strictnesses */ |
| 2409 | if (g_strictness == PARAM_UNSET) { |
| 2410 | if(g_optmode) { |
| 2411 | g_strictness = 100; |
| 2412 | } else { |
| 2413 | g_strictness = 90; |
| 2414 | } |
| 2415 | } else { |
| 2416 | if(0 >= g_strictness || g_strictness > 100) { |
| 2417 | DISPLAY("Strictness Outside of Bounds\n"); |
| 2418 | ret = 4; |
| 2419 | goto _cleanUp; |
| 2420 | } |
| 2421 | } |
| 2422 | |
| 2423 | /* use level'ing mode instead of normal target mode */ |
| 2424 | if (g_optmode) { |
| 2425 | winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize)); |
| 2426 | if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) { |
| 2427 | ret = 3; |
| 2428 | goto _cleanUp; |
| 2429 | } |
| 2430 | |
| 2431 | g_lvltarget = winner.result; |
| 2432 | g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100; |
| 2433 | g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100; |
| 2434 | g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness; |
| 2435 | |
| 2436 | target.cSpeed = (U32)g_lvltarget.cSpeed; |
| 2437 | target.dSpeed = (U32)g_lvltarget.dSpeed; |
| 2438 | |
| 2439 | BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize); |
| 2440 | } |
| 2441 | |
| 2442 | /* Don't want it to return anything worse than the best known result */ |
| 2443 | if (g_singleRun) { |
| 2444 | BMK_benchResult_t res; |
| 2445 | g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize); |
| 2446 | if (BMK_benchParam(&res, buf, ctx, g_params)) { |
| 2447 | ret = 45; |
| 2448 | goto _cleanUp; |
| 2449 | } |
| 2450 | if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) { |
| 2451 | winner.result = res; |
| 2452 | winner.params = g_params; |
| 2453 | } |
| 2454 | } |
| 2455 | |
| 2456 | /* bench */ |
| 2457 | DISPLAYLEVEL(2, "\r%79s\r", ""); |
| 2458 | if(nbFiles == 1) { |
| 2459 | DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]); |
| 2460 | } else { |
| 2461 | DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles); |
| 2462 | } |
| 2463 | |
| 2464 | if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); } |
| 2465 | if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); } |
| 2466 | if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); } |
| 2467 | |
| 2468 | DISPLAYLEVEL(2, "\n"); |
| 2469 | init_clockGranularity(); |
| 2470 | |
| 2471 | { paramValues_t CParams; |
| 2472 | |
| 2473 | /* find best solution from default params */ |
| 2474 | { const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel(); |
| 2475 | DEBUGOUTPUT("Strategy Selection\n"); |
| 2476 | if (paramTarget.vals[strt_ind] == PARAM_UNSET) { |
| 2477 | BMK_benchResult_t candidate; |
| 2478 | int i; |
| 2479 | for (i=1; i<=maxSeeds; i++) { |
| 2480 | int ec; |
| 2481 | CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget); |
| 2482 | ec = BMK_benchParam(&candidate, buf, ctx, CParams); |
| 2483 | BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize); |
| 2484 | |
| 2485 | if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) { |
| 2486 | winner.result = candidate; |
| 2487 | winner.params = CParams; |
| 2488 | } |
| 2489 | |
| 2490 | CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */ |
| 2491 | /* if the current params are too slow, just stop. */ |
| 2492 | if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; } |
| 2493 | } |
| 2494 | |
| 2495 | BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize); |
| 2496 | } |
| 2497 | } |
| 2498 | |
| 2499 | DEBUGOUTPUT("Real Opt\n"); |
| 2500 | /* start 'real' optimization */ |
| 2501 | { int bestStrategy = (int)winner.params.vals[strt_ind]; |
| 2502 | if (paramTarget.vals[strt_ind] == PARAM_UNSET) { |
| 2503 | int st = bestStrategy; |
| 2504 | int tries = g_maxTries; |
| 2505 | |
| 2506 | /* one iterations of hill climbing with the level-defined parameters. */ |
| 2507 | { winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params); |
| 2508 | if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) { |
| 2509 | winner = w1; |
| 2510 | } |
| 2511 | CHECKTIMEGT(ret, 0, _displayCleanUp); |
| 2512 | } |
| 2513 | |
| 2514 | while(st && tries > 0) { |
| 2515 | winnerInfo_t wc; |
| 2516 | DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]); |
| 2517 | |
| 2518 | wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries); |
| 2519 | |
| 2520 | if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) { |
| 2521 | winner = wc; |
| 2522 | tries = g_maxTries; |
| 2523 | bestStrategy = st; |
| 2524 | } else { |
| 2525 | st = nextStrategy(st, bestStrategy); |
| 2526 | tries -= TRY_DECAY; |
| 2527 | } |
| 2528 | CHECKTIMEGT(ret, 0, _displayCleanUp); |
| 2529 | } |
| 2530 | } else { |
| 2531 | winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries); |
| 2532 | } |
| 2533 | |
| 2534 | } |
| 2535 | |
| 2536 | /* no solution found */ |
| 2537 | if(winner.result.cSize == (size_t)-1) { |
| 2538 | ret = 1; |
| 2539 | DISPLAY("No feasible solution found\n"); |
| 2540 | goto _cleanUp; |
| 2541 | } |
| 2542 | |
| 2543 | /* end summary */ |
| 2544 | _displayCleanUp: |
| 2545 | if (g_displayLevel >= 0) { |
| 2546 | BMK_displayOneResult(stdout, winner, buf.srcSize); |
| 2547 | } |
| 2548 | BMK_paramValues_into_commandLine(stdout, winner.params); |
| 2549 | DISPLAYLEVEL(1, "grillParams size - optimizer completed \n"); |
| 2550 | } |
| 2551 | |
| 2552 | _cleanUp: |
| 2553 | freeContexts(ctx); |
| 2554 | freeBuffers(buf); |
| 2555 | freeMemoTableArray(allMT); |
| 2556 | return ret; |
| 2557 | } |
| 2558 | |
| 2559 | /*-************************************ |
| 2560 | * CLI parsing functions |
| 2561 | **************************************/ |
| 2562 | |
/** longCommandWArg() :
 *  Prefix test used by the command-line parser (same helper as in zstdcli.c).
 *  When the text at *stringPtr begins with `longCommand`, *stringPtr is moved
 *  just past that prefix and 1 is returned.
 *  Otherwise *stringPtr is left untouched and 0 is returned.
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);

    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) {
        return 0;   /* no match : caller's cursor stays where it was */
    }

    *stringPtr += prefixLen;
    return 1;
}
| 2576 | |
/* errorOut() :
 * Prints `msg` (followed by a space and a newline) through DISPLAY,
 * then terminates the program with exit status 1. Never returns. */
static void errorOut(const char* msg)
{
    DISPLAY("%s \n", msg);
    exit(1);
}
| 2581 | |
/*! readU32FromChar() :
 *  Parses an unsigned decimal number located at `*stringPtr`.
 *  - An optional leading '-' is accepted : the parsed magnitude is then
 *    multiplied by (unsigned)-1 before being returned.
 *  - A trailing 'K' multiplies the value by 1024, a trailing 'M' by 1024*1024;
 *    either may be followed by an optional 'i' and/or 'B' (K, KB, KiB, M, MB, MiB).
 *  `*stringPtr` is advanced to the first character that was not consumed.
 *  Note : calls errorOut() (which exits the program) when the value gets too large.
 * @return : the value read. */
static unsigned readU32FromChar(const char** stringPtr)
{
    static const char tooLargeMsg[] = "error: numeric value too large";
    unsigned const digitLimit = (((unsigned)(-1)) / 10) - 1;   /* above this, one more digit is refused */
    unsigned const shiftLimit = ((unsigned)(-1)) >> 10;        /* above this, a <<10 would lose bits */
    const char* cursor = *stringPtr;
    unsigned multiplier = 1;
    unsigned value = 0;

    if (*cursor == '-') {
        multiplier = (unsigned)-1;
        cursor++;
    }

    /* digits */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        if (value > digitLimit) errorOut(tooLargeMsg);
        value *= 10;
        assert(*cursor >= '0');
        value += (unsigned)(*cursor - '0');
    }

    /* optional size suffix */
    if ((*cursor == 'K') || (*cursor == 'M')) {
        int const isMega = (*cursor == 'M');
        if (value > shiftLimit) errorOut(tooLargeMsg);
        value <<= 10;
        if (isMega) {
            if (value > shiftLimit) errorOut(tooLargeMsg);
            value <<= 10;
        }
        cursor++;   /* skip `K` or `M` */
        if (*cursor == 'i') cursor++;
        if (*cursor == 'B') cursor++;
    }

    *stringPtr = cursor;
    return value * multiplier;
}
| 2615 | |
/* readDoubleFromChar() :
 * Reads a non-negative decimal number of the form `digits[.digits]` located
 * at `*stringPtr` and returns it as a double.
 * `*stringPtr` is advanced past every character consumed (including the '.'
 * when present). No sign, exponent or suffix is interpreted. */
static double readDoubleFromChar(const char** stringPtr)
{
    const char* cursor = *stringPtr;
    double value = 0;
    double scale = 10;   /* divisor applied to the next fractional digit */

    /* integer part */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        value *= 10;
        value += *cursor - '0';
    }

    /* optional fractional part */
    if (*cursor == '.') {
        cursor++;
        for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
            value += (double)(*cursor - '0') / scale;
            scale *= 10;
        }
    }

    *stringPtr = cursor;
    return value;
}
| 2631 | |
/* usage() :
 * Prints the short help text through DISPLAY, using `exename` as the program name.
 * @return : always 0 */
static int usage(const char* exename)
{
    DISPLAY( "Usage :\n"
             " %s [arg] file\n"
             "Arguments :\n"
             " file : path to the file used as reference (if none, generates a compressible sample)\n"
             " -H/-h : Help (this text + advanced options)\n",
             exename);
    return 0;
}
| 2641 | |
| 2642 | static int usage_advanced(void) |
| 2643 | { |
| 2644 | DISPLAY( "\nAdvanced options :\n"); |
| 2645 | DISPLAY( " -T# : set level 1 speed objective \n"); |
| 2646 | DISPLAY( " -B# : cut input into blocks of size # (default : single block) \n"); |
| 2647 | DISPLAY( " --optimize= : same as -O with more verbose syntax (see README.md)\n"); |
| 2648 | DISPLAY( " -S : Single run \n"); |
| 2649 | DISPLAY( " --zstd : Single run, parameter selection same as zstdcli \n"); |
| 2650 | DISPLAY( " -P# : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100); |
| 2651 | DISPLAY( " -t# : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n", |
| 2652 | (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600); |
| 2653 | DISPLAY( " -v : Prints Benchmarking output\n"); |
| 2654 | DISPLAY( " -D : Next argument dictionary file\n"); |
| 2655 | DISPLAY( " -s : Separate Files\n"); |
| 2656 | return 0; |
| 2657 | } |
| 2658 | |
/* badusage() :
 * Reports a command-line error, then prints the short usage text.
 * @return : always 1, so callers can `return badusage(...)` as an exit code */
static int badusage(const char* exename)
{
    DISPLAY("Wrong parameters\n");
    (void)usage(exename);   /* usage()'s return value is not meaningful here */
    return 1;
}
| 2665 | |
/* PARSE_SUB_ARGS() :
 * Helper for parsing one `name=value` item of a comma-separated option list.
 * If the text at `argument` starts with `stringLong` or `stringShort`,
 * the number that follows is read with readU32FromChar() and stored into `variable`.
 * Then, if a ',' follows, it is skipped and the enclosing loop is `continue`d
 * (another item is expected); otherwise the enclosing loop is left with `break`.
 * When neither name matches, the macro does nothing and execution falls through.
 *
 * Requirements on the expansion site :
 * - a `const char* argument` cursor must be in scope;
 * - it must be expanded directly inside the loop it is meant to control.
 *   This is why the body is a bare brace block and not wrapped in
 *   do { } while (0) : the `continue` / `break` must act on the caller's loop. */
#define PARSE_SUB_ARGS(stringLong, stringShort, variable) { \
    if ( longCommandWArg(&argument, stringLong) \
      || longCommandWArg(&argument, stringShort) ) { \
          variable = readU32FromChar(&argument); \
          if (argument[0]==',') { \
              argument++; continue; \
          } else break; \
}   }
| 2674 | |
| 2675 | /* 1 if successful parse, 0 otherwise */ |
| 2676 | static int parse_params(const char** argptr, paramValues_t* pv) { |
| 2677 | int matched = 0; |
| 2678 | const char* argOrig = *argptr; |
| 2679 | varInds_t v; |
| 2680 | for(v = 0; v < NUM_PARAMS; v++) { |
| 2681 | if ( longCommandWArg(argptr,g_shortParamNames[v]) |
| 2682 | || longCommandWArg(argptr, g_paramNames[v]) ) { |
| 2683 | if(**argptr == '=') { |
| 2684 | (*argptr)++; |
| 2685 | pv->vals[v] = readU32FromChar(argptr); |
| 2686 | matched = 1; |
| 2687 | break; |
| 2688 | } |
| 2689 | } |
| 2690 | /* reset and try again */ |
| 2691 | *argptr = argOrig; |
| 2692 | } |
| 2693 | return matched; |
| 2694 | } |
| 2695 | |
| 2696 | /*-************************************ |
| 2697 | * Main |
| 2698 | **************************************/ |
| 2699 | |
| 2700 | int main(int argc, const char** argv) |
| 2701 | { |
| 2702 | int i, |
| 2703 | filenamesStart=0, |
| 2704 | result; |
| 2705 | const char* exename=argv[0]; |
| 2706 | const char* input_filename = NULL; |
| 2707 | const char* dictFileName = NULL; |
| 2708 | U32 main_pause = 0; |
| 2709 | int cLevelOpt = 0, cLevelRun = 0; |
| 2710 | int separateFiles = 0; |
| 2711 | double compressibility = COMPRESSIBILITY_DEFAULT; |
| 2712 | U32 memoTableLog = PARAM_UNSET; |
| 2713 | constraint_t target = { 0, 0, (U32)-1 }; |
| 2714 | |
| 2715 | paramValues_t paramTarget = emptyParams(); |
| 2716 | g_params = emptyParams(); |
| 2717 | |
| 2718 | assert(argc>=1); /* for exename */ |
| 2719 | |
| 2720 | for(i=1; i<argc; i++) { |
| 2721 | const char* argument = argv[i]; |
| 2722 | DEBUGOUTPUT("%d: %s\n", i, argument); |
| 2723 | assert(argument != NULL); |
| 2724 | |
| 2725 | if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; } |
| 2726 | |
| 2727 | if (longCommandWArg(&argument, "--optimize=")) { |
| 2728 | g_optimizer = 1; |
| 2729 | for ( ; ;) { |
| 2730 | if(parse_params(&argument, ¶mTarget)) { if(argument[0] == ',') { argument++; continue; } else break; } |
| 2731 | PARSE_SUB_ARGS("compressionSpeed=" , "cSpeed=", target.cSpeed); |
| 2732 | PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed); |
| 2733 | PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem); |
| 2734 | PARSE_SUB_ARGS("strict=", "stc=", g_strictness); |
| 2735 | PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries); |
| 2736 | PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog); |
| 2737 | if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; } |
| 2738 | if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; } |
| 2739 | |
| 2740 | DISPLAY("invalid optimization parameter \n"); |
| 2741 | return 1; |
| 2742 | } |
| 2743 | |
| 2744 | if (argument[0] != 0) { |
| 2745 | DISPLAY("invalid --optimize= format\n"); |
| 2746 | return 1; /* check the end of string */ |
| 2747 | } |
| 2748 | continue; |
| 2749 | } else if (longCommandWArg(&argument, "--zstd=")) { |
| 2750 | /* Decode command (note : aggregated commands are allowed) */ |
| 2751 | g_singleRun = 1; |
| 2752 | for ( ; ;) { |
| 2753 | if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; } |
| 2754 | if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; } |
| 2755 | |
| 2756 | DISPLAY("invalid compression parameter \n"); |
| 2757 | return 1; |
| 2758 | } |
| 2759 | |
| 2760 | if (argument[0] != 0) { |
| 2761 | DISPLAY("invalid --zstd= format\n"); |
| 2762 | return 1; /* check the end of string */ |
| 2763 | } |
| 2764 | continue; |
| 2765 | /* if not return, success */ |
| 2766 | |
| 2767 | } else if (longCommandWArg(&argument, "--display=")) { |
| 2768 | /* Decode command (note : aggregated commands are allowed) */ |
| 2769 | memset(g_silenceParams, 1, sizeof(g_silenceParams)); |
| 2770 | for ( ; ;) { |
| 2771 | int found = 0; |
| 2772 | varInds_t v; |
| 2773 | for(v = 0; v < NUM_PARAMS; v++) { |
| 2774 | if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) { |
| 2775 | g_silenceParams[v] = 0; |
| 2776 | found = 1; |
| 2777 | } |
| 2778 | } |
| 2779 | if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) { |
| 2780 | for(v = 0; v <= strt_ind; v++) { |
| 2781 | g_silenceParams[v] = 0; |
| 2782 | } |
| 2783 | found = 1; |
| 2784 | } |
| 2785 | |
| 2786 | |
| 2787 | if(found) { |
| 2788 | if(argument[0]==',') { |
| 2789 | continue; |
| 2790 | } else { |
| 2791 | break; |
| 2792 | } |
| 2793 | } |
| 2794 | DISPLAY("invalid parameter name parameter \n"); |
| 2795 | return 1; |
| 2796 | } |
| 2797 | |
| 2798 | if (argument[0] != 0) { |
| 2799 | DISPLAY("invalid --display format\n"); |
| 2800 | return 1; /* check the end of string */ |
| 2801 | } |
| 2802 | continue; |
| 2803 | } else if (argument[0]=='-') { |
| 2804 | argument++; |
| 2805 | |
| 2806 | while (argument[0]!=0) { |
| 2807 | |
| 2808 | switch(argument[0]) |
| 2809 | { |
| 2810 | /* Display help on usage */ |
| 2811 | case 'h' : |
| 2812 | case 'H': usage(exename); usage_advanced(); return 0; |
| 2813 | |
| 2814 | /* Pause at the end (hidden option) */ |
| 2815 | case 'p': main_pause = 1; argument++; break; |
| 2816 | |
| 2817 | /* Sample compressibility (when no file provided) */ |
| 2818 | case 'P': |
| 2819 | argument++; |
| 2820 | { U32 const proba32 = readU32FromChar(&argument); |
| 2821 | compressibility = (double)proba32 / 100.; |
| 2822 | } |
| 2823 | break; |
| 2824 | |
| 2825 | /* Run Single conf */ |
| 2826 | case 'S': |
| 2827 | g_singleRun = 1; |
| 2828 | argument++; |
| 2829 | for ( ; ; ) { |
| 2830 | switch(*argument) |
| 2831 | { |
| 2832 | case 'w': |
| 2833 | argument++; |
| 2834 | g_params.vals[wlog_ind] = readU32FromChar(&argument); |
| 2835 | continue; |
| 2836 | case 'c': |
| 2837 | argument++; |
| 2838 | g_params.vals[clog_ind] = readU32FromChar(&argument); |
| 2839 | continue; |
| 2840 | case 'h': |
| 2841 | argument++; |
| 2842 | g_params.vals[hlog_ind] = readU32FromChar(&argument); |
| 2843 | continue; |
| 2844 | case 's': |
| 2845 | argument++; |
| 2846 | g_params.vals[slog_ind] = readU32FromChar(&argument); |
| 2847 | continue; |
| 2848 | case 'l': /* search length */ |
| 2849 | argument++; |
| 2850 | g_params.vals[mml_ind] = readU32FromChar(&argument); |
| 2851 | continue; |
| 2852 | case 't': /* target length */ |
| 2853 | argument++; |
| 2854 | g_params.vals[tlen_ind] = readU32FromChar(&argument); |
| 2855 | continue; |
| 2856 | case 'S': /* strategy */ |
| 2857 | argument++; |
| 2858 | g_params.vals[strt_ind] = readU32FromChar(&argument); |
| 2859 | continue; |
| 2860 | case 'f': /* forceAttachDict */ |
| 2861 | argument++; |
| 2862 | g_params.vals[fadt_ind] = readU32FromChar(&argument); |
| 2863 | continue; |
| 2864 | case 'L': |
| 2865 | { argument++; |
| 2866 | cLevelRun = (int)readU32FromChar(&argument); |
| 2867 | g_params = emptyParams(); |
| 2868 | continue; |
| 2869 | } |
| 2870 | default : ; |
| 2871 | } |
| 2872 | break; |
| 2873 | } |
| 2874 | |
| 2875 | break; |
| 2876 | |
| 2877 | /* target level1 speed objective, in MB/s */ |
| 2878 | case 'T': |
| 2879 | argument++; |
| 2880 | g_target = readU32FromChar(&argument); |
| 2881 | break; |
| 2882 | |
| 2883 | /* cut input into blocks */ |
| 2884 | case 'B': |
| 2885 | argument++; |
| 2886 | g_blockSize = readU32FromChar(&argument); |
| 2887 | DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10)); |
| 2888 | break; |
| 2889 | |
| 2890 | /* caps runtime (in seconds) */ |
| 2891 | case 't': |
| 2892 | argument++; |
| 2893 | g_timeLimit_s = readU32FromChar(&argument); |
| 2894 | break; |
| 2895 | |
| 2896 | case 's': |
| 2897 | argument++; |
| 2898 | separateFiles = 1; |
| 2899 | break; |
| 2900 | |
| 2901 | case 'q': |
| 2902 | while (argument[0] == 'q') { argument++; g_displayLevel--; } |
| 2903 | break; |
| 2904 | |
| 2905 | case 'v': |
| 2906 | while (argument[0] == 'v') { argument++; g_displayLevel++; } |
| 2907 | break; |
| 2908 | |
| 2909 | /* load dictionary file (only applicable for optimizer rn) */ |
| 2910 | case 'D': |
| 2911 | if(i == argc - 1) { /* last argument, return error. */ |
| 2912 | DISPLAY("Dictionary file expected but not given : %d\n", i); |
| 2913 | return 1; |
| 2914 | } else { |
| 2915 | i++; |
| 2916 | dictFileName = argv[i]; |
| 2917 | argument += strlen(argument); |
| 2918 | } |
| 2919 | break; |
| 2920 | |
| 2921 | /* Unknown command */ |
| 2922 | default : return badusage(exename); |
| 2923 | } |
| 2924 | } |
| 2925 | continue; |
| 2926 | } /* if (argument[0]=='-') */ |
| 2927 | |
| 2928 | /* first provided filename is input */ |
| 2929 | if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } |
| 2930 | } |
| 2931 | |
| 2932 | /* Welcome message */ |
| 2933 | DISPLAYLEVEL(2, WELCOME_MESSAGE); |
| 2934 | |
| 2935 | if (filenamesStart==0) { |
| 2936 | if (g_optimizer) { |
| 2937 | DISPLAY("Optimizer Expects File\n"); |
| 2938 | return 1; |
| 2939 | } else { |
| 2940 | result = benchSample(compressibility, cLevelRun); |
| 2941 | } |
| 2942 | } else { |
| 2943 | if(separateFiles) { |
| 2944 | for(i = 0; i < argc - filenamesStart; i++) { |
| 2945 | if (g_optimizer) { |
| 2946 | result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); |
| 2947 | if(result) { DISPLAY("Error on File %d", i); return result; } |
| 2948 | } else { |
| 2949 | result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun); |
| 2950 | if(result) { DISPLAY("Error on File %d", i); return result; } |
| 2951 | } |
| 2952 | } |
| 2953 | } else { |
| 2954 | if (g_optimizer) { |
| 2955 | assert(filenamesStart < argc); |
| 2956 | result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog); |
| 2957 | } else { |
| 2958 | result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun); |
| 2959 | } |
| 2960 | } |
| 2961 | } |
| 2962 | |
| 2963 | if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; } |
| 2964 | |
| 2965 | return result; |
| 2966 | } |