648db22b |
1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | |
12 | /* ************************************** |
13 | * Tuning parameters |
14 | ****************************************/ |
/* Build-time tunable : can be pre-defined (e.g. on the compiler command line)
 * to override the value below. It is used as the default `nbSeconds`
 * value set by BMK_initAdvancedParams(). */
#ifndef BMK_TIMETEST_DEFAULT_S   /* default minimum time per test */
# define BMK_TIMETEST_DEFAULT_S 3
#endif
18 | |
19 | |
20 | /* ************************************* |
21 | * Includes |
22 | ***************************************/ |
23 | #include "platform.h" /* Large Files support */ |
24 | #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ |
25 | #include <stdlib.h> /* malloc, free */ |
26 | #include <string.h> /* memset, strerror */ |
27 | #include <stdio.h> /* fprintf, fopen */ |
28 | #include <errno.h> |
29 | #include <assert.h> /* assert */ |
30 | |
31 | #include "timefn.h" /* UTIL_time_t */ |
32 | #include "benchfn.h" |
33 | #include "../lib/common/mem.h" |
34 | #ifndef ZSTD_STATIC_LINKING_ONLY |
35 | #define ZSTD_STATIC_LINKING_ONLY |
36 | #endif |
37 | #include "../lib/zstd.h" |
38 | #include "datagen.h" /* RDG_genBuffer */ |
39 | #ifndef XXH_INLINE_ALL |
40 | #define XXH_INLINE_ALL |
41 | #endif |
42 | #include "../lib/common/xxhash.h" |
43 | #include "benchzstd.h" |
44 | #include "../lib/zstd_errors.h" |
45 | |
46 | |
47 | /* ************************************* |
48 | * Constants |
49 | ***************************************/ |
/* ZSTD_GIT_COMMIT_STRING :
 * quoted form of ZSTD_GIT_COMMIT when the build defines it, empty string otherwise.
 * It is printed in the quiet-mode header emitted by BMK_benchCLevel(). */
#ifndef ZSTD_GIT_COMMIT
# define ZSTD_GIT_COMMIT_STRING ""
#else
# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif

/* Time constants. TIMELOOP_NANOSEC converts "bytes per nanosecond-run" into
 * bytes per second in the speed computations of this file. */
#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10

/* Postfix size multipliers : `64 MB` expands to `64 *(1 <<20)`.
 * They only make sense directly after an integer expression. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Second argument given to BMK_createTimedFnState() by BMK_benchMemAdvanced().
 * NOTE(review): presumably the duration of one individual run, in milliseconds —
 * confirm against benchfn.h. */
#define BMK_RUNTEST_DEFAULT_MS 1000

/* Upper bound on the amount of memory BMK_findMaxMem() will try to allocate.
 * When sizeof(size_t) is 8, the second branch evaluates to 1ULL << 33. */
static const size_t maxMemory = (sizeof(size_t)==4) ?
    /* 32-bit */ (2 GB - 64 MB) :
    /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
70 | |
71 | |
72 | /* ************************************* |
73 | * console display |
74 | ***************************************/ |
/* DISPLAY() prints to stderr, OUTPUT() prints to stdout; both then flush every
 * open output stream (fflush(NULL)).
 * The *LEVEL variants only print when the variable `displayLevel`, which must be
 * in scope at the call site, is >= `l`.
 * All four expand to a bare `{ ... }` block or `if (...) { ... }` statement, not to
 * `do { ... } while (0)` : do not follow an unbraced invocation with `else`. */
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
#define OUTPUT(...) { fprintf(stdout, __VA_ARGS__); fflush(NULL); }
#define OUTPUTLEVEL(l, ...) if (displayLevel>=l) { OUTPUT(__VA_ARGS__); }
80 | |
81 | |
82 | /* ************************************* |
83 | * Exceptions |
84 | ***************************************/ |
/* DEBUG defaults to 0; define it to a non-zero value at build time so that
 * DEBUGOUTPUT() actually prints (through DISPLAY(), i.e. on stderr). */
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

/* RETURN_ERROR_INT() :
 * for functions returning `int`. Prints "Error <errorNum> : <message>" when
 * displayLevel >= 1, then returns `errorNum` from the enclosing function.
 * Requires `displayLevel` in scope. Performs no cleanup : release resources
 * before invoking it. */
#define RETURN_ERROR_INT(errorNum, ...) {               \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);      \
    DISPLAYLEVEL(1, "Error %i : ", errorNum);           \
    DISPLAYLEVEL(1, __VA_ARGS__);                       \
    DISPLAYLEVEL(1, " \n");                             \
    return errorNum;                                    \
}

/* CHECK_Z() :
 * evaluates `zf` once; when the result is a zstd error code, prints the
 * stringified expression and the zstd error name on stderr (regardless of
 * displayLevel), then terminates the process with exit(1).
 * Because `zf` is stringified, the exact spelling of the argument is part of
 * the error message. */
#define CHECK_Z(zf) {                                   \
    size_t const zerr = zf;                             \
    if (ZSTD_isError(zerr)) {                           \
        DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);  \
        DISPLAY("Error : ");                            \
        DISPLAY("%s failed : %s",                       \
                #zf, ZSTD_getErrorName(zerr));          \
        DISPLAY(" \n");                                 \
        exit(1);                                        \
    }                                                   \
}

/* RETURN_ERROR() :
 * for functions returning a struct type `retType` that has a `tag` member.
 * Builds a zeroed `retType`, prints "Error <errorNum> : <message>" when
 * displayLevel >= 1, stores `errorNum` into `tag` and returns the struct.
 * Requires `displayLevel` in scope. Performs no cleanup. */
#define RETURN_ERROR(errorNum, retType, ...) {          \
    retType r;                                          \
    memset(&r, 0, sizeof(retType));                     \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);      \
    DISPLAYLEVEL(1, "Error %i : ", errorNum);           \
    DISPLAYLEVEL(1, __VA_ARGS__);                       \
    DISPLAYLEVEL(1, " \n");                             \
    r.tag = errorNum;                                   \
    return r;                                           \
}
120 | |
121 | |
122 | /* ************************************* |
123 | * Benchmark Parameters |
124 | ***************************************/ |
125 | |
126 | BMK_advancedParams_t BMK_initAdvancedParams(void) { |
127 | BMK_advancedParams_t const res = { |
128 | BMK_both, /* mode */ |
129 | BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ |
130 | 0, /* blockSize */ |
131 | 0, /* nbWorkers */ |
132 | 0, /* realTime */ |
133 | 0, /* additionalParam */ |
134 | 0, /* ldmFlag */ |
135 | 0, /* ldmMinMatch */ |
136 | 0, /* ldmHashLog */ |
137 | 0, /* ldmBuckSizeLog */ |
138 | 0, /* ldmHashRateLog */ |
139 | ZSTD_ps_auto, /* literalCompressionMode */ |
140 | 0 /* useRowMatchFinder */ |
141 | }; |
142 | return res; |
143 | } |
144 | |
145 | |
146 | /* ******************************************************** |
147 | * Bench functions |
148 | **********************************************************/ |
/* blockParam_t :
 * groups, for one block, a source area, a compressed area and a regenerated area.
 * NOTE(review): this type is not referenced anywhere in the visible part of this
 * file; the per-field notes below are inferred from the field names only — confirm
 * before relying on them (or remove the type if it is truly unused). */
typedef struct {
    const void* srcPtr;   /* presumably : start of the block's source data */
    size_t srcSize;       /* presumably : size of the source data */
    void*  cPtr;          /* presumably : destination for compressed data */
    size_t cRoom;         /* presumably : capacity available at cPtr */
    size_t cSize;         /* presumably : compressed size actually produced */
    void*  resPtr;        /* presumably : destination for regenerated data */
    size_t resSize;       /* presumably : size of regenerated data */
} blockParam_t;
158 | |
/* Local MIN / MAX : any previous definition is discarded first.
 * Both are plain macros : each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
163 | |
164 | static void |
165 | BMK_initCCtx(ZSTD_CCtx* ctx, |
166 | const void* dictBuffer, size_t dictBufferSize, |
167 | int cLevel, |
168 | const ZSTD_compressionParameters* comprParams, |
169 | const BMK_advancedParams_t* adv) |
170 | { |
171 | ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters); |
172 | if (adv->nbWorkers==1) { |
173 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0)); |
174 | } else { |
175 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers)); |
176 | } |
177 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel)); |
178 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder)); |
179 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag)); |
180 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch)); |
181 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog)); |
182 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog)); |
183 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog)); |
184 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog)); |
185 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog)); |
186 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog)); |
187 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog)); |
188 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch)); |
189 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength)); |
190 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode)); |
191 | CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, (int)comprParams->strategy)); |
192 | CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize)); |
193 | } |
194 | |
/*! BMK_initDCtx() :
 *  Resets `dctx` (session and parameters), then loads the dictionary
 *  (`dictBuffer`, `dictBufferSize`) into it.
 *  Any zstd error is fatal : CHECK_Z() prints the failing call and calls exit(1). */
static void BMK_initDCtx(ZSTD_DCtx* dctx,
    const void* dictBuffer, size_t dictBufferSize) {
    CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
    CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
}
200 | |
201 | |
/* BMK_initCCtxArgs :
 * bundle of the arguments of BMK_initCCtx(), so they can travel through the
 * single `void*` payload accepted by local_initCCtx(). */
typedef struct {
    ZSTD_CCtx* cctx;                                /* context to (re)initialize */
    const void* dictBuffer;                         /* dictionary content, forwarded as-is */
    size_t dictBufferSize;                          /* size of dictBuffer */
    int cLevel;                                     /* compression level */
    const ZSTD_compressionParameters* comprParams;  /* explicit compression parameters */
    const BMK_advancedParams_t* adv;                /* advanced benchmark options */
} BMK_initCCtxArgs;
210 | |
211 | static size_t local_initCCtx(void* payload) { |
212 | BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; |
213 | BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); |
214 | return 0; |
215 | } |
216 | |
/* BMK_initDCtxArgs :
 * bundle of the arguments of BMK_initDCtx(), so they can travel through the
 * single `void*` payload accepted by local_initDCtx(). */
typedef struct {
    ZSTD_DCtx* dctx;          /* context to (re)initialize */
    const void* dictBuffer;   /* dictionary content, forwarded as-is */
    size_t dictBufferSize;    /* size of dictBuffer */
} BMK_initDCtxArgs;
222 | |
223 | static size_t local_initDCtx(void* payload) { |
224 | BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; |
225 | BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); |
226 | return 0; |
227 | } |
228 | |
229 | |
230 | /* `addArgs` is the context */ |
231 | static size_t local_defaultCompress( |
232 | const void* srcBuffer, size_t srcSize, |
233 | void* dstBuffer, size_t dstSize, |
234 | void* addArgs) |
235 | { |
236 | ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; |
237 | return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); |
238 | } |
239 | |
240 | /* `addArgs` is the context */ |
241 | static size_t local_defaultDecompress( |
242 | const void* srcBuffer, size_t srcSize, |
243 | void* dstBuffer, size_t dstCapacity, |
244 | void* addArgs) |
245 | { |
246 | size_t moreToFlush = 1; |
247 | ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; |
248 | ZSTD_inBuffer in; |
249 | ZSTD_outBuffer out; |
250 | in.src = srcBuffer; in.size = srcSize; in.pos = 0; |
251 | out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0; |
252 | while (moreToFlush) { |
253 | if(out.pos == out.size) { |
254 | return (size_t)-ZSTD_error_dstSize_tooSmall; |
255 | } |
256 | moreToFlush = ZSTD_decompressStream(dctx, &out, &in); |
257 | if (ZSTD_isError(moreToFlush)) { |
258 | return moreToFlush; |
259 | } |
260 | } |
261 | return out.pos; |
262 | |
263 | } |
264 | |
265 | |
266 | /* ================================================================= */ |
267 | /* Benchmark Zstandard, mem-to-mem scenarios */ |
268 | /* ================================================================= */ |
269 | |
270 | int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) |
271 | { |
272 | return outcome.tag == 0; |
273 | } |
274 | |
275 | BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) |
276 | { |
277 | assert(outcome.tag == 0); |
278 | return outcome.internal_never_use_directly; |
279 | } |
280 | |
281 | static BMK_benchOutcome_t BMK_benchOutcome_error(void) |
282 | { |
283 | BMK_benchOutcome_t b; |
284 | memset(&b, 0, sizeof(b)); |
285 | b.tag = 1; |
286 | return b; |
287 | } |
288 | |
289 | static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result) |
290 | { |
291 | BMK_benchOutcome_t b; |
292 | b.tag = 0; |
293 | b.internal_never_use_directly = result; |
294 | return b; |
295 | } |
296 | |
297 | |
298 | /* benchMem with no allocation */ |
299 | static BMK_benchOutcome_t |
300 | BMK_benchMemAdvancedNoAlloc( |
301 | const void** srcPtrs, size_t* srcSizes, |
302 | void** cPtrs, size_t* cCapacities, size_t* cSizes, |
303 | void** resPtrs, size_t* resSizes, |
304 | void** resultBufferPtr, void* compressedBuffer, |
305 | size_t maxCompressedSize, |
306 | BMK_timedFnState_t* timeStateCompress, |
307 | BMK_timedFnState_t* timeStateDecompress, |
308 | |
309 | const void* srcBuffer, size_t srcSize, |
310 | const size_t* fileSizes, unsigned nbFiles, |
311 | const int cLevel, |
312 | const ZSTD_compressionParameters* comprParams, |
313 | const void* dictBuffer, size_t dictBufferSize, |
314 | ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, |
315 | int displayLevel, const char* displayName, |
316 | const BMK_advancedParams_t* adv) |
317 | { |
318 | size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ |
319 | BMK_benchResult_t benchResult; |
320 | size_t const loadedCompressedSize = srcSize; |
321 | size_t cSize = 0; |
322 | double ratio = 0.; |
323 | U32 nbBlocks; |
324 | |
325 | assert(cctx != NULL); assert(dctx != NULL); |
326 | |
327 | /* init */ |
328 | memset(&benchResult, 0, sizeof(benchResult)); |
329 | if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */ |
330 | if (adv->mode == BMK_decodeOnly) { |
331 | /* benchmark only decompression : source must be already compressed */ |
332 | const char* srcPtr = (const char*)srcBuffer; |
333 | U64 totalDSize64 = 0; |
334 | U32 fileNb; |
335 | for (fileNb=0; fileNb<nbFiles; fileNb++) { |
336 | U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); |
337 | if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) { |
338 | RETURN_ERROR(32, BMK_benchOutcome_t, "Decompressed size cannot be determined: cannot benchmark"); |
339 | } |
340 | if (fSize64 == ZSTD_CONTENTSIZE_ERROR) { |
341 | RETURN_ERROR(32, BMK_benchOutcome_t, "Error while trying to assess decompressed size: data may be invalid"); |
342 | } |
343 | totalDSize64 += fSize64; |
344 | srcPtr += fileSizes[fileNb]; |
345 | } |
346 | { size_t const decodedSize = (size_t)totalDSize64; |
347 | assert((U64)decodedSize == totalDSize64); /* check overflow */ |
348 | free(*resultBufferPtr); |
349 | if (totalDSize64 > decodedSize) { /* size_t overflow */ |
350 | RETURN_ERROR(32, BMK_benchOutcome_t, "decompressed size is too large for local system"); |
351 | } |
352 | *resultBufferPtr = malloc(decodedSize); |
353 | if (!(*resultBufferPtr)) { |
354 | RETURN_ERROR(33, BMK_benchOutcome_t, "allocation error: not enough memory"); |
355 | } |
356 | cSize = srcSize; |
357 | srcSize = decodedSize; |
358 | ratio = (double)srcSize / (double)cSize; |
359 | } |
360 | } |
361 | |
362 | /* Init data blocks */ |
363 | { const char* srcPtr = (const char*)srcBuffer; |
364 | char* cPtr = (char*)compressedBuffer; |
365 | char* resPtr = (char*)(*resultBufferPtr); |
366 | U32 fileNb; |
367 | for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) { |
368 | size_t remaining = fileSizes[fileNb]; |
369 | U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize); |
370 | U32 const blockEnd = nbBlocks + nbBlocksforThisFile; |
371 | for ( ; nbBlocks<blockEnd; nbBlocks++) { |
372 | size_t const thisBlockSize = MIN(remaining, blockSize); |
373 | srcPtrs[nbBlocks] = srcPtr; |
374 | srcSizes[nbBlocks] = thisBlockSize; |
375 | cPtrs[nbBlocks] = cPtr; |
376 | cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); |
377 | resPtrs[nbBlocks] = resPtr; |
378 | resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; |
379 | srcPtr += thisBlockSize; |
380 | cPtr += cCapacities[nbBlocks]; |
381 | resPtr += thisBlockSize; |
382 | remaining -= thisBlockSize; |
383 | if (adv->mode == BMK_decodeOnly) { |
384 | cSizes[nbBlocks] = thisBlockSize; |
385 | benchResult.cSize = thisBlockSize; |
386 | } } } } |
387 | |
388 | /* warming up `compressedBuffer` */ |
389 | if (adv->mode == BMK_decodeOnly) { |
390 | memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); |
391 | } else { |
392 | RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); |
393 | } |
394 | |
395 | if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) { |
396 | OUTPUTLEVEL(2, "Warning : time measurements may be incorrect in multithreading mode... \n") |
397 | } |
398 | |
399 | /* Bench */ |
400 | { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); |
401 | # define NB_MARKS 4 |
402 | const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; |
403 | U32 markNb = 0; |
404 | int compressionCompleted = (adv->mode == BMK_decodeOnly); |
405 | int decompressionCompleted = (adv->mode == BMK_compressOnly); |
406 | BMK_benchParams_t cbp, dbp; |
407 | BMK_initCCtxArgs cctxprep; |
408 | BMK_initDCtxArgs dctxprep; |
409 | |
410 | cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */ |
411 | cbp.benchPayload = cctx; |
412 | cbp.initFn = local_initCCtx; /* BMK_initCCtx */ |
413 | cbp.initPayload = &cctxprep; |
414 | cbp.errorFn = ZSTD_isError; |
415 | cbp.blockCount = nbBlocks; |
416 | cbp.srcBuffers = srcPtrs; |
417 | cbp.srcSizes = srcSizes; |
418 | cbp.dstBuffers = cPtrs; |
419 | cbp.dstCapacities = cCapacities; |
420 | cbp.blockResults = cSizes; |
421 | |
422 | cctxprep.cctx = cctx; |
423 | cctxprep.dictBuffer = dictBuffer; |
424 | cctxprep.dictBufferSize = dictBufferSize; |
425 | cctxprep.cLevel = cLevel; |
426 | cctxprep.comprParams = comprParams; |
427 | cctxprep.adv = adv; |
428 | |
429 | dbp.benchFn = local_defaultDecompress; |
430 | dbp.benchPayload = dctx; |
431 | dbp.initFn = local_initDCtx; |
432 | dbp.initPayload = &dctxprep; |
433 | dbp.errorFn = ZSTD_isError; |
434 | dbp.blockCount = nbBlocks; |
435 | dbp.srcBuffers = (const void* const *) cPtrs; |
436 | dbp.srcSizes = cSizes; |
437 | dbp.dstBuffers = resPtrs; |
438 | dbp.dstCapacities = resSizes; |
439 | dbp.blockResults = NULL; |
440 | |
441 | dctxprep.dctx = dctx; |
442 | dctxprep.dictBuffer = dictBuffer; |
443 | dctxprep.dictBufferSize = dictBufferSize; |
444 | |
445 | OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */ |
446 | assert(srcSize < UINT_MAX); |
447 | OUTPUTLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize); |
448 | |
449 | while (!(compressionCompleted && decompressionCompleted)) { |
450 | if (!compressionCompleted) { |
451 | BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp); |
452 | |
453 | if (!BMK_isSuccessful_runOutcome(cOutcome)) { |
454 | RETURN_ERROR(30, BMK_benchOutcome_t, "compression error"); |
455 | } |
456 | |
457 | { BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); |
458 | cSize = cResult.sumOfReturn; |
459 | ratio = (double)srcSize / (double)cSize; |
460 | { BMK_benchResult_t newResult; |
461 | newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun); |
462 | benchResult.cSize = cSize; |
463 | if (newResult.cSpeed > benchResult.cSpeed) |
464 | benchResult.cSpeed = newResult.cSpeed; |
465 | } } |
466 | |
467 | { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; |
468 | assert(cSize < UINT_MAX); |
469 | OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r", |
470 | marks[markNb], displayName, |
471 | (unsigned)srcSize, (unsigned)cSize, |
472 | ratioAccuracy, ratio, |
473 | benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT); |
474 | } |
475 | compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress); |
476 | } |
477 | |
478 | if(!decompressionCompleted) { |
479 | BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); |
480 | |
481 | if(!BMK_isSuccessful_runOutcome(dOutcome)) { |
482 | RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error"); |
483 | } |
484 | |
485 | { BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); |
486 | U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun); |
487 | if (newDSpeed > benchResult.dSpeed) |
488 | benchResult.dSpeed = newDSpeed; |
489 | } |
490 | |
491 | { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; |
492 | OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r", |
493 | marks[markNb], displayName, |
494 | (unsigned)srcSize, (unsigned)cSize, |
495 | ratioAccuracy, ratio, |
496 | benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT, |
497 | (double)benchResult.dSpeed / MB_UNIT); |
498 | } |
499 | decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); |
500 | } |
501 | markNb = (markNb+1) % NB_MARKS; |
502 | } /* while (!(compressionCompleted && decompressionCompleted)) */ |
503 | |
504 | /* CRC Checking */ |
505 | { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); |
506 | U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); |
507 | if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { |
508 | size_t u; |
509 | DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", |
510 | displayName, (unsigned)crcOrig, (unsigned)crcCheck); |
511 | for (u=0; u<srcSize; u++) { |
512 | if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) { |
513 | unsigned segNb, bNb, pos; |
514 | size_t bacc = 0; |
515 | DISPLAY("Decoding error at pos %u ", (unsigned)u); |
516 | for (segNb = 0; segNb < nbBlocks; segNb++) { |
517 | if (bacc + srcSizes[segNb] > u) break; |
518 | bacc += srcSizes[segNb]; |
519 | } |
520 | pos = (U32)(u - bacc); |
521 | bNb = pos / (128 KB); |
522 | DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); |
523 | { size_t const lowest = (u>5) ? 5 : u; |
524 | size_t n; |
525 | DISPLAY("origin: "); |
526 | for (n=lowest; n>0; n--) |
527 | DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]); |
528 | DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); |
529 | for (n=1; n<3; n++) |
530 | DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); |
531 | DISPLAY(" \n"); |
532 | DISPLAY("decode: "); |
533 | for (n=lowest; n>0; n--) |
534 | DISPLAY("%02X ", resultBuffer[u-n]); |
535 | DISPLAY(" :%02X: ", resultBuffer[u]); |
536 | for (n=1; n<3; n++) |
537 | DISPLAY("%02X ", resultBuffer[u+n]); |
538 | DISPLAY(" \n"); |
539 | } |
540 | break; |
541 | } |
542 | if (u==srcSize-1) { /* should never happen */ |
543 | DISPLAY("no difference detected\n"); |
544 | } |
545 | } /* for (u=0; u<srcSize; u++) */ |
546 | } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */ |
547 | } /* CRC Checking */ |
548 | |
549 | if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ |
550 | double const cSpeed = (double)benchResult.cSpeed / MB_UNIT; |
551 | double const dSpeed = (double)benchResult.dSpeed / MB_UNIT; |
552 | if (adv->additionalParam) { |
553 | OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); |
554 | } else { |
555 | OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); |
556 | } |
557 | } |
558 | |
559 | OUTPUTLEVEL(2, "%2i#\n", cLevel); |
560 | } /* Bench */ |
561 | |
562 | benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); |
563 | return BMK_benchOutcome_setValidResult(benchResult); |
564 | } |
565 | |
566 | BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, |
567 | void* dstBuffer, size_t dstCapacity, |
568 | const size_t* fileSizes, unsigned nbFiles, |
569 | int cLevel, const ZSTD_compressionParameters* comprParams, |
570 | const void* dictBuffer, size_t dictBufferSize, |
571 | int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) |
572 | |
573 | { |
574 | int const dstParamsError = !dstBuffer ^ !dstCapacity; /* must be both NULL or none */ |
575 | |
576 | size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; |
577 | U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; |
578 | |
579 | /* these are the blockTable parameters, just split up */ |
580 | const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*)); |
581 | size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
582 | |
583 | |
584 | void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); |
585 | size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
586 | size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
587 | |
588 | void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); |
589 | size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); |
590 | |
591 | BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); |
592 | BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); |
593 | |
594 | ZSTD_CCtx* const cctx = ZSTD_createCCtx(); |
595 | ZSTD_DCtx* const dctx = ZSTD_createDCtx(); |
596 | |
597 | const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); |
598 | |
599 | void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize); |
600 | void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer; |
601 | |
602 | BMK_benchOutcome_t outcome = BMK_benchOutcome_error(); /* error by default */ |
603 | |
604 | void* resultBuffer = srcSize ? malloc(srcSize) : NULL; |
605 | |
606 | int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || |
607 | !cSizes || !cCapacities || !resPtrs || !resSizes || |
608 | !timeStateCompress || !timeStateDecompress || |
609 | !cctx || !dctx || |
610 | !compressedBuffer || !resultBuffer; |
611 | |
612 | |
613 | if (!allocationincomplete && !dstParamsError) { |
614 | outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, |
615 | cPtrs, cCapacities, cSizes, |
616 | resPtrs, resSizes, |
617 | &resultBuffer, |
618 | compressedBuffer, maxCompressedSize, |
619 | timeStateCompress, timeStateDecompress, |
620 | srcBuffer, srcSize, |
621 | fileSizes, nbFiles, |
622 | cLevel, comprParams, |
623 | dictBuffer, dictBufferSize, |
624 | cctx, dctx, |
625 | displayLevel, displayName, adv); |
626 | } |
627 | |
628 | /* clean up */ |
629 | BMK_freeTimedFnState(timeStateCompress); |
630 | BMK_freeTimedFnState(timeStateDecompress); |
631 | |
632 | ZSTD_freeCCtx(cctx); |
633 | ZSTD_freeDCtx(dctx); |
634 | |
635 | free(internalDstBuffer); |
636 | free(resultBuffer); |
637 | |
638 | free((void*)srcPtrs); |
639 | free(srcSizes); |
640 | free(cPtrs); |
641 | free(cSizes); |
642 | free(cCapacities); |
643 | free(resPtrs); |
644 | free(resSizes); |
645 | |
646 | if(allocationincomplete) { |
647 | RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory"); |
648 | } |
649 | |
650 | if(dstParamsError) { |
651 | RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent"); |
652 | } |
653 | return outcome; |
654 | } |
655 | |
656 | BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize, |
657 | const size_t* fileSizes, unsigned nbFiles, |
658 | int cLevel, const ZSTD_compressionParameters* comprParams, |
659 | const void* dictBuffer, size_t dictBufferSize, |
660 | int displayLevel, const char* displayName) { |
661 | |
662 | BMK_advancedParams_t const adv = BMK_initAdvancedParams(); |
663 | return BMK_benchMemAdvanced(srcBuffer, srcSize, |
664 | NULL, 0, |
665 | fileSizes, nbFiles, |
666 | cLevel, comprParams, |
667 | dictBuffer, dictBufferSize, |
668 | displayLevel, displayName, &adv); |
669 | } |
670 | |
671 | static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, |
672 | const size_t* fileSizes, unsigned nbFiles, |
673 | int cLevel, const ZSTD_compressionParameters* comprParams, |
674 | const void* dictBuffer, size_t dictBufferSize, |
675 | int displayLevel, const char* displayName, |
676 | BMK_advancedParams_t const * const adv) |
677 | { |
678 | const char* pch = strrchr(displayName, '\\'); /* Windows */ |
679 | if (!pch) pch = strrchr(displayName, '/'); /* Linux */ |
680 | if (pch) displayName = pch+1; |
681 | |
682 | if (adv->realTime) { |
683 | DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); |
684 | SET_REALTIME_PRIORITY; |
685 | } |
686 | |
687 | if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */ |
688 | OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", |
689 | ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, |
690 | (unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10)); |
691 | |
692 | return BMK_benchMemAdvanced(srcBuffer, benchedSize, |
693 | NULL, 0, |
694 | fileSizes, nbFiles, |
695 | cLevel, comprParams, |
696 | dictBuffer, dictBufferSize, |
697 | displayLevel, displayName, adv); |
698 | } |
699 | |
700 | int BMK_syntheticTest(int cLevel, double compressibility, |
701 | const ZSTD_compressionParameters* compressionParams, |
702 | int displayLevel, const BMK_advancedParams_t* adv) |
703 | { |
704 | char name[20] = {0}; |
705 | size_t const benchedSize = 10000000; |
706 | void* srcBuffer; |
707 | BMK_benchOutcome_t res; |
708 | |
709 | if (cLevel > ZSTD_maxCLevel()) { |
710 | DISPLAYLEVEL(1, "Invalid Compression Level"); |
711 | return 15; |
712 | } |
713 | |
714 | /* Memory allocation */ |
715 | srcBuffer = malloc(benchedSize); |
716 | if (!srcBuffer) { |
717 | DISPLAYLEVEL(1, "allocation error : not enough memory"); |
718 | return 16; |
719 | } |
720 | |
721 | /* Fill input buffer */ |
722 | RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); |
723 | |
724 | /* Bench */ |
725 | snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); |
726 | res = BMK_benchCLevel(srcBuffer, benchedSize, |
727 | &benchedSize /* ? */, 1 /* ? */, |
728 | cLevel, compressionParams, |
729 | NULL, 0, /* dictionary */ |
730 | displayLevel, name, adv); |
731 | |
732 | /* clean up */ |
733 | free(srcBuffer); |
734 | |
735 | return !BMK_isSuccessful_benchOutcome(res); |
736 | } |
737 | |
738 | |
739 | |
740 | static size_t BMK_findMaxMem(U64 requiredMem) |
741 | { |
742 | size_t const step = 64 MB; |
743 | BYTE* testmem = NULL; |
744 | |
745 | requiredMem = (((requiredMem >> 26) + 1) << 26); |
746 | requiredMem += step; |
747 | if (requiredMem > maxMemory) requiredMem = maxMemory; |
748 | |
749 | do { |
750 | testmem = (BYTE*)malloc((size_t)requiredMem); |
751 | requiredMem -= step; |
752 | } while (!testmem && requiredMem > 0); |
753 | |
754 | free(testmem); |
755 | return (size_t)(requiredMem); |
756 | } |
757 | |
758 | /*! BMK_loadFiles() : |
759 | * Loads `buffer` with content of files listed within `fileNamesTable`. |
760 | * At most, fills `buffer` entirely. */ |
761 | static int BMK_loadFiles(void* buffer, size_t bufferSize, |
762 | size_t* fileSizes, |
763 | const char* const * fileNamesTable, unsigned nbFiles, |
764 | int displayLevel) |
765 | { |
766 | size_t pos = 0, totalSize = 0; |
767 | unsigned n; |
768 | for (n=0; n<nbFiles; n++) { |
769 | U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); /* last file may be shortened */ |
770 | if (UTIL_isDirectory(fileNamesTable[n])) { |
771 | DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]); |
772 | fileSizes[n] = 0; |
773 | continue; |
774 | } |
775 | if (fileSize == UTIL_FILESIZE_UNKNOWN) { |
776 | DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); |
777 | fileSizes[n] = 0; |
778 | continue; |
779 | } |
780 | { FILE* const f = fopen(fileNamesTable[n], "rb"); |
781 | if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]); |
782 | OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]); |
783 | if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ |
784 | { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); |
785 | if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]); |
786 | pos += readSize; |
787 | } |
788 | fileSizes[n] = (size_t)fileSize; |
789 | totalSize += (size_t)fileSize; |
790 | fclose(f); |
791 | } } |
792 | |
793 | if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench"); |
794 | return 0; |
795 | } |
796 | |
797 | int BMK_benchFilesAdvanced( |
798 | const char* const * fileNamesTable, unsigned nbFiles, |
799 | const char* dictFileName, int cLevel, |
800 | const ZSTD_compressionParameters* compressionParams, |
801 | int displayLevel, const BMK_advancedParams_t* adv) |
802 | { |
803 | void* srcBuffer = NULL; |
804 | size_t benchedSize; |
805 | void* dictBuffer = NULL; |
806 | size_t dictBufferSize = 0; |
807 | size_t* fileSizes = NULL; |
808 | BMK_benchOutcome_t res; |
809 | U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); |
810 | |
811 | if (!nbFiles) { |
812 | DISPLAYLEVEL(1, "No Files to Benchmark"); |
813 | return 13; |
814 | } |
815 | |
816 | if (cLevel > ZSTD_maxCLevel()) { |
817 | DISPLAYLEVEL(1, "Invalid Compression Level"); |
818 | return 14; |
819 | } |
820 | |
821 | if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) { |
822 | DISPLAYLEVEL(1, "Error loading files"); |
823 | return 15; |
824 | } |
825 | |
826 | fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); |
827 | if (!fileSizes) { |
828 | DISPLAYLEVEL(1, "not enough memory for fileSizes"); |
829 | return 16; |
830 | } |
831 | |
832 | /* Load dictionary */ |
833 | if (dictFileName != NULL) { |
834 | U64 const dictFileSize = UTIL_getFileSize(dictFileName); |
835 | if (dictFileSize == UTIL_FILESIZE_UNKNOWN) { |
836 | DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno)); |
837 | free(fileSizes); |
838 | DISPLAYLEVEL(1, "benchmark aborted"); |
839 | return 17; |
840 | } |
841 | if (dictFileSize > 64 MB) { |
842 | free(fileSizes); |
843 | DISPLAYLEVEL(1, "dictionary file %s too large", dictFileName); |
844 | return 18; |
845 | } |
846 | dictBufferSize = (size_t)dictFileSize; |
847 | dictBuffer = malloc(dictBufferSize); |
848 | if (dictBuffer==NULL) { |
849 | free(fileSizes); |
850 | DISPLAYLEVEL(1, "not enough memory for dictionary (%u bytes)", |
851 | (unsigned)dictBufferSize); |
852 | return 19; |
853 | } |
854 | |
855 | { int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, |
856 | fileSizes, &dictFileName /*?*/, |
857 | 1 /*?*/, displayLevel); |
858 | if (errorCode) { |
859 | res = BMK_benchOutcome_error(); |
860 | goto _cleanUp; |
861 | } } |
862 | } |
863 | |
864 | /* Memory allocation & restrictions */ |
865 | benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; |
866 | if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; |
867 | if (benchedSize < totalSizeToLoad) |
868 | DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20)); |
869 | |
870 | srcBuffer = benchedSize ? malloc(benchedSize) : NULL; |
871 | if (!srcBuffer) { |
872 | free(dictBuffer); |
873 | free(fileSizes); |
874 | DISPLAYLEVEL(1, "not enough memory for srcBuffer"); |
875 | return 20; |
876 | } |
877 | |
878 | /* Load input buffer */ |
879 | { int const errorCode = BMK_loadFiles(srcBuffer, benchedSize, |
880 | fileSizes, fileNamesTable, nbFiles, |
881 | displayLevel); |
882 | if (errorCode) { |
883 | res = BMK_benchOutcome_error(); |
884 | goto _cleanUp; |
885 | } } |
886 | |
887 | /* Bench */ |
888 | { char mfName[20] = {0}; |
889 | snprintf (mfName, sizeof(mfName), " %u files", nbFiles); |
890 | { const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; |
891 | res = BMK_benchCLevel(srcBuffer, benchedSize, |
892 | fileSizes, nbFiles, |
893 | cLevel, compressionParams, |
894 | dictBuffer, dictBufferSize, |
895 | displayLevel, displayName, |
896 | adv); |
897 | } } |
898 | |
899 | _cleanUp: |
900 | free(srcBuffer); |
901 | free(dictBuffer); |
902 | free(fileSizes); |
903 | return !BMK_isSuccessful_benchOutcome(res); |
904 | } |
905 | |
906 | |
907 | int BMK_benchFiles(const char* const * fileNamesTable, unsigned nbFiles, |
908 | const char* dictFileName, |
909 | int cLevel, const ZSTD_compressionParameters* compressionParams, |
910 | int displayLevel) |
911 | { |
912 | BMK_advancedParams_t const adv = BMK_initAdvancedParams(); |
913 | return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv); |
914 | } |