gpu_neon: revive the old tests
[pcsx_rearmed.git] / deps / libchdr / deps / zstd-1.5.6 / programs / fileio.c
CommitLineData
648db22b 1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11
12/* *************************************
13* Compiler Options
14***************************************/
15#ifdef _MSC_VER /* Visual */
16# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
17# pragma warning(disable : 4204) /* non-constant aggregate initializer */
18#endif
19#if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
20# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
21#endif
22
23/*-*************************************
24* Includes
25***************************************/
26#include "platform.h" /* Large Files support, SET_BINARY_MODE */
27#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
28#include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
29#include <stdlib.h> /* malloc, free */
30#include <string.h> /* strcmp, strlen */
31#include <time.h> /* clock_t, to measure process time */
32#include <fcntl.h> /* O_WRONLY */
33#include <assert.h>
34#include <errno.h> /* errno */
35#include <limits.h> /* INT_MAX */
36#include <signal.h>
37#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
38
39#if defined (_MSC_VER)
40# include <sys/stat.h>
41# include <io.h>
42#endif
43
44#include "fileio.h"
45#include "fileio_asyncio.h"
46#include "fileio_common.h"
47
48FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
49UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
50
51#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
52#include "../lib/zstd.h"
53#include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
54
55#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
56# include <zlib.h>
57# if !defined(z_const)
58# define z_const
59# endif
60#endif
61
62#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
63# include <lzma.h>
64#endif
65
66#define LZ4_MAGICNUMBER 0x184D2204
67#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
68# define LZ4F_ENABLE_OBSOLETE_ENUMS
69# include <lz4frame.h>
70# include <lz4.h>
71#endif
72
/* FIO_zlibVersion() :
 * @return : the string reported by zlibVersion() when gzip support is
 *           compiled in (ZSTD_GZCOMPRESS or ZSTD_GZDECOMPRESS defined),
 *           otherwise the literal "Unsupported". */
char const* FIO_zlibVersion(void)
{
#if !defined(ZSTD_GZCOMPRESS) && !defined(ZSTD_GZDECOMPRESS)
    return "Unsupported";
#else
    return zlibVersion();
#endif
}
81
/* FIO_lz4Version() :
 * @return : the lz4 version string when lz4 support is compiled in
 *           (ZSTD_LZ4COMPRESS or ZSTD_LZ4DECOMPRESS defined),
 *           otherwise the literal "Unsupported".
 * LZ4_versionString() only exists since lz4 v1.7.3; for older headers the
 * string is assembled from the LZ4_VERSION_* macros at compile time. */
char const* FIO_lz4Version(void)
{
#if !(defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS))
    return "Unsupported";
#elif LZ4_VERSION_NUMBER >= 10703
    return LZ4_versionString();
#else
#   define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#   define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION)
    return ZSTD_LZ4_VERSION_STRING;
#endif
}
97
/* FIO_lzmaVersion() :
 * @return : the string reported by lzma_version_string() when lzma support
 *           is compiled in (ZSTD_LZMACOMPRESS or ZSTD_LZMADECOMPRESS
 *           defined), otherwise the literal "Unsupported". */
char const* FIO_lzmaVersion(void)
{
#if !defined(ZSTD_LZMACOMPRESS) && !defined(ZSTD_LZMADECOMPRESS)
    return "Unsupported";
#else
    return lzma_version_string();
#endif
}
106
107
108/*-*************************************
109* Constants
110***************************************/
111#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
112#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
113
114#define FNSPACE 30
115
116/* Default file permissions 0666 (modulated by umask) */
117/* Temporary restricted file permissions are used when we're going to
118 * chmod/chown at the end of the operation. */
119#if !defined(_WIN32)
120/* These macros aren't defined on windows. */
121#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
122#define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR)
123#else
124#define DEFAULT_FILE_PERMISSIONS (0666)
125#define TEMPORARY_FILE_PERMISSIONS (0600)
126#endif
127
128/*-************************************
129* Signal (Ctrl-C trapping)
130**************************************/
/* Path of the output file to delete if the process is interrupted;
 * NULL when no SIGINT handler is armed (see addHandler / clearHandler). */
static const char* g_artefact = NULL;

/* INThandler() :
 * SIGINT handler : removes the registered artefact (if any), prints a
 * newline, then terminates the process with exit code 2. */
static void INThandler(int sig)
{
    assert(sig==SIGINT); (void)sig;
#if !defined(_MSC_VER)
    /* ignore further SIGINT while cleaning up;
     * this invocation generates a buggy warning in Visual Studio */
    signal(sig, SIG_IGN);
#endif
    if (g_artefact != NULL) {
        assert(UTIL_isRegularFile(g_artefact));
        remove(g_artefact);
    }
    DISPLAY("\n");
    exit(2);
}
145static void addHandler(char const* dstFileName)
146{
147 if (UTIL_isRegularFile(dstFileName)) {
148 g_artefact = dstFileName;
149 signal(SIGINT, INThandler);
150 } else {
151 g_artefact = NULL;
152 }
153}
154/* Idempotent */
155static void clearHandler(void)
156{
157 if (g_artefact) signal(SIGINT, SIG_DFL);
158 g_artefact = NULL;
159}
160
161
162/*-*********************************************************
163* Termination signal trapping (Print debug stack trace)
164***********************************************************/
165#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
166# if (__has_feature(address_sanitizer))
167# define BACKTRACE_ENABLE 0
168# endif /* __has_feature(address_sanitizer) */
169#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
170# define BACKTRACE_ENABLE 0
171#endif
172
173#if !defined(BACKTRACE_ENABLE)
174/* automatic detector : backtrace enabled by default on linux+glibc and osx */
175# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
176 || (defined(__APPLE__) && defined(__MACH__))
177# define BACKTRACE_ENABLE 1
178# else
179# define BACKTRACE_ENABLE 0
180# endif
181#endif
182
183/* note : after this point, BACKTRACE_ENABLE is necessarily defined */
184
185
186#if BACKTRACE_ENABLE
187
188#include <execinfo.h> /* backtrace, backtrace_symbols */
189
190#define MAX_STACK_FRAMES 50
191
192static void ABRThandler(int sig) {
193 const char* name;
194 void* addrlist[MAX_STACK_FRAMES];
195 char** symbollist;
196 int addrlen, i;
197
198 switch (sig) {
199 case SIGABRT: name = "SIGABRT"; break;
200 case SIGFPE: name = "SIGFPE"; break;
201 case SIGILL: name = "SIGILL"; break;
202 case SIGINT: name = "SIGINT"; break;
203 case SIGSEGV: name = "SIGSEGV"; break;
204 default: name = "UNKNOWN";
205 }
206
207 DISPLAY("Caught %s signal, printing stack:\n", name);
208 /* Retrieve current stack addresses. */
209 addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
210 if (addrlen == 0) {
211 DISPLAY("\n");
212 return;
213 }
214 /* Create readable strings to each frame. */
215 symbollist = backtrace_symbols(addrlist, addrlen);
216 /* Print the stack trace, excluding calls handling the signal. */
217 for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
218 DISPLAY("%s\n", symbollist[i]);
219 }
220 free(symbollist);
221 /* Reset and raise the signal so default handler runs. */
222 signal(sig, SIG_DFL);
223 raise(sig);
224}
225#endif
226
/* FIO_addAbortHandler() :
 * When BACKTRACE_ENABLE is set, installs ABRThandler for SIGABRT, SIGFPE,
 * SIGILL, SIGSEGV and SIGBUS (in that order). Does nothing otherwise. */
void FIO_addAbortHandler(void)
{
#if BACKTRACE_ENABLE
    static const int trappedSignals[] = { SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS };
    size_t n;
    for (n = 0; n < sizeof(trappedSignals) / sizeof(trappedSignals[0]); n++) {
        signal(trappedSignals[n], ABRThandler);
    }
#endif
}
237
238/*-*************************************
239* Parameters: FIO_ctx_t
240***************************************/
241
/* Per-run file i/o context.
 * typedef'd to FIO_ctx_t within fileio.h */
struct FIO_ctx_s {

    /* file i/o info */
    int nbFilesTotal;       /* set by FIO_setNbFilesTotal() ; starts at 1 */
    int hasStdinInput;      /* set to 1 by FIO_determineHasStdinInput() */
    int hasStdoutOutput;    /* set by FIO_setHasStdoutOutput() */

    /* file i/o state */
    int currFileIdx;
    int nbFilesProcessed;
    size_t totalBytesInput;
    size_t totalBytesOutput;
};
256
257static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx)
258{
259 return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3;
260}
261
262static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx)
263{
264 int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1);
265 assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0);
266 return shouldDisplay;
267}
268
269
270/*-*************************************
271* Parameters: Initialization
272***************************************/
273
274#define FIO_OVERLAP_LOG_NOTSET 9999
275#define FIO_LDM_PARAM_NOTSET 9999
276
277
278FIO_prefs_t* FIO_createPreferences(void)
279{
280 FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
281 if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
282
283 ret->compressionType = FIO_zstdCompression;
284 ret->overwrite = 0;
285 ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
286 ret->dictIDFlag = 1;
287 ret->checksumFlag = 1;
288 ret->removeSrcFile = 0;
289 ret->memLimit = 0;
290 ret->nbWorkers = 1;
291 ret->blockSize = 0;
292 ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
293 ret->adaptiveMode = 0;
294 ret->rsyncable = 0;
295 ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
296 ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
297 ret->ldmFlag = 0;
298 ret->ldmHashLog = 0;
299 ret->ldmMinMatch = 0;
300 ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
301 ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
302 ret->streamSrcSize = 0;
303 ret->targetCBlockSize = 0;
304 ret->srcSizeHint = 0;
305 ret->testMode = 0;
306 ret->literalCompressionMode = ZSTD_ps_auto;
307 ret->excludeCompressedFiles = 0;
308 ret->allowBlockDevices = 0;
309 ret->asyncIO = AIO_supported();
310 ret->passThrough = -1;
311 return ret;
312}
313
314FIO_ctx_t* FIO_createContext(void)
315{
316 FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
317 if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
318
319 ret->currFileIdx = 0;
320 ret->hasStdinInput = 0;
321 ret->hasStdoutOutput = 0;
322 ret->nbFilesTotal = 1;
323 ret->nbFilesProcessed = 0;
324 ret->totalBytesInput = 0;
325 ret->totalBytesOutput = 0;
326 return ret;
327}
328
329void FIO_freePreferences(FIO_prefs_t* const prefs)
330{
331 free(prefs);
332}
333
334void FIO_freeContext(FIO_ctx_t* const fCtx)
335{
336 free(fCtx);
337}
338
339
340/*-*************************************
341* Parameters: Display Options
342***************************************/
343
344void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
345
346void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
347
348
349/*-*************************************
350* Parameters: Setters
351***************************************/
352
353/* FIO_prefs_t functions */
354
355void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
356
357void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
358
359void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; }
360
361void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
362
363void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
364
365void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); }
366
367void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
368
369void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
370#ifndef ZSTD_MULTITHREAD
371 if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
372#endif
373 prefs->nbWorkers = nbWorkers;
374}
375
376void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
377
378void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
379
380void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
381 if (blockSize && prefs->nbWorkers==0)
382 DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
383 prefs->blockSize = blockSize;
384}
385
386void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
387 if (overlapLog && prefs->nbWorkers==0)
388 DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
389 prefs->overlapLog = overlapLog;
390}
391
392void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) {
393 if ((adapt>0) && (prefs->nbWorkers==0))
394 EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
395 prefs->adaptiveMode = adapt;
396}
397
398void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
399 prefs->useRowMatchFinder = useRowMatchFinder;
400}
401
402void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
403 if ((rsyncable>0) && (prefs->nbWorkers==0))
404 EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
405 prefs->rsyncable = rsyncable;
406}
407
408void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
409 prefs->streamSrcSize = streamSrcSize;
410}
411
412void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
413 prefs->targetCBlockSize = targetCBlockSize;
414}
415
416void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
417 prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
418}
419
420void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
421 prefs->testMode = (testMode!=0);
422}
423
424void FIO_setLiteralCompressionMode(
425 FIO_prefs_t* const prefs,
426 ZSTD_paramSwitch_e mode) {
427 prefs->literalCompressionMode = mode;
428}
429
430void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
431{
432#ifndef ZSTD_NOCOMPRESS
433 assert(minCLevel >= ZSTD_minCLevel());
434#endif
435 prefs->minAdaptLevel = minCLevel;
436}
437
438void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
439{
440 prefs->maxAdaptLevel = maxCLevel;
441}
442
443void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
444 prefs->ldmFlag = (ldmFlag>0);
445}
446
447void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
448 prefs->ldmHashLog = ldmHashLog;
449}
450
451void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
452 prefs->ldmMinMatch = ldmMinMatch;
453}
454
455void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
456 prefs->ldmBucketSizeLog = ldmBucketSizeLog;
457}
458
459
460void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
461 prefs->ldmHashRateLog = ldmHashRateLog;
462}
463
464void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
465{
466 prefs->patchFromMode = value != 0;
467}
468
469void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
470{
471 prefs->contentSize = value != 0;
472}
473
474void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) {
475#ifdef ZSTD_MULTITHREAD
476 prefs->asyncIO = value;
477#else
478 (void) prefs;
479 (void) value;
480 DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n");
481#endif
482}
483
484void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
485 prefs->passThrough = (value != 0);
486}
487
488void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
489{
490 prefs->mmapDict = value;
491}
492
493/* FIO_ctx_t functions */
494
495void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
496 fCtx->hasStdoutOutput = value;
497}
498
499void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
500{
501 fCtx->nbFilesTotal = value;
502}
503
504void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
505 size_t i = 0;
506 for ( ; i < filenames->tableSize; ++i) {
507 if (!strcmp(stdinmark, filenames->fileNames[i])) {
508 fCtx->hasStdinInput = 1;
509 return;
510 }
511 }
512}
513
514/*-*************************************
515* Functions
516***************************************/
517/** FIO_removeFile() :
518 * @result : Unlink `fileName`, even if it's read-only */
519static int FIO_removeFile(const char* path)
520{
521 stat_t statbuf;
522 if (!UTIL_stat(path, &statbuf)) {
523 DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
524 return 0;
525 }
526 if (!UTIL_isRegularFileStat(&statbuf)) {
527 DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
528 return 0;
529 }
f535537f 530#if defined(_WIN32)
648db22b 531 /* windows doesn't allow remove read-only files,
532 * so try to make it writable first */
533 if (!(statbuf.st_mode & _S_IWRITE)) {
534 UTIL_chmod(path, &statbuf, _S_IWRITE);
535 }
536#endif
537 return remove(path);
538}
539
540/** FIO_openSrcFile() :
541 * condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
542 * @result : FILE* to `srcFileName`, or NULL if it fails */
543static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
544{
545 int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
546 assert(srcFileName != NULL);
547 assert(statbuf != NULL);
548 if (!strcmp (srcFileName, stdinmark)) {
549 DISPLAYLEVEL(4,"Using stdin for input \n");
550 SET_BINARY_MODE(stdin);
551 return stdin;
552 }
553
554 if (!UTIL_stat(srcFileName, statbuf)) {
555 DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
556 srcFileName, strerror(errno));
557 return NULL;
558 }
559
560 if (!UTIL_isRegularFileStat(statbuf)
561 && !UTIL_isFIFOStat(statbuf)
562 && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
563 ) {
564 DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
565 srcFileName);
566 return NULL;
567 }
568
569 { FILE* const f = fopen(srcFileName, "rb");
570 if (f == NULL)
571 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
572 return f;
573 }
574}
575
576/** FIO_openDstFile() :
577 * condition : `dstFileName` must be non-NULL.
578 * @result : FILE* to `dstFileName`, or NULL if it fails */
579static FILE*
580FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
581 const char* srcFileName, const char* dstFileName,
582 const int mode)
583{
584 int isDstRegFile;
585
586 if (prefs->testMode) return NULL; /* do not open file in test mode */
587
588 assert(dstFileName != NULL);
589 if (!strcmp (dstFileName, stdoutmark)) {
590 DISPLAYLEVEL(4,"Using stdout for output \n");
591 SET_BINARY_MODE(stdout);
592 if (prefs->sparseFileSupport == 1) {
593 prefs->sparseFileSupport = 0;
594 DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
595 }
596 return stdout;
597 }
598
599 /* ensure dst is not the same as src */
600 if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
601 DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
602 return NULL;
603 }
604
605 isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */
606 if (prefs->sparseFileSupport == 1) {
607 prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
608 if (!isDstRegFile) {
609 prefs->sparseFileSupport = 0;
610 DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n");
611 }
612 }
613
614 if (isDstRegFile) {
615 /* Check if destination file already exists */
616#if !defined(_WIN32)
617 /* this test does not work on Windows :
618 * `NUL` and `nul` are detected as regular files */
619 if (!strcmp(dstFileName, nulmark)) {
620 EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
621 dstFileName);
622 }
623#endif
624 if (!prefs->overwrite) {
625 if (g_display_prefs.displayLevel <= 1) {
626 /* No interaction possible */
627 DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n",
628 dstFileName);
629 return NULL;
630 }
631 DISPLAY("zstd: %s already exists; ", dstFileName);
632 if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
633 return NULL;
634 }
635 /* need to unlink */
636 FIO_removeFile(dstFileName);
637 }
638
639 {
640#if defined(_WIN32)
641 /* Windows requires opening the file as a "binary" file to avoid
642 * mangling. This macro doesn't exist on unix. */
643 const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
644 const int fd = _open(dstFileName, openflags, mode);
645 FILE* f = NULL;
646 if (fd != -1) {
647 f = _fdopen(fd, "wb");
648 }
649#else
650 const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
651 const int fd = open(dstFileName, openflags, mode);
652 FILE* f = NULL;
653 if (fd != -1) {
654 f = fdopen(fd, "wb");
655 }
656#endif
657 if (f == NULL) {
658 DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
659 } else {
660 /* An increased buffer size can provide a significant performance
661 * boost on some platforms. Note that providing a NULL buf with a
662 * size that's not 0 is not defined in ANSI C, but is defined in an
663 * extension. There are three possibilities here:
664 * 1. Libc supports the extended version and everything is good.
665 * 2. Libc ignores the size when buf is NULL, in which case
666 * everything will continue as if we didn't call `setvbuf()`.
667 * 3. We fail the call and execution continues but a warning
668 * message might be shown.
669 * In all cases due execution continues. For now, I believe that
670 * this is a more cost-effective solution than managing the buffers
671 * allocations ourselves (will require an API change).
672 */
673 if (setvbuf(f, NULL, _IOFBF, 1 MB)) {
674 DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
675 }
676 }
677 return f;
678 }
679}
680
681
682/* FIO_getDictFileStat() :
683 */
684static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
685 assert(dictFileStat != NULL);
686 if (fileName == NULL) return;
687
688 if (!UTIL_stat(fileName, dictFileStat)) {
689 EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
690 }
691
692 if (!UTIL_isRegularFileStat(dictFileStat)) {
693 EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
694 }
695}
696
697/* FIO_setDictBufferMalloc() :
698 * allocates a buffer, pointed by `dict->dictBuffer`,
699 * loads `filename` content into it, up to DICTSIZE_MAX bytes.
700 * @return : loaded size
701 * if fileName==NULL, returns 0 and a NULL pointer
702 */
703static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
704{
705 FILE* fileHandle;
706 U64 fileSize;
707 void** bufferPtr = &dict->dictBuffer;
708
709 assert(bufferPtr != NULL);
710 assert(dictFileStat != NULL);
711 *bufferPtr = NULL;
712 if (fileName == NULL) return 0;
713
714 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
715
716 fileHandle = fopen(fileName, "rb");
717
718 if (fileHandle == NULL) {
719 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
720 }
721
722 fileSize = UTIL_getFileSizeStat(dictFileStat);
723 {
724 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
725 if (fileSize > dictSizeMax) {
726 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
727 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
728 }
729 }
730 *bufferPtr = malloc((size_t)fileSize);
731 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
732 { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
733 if (readSize != fileSize) {
734 EXM_THROW(35, "Error reading dictionary file %s : %s",
735 fileName, strerror(errno));
736 }
737 }
738 fclose(fileHandle);
739 return (size_t)fileSize;
740}
741
742#if (PLATFORM_POSIX_VERSION > 0)
743#include <sys/mman.h>
744static void FIO_munmap(FIO_Dict_t* dict)
745{
746 munmap(dict->dictBuffer, dict->dictBufferSize);
747 dict->dictBuffer = NULL;
748 dict->dictBufferSize = 0;
749}
750static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
751{
752 int fileHandle;
753 U64 fileSize;
754 void** bufferPtr = &dict->dictBuffer;
755
756 assert(bufferPtr != NULL);
757 assert(dictFileStat != NULL);
758 *bufferPtr = NULL;
759 if (fileName == NULL) return 0;
760
761 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
762
763 fileHandle = open(fileName, O_RDONLY);
764
765 if (fileHandle == -1) {
766 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
767 }
768
769 fileSize = UTIL_getFileSizeStat(dictFileStat);
770 {
771 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
772 if (fileSize > dictSizeMax) {
773 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
774 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
775 }
776 }
777
778 *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
779 if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
780
781 close(fileHandle);
782 return (size_t)fileSize;
783}
784#elif defined(_MSC_VER) || defined(_WIN32)
785#include <windows.h>
786static void FIO_munmap(FIO_Dict_t* dict)
787{
788 UnmapViewOfFile(dict->dictBuffer);
789 CloseHandle(dict->dictHandle);
790 dict->dictBuffer = NULL;
791 dict->dictBufferSize = 0;
792}
793static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
794{
795 HANDLE fileHandle, mapping;
796 U64 fileSize;
797 void** bufferPtr = &dict->dictBuffer;
798
799 assert(bufferPtr != NULL);
800 assert(dictFileStat != NULL);
801 *bufferPtr = NULL;
802 if (fileName == NULL) return 0;
803
804 DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
805
806 fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
807
808 if (fileHandle == INVALID_HANDLE_VALUE) {
809 EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
810 }
811
812 fileSize = UTIL_getFileSizeStat(dictFileStat);
813 {
814 size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
815 if (fileSize > dictSizeMax) {
816 EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
817 fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
818 }
819 }
820
821 mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
822 if (mapping == NULL) {
823 EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno));
824 }
825
826 *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */
827 if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno));
828
829 dict->dictHandle = fileHandle;
830 return (size_t)fileSize;
831}
832#else
833static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
834{
835 return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
836}
837static void FIO_munmap(FIO_Dict_t* dict) {
838 free(dict->dictBuffer);
839 dict->dictBuffer = NULL;
840 dict->dictBufferSize = 0;
841}
842#endif
843
844static void FIO_freeDict(FIO_Dict_t* dict) {
845 if (dict->dictBufferType == FIO_mallocDict) {
846 free(dict->dictBuffer);
847 dict->dictBuffer = NULL;
848 dict->dictBufferSize = 0;
849 } else if (dict->dictBufferType == FIO_mmapDict) {
850 FIO_munmap(dict);
851 } else {
852 assert(0); /* Should not reach this case */
853 }
854}
855
856static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) {
857 dict->dictBufferType = dictBufferType;
858 if (dict->dictBufferType == FIO_mallocDict) {
859 dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
860 } else if (dict->dictBufferType == FIO_mmapDict) {
861 dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat);
862 } else {
863 assert(0); /* Should not reach this case */
864 }
865}
866
867
868/* FIO_checkFilenameCollisions() :
869 * Checks for and warns if there are any files that would have the same output path
870 */
871int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
872 const char **filenameTableSorted, *prevElem, *filename;
873 unsigned u;
874
875 filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
876 if (!filenameTableSorted) {
877 DISPLAYLEVEL(1, "Allocation error during filename collision checking \n");
878 return 1;
879 }
880
881 for (u = 0; u < nbFiles; ++u) {
882 filename = strrchr(filenameTable[u], PATH_SEP);
883 if (filename == NULL) {
884 filenameTableSorted[u] = filenameTable[u];
885 } else {
886 filenameTableSorted[u] = filename+1;
887 }
888 }
889
890 qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
891 prevElem = filenameTableSorted[0];
892 for (u = 1; u < nbFiles; ++u) {
893 if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
894 DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem);
895 }
896 prevElem = filenameTableSorted[u];
897 }
898
899 free((void*)filenameTableSorted);
900 return 0;
901}
902
/* extractFilename() :
 * @return : pointer, inside `path`, to whatever follows the last
 *           occurrence of `separator` ; `path` itself when `separator`
 *           does not occur. */
static const char*
extractFilename(const char* path, char separator)
{
    const char* const lastSep = strrchr(path, separator);
    return (lastSep != NULL) ? lastSep + 1 : path;
}
910
/* FIO_createFilename_fromOutDir() :
 * Takes a source file name and specified output directory, and
 * allocates memory for and returns a pointer to final path.
 * The result is `outDirName`, a separator when `outDirName` does not
 * already end with one, then the file name part of `path`.
 * The buffer is zero-filled and has room for `suffixLen` more characters.
 * An empty `outDirName` yields the bare file name (no leading separator).
 * This function never returns an error (it exits through EXM_THROW if the
 * allocation fails). The caller owns the returned buffer.
 */
static char*
FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
{
    const char* filenameStart;
    char separator;
    char* result;
    size_t dirLen;
    size_t nameLen;
    size_t writePos;

#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    separator = '\\';
#else
    separator = '/';
#endif

    filenameStart = extractFilename(path, separator);
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
    filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
#endif

    dirLen = strlen(outDirName);
    nameLen = strlen(filenameStart);

    /* room for : directory + separator + file name + suffix + terminating NUL */
    result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
    if (!result) {
        EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
    }

    memcpy(result, outDirName, dirLen);
    writePos = dirLen;
    /* dirLen is tested first : outDirName[dirLen-1] must not be read for an empty string */
    if (dirLen > 0 && outDirName[dirLen-1] != separator) {
        result[writePos++] = separator;
    }
    memcpy(result + writePos, filenameStart, nameLen);

    return result;
}
949
/* FIO_highbit64() :
 * gives position of highest bit set (0 for the least significant bit).
 * note : only works for v > 0 !
 */
static unsigned FIO_highbit64(unsigned long long v)
{
    unsigned position = 0;
    assert(v != 0);
    while (v > 1) {
        v >>= 1;
        position++;
    }
    return position;
}
962
963static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
964 unsigned long long const dictSize,
965 unsigned long long const maxSrcFileSize)
966{
967 unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
968 unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
969 if (maxSize == UTIL_FILESIZE_UNKNOWN)
970 EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
971 assert(maxSize != UTIL_FILESIZE_UNKNOWN);
972 if (maxSize > maxWindowSize)
973 EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
974 FIO_setMemLimit(prefs, (unsigned)maxSize);
975}
976
977/* FIO_multiFilesConcatWarning() :
978 * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts.
979 * Returns 1 if the console should abort, 0 if console should proceed.
980 *
981 * If output is stdout or test mode is active, check that `--rm` disabled.
982 *
983 * If there is just 1 file to process, zstd will proceed as usual.
984 * If each file get processed into its own separate destination file, proceed as usual.
985 *
986 * When multiple files are processed into a single output,
987 * display a warning message, then disable --rm if it's set.
988 *
989 * If -f is specified or if output is stdout, just proceed.
990 * If output is set with -o, prompt for confirmation.
991 */
992static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff)
993{
994 if (fCtx->hasStdoutOutput) {
995 if (prefs->removeSrcFile)
996 /* this should not happen ; hard fail, to protect user's data
997 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
998 EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. "
999 "This scenario is not supposed to be possible. "
1000 "This is a programming error. File an issue for it to be fixed.");
1001 }
1002 if (prefs->testMode) {
1003 if (prefs->removeSrcFile)
1004 /* this should not happen ; hard fail, to protect user's data
1005 * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
1006 EXM_THROW(43, "Test mode shall not remove input files! "
1007 "This scenario is not supposed to be possible. "
1008 "This is a programming error. File an issue for it to be fixed.");
1009 return 0;
1010 }
1011
1012 if (fCtx->nbFilesTotal == 1) return 0;
1013 assert(fCtx->nbFilesTotal > 1);
1014
1015 if (!outFileName) return 0;
1016
1017 if (fCtx->hasStdoutOutput) {
1018 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
1019 } else {
1020 DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
1021 }
1022 DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n")
1023
1024 /* multi-input into single output : --rm is not allowed */
1025 if (prefs->removeSrcFile) {
1026 DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. \n");
1027 prefs->removeSrcFile = 0;
1028 }
1029
1030 if (fCtx->hasStdoutOutput) return 0;
1031 if (prefs->overwrite) return 0;
1032
1033 /* multiple files concatenated into single destination file using -o without -f */
1034 if (g_display_prefs.displayLevel <= displayLevelCutoff) {
1035 /* quiet mode => no prompt => fail automatically */
1036 DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n");
1037 DISPLAYLEVEL(1, "Aborting. \n");
1038 return 1;
1039 }
1040 /* normal mode => prompt */
1041 return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
1042}
1043
1044static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos)
1045{
1046 ZSTD_inBuffer i;
1047 i.src = buf;
1048 i.size = s;
1049 i.pos = pos;
1050 return i;
1051}
1052
1053static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
1054{
1055 ZSTD_outBuffer o;
1056 o.dst = buf;
1057 o.size = s;
1058 o.pos = pos;
1059 return o;
1060}
1061
1062#ifndef ZSTD_NOCOMPRESS
1063
1064/* **********************************************************************
1065 * Compression
1066 ************************************************************************/
1067typedef struct {
1068 FIO_Dict_t dict;
1069 const char* dictFileName;
1070 stat_t dictFileStat;
1071 ZSTD_CStream* cctx;
1072 WritePoolCtx_t *writeCtx;
1073 ReadPoolCtx_t *readCtx;
1074} cRess_t;
1075
1076/** ZSTD_cycleLog() :
1077 * condition for correct operation : hashLog > 1 */
1078static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
1079{
1080 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
1081 assert(hashLog > 1);
1082 return hashLog - btScale;
1083}
1084
1085static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
1086 ZSTD_compressionParameters* comprParams,
1087 unsigned long long const dictSize,
1088 unsigned long long const maxSrcFileSize,
1089 int cLevel)
1090{
1091 unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
1092 ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
1093 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
1094 if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
1095 DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
1096 comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
1097 if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
1098 if (!prefs->ldmFlag)
f535537f 1099 DISPLAYLEVEL(2, "long mode automatically triggered\n");
648db22b 1100 FIO_setLdmFlag(prefs, 1);
1101 }
1102 if (cParams.strategy >= ZSTD_btopt) {
f535537f 1103 DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
1104 DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
1105 DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
1106 DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
1107 DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
648db22b 1108 }
1109}
1110
1111static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
1112 const char* dictFileName, unsigned long long const maxSrcFileSize,
1113 int cLevel, ZSTD_compressionParameters comprParams) {
1114 int useMMap = prefs->mmapDict == ZSTD_ps_enable;
1115 int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
1116 FIO_dictBufferType_t dictBufferType;
1117 cRess_t ress;
1118 memset(&ress, 0, sizeof(ress));
1119
1120 DISPLAYLEVEL(6, "FIO_createCResources \n");
1121 ress.cctx = ZSTD_createCCtx();
1122 if (ress.cctx == NULL)
1123 EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
1124 strerror(errno));
1125
1126 FIO_getDictFileStat(dictFileName, &ress.dictFileStat);
1127
1128 /* need to update memLimit before calling createDictBuffer
1129 * because of memLimit check inside it */
1130 if (prefs->patchFromMode) {
1131 U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
1132 unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
1133 useMMap |= dictSize > prefs->memLimit;
1134 FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
1135 }
1136
1137 dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
1138 FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType); /* works with dictFileName==NULL */
1139
1140 ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
1141 ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
1142
1143 /* Advanced parameters, including dictionary */
1144 if (dictFileName && (ress.dict.dictBuffer==NULL))
1145 EXM_THROW(32, "allocation error : can't create dictBuffer");
1146 ress.dictFileName = dictFileName;
1147
1148 if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
1149 comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
1150
1151 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */
1152 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
1153 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
1154 /* compression level */
1155 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
1156 /* max compressed block size */
1157 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
1158 /* source size hint */
1159 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
1160 /* long distance matching */
1161 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
1162 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
1163 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
1164 if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
1165 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
1166 }
1167 if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
1168 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
1169 }
1170 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
1171 /* compression parameters */
1172 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
1173 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
1174 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
1175 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
1176 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
1177 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
1178 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
1179 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
1180 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
1181 /* multi-threading */
1182#ifdef ZSTD_MULTITHREAD
1183 DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
1184 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
1185 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
1186 if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
1187 DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
1188 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
1189 }
1190 CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
1191#endif
1192 /* dictionary */
1193 if (prefs->patchFromMode) {
1194 CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
1195 } else {
1196 CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
1197 }
1198
1199 return ress;
1200}
1201
1202static void FIO_freeCResources(cRess_t* const ress)
1203{
1204 FIO_freeDict(&(ress->dict));
1205 AIO_WritePool_free(ress->writeCtx);
1206 AIO_ReadPool_free(ress->readCtx);
1207 ZSTD_freeCStream(ress->cctx); /* never fails */
1208}
1209
1210
1211#ifdef ZSTD_GZCOMPRESS
1212static unsigned long long
1213FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
1214 const char* srcFileName, U64 const srcFileSize,
1215 int compressionLevel, U64* readsize)
1216{
1217 unsigned long long inFileSize = 0, outFileSize = 0;
1218 z_stream strm;
1219 IOJob_t *writeJob = NULL;
1220
1221 if (compressionLevel > Z_BEST_COMPRESSION)
1222 compressionLevel = Z_BEST_COMPRESSION;
1223
1224 strm.zalloc = Z_NULL;
1225 strm.zfree = Z_NULL;
1226 strm.opaque = Z_NULL;
1227
1228 { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
1229 15 /* maxWindowLogSize */ + 16 /* gzip only */,
1230 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */
1231 if (ret != Z_OK) {
1232 EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
1233 } }
1234
1235 writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
1236 strm.next_in = 0;
1237 strm.avail_in = 0;
1238 strm.next_out = (Bytef*)writeJob->buffer;
1239 strm.avail_out = (uInt)writeJob->bufferSize;
1240
1241 while (1) {
1242 int ret;
1243 if (strm.avail_in == 0) {
1244 AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
1245 if (ress->readCtx->srcBufferLoaded == 0) break;
1246 inFileSize += ress->readCtx->srcBufferLoaded;
1247 strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
1248 strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
1249 }
1250
1251 {
1252 size_t const availBefore = strm.avail_in;
1253 ret = deflate(&strm, Z_NO_FLUSH);
1254 AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
1255 }
1256
1257 if (ret != Z_OK)
1258 EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
1259 { size_t const cSize = writeJob->bufferSize - strm.avail_out;
1260 if (cSize) {
1261 writeJob->usedBufferSize = cSize;
1262 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1263 outFileSize += cSize;
1264 strm.next_out = (Bytef*)writeJob->buffer;
1265 strm.avail_out = (uInt)writeJob->bufferSize;
1266 } }
1267 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1268 DISPLAYUPDATE_PROGRESS(
1269 "\rRead : %u MB ==> %.2f%% ",
1270 (unsigned)(inFileSize>>20),
1271 (double)outFileSize/(double)inFileSize*100)
1272 } else {
1273 DISPLAYUPDATE_PROGRESS(
1274 "\rRead : %u / %u MB ==> %.2f%% ",
1275 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1276 (double)outFileSize/(double)inFileSize*100);
1277 } }
1278
1279 while (1) {
1280 int const ret = deflate(&strm, Z_FINISH);
1281 { size_t const cSize = writeJob->bufferSize - strm.avail_out;
1282 if (cSize) {
1283 writeJob->usedBufferSize = cSize;
1284 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1285 outFileSize += cSize;
1286 strm.next_out = (Bytef*)writeJob->buffer;
1287 strm.avail_out = (uInt)writeJob->bufferSize;
1288 } }
1289 if (ret == Z_STREAM_END) break;
1290 if (ret != Z_BUF_ERROR)
1291 EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
1292 }
1293
1294 { int const ret = deflateEnd(&strm);
1295 if (ret != Z_OK) {
1296 EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
1297 } }
1298 *readsize = inFileSize;
1299 AIO_WritePool_releaseIoJob(writeJob);
1300 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
1301 return outFileSize;
1302}
1303#endif
1304
1305
1306#ifdef ZSTD_LZMACOMPRESS
1307static unsigned long long
1308FIO_compressLzmaFrame(cRess_t* ress,
1309 const char* srcFileName, U64 const srcFileSize,
1310 int compressionLevel, U64* readsize, int plain_lzma)
1311{
1312 unsigned long long inFileSize = 0, outFileSize = 0;
1313 lzma_stream strm = LZMA_STREAM_INIT;
1314 lzma_action action = LZMA_RUN;
1315 lzma_ret ret;
1316 IOJob_t *writeJob = NULL;
1317
1318 if (compressionLevel < 0) compressionLevel = 0;
1319 if (compressionLevel > 9) compressionLevel = 9;
1320
1321 if (plain_lzma) {
1322 lzma_options_lzma opt_lzma;
1323 if (lzma_lzma_preset(&opt_lzma, compressionLevel))
1324 EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
1325 ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
1326 if (ret != LZMA_OK)
1327 EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
1328 } else {
1329 ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
1330 if (ret != LZMA_OK)
1331 EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
1332 }
1333
1334 writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
1335 strm.next_out = (BYTE*)writeJob->buffer;
1336 strm.avail_out = writeJob->bufferSize;
1337 strm.next_in = 0;
1338 strm.avail_in = 0;
1339
1340 while (1) {
1341 if (strm.avail_in == 0) {
1342 size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
1343 if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
1344 inFileSize += inSize;
1345 strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
1346 strm.avail_in = ress->readCtx->srcBufferLoaded;
1347 }
1348
1349 {
1350 size_t const availBefore = strm.avail_in;
1351 ret = lzma_code(&strm, action);
1352 AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
1353 }
1354
1355
1356 if (ret != LZMA_OK && ret != LZMA_STREAM_END)
1357 EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
1358 { size_t const compBytes = writeJob->bufferSize - strm.avail_out;
1359 if (compBytes) {
1360 writeJob->usedBufferSize = compBytes;
1361 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1362 outFileSize += compBytes;
1363 strm.next_out = (BYTE*)writeJob->buffer;
1364 strm.avail_out = writeJob->bufferSize;
1365 } }
1366 if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
1367 DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
1368 (unsigned)(inFileSize>>20),
1369 (double)outFileSize/(double)inFileSize*100)
1370 else
1371 DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
1372 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1373 (double)outFileSize/(double)inFileSize*100);
1374 if (ret == LZMA_STREAM_END) break;
1375 }
1376
1377 lzma_end(&strm);
1378 *readsize = inFileSize;
1379
1380 AIO_WritePool_releaseIoJob(writeJob);
1381 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
1382
1383 return outFileSize;
1384}
1385#endif
1386
1387#ifdef ZSTD_LZ4COMPRESS
1388
/* lz4 versions up to 1.6.0 : map the LZ4F_-prefixed names onto the unprefixed ones */
#if LZ4_VERSION_NUMBER <= 10600
#define LZ4F_blockLinked blockLinked
#define LZ4F_max64KB max64KB
#endif

/* FIO_LZ4_GetBlockSize_FromBlockId() :
 * @return : 2^(8 + 2*id), i.e. 256 for id 0, multiplied by 4 for each increment of `id`.
 */
static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
{
    int const sizeLog = 8 + (2 * id);
    return 1 << sizeLog;
}
1395
1396static unsigned long long
1397FIO_compressLz4Frame(cRess_t* ress,
1398 const char* srcFileName, U64 const srcFileSize,
1399 int compressionLevel, int checksumFlag,
1400 U64* readsize)
1401{
1402 const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
1403 unsigned long long inFileSize = 0, outFileSize = 0;
1404
1405 LZ4F_preferences_t prefs;
1406 LZ4F_compressionContext_t ctx;
1407
1408 IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
1409
1410 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
1411 if (LZ4F_isError(errorCode))
1412 EXM_THROW(31, "zstd: failed to create lz4 compression context");
1413
1414 memset(&prefs, 0, sizeof(prefs));
1415
1416 assert(blockSize <= ress->readCtx->base.jobBufferSize);
1417
1418 /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
1419 prefs.autoFlush = 0;
1420 prefs.compressionLevel = compressionLevel;
1421 prefs.frameInfo.blockMode = LZ4F_blockLinked;
1422 prefs.frameInfo.blockSizeID = LZ4F_max64KB;
1423 prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
1424#if LZ4_VERSION_NUMBER >= 10600
1425 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
1426#endif
1427 assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);
1428
1429 {
1430 size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
1431 if (LZ4F_isError(headerSize))
1432 EXM_THROW(33, "File header generation failed : %s",
1433 LZ4F_getErrorName(headerSize));
1434 writeJob->usedBufferSize = headerSize;
1435 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1436 outFileSize += headerSize;
1437
1438 /* Read first block */
1439 inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
1440
1441 /* Main Loop */
1442 while (ress->readCtx->srcBufferLoaded) {
1443 size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
1444 size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
1445 ress->readCtx->srcBuffer, inSize, NULL);
1446 if (LZ4F_isError(outSize))
1447 EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
1448 srcFileName, LZ4F_getErrorName(outSize));
1449 outFileSize += outSize;
1450 if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
1451 DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
1452 (unsigned)(inFileSize>>20),
1453 (double)outFileSize/(double)inFileSize*100)
1454 } else {
1455 DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
1456 (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
1457 (double)outFileSize/(double)inFileSize*100);
1458 }
1459
1460 /* Write Block */
1461 writeJob->usedBufferSize = outSize;
1462 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1463
1464 /* Read next block */
1465 AIO_ReadPool_consumeBytes(ress->readCtx, inSize);
1466 inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
1467 }
1468
1469 /* End of Stream mark */
1470 headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
1471 if (LZ4F_isError(headerSize))
1472 EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
1473 srcFileName, LZ4F_getErrorName(headerSize));
1474
1475 writeJob->usedBufferSize = headerSize;
1476 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1477 outFileSize += headerSize;
1478 }
1479
1480 *readsize = inFileSize;
1481 LZ4F_freeCompressionContext(ctx);
1482 AIO_WritePool_releaseIoJob(writeJob);
1483 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
1484
1485 return outFileSize;
1486}
1487#endif
1488
1489static unsigned long long
1490FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
1491 FIO_prefs_t* const prefs,
1492 const cRess_t* ressPtr,
1493 const char* srcFileName, U64 fileSize,
1494 int compressionLevel, U64* readsize)
1495{
1496 cRess_t const ress = *ressPtr;
1497 IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);
1498
1499 U64 compressedfilesize = 0;
1500 ZSTD_EndDirective directive = ZSTD_e_continue;
1501 U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1502
1503 /* stats */
1504 ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
1505 ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
1506 typedef enum { noChange, slower, faster } speedChange_e;
1507 speedChange_e speedChange = noChange;
1508 unsigned flushWaiting = 0;
1509 unsigned inputPresented = 0;
1510 unsigned inputBlocked = 0;
1511 unsigned lastJobID = 0;
1512 UTIL_time_t lastAdaptTime = UTIL_getTime();
1513 U64 const adaptEveryMicro = REFRESH_RATE;
1514
1515 UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
1516
1517 DISPLAYLEVEL(6, "compression using zstd format \n");
1518
1519 /* init */
1520 if (fileSize != UTIL_FILESIZE_UNKNOWN) {
1521 pledgedSrcSize = fileSize;
1522 CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
1523 } else if (prefs->streamSrcSize > 0) {
1524 /* unknown source size; use the declared stream size */
1525 pledgedSrcSize = prefs->streamSrcSize;
1526 CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
1527 }
1528
1529 {
1530 int windowLog;
1531 UTIL_HumanReadableSize_t windowSize;
1532 CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
1533 if (windowLog == 0) {
1534 if (prefs->ldmFlag) {
1535 /* If long mode is set without a window size libzstd will set this size internally */
1536 windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
1537 } else {
1538 const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
1539 windowLog = (int)cParams.windowLog;
1540 }
1541 }
1542 windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
1543 DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
1544 }
1545 (void)srcFileName;
1546
1547 /* Main compression loop */
1548 do {
1549 size_t stillToFlush;
1550 /* Fill input Buffer */
1551 size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
1552 ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 );
1553 DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
1554 *readsize += inSize;
1555
1556 if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
1557 directive = ZSTD_e_end;
1558
1559 stillToFlush = 1;
1560 while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
1561 || (directive == ZSTD_e_end && stillToFlush != 0) ) {
1562
1563 size_t const oldIPos = inBuff.pos;
1564 ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
1565 size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
1566 CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
1567 AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);
1568
1569 /* count stats */
1570 inputPresented++;
1571 if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
1572 if (!toFlushNow) flushWaiting = 1;
1573
1574 /* Write compressed stream */
1575 DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
1576 (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
1577 if (outBuff.pos) {
1578 writeJob->usedBufferSize = outBuff.pos;
1579 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
1580 compressedfilesize += outBuff.pos;
1581 }
1582
1583 /* adaptive mode : statistics measurement and speed correction */
1584 if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) {
1585 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1586
1587 lastAdaptTime = UTIL_getTime();
1588
1589 /* check output speed */
1590 if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
1591
1592 unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
1593 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
1594 assert(zfp.produced >= previous_zfp_update.produced);
1595 assert(prefs->nbWorkers >= 1);
1596
1597 /* test if compression is blocked
1598 * either because output is slow and all buffers are full
1599 * or because input is slow and no job can start while waiting for at least one buffer to be filled.
1600 * note : exclude starting part, since currentJobID > 1 */
1601 if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
1602 && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
1603 ) {
1604 DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
1605 speedChange = slower;
1606 }
1607
1608 previous_zfp_update = zfp;
1609
1610 if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
1611 && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
1612 ) {
1613 DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
1614 speedChange = slower;
1615 }
1616 flushWaiting = 0;
1617 }
1618
1619 /* course correct only if there is at least one new job completed */
1620 if (zfp.currentJobID > lastJobID) {
1621 DISPLAYLEVEL(6, "compression level adaptation check \n")
1622
1623 /* check input speed */
1624 if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
1625 if (inputBlocked <= 0) {
1626 DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
1627 speedChange = slower;
1628 } else if (speedChange == noChange) {
1629 unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
1630 unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
1631 unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
1632 unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
1633 previous_zfp_correction = zfp;
1634 assert(inputPresented > 0);
1635 DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
1636 inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
1637 (unsigned)newlyIngested, (unsigned)newlyConsumed,
1638 (unsigned)newlyFlushed, (unsigned)newlyProduced);
1639 if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
1640 && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
1641 && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
1642 ) {
1643 DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
1644 newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
1645 speedChange = faster;
1646 }
1647 }
1648 inputBlocked = 0;
1649 inputPresented = 0;
1650 }
1651
1652 if (speedChange == slower) {
1653 DISPLAYLEVEL(6, "slower speed , higher compression \n")
1654 compressionLevel ++;
1655 if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
1656 if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
1657 compressionLevel += (compressionLevel == 0); /* skip 0 */
1658 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1659 }
1660 if (speedChange == faster) {
1661 DISPLAYLEVEL(6, "faster speed , lighter compression \n")
1662 compressionLevel --;
1663 if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
1664 compressionLevel -= (compressionLevel == 0); /* skip 0 */
1665 ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
1666 }
1667 speedChange = noChange;
1668
1669 lastJobID = zfp.currentJobID;
1670 } /* if (zfp.currentJobID > lastJobID) */
1671 } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */
1672
1673 /* display notification */
1674 if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) {
1675 ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
1676 double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
1677 UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
1678 UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
1679 UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
1680
1681 DELAY_NEXT_UPDATE();
1682
1683 /* display progress notifications */
1684 DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */
1685 if (g_display_prefs.displayLevel >= 3) {
1686 /* Verbose progress update */
1687 DISPLAY_PROGRESS(
1688 "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ",
1689 compressionLevel,
1690 buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
1691 consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
1692 produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
1693 cShare );
1694 } else {
1695 /* Require level 2 or forcibly displayed progress counter for summarized updates */
1696 if (fCtx->nbFilesTotal > 1) {
1697 size_t srcFileNameSize = strlen(srcFileName);
1698 /* Ensure that the string we print is roughly the same size each time */
1699 if (srcFileNameSize > 18) {
1700 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
1701 DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ",
1702 fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
1703 } else {
1704 DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ",
1705 fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
1706 }
1707 }
1708 DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
1709 if (fileSize != UTIL_FILESIZE_UNKNOWN)
1710 DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
1711 DISPLAY_PROGRESS(" ==> %2.f%%", cShare);
1712 }
1713 } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */
1714 } /* while ((inBuff.pos != inBuff.size) */
1715 } while (directive != ZSTD_e_end);
1716
1717 if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
1718 EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
1719 (unsigned long long)*readsize, (unsigned long long)fileSize);
1720 }
1721
1722 AIO_WritePool_releaseIoJob(writeJob);
1723 AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);
1724
1725 return compressedfilesize;
1726}
1727
1728/*! FIO_compressFilename_internal() :
1729 * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
1730 * @return : 0 : compression completed correctly,
1731 * 1 : missing or pb opening srcFileName
1732 */
1733static int
1734FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
1735 FIO_prefs_t* const prefs,
1736 cRess_t ress,
1737 const char* dstFileName, const char* srcFileName,
1738 int compressionLevel)
1739{
1740 UTIL_time_t const timeStart = UTIL_getTime();
1741 clock_t const cpuStart = clock();
1742 U64 readsize = 0;
1743 U64 compressedfilesize = 0;
1744 U64 const fileSize = UTIL_getFileSize(srcFileName);
1745 DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
1746
1747 /* compression format selection */
1748 switch (prefs->compressionType) {
1749 default:
1750 case FIO_zstdCompression:
1751 compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
1752 break;
1753
1754 case FIO_gzipCompression:
1755#ifdef ZSTD_GZCOMPRESS
1756 compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
1757#else
1758 (void)compressionLevel;
1759 EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
1760 srcFileName);
1761#endif
1762 break;
1763
1764 case FIO_xzCompression:
1765 case FIO_lzmaCompression:
1766#ifdef ZSTD_LZMACOMPRESS
1767 compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
1768#else
1769 (void)compressionLevel;
1770 EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
1771 srcFileName);
1772#endif
1773 break;
1774
1775 case FIO_lz4Compression:
1776#ifdef ZSTD_LZ4COMPRESS
1777 compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
1778#else
1779 (void)compressionLevel;
1780 EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
1781 srcFileName);
1782#endif
1783 break;
1784 }
1785
1786 /* Status */
1787 fCtx->totalBytesInput += (size_t)readsize;
1788 fCtx->totalBytesOutput += (size_t)compressedfilesize;
1789 DISPLAY_PROGRESS("\r%79s\r", "");
1790 if (FIO_shouldDisplayFileSummary(fCtx)) {
1791 UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
1792 UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
1793 if (readsize == 0) {
1794 DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n",
1795 srcFileName,
1796 hr_isize.precision, hr_isize.value, hr_isize.suffix,
1797 hr_osize.precision, hr_osize.value, hr_osize.suffix,
1798 dstFileName);
1799 } else {
1800 DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n",
1801 srcFileName,
1802 (double)compressedfilesize / (double)readsize * 100,
1803 hr_isize.precision, hr_isize.value, hr_isize.suffix,
1804 hr_osize.precision, hr_osize.value, hr_osize.suffix,
1805 dstFileName);
1806 }
1807 }
1808
1809 /* Elapsed Time and CPU Load */
1810 { clock_t const cpuEnd = clock();
1811 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
1812 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
1813 double const timeLength_s = (double)timeLength_ns / 1000000000;
1814 double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
1815 DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
1816 srcFileName, timeLength_s, cpuLoad_pct);
1817 }
1818 return 0;
1819}
1820
1821
1822/*! FIO_compressFilename_dstFile() :
1823 * open dstFileName, or pass-through if ress.file != NULL,
1824 * then start compression with FIO_compressFilename_internal().
1825 * Manages source removal (--rm) and file permissions transfer.
1826 * note : ress.srcFile must be != NULL,
1827 * so reach this function through FIO_compressFilename_srcFile().
1828 * @return : 0 : compression completed correctly,
1829 * 1 : pb
1830 */
1831static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
1832 FIO_prefs_t* const prefs,
1833 cRess_t ress,
1834 const char* dstFileName,
1835 const char* srcFileName,
1836 const stat_t* srcFileStat,
1837 int compressionLevel)
1838{
1839 int closeDstFile = 0;
1840 int result;
1841 int transferStat = 0;
648db22b 1842 int dstFd = -1;
1843
1844 assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
1845 if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
1846 int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS;
1847 if ( strcmp (srcFileName, stdinmark)
1848 && strcmp (dstFileName, stdoutmark)
1849 && UTIL_isRegularFileStat(srcFileStat) ) {
1850 transferStat = 1;
1851 dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS;
1852 }
1853
1854 closeDstFile = 1;
1855 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
f535537f 1856 { FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
1857 if (dstFile==NULL) return 1; /* could not open dstFileName */
1858 dstFd = fileno(dstFile);
1859 AIO_WritePool_setFile(ress.writeCtx, dstFile);
1860 }
648db22b 1861 /* Must only be added after FIO_openDstFile() succeeds.
1862 * Otherwise we may delete the destination file if it already exists,
1863 * and the user presses Ctrl-C when asked if they wish to overwrite.
1864 */
1865 addHandler(dstFileName);
1866 }
1867
1868 result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
1869
1870 if (closeDstFile) {
1871 clearHandler();
1872
1873 if (transferStat) {
1874 UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
1875 }
1876
1877 DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
1878 if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
1879 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
1880 result=1;
1881 }
1882
1883 if (transferStat) {
1884 UTIL_utime(dstFileName, srcFileStat);
1885 }
1886
1887 if ( (result != 0) /* operation failure */
1888 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
1889 ) {
1890 FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
1891 }
1892 }
1893
1894 return result;
1895}
1896
1897/* List used to compare file extensions (used with --exclude-compressed flag)
1898* Different from the suffixList and should only apply to ZSTD compress operationResult
1899*/
1900static const char *compressedFileExtensions[] = {
1901 ZSTD_EXTENSION,
1902 TZSTD_EXTENSION,
1903 GZ_EXTENSION,
1904 TGZ_EXTENSION,
1905 LZMA_EXTENSION,
1906 XZ_EXTENSION,
1907 TXZ_EXTENSION,
1908 LZ4_EXTENSION,
1909 TLZ4_EXTENSION,
f535537f 1910 ".7z",
1911 ".aa3",
1912 ".aac",
1913 ".aar",
1914 ".ace",
1915 ".alac",
1916 ".ape",
1917 ".apk",
1918 ".apng",
1919 ".arc",
1920 ".archive",
1921 ".arj",
1922 ".ark",
1923 ".asf",
1924 ".avi",
1925 ".avif",
1926 ".ba",
1927 ".br",
1928 ".bz2",
1929 ".cab",
1930 ".cdx",
1931 ".chm",
1932 ".cr2",
1933 ".divx",
1934 ".dmg",
1935 ".dng",
1936 ".docm",
1937 ".docx",
1938 ".dotm",
1939 ".dotx",
1940 ".dsft",
1941 ".ear",
1942 ".eftx",
1943 ".emz",
1944 ".eot",
1945 ".epub",
1946 ".f4v",
1947 ".flac",
1948 ".flv",
1949 ".gho",
1950 ".gif",
1951 ".gifv",
1952 ".gnp",
1953 ".iso",
1954 ".jar",
1955 ".jpeg",
1956 ".jpg",
1957 ".jxl",
1958 ".lz",
1959 ".lzh",
1960 ".m4a",
1961 ".m4v",
1962 ".mkv",
1963 ".mov",
1964 ".mp2",
1965 ".mp3",
1966 ".mp4",
1967 ".mpa",
1968 ".mpc",
1969 ".mpe",
1970 ".mpeg",
1971 ".mpg",
1972 ".mpl",
1973 ".mpv",
1974 ".msi",
1975 ".odp",
1976 ".ods",
1977 ".odt",
1978 ".ogg",
1979 ".ogv",
1980 ".otp",
1981 ".ots",
1982 ".ott",
1983 ".pea",
1984 ".png",
1985 ".pptx",
1986 ".qt",
1987 ".rar",
1988 ".s7z",
1989 ".sfx",
1990 ".sit",
1991 ".sitx",
1992 ".sqx",
1993 ".svgz",
1994 ".swf",
1995 ".tbz2",
1996 ".tib",
1997 ".tlz",
1998 ".vob",
1999 ".war",
2000 ".webm",
2001 ".webp",
2002 ".wma",
2003 ".wmv",
2004 ".woff",
2005 ".woff2",
2006 ".wvl",
2007 ".xlsx",
2008 ".xpi",
2009 ".xps",
2010 ".zip",
2011 ".zipx",
2012 ".zoo",
2013 ".zpaq",
648db22b 2014 NULL
2015};
2016
2017/*! FIO_compressFilename_srcFile() :
2018 * @return : 0 : compression completed correctly,
2019 * 1 : missing or pb opening srcFileName
2020 */
2021static int
2022FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
2023 FIO_prefs_t* const prefs,
2024 cRess_t ress,
2025 const char* dstFileName,
2026 const char* srcFileName,
2027 int compressionLevel)
2028{
2029 int result;
2030 FILE* srcFile;
2031 stat_t srcFileStat;
2032 U64 fileSize = UTIL_FILESIZE_UNKNOWN;
2033 DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
2034
2035 if (strcmp(srcFileName, stdinmark)) {
2036 if (UTIL_stat(srcFileName, &srcFileStat)) {
2037 /* failure to stat at all is handled during opening */
2038
2039 /* ensure src is not a directory */
2040 if (UTIL_isDirectoryStat(&srcFileStat)) {
2041 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2042 return 1;
2043 }
2044
2045 /* ensure src is not the same as dict (if present) */
2046 if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) {
2047 DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
2048 return 1;
2049 }
2050 }
2051 }
2052
2053 /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
2054 * YES => ZSTD will skip compression of the file and will return 0.
2055 * NO => ZSTD will resume with compress operation.
2056 */
2057 if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
2058 DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
2059 return 0;
2060 }
2061
2062 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
2063 if (srcFile == NULL) return 1; /* srcFile could not be opened */
2064
2065 /* Don't use AsyncIO for small files */
2066 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
2067 fileSize = UTIL_getFileSizeStat(&srcFileStat);
2068 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
2069 AIO_ReadPool_setAsync(ress.readCtx, 0);
2070 AIO_WritePool_setAsync(ress.writeCtx, 0);
2071 } else {
2072 AIO_ReadPool_setAsync(ress.readCtx, 1);
2073 AIO_WritePool_setAsync(ress.writeCtx, 1);
2074 }
2075
2076 AIO_ReadPool_setFile(ress.readCtx, srcFile);
2077 result = FIO_compressFilename_dstFile(
2078 fCtx, prefs, ress,
2079 dstFileName, srcFileName,
2080 &srcFileStat, compressionLevel);
2081 AIO_ReadPool_closeFile(ress.readCtx);
2082
2083 if ( prefs->removeSrcFile /* --rm */
2084 && result == 0 /* success */
2085 && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
2086 ) {
2087 /* We must clear the handler, since after this point calling it would
2088 * delete both the source and destination files.
2089 */
2090 clearHandler();
2091 if (FIO_removeFile(srcFileName))
2092 EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
2093 }
2094 return result;
2095}
2096
/* checked_index() :
 * @return options[index].
 * When assert() is active, first verifies that index < length.
 */
static const char*
checked_index(const char* options[], size_t length, size_t index)
{
    assert(index < length);
    (void) length;   /* keeps `length` referenced when the assert above is compiled out */
    return *(options + index);
}
2104
2105#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index))
2106
2107void FIO_displayCompressionParameters(const FIO_prefs_t* prefs)
2108{
2109 static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
2110 LZMA_EXTENSION, LZ4_EXTENSION};
2111 static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
2112 static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
2113 static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
2114 static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
2115
2116 assert(g_display_prefs.displayLevel >= 4);
2117
2118 DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
2119 DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
2120 DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
2121 DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
2122 DISPLAY(" --block-size=%d", prefs->blockSize);
2123 if (prefs->adaptiveMode)
2124 DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
2125 DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
2126 DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
2127 if (prefs->streamSrcSize)
2128 DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
2129 if (prefs->srcSizeHint)
2130 DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
2131 if (prefs->targetCBlockSize)
2132 DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
2133 DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
2134 DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
2135 DISPLAY(" --threads=%d", prefs->nbWorkers);
2136 DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
2137 DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
2138 DISPLAY("\n");
2139}
2140
2141#undef INDEX
2142
2143int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
2144 const char* srcFileName, const char* dictFileName,
2145 int compressionLevel, ZSTD_compressionParameters comprParams)
2146{
2147 cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
2148 int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
2149
2150#define DISPLAY_LEVEL_DEFAULT 2
2151
2152 FIO_freeCResources(&ress);
2153 return result;
2154}
2155
2156/* FIO_determineCompressedName() :
2157 * create a destination filename for compressed srcFileName.
2158 * @return a pointer to it.
2159 * This function never returns an error (it may abort() in case of pb)
2160 */
2161static const char*
2162FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
2163{
2164 static size_t dfnbCapacity = 0;
2165 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
2166 char* outDirFilename = NULL;
2167 size_t sfnSize = strlen(srcFileName);
2168 size_t const srcSuffixLen = strlen(suffix);
2169
2170 if(!strcmp(srcFileName, stdinmark)) {
2171 return stdoutmark;
2172 }
2173
2174 if (outDirName) {
2175 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
2176 sfnSize = strlen(outDirFilename);
2177 assert(outDirFilename != NULL);
2178 }
2179
2180 if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
2181 /* resize buffer for dstName */
2182 free(dstFileNameBuffer);
2183 dfnbCapacity = sfnSize + srcSuffixLen + 30;
2184 dstFileNameBuffer = (char*)malloc(dfnbCapacity);
2185 if (!dstFileNameBuffer) {
2186 EXM_THROW(30, "zstd: %s", strerror(errno));
2187 }
2188 }
2189 assert(dstFileNameBuffer != NULL);
2190
2191 if (outDirFilename) {
2192 memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
2193 free(outDirFilename);
2194 } else {
2195 memcpy(dstFileNameBuffer, srcFileName, sfnSize);
2196 }
2197 memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
2198 return dstFileNameBuffer;
2199}
2200
/* FIO_getLargestFileSize() :
 * @return the largest value reported by UTIL_getFileSize()
 *         over the first nbFiles entries of inFileNames,
 *         or 0 when nbFiles == 0.
 */
static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
{
    unsigned long long largest = 0;
    size_t n;
    for (n = 0; n < nbFiles; n++) {
        unsigned long long const size = UTIL_getFileSize(inFileNames[n]);
        if (size > largest) largest = size;
    }
    return largest;
}
2211
2212/* FIO_compressMultipleFilenames() :
2213 * compress nbFiles files
2214 * into either one destination (outFileName),
2215 * or into one file each (outFileName == NULL, but suffix != NULL),
2216 * or into a destination folder (specified with -O)
2217 */
2218int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
2219 FIO_prefs_t* const prefs,
2220 const char** inFileNamesTable,
2221 const char* outMirroredRootDirName,
2222 const char* outDirName,
2223 const char* outFileName, const char* suffix,
2224 const char* dictFileName, int compressionLevel,
2225 ZSTD_compressionParameters comprParams)
2226{
2227 int status;
2228 int error = 0;
2229 cRess_t ress = FIO_createCResources(prefs, dictFileName,
2230 FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
2231 compressionLevel, comprParams);
2232
2233 /* init */
2234 assert(outFileName != NULL || suffix != NULL);
2235 if (outFileName != NULL) { /* output into a single destination (stdout typically) */
2236 FILE *dstFile;
2237 if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
2238 FIO_freeCResources(&ress);
2239 return 1;
2240 }
2241 dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
2242 if (dstFile == NULL) { /* could not open outFileName */
2243 error = 1;
2244 } else {
2245 AIO_WritePool_setFile(ress.writeCtx, dstFile);
2246 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
2247 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
2248 if (!status) fCtx->nbFilesProcessed++;
2249 error |= status;
2250 }
2251 if (AIO_WritePool_closeFile(ress.writeCtx))
2252 EXM_THROW(29, "Write error (%s) : cannot properly close %s",
2253 strerror(errno), outFileName);
2254 }
2255 } else {
2256 if (outMirroredRootDirName)
2257 UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
2258
2259 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
2260 const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
2261 const char* dstFileName = NULL;
2262 if (outMirroredRootDirName) {
2263 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
2264 if (validMirroredDirName) {
2265 dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
2266 free(validMirroredDirName);
2267 } else {
2268 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
2269 error=1;
2270 continue;
2271 }
2272 } else {
2273 dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
2274 }
2275 status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
2276 if (!status) fCtx->nbFilesProcessed++;
2277 error |= status;
2278 }
2279
2280 if (outDirName)
2281 FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
2282 }
2283
2284 if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
2285 UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
2286 UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
2287
2288 DISPLAY_PROGRESS("\r%79s\r", "");
2289 if (fCtx->totalBytesInput == 0) {
2290 DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n",
2291 fCtx->nbFilesProcessed,
2292 hr_isize.precision, hr_isize.value, hr_isize.suffix,
2293 hr_osize.precision, hr_osize.value, hr_osize.suffix);
2294 } else {
2295 DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n",
2296 fCtx->nbFilesProcessed,
2297 (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
2298 hr_isize.precision, hr_isize.value, hr_isize.suffix,
2299 hr_osize.precision, hr_osize.value, hr_osize.suffix);
2300 }
2301 }
2302
2303 FIO_freeCResources(&ress);
2304 return error;
2305}
2306
2307#endif /* #ifndef ZSTD_NOCOMPRESS */
2308
2309
2310
2311#ifndef ZSTD_NODECOMPRESS
2312
2313/* **************************************************************************
2314 * Decompression
2315 ***************************************************************************/
2316typedef struct {
2317 FIO_Dict_t dict;
2318 ZSTD_DStream* dctx;
2319 WritePoolCtx_t *writeCtx;
2320 ReadPoolCtx_t *readCtx;
2321} dRess_t;
2322
2323static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
2324{
2325 int useMMap = prefs->mmapDict == ZSTD_ps_enable;
2326 int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
2327 stat_t statbuf;
2328 dRess_t ress;
f535537f 2329 memset(&statbuf, 0, sizeof(statbuf));
648db22b 2330 memset(&ress, 0, sizeof(ress));
2331
2332 FIO_getDictFileStat(dictFileName, &statbuf);
2333
2334 if (prefs->patchFromMode){
2335 U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
2336 useMMap |= dictSize > prefs->memLimit;
2337 FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
2338 }
2339
2340 /* Allocation */
2341 ress.dctx = ZSTD_createDStream();
2342 if (ress.dctx==NULL)
2343 EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
2344 CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
2345 CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
2346
2347 /* dictionary */
2348 {
2349 FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
2350 FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType);
2351
2352 CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
2353
2354 if (prefs->patchFromMode){
2355 CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
2356 } else {
2357 CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
2358 }
2359 }
2360
2361 ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
2362 ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
2363 return ress;
2364}
2365
2366static void FIO_freeDResources(dRess_t ress)
2367{
2368 FIO_freeDict(&(ress.dict));
2369 CHECK( ZSTD_freeDStream(ress.dctx) );
2370 AIO_WritePool_free(ress.writeCtx);
2371 AIO_ReadPool_free(ress.readCtx);
2372}
2373
2374/* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
2375 * @return : 0 (no error) */
2376static int FIO_passThrough(dRess_t *ress)
2377{
2378 size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
2379 IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
2380 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
2381
2382 while(ress->readCtx->srcBufferLoaded) {
2383 size_t writeSize;
2384 writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
2385 assert(writeSize <= writeJob->bufferSize);
2386 memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize);
2387 writeJob->usedBufferSize = writeSize;
2388 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
2389 AIO_ReadPool_consumeBytes(ress->readCtx, writeSize);
2390 AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
2391 }
2392 assert(ress->readCtx->reachedEof);
2393 AIO_WritePool_releaseIoJob(writeJob);
2394 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
2395 return 0;
2396}
2397
2398/* FIO_zstdErrorHelp() :
2399 * detailed error message when requested window size is too large */
2400static void
2401FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
2402 const dRess_t* ress,
2403 size_t err,
2404 const char* srcFileName)
2405{
2406 ZSTD_frameHeader header;
2407
2408 /* Help message only for one specific error */
2409 if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
2410 return;
2411
2412 /* Try to decode the frame header */
2413 err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
2414 if (err == 0) {
2415 unsigned long long const windowSize = header.windowSize;
2416 unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
2417 assert(prefs->memLimit > 0);
2418 DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
2419 srcFileName, windowSize, prefs->memLimit);
2420 if (windowLog <= ZSTD_WINDOWLOG_MAX) {
2421 unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
2422 assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
2423 DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
2424 srcFileName, windowLog, windowMB);
2425 return;
2426 } }
2427 DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
2428 srcFileName, ZSTD_WINDOWLOG_MAX);
2429}
2430
2431/** FIO_decompressFrame() :
2432 * @return : size of decoded zstd frame, or an error code
2433 */
2434#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
2435static unsigned long long
2436FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
2437 const FIO_prefs_t* const prefs,
2438 const char* srcFileName,
2439 U64 alreadyDecoded) /* for multi-frames streams */
2440{
2441 U64 frameSize = 0;
2442 IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
2443
f535537f 2444 /* display last 20 characters only when not --verbose */
648db22b 2445 { size_t const srcFileLength = strlen(srcFileName);
f535537f 2446 if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
2447 srcFileName += srcFileLength-20;
648db22b 2448 }
2449
2450 ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
2451
2452 /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
2453 AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);
2454
2455 /* Main decompression Loop */
2456 while (1) {
2457 ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 );
2458 ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
2459 size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
2460 UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
2461 if (ZSTD_isError(readSizeHint)) {
2462 DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
2463 srcFileName, ZSTD_getErrorName(readSizeHint));
2464 FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
2465 AIO_WritePool_releaseIoJob(writeJob);
2466 return FIO_ERROR_FRAME_DECODING;
2467 }
2468
2469 /* Write block */
2470 writeJob->usedBufferSize = outBuff.pos;
2471 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
2472 frameSize += outBuff.pos;
2473 if (fCtx->nbFilesTotal > 1) {
2474 size_t srcFileNameSize = strlen(srcFileName);
2475 if (srcFileNameSize > 18) {
2476 const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
2477 DISPLAYUPDATE_PROGRESS(
2478 "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
2479 fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
2480 } else {
2481 DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
2482 fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
2483 }
2484 } else {
2485 DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ",
2486 srcFileName, hrs.precision, hrs.value, hrs.suffix);
2487 }
2488
2489 AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);
2490
2491 if (readSizeHint == 0) break; /* end of frame */
2492
2493 /* Fill input buffer */
2494 { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */
2495 if (ress->readCtx->srcBufferLoaded < toDecode) {
2496 size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
2497 if (readSize==0) {
2498 DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
2499 srcFileName);
2500 AIO_WritePool_releaseIoJob(writeJob);
2501 return FIO_ERROR_FRAME_DECODING;
2502 }
2503 } } }
2504
2505 AIO_WritePool_releaseIoJob(writeJob);
2506 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
2507
2508 return frameSize;
2509}
2510
2511
#ifdef ZSTD_GZDECOMPRESS
/* FIO_decompressGzFrame() :
 * Inflates one gzip stream from the read pool into the write pool, using zlib.
 * @return : amount of decoded data,
 *           or FIO_ERROR_FRAME_DECODING (init failure, decoding error,
 *           premature end of input, or inflateEnd() failure)
 */
static unsigned long long
FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
{
    unsigned long long outFileSize = 0;
    z_stream strm;
    int flush = Z_NO_FLUSH;
    int decodingError = 0;
    IOJob_t* writeJob = NULL;

    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.next_in = 0;
    strm.avail_in = 0;
    /* see https://www.zlib.net/manual.html */
    if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) {
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (Bytef*)writeJob->buffer;
    strm.avail_out = (uInt)writeJob->bufferSize;
    strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
    strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;

    for ( ; ; ) {
        int ret;

        /* input exhausted : refill, and request a final flush once the source is dry */
        if (strm.avail_in == 0) {
            AIO_ReadPool_consumeAndRefill(ress->readCtx);
            if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
            strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
            strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
        }

        ret = inflate(&strm, flush);
        switch (ret) {
            case Z_OK:
            case Z_STREAM_END:
                break;
            case Z_BUF_ERROR:
                DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
                decodingError = 1;
                break;
            default:
                DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
                decodingError = 1;
                break;
        }
        if (decodingError) break;

        /* hand over whatever was produced, then continue with a fresh output buffer */
        {   size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
            if (decompBytes) {
                writeJob->usedBufferSize = decompBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += decompBytes;
                strm.next_out = (Bytef*)writeJob->buffer;
                strm.avail_out = (uInt)writeJob->bufferSize;
            }
        }

        if (ret == Z_STREAM_END) break;
    }

    /* only the bytes actually taken by zlib are consumed from the read pool */
    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);

    /* release resources ; a failure here is only reported when no error was detected before */
    {   int const endStatus = inflateEnd(&strm);
        if (endStatus != Z_OK && !decodingError) {
            DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
            decodingError = 1;
        }
    }
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2578
#ifdef ZSTD_LZMADECOMPRESS
/* FIO_decompressLzmaFrame() :
 * Decodes one xz or lzma stream from the read pool into the write pool, using liblzma.
 * plain_lzma : non-zero selects lzma_alone_decoder() (LZMA),
 *              zero selects lzma_stream_decoder() (XZ).
 * @return : amount of decoded data,
 *           or FIO_ERROR_FRAME_DECODING (decoder init failure, decoding error,
 *           or premature end of input)
 */
static unsigned long long
FIO_decompressLzmaFrame(dRess_t* ress,
                        const char* srcFileName, int plain_lzma)
{
    unsigned long long outFileSize = 0;
    lzma_stream strm = LZMA_STREAM_INIT;
    lzma_action action = LZMA_RUN;
    lzma_ret initRet;
    int decodingError = 0;
    IOJob_t *writeJob = NULL;

    strm.next_in = 0;
    strm.avail_in = 0;
    if (plain_lzma) {
        initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
    } else {
        initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
    }

    if (initRet != LZMA_OK) {
        /* file name first, then the failing decoder : same layout as the other messages */
        DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
                        srcFileName,
                        plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
                        initRet);
        return FIO_ERROR_FRAME_DECODING;
    }

    writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
    strm.next_out = (BYTE*)writeJob->buffer;
    strm.avail_out = writeJob->bufferSize;
    strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
    strm.avail_in = ress->readCtx->srcBufferLoaded;

    for ( ; ; ) {
        lzma_ret ret;
        /* input exhausted : refill, and switch to LZMA_FINISH once the source is dry */
        if (strm.avail_in == 0) {
            AIO_ReadPool_consumeAndRefill(ress->readCtx);
            if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
            strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
            strm.avail_in = ress->readCtx->srcBufferLoaded;
        }
        ret = lzma_code(&strm, action);

        if (ret == LZMA_BUF_ERROR) {
            DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
            decodingError = 1; break;
        }
        if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
            DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
                            srcFileName, ret);
            decodingError = 1; break;
        }
        /* hand over whatever was produced, then continue with a fresh output buffer */
        {   size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
            if (decompBytes) {
                writeJob->usedBufferSize = decompBytes;
                AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
                outFileSize += decompBytes;
                strm.next_out = (BYTE*)writeJob->buffer;
                strm.avail_out = writeJob->bufferSize;
        }   }
        if (ret == LZMA_STREAM_END) break;
    }

    /* only the bytes actually taken by liblzma are consumed from the read pool */
    AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
    lzma_end(&strm);
    AIO_WritePool_releaseIoJob(writeJob);
    AIO_WritePool_sparseWriteEnd(ress->writeCtx);
    return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
}
#endif
2649
2650#ifdef ZSTD_LZ4DECOMPRESS
2651static unsigned long long
2652FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName)
2653{
2654 unsigned long long filesize = 0;
2655 LZ4F_errorCode_t nextToLoad = 4;
2656 LZ4F_decompressionContext_t dCtx;
2657 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
2658 int decodingError = 0;
2659 IOJob_t *writeJob = NULL;
2660
2661 if (LZ4F_isError(errorCode)) {
2662 DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
2663 return FIO_ERROR_FRAME_DECODING;
2664 }
2665
2666 writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
2667
2668 /* Main Loop */
2669 for (;nextToLoad;) {
2670 size_t pos = 0;
2671 size_t decodedBytes = writeJob->bufferSize;
2672 int fullBufferDecoded = 0;
2673
2674 /* Read input */
2675 AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad);
2676 if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */
2677
2678 while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */
2679 /* Decode Input (at least partially) */
2680 size_t remaining = ress->readCtx->srcBufferLoaded - pos;
2681 decodedBytes = writeJob->bufferSize;
2682 nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos,
2683 &remaining, NULL);
2684 if (LZ4F_isError(nextToLoad)) {
2685 DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
2686 srcFileName, LZ4F_getErrorName(nextToLoad));
2687 decodingError = 1; nextToLoad = 0; break;
2688 }
2689 pos += remaining;
2690 assert(pos <= ress->readCtx->srcBufferLoaded);
2691 fullBufferDecoded = decodedBytes == writeJob->bufferSize;
2692
2693 /* Write Block */
2694 if (decodedBytes) {
2695 UTIL_HumanReadableSize_t hrs;
2696 writeJob->usedBufferSize = decodedBytes;
2697 AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
2698 filesize += decodedBytes;
2699 hrs = UTIL_makeHumanReadableSize(filesize);
2700 DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
2701 }
2702
2703 if (!nextToLoad) break;
2704 }
2705 AIO_ReadPool_consumeBytes(ress->readCtx, pos);
2706 }
2707 if (nextToLoad!=0) {
2708 DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
2709 decodingError=1;
2710 }
2711
2712 LZ4F_freeDecompressionContext(dCtx);
2713 AIO_WritePool_releaseIoJob(writeJob);
2714 AIO_WritePool_sparseWriteEnd(ress->writeCtx);
2715
2716 return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
2717}
2718#endif
2719
2720
2721
2722/** FIO_decompressFrames() :
2723 * Find and decode frames inside srcFile
2724 * srcFile presumed opened and valid
2725 * @return : 0 : OK
2726 * 1 : error
2727 */
2728static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
2729 dRess_t ress, const FIO_prefs_t* const prefs,
2730 const char* dstFileName, const char* srcFileName)
2731{
2732 unsigned readSomething = 0;
2733 unsigned long long filesize = 0;
2734 int passThrough = prefs->passThrough;
2735
2736 if (passThrough == -1) {
2737 /* If pass-through mode is not explicitly enabled or disabled,
2738 * default to the legacy behavior of enabling it if we are writing
2739 * to stdout with the overwrite flag enabled.
2740 */
2741 passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark);
2742 }
2743 assert(passThrough == 0 || passThrough == 1);
2744
2745 /* for each frame */
2746 for ( ; ; ) {
2747 /* check magic number -> version */
2748 size_t const toRead = 4;
2749 const BYTE* buf;
2750 AIO_ReadPool_fillBuffer(ress.readCtx, toRead);
2751 buf = (const BYTE*)ress.readCtx->srcBuffer;
2752 if (ress.readCtx->srcBufferLoaded==0) {
2753 if (readSomething==0) { /* srcFile is empty (which is invalid) */
2754 DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
2755 return 1;
2756 } /* else, just reached frame boundary */
2757 break; /* no more input */
2758 }
2759 readSomething = 1; /* there is at least 1 byte in srcFile */
2760 if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */
2761 if (passThrough) {
2762 return FIO_passThrough(&ress);
2763 }
2764 DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
2765 return 1;
2766 }
2767 if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) {
2768 unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize);
2769 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2770 filesize += frameSize;
2771 } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
2772#ifdef ZSTD_GZDECOMPRESS
2773 unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName);
2774 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2775 filesize += frameSize;
2776#else
2777 DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
2778 return 1;
2779#endif
2780 } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
2781 || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
2782#ifdef ZSTD_LZMADECOMPRESS
2783 unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD);
2784 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2785 filesize += frameSize;
2786#else
2787 DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
2788 return 1;
2789#endif
2790 } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
2791#ifdef ZSTD_LZ4DECOMPRESS
2792 unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName);
2793 if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
2794 filesize += frameSize;
2795#else
2796 DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
2797 return 1;
2798#endif
2799 } else if (passThrough) {
2800 return FIO_passThrough(&ress);
2801 } else {
2802 DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
2803 return 1;
2804 } } /* for each frame */
2805
2806 /* Final Status */
2807 fCtx->totalBytesOutput += (size_t)filesize;
2808 DISPLAY_PROGRESS("\r%79s\r", "");
2809 if (FIO_shouldDisplayFileSummary(fCtx))
2810 DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize);
2811
2812 return 0;
2813}
2814
2815/** FIO_decompressDstFile() :
2816 open `dstFileName`, or pass-through if writeCtx's file is already != 0,
2817 then start decompression process (FIO_decompressFrames()).
2818 @return : 0 : OK
2819 1 : operation aborted
2820*/
2821static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
2822 FIO_prefs_t* const prefs,
2823 dRess_t ress,
2824 const char* dstFileName,
2825 const char* srcFileName,
2826 const stat_t* srcFileStat)
2827{
2828 int result;
2829 int releaseDstFile = 0;
2830 int transferStat = 0;
2831 int dstFd = 0;
2832
2833 if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) {
2834 FILE *dstFile;
2835 int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
2836 if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
2837 && strcmp(dstFileName, stdoutmark)
2838 && UTIL_isRegularFileStat(srcFileStat) ) {
2839 transferStat = 1;
2840 dstFilePermissions = TEMPORARY_FILE_PERMISSIONS;
2841 }
2842
2843 releaseDstFile = 1;
2844
2845 dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
2846 if (dstFile==NULL) return 1;
2847 dstFd = fileno(dstFile);
2848 AIO_WritePool_setFile(ress.writeCtx, dstFile);
2849
2850 /* Must only be added after FIO_openDstFile() succeeds.
2851 * Otherwise we may delete the destination file if it already exists,
2852 * and the user presses Ctrl-C when asked if they wish to overwrite.
2853 */
2854 addHandler(dstFileName);
2855 }
2856
2857 result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName);
2858
2859 if (releaseDstFile) {
2860 clearHandler();
2861
2862 if (transferStat) {
2863 UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
2864 }
2865
2866 if (AIO_WritePool_closeFile(ress.writeCtx)) {
2867 DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
2868 result = 1;
2869 }
2870
2871 if (transferStat) {
2872 UTIL_utime(dstFileName, srcFileStat);
2873 }
2874
2875 if ( (result != 0) /* operation failure */
2876 && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
2877 ) {
2878 FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
2879 }
2880 }
2881
2882 return result;
2883}
2884
2885
2886/** FIO_decompressSrcFile() :
2887 Open `srcFileName`, transfer control to decompressDstFile()
2888 @return : 0 : OK
2889 1 : error
2890*/
2891static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
2892{
2893 FILE* srcFile;
2894 stat_t srcFileStat;
2895 int result;
2896 U64 fileSize = UTIL_FILESIZE_UNKNOWN;
2897
2898 if (UTIL_isDirectory(srcFileName)) {
2899 DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
2900 return 1;
2901 }
2902
2903 srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
2904 if (srcFile==NULL) return 1;
2905
2906 /* Don't use AsyncIO for small files */
2907 if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
2908 fileSize = UTIL_getFileSizeStat(&srcFileStat);
2909 if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
2910 AIO_ReadPool_setAsync(ress.readCtx, 0);
2911 AIO_WritePool_setAsync(ress.writeCtx, 0);
2912 } else {
2913 AIO_ReadPool_setAsync(ress.readCtx, 1);
2914 AIO_WritePool_setAsync(ress.writeCtx, 1);
2915 }
2916
2917 AIO_ReadPool_setFile(ress.readCtx, srcFile);
2918
2919 result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat);
2920
2921 AIO_ReadPool_setFile(ress.readCtx, NULL);
2922
2923 /* Close file */
2924 if (fclose(srcFile)) {
2925 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
2926 return 1;
2927 }
2928 if ( prefs->removeSrcFile /* --rm */
2929 && (result==0) /* decompression successful */
2930 && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
2931 /* We must clear the handler, since after this point calling it would
2932 * delete both the source and destination files.
2933 */
2934 clearHandler();
2935 if (FIO_removeFile(srcFileName)) {
2936 /* failed to remove src file */
2937 DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
2938 return 1;
2939 } }
2940 return result;
2941}
2942
2943
2944
2945int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
2946 const char* dstFileName, const char* srcFileName,
2947 const char* dictFileName)
2948{
2949 dRess_t const ress = FIO_createDResources(prefs, dictFileName);
2950
2951 int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
2952
2953
2954
2955 FIO_freeDResources(ress);
2956 return decodingError;
2957}
2958
2959static const char *suffixList[] = {
2960 ZSTD_EXTENSION,
2961 TZSTD_EXTENSION,
2962#ifndef ZSTD_NODECOMPRESS
2963 ZSTD_ALT_EXTENSION,
2964#endif
2965#ifdef ZSTD_GZDECOMPRESS
2966 GZ_EXTENSION,
2967 TGZ_EXTENSION,
2968#endif
2969#ifdef ZSTD_LZMADECOMPRESS
2970 LZMA_EXTENSION,
2971 XZ_EXTENSION,
2972 TXZ_EXTENSION,
2973#endif
2974#ifdef ZSTD_LZ4DECOMPRESS
2975 LZ4_EXTENSION,
2976 TLZ4_EXTENSION,
2977#endif
2978 NULL
2979};
2980
2981static const char *suffixListStr =
2982 ZSTD_EXTENSION "/" TZSTD_EXTENSION
2983#ifdef ZSTD_GZDECOMPRESS
2984 "/" GZ_EXTENSION "/" TGZ_EXTENSION
2985#endif
2986#ifdef ZSTD_LZMADECOMPRESS
2987 "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
2988#endif
2989#ifdef ZSTD_LZ4DECOMPRESS
2990 "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
2991#endif
2992;
2993
2994/* FIO_determineDstName() :
2995 * create a destination filename from a srcFileName.
2996 * @return a pointer to it.
2997 * @return == NULL if there is an error */
2998static const char*
2999FIO_determineDstName(const char* srcFileName, const char* outDirName)
3000{
3001 static size_t dfnbCapacity = 0;
3002 static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
3003 size_t dstFileNameEndPos;
3004 char* outDirFilename = NULL;
3005 const char* dstSuffix = "";
3006 size_t dstSuffixLen = 0;
3007
3008 size_t sfnSize = strlen(srcFileName);
3009
3010 size_t srcSuffixLen;
3011 const char* const srcSuffix = strrchr(srcFileName, '.');
3012
3013 if(!strcmp(srcFileName, stdinmark)) {
3014 return stdoutmark;
3015 }
3016
3017 if (srcSuffix == NULL) {
3018 DISPLAYLEVEL(1,
3019 "zstd: %s: unknown suffix (%s expected). "
3020 "Can't derive the output file name. "
3021 "Specify it with -o dstFileName. Ignoring.\n",
3022 srcFileName, suffixListStr);
3023 return NULL;
3024 }
3025 srcSuffixLen = strlen(srcSuffix);
3026
3027 {
3028 const char** matchedSuffixPtr;
3029 for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
3030 if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
3031 break;
3032 }
3033 }
3034
3035 /* check suffix is authorized */
3036 if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
3037 DISPLAYLEVEL(1,
3038 "zstd: %s: unknown suffix (%s expected). "
3039 "Can't derive the output file name. "
3040 "Specify it with -o dstFileName. Ignoring.\n",
3041 srcFileName, suffixListStr);
3042 return NULL;
3043 }
3044
3045 if ((*matchedSuffixPtr)[1] == 't') {
3046 dstSuffix = ".tar";
3047 dstSuffixLen = strlen(dstSuffix);
3048 }
3049 }
3050
3051 if (outDirName) {
3052 outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
3053 sfnSize = strlen(outDirFilename);
3054 assert(outDirFilename != NULL);
3055 }
3056
3057 if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
3058 /* allocate enough space to write dstFilename into it */
3059 free(dstFileNameBuffer);
3060 dfnbCapacity = sfnSize + 20;
3061 dstFileNameBuffer = (char*)malloc(dfnbCapacity);
3062 if (dstFileNameBuffer==NULL)
3063 EXM_THROW(74, "%s : not enough memory for dstFileName",
3064 strerror(errno));
3065 }
3066
3067 /* return dst name == src name truncated from suffix */
3068 assert(dstFileNameBuffer != NULL);
3069 dstFileNameEndPos = sfnSize - srcSuffixLen;
3070 if (outDirFilename) {
3071 memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
3072 free(outDirFilename);
3073 } else {
3074 memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
3075 }
3076
3077 /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
3078 * extension on decompression. Also writes terminating null. */
3079 strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
3080 return dstFileNameBuffer;
3081
3082 /* note : dstFileNameBuffer memory is not going to be free */
3083}
3084
3085int
3086FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
3087 FIO_prefs_t* const prefs,
3088 const char** srcNamesTable,
3089 const char* outMirroredRootDirName,
3090 const char* outDirName, const char* outFileName,
3091 const char* dictFileName)
3092{
3093 int status;
3094 int error = 0;
3095 dRess_t ress = FIO_createDResources(prefs, dictFileName);
3096
3097 if (outFileName) {
3098 if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
3099 FIO_freeDResources(ress);
3100 return 1;
3101 }
3102 if (!prefs->testMode) {
3103 FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
3104 if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
3105 AIO_WritePool_setFile(ress.writeCtx, dstFile);
3106 }
3107 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
3108 status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
3109 if (!status) fCtx->nbFilesProcessed++;
3110 error |= status;
3111 }
3112 if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx)))
3113 EXM_THROW(72, "Write error : %s : cannot properly close output file",
3114 strerror(errno));
3115 } else {
3116 if (outMirroredRootDirName)
3117 UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
3118
3119 for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
3120 const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
3121 const char* dstFileName = NULL;
3122 if (outMirroredRootDirName) {
3123 char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
3124 if (validMirroredDirName) {
3125 dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
3126 free(validMirroredDirName);
3127 } else {
3128 DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
3129 }
3130 } else {
3131 dstFileName = FIO_determineDstName(srcFileName, outDirName);
3132 }
3133 if (dstFileName == NULL) { error=1; continue; }
3134 status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
3135 if (!status) fCtx->nbFilesProcessed++;
3136 error |= status;
3137 }
3138 if (outDirName)
3139 FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
3140 }
3141
3142 if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
3143 DISPLAY_PROGRESS("\r%79s\r", "");
3144 DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n",
3145 fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput);
3146 }
3147
3148 FIO_freeDResources(ress);
3149 return error;
3150}
3151
3152/* **************************************************************************
3153 * .zst file info (--list command)
3154 ***************************************************************************/
3155
3156typedef struct {
3157 U64 decompressedSize;
3158 U64 compressedSize;
3159 U64 windowSize;
3160 int numActualFrames;
3161 int numSkippableFrames;
3162 int decompUnavailable;
3163 int usesCheck;
3164 BYTE checksum[4];
3165 U32 nbFiles;
3166 unsigned dictID;
3167} fileInfo_t;
3168
/* InfoError :
 * outcome of analyzing one file for the --list command */
typedef enum {
    info_success         = 0,   /* all frames parsed */
    info_frame_error     = 1,   /* a frame could not be parsed */
    info_not_zstd        = 2,   /* content is not recognized as zstd */
    info_file_error      = 3,   /* file is not a regular file, or could not be opened */
    info_truncated_input = 4    /* file ends before the expected position */
} InfoError;
3176
/* ERROR_IF() :
 * when condition `c` is true, display the printf-style message at display level 1,
 * followed by " \n", then return `n` from the enclosing function.
 * Wrapped in do { } while (0), so that an invocation followed by ';'
 * is exactly one statement, and remains valid as the body of an unbraced if / else. */
#define ERROR_IF(c,n,...) do {             \
    if (c) {                               \
        DISPLAYLEVEL(1, __VA_ARGS__);      \
        DISPLAYLEVEL(1, " \n");            \
        return n;                          \
    }                                      \
} while (0)
3184
3185static InfoError
3186FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
3187{
3188 /* begin analyzing frame */
3189 for ( ; ; ) {
3190 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
3191 size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
3192 if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
3193 if ( feof(srcFile)
3194 && (numBytesRead == 0)
3195 && (info->compressedSize > 0)
3196 && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
3197 unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
3198 unsigned long long file_size = (unsigned long long) info->compressedSize;
3199 ERROR_IF(file_position != file_size, info_truncated_input,
3200 "Error: seeked to position %llu, which is beyond file size of %llu\n",
3201 file_position,
3202 file_size);
3203 break; /* correct end of file => success */
3204 }
3205 ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
3206 ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
3207 }
3208 { U32 const magicNumber = MEM_readLE32(headerBuffer);
3209 /* Zstandard frame */
3210 if (magicNumber == ZSTD_MAGICNUMBER) {
3211 ZSTD_frameHeader header;
3212 U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
3213 if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
3214 || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
3215 info->decompUnavailable = 1;
3216 } else {
3217 info->decompressedSize += frameContentSize;
3218 }
3219 ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
3220 info_frame_error, "Error: could not decode frame header");
3221 if (info->dictID != 0 && info->dictID != header.dictID) {
3222 DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. Showing dictID 0 instead");
3223 info->dictID = 0;
3224 } else {
3225 info->dictID = header.dictID;
3226 }
3227 info->windowSize = header.windowSize;
3228 /* move to the end of the frame header */
3229 { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
3230 ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
3231 ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
3232 info_frame_error, "Error: could not move to end of frame header");
3233 }
3234
3235 /* skip all blocks in the frame */
3236 { int lastBlock = 0;
3237 do {
3238 BYTE blockHeaderBuffer[3];
3239 ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
3240 info_frame_error, "Error while reading block header");
3241 { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
3242 U32 const blockTypeID = (blockHeader >> 1) & 3;
3243 U32 const isRLE = (blockTypeID == 1);
3244 U32 const isWrongBlock = (blockTypeID == 3);
3245 long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
3246 ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
3247 lastBlock = blockHeader & 1;
3248 ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
3249 info_frame_error, "Error: could not skip to end of block");
3250 }
3251 } while (lastBlock != 1);
3252 }
3253
3254 /* check if checksum is used */
3255 { BYTE const frameHeaderDescriptor = headerBuffer[4];
3256 int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
3257 if (contentChecksumFlag) {
3258 info->usesCheck = 1;
3259 ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4,
3260 info_frame_error, "Error: could not read checksum");
3261 } }
3262 info->numActualFrames++;
3263 }
3264 /* Skippable frame */
3265 else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
3266 U32 const frameSize = MEM_readLE32(headerBuffer + 4);
3267 long const seek = (long)(8 + frameSize - numBytesRead);
3268 ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
3269 info_frame_error, "Error: could not find end of skippable frame");
3270 info->numSkippableFrames++;
3271 }
3272 /* unknown content */
3273 else {
3274 return info_not_zstd;
3275 }
3276 } /* magic number analysis */
3277 } /* end analyzing frames */
3278 return info_success;
3279}
3280
3281
3282static InfoError
3283getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
3284{
3285 InfoError status;
3286 stat_t srcFileStat;
3287 FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat);
3288 ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
3289
3290 info->compressedSize = UTIL_getFileSizeStat(&srcFileStat);
3291 status = FIO_analyzeFrames(info, srcFile);
3292
3293 fclose(srcFile);
3294 info->nbFiles = 1;
3295 return status;
3296}
3297
3298
3299/** getFileInfo() :
3300 * Reads information from file, stores in *info
3301 * @return : InfoError status
3302 */
3303static InfoError
3304getFileInfo(fileInfo_t* info, const char* srcFileName)
3305{
3306 ERROR_IF(!UTIL_isRegularFile(srcFileName),
3307 info_file_error, "Error : %s is not a file", srcFileName);
3308 return getFileInfo_fileConfirmed(info, srcFileName);
3309}
3310
3311
3312static void
3313displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
3314{
3315 UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
3316 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
3317 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
3318 double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
3319 const char* const checkString = (info->usesCheck ? "XXH64" : "None");
3320 if (displayLevel <= 2) {
3321 if (!info->decompUnavailable) {
3322 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
3323 info->numSkippableFrames + info->numActualFrames,
3324 info->numSkippableFrames,
3325 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3326 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
3327 ratio, checkString, inFileName);
3328 } else {
3329 DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
3330 info->numSkippableFrames + info->numActualFrames,
3331 info->numSkippableFrames,
3332 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3333 checkString, inFileName);
3334 }
3335 } else {
3336 DISPLAYOUT("%s \n", inFileName);
3337 DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
3338 if (info->numSkippableFrames)
3339 DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
3340 DISPLAYOUT("DictID: %u\n", info->dictID);
3341 DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
3342 window_hrs.precision, window_hrs.value, window_hrs.suffix,
3343 (unsigned long long)info->windowSize);
3344 DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
3345 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3346 (unsigned long long)info->compressedSize);
3347 if (!info->decompUnavailable) {
3348 DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
3349 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
3350 (unsigned long long)info->decompressedSize);
3351 DISPLAYOUT("Ratio: %.4f\n", ratio);
3352 }
3353
3354 if (info->usesCheck && info->numActualFrames == 1) {
3355 DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString,
3356 info->checksum[3], info->checksum[2],
3357 info->checksum[1], info->checksum[0]
3358 );
3359 } else {
3360 DISPLAYOUT("Check: %s\n", checkString);
3361 }
3362
3363 DISPLAYOUT("\n");
3364 }
3365}
3366
3367static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
3368{
3369 fileInfo_t total;
3370 memset(&total, 0, sizeof(total));
3371 total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
3372 total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
3373 total.compressedSize = fi1.compressedSize + fi2.compressedSize;
3374 total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
3375 total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
3376 total.usesCheck = fi1.usesCheck & fi2.usesCheck;
3377 total.nbFiles = fi1.nbFiles + fi2.nbFiles;
3378 return total;
3379}
3380
3381static int
3382FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
3383{
3384 fileInfo_t info;
3385 memset(&info, 0, sizeof(info));
3386 { InfoError const error = getFileInfo(&info, inFileName);
3387 switch (error) {
3388 case info_frame_error:
3389 /* display error, but provide output */
3390 DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
3391 break;
3392 case info_not_zstd:
3393 DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
3394 if (displayLevel > 2) DISPLAYOUT("\n");
3395 return 1;
3396 case info_file_error:
3397 /* error occurred while opening the file */
3398 if (displayLevel > 2) DISPLAYOUT("\n");
3399 return 1;
3400 case info_truncated_input:
3401 DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
3402 if (displayLevel > 2) DISPLAYOUT("\n");
3403 return 1;
3404 case info_success:
3405 default:
3406 break;
3407 }
3408
3409 displayInfo(inFileName, &info, displayLevel);
3410 *total = FIO_addFInfo(*total, info);
3411 assert(error == info_success || error == info_frame_error);
3412 return (int)error;
3413 }
3414}
3415
3416int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
3417{
3418 /* ensure no specified input is stdin (needs fseek() capability) */
3419 { unsigned u;
3420 for (u=0; u<numFiles;u++) {
3421 ERROR_IF(!strcmp (filenameTable[u], stdinmark),
3422 1, "zstd: --list does not support reading from standard input");
3423 } }
3424
3425 if (numFiles == 0) {
3426 if (!UTIL_isConsole(stdin)) {
3427 DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
3428 }
3429 DISPLAYLEVEL(1, "No files given \n");
3430 return 1;
3431 }
3432
3433 if (displayLevel <= 2) {
3434 DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
3435 }
3436 { int error = 0;
3437 fileInfo_t total;
3438 memset(&total, 0, sizeof(total));
3439 total.usesCheck = 1;
3440 /* --list each file, and check for any error */
3441 { unsigned u;
3442 for (u=0; u<numFiles;u++) {
3443 error |= FIO_listFile(&total, filenameTable[u], displayLevel);
3444 } }
3445 if (numFiles > 1 && displayLevel <= 2) { /* display total */
3446 UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
3447 UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
3448 double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
3449 const char* const checkString = (total.usesCheck ? "XXH64" : "");
3450 DISPLAYOUT("----------------------------------------------------------------- \n");
3451 if (total.decompUnavailable) {
3452 DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
3453 total.numSkippableFrames + total.numActualFrames,
3454 total.numSkippableFrames,
3455 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3456 checkString, (unsigned)total.nbFiles);
3457 } else {
3458 DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
3459 total.numSkippableFrames + total.numActualFrames,
3460 total.numSkippableFrames,
3461 compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
3462 decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
3463 ratio, checkString, (unsigned)total.nbFiles);
3464 } }
3465 return error;
3466 }
3467}
3468
3469
3470#endif /* #ifndef ZSTD_NODECOMPRESS */