X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=deps%2Flibchdr%2Fdeps%2Fzstd-1.5.5%2Fprograms%2Ffileio.c;fp=deps%2Flibchdr%2Fdeps%2Fzstd-1.5.5%2Fprograms%2Ffileio.c;h=84a0f48f7821e1e36209634a06d1f7e7eb293e99;hb=648db22b0750712da893c306efcc8e4b2d3a4e3c;hp=0000000000000000000000000000000000000000;hpb=e2fb1389dc12376acb84e4993ed3b08760257252;p=pcsx_rearmed.git diff --git a/deps/libchdr/deps/zstd-1.5.5/programs/fileio.c b/deps/libchdr/deps/zstd-1.5.5/programs/fileio.c new file mode 100644 index 00000000..84a0f48f --- /dev/null +++ b/deps/libchdr/deps/zstd-1.5.5/programs/fileio.c @@ -0,0 +1,3364 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ************************************* +* Compiler Options +***************************************/ +#ifdef _MSC_VER /* Visual */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* non-constant aggregate initializer */ +#endif +#if defined(__MINGW32__) && !defined(_POSIX_SOURCE) +# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ +#endif + +/*-************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support, SET_BINARY_MODE */ +#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ +#include /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */ +#include /* malloc, free */ +#include /* strcmp, strlen */ +#include /* clock_t, to measure process time */ +#include /* O_WRONLY */ +#include +#include /* errno */ +#include /* INT_MAX */ +#include +#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ + +#if defined (_MSC_VER) +# include +# include +#endif + +#include "fileio.h" +#include "fileio_asyncio.h" +#include "fileio_common.h" + +FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto}; +UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ +#include "../lib/zstd.h" +#include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ + +#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) +# include +# if !defined(z_const) +# define z_const +# endif +#endif + +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) +# include +#endif + +#define LZ4_MAGICNUMBER 0x184D2204 +#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) +# define LZ4F_ENABLE_OBSOLETE_ENUMS +# include +# include +#endif + +char const* FIO_zlibVersion(void) +{ +#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) + return zlibVersion(); +#else + return 
"Unsupported"; +#endif +} + +char const* FIO_lz4Version(void) +{ +#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) + /* LZ4_versionString() added in v1.7.3 */ +# if LZ4_VERSION_NUMBER >= 10703 + return LZ4_versionString(); +# else +# define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +# define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION) + return ZSTD_LZ4_VERSION_STRING; +# endif +#else + return "Unsupported"; +#endif +} + +char const* FIO_lzmaVersion(void) +{ +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) + return lzma_version_string(); +#else + return "Unsupported"; +#endif +} + + +/*-************************************* +* Constants +***************************************/ +#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */ +#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ + +#define FNSPACE 30 + +/* Default file permissions 0666 (modulated by umask) */ +/* Temporary restricted file permissions are used when we're going to + * chmod/chown at the end of the operation. */ +#if !defined(_WIN32) +/* These macros aren't defined on windows. 
*/ +#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) +#define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR) +#else +#define DEFAULT_FILE_PERMISSIONS (0666) +#define TEMPORARY_FILE_PERMISSIONS (0600) +#endif + +/*-************************************ +* Signal (Ctrl-C trapping) +**************************************/ +static const char* g_artefact = NULL; +static void INThandler(int sig) +{ + assert(sig==SIGINT); (void)sig; +#if !defined(_MSC_VER) + signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ +#endif + if (g_artefact) { + assert(UTIL_isRegularFile(g_artefact)); + remove(g_artefact); + } + DISPLAY("\n"); + exit(2); +} +static void addHandler(char const* dstFileName) +{ + if (UTIL_isRegularFile(dstFileName)) { + g_artefact = dstFileName; + signal(SIGINT, INThandler); + } else { + g_artefact = NULL; + } +} +/* Idempotent */ +static void clearHandler(void) +{ + if (g_artefact) signal(SIGINT, SIG_DFL); + g_artefact = NULL; +} + + +/*-********************************************************* +* Termination signal trapping (Print debug stack trace) +***********************************************************/ +#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */ +# if (__has_feature(address_sanitizer)) +# define BACKTRACE_ENABLE 0 +# endif /* __has_feature(address_sanitizer) */ +#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */ +# define BACKTRACE_ENABLE 0 +#endif + +#if !defined(BACKTRACE_ENABLE) +/* automatic detector : backtrace enabled by default on linux+glibc and osx */ +# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \ + || (defined(__APPLE__) && defined(__MACH__)) +# define BACKTRACE_ENABLE 1 +# else +# define BACKTRACE_ENABLE 0 +# endif +#endif + +/* note : after this point, BACKTRACE_ENABLE is necessarily defined */ + + +#if BACKTRACE_ENABLE + +#include /* backtrace, backtrace_symbols */ + +#define 
MAX_STACK_FRAMES 50 + +static void ABRThandler(int sig) { + const char* name; + void* addrlist[MAX_STACK_FRAMES]; + char** symbollist; + int addrlen, i; + + switch (sig) { + case SIGABRT: name = "SIGABRT"; break; + case SIGFPE: name = "SIGFPE"; break; + case SIGILL: name = "SIGILL"; break; + case SIGINT: name = "SIGINT"; break; + case SIGSEGV: name = "SIGSEGV"; break; + default: name = "UNKNOWN"; + } + + DISPLAY("Caught %s signal, printing stack:\n", name); + /* Retrieve current stack addresses. */ + addrlen = backtrace(addrlist, MAX_STACK_FRAMES); + if (addrlen == 0) { + DISPLAY("\n"); + return; + } + /* Create readable strings to each frame. */ + symbollist = backtrace_symbols(addrlist, addrlen); + /* Print the stack trace, excluding calls handling the signal. */ + for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) { + DISPLAY("%s\n", symbollist[i]); + } + free(symbollist); + /* Reset and raise the signal so default handler runs. */ + signal(sig, SIG_DFL); + raise(sig); +} +#endif + +void FIO_addAbortHandler(void) +{ +#if BACKTRACE_ENABLE + signal(SIGABRT, ABRThandler); + signal(SIGFPE, ABRThandler); + signal(SIGILL, ABRThandler); + signal(SIGSEGV, ABRThandler); + signal(SIGBUS, ABRThandler); +#endif +} + +/*-************************************* +* Parameters: FIO_ctx_t +***************************************/ + +/* typedef'd to FIO_ctx_t within fileio.h */ +struct FIO_ctx_s { + + /* file i/o info */ + int nbFilesTotal; + int hasStdinInput; + int hasStdoutOutput; + + /* file i/o state */ + int currFileIdx; + int nbFilesProcessed; + size_t totalBytesInput; + size_t totalBytesOutput; +}; + +static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx) +{ + return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3; +} + +static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx) +{ + int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1); + assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || 
fCtx->nbFilesProcessed == 0); + return shouldDisplay; +} + + +/*-************************************* +* Parameters: Initialization +***************************************/ + +#define FIO_OVERLAP_LOG_NOTSET 9999 +#define FIO_LDM_PARAM_NOTSET 9999 + + +FIO_prefs_t* FIO_createPreferences(void) +{ + FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + + ret->compressionType = FIO_zstdCompression; + ret->overwrite = 0; + ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT; + ret->dictIDFlag = 1; + ret->checksumFlag = 1; + ret->removeSrcFile = 0; + ret->memLimit = 0; + ret->nbWorkers = 1; + ret->blockSize = 0; + ret->overlapLog = FIO_OVERLAP_LOG_NOTSET; + ret->adaptiveMode = 0; + ret->rsyncable = 0; + ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ + ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */ + ret->ldmFlag = 0; + ret->ldmHashLog = 0; + ret->ldmMinMatch = 0; + ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; + ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; + ret->targetCBlockSize = 0; + ret->srcSizeHint = 0; + ret->testMode = 0; + ret->literalCompressionMode = ZSTD_ps_auto; + ret->excludeCompressedFiles = 0; + ret->allowBlockDevices = 0; + ret->asyncIO = AIO_supported(); + ret->passThrough = -1; + return ret; +} + +FIO_ctx_t* FIO_createContext(void) +{ + FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + + ret->currFileIdx = 0; + ret->hasStdinInput = 0; + ret->hasStdoutOutput = 0; + ret->nbFilesTotal = 1; + ret->nbFilesProcessed = 0; + ret->totalBytesInput = 0; + ret->totalBytesOutput = 0; + return ret; +} + +void FIO_freePreferences(FIO_prefs_t* const prefs) +{ + free(prefs); +} + +void FIO_freeContext(FIO_ctx_t* const fCtx) +{ + free(fCtx); +} + + 
+/*-************************************* +* Parameters: Display Options +***************************************/ + +void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; } + +void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; } + + +/*-************************************* +* Parameters: Setters +***************************************/ + +/* FIO_prefs_t functions */ + +void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } + +void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } + +void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; } + +void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } + +void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } + +void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); } + +void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } + +void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { +#ifndef ZSTD_MULTITHREAD + if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + prefs->nbWorkers = nbWorkers; +} + +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } + +void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; } + +void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { + if (blockSize && prefs->nbWorkers==0) + DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); + prefs->blockSize = blockSize; +} + +void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ + if (overlapLog 
&& prefs->nbWorkers==0) + DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); + prefs->overlapLog = overlapLog; +} + +void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) { + if ((adapt>0) && (prefs->nbWorkers==0)) + EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); + prefs->adaptiveMode = adapt; +} + +void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) { + prefs->useRowMatchFinder = useRowMatchFinder; +} + +void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { + if ((rsyncable>0) && (prefs->nbWorkers==0)) + EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); + prefs->rsyncable = rsyncable; +} + +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { + prefs->streamSrcSize = streamSrcSize; +} + +void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { + prefs->targetCBlockSize = targetCBlockSize; +} + +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) { + prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint); +} + +void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) { + prefs->testMode = (testMode!=0); +} + +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_paramSwitch_e mode) { + prefs->literalCompressionMode = mode; +} + +void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) +{ +#ifndef ZSTD_NOCOMPRESS + assert(minCLevel >= ZSTD_minCLevel()); +#endif + prefs->minAdaptLevel = minCLevel; +} + +void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) +{ + prefs->maxAdaptLevel = maxCLevel; +} + +void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) { + prefs->ldmFlag = (ldmFlag>0); +} + +void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) { + prefs->ldmHashLog = ldmHashLog; +} + +void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) { + prefs->ldmMinMatch = ldmMinMatch; +} + +void 
FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) { + prefs->ldmBucketSizeLog = ldmBucketSizeLog; +} + + +void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) { + prefs->ldmHashRateLog = ldmHashRateLog; +} + +void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value) +{ + prefs->patchFromMode = value != 0; +} + +void FIO_setContentSize(FIO_prefs_t* const prefs, int value) +{ + prefs->contentSize = value != 0; +} + +void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) { +#ifdef ZSTD_MULTITHREAD + prefs->asyncIO = value; +#else + (void) prefs; + (void) value; + DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n"); +#endif +} + +void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) { + prefs->passThrough = (value != 0); +} + +void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value) +{ + prefs->mmapDict = value; +} + +/* FIO_ctx_t functions */ + +void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { + fCtx->hasStdoutOutput = value; +} + +void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value) +{ + fCtx->nbFilesTotal = value; +} + +void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) { + size_t i = 0; + for ( ; i < filenames->tableSize; ++i) { + if (!strcmp(stdinmark, filenames->fileNames[i])) { + fCtx->hasStdinInput = 1; + return; + } + } +} + +/*-************************************* +* Functions +***************************************/ +/** FIO_removeFile() : + * @result : Unlink `fileName`, even if it's read-only */ +static int FIO_removeFile(const char* path) +{ + stat_t statbuf; + if (!UTIL_stat(path, &statbuf)) { + DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path); + return 0; + } + if (!UTIL_isRegularFileStat(&statbuf)) { + DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); + return 0; + } +#if defined(_WIN32) || defined(WIN32) + /* windows doesn't 
allow remove read-only files, + * so try to make it writable first */ + if (!(statbuf.st_mode & _S_IWRITE)) { + UTIL_chmod(path, &statbuf, _S_IWRITE); + } +#endif + return remove(path); +} + +/** FIO_openSrcFile() : + * condition : `srcFileName` must be non-NULL. `prefs` may be NULL. + * @result : FILE* to `srcFileName`, or NULL if it fails */ +static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf) +{ + int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0; + assert(srcFileName != NULL); + assert(statbuf != NULL); + if (!strcmp (srcFileName, stdinmark)) { + DISPLAYLEVEL(4,"Using stdin for input \n"); + SET_BINARY_MODE(stdin); + return stdin; + } + + if (!UTIL_stat(srcFileName, statbuf)) { + DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", + srcFileName, strerror(errno)); + return NULL; + } + + if (!UTIL_isRegularFileStat(statbuf) + && !UTIL_isFIFOStat(statbuf) + && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf)) + ) { + DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", + srcFileName); + return NULL; + } + + { FILE* const f = fopen(srcFileName, "rb"); + if (f == NULL) + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + return f; + } +} + +/** FIO_openDstFile() : + * condition : `dstFileName` must be non-NULL. 
+ * @result : FILE* to `dstFileName`, or NULL if it fails */ +static FILE* +FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, + const char* srcFileName, const char* dstFileName, + const int mode) +{ + int isDstRegFile; + + if (prefs->testMode) return NULL; /* do not open file in test mode */ + + assert(dstFileName != NULL); + if (!strcmp (dstFileName, stdoutmark)) { + DISPLAYLEVEL(4,"Using stdout for output \n"); + SET_BINARY_MODE(stdout); + if (prefs->sparseFileSupport == 1) { + prefs->sparseFileSupport = 0; + DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); + } + return stdout; + } + + /* ensure dst is not the same as src */ + if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { + DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); + return NULL; + } + + isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */ + if (prefs->sparseFileSupport == 1) { + prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT; + if (!isDstRegFile) { + prefs->sparseFileSupport = 0; + DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n"); + } + } + + if (isDstRegFile) { + /* Check if destination file already exists */ +#if !defined(_WIN32) + /* this test does not work on Windows : + * `NUL` and `nul` are detected as regular files */ + if (!strcmp(dstFileName, nulmark)) { + EXM_THROW(40, "%s is unexpectedly categorized as a regular file", + dstFileName); + } +#endif + if (!prefs->overwrite) { + if (g_display_prefs.displayLevel <= 1) { + /* No interaction possible */ + DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n", + dstFileName); + return NULL; + } + DISPLAY("zstd: %s already exists; ", dstFileName); + if (UTIL_requireUserConfirmation("overwrite (y/n) ? 
", "Not overwritten \n", "yY", fCtx->hasStdinInput)) + return NULL; + } + /* need to unlink */ + FIO_removeFile(dstFileName); + } + + { +#if defined(_WIN32) + /* Windows requires opening the file as a "binary" file to avoid + * mangling. This macro doesn't exist on unix. */ + const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY; + const int fd = _open(dstFileName, openflags, mode); + FILE* f = NULL; + if (fd != -1) { + f = _fdopen(fd, "wb"); + } +#else + const int openflags = O_WRONLY|O_CREAT|O_TRUNC; + const int fd = open(dstFileName, openflags, mode); + FILE* f = NULL; + if (fd != -1) { + f = fdopen(fd, "wb"); + } +#endif + if (f == NULL) { + DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); + } else { + /* An increased buffer size can provide a significant performance + * boost on some platforms. Note that providing a NULL buf with a + * size that's not 0 is not defined in ANSI C, but is defined in an + * extension. There are three possibilities here: + * 1. Libc supports the extended version and everything is good. + * 2. Libc ignores the size when buf is NULL, in which case + * everything will continue as if we didn't call `setvbuf()`. + * 3. We fail the call and execution continues but a warning + * message might be shown. + * In all cases due execution continues. For now, I believe that + * this is a more cost-effective solution than managing the buffers + * allocations ourselves (will require an API change). 
+ */ + if (setvbuf(f, NULL, _IOFBF, 1 MB)) { + DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName); + } + } + return f; + } +} + + +/* FIO_getDictFileStat() : + */ +static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) { + assert(dictFileStat != NULL); + if (fileName == NULL) return; + + if (!UTIL_stat(fileName, dictFileStat)) { + EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno)); + } + + if (!UTIL_isRegularFileStat(dictFileStat)) { + EXM_THROW(32, "Dictionary %s must be a regular file.", fileName); + } +} + +/* FIO_setDictBufferMalloc() : + * allocates a buffer, pointed by `dict->dictBuffer`, + * loads `filename` content into it, up to DICTSIZE_MAX bytes. + * @return : loaded size + * if fileName==NULL, returns 0 and a NULL pointer + */ +static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) +{ + FILE* fileHandle; + U64 fileSize; + void** bufferPtr = &dict->dictBuffer; + + assert(bufferPtr != NULL); + assert(dictFileStat != NULL); + *bufferPtr = NULL; + if (fileName == NULL) return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + + fileHandle = fopen(fileName, "rb"); + + if (fileHandle == NULL) { + EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); + } + + fileSize = UTIL_getFileSizeStat(dictFileStat); + { + size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; + if (fileSize > dictSizeMax) { + EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", + fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ + } + } + *bufferPtr = malloc((size_t)fileSize); + if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); + { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); + if (readSize != fileSize) { + EXM_THROW(35, "Error reading dictionary file %s : %s", + fileName, strerror(errno)); + } + } + fclose(fileHandle); + return (size_t)fileSize; +} + +#if (PLATFORM_POSIX_VERSION > 0) +#include +static void FIO_munmap(FIO_Dict_t* dict) +{ + munmap(dict->dictBuffer, dict->dictBufferSize); + dict->dictBuffer = NULL; + dict->dictBufferSize = 0; +} +static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) +{ + int fileHandle; + U64 fileSize; + void** bufferPtr = &dict->dictBuffer; + + assert(bufferPtr != NULL); + assert(dictFileStat != NULL); + *bufferPtr = NULL; + if (fileName == NULL) return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + + fileHandle = open(fileName, O_RDONLY); + + if (fileHandle == -1) { + EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); + } + + fileSize = UTIL_getFileSizeStat(dictFileStat); + { + size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; + if (fileSize > dictSizeMax) { + EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", + fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ + } + } + + *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0); + if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); + + close(fileHandle); + return (size_t)fileSize; +} +#elif defined(_MSC_VER) || defined(_WIN32) +#include +static void FIO_munmap(FIO_Dict_t* dict) +{ + UnmapViewOfFile(dict->dictBuffer); + CloseHandle(dict->dictHandle); + dict->dictBuffer = NULL; + dict->dictBufferSize = 0; +} +static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) +{ + HANDLE fileHandle, mapping; + U64 fileSize; + void** bufferPtr = &dict->dictBuffer; + + assert(bufferPtr != NULL); + assert(dictFileStat != NULL); + *bufferPtr = NULL; + if (fileName == NULL) return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + + fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); + + if (fileHandle == INVALID_HANDLE_VALUE) { + EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); + } + + fileSize = UTIL_getFileSizeStat(dictFileStat); + { + size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; + if (fileSize > dictSizeMax) { + EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", + fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ + } + } + + mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL); + if (mapping == NULL) { + EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno)); + } + + *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */ + if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno)); + + dict->dictHandle = fileHandle; + return (size_t)fileSize; +} +#else +static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) +{ + return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat); +} +static void FIO_munmap(FIO_Dict_t* dict) { + free(dict->dictBuffer); + dict->dictBuffer = NULL; + dict->dictBufferSize = 0; +} +#endif + +static void FIO_freeDict(FIO_Dict_t* dict) { + if (dict->dictBufferType == FIO_mallocDict) { + free(dict->dictBuffer); + dict->dictBuffer = NULL; + dict->dictBufferSize = 0; + } else if (dict->dictBufferType == FIO_mmapDict) { + FIO_munmap(dict); + } else { + assert(0); /* Should not reach this case */ + } +} + +static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) { + dict->dictBufferType = dictBufferType; + if (dict->dictBufferType == FIO_mallocDict) { + dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat); + } else if (dict->dictBufferType == FIO_mmapDict) { + dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat); + } else { + assert(0); /* Should not reach this case */ + } +} + + +/* FIO_checkFilenameCollisions() : + * Checks for and warns if there are any files that would have the same output path + */ +int 
FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { + const char **filenameTableSorted, *prevElem, *filename; + unsigned u; + + filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); + if (!filenameTableSorted) { + DISPLAYLEVEL(1, "Allocation error during filename collision checking \n"); + return 1; + } + + for (u = 0; u < nbFiles; ++u) { + filename = strrchr(filenameTable[u], PATH_SEP); + if (filename == NULL) { + filenameTableSorted[u] = filenameTable[u]; + } else { + filenameTableSorted[u] = filename+1; + } + } + + qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr); + prevElem = filenameTableSorted[0]; + for (u = 1; u < nbFiles; ++u) { + if (strcmp(prevElem, filenameTableSorted[u]) == 0) { + DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem); + } + prevElem = filenameTableSorted[u]; + } + + free((void*)filenameTableSorted); + return 0; +} + +static const char* +extractFilename(const char* path, char separator) +{ + const char* search = strrchr(path, separator); + if (search == NULL) return path; + return search+1; +} + +/* FIO_createFilename_fromOutDir() : + * Takes a source file name and specified output directory, and + * allocates memory for and returns a pointer to final path. 
+ * This function never returns an error (it may abort() in case of pb) + */ +static char* +FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen) +{ + const char* filenameStart; + char separator; + char* result; + +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ + separator = '\\'; +#else + separator = '/'; +#endif + + filenameStart = extractFilename(path, separator); +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ + filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */ +#endif + + result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1); + if (!result) { + EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno)); + } + + memcpy(result, outDirName, strlen(outDirName)); + if (outDirName[strlen(outDirName)-1] == separator) { + memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart)); + } else { + memcpy(result + strlen(outDirName), &separator, 1); + memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart)); + } + + return result; +} + +/* FIO_highbit64() : + * gives position of highest bit. + * note : only works for v > 0 ! 
+ */ +static unsigned FIO_highbit64(unsigned long long v) +{ + unsigned count = 0; + assert(v != 0); + v >>= 1; + while (v) { v >>= 1; count++; } + return count; +} + +static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, + unsigned long long const dictSize, + unsigned long long const maxSrcFileSize) +{ + unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize)); + unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX); + if (maxSize == UTIL_FILESIZE_UNKNOWN) + EXM_THROW(42, "Using --patch-from with stdin requires --stream-size"); + assert(maxSize != UTIL_FILESIZE_UNKNOWN); + if (maxSize > maxWindowSize) + EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB)); + FIO_setMemLimit(prefs, (unsigned)maxSize); +} + +/* FIO_multiFilesConcatWarning() : + * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts. + * Returns 1 if the console should abort, 0 if console should proceed. + * + * If output is stdout or test mode is active, check that `--rm` disabled. + * + * If there is just 1 file to process, zstd will proceed as usual. + * If each file get processed into its own separate destination file, proceed as usual. + * + * When multiple files are processed into a single output, + * display a warning message, then disable --rm if it's set. + * + * If -f is specified or if output is stdout, just proceed. + * If output is set with -o, prompt for confirmation. 
+ */ +static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff) +{ + if (fCtx->hasStdoutOutput) { + if (prefs->removeSrcFile) + /* this should not happen ; hard fail, to protect user's data + * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ + EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. " + "This scenario is not supposed to be possible. " + "This is a programming error. File an issue for it to be fixed."); + } + if (prefs->testMode) { + if (prefs->removeSrcFile) + /* this should not happen ; hard fail, to protect user's data + * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ + EXM_THROW(43, "Test mode shall not remove input files! " + "This scenario is not supposed to be possible. " + "This is a programming error. File an issue for it to be fixed."); + return 0; + } + + if (fCtx->nbFilesTotal == 1) return 0; + assert(fCtx->nbFilesTotal > 1); + + if (!outFileName) return 0; + + if (fCtx->hasStdoutOutput) { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n"); + } else { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName); + } + DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n") + + /* multi-input into single output : --rm is not allowed */ + if (prefs->removeSrcFile) { + DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. 
\n"); + prefs->removeSrcFile = 0; + } + + if (fCtx->hasStdoutOutput) return 0; + if (prefs->overwrite) return 0; + + /* multiple files concatenated into single destination file using -o without -f */ + if (g_display_prefs.displayLevel <= displayLevelCutoff) { + /* quiet mode => no prompt => fail automatically */ + DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n"); + DISPLAYLEVEL(1, "Aborting. \n"); + return 1; + } + /* normal mode => prompt */ + return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); +} + +static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos) +{ + ZSTD_inBuffer i; + i.src = buf; + i.size = s; + i.pos = pos; + return i; +} + +static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos) +{ + ZSTD_outBuffer o; + o.dst = buf; + o.size = s; + o.pos = pos; + return o; +} + +#ifndef ZSTD_NOCOMPRESS + +/* ********************************************************************** + * Compression + ************************************************************************/ +typedef struct { + FIO_Dict_t dict; + const char* dictFileName; + stat_t dictFileStat; + ZSTD_CStream* cctx; + WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; +} cRess_t; + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + assert(hashLog > 1); + return hashLog - btScale; +} + +static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, + ZSTD_compressionParameters* comprParams, + unsigned long long const dictSize, + unsigned long long const maxSrcFileSize, + int cLevel) +{ + unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize); + FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 
maxSrcFileSize); + if (fileWindowLog > ZSTD_WINDOWLOG_MAX) + DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n"); + comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog)); + if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) { + if (!prefs->ldmFlag) + DISPLAYLEVEL(1, "long mode automatically triggered\n"); + FIO_setLdmFlag(prefs, 1); + } + if (cParams.strategy >= ZSTD_btopt) { + DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); + DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n"); + DISPLAYLEVEL(1, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n"); + DISPLAYLEVEL(1, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); + DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n"); + } +} + +static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, + const char* dictFileName, unsigned long long const maxSrcFileSize, + int cLevel, ZSTD_compressionParameters comprParams) { + int useMMap = prefs->mmapDict == ZSTD_ps_enable; + int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable; + FIO_dictBufferType_t dictBufferType; + cRess_t ress; + memset(&ress, 0, sizeof(ress)); + + DISPLAYLEVEL(6, "FIO_createCResources \n"); + ress.cctx = ZSTD_createCCtx(); + if (ress.cctx == NULL) + EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", + strerror(errno)); + + FIO_getDictFileStat(dictFileName, &ress.dictFileStat); + + /* need to update memLimit before calling createDictBuffer + * because of memLimit check inside it */ + if (prefs->patchFromMode) { + U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat); + unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; + useMMap |= dictSize > prefs->memLimit; + FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? 
ssSize : maxSrcFileSize, cLevel); + } + + dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict; + FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType); /* works with dictFileName==NULL */ + + ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize()); + ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize()); + + /* Advanced parameters, including dictionary */ + if (dictFileName && (ress.dict.dictBuffer==NULL)) + EXM_THROW(32, "allocation error : can't create dictBuffer"); + ress.dictFileName = dictFileName; + + if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog) + comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT; + + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) ); + /* compression level */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); + /* max compressed block size */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) ); + /* source size hint */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) ); + /* long distance matching */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) ); + if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) ); + } + if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, 
prefs->ldmHashRateLog) ); + } + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder)); + /* compression parameters */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); + /* multi-threading */ +#ifdef ZSTD_MULTITHREAD + DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) ); + if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) { + DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) ); + } + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); +#endif + /* dictionary */ + if (prefs->patchFromMode) { + CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) ); + } else { + CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) ); + } + + return ress; +} + +static void FIO_freeCResources(cRess_t* const ress) +{ + FIO_freeDict(&(ress->dict)); + AIO_WritePool_free(ress->writeCtx); + 
AIO_ReadPool_free(ress->readCtx); + ZSTD_freeCStream(ress->cctx); /* never fails */ +} + + +#ifdef ZSTD_GZCOMPRESS +static unsigned long long +FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */ + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + z_stream strm; + IOJob_t *writeJob = NULL; + + if (compressionLevel > Z_BEST_COMPRESSION) + compressionLevel = Z_BEST_COMPRESSION; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, + 15 /* maxWindowLogSize */ + 16 /* gzip only */, + 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */ + if (ret != Z_OK) { + EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); + } } + + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + + while (1) { + int ret; + if (strm.avail_in == 0) { + AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) break; + inFileSize += ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + } + + { + size_t const availBefore = strm.avail_in; + ret = deflate(&strm, Z_NO_FLUSH); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); + } + + if (ret != Z_OK) + EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); + { size_t const cSize = writeJob->bufferSize - strm.avail_out; + if (cSize) { + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += cSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) 
{ + DISPLAYUPDATE_PROGRESS( + "\rRead : %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), + (double)outFileSize/(double)inFileSize*100) + } else { + DISPLAYUPDATE_PROGRESS( + "\rRead : %u / %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/(double)inFileSize*100); + } } + + while (1) { + int const ret = deflate(&strm, Z_FINISH); + { size_t const cSize = writeJob->bufferSize - strm.avail_out; + if (cSize) { + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += cSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } + if (ret == Z_STREAM_END) break; + if (ret != Z_BUF_ERROR) + EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); + } + + { int const ret = deflateEnd(&strm); + if (ret != Z_OK) { + EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); + } } + *readsize = inFileSize; + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return outFileSize; +} +#endif + + +#ifdef ZSTD_LZMACOMPRESS +static unsigned long long +FIO_compressLzmaFrame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize, int plain_lzma) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret ret; + IOJob_t *writeJob = NULL; + + if (compressionLevel < 0) compressionLevel = 0; + if (compressionLevel > 9) compressionLevel = 9; + + if (plain_lzma) { + lzma_options_lzma opt_lzma; + if (lzma_lzma_preset(&opt_lzma, compressionLevel)) + EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName); + ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ + if (ret != LZMA_OK) + EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); + } else { + ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ + if (ret != 
LZMA_OK) + EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); + } + + writeJob =AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (BYTE*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; + strm.next_in = 0; + strm.avail_in = 0; + + while (1) { + if (strm.avail_in == 0) { + size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; + inFileSize += inSize; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; + } + + { + size_t const availBefore = strm.avail_in; + ret = lzma_code(&strm, action); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); + } + + + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); + { size_t const compBytes = writeJob->bufferSize - strm.avail_out; + if (compBytes) { + writeJob->usedBufferSize = compBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += compBytes; + strm.next_out = (BYTE*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; + } } + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), + (double)outFileSize/(double)inFileSize*100) + else + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/(double)inFileSize*100); + if (ret == LZMA_STREAM_END) break; + } + + lzma_end(&strm); + *readsize = inFileSize; + + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + + return outFileSize; +} +#endif + +#ifdef ZSTD_LZ4COMPRESS + +#if LZ4_VERSION_NUMBER <= 10600 +#define LZ4F_blockLinked blockLinked +#define LZ4F_max64KB max64KB +#endif + +static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } + +static unsigned long 
long +FIO_compressLz4Frame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, int checksumFlag, + U64* readsize) +{ + const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB); + unsigned long long inFileSize = 0, outFileSize = 0; + + LZ4F_preferences_t prefs; + LZ4F_compressionContext_t ctx; + + IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) + EXM_THROW(31, "zstd: failed to create lz4 compression context"); + + memset(&prefs, 0, sizeof(prefs)); + + assert(blockSize <= ress->readCtx->base.jobBufferSize); + + /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */ + prefs.autoFlush = 0; + prefs.compressionLevel = compressionLevel; + prefs.frameInfo.blockMode = LZ4F_blockLinked; + prefs.frameInfo.blockSizeID = LZ4F_max64KB; + prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag; +#if LZ4_VERSION_NUMBER >= 10600 + prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 
0 : srcFileSize; +#endif + assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize); + + { + size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs); + if (LZ4F_isError(headerSize)) + EXM_THROW(33, "File header generation failed : %s", + LZ4F_getErrorName(headerSize)); + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += headerSize; + + /* Read first block */ + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + + /* Main Loop */ + while (ress->readCtx->srcBufferLoaded) { + size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); + size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize, + ress->readCtx->srcBuffer, inSize, NULL); + if (LZ4F_isError(outSize)) + EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", + srcFileName, LZ4F_getErrorName(outSize)); + outFileSize += outSize; + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), + (double)outFileSize/(double)inFileSize*100) + } else { + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/(double)inFileSize*100); + } + + /* Write Block */ + writeJob->usedBufferSize = outSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + + /* Read next block */ + AIO_ReadPool_consumeBytes(ress->readCtx, inSize); + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + } + + /* End of Stream mark */ + headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL); + if (LZ4F_isError(headerSize)) + EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", + srcFileName, LZ4F_getErrorName(headerSize)); + + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += headerSize; + } + + *readsize = inFileSize; + 
LZ4F_freeCompressionContext(ctx); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + + return outFileSize; +} +#endif + +static unsigned long long +FIO_compressZstdFrame(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const cRess_t* ressPtr, + const char* srcFileName, U64 fileSize, + int compressionLevel, U64* readsize) +{ + cRess_t const ress = *ressPtr; + IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx); + + U64 compressedfilesize = 0; + ZSTD_EndDirective directive = ZSTD_e_continue; + U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + + /* stats */ + ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 }; + ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 }; + typedef enum { noChange, slower, faster } speedChange_e; + speedChange_e speedChange = noChange; + unsigned flushWaiting = 0; + unsigned inputPresented = 0; + unsigned inputBlocked = 0; + unsigned lastJobID = 0; + UTIL_time_t lastAdaptTime = UTIL_getTime(); + U64 const adaptEveryMicro = REFRESH_RATE; + + UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize); + + DISPLAYLEVEL(6, "compression using zstd format \n"); + + /* init */ + if (fileSize != UTIL_FILESIZE_UNKNOWN) { + pledgedSrcSize = fileSize; + CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); + } else if (prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size */ + pledgedSrcSize = prefs->streamSrcSize; + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); + } + + { + int windowLog; + UTIL_HumanReadableSize_t windowSize; + CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog)); + if (windowLog == 0) { + if (prefs->ldmFlag) { + /* If long mode is set without a window size libzstd will set this size internally */ + windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + } else { + const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0); + windowLog = 
(int)cParams.windowLog; + } + } + windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize))); + DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix); + } + (void)srcFileName; + + /* Main compression loop */ + do { + size_t stillToFlush; + /* Fill input Buffer */ + size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize()); + ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 ); + DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize); + *readsize += inSize; + + if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize)) + directive = ZSTD_e_end; + + stillToFlush = 1; + while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */ + || (directive == ZSTD_e_end && stillToFlush != 0) ) { + + size_t const oldIPos = inBuff.pos; + ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 ); + size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx); + CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive)); + AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos); + + /* count stats */ + inputPresented++; + if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */ + if (!toFlushNow) flushWaiting = 1; + + /* Write compressed stream */ + DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n", + (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); + if (outBuff.pos) { + writeJob->usedBufferSize = outBuff.pos; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + compressedfilesize += outBuff.pos; + } + + /* adaptive mode : statistics measurement and speed correction */ + if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > 
adaptEveryMicro) { + ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); + + lastAdaptTime = UTIL_getTime(); + + /* check output speed */ + if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ + + unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; + assert(zfp.produced >= previous_zfp_update.produced); + assert(prefs->nbWorkers >= 1); + + /* test if compression is blocked + * either because output is slow and all buffers are full + * or because input is slow and no job can start while waiting for at least one buffer to be filled. + * note : exclude starting part, since currentJobID > 1 */ + if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ + && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ + ) { + DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") + speedChange = slower; + } + + previous_zfp_update = zfp; + + if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ + && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ + ) { + DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); + speedChange = slower; + } + flushWaiting = 0; + } + + /* course correct only if there is at least one new job completed */ + if (zfp.currentJobID > lastJobID) { + DISPLAYLEVEL(6, "compression level adaptation check \n") + + /* check input speed */ + if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ + if 
(inputBlocked <= 0) { + DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); + speedChange = slower; + } else if (speedChange == noChange) { + unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; + unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; + unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; + previous_zfp_correction = zfp; + assert(inputPresented > 0); + DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", + inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, + (unsigned)newlyIngested, (unsigned)newlyConsumed, + (unsigned)newlyFlushed, (unsigned)newlyProduced); + if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ + && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ + && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ + ) { + DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", + newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); + speedChange = faster; + } + } + inputBlocked = 0; + inputPresented = 0; + } + + if (speedChange == slower) { + DISPLAYLEVEL(6, "slower speed , higher compression \n") + compressionLevel ++; + if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); + if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; + compressionLevel += (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + if (speedChange == faster) { + DISPLAYLEVEL(6, "faster speed , lighter compression \n") + compressionLevel --; + if (compressionLevel < 
prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; + compressionLevel -= (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + speedChange = noChange; + + lastJobID = zfp.currentJobID; + } /* if (zfp.currentJobID > lastJobID) */ + } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */ + + /* display notification */ + if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) { + ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); + double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; + UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed); + UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed); + UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); + + DELAY_NEXT_UPDATE(); + + /* display progress notifications */ + DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */ + if (g_display_prefs.displayLevel >= 3) { + /* Verbose progress update */ + DISPLAY_PROGRESS( + "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", + compressionLevel, + buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, + consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, + produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, + cShare ); + } else { + /* Require level 2 or forcibly displayed progress counter for summarized updates */ + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + /* Ensure that the string we print is roughly the same size each time */ + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAY_PROGRESS("Compress: %u/%u files. 
Current: ...%s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); + } else { + DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); + } + } + DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); + if (fileSize != UTIL_FILESIZE_UNKNOWN) + DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); + DISPLAY_PROGRESS(" ==> %2.f%%", cShare); + } + } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */ + } /* while ((inBuff.pos != inBuff.size) */ + } while (directive != ZSTD_e_end); + + if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { + EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", + (unsigned long long)*readsize, (unsigned long long)fileSize); + } + + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx); + + return compressedfilesize; +} + +/*! FIO_compressFilename_internal() : + * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. 
+ * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int +FIO_compressFilename_internal(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + cRess_t ress, + const char* dstFileName, const char* srcFileName, + int compressionLevel) +{ + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); + U64 readsize = 0; + U64 compressedfilesize = 0; + U64 const fileSize = UTIL_getFileSize(srcFileName); + DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize); + + /* compression format selection */ + switch (prefs->compressionType) { + default: + case FIO_zstdCompression: + compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); + break; + + case FIO_gzipCompression: +#ifdef ZSTD_GZCOMPRESS + compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", + srcFileName); +#endif + break; + + case FIO_xzCompression: + case FIO_lzmaCompression: +#ifdef ZSTD_LZMACOMPRESS + compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", + srcFileName); +#endif + break; + + case FIO_lz4Compression: +#ifdef ZSTD_LZ4COMPRESS + compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", + srcFileName); +#endif + break; + } + + /* Status */ + fCtx->totalBytesInput += (size_t)readsize; + 
fCtx->totalBytesOutput += (size_t)compressedfilesize; + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) { + UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); + UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); + if (readsize == 0) { + DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n", + srcFileName, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix, + dstFileName); + } else { + DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", + srcFileName, + (double)compressedfilesize / (double)readsize * 100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix, + dstFileName); + } + } + + /* Elapsed Time and CPU Load */ + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = (double)timeLength_ns / 1000000000; + double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100; + DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n", + srcFileName, timeLength_s, cpuLoad_pct); + } + return 0; +} + + +/*! FIO_compressFilename_dstFile() : + * open dstFileName, or pass-through if ress.file != NULL, + * then start compression with FIO_compressFilename_internal(). + * Manages source removal (--rm) and file permissions transfer. + * note : ress.srcFile must be != NULL, + * so reach this function through FIO_compressFilename_srcFile(). 
+ *  @return : 0 : compression completed correctly,
+ *            1 : pb
+ */
+static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
+                                        FIO_prefs_t* const prefs,
+                                        cRess_t ress,
+                                        const char* dstFileName,
+                                        const char* srcFileName,
+                                        const stat_t* srcFileStat,
+                                        int compressionLevel)
+{   /* Opens dstFileName when the write pool has no file yet, runs the compression,
+     * then (only when the file was opened here) closes it, transfers the source stat,
+     * and removes the destination if anything failed. */
+    int closeDstFile = 0;   /* 1 when dstFile is opened by this call, and so must be closed by it */
+    int result;
+    int transferStat = 0;   /* 1 when the source stat gets copied onto the destination */
+    FILE *dstFile;
+    int dstFd = -1;
+
+    assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
+    if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
+        int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS;
+        if ( strcmp (srcFileName, stdinmark)   /* strcmp() != 0 : src is not stdin */
+          && strcmp (dstFileName, stdoutmark)  /* strcmp() != 0 : dst is not stdout */
+          && UTIL_isRegularFileStat(srcFileStat) ) {
+            transferStat = 1;
+            dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS;
+        }
+
+        closeDstFile = 1;
+        DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
+        dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
+        if (dstFile==NULL) return 1;  /* could not open dstFileName */
+        dstFd = fileno(dstFile);
+        AIO_WritePool_setFile(ress.writeCtx, dstFile);
+        /* The handler must only be added after FIO_openDstFile() succeeds.
+         * Otherwise we may delete the destination file if it already exists,
+         * and the user presses Ctrl-C when asked if they wish to overwrite.
+         */
+        addHandler(dstFileName);
+    }
+
+    result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
+
+    if (closeDstFile) {
+        clearHandler();
+
+        if (transferStat) {
+            UTIL_setFDStat(dstFd, dstFileName, srcFileStat);   /* done before closing, while dstFd is still open */
+        }
+
+        DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
+        if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
+            DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
+            result=1;
+        }
+
+        if (transferStat) {
+            UTIL_utime(dstFileName, srcFileStat);   /* done after closing the file */
+        }
+
+        if ( (result != 0)  /* operation failure */
+          && strcmp(dstFileName, stdoutmark)  /* special case : don't remove() stdout */
+          ) {
+            FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
+        }
+    }
+
+    return result;
+}
+
+/* List used to compare file extensions (used with --exclude-compressed flag).
+* It is different from the suffixList and should only apply to zstd compress operations.
+* The list is terminated by a NULL entry.
+*/
+static const char *compressedFileExtensions[] = {
+    ZSTD_EXTENSION,
+    TZSTD_EXTENSION,
+    GZ_EXTENSION,
+    TGZ_EXTENSION,
+    LZMA_EXTENSION,
+    XZ_EXTENSION,
+    TXZ_EXTENSION,
+    LZ4_EXTENSION,
+    TLZ4_EXTENSION,
+    NULL
+};
+
+/*! 
FIO_compressFilename_srcFile() : + * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int +FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + cRess_t ress, + const char* dstFileName, + const char* srcFileName, + int compressionLevel) +{ + int result; + FILE* srcFile; + stat_t srcFileStat; + U64 fileSize = UTIL_FILESIZE_UNKNOWN; + DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName); + + if (strcmp(srcFileName, stdinmark)) { + if (UTIL_stat(srcFileName, &srcFileStat)) { + /* failure to stat at all is handled during opening */ + + /* ensure src is not a directory */ + if (UTIL_isDirectoryStat(&srcFileStat)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + /* ensure src is not the same as dict (if present) */ + if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) { + DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); + return 1; + } + } + } + + /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used + * YES => ZSTD will skip compression of the file and will return 0. + * NO => ZSTD will resume with compress operation. 
+ */ + if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { + DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); + return 0; + } + + srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); + if (srcFile == NULL) return 1; /* srcFile could not be opened */ + + /* Don't use AsyncIO for small files */ + if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ + fileSize = UTIL_getFileSizeStat(&srcFileStat); + if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { + AIO_ReadPool_setAsync(ress.readCtx, 0); + AIO_WritePool_setAsync(ress.writeCtx, 0); + } else { + AIO_ReadPool_setAsync(ress.readCtx, 1); + AIO_WritePool_setAsync(ress.writeCtx, 1); + } + + AIO_ReadPool_setFile(ress.readCtx, srcFile); + result = FIO_compressFilename_dstFile( + fCtx, prefs, ress, + dstFileName, srcFileName, + &srcFileStat, compressionLevel); + AIO_ReadPool_closeFile(ress.readCtx); + + if ( prefs->removeSrcFile /* --rm */ + && result == 0 /* success */ + && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ + ) { + /* We must clear the handler, since after this point calling it would + * delete both the source and destination files. 
+ */ + clearHandler(); + if (FIO_removeFile(srcFileName)) + EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); + } + return result; +} + +static const char* +checked_index(const char* options[], size_t length, size_t index) { + assert(index < length); + /* Necessary to avoid warnings since -O3 will omit the above `assert` */ + (void) length; + return options[index]; +} + +#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index)) + +void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) +{ + static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION, + LZMA_EXTENSION, LZ4_EXTENSION}; + static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"}; + static const char* checkSumOptions[3] = {" --no-check", "", " --check"}; + static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"}; + static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"}; + + assert(g_display_prefs.displayLevel >= 4); + + DISPLAY("--format=%s", formatOptions[prefs->compressionType]); + DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport)); + DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID"); + DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag)); + DISPLAY(" --block-size=%d", prefs->blockSize); + if (prefs->adaptiveMode) + DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel); + DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder)); + DISPLAY("%s", prefs->rsyncable ? 
" --rsyncable" : ""); + if (prefs->streamSrcSize) + DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize); + if (prefs->srcSizeHint) + DISPLAY(" --size-hint=%d", prefs->srcSizeHint); + if (prefs->targetCBlockSize) + DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize); + DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode)); + DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB); + DISPLAY(" --threads=%d", prefs->nbWorkers); + DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : ""); + DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-"); + DISPLAY("\n"); +} + +#undef INDEX + +int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, + const char* srcFileName, const char* dictFileName, + int compressionLevel, ZSTD_compressionParameters comprParams) +{ + cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); + int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + +#define DISPLAY_LEVEL_DEFAULT 2 + + FIO_freeCResources(&ress); + return result; +} + +/* FIO_determineCompressedName() : + * create a destination filename for compressed srcFileName. + * @return a pointer to it. 
+ * This function never returns an error (it may abort() in case of pb) + */ +static const char* +FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix) +{ + static size_t dfnbCapacity = 0; + static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + char* outDirFilename = NULL; + size_t sfnSize = strlen(srcFileName); + size_t const srcSuffixLen = strlen(suffix); + + if(!strcmp(srcFileName, stdinmark)) { + return stdoutmark; + } + + if (outDirName) { + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); + sfnSize = strlen(outDirFilename); + assert(outDirFilename != NULL); + } + + if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { + /* resize buffer for dstName */ + free(dstFileNameBuffer); + dfnbCapacity = sfnSize + srcSuffixLen + 30; + dstFileNameBuffer = (char*)malloc(dfnbCapacity); + if (!dstFileNameBuffer) { + EXM_THROW(30, "zstd: %s", strerror(errno)); + } + } + assert(dstFileNameBuffer != NULL); + + if (outDirFilename) { + memcpy(dstFileNameBuffer, outDirFilename, sfnSize); + free(outDirFilename); + } else { + memcpy(dstFileNameBuffer, srcFileName, sfnSize); + } + memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); + return dstFileNameBuffer; +} + +static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles) +{ + size_t i; + unsigned long long fileSize, maxFileSize = 0; + for (i = 0; i < nbFiles; i++) { + fileSize = UTIL_getFileSize(inFileNames[i]); + maxFileSize = fileSize > maxFileSize ? 
fileSize : maxFileSize; + } + return maxFileSize; +} + +/* FIO_compressMultipleFilenames() : + * compress nbFiles files + * into either one destination (outFileName), + * or into one file each (outFileName == NULL, but suffix != NULL), + * or into a destination folder (specified with -O) + */ +int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** inFileNamesTable, + const char* outMirroredRootDirName, + const char* outDirName, + const char* outFileName, const char* suffix, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters comprParams) +{ + int status; + int error = 0; + cRess_t ress = FIO_createCResources(prefs, dictFileName, + FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal), + compressionLevel, comprParams); + + /* init */ + assert(outFileName != NULL || suffix != NULL); + if (outFileName != NULL) { /* output into a single destination (stdout typically) */ + FILE *dstFile; + if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeCResources(&ress); + return 1; + } + dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (dstFile == NULL) { /* could not open outFileName */ + error = 1; + } else { + AIO_WritePool_setFile(ress.writeCtx, dstFile); + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if (AIO_WritePool_closeFile(ress.writeCtx)) + EXM_THROW(29, "Write error (%s) : cannot properly close %s", + strerror(errno), outFileName); + } + } else { + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); + + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + const char* const 
srcFileName = inFileNamesTable[fCtx->currFileIdx]; + const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName); + error=1; + continue; + } + } else { + dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ + } + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + + if (outDirName) + FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); + } + + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { + UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput); + UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput); + + DISPLAY_PROGRESS("\r%79s\r", ""); + if (fCtx->totalBytesInput == 0) { + DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } else { + DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } + } + + FIO_freeCResources(&ress); + return error; +} + +#endif /* #ifndef ZSTD_NOCOMPRESS */ + + + +#ifndef ZSTD_NODECOMPRESS + +/* ************************************************************************** + * Decompression + 
***************************************************************************/ +typedef struct { + FIO_Dict_t dict; + ZSTD_DStream* dctx; + WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; +} dRess_t; + +static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) +{ + int useMMap = prefs->mmapDict == ZSTD_ps_enable; + int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable; + stat_t statbuf; + dRess_t ress; + memset(&ress, 0, sizeof(ress)); + + FIO_getDictFileStat(dictFileName, &statbuf); + + if (prefs->patchFromMode){ + U64 const dictSize = UTIL_getFileSizeStat(&statbuf); + useMMap |= dictSize > prefs->memLimit; + FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */); + } + + /* Allocation */ + ress.dctx = ZSTD_createDStream(); + if (ress.dctx==NULL) + EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); + CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); + + /* dictionary */ + { + FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? 
FIO_mmapDict : FIO_mallocDict; + FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType); + + CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) ); + + if (prefs->patchFromMode){ + CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize)); + } else { + CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize)); + } + } + + ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize()); + ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize()); + return ress; +} + +static void FIO_freeDResources(dRess_t ress) +{ + FIO_freeDict(&(ress.dict)); + CHECK( ZSTD_freeDStream(ress.dctx) ); + AIO_WritePool_free(ress.writeCtx); + AIO_ReadPool_free(ress.readCtx); +} + +/* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode + * @return : 0 (no error) */ +static int FIO_passThrough(dRess_t *ress) +{ + size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize()); + IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + + while(ress->readCtx->srcBufferLoaded) { + size_t writeSize; + writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); + assert(writeSize <= writeJob->bufferSize); + memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize); + writeJob->usedBufferSize = writeSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + AIO_ReadPool_consumeBytes(ress->readCtx, writeSize); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + } + assert(ress->readCtx->reachedEof); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return 0; +} + +/* FIO_zstdErrorHelp() : + * detailed error message when requested window size is too large */ +static void +FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, + const dRess_t* ress, + size_t err, + const char* srcFileName) +{ + ZSTD_frameHeader header; + + /* Help 
message only for one specific error */ + if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge) + return; + + /* Try to decode the frame header */ + err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded); + if (err == 0) { + unsigned long long const windowSize = header.windowSize; + unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0); + assert(prefs->memLimit > 0); + DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n", + srcFileName, windowSize, prefs->memLimit); + if (windowLog <= ZSTD_WINDOWLOG_MAX) { + unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0)); + assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */ + DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n", + srcFileName, windowLog, windowMB); + return; + } } + DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n", + srcFileName, ZSTD_WINDOWLOG_MAX); +} + +/** FIO_decompressFrame() : + * @return : size of decoded zstd frame, or an error code + */ +#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) +static unsigned long long +FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, + const FIO_prefs_t* const prefs, + const char* srcFileName, + U64 alreadyDecoded) /* for multi-frames streams */ +{ + U64 frameSize = 0; + IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + + /* display last 20 characters only */ + { size_t const srcFileLength = strlen(srcFileName); + if (srcFileLength>20) srcFileName += srcFileLength-20; + } + + ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only); + + /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ + AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX); + + /* Main decompression Loop */ + while (1) { + ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 ); + 
ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 ); + size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); + UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize); + if (ZSTD_isError(readSizeHint)) { + DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", + srcFileName, ZSTD_getErrorName(readSizeHint)); + FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName); + AIO_WritePool_releaseIoJob(writeJob); + return FIO_ERROR_FRAME_DECODING; + } + + /* Write block */ + writeJob->usedBufferSize = outBuff.pos; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + frameSize += outBuff.pos; + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAYUPDATE_PROGRESS( + "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + } else { + DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); + } + } else { + DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... 
", + srcFileName, hrs.precision, hrs.value, hrs.suffix); + } + + AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos); + + if (readSizeHint == 0) break; /* end of frame */ + + /* Fill input buffer */ + { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */ + if (ress->readCtx->srcBufferLoaded < toDecode) { + size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode); + if (readSize==0) { + DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", + srcFileName); + AIO_WritePool_releaseIoJob(writeJob); + return FIO_ERROR_FRAME_DECODING; + } + } } } + + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + + return frameSize; +} + + +#ifdef ZSTD_GZDECOMPRESS +static unsigned long long +FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName) +{ + unsigned long long outFileSize = 0; + z_stream strm; + int flush = Z_NO_FLUSH; + int decodingError = 0; + IOJob_t *writeJob = NULL; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = 0; + strm.avail_in = 0; + /* see https://www.zlib.net/manual.html */ + if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) + return FIO_ERROR_FRAME_DECODING; + + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + + for ( ; ; ) { + int ret; + if (strm.avail_in == 0) { + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + } + ret = inflate(&strm, flush); + if (ret == Z_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); + decodingError = 1; break; + } + if (ret != 
Z_OK && ret != Z_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = writeJob->bufferSize - strm.avail_out; + if (decompBytes) { + writeJob->usedBufferSize = decompBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += decompBytes; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } + } + if (ret == Z_STREAM_END) break; + } + + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); + + if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ + && (decodingError==0) ) { + DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); + decodingError = 1; + } + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + +#ifdef ZSTD_LZMADECOMPRESS +static unsigned long long +FIO_decompressLzmaFrame(dRess_t* ress, + const char* srcFileName, int plain_lzma) +{ + unsigned long long outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret initRet; + int decodingError = 0; + IOJob_t *writeJob = NULL; + + strm.next_in = 0; + strm.avail_in = 0; + if (plain_lzma) { + initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ + } else { + initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ + } + + if (initRet != LZMA_OK) { + DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", + plain_lzma ? 
"lzma_alone_decoder" : "lzma_stream_decoder", + srcFileName, initRet); + return FIO_ERROR_FRAME_DECODING; + } + + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (BYTE*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; + + for ( ; ; ) { + lzma_ret ret; + if (strm.avail_in == 0) { + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; + } + ret = lzma_code(&strm, action); + + if (ret == LZMA_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); + decodingError = 1; break; + } + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", + srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = writeJob->bufferSize - strm.avail_out; + if (decompBytes) { + writeJob->usedBufferSize = decompBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + outFileSize += decompBytes; + strm.next_out = (BYTE*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; + } } + if (ret == LZMA_STREAM_END) break; + } + + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); + lzma_end(&strm); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return decodingError ? 
FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + +#ifdef ZSTD_LZ4DECOMPRESS +static unsigned long long +FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) +{ + unsigned long long filesize = 0; + LZ4F_errorCode_t nextToLoad = 4; + LZ4F_decompressionContext_t dCtx; + LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + int decodingError = 0; + IOJob_t *writeJob = NULL; + + if (LZ4F_isError(errorCode)) { + DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); + return FIO_ERROR_FRAME_DECODING; + } + + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + + /* Main Loop */ + for (;nextToLoad;) { + size_t pos = 0; + size_t decodedBytes = writeJob->bufferSize; + int fullBufferDecoded = 0; + + /* Read input */ + AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad); + if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */ + + while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */ + /* Decode Input (at least partially) */ + size_t remaining = ress->readCtx->srcBufferLoaded - pos; + decodedBytes = writeJob->bufferSize; + nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos, + &remaining, NULL); + if (LZ4F_isError(nextToLoad)) { + DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", + srcFileName, LZ4F_getErrorName(nextToLoad)); + decodingError = 1; nextToLoad = 0; break; + } + pos += remaining; + assert(pos <= ress->readCtx->srcBufferLoaded); + fullBufferDecoded = decodedBytes == writeJob->bufferSize; + + /* Write Block */ + if (decodedBytes) { + UTIL_HumanReadableSize_t hrs; + writeJob->usedBufferSize = decodedBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + filesize += decodedBytes; + hrs = UTIL_makeHumanReadableSize(filesize); + DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); + } + + if (!nextToLoad) break; + } + 
AIO_ReadPool_consumeBytes(ress->readCtx, pos); + } + if (nextToLoad!=0) { + DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); + decodingError=1; + } + + LZ4F_freeDecompressionContext(dCtx); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + + return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; +} +#endif + + + +/** FIO_decompressFrames() : + * Find and decode frames inside srcFile + * srcFile presumed opened and valid + * @return : 0 : OK + * 1 : error + */ +static int FIO_decompressFrames(FIO_ctx_t* const fCtx, + dRess_t ress, const FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName) +{ + unsigned readSomething = 0; + unsigned long long filesize = 0; + int passThrough = prefs->passThrough; + + if (passThrough == -1) { + /* If pass-through mode is not explicitly enabled or disabled, + * default to the legacy behavior of enabling it if we are writing + * to stdout with the overwrite flag enabled. + */ + passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark); + } + assert(passThrough == 0 || passThrough == 1); + + /* for each frame */ + for ( ; ; ) { + /* check magic number -> version */ + size_t const toRead = 4; + const BYTE* buf; + AIO_ReadPool_fillBuffer(ress.readCtx, toRead); + buf = (const BYTE*)ress.readCtx->srcBuffer; + if (ress.readCtx->srcBufferLoaded==0) { + if (readSomething==0) { /* srcFile is empty (which is invalid) */ + DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); + return 1; + } /* else, just reached frame boundary */ + break; /* no more input */ + } + readSomething = 1; /* there is at least 1 byte in srcFile */ + if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */ + if (passThrough) { + return FIO_passThrough(&ress); + } + DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); + return 1; + } + if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) { + unsigned long long const 
frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; + } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ +#ifdef ZSTD_GZDECOMPRESS + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); + return 1; +#endif + } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ + || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ +#ifdef ZSTD_LZMADECOMPRESS + unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); + return 1; +#endif + } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { +#ifdef ZSTD_LZ4DECOMPRESS + unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); + return 1; +#endif + } else if (passThrough) { + return FIO_passThrough(&ress); + } else { + DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); + return 1; + } } /* for each frame */ + + /* Final Status */ + fCtx->totalBytesOutput += (size_t)filesize; + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) + DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize); + + return 0; +} + +/** FIO_decompressDstFile() : + open `dstFileName`, or pass-through if writeCtx's file is already != 0, + then 
start decompression process (FIO_decompressFrames()). + @return : 0 : OK + 1 : operation aborted +*/ +static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + dRess_t ress, + const char* dstFileName, + const char* srcFileName, + const stat_t* srcFileStat) +{ + int result; + int releaseDstFile = 0; + int transferStat = 0; + int dstFd = 0; + + if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) { + FILE *dstFile; + int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; + if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ + && strcmp(dstFileName, stdoutmark) + && UTIL_isRegularFileStat(srcFileStat) ) { + transferStat = 1; + dstFilePermissions = TEMPORARY_FILE_PERMISSIONS; + } + + releaseDstFile = 1; + + dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); + if (dstFile==NULL) return 1; + dstFd = fileno(dstFile); + AIO_WritePool_setFile(ress.writeCtx, dstFile); + + /* Must only be added after FIO_openDstFile() succeeds. + * Otherwise we may delete the destination file if it already exists, + * and the user presses Ctrl-C when asked if they wish to overwrite. 
+ */ + addHandler(dstFileName); + } + + result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName); + + if (releaseDstFile) { + clearHandler(); + + if (transferStat) { + UTIL_setFDStat(dstFd, dstFileName, srcFileStat); + } + + if (AIO_WritePool_closeFile(ress.writeCtx)) { + DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); + result = 1; + } + + if (transferStat) { + UTIL_utime(dstFileName, srcFileStat); + } + + if ( (result != 0) /* operation failure */ + && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ + ) { + FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ + } + } + + return result; +} + + +/** FIO_decompressSrcFile() : + Open `srcFileName`, transfer control to decompressDstFile() + @return : 0 : OK + 1 : error +*/ +static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) +{ + FILE* srcFile; + stat_t srcFileStat; + int result; + U64 fileSize = UTIL_FILESIZE_UNKNOWN; + + if (UTIL_isDirectory(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); + if (srcFile==NULL) return 1; + + /* Don't use AsyncIO for small files */ + if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ + fileSize = UTIL_getFileSizeStat(&srcFileStat); + if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { + AIO_ReadPool_setAsync(ress.readCtx, 0); + AIO_WritePool_setAsync(ress.writeCtx, 0); + } else { + AIO_ReadPool_setAsync(ress.readCtx, 1); + AIO_WritePool_setAsync(ress.writeCtx, 1); + } + + AIO_ReadPool_setFile(ress.readCtx, srcFile); + + result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat); + + AIO_ReadPool_setFile(ress.readCtx, NULL); + + /* Close file */ + if (fclose(srcFile)) { + 
DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ + return 1; + } + if ( prefs->removeSrcFile /* --rm */ + && (result==0) /* decompression successful */ + && strcmp(srcFileName, stdinmark) ) /* not stdin */ { + /* We must clear the handler, since after this point calling it would + * delete both the source and destination files. + */ + clearHandler(); + if (FIO_removeFile(srcFileName)) { + /* failed to remove src file */ + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + return 1; + } } + return result; +} + + + +int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName, + const char* dictFileName) +{ + dRess_t const ress = FIO_createDResources(prefs, dictFileName); + + int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); + + + + FIO_freeDResources(ress); + return decodingError; +} + +static const char *suffixList[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, +#ifndef ZSTD_NODECOMPRESS + ZSTD_ALT_EXTENSION, +#endif +#ifdef ZSTD_GZDECOMPRESS + GZ_EXTENSION, + TGZ_EXTENSION, +#endif +#ifdef ZSTD_LZMADECOMPRESS + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, +#endif +#ifdef ZSTD_LZ4DECOMPRESS + LZ4_EXTENSION, + TLZ4_EXTENSION, +#endif + NULL +}; + +static const char *suffixListStr = + ZSTD_EXTENSION "/" TZSTD_EXTENSION +#ifdef ZSTD_GZDECOMPRESS + "/" GZ_EXTENSION "/" TGZ_EXTENSION +#endif +#ifdef ZSTD_LZMADECOMPRESS + "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION +#endif +#ifdef ZSTD_LZ4DECOMPRESS + "/" LZ4_EXTENSION "/" TLZ4_EXTENSION +#endif +; + +/* FIO_determineDstName() : + * create a destination filename from a srcFileName. + * @return a pointer to it. 
+ * @return == NULL if there is an error */ +static const char* +FIO_determineDstName(const char* srcFileName, const char* outDirName) +{ + static size_t dfnbCapacity = 0; + static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + size_t dstFileNameEndPos; + char* outDirFilename = NULL; + const char* dstSuffix = ""; + size_t dstSuffixLen = 0; + + size_t sfnSize = strlen(srcFileName); + + size_t srcSuffixLen; + const char* const srcSuffix = strrchr(srcFileName, '.'); + + if(!strcmp(srcFileName, stdinmark)) { + return stdoutmark; + } + + if (srcSuffix == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + srcSuffixLen = strlen(srcSuffix); + + { + const char** matchedSuffixPtr; + for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { + if (!strcmp(*matchedSuffixPtr, srcSuffix)) { + break; + } + } + + /* check suffix is authorized */ + if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. 
Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + + if ((*matchedSuffixPtr)[1] == 't') { + dstSuffix = ".tar"; + dstSuffixLen = strlen(dstSuffix); + } + } + + if (outDirName) { + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); + sfnSize = strlen(outDirFilename); + assert(outDirFilename != NULL); + } + + if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { + /* allocate enough space to write dstFilename into it */ + free(dstFileNameBuffer); + dfnbCapacity = sfnSize + 20; + dstFileNameBuffer = (char*)malloc(dfnbCapacity); + if (dstFileNameBuffer==NULL) + EXM_THROW(74, "%s : not enough memory for dstFileName", + strerror(errno)); + } + + /* return dst name == src name truncated from suffix */ + assert(dstFileNameBuffer != NULL); + dstFileNameEndPos = sfnSize - srcSuffixLen; + if (outDirFilename) { + memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); + free(outDirFilename); + } else { + memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); + } + + /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" + * extension on decompression. Also writes terminating null. 
*/ + strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); + return dstFileNameBuffer; + + /* note : dstFileNameBuffer memory is not going to be free */ +} + +int +FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** srcNamesTable, + const char* outMirroredRootDirName, + const char* outDirName, const char* outFileName, + const char* dictFileName) +{ + int status; + int error = 0; + dRess_t ress = FIO_createDResources(prefs, dictFileName); + + if (outFileName) { + if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeDResources(ress); + return 1; + } + if (!prefs->testMode) { + FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); + AIO_WritePool_setFile(ress.writeCtx, dstFile); + } + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { + status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx))) + EXM_THROW(72, "Write error : %s : cannot properly close output file", + strerror(errno)); + } else { + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); + + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ + const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; + const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", 
srcFileName, outMirroredRootDirName); + } + } else { + dstFileName = FIO_determineDstName(srcFileName, outDirName); + } + if (dstFileName == NULL) { error=1; continue; } + status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if (outDirName) + FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); + } + + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n", + fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput); + } + + FIO_freeDResources(ress); + return error; +} + +/* ************************************************************************** + * .zst file info (--list command) + ***************************************************************************/ + +typedef struct { + U64 decompressedSize; + U64 compressedSize; + U64 windowSize; + int numActualFrames; + int numSkippableFrames; + int decompUnavailable; + int usesCheck; + BYTE checksum[4]; + U32 nbFiles; + unsigned dictID; +} fileInfo_t; + +typedef enum { + info_success=0, + info_frame_error=1, + info_not_zstd=2, + info_file_error=3, + info_truncated_input=4 +} InfoError; + +#define ERROR_IF(c,n,...) 
{ \ + if (c) { \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + return n; \ + } \ +} + +static InfoError +FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) +{ + /* begin analyzing frame */ + for ( ; ; ) { + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); + if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) { + if ( feof(srcFile) + && (numBytesRead == 0) + && (info->compressedSize > 0) + && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { + unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile); + unsigned long long file_size = (unsigned long long) info->compressedSize; + ERROR_IF(file_position != file_size, info_truncated_input, + "Error: seeked to position %llu, which is beyond file size of %llu\n", + file_position, + file_size); + break; /* correct end of file => success */ + } + ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame"); + ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames"); + } + { U32 const magicNumber = MEM_readLE32(headerBuffer); + /* Zstandard frame */ + if (magicNumber == ZSTD_MAGICNUMBER) { + ZSTD_frameHeader header; + U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); + if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR + || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) { + info->decompUnavailable = 1; + } else { + info->decompressedSize += frameContentSize; + } + ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, + info_frame_error, "Error: could not decode frame header"); + if (info->dictID != 0 && info->dictID != header.dictID) { + DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. 
Showing dictID 0 instead"); + info->dictID = 0; + } else { + info->dictID = header.dictID; + } + info->windowSize = header.windowSize; + /* move to the end of the frame header */ + { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); + ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size"); + ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0, + info_frame_error, "Error: could not move to end of frame header"); + } + + /* skip all blocks in the frame */ + { int lastBlock = 0; + do { + BYTE blockHeaderBuffer[3]; + ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3, + info_frame_error, "Error while reading block header"); + { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); + U32 const blockTypeID = (blockHeader >> 1) & 3; + U32 const isRLE = (blockTypeID == 1); + U32 const isWrongBlock = (blockTypeID == 3); + long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); + ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type"); + lastBlock = blockHeader & 1; + ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0, + info_frame_error, "Error: could not skip to end of block"); + } + } while (lastBlock != 1); + } + + /* check if checksum is used */ + { BYTE const frameHeaderDescriptor = headerBuffer[4]; + int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; + if (contentChecksumFlag) { + info->usesCheck = 1; + ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4, + info_frame_error, "Error: could not read checksum"); + } } + info->numActualFrames++; + } + /* Skippable frame */ + else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + U32 const frameSize = MEM_readLE32(headerBuffer + 4); + long const seek = (long)(8 + frameSize - numBytesRead); + ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0, + info_frame_error, "Error: could not find end of skippable frame"); + info->numSkippableFrames++; + } + /* 
unknown content */ + else { + return info_not_zstd; + } + } /* magic number analysis */ + } /* end analyzing frames */ + return info_success; +} + + +static InfoError +getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) +{ + InfoError status; + stat_t srcFileStat; + FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat); + ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName); + + info->compressedSize = UTIL_getFileSizeStat(&srcFileStat); + status = FIO_analyzeFrames(info, srcFile); + + fclose(srcFile); + info->nbFiles = 1; + return status; +} + + +/** getFileInfo() : + * Reads information from file, stores in *info + * @return : InfoError status + */ +static InfoError +getFileInfo(fileInfo_t* info, const char* srcFileName) +{ + ERROR_IF(!UTIL_isRegularFile(srcFileName), + info_file_error, "Error : %s is not a file", srcFileName); + return getFileInfo_fileConfirmed(info, srcFileName); +} + + +static void +displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) +{ + UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize); + UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize); + UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize); + double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize; + const char* const checkString = (info->usesCheck ? 
"XXH64" : "None"); + if (displayLevel <= 2) { + if (!info->decompUnavailable) { + DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + ratio, checkString, inFileName); + } else { + DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + checkString, inFileName); + } + } else { + DISPLAYOUT("%s \n", inFileName); + DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); + if (info->numSkippableFrames) + DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); + DISPLAYOUT("DictID: %u\n", info->dictID); + DISPLAYOUT("Window Size: %.*f%s (%llu B)\n", + window_hrs.precision, window_hrs.value, window_hrs.suffix, + (unsigned long long)info->windowSize); + DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n", + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + (unsigned long long)info->compressedSize); + if (!info->decompUnavailable) { + DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n", + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + (unsigned long long)info->decompressedSize); + DISPLAYOUT("Ratio: %.4f\n", ratio); + } + + if (info->usesCheck && info->numActualFrames == 1) { + DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString, + info->checksum[3], info->checksum[2], + info->checksum[1], info->checksum[0] + ); + } else { + DISPLAYOUT("Check: %s\n", checkString); + } + + DISPLAYOUT("\n"); + } +} + +static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) +{ + fileInfo_t total; + memset(&total, 0, sizeof(total)); + total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; + total.numSkippableFrames = 
fi1.numSkippableFrames + fi2.numSkippableFrames; + total.compressedSize = fi1.compressedSize + fi2.compressedSize; + total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; + total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; + total.usesCheck = fi1.usesCheck & fi2.usesCheck; + total.nbFiles = fi1.nbFiles + fi2.nbFiles; + return total; +} + +static int +FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel) +{ + fileInfo_t info; + memset(&info, 0, sizeof(info)); + { InfoError const error = getFileInfo(&info, inFileName); + switch (error) { + case info_frame_error: + /* display error, but provide output */ + DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName); + break; + case info_not_zstd: + DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName); + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_file_error: + /* error occurred while opening the file */ + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_truncated_input: + DISPLAYOUT("File \"%s\" is truncated \n", inFileName); + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_success: + default: + break; + } + + displayInfo(inFileName, &info, displayLevel); + *total = FIO_addFInfo(*total, info); + assert(error == info_success || error == info_frame_error); + return (int)error; + } +} + +int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel) +{ + /* ensure no specified input is stdin (needs fseek() capability) */ + { unsigned u; + for (u=0; u 1 && displayLevel <= 2) { /* display total */ + UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize); + UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize); + double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize; + const char* const checkString = (total.usesCheck ? 
"XXH64" : ""); + DISPLAYOUT("----------------------------------------------------------------- \n"); + if (total.decompUnavailable) { + DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + checkString, (unsigned)total.nbFiles); + } else { + DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + ratio, checkString, (unsigned)total.nbFiles); + } } + return error; + } +} + + +#endif /* #ifndef ZSTD_NODECOMPRESS */