| 1 | /* |
| 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * This source code is licensed under both the BSD-style license (found in the |
| 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 7 | * in the COPYING file in the root directory of this source tree). |
| 8 | */ |
| 9 | #include "Options.h" |
| 10 | #include "util.h" |
| 11 | #include "utils/ScopeGuard.h" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <cassert> |
| 15 | #include <cstdio> |
| 16 | #include <cstring> |
| 17 | #include <iterator> |
| 18 | #include <thread> |
| 19 | #include <vector> |
| 20 | |
| 21 | |
| 22 | namespace pzstd { |
| 23 | |
| 24 | namespace { |
/**
 * Picks the thread count used when the user does not pass -p.
 *
 * When PZSTD_NUM_THREADS is defined at build time its value is used;
 * otherwise the value reported by std::thread::hardware_concurrency() is
 * returned as-is.
 */
unsigned defaultNumThreads() {
#ifdef PZSTD_NUM_THREADS
  const unsigned threadCount = PZSTD_NUM_THREADS;
#else
  const unsigned threadCount = std::thread::hardware_concurrency();
#endif
  return threadCount;
}
| 32 | |
/**
 * Parses a run of leading decimal digits from `*arg`.
 *
 * On return `*arg` points at the first character that is not a decimal digit
 * (all digits are consumed, even after overflow has been detected).
 *
 * @param arg  In/out cursor into a NUL-terminated string.
 * @return The parsed value, 0 if there were no digits, or the largest
 *         `unsigned` value if the digits do not fit. Saturating instead of
 *         wrapping lets callers' range checks reject absurd inputs rather
 *         than silently accepting a wrapped-around small number.
 */
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
  unsigned result = 0;
  bool overflowed = false;
  while (**arg >= '0' && **arg <= '9') {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit would exceed kMaxValue exactly when
    // result > (kMaxValue - digit) / 10.
    if (overflowed || result > (kMaxValue - digit) / 10) {
      overflowed = true;
    } else {
      result = result * 10 + digit;
    }
    ++(*arg);
  }
  return overflowed ? kMaxValue : result;
}
| 42 | |
/**
 * Fetches the value belonging to the short option at `*options`.
 *
 * If characters follow the option letter in the same argument (e.g. "-p4"),
 * the remainder of that argument is the value. Otherwise `i` is advanced and
 * the next element of `argv` is used.
 *
 * @param options  Points at the option letter inside the current argument.
 * @param argv     Argument vector.
 * @param i        Index of the current argument; incremented when the value
 *                 is taken from the following argument.
 * @param argc     Number of entries in `argv`.
 * @return The value, or nullptr (after printing an error to stderr) when the
 *         option is the last argument and has no attached value.
 */
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const char *const attachedValue = options + 1;
  if (*attachedValue != 0) {
    return attachedValue;
  }
  ++i;
  if (i != argc) {
    return argv[i];
  }
  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               *options);
  return nullptr;
}
| 56 | |
// Suffix appended to compressed files and stripped when decompressing.
const std::string kZstdExtension{".zst"};
// File name that denotes standard input on the command line.
constexpr char kStdIn[] = "-";
// File name that denotes standard output on the command line.
constexpr char kStdOut[] = "-";
// Compression level used when none is given on the command line.
constexpr unsigned kDefaultCompressionLevel{3};
// Highest compression level accepted without --ultra.
constexpr unsigned kMaxNonUltraCompressionLevel{19};

// Platform-specific sink used as the output file in test mode.
#ifndef _WIN32
const char nullOutput[] = "/dev/null";
#else
const char nullOutput[] = "nul";
#endif
| 68 | |
/**
 * Prints an "Operation not supported" diagnostic for `option` to stderr.
 *
 * @param option  Text describing the rejected option or feature.
 */
void notSupported(const char *option) {
  FILE *const sink = stderr;
  std::fprintf(sink, "Operation not supported: %s\n", option);
}
| 72 | |
| 73 | void usage() { |
| 74 | std::fprintf(stderr, "Usage:\n"); |
| 75 | std::fprintf(stderr, " pzstd [args] [FILE(s)]\n"); |
| 76 | std::fprintf(stderr, "Parallel ZSTD options:\n"); |
| 77 | std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n"); |
| 78 | |
| 79 | std::fprintf(stderr, "ZSTD options:\n"); |
| 80 | std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel); |
| 81 | std::fprintf(stderr, " -d, --decompress : decompression\n"); |
| 82 | std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n"); |
| 83 | std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n"); |
| 84 | std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n"); |
| 85 | std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n"); |
| 86 | std::fprintf(stderr, " -h, --help : display help and exit\n"); |
| 87 | std::fprintf(stderr, " -V, --version : display version number and exit\n"); |
| 88 | std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n"); |
| 89 | std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n"); |
| 90 | std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n"); |
| 91 | #ifdef UTIL_HAS_CREATEFILELIST |
| 92 | std::fprintf(stderr, " -r : operate recursively on directories\n"); |
| 93 | #endif |
| 94 | std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel()); |
| 95 | std::fprintf(stderr, " -C, --check : integrity check (default)\n"); |
| 96 | std::fprintf(stderr, " --no-check : no integrity check\n"); |
| 97 | std::fprintf(stderr, " -t, --test : test compressed file integrity\n"); |
| 98 | std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n"); |
| 99 | } |
| 100 | } // anonymous namespace |
| 101 | |
| 102 | Options::Options() |
| 103 | : numThreads(defaultNumThreads()), maxWindowLog(23), |
| 104 | compressionLevel(kDefaultCompressionLevel), decompress(false), |
| 105 | overwrite(false), keepSource(true), writeMode(WriteMode::Auto), |
| 106 | checksum(true), verbosity(2) {} |
| 107 | |
| 108 | Options::Status Options::parse(int argc, const char **argv) { |
| 109 | bool test = false; |
| 110 | bool recursive = false; |
| 111 | bool ultra = false; |
| 112 | bool forceStdout = false; |
| 113 | bool followLinks = false; |
| 114 | // Local copy of input files, which are pointers into argv. |
| 115 | std::vector<const char *> localInputFiles; |
| 116 | for (int i = 1; i < argc; ++i) { |
| 117 | const char *arg = argv[i]; |
| 118 | // Protect against empty arguments |
| 119 | if (arg[0] == 0) { |
| 120 | continue; |
| 121 | } |
| 122 | // Everything after "--" is an input file |
| 123 | if (!std::strcmp(arg, "--")) { |
| 124 | ++i; |
| 125 | std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles)); |
| 126 | break; |
| 127 | } |
| 128 | // Long arguments that don't have a short option |
| 129 | { |
| 130 | bool isLongOption = true; |
| 131 | if (!std::strcmp(arg, "--rm")) { |
| 132 | keepSource = false; |
| 133 | } else if (!std::strcmp(arg, "--ultra")) { |
| 134 | ultra = true; |
| 135 | maxWindowLog = 0; |
| 136 | } else if (!std::strcmp(arg, "--no-check")) { |
| 137 | checksum = false; |
| 138 | } else if (!std::strcmp(arg, "--sparse")) { |
| 139 | writeMode = WriteMode::Sparse; |
| 140 | notSupported("Sparse mode"); |
| 141 | return Status::Failure; |
| 142 | } else if (!std::strcmp(arg, "--no-sparse")) { |
| 143 | writeMode = WriteMode::Regular; |
| 144 | notSupported("Sparse mode"); |
| 145 | return Status::Failure; |
| 146 | } else if (!std::strcmp(arg, "--dictID")) { |
| 147 | notSupported(arg); |
| 148 | return Status::Failure; |
| 149 | } else if (!std::strcmp(arg, "--no-dictID")) { |
| 150 | notSupported(arg); |
| 151 | return Status::Failure; |
| 152 | } else { |
| 153 | isLongOption = false; |
| 154 | } |
| 155 | if (isLongOption) { |
| 156 | continue; |
| 157 | } |
| 158 | } |
| 159 | // Arguments with a short option simply set their short option. |
| 160 | const char *options = nullptr; |
| 161 | if (!std::strcmp(arg, "--processes")) { |
| 162 | options = "p"; |
| 163 | } else if (!std::strcmp(arg, "--version")) { |
| 164 | options = "V"; |
| 165 | } else if (!std::strcmp(arg, "--help")) { |
| 166 | options = "h"; |
| 167 | } else if (!std::strcmp(arg, "--decompress")) { |
| 168 | options = "d"; |
| 169 | } else if (!std::strcmp(arg, "--force")) { |
| 170 | options = "f"; |
| 171 | } else if (!std::strcmp(arg, "--stdout")) { |
| 172 | options = "c"; |
| 173 | } else if (!std::strcmp(arg, "--keep")) { |
| 174 | options = "k"; |
| 175 | } else if (!std::strcmp(arg, "--verbose")) { |
| 176 | options = "v"; |
| 177 | } else if (!std::strcmp(arg, "--quiet")) { |
| 178 | options = "q"; |
| 179 | } else if (!std::strcmp(arg, "--check")) { |
| 180 | options = "C"; |
| 181 | } else if (!std::strcmp(arg, "--test")) { |
| 182 | options = "t"; |
| 183 | } else if (arg[0] == '-' && arg[1] != 0) { |
| 184 | options = arg + 1; |
| 185 | } else { |
| 186 | localInputFiles.emplace_back(arg); |
| 187 | continue; |
| 188 | } |
| 189 | assert(options != nullptr); |
| 190 | |
| 191 | bool finished = false; |
| 192 | while (!finished && *options != 0) { |
| 193 | // Parse the compression level |
| 194 | if (*options >= '0' && *options <= '9') { |
| 195 | compressionLevel = parseUnsigned(&options); |
| 196 | continue; |
| 197 | } |
| 198 | |
| 199 | switch (*options) { |
| 200 | case 'h': |
| 201 | case 'H': |
| 202 | usage(); |
| 203 | return Status::Message; |
| 204 | case 'V': |
| 205 | std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING); |
| 206 | return Status::Message; |
| 207 | case 'p': { |
| 208 | finished = true; |
| 209 | const char *optionArgument = getArgument(options, argv, i, argc); |
| 210 | if (optionArgument == nullptr) { |
| 211 | return Status::Failure; |
| 212 | } |
| 213 | if (*optionArgument < '0' || *optionArgument > '9') { |
| 214 | std::fprintf(stderr, "Option -p expects a number, but %s provided\n", |
| 215 | optionArgument); |
| 216 | return Status::Failure; |
| 217 | } |
| 218 | numThreads = parseUnsigned(&optionArgument); |
| 219 | if (*optionArgument != 0) { |
| 220 | std::fprintf(stderr, |
| 221 | "Option -p expects a number, but %u%s provided\n", |
| 222 | numThreads, optionArgument); |
| 223 | return Status::Failure; |
| 224 | } |
| 225 | break; |
| 226 | } |
| 227 | case 'o': { |
| 228 | finished = true; |
| 229 | const char *optionArgument = getArgument(options, argv, i, argc); |
| 230 | if (optionArgument == nullptr) { |
| 231 | return Status::Failure; |
| 232 | } |
| 233 | outputFile = optionArgument; |
| 234 | break; |
| 235 | } |
| 236 | case 'C': |
| 237 | checksum = true; |
| 238 | break; |
| 239 | case 'k': |
| 240 | keepSource = true; |
| 241 | break; |
| 242 | case 'd': |
| 243 | decompress = true; |
| 244 | break; |
| 245 | case 'f': |
| 246 | overwrite = true; |
| 247 | forceStdout = true; |
| 248 | followLinks = true; |
| 249 | break; |
| 250 | case 't': |
| 251 | test = true; |
| 252 | decompress = true; |
| 253 | break; |
| 254 | #ifdef UTIL_HAS_CREATEFILELIST |
| 255 | case 'r': |
| 256 | recursive = true; |
| 257 | break; |
| 258 | #endif |
| 259 | case 'c': |
| 260 | outputFile = kStdOut; |
| 261 | forceStdout = true; |
| 262 | break; |
| 263 | case 'v': |
| 264 | ++verbosity; |
| 265 | break; |
| 266 | case 'q': |
| 267 | --verbosity; |
| 268 | // Ignore them for now |
| 269 | break; |
| 270 | // Unsupported options from Zstd |
| 271 | case 'D': |
| 272 | case 's': |
| 273 | notSupported("Zstd dictionaries."); |
| 274 | return Status::Failure; |
| 275 | case 'b': |
| 276 | case 'e': |
| 277 | case 'i': |
| 278 | case 'B': |
| 279 | notSupported("Zstd benchmarking options."); |
| 280 | return Status::Failure; |
| 281 | default: |
| 282 | std::fprintf(stderr, "Invalid argument: %s\n", arg); |
| 283 | return Status::Failure; |
| 284 | } |
| 285 | if (!finished) { |
| 286 | ++options; |
| 287 | } |
| 288 | } // while (*options != 0); |
| 289 | } // for (int i = 1; i < argc; ++i); |
| 290 | |
| 291 | // Set options for test mode |
| 292 | if (test) { |
| 293 | outputFile = nullOutput; |
| 294 | keepSource = true; |
| 295 | } |
| 296 | |
| 297 | // Input file defaults to standard input if not provided. |
| 298 | if (localInputFiles.empty()) { |
| 299 | localInputFiles.emplace_back(kStdIn); |
| 300 | } |
| 301 | |
| 302 | // Check validity of input files |
| 303 | if (localInputFiles.size() > 1) { |
| 304 | const auto it = std::find(localInputFiles.begin(), localInputFiles.end(), |
| 305 | std::string{kStdIn}); |
| 306 | if (it != localInputFiles.end()) { |
| 307 | std::fprintf( |
| 308 | stderr, |
| 309 | "Cannot specify standard input when handling multiple files\n"); |
| 310 | return Status::Failure; |
| 311 | } |
| 312 | } |
| 313 | if (localInputFiles.size() > 1 || recursive) { |
| 314 | if (!outputFile.empty() && outputFile != nullOutput) { |
| 315 | std::fprintf( |
| 316 | stderr, |
| 317 | "Cannot specify an output file when handling multiple inputs\n"); |
| 318 | return Status::Failure; |
| 319 | } |
| 320 | } |
| 321 | |
| 322 | g_utilDisplayLevel = verbosity; |
| 323 | // Remove local input files that are symbolic links |
| 324 | if (!followLinks) { |
| 325 | std::remove_if(localInputFiles.begin(), localInputFiles.end(), |
| 326 | [&](const char *path) { |
| 327 | bool isLink = UTIL_isLink(path); |
| 328 | if (isLink && verbosity >= 2) { |
| 329 | std::fprintf( |
| 330 | stderr, |
| 331 | "Warning : %s is symbolic link, ignoring\n", |
| 332 | path); |
| 333 | } |
| 334 | return isLink; |
| 335 | }); |
| 336 | } |
| 337 | |
| 338 | // Translate input files/directories into files to (de)compress |
| 339 | if (recursive) { |
| 340 | FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks); |
| 341 | if (files == nullptr) { |
| 342 | std::fprintf(stderr, "Error traversing directories\n"); |
| 343 | return Status::Failure; |
| 344 | } |
| 345 | auto guard = |
| 346 | makeScopeGuard([&] { UTIL_freeFileNamesTable(files); }); |
| 347 | if (files->tableSize == 0) { |
| 348 | std::fprintf(stderr, "No files found\n"); |
| 349 | return Status::Failure; |
| 350 | } |
| 351 | inputFiles.resize(files->tableSize); |
| 352 | std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin()); |
| 353 | } else { |
| 354 | inputFiles.resize(localInputFiles.size()); |
| 355 | std::copy(localInputFiles.begin(), localInputFiles.end(), |
| 356 | inputFiles.begin()); |
| 357 | } |
| 358 | localInputFiles.clear(); |
| 359 | assert(!inputFiles.empty()); |
| 360 | |
| 361 | // If reading from standard input, default to standard output |
| 362 | if (inputFiles[0] == kStdIn && outputFile.empty()) { |
| 363 | assert(inputFiles.size() == 1); |
| 364 | outputFile = "-"; |
| 365 | } |
| 366 | |
| 367 | if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) { |
| 368 | assert(inputFiles.size() == 1); |
| 369 | std::fprintf(stderr, "Cannot read input from interactive console\n"); |
| 370 | return Status::Failure; |
| 371 | } |
| 372 | if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) { |
| 373 | std::fprintf(stderr, "Will not write to console stdout unless -c or -f is " |
| 374 | "specified and decompressing\n"); |
| 375 | return Status::Failure; |
| 376 | } |
| 377 | |
| 378 | // Check compression level |
| 379 | { |
| 380 | unsigned maxCLevel = |
| 381 | ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel; |
| 382 | if (compressionLevel > maxCLevel || compressionLevel == 0) { |
| 383 | std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel); |
| 384 | return Status::Failure; |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | // Check that numThreads is set |
| 389 | if (numThreads == 0) { |
| 390 | std::fprintf(stderr, "Invalid arguments: # of threads not specified " |
| 391 | "and unable to determine hardware concurrency.\n"); |
| 392 | return Status::Failure; |
| 393 | } |
| 394 | |
| 395 | // Modify verbosity |
| 396 | // If we are piping input and output, turn off interaction |
| 397 | if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) { |
| 398 | verbosity = 1; |
| 399 | } |
| 400 | // If we are in multi-file mode, turn off interaction |
| 401 | if (inputFiles.size() > 1 && verbosity == 2) { |
| 402 | verbosity = 1; |
| 403 | } |
| 404 | |
| 405 | return Status::Success; |
| 406 | } |
| 407 | |
| 408 | std::string Options::getOutputFile(const std::string &inputFile) const { |
| 409 | if (!outputFile.empty()) { |
| 410 | return outputFile; |
| 411 | } |
| 412 | // Attempt to add/remove zstd extension from the input file |
| 413 | if (decompress) { |
| 414 | int stemSize = inputFile.size() - kZstdExtension.size(); |
| 415 | if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) { |
| 416 | return inputFile.substr(0, stemSize); |
| 417 | } else { |
| 418 | return ""; |
| 419 | } |
| 420 | } else { |
| 421 | return inputFile + kZstdExtension; |
| 422 | } |
| 423 | } |
| 424 | } |