648db22b |
1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | */ |
9 | #include "Options.h" |
10 | #include "util.h" |
11 | #include "utils/ScopeGuard.h" |
12 | |
13 | #include <algorithm> |
14 | #include <cassert> |
15 | #include <cstdio> |
16 | #include <cstring> |
17 | #include <iterator> |
18 | #include <thread> |
19 | #include <vector> |
20 | |
21 | |
22 | namespace pzstd { |
23 | |
24 | namespace { |
// Number of worker threads used when the user does not pass -p.
// A build-time PZSTD_NUM_THREADS override wins; otherwise the value reported
// by std::thread::hardware_concurrency() is used as-is.
unsigned defaultNumThreads() {
#if defined(PZSTD_NUM_THREADS)
  const unsigned threadCount = PZSTD_NUM_THREADS;
#else
  const unsigned threadCount = std::thread::hardware_concurrency();
#endif
  return threadCount;
}
32 | |
// Parses the run of decimal digits at *arg and advances *arg past them.
//
// Returns the parsed value, or 0 if *arg does not start with a digit (in which
// case *arg is left untouched). If the digits describe a value that does not
// fit in `unsigned`, the result saturates at the largest `unsigned` value
// instead of silently wrapping around, so that callers' range checks reject
// it rather than acting on an unrelated small number. All digits are consumed
// even after saturation so the caller sees the same trailing text.
unsigned parseUnsigned(const char **arg) {
  constexpr unsigned kMaxValue = static_cast<unsigned>(-1);
  unsigned result = 0;
  bool overflowed = false;
  for (; **arg >= '0' && **arg <= '9'; ++(*arg)) {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit <= kMaxValue  <=>  result <= (kMaxValue - digit) / 10
    if (overflowed || result > (kMaxValue - digit) / 10) {
      overflowed = true;
      continue;
    }
    result = result * 10 + digit;
  }
  return overflowed ? kMaxValue : result;
}
42 | |
// Fetches the argument belonging to the short option at options[0].
//
// If more characters follow the option letter (e.g. "-p4"), the remainder of
// `options` is the argument. Otherwise the next element of argv is consumed:
// `i` is incremented and argv[i] is returned. When there is no next element an
// error is printed to stderr and nullptr is returned (with `i` == argc).
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const bool attachedToOption = options[1] != 0;
  if (attachedToOption) {
    return options + 1;
  }

  i += 1;
  if (i != argc) {
    return argv[i];
  }

  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               *options);
  return nullptr;
}
56 | |
// Compression level used when none is given on the command line.
constexpr unsigned kDefaultCompressionLevel = 3;
// Highest compression level accepted without --ultra.
constexpr unsigned kMaxNonUltraCompressionLevel = 19;

// "-" names standard input when used as an input file and standard output
// when used as an output file.
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";

// Suffix appended when compressing and stripped when decompressing.
const std::string kZstdExtension = ".zst";

// Platform-specific sink used as the output file in test mode.
#if defined(_WIN32)
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
68 | |
// Reports on stderr that `option` names a feature this tool does not
// implement. Callers are responsible for returning a failure status.
void notSupported(const char *option) {
  std::FILE *const errorStream = stderr;
  std::fprintf(errorStream, "Operation not supported: %s\n", option);
}
72 | |
73 | void usage() { |
74 | std::fprintf(stderr, "Usage:\n"); |
75 | std::fprintf(stderr, " pzstd [args] [FILE(s)]\n"); |
76 | std::fprintf(stderr, "Parallel ZSTD options:\n"); |
77 | std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n"); |
78 | |
79 | std::fprintf(stderr, "ZSTD options:\n"); |
80 | std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel); |
81 | std::fprintf(stderr, " -d, --decompress : decompression\n"); |
82 | std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n"); |
83 | std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n"); |
84 | std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n"); |
85 | std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n"); |
86 | std::fprintf(stderr, " -h, --help : display help and exit\n"); |
87 | std::fprintf(stderr, " -V, --version : display version number and exit\n"); |
88 | std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n"); |
89 | std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n"); |
90 | std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n"); |
91 | #ifdef UTIL_HAS_CREATEFILELIST |
92 | std::fprintf(stderr, " -r : operate recursively on directories\n"); |
93 | #endif |
94 | std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel()); |
95 | std::fprintf(stderr, " -C, --check : integrity check (default)\n"); |
96 | std::fprintf(stderr, " --no-check : no integrity check\n"); |
97 | std::fprintf(stderr, " -t, --test : test compressed file integrity\n"); |
98 | std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n"); |
99 | } |
100 | } // anonymous namespace |
101 | |
102 | Options::Options() |
103 | : numThreads(defaultNumThreads()), maxWindowLog(23), |
104 | compressionLevel(kDefaultCompressionLevel), decompress(false), |
105 | overwrite(false), keepSource(true), writeMode(WriteMode::Auto), |
106 | checksum(true), verbosity(2) {} |
107 | |
108 | Options::Status Options::parse(int argc, const char **argv) { |
109 | bool test = false; |
110 | bool recursive = false; |
111 | bool ultra = false; |
112 | bool forceStdout = false; |
113 | bool followLinks = false; |
114 | // Local copy of input files, which are pointers into argv. |
115 | std::vector<const char *> localInputFiles; |
116 | for (int i = 1; i < argc; ++i) { |
117 | const char *arg = argv[i]; |
118 | // Protect against empty arguments |
119 | if (arg[0] == 0) { |
120 | continue; |
121 | } |
122 | // Everything after "--" is an input file |
123 | if (!std::strcmp(arg, "--")) { |
124 | ++i; |
125 | std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles)); |
126 | break; |
127 | } |
128 | // Long arguments that don't have a short option |
129 | { |
130 | bool isLongOption = true; |
131 | if (!std::strcmp(arg, "--rm")) { |
132 | keepSource = false; |
133 | } else if (!std::strcmp(arg, "--ultra")) { |
134 | ultra = true; |
135 | maxWindowLog = 0; |
136 | } else if (!std::strcmp(arg, "--no-check")) { |
137 | checksum = false; |
138 | } else if (!std::strcmp(arg, "--sparse")) { |
139 | writeMode = WriteMode::Sparse; |
140 | notSupported("Sparse mode"); |
141 | return Status::Failure; |
142 | } else if (!std::strcmp(arg, "--no-sparse")) { |
143 | writeMode = WriteMode::Regular; |
144 | notSupported("Sparse mode"); |
145 | return Status::Failure; |
146 | } else if (!std::strcmp(arg, "--dictID")) { |
147 | notSupported(arg); |
148 | return Status::Failure; |
149 | } else if (!std::strcmp(arg, "--no-dictID")) { |
150 | notSupported(arg); |
151 | return Status::Failure; |
152 | } else { |
153 | isLongOption = false; |
154 | } |
155 | if (isLongOption) { |
156 | continue; |
157 | } |
158 | } |
159 | // Arguments with a short option simply set their short option. |
160 | const char *options = nullptr; |
161 | if (!std::strcmp(arg, "--processes")) { |
162 | options = "p"; |
163 | } else if (!std::strcmp(arg, "--version")) { |
164 | options = "V"; |
165 | } else if (!std::strcmp(arg, "--help")) { |
166 | options = "h"; |
167 | } else if (!std::strcmp(arg, "--decompress")) { |
168 | options = "d"; |
169 | } else if (!std::strcmp(arg, "--force")) { |
170 | options = "f"; |
171 | } else if (!std::strcmp(arg, "--stdout")) { |
172 | options = "c"; |
173 | } else if (!std::strcmp(arg, "--keep")) { |
174 | options = "k"; |
175 | } else if (!std::strcmp(arg, "--verbose")) { |
176 | options = "v"; |
177 | } else if (!std::strcmp(arg, "--quiet")) { |
178 | options = "q"; |
179 | } else if (!std::strcmp(arg, "--check")) { |
180 | options = "C"; |
181 | } else if (!std::strcmp(arg, "--test")) { |
182 | options = "t"; |
183 | } else if (arg[0] == '-' && arg[1] != 0) { |
184 | options = arg + 1; |
185 | } else { |
186 | localInputFiles.emplace_back(arg); |
187 | continue; |
188 | } |
189 | assert(options != nullptr); |
190 | |
191 | bool finished = false; |
192 | while (!finished && *options != 0) { |
193 | // Parse the compression level |
194 | if (*options >= '0' && *options <= '9') { |
195 | compressionLevel = parseUnsigned(&options); |
196 | continue; |
197 | } |
198 | |
199 | switch (*options) { |
200 | case 'h': |
201 | case 'H': |
202 | usage(); |
203 | return Status::Message; |
204 | case 'V': |
205 | std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING); |
206 | return Status::Message; |
207 | case 'p': { |
208 | finished = true; |
209 | const char *optionArgument = getArgument(options, argv, i, argc); |
210 | if (optionArgument == nullptr) { |
211 | return Status::Failure; |
212 | } |
213 | if (*optionArgument < '0' || *optionArgument > '9') { |
214 | std::fprintf(stderr, "Option -p expects a number, but %s provided\n", |
215 | optionArgument); |
216 | return Status::Failure; |
217 | } |
218 | numThreads = parseUnsigned(&optionArgument); |
219 | if (*optionArgument != 0) { |
220 | std::fprintf(stderr, |
221 | "Option -p expects a number, but %u%s provided\n", |
222 | numThreads, optionArgument); |
223 | return Status::Failure; |
224 | } |
225 | break; |
226 | } |
227 | case 'o': { |
228 | finished = true; |
229 | const char *optionArgument = getArgument(options, argv, i, argc); |
230 | if (optionArgument == nullptr) { |
231 | return Status::Failure; |
232 | } |
233 | outputFile = optionArgument; |
234 | break; |
235 | } |
236 | case 'C': |
237 | checksum = true; |
238 | break; |
239 | case 'k': |
240 | keepSource = true; |
241 | break; |
242 | case 'd': |
243 | decompress = true; |
244 | break; |
245 | case 'f': |
246 | overwrite = true; |
247 | forceStdout = true; |
248 | followLinks = true; |
249 | break; |
250 | case 't': |
251 | test = true; |
252 | decompress = true; |
253 | break; |
254 | #ifdef UTIL_HAS_CREATEFILELIST |
255 | case 'r': |
256 | recursive = true; |
257 | break; |
258 | #endif |
259 | case 'c': |
260 | outputFile = kStdOut; |
261 | forceStdout = true; |
262 | break; |
263 | case 'v': |
264 | ++verbosity; |
265 | break; |
266 | case 'q': |
267 | --verbosity; |
268 | // Ignore them for now |
269 | break; |
270 | // Unsupported options from Zstd |
271 | case 'D': |
272 | case 's': |
273 | notSupported("Zstd dictionaries."); |
274 | return Status::Failure; |
275 | case 'b': |
276 | case 'e': |
277 | case 'i': |
278 | case 'B': |
279 | notSupported("Zstd benchmarking options."); |
280 | return Status::Failure; |
281 | default: |
282 | std::fprintf(stderr, "Invalid argument: %s\n", arg); |
283 | return Status::Failure; |
284 | } |
285 | if (!finished) { |
286 | ++options; |
287 | } |
288 | } // while (*options != 0); |
289 | } // for (int i = 1; i < argc; ++i); |
290 | |
291 | // Set options for test mode |
292 | if (test) { |
293 | outputFile = nullOutput; |
294 | keepSource = true; |
295 | } |
296 | |
297 | // Input file defaults to standard input if not provided. |
298 | if (localInputFiles.empty()) { |
299 | localInputFiles.emplace_back(kStdIn); |
300 | } |
301 | |
302 | // Check validity of input files |
303 | if (localInputFiles.size() > 1) { |
304 | const auto it = std::find(localInputFiles.begin(), localInputFiles.end(), |
305 | std::string{kStdIn}); |
306 | if (it != localInputFiles.end()) { |
307 | std::fprintf( |
308 | stderr, |
309 | "Cannot specify standard input when handling multiple files\n"); |
310 | return Status::Failure; |
311 | } |
312 | } |
313 | if (localInputFiles.size() > 1 || recursive) { |
314 | if (!outputFile.empty() && outputFile != nullOutput) { |
315 | std::fprintf( |
316 | stderr, |
317 | "Cannot specify an output file when handling multiple inputs\n"); |
318 | return Status::Failure; |
319 | } |
320 | } |
321 | |
322 | g_utilDisplayLevel = verbosity; |
323 | // Remove local input files that are symbolic links |
324 | if (!followLinks) { |
325 | std::remove_if(localInputFiles.begin(), localInputFiles.end(), |
326 | [&](const char *path) { |
327 | bool isLink = UTIL_isLink(path); |
328 | if (isLink && verbosity >= 2) { |
329 | std::fprintf( |
330 | stderr, |
331 | "Warning : %s is symbolic link, ignoring\n", |
332 | path); |
333 | } |
334 | return isLink; |
335 | }); |
336 | } |
337 | |
338 | // Translate input files/directories into files to (de)compress |
339 | if (recursive) { |
340 | FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks); |
341 | if (files == nullptr) { |
342 | std::fprintf(stderr, "Error traversing directories\n"); |
343 | return Status::Failure; |
344 | } |
345 | auto guard = |
346 | makeScopeGuard([&] { UTIL_freeFileNamesTable(files); }); |
347 | if (files->tableSize == 0) { |
348 | std::fprintf(stderr, "No files found\n"); |
349 | return Status::Failure; |
350 | } |
351 | inputFiles.resize(files->tableSize); |
352 | std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin()); |
353 | } else { |
354 | inputFiles.resize(localInputFiles.size()); |
355 | std::copy(localInputFiles.begin(), localInputFiles.end(), |
356 | inputFiles.begin()); |
357 | } |
358 | localInputFiles.clear(); |
359 | assert(!inputFiles.empty()); |
360 | |
361 | // If reading from standard input, default to standard output |
362 | if (inputFiles[0] == kStdIn && outputFile.empty()) { |
363 | assert(inputFiles.size() == 1); |
364 | outputFile = "-"; |
365 | } |
366 | |
367 | if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) { |
368 | assert(inputFiles.size() == 1); |
369 | std::fprintf(stderr, "Cannot read input from interactive console\n"); |
370 | return Status::Failure; |
371 | } |
372 | if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) { |
373 | std::fprintf(stderr, "Will not write to console stdout unless -c or -f is " |
374 | "specified and decompressing\n"); |
375 | return Status::Failure; |
376 | } |
377 | |
378 | // Check compression level |
379 | { |
380 | unsigned maxCLevel = |
381 | ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel; |
382 | if (compressionLevel > maxCLevel || compressionLevel == 0) { |
383 | std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel); |
384 | return Status::Failure; |
385 | } |
386 | } |
387 | |
388 | // Check that numThreads is set |
389 | if (numThreads == 0) { |
390 | std::fprintf(stderr, "Invalid arguments: # of threads not specified " |
391 | "and unable to determine hardware concurrency.\n"); |
392 | return Status::Failure; |
393 | } |
394 | |
395 | // Modify verbosity |
396 | // If we are piping input and output, turn off interaction |
397 | if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) { |
398 | verbosity = 1; |
399 | } |
400 | // If we are in multi-file mode, turn off interaction |
401 | if (inputFiles.size() > 1 && verbosity == 2) { |
402 | verbosity = 1; |
403 | } |
404 | |
405 | return Status::Success; |
406 | } |
407 | |
408 | std::string Options::getOutputFile(const std::string &inputFile) const { |
409 | if (!outputFile.empty()) { |
410 | return outputFile; |
411 | } |
412 | // Attempt to add/remove zstd extension from the input file |
413 | if (decompress) { |
414 | int stemSize = inputFile.size() - kZstdExtension.size(); |
415 | if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) { |
416 | return inputFile.substr(0, stemSize); |
417 | } else { |
418 | return ""; |
419 | } |
420 | } else { |
421 | return inputFile + kZstdExtension; |
422 | } |
423 | } |
424 | } |