1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Argument parsing
6 /// \note Filter-specific options parsing is in options.c.
8 // Author: Lasse Collin
10 // This file has been put into the public domain.
11 // You can do whatever you want with this file.
13 ///////////////////////////////////////////////////////////////////////////////
// Global option flags. Every flag is off by default.
21 bool opt_stdout = false;
22 bool opt_force = false;
23 bool opt_keep_original = false;
24 bool opt_robot = false;
26 // Placeholder name stored in args->files_name when the list of files is
27 // read from stdin. It is never modified or free()d, but it has to be
// assigned to non-const pointers, which is why users of it cast away const.
28 const char *stdin_filename = "(stdin)";
// Parse the options in argv with getopt_long() and apply each recognized
// option to the global settings and to *args. Problems such as an unknown
// file format name or an unsupported integrity check type are reported
// with message_fatal().
32 parse_real(args_info *args, int argc, char **argv)
// OPT_* constants are what getopt_long() returns for long options such as
// --subblock. INT_MIN cannot collide with any character in short_opts.
35 OPT_SUBBLOCK = INT_MIN,
53 static const char short_opts[]
54 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
56 static const struct option long_opts[] = {
58 { "compress", no_argument, NULL, 'z' },
59 { "decompress", no_argument, NULL, 'd' },
60 { "uncompress", no_argument, NULL, 'd' },
61 { "test", no_argument, NULL, 't' },
62 { "list", no_argument, NULL, 'l' },
64 // Operation modifiers
65 { "keep", no_argument, NULL, 'k' },
66 { "force", no_argument, NULL, 'f' },
67 { "stdout", no_argument, NULL, 'c' },
68 { "to-stdout", no_argument, NULL, 'c' },
69 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
70 { "suffix", required_argument, NULL, 'S' },
71 // { "recursive", no_argument, NULL, 'r' }, // TODO
72 { "files", optional_argument, NULL, OPT_FILES },
73 { "files0", optional_argument, NULL, OPT_FILES0 },
75 // Basic compression settings
76 { "format", required_argument, NULL, 'F' },
77 { "check", required_argument, NULL, 'C' },
78 { "memory", required_argument, NULL, 'M' },
79 { "threads", required_argument, NULL, 'T' },
81 { "extreme", no_argument, NULL, 'e' },
82 { "fast", no_argument, NULL, '0' },
83 { "best", no_argument, NULL, '9' },
86 { "lzma1", optional_argument, NULL, OPT_LZMA1 },
87 { "lzma2", optional_argument, NULL, OPT_LZMA2 },
88 { "x86", optional_argument, NULL, OPT_X86 },
89 { "powerpc", optional_argument, NULL, OPT_POWERPC },
90 { "ia64", optional_argument, NULL, OPT_IA64 },
91 { "arm", optional_argument, NULL, OPT_ARM },
92 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB },
93 { "sparc", optional_argument, NULL, OPT_SPARC },
94 { "delta", optional_argument, NULL, OPT_DELTA },
95 { "subblock", optional_argument, NULL, OPT_SUBBLOCK },
98 { "quiet", no_argument, NULL, 'q' },
99 { "verbose", no_argument, NULL, 'v' },
100 { "no-warn", no_argument, NULL, 'Q' },
101 { "robot", no_argument, NULL, OPT_ROBOT },
102 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY },
103 { "help", no_argument, NULL, 'h' },
104 { "long-help", no_argument, NULL, 'H' },
105 { "version", no_argument, NULL, 'V' },
112 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
115 // Compression preset (also for decompression if --format=raw)
116 case '0': case '1': case '2': case '3': case '4':
117 case '5': case '6': case '7': case '8': case '9':
// The option character is the preset digit itself.
118 coder_set_preset(c - '0');
123 // Support specifying the limit as a percentage of
124 // installed physical RAM.
125 size_t len = strlen(optarg);
126 if (len > 0 && optarg[len - 1] == '%') {
// Cut off the trailing '%' so that only the number is left.
127 optarg[len - 1] = '\0';
128 hardware_memlimit_set_percentage(
130 "memory%", optarg, 1, 100));
132 // On 32-bit systems, SIZE_MAX would make more
133 // sense than UINT64_MAX. But use UINT64_MAX
134 // still so that scripts that assume > 4 GiB
135 // values don't break.
136 hardware_memlimit_set(str_to_uint64(
150 hardware_threadlimit_set(str_to_uint64(
151 "threads", optarg, 0, UINT32_MAX));
156 // This doesn't return.
166 opt_mode = MODE_DECOMPRESS;
180 case OPT_INFO_MEMORY:
181 // This doesn't return.
186 // This doesn't return.
191 // This doesn't return.
196 opt_mode = MODE_LIST;
201 opt_keep_original = true;
206 message_verbosity_decrease();
214 opt_mode = MODE_TEST;
219 message_verbosity_increase();
226 // This is to make sure that floating point numbers
227 // always have a dot as decimal separator.
228 setlocale(LC_NUMERIC, "C");
232 opt_mode = MODE_COMPRESS;
238 coder_add_filter(LZMA_FILTER_SUBBLOCK,
239 options_subblock(optarg));
243 coder_add_filter(LZMA_FILTER_X86,
244 options_bcj(optarg));
248 coder_add_filter(LZMA_FILTER_POWERPC,
249 options_bcj(optarg));
253 coder_add_filter(LZMA_FILTER_IA64,
254 options_bcj(optarg));
258 coder_add_filter(LZMA_FILTER_ARM,
259 options_bcj(optarg));
263 coder_add_filter(LZMA_FILTER_ARMTHUMB,
264 options_bcj(optarg));
268 coder_add_filter(LZMA_FILTER_SPARC,
269 options_bcj(optarg));
273 coder_add_filter(LZMA_FILTER_DELTA,
274 options_delta(optarg));
278 coder_add_filter(LZMA_FILTER_LZMA1,
279 options_lzma(optarg));
283 coder_add_filter(LZMA_FILTER_LZMA2,
284 options_lzma(optarg));
291 // Just in case, support both "lzma" and "alone" since
292 // the latter was used for forward compatibility in
293 // LZMA Utils 4.32.x.
294 static const struct {
296 enum format_type format;
298 { "auto", FORMAT_AUTO },
300 { "lzma", FORMAT_LZMA },
301 { "alone", FORMAT_LZMA },
302 // { "gzip", FORMAT_GZIP },
303 // { "gz", FORMAT_GZIP },
304 { "raw", FORMAT_RAW },
// Linear search for the name given in optarg. Reaching the end of
// the table means that the name is unknown, which is fatal.
308 while (strcmp(types[i].str, optarg) != 0)
309 if (++i == ARRAY_SIZE(types))
310 message_fatal(_("%s: Unknown file "
314 opt_format = types[i].format;
320 static const struct {
324 { "none", LZMA_CHECK_NONE },
325 { "crc32", LZMA_CHECK_CRC32 },
326 { "crc64", LZMA_CHECK_CRC64 },
327 { "sha256", LZMA_CHECK_SHA256 },
// Same kind of linear search as for the file format names.
331 while (strcmp(types[i].str, optarg) != 0) {
332 if (++i == ARRAY_SIZE(types))
333 message_fatal(_("%s: Unsupported "
335 "check type"), optarg);
338 // Use a separate check in case we are using different
339 // liblzma than what was used to compile us.
340 if (!lzma_check_is_supported(types[i].check))
341 message_fatal(_("%s: Unsupported integrity "
342 "check type"), optarg);
344 coder_set_check(types[i].check);
353 args->files_delim = '\n';
// files_name is still NULL unless a file list option was already seen.
358 if (args->files_name != NULL)
359 message_fatal(_("Only one file can be "
360 "specified with `--files'"
// Without an argument, the list of files is read from stdin.
363 if (optarg == NULL) {
364 args->files_name = (char *)stdin_filename;
365 args->files_file = stdin;
367 args->files_name = optarg;
// --files opens the list in text mode, the other variant in binary mode.
368 args->files_file = fopen(optarg,
369 c == OPT_FILES ? "r" : "rb");
370 if (args->files_file == NULL)
371 message_fatal("%s: %s", optarg,
379 tuklib_exit(E_ERROR, E_ERROR, false);
// Read options from the environment variable XZ_OPT: split its value into
// whitespace-separated words, build an argv-style array from them, and
// pass that array to parse_real().
388 parse_environment(args_info *args, char *argv0)
390 char *env = getenv("XZ_OPT");
394 // We modify the string, so make a copy of it.
397 // Calculate the number of arguments in env. argc starts at one
398 // to include space for the program name.
400 bool prev_was_space = true;
401 for (size_t i = 0; env[i] != '\0'; ++i) {
402 // NOTE: Cast to unsigned char is needed so that correct
403 // value gets passed to isspace(), which expects
404 // unsigned char cast to int. Casting to int is done
405 // automatically due to integer promotion, but we need to
406 // force char to unsigned char manually. Otherwise 8-bit
407 // characters would get promoted to wrong value if char is signed.
409 if (isspace((unsigned char)env[i])) {
410 prev_was_space = true;
411 } else if (prev_was_space) {
// First character of a new word: count one more argument.
412 prev_was_space = false;
414 // Keep argc small enough to fit into a signed int
415 // and to keep it usable for memory allocation.
416 if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
417 message_fatal(_("The environment variable "
418 "XZ_OPT contains too many "
423 // Allocate memory to hold pointers to the arguments. Add one to get
424 // space for the terminating NULL (if some systems happen to need it).
425 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
429 // Go through the string again. Split the arguments using '\0'
430 // characters and add pointers to the resulting strings to argv.
432 prev_was_space = true;
433 for (size_t i = 0; env[i] != '\0'; ++i) {
434 if (isspace((unsigned char)env[i])) {
435 prev_was_space = true;
437 } else if (prev_was_space) {
438 prev_was_space = false;
// argv entries point into env itself; the words are not copied.
439 argv[argc++] = env + i;
443 // Parse the argument list we got from the environment. All non-option
444 // arguments i.e. filenames are ignored.
445 parse_real(args, argc, argv);
447 // Reset the state of the getopt_long() so that we can parse the
448 // command line options too. There are two incompatible ways to do it.
459 // We don't need the argument list from environment anymore.
// Entry point of argument parsing. Initializes the file list fields of
// *args, picks the default operation mode and file format from the name
// the program was invoked with, parses the options from the environment
// and then from the command line, and finally stores the list of input
// filenames in args->arg_names.
468 args_parse(args_info *args, int argc, char **argv)
470 // Initialize those parts of *args that we need later.
471 args->files_name = NULL;
472 args->files_file = NULL;
473 args->files_delim = '\0';
475 // Check how we were called.
477 // Remove the leading path name, if any.
478 const char *name = strrchr(argv[0], '/');
484 // NOTE: It's possible that name[0] is now '\0' if argv[0]
485 // is weird, but it doesn't matter here.
487 // Look for full command names instead of substrings like
488 // "un", "cat", and "lz" to reduce possibility of false
489 // positives when the programs have been renamed.
490 if (strstr(name, "xzcat") != NULL) {
491 opt_mode = MODE_DECOMPRESS;
493 } else if (strstr(name, "unxz") != NULL) {
494 opt_mode = MODE_DECOMPRESS;
495 } else if (strstr(name, "lzcat") != NULL) {
496 opt_format = FORMAT_LZMA;
497 opt_mode = MODE_DECOMPRESS;
499 } else if (strstr(name, "unlzma") != NULL) {
500 opt_format = FORMAT_LZMA;
501 opt_mode = MODE_DECOMPRESS;
// "lzma" has to be tested after "unlzma" because strstr() would find
// "lzma" inside "unlzma" too.
502 } else if (strstr(name, "lzma") != NULL) {
503 opt_format = FORMAT_LZMA;
507 // First the flags from environment
508 parse_environment(args, argv[0]);
510 // Then from the command line
511 parse_real(args, argc, argv);
513 // Never remove the source file when the destination is not on disk.
514 // In test mode the data is written nowhere, but setting opt_stdout
515 // will make the rest of the code behave well.
516 if (opt_stdout || opt_mode == MODE_TEST) {
517 opt_keep_original = true;
521 // When compressing, if no --format flag was used, or it
522 // was --format=auto, we compress to the .xz format.
523 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
524 opt_format = FORMAT_XZ;
526 // Compression settings need to be validated (options themselves and
527 // their memory usage) when compressing to any file format. It has to
528 // be done also when uncompressing raw data, since for raw decoding
529 // the options given on the command line are used to know what kind
530 // of raw data we are supposed to decode.
531 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
532 coder_set_compression_settings();
534 // If no filenames are given, use stdin.
// argv[optind] is the first non-option argument left by getopt_long();
// args->files_name is non-NULL if --files or --files0 was used.
535 if (argv[optind] == NULL && args->files_name == NULL) {
536 // We don't modify or free() the "-" constant. The caller
537 // modifies this so don't make the struct itself const.
538 static char *names_stdin[2] = { (char *)"-", NULL };
539 args->arg_names = names_stdin;
542 // We got at least one filename from the command line, or
543 // --files or --files0 was specified.
544 args->arg_names = argv + optind;
545 args->arg_count = argc - optind;