icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Rename MIN() and MAX() to my_min() and my_max().
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// True when -c/--stdout/--to-stdout was given or the program name contains
// "xzcat" or "lzcat". args_parse() also forces it on in test mode.
bool opt_stdout = false;
// True when -f/--force was given.
bool opt_force = false;
// True when -k/--keep was given. args_parse() also forces it on whenever
// opt_stdout is set or the mode is MODE_TEST.
bool opt_keep_original = false;
// True when --robot was given.
bool opt_robot = false;

// Placeholder name stored in args->files_name when --files or --files0 reads
// the file list from standard input. We never modify or free() this string,
// but it has to be assigned to some non-const pointers.
const char *stdin_filename = "(stdin)";
29
30
31 static void
32 parse_real(args_info *args, int argc, char **argv)
33 {
34         enum {
35                 OPT_X86 = INT_MIN,
36                 OPT_POWERPC,
37                 OPT_IA64,
38                 OPT_ARM,
39                 OPT_ARMTHUMB,
40                 OPT_SPARC,
41                 OPT_DELTA,
42                 OPT_LZMA1,
43                 OPT_LZMA2,
44
45                 OPT_NO_SPARSE,
46                 OPT_FILES,
47                 OPT_FILES0,
48                 OPT_INFO_MEMORY,
49                 OPT_ROBOT,
50         };
51
52         static const char short_opts[]
53                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
54
55         static const struct option long_opts[] = {
56                 // Operation mode
57                 { "compress",     no_argument,       NULL,  'z' },
58                 { "decompress",   no_argument,       NULL,  'd' },
59                 { "uncompress",   no_argument,       NULL,  'd' },
60                 { "test",         no_argument,       NULL,  't' },
61                 { "list",         no_argument,       NULL,  'l' },
62
63                 // Operation modifiers
64                 { "keep",         no_argument,       NULL,  'k' },
65                 { "force",        no_argument,       NULL,  'f' },
66                 { "stdout",       no_argument,       NULL,  'c' },
67                 { "to-stdout",    no_argument,       NULL,  'c' },
68                 { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
69                 { "suffix",       required_argument, NULL,  'S' },
70                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
71                 { "files",        optional_argument, NULL,  OPT_FILES },
72                 { "files0",       optional_argument, NULL,  OPT_FILES0 },
73
74                 // Basic compression settings
75                 { "format",       required_argument, NULL,  'F' },
76                 { "check",        required_argument, NULL,  'C' },
77                 { "memory",       required_argument, NULL,  'M' },
78                 { "threads",      required_argument, NULL,  'T' },
79
80                 { "extreme",      no_argument,       NULL,  'e' },
81                 { "fast",         no_argument,       NULL,  '0' },
82                 { "best",         no_argument,       NULL,  '9' },
83
84                 // Filters
85                 { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
86                 { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
87                 { "x86",          optional_argument, NULL,  OPT_X86 },
88                 { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
89                 { "ia64",         optional_argument, NULL,  OPT_IA64 },
90                 { "arm",          optional_argument, NULL,  OPT_ARM },
91                 { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
92                 { "sparc",        optional_argument, NULL,  OPT_SPARC },
93                 { "delta",        optional_argument, NULL,  OPT_DELTA },
94
95                 // Other options
96                 { "quiet",        no_argument,       NULL,  'q' },
97                 { "verbose",      no_argument,       NULL,  'v' },
98                 { "no-warn",      no_argument,       NULL,  'Q' },
99                 { "robot",        no_argument,       NULL,  OPT_ROBOT },
100                 { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
101                 { "help",         no_argument,       NULL,  'h' },
102                 { "long-help",    no_argument,       NULL,  'H' },
103                 { "version",      no_argument,       NULL,  'V' },
104
105                 { NULL,                 0,                 NULL,   0 }
106         };
107
108         int c;
109
110         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
111                         != -1) {
112                 switch (c) {
113                 // Compression preset (also for decompression if --format=raw)
114                 case '0': case '1': case '2': case '3': case '4':
115                 case '5': case '6': case '7': case '8': case '9':
116                         coder_set_preset(c - '0');
117                         break;
118
119                 // --memory
120                 case 'M': {
121                         // Support specifying the limit as a percentage of
122                         // installed physical RAM.
123                         size_t len = strlen(optarg);
124                         if (len > 0 && optarg[len - 1] == '%') {
125                                 optarg[len - 1] = '\0';
126                                 hardware_memlimit_set_percentage(
127                                                 str_to_uint64(
128                                                 "memory%", optarg, 1, 100));
129                         } else {
130                                 // On 32-bit systems, SIZE_MAX would make more
131                                 // sense than UINT64_MAX. But use UINT64_MAX
132                                 // still so that scripts that assume > 4 GiB
133                                 // values don't break.
134                                 hardware_memlimit_set(str_to_uint64(
135                                                 "memory", optarg,
136                                                 0, UINT64_MAX));
137                         }
138
139                         break;
140                 }
141
142                 // --suffix
143                 case 'S':
144                         suffix_set(optarg);
145                         break;
146
147                 case 'T':
148                         hardware_threadlimit_set(str_to_uint64(
149                                         "threads", optarg, 0, UINT32_MAX));
150                         break;
151
152                 // --version
153                 case 'V':
154                         // This doesn't return.
155                         message_version();
156
157                 // --stdout
158                 case 'c':
159                         opt_stdout = true;
160                         break;
161
162                 // --decompress
163                 case 'd':
164                         opt_mode = MODE_DECOMPRESS;
165                         break;
166
167                 // --extreme
168                 case 'e':
169                         coder_set_extreme();
170                         break;
171
172                 // --force
173                 case 'f':
174                         opt_force = true;
175                         break;
176
177                 // --info-memory
178                 case OPT_INFO_MEMORY:
179                         // This doesn't return.
180                         message_memlimit();
181
182                 // --help
183                 case 'h':
184                         // This doesn't return.
185                         message_help(false);
186
187                 // --long-help
188                 case 'H':
189                         // This doesn't return.
190                         message_help(true);
191
192                 // --list
193                 case 'l':
194                         opt_mode = MODE_LIST;
195                         break;
196
197                 // --keep
198                 case 'k':
199                         opt_keep_original = true;
200                         break;
201
202                 // --quiet
203                 case 'q':
204                         message_verbosity_decrease();
205                         break;
206
207                 case 'Q':
208                         set_exit_no_warn();
209                         break;
210
211                 case 't':
212                         opt_mode = MODE_TEST;
213                         break;
214
215                 // --verbose
216                 case 'v':
217                         message_verbosity_increase();
218                         break;
219
220                 // --robot
221                 case OPT_ROBOT:
222                         opt_robot = true;
223
224                         // This is to make sure that floating point numbers
225                         // always have a dot as decimal separator.
226                         setlocale(LC_NUMERIC, "C");
227                         break;
228
229                 case 'z':
230                         opt_mode = MODE_COMPRESS;
231                         break;
232
233                 // Filter setup
234
235                 case OPT_X86:
236                         coder_add_filter(LZMA_FILTER_X86,
237                                         options_bcj(optarg));
238                         break;
239
240                 case OPT_POWERPC:
241                         coder_add_filter(LZMA_FILTER_POWERPC,
242                                         options_bcj(optarg));
243                         break;
244
245                 case OPT_IA64:
246                         coder_add_filter(LZMA_FILTER_IA64,
247                                         options_bcj(optarg));
248                         break;
249
250                 case OPT_ARM:
251                         coder_add_filter(LZMA_FILTER_ARM,
252                                         options_bcj(optarg));
253                         break;
254
255                 case OPT_ARMTHUMB:
256                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
257                                         options_bcj(optarg));
258                         break;
259
260                 case OPT_SPARC:
261                         coder_add_filter(LZMA_FILTER_SPARC,
262                                         options_bcj(optarg));
263                         break;
264
265                 case OPT_DELTA:
266                         coder_add_filter(LZMA_FILTER_DELTA,
267                                         options_delta(optarg));
268                         break;
269
270                 case OPT_LZMA1:
271                         coder_add_filter(LZMA_FILTER_LZMA1,
272                                         options_lzma(optarg));
273                         break;
274
275                 case OPT_LZMA2:
276                         coder_add_filter(LZMA_FILTER_LZMA2,
277                                         options_lzma(optarg));
278                         break;
279
280                 // Other
281
282                 // --format
283                 case 'F': {
284                         // Just in case, support both "lzma" and "alone" since
285                         // the latter was used for forward compatibility in
286                         // LZMA Utils 4.32.x.
287                         static const struct {
288                                 char str[8];
289                                 enum format_type format;
290                         } types[] = {
291                                 { "auto",   FORMAT_AUTO },
292                                 { "xz",     FORMAT_XZ },
293                                 { "lzma",   FORMAT_LZMA },
294                                 { "alone",  FORMAT_LZMA },
295                                 // { "gzip",   FORMAT_GZIP },
296                                 // { "gz",     FORMAT_GZIP },
297                                 { "raw",    FORMAT_RAW },
298                         };
299
300                         size_t i = 0;
301                         while (strcmp(types[i].str, optarg) != 0)
302                                 if (++i == ARRAY_SIZE(types))
303                                         message_fatal(_("%s: Unknown file "
304                                                         "format type"),
305                                                         optarg);
306
307                         opt_format = types[i].format;
308                         break;
309                 }
310
311                 // --check
312                 case 'C': {
313                         static const struct {
314                                 char str[8];
315                                 lzma_check check;
316                         } types[] = {
317                                 { "none",   LZMA_CHECK_NONE },
318                                 { "crc32",  LZMA_CHECK_CRC32 },
319                                 { "crc64",  LZMA_CHECK_CRC64 },
320                                 { "sha256", LZMA_CHECK_SHA256 },
321                         };
322
323                         size_t i = 0;
324                         while (strcmp(types[i].str, optarg) != 0) {
325                                 if (++i == ARRAY_SIZE(types))
326                                         message_fatal(_("%s: Unsupported "
327                                                         "integrity "
328                                                         "check type"), optarg);
329                         }
330
331                         // Use a separate check in case we are using different
332                         // liblzma than what was used to compile us.
333                         if (!lzma_check_is_supported(types[i].check))
334                                 message_fatal(_("%s: Unsupported integrity "
335                                                 "check type"), optarg);
336
337                         coder_set_check(types[i].check);
338                         break;
339                 }
340
341                 case OPT_NO_SPARSE:
342                         io_no_sparse();
343                         break;
344
345                 case OPT_FILES:
346                         args->files_delim = '\n';
347
348                 // Fall through
349
350                 case OPT_FILES0:
351                         if (args->files_name != NULL)
352                                 message_fatal(_("Only one file can be "
353                                                 "specified with `--files' "
354                                                 "or `--files0'."));
355
356                         if (optarg == NULL) {
357                                 args->files_name = (char *)stdin_filename;
358                                 args->files_file = stdin;
359                         } else {
360                                 args->files_name = optarg;
361                                 args->files_file = fopen(optarg,
362                                                 c == OPT_FILES ? "r" : "rb");
363                                 if (args->files_file == NULL)
364                                         message_fatal("%s: %s", optarg,
365                                                         strerror(errno));
366                         }
367
368                         break;
369
370                 default:
371                         message_try_help();
372                         tuklib_exit(E_ERROR, E_ERROR, false);
373                 }
374         }
375
376         return;
377 }
378
379
380 static void
381 parse_environment(args_info *args, char *argv0)
382 {
383         char *env = getenv("XZ_OPT");
384         if (env == NULL)
385                 return;
386
387         // We modify the string, so make a copy of it.
388         env = xstrdup(env);
389
390         // Calculate the number of arguments in env. argc stats at one
391         // to include space for the program name.
392         int argc = 1;
393         bool prev_was_space = true;
394         for (size_t i = 0; env[i] != '\0'; ++i) {
395                 // NOTE: Cast to unsigned char is needed so that correct
396                 // value gets passed to isspace(), which expects
397                 // unsigned char cast to int. Casting to int is done
398                 // automatically due to integer promotion, but we need to
399                 // force char to unsigned char manually. Otherwise 8-bit
400                 // characters would get promoted to wrong value if
401                 // char is signed.
402                 if (isspace((unsigned char)env[i])) {
403                         prev_was_space = true;
404                 } else if (prev_was_space) {
405                         prev_was_space = false;
406
407                         // Keep argc small enough to fit into a singed int
408                         // and to keep it usable for memory allocation.
409                         if (++argc == my_min(
410                                         INT_MAX, SIZE_MAX / sizeof(char *)))
411                                 message_fatal(_("The environment variable "
412                                                 "XZ_OPT contains too many "
413                                                 "arguments"));
414                 }
415         }
416
417         // Allocate memory to hold pointers to the arguments. Add one to get
418         // space for the terminating NULL (if some systems happen to need it).
419         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
420         argv[0] = argv0;
421         argv[argc] = NULL;
422
423         // Go through the string again. Split the arguments using '\0'
424         // characters and add pointers to the resulting strings to argv.
425         argc = 1;
426         prev_was_space = true;
427         for (size_t i = 0; env[i] != '\0'; ++i) {
428                 if (isspace((unsigned char)env[i])) {
429                         prev_was_space = true;
430                         env[i] = '\0';
431                 } else if (prev_was_space) {
432                         prev_was_space = false;
433                         argv[argc++] = env + i;
434                 }
435         }
436
437         // Parse the argument list we got from the environment. All non-option
438         // arguments i.e. filenames are ignored.
439         parse_real(args, argc, argv);
440
441         // Reset the state of the getopt_long() so that we can parse the
442         // command line options too. There are two incompatible ways to
443         // do it.
444 #ifdef HAVE_OPTRESET
445         // BSD
446         optind = 1;
447         optreset = 1;
448 #else
449         // GNU, Solaris
450         optind = 0;
451 #endif
452
453         // We don't need the argument list from environment anymore.
454         free(argv);
455         free(env);
456
457         return;
458 }
459
460
461 extern void
462 args_parse(args_info *args, int argc, char **argv)
463 {
464         // Initialize those parts of *args that we need later.
465         args->files_name = NULL;
466         args->files_file = NULL;
467         args->files_delim = '\0';
468
469         // Check how we were called.
470         {
471                 // Remove the leading path name, if any.
472                 const char *name = strrchr(argv[0], '/');
473                 if (name == NULL)
474                         name = argv[0];
475                 else
476                         ++name;
477
478                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
479                 // is weird, but it doesn't matter here.
480
481                 // Look for full command names instead of substrings like
482                 // "un", "cat", and "lz" to reduce possibility of false
483                 // positives when the programs have been renamed.
484                 if (strstr(name, "xzcat") != NULL) {
485                         opt_mode = MODE_DECOMPRESS;
486                         opt_stdout = true;
487                 } else if (strstr(name, "unxz") != NULL) {
488                         opt_mode = MODE_DECOMPRESS;
489                 } else if (strstr(name, "lzcat") != NULL) {
490                         opt_format = FORMAT_LZMA;
491                         opt_mode = MODE_DECOMPRESS;
492                         opt_stdout = true;
493                 } else if (strstr(name, "unlzma") != NULL) {
494                         opt_format = FORMAT_LZMA;
495                         opt_mode = MODE_DECOMPRESS;
496                 } else if (strstr(name, "lzma") != NULL) {
497                         opt_format = FORMAT_LZMA;
498                 }
499         }
500
501         // First the flags from environment
502         parse_environment(args, argv[0]);
503
504         // Then from the command line
505         parse_real(args, argc, argv);
506
507         // Never remove the source file when the destination is not on disk.
508         // In test mode the data is written nowhere, but setting opt_stdout
509         // will make the rest of the code behave well.
510         if (opt_stdout || opt_mode == MODE_TEST) {
511                 opt_keep_original = true;
512                 opt_stdout = true;
513         }
514
515         // When compressing, if no --format flag was used, or it
516         // was --format=auto, we compress to the .xz format.
517         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
518                 opt_format = FORMAT_XZ;
519
520         // Compression settings need to be validated (options themselves and
521         // their memory usage) when compressing to any file format. It has to
522         // be done also when uncompressing raw data, since for raw decoding
523         // the options given on the command line are used to know what kind
524         // of raw data we are supposed to decode.
525         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
526                 coder_set_compression_settings();
527
528         // If no filenames are given, use stdin.
529         if (argv[optind] == NULL && args->files_name == NULL) {
530                 // We don't modify or free() the "-" constant. The caller
531                 // modifies this so don't make the struct itself const.
532                 static char *names_stdin[2] = { (char *)"-", NULL };
533                 args->arg_names = names_stdin;
534                 args->arg_count = 1;
535         } else {
536                 // We got at least one filename from the command line, or
537                 // --files or --files0 was specified.
538                 args->arg_names = argv + optind;
539                 args->arg_count = argc - optind;
540         }
541
542         return;
543 }