]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
xz: Use an array instead of pointer for stdin_filename.
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Set by -c / --stdout / --to-stdout. args_parse() also forces it on when
// the program is invoked as xzcat or lzcat, and in test mode.
bool opt_stdout = false;

// Set by -f / --force.
bool opt_force = false;

// Set by -k / --keep. args_parse() also forces it on whenever opt_stdout
// ends up set or the mode is MODE_TEST.
bool opt_keep_original = false;

// Set by --robot.
bool opt_robot = false;

// Name stored in args->files_name when --files or --files0 is given
// without an argument, i.e. when the list of filenames is read from stdin.
// It is never modified or free()d, but it has to be assigned to non-const
// pointers, which is why the user in this file casts the const away.
const char stdin_filename[] = "(stdin)";
29
30
/// \brief      Parse a memory usage limit and apply it
///
/// The limit is either a plain number or, when the string ends with '%',
/// a percentage in the range 1-100. The trailing '%' is cut off in place
/// before the number is parsed, so str must be writable.
///
/// \param      name              Option name passed to str_to_uint64()
///                               when parsing a plain value
/// \param      name_percentage   Option name passed to str_to_uint64()
///                               when parsing a percentage
/// \param      str               The option argument; modified in place if
///                               it ends with '%'
/// \param      set_compress      Passed through to hardware_memlimit_set()
/// \param      set_decompress    Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
                bool set_compress, bool set_decompress)
{
        const size_t length = strlen(str);
        const bool is_percentage = length > 0 && str[length - 1] == '%';

        uint64_t limit;
        if (is_percentage) {
                // Drop the '%' so that only the digits remain.
                str[length - 1] = '\0';
                limit = str_to_uint64(name_percentage, str, 1, 100);
        } else {
                // SIZE_MAX would be the natural upper bound on 32-bit
                // systems, but UINT64_MAX is accepted everywhere so that
                // scripts passing values above 4 GiB keep working.
                limit = str_to_uint64(name, str, 0, UINT64_MAX);
        }

        hardware_memlimit_set(
                        limit, set_compress, set_decompress, is_percentage);
}
55
56
57 static void
58 parse_real(args_info *args, int argc, char **argv)
59 {
60         enum {
61                 OPT_X86 = INT_MIN,
62                 OPT_POWERPC,
63                 OPT_IA64,
64                 OPT_ARM,
65                 OPT_ARMTHUMB,
66                 OPT_SPARC,
67                 OPT_DELTA,
68                 OPT_LZMA1,
69                 OPT_LZMA2,
70
71                 OPT_NO_SPARSE,
72                 OPT_FILES,
73                 OPT_FILES0,
74                 OPT_MEM_COMPRESS,
75                 OPT_MEM_DECOMPRESS,
76                 OPT_NO_ADJUST,
77                 OPT_INFO_MEMORY,
78                 OPT_ROBOT,
79         };
80
81         static const char short_opts[]
82                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
83
84         static const struct option long_opts[] = {
85                 // Operation mode
86                 { "compress",     no_argument,       NULL,  'z' },
87                 { "decompress",   no_argument,       NULL,  'd' },
88                 { "uncompress",   no_argument,       NULL,  'd' },
89                 { "test",         no_argument,       NULL,  't' },
90                 { "list",         no_argument,       NULL,  'l' },
91
92                 // Operation modifiers
93                 { "keep",         no_argument,       NULL,  'k' },
94                 { "force",        no_argument,       NULL,  'f' },
95                 { "stdout",       no_argument,       NULL,  'c' },
96                 { "to-stdout",    no_argument,       NULL,  'c' },
97                 { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
98                 { "suffix",       required_argument, NULL,  'S' },
99                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
100                 { "files",        optional_argument, NULL,  OPT_FILES },
101                 { "files0",       optional_argument, NULL,  OPT_FILES0 },
102
103                 // Basic compression settings
104                 { "format",       required_argument, NULL,  'F' },
105                 { "check",        required_argument, NULL,  'C' },
106                 { "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
107                 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
108                 { "memlimit",     required_argument, NULL,  'M' },
109                 { "memory",       required_argument, NULL,  'M' }, // Old alias
110                 { "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
111                 { "threads",      required_argument, NULL,  'T' },
112
113                 { "extreme",      no_argument,       NULL,  'e' },
114                 { "fast",         no_argument,       NULL,  '0' },
115                 { "best",         no_argument,       NULL,  '9' },
116
117                 // Filters
118                 { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
119                 { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
120                 { "x86",          optional_argument, NULL,  OPT_X86 },
121                 { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
122                 { "ia64",         optional_argument, NULL,  OPT_IA64 },
123                 { "arm",          optional_argument, NULL,  OPT_ARM },
124                 { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
125                 { "sparc",        optional_argument, NULL,  OPT_SPARC },
126                 { "delta",        optional_argument, NULL,  OPT_DELTA },
127
128                 // Other options
129                 { "quiet",        no_argument,       NULL,  'q' },
130                 { "verbose",      no_argument,       NULL,  'v' },
131                 { "no-warn",      no_argument,       NULL,  'Q' },
132                 { "robot",        no_argument,       NULL,  OPT_ROBOT },
133                 { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
134                 { "help",         no_argument,       NULL,  'h' },
135                 { "long-help",    no_argument,       NULL,  'H' },
136                 { "version",      no_argument,       NULL,  'V' },
137
138                 { NULL,           0,                 NULL,   0 }
139         };
140
141         int c;
142
143         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
144                         != -1) {
145                 switch (c) {
146                 // Compression preset (also for decompression if --format=raw)
147                 case '0': case '1': case '2': case '3': case '4':
148                 case '5': case '6': case '7': case '8': case '9':
149                         coder_set_preset(c - '0');
150                         break;
151
152                 // --memlimit-compress
153                 case OPT_MEM_COMPRESS:
154                         parse_memlimit("memlimit-compress",
155                                         "memlimit-compress%", optarg,
156                                         true, false);
157                         break;
158
159                 // --memlimit-decompress
160                 case OPT_MEM_DECOMPRESS:
161                         parse_memlimit("memlimit-decompress",
162                                         "memlimit-decompress%", optarg,
163                                         false, true);
164                         break;
165
166                 // --memlimit
167                 case 'M':
168                         parse_memlimit("memlimit", "memlimit%", optarg,
169                                         true, true);
170                         break;
171
172                 // --suffix
173                 case 'S':
174                         suffix_set(optarg);
175                         break;
176
177                 case 'T':
178                         hardware_threadlimit_set(str_to_uint64(
179                                         "threads", optarg, 0, UINT32_MAX));
180                         break;
181
182                 // --version
183                 case 'V':
184                         // This doesn't return.
185                         message_version();
186
187                 // --stdout
188                 case 'c':
189                         opt_stdout = true;
190                         break;
191
192                 // --decompress
193                 case 'd':
194                         opt_mode = MODE_DECOMPRESS;
195                         break;
196
197                 // --extreme
198                 case 'e':
199                         coder_set_extreme();
200                         break;
201
202                 // --force
203                 case 'f':
204                         opt_force = true;
205                         break;
206
207                 // --info-memory
208                 case OPT_INFO_MEMORY:
209                         // This doesn't return.
210                         hardware_memlimit_show();
211
212                 // --help
213                 case 'h':
214                         // This doesn't return.
215                         message_help(false);
216
217                 // --long-help
218                 case 'H':
219                         // This doesn't return.
220                         message_help(true);
221
222                 // --list
223                 case 'l':
224                         opt_mode = MODE_LIST;
225                         break;
226
227                 // --keep
228                 case 'k':
229                         opt_keep_original = true;
230                         break;
231
232                 // --quiet
233                 case 'q':
234                         message_verbosity_decrease();
235                         break;
236
237                 case 'Q':
238                         set_exit_no_warn();
239                         break;
240
241                 case 't':
242                         opt_mode = MODE_TEST;
243                         break;
244
245                 // --verbose
246                 case 'v':
247                         message_verbosity_increase();
248                         break;
249
250                 // --robot
251                 case OPT_ROBOT:
252                         opt_robot = true;
253
254                         // This is to make sure that floating point numbers
255                         // always have a dot as decimal separator.
256                         setlocale(LC_NUMERIC, "C");
257                         break;
258
259                 case 'z':
260                         opt_mode = MODE_COMPRESS;
261                         break;
262
263                 // Filter setup
264
265                 case OPT_X86:
266                         coder_add_filter(LZMA_FILTER_X86,
267                                         options_bcj(optarg));
268                         break;
269
270                 case OPT_POWERPC:
271                         coder_add_filter(LZMA_FILTER_POWERPC,
272                                         options_bcj(optarg));
273                         break;
274
275                 case OPT_IA64:
276                         coder_add_filter(LZMA_FILTER_IA64,
277                                         options_bcj(optarg));
278                         break;
279
280                 case OPT_ARM:
281                         coder_add_filter(LZMA_FILTER_ARM,
282                                         options_bcj(optarg));
283                         break;
284
285                 case OPT_ARMTHUMB:
286                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
287                                         options_bcj(optarg));
288                         break;
289
290                 case OPT_SPARC:
291                         coder_add_filter(LZMA_FILTER_SPARC,
292                                         options_bcj(optarg));
293                         break;
294
295                 case OPT_DELTA:
296                         coder_add_filter(LZMA_FILTER_DELTA,
297                                         options_delta(optarg));
298                         break;
299
300                 case OPT_LZMA1:
301                         coder_add_filter(LZMA_FILTER_LZMA1,
302                                         options_lzma(optarg));
303                         break;
304
305                 case OPT_LZMA2:
306                         coder_add_filter(LZMA_FILTER_LZMA2,
307                                         options_lzma(optarg));
308                         break;
309
310                 // Other
311
312                 // --format
313                 case 'F': {
314                         // Just in case, support both "lzma" and "alone" since
315                         // the latter was used for forward compatibility in
316                         // LZMA Utils 4.32.x.
317                         static const struct {
318                                 char str[8];
319                                 enum format_type format;
320                         } types[] = {
321                                 { "auto",   FORMAT_AUTO },
322                                 { "xz",     FORMAT_XZ },
323                                 { "lzma",   FORMAT_LZMA },
324                                 { "alone",  FORMAT_LZMA },
325                                 // { "gzip",   FORMAT_GZIP },
326                                 // { "gz",     FORMAT_GZIP },
327                                 { "raw",    FORMAT_RAW },
328                         };
329
330                         size_t i = 0;
331                         while (strcmp(types[i].str, optarg) != 0)
332                                 if (++i == ARRAY_SIZE(types))
333                                         message_fatal(_("%s: Unknown file "
334                                                         "format type"),
335                                                         optarg);
336
337                         opt_format = types[i].format;
338                         break;
339                 }
340
341                 // --check
342                 case 'C': {
343                         static const struct {
344                                 char str[8];
345                                 lzma_check check;
346                         } types[] = {
347                                 { "none",   LZMA_CHECK_NONE },
348                                 { "crc32",  LZMA_CHECK_CRC32 },
349                                 { "crc64",  LZMA_CHECK_CRC64 },
350                                 { "sha256", LZMA_CHECK_SHA256 },
351                         };
352
353                         size_t i = 0;
354                         while (strcmp(types[i].str, optarg) != 0) {
355                                 if (++i == ARRAY_SIZE(types))
356                                         message_fatal(_("%s: Unsupported "
357                                                         "integrity "
358                                                         "check type"), optarg);
359                         }
360
361                         // Use a separate check in case we are using different
362                         // liblzma than what was used to compile us.
363                         if (!lzma_check_is_supported(types[i].check))
364                                 message_fatal(_("%s: Unsupported integrity "
365                                                 "check type"), optarg);
366
367                         coder_set_check(types[i].check);
368                         break;
369                 }
370
371                 case OPT_NO_SPARSE:
372                         io_no_sparse();
373                         break;
374
375                 case OPT_FILES:
376                         args->files_delim = '\n';
377
378                 // Fall through
379
380                 case OPT_FILES0:
381                         if (args->files_name != NULL)
382                                 message_fatal(_("Only one file can be "
383                                                 "specified with `--files' "
384                                                 "or `--files0'."));
385
386                         if (optarg == NULL) {
387                                 args->files_name = (char *)stdin_filename;
388                                 args->files_file = stdin;
389                         } else {
390                                 args->files_name = optarg;
391                                 args->files_file = fopen(optarg,
392                                                 c == OPT_FILES ? "r" : "rb");
393                                 if (args->files_file == NULL)
394                                         message_fatal("%s: %s", optarg,
395                                                         strerror(errno));
396                         }
397
398                         break;
399
400                 case OPT_NO_ADJUST:
401                         opt_auto_adjust = false;
402                         break;
403
404                 default:
405                         message_try_help();
406                         tuklib_exit(E_ERROR, E_ERROR, false);
407                 }
408         }
409
410         return;
411 }
412
413
414 static void
415 parse_environment(args_info *args, char *argv0, const char *varname)
416 {
417         char *env = getenv(varname);
418         if (env == NULL)
419                 return;
420
421         // We modify the string, so make a copy of it.
422         env = xstrdup(env);
423
424         // Calculate the number of arguments in env. argc stats at one
425         // to include space for the program name.
426         int argc = 1;
427         bool prev_was_space = true;
428         for (size_t i = 0; env[i] != '\0'; ++i) {
429                 // NOTE: Cast to unsigned char is needed so that correct
430                 // value gets passed to isspace(), which expects
431                 // unsigned char cast to int. Casting to int is done
432                 // automatically due to integer promotion, but we need to
433                 // force char to unsigned char manually. Otherwise 8-bit
434                 // characters would get promoted to wrong value if
435                 // char is signed.
436                 if (isspace((unsigned char)env[i])) {
437                         prev_was_space = true;
438                 } else if (prev_was_space) {
439                         prev_was_space = false;
440
441                         // Keep argc small enough to fit into a singed int
442                         // and to keep it usable for memory allocation.
443                         if (++argc == my_min(
444                                         INT_MAX, SIZE_MAX / sizeof(char *)))
445                                 message_fatal(_("The environment variable "
446                                                 "%s contains too many "
447                                                 "arguments"), varname);
448                 }
449         }
450
451         // Allocate memory to hold pointers to the arguments. Add one to get
452         // space for the terminating NULL (if some systems happen to need it).
453         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
454         argv[0] = argv0;
455         argv[argc] = NULL;
456
457         // Go through the string again. Split the arguments using '\0'
458         // characters and add pointers to the resulting strings to argv.
459         argc = 1;
460         prev_was_space = true;
461         for (size_t i = 0; env[i] != '\0'; ++i) {
462                 if (isspace((unsigned char)env[i])) {
463                         prev_was_space = true;
464                         env[i] = '\0';
465                 } else if (prev_was_space) {
466                         prev_was_space = false;
467                         argv[argc++] = env + i;
468                 }
469         }
470
471         // Parse the argument list we got from the environment. All non-option
472         // arguments i.e. filenames are ignored.
473         parse_real(args, argc, argv);
474
475         // Reset the state of the getopt_long() so that we can parse the
476         // command line options too. There are two incompatible ways to
477         // do it.
478 #ifdef HAVE_OPTRESET
479         // BSD
480         optind = 1;
481         optreset = 1;
482 #else
483         // GNU, Solaris
484         optind = 0;
485 #endif
486
487         // We don't need the argument list from environment anymore.
488         free(argv);
489         free(env);
490
491         return;
492 }
493
494
495 extern void
496 args_parse(args_info *args, int argc, char **argv)
497 {
498         // Initialize those parts of *args that we need later.
499         args->files_name = NULL;
500         args->files_file = NULL;
501         args->files_delim = '\0';
502
503         // Check how we were called.
504         {
505                 // Remove the leading path name, if any.
506                 const char *name = strrchr(argv[0], '/');
507                 if (name == NULL)
508                         name = argv[0];
509                 else
510                         ++name;
511
512                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
513                 // is weird, but it doesn't matter here.
514
515                 // Look for full command names instead of substrings like
516                 // "un", "cat", and "lz" to reduce possibility of false
517                 // positives when the programs have been renamed.
518                 if (strstr(name, "xzcat") != NULL) {
519                         opt_mode = MODE_DECOMPRESS;
520                         opt_stdout = true;
521                 } else if (strstr(name, "unxz") != NULL) {
522                         opt_mode = MODE_DECOMPRESS;
523                 } else if (strstr(name, "lzcat") != NULL) {
524                         opt_format = FORMAT_LZMA;
525                         opt_mode = MODE_DECOMPRESS;
526                         opt_stdout = true;
527                 } else if (strstr(name, "unlzma") != NULL) {
528                         opt_format = FORMAT_LZMA;
529                         opt_mode = MODE_DECOMPRESS;
530                 } else if (strstr(name, "lzma") != NULL) {
531                         opt_format = FORMAT_LZMA;
532                 }
533         }
534
535         // First the flags from the environment
536         parse_environment(args, argv[0], "XZ_DEFAULTS");
537         parse_environment(args, argv[0], "XZ_OPT");
538
539         // Then from the command line
540         parse_real(args, argc, argv);
541
542         // Never remove the source file when the destination is not on disk.
543         // In test mode the data is written nowhere, but setting opt_stdout
544         // will make the rest of the code behave well.
545         if (opt_stdout || opt_mode == MODE_TEST) {
546                 opt_keep_original = true;
547                 opt_stdout = true;
548         }
549
550         // When compressing, if no --format flag was used, or it
551         // was --format=auto, we compress to the .xz format.
552         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
553                 opt_format = FORMAT_XZ;
554
555         // Compression settings need to be validated (options themselves and
556         // their memory usage) when compressing to any file format. It has to
557         // be done also when uncompressing raw data, since for raw decoding
558         // the options given on the command line are used to know what kind
559         // of raw data we are supposed to decode.
560         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
561                 coder_set_compression_settings();
562
563         // If no filenames are given, use stdin.
564         if (argv[optind] == NULL && args->files_name == NULL) {
565                 // We don't modify or free() the "-" constant. The caller
566                 // modifies this so don't make the struct itself const.
567                 static char *names_stdin[2] = { (char *)"-", NULL };
568                 args->arg_names = names_stdin;
569                 args->arg_count = 1;
570         } else {
571                 // We got at least one filename from the command line, or
572                 // --files or --files0 was specified.
573                 args->arg_names = argv + optind;
574                 args->arg_count = argc - optind;
575         }
576
577         return;
578 }