]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Fix handling of integrity check type in the xz command line tool.
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Copyright (C) 2007 Lasse Collin
9 //
10 //  This program is free software; you can redistribute it and/or
11 //  modify it under the terms of the GNU Lesser General Public
12 //  License as published by the Free Software Foundation; either
13 //  version 2.1 of the License, or (at your option) any later version.
14 //
15 //  This program is distributed in the hope that it will be useful,
16 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 //  Lesser General Public License for more details.
19 //
20 ///////////////////////////////////////////////////////////////////////////////
21
22 #include "private.h"
23
24 #include "getopt.h"
25 #include <ctype.h>
26
27
// Set by -c/--stdout. args_parse() also sets it when the program is invoked
// under a name containing "cat" and whenever the operation mode is MODE_TEST.
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep. args_parse() forces it to true whenever output goes to
// stdout or the operation mode is MODE_TEST.
bool opt_keep_original = false;

// Name used for standard input. We never modify or free() this string, but
// it has to be assignable to non-const pointers (with a cast), for example
// args->files_name when --files or --files0 is given without a filename.
const char *stdin_filename = "(stdin)";
35
36
37 static void
38 parse_real(args_info *args, int argc, char **argv)
39 {
40         enum {
41                 OPT_SUBBLOCK = INT_MIN,
42                 OPT_X86,
43                 OPT_POWERPC,
44                 OPT_IA64,
45                 OPT_ARM,
46                 OPT_ARMTHUMB,
47                 OPT_SPARC,
48                 OPT_DELTA,
49                 OPT_LZMA1,
50                 OPT_LZMA2,
51
52                 OPT_FILES,
53                 OPT_FILES0,
54         };
55
56         static const char short_opts[] = "cC:defF:hHlkM:qrS:tT:vVz0123456789";
57
58         static const struct option long_opts[] = {
59                 // Operation mode
60                 { "compress",       no_argument,       NULL,  'z' },
61                 { "decompress",     no_argument,       NULL,  'd' },
62                 { "uncompress",     no_argument,       NULL,  'd' },
63                 { "test",           no_argument,       NULL,  't' },
64                 { "list",           no_argument,       NULL,  'l' },
65                 { "info",           no_argument,       NULL,  'l' },
66
67                 // Operation modifiers
68                 { "keep",           no_argument,       NULL,  'k' },
69                 { "force",          no_argument,       NULL,  'f' },
70                 { "stdout",         no_argument,       NULL,  'c' },
71                 { "to-stdout",      no_argument,       NULL,  'c' },
72                 { "suffix",         required_argument, NULL,  'S' },
73                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
74                 { "files",          optional_argument, NULL,  OPT_FILES },
75                 { "files0",         optional_argument, NULL,  OPT_FILES0 },
76
77                 // Basic compression settings
78                 { "format",         required_argument, NULL,  'F' },
79                 { "check",          required_argument, NULL,  'C' },
80                 { "memory",         required_argument, NULL,  'M' },
81                 { "threads",        required_argument, NULL,  'T' },
82
83                 { "extreme",        no_argument,       NULL,  'e' },
84                 { "fast",           no_argument,       NULL,  '0' },
85                 { "best",           no_argument,       NULL,  '9' },
86
87                 // Filters
88                 { "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
89                 { "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
90                 { "x86",            no_argument,       NULL,  OPT_X86 },
91                 { "bcj",            no_argument,       NULL,  OPT_X86 },
92                 { "powerpc",        no_argument,       NULL,  OPT_POWERPC },
93                 { "ppc",            no_argument,       NULL,  OPT_POWERPC },
94                 { "ia64",           no_argument,       NULL,  OPT_IA64 },
95                 { "itanium",        no_argument,       NULL,  OPT_IA64 },
96                 { "arm",            no_argument,       NULL,  OPT_ARM },
97                 { "armthumb",       no_argument,       NULL,  OPT_ARMTHUMB },
98                 { "sparc",          no_argument,       NULL,  OPT_SPARC },
99                 { "delta",          optional_argument, NULL,  OPT_DELTA },
100                 { "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
101
102                 // Other options
103                 { "quiet",          no_argument,       NULL,  'q' },
104                 { "verbose",        no_argument,       NULL,  'v' },
105                 { "help",           no_argument,       NULL,  'h' },
106                 { "long-help",      no_argument,       NULL,  'H' },
107                 { "version",        no_argument,       NULL,  'V' },
108
109                 { NULL,                 0,                 NULL,   0 }
110         };
111
112         int c;
113
114         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
115                         != -1) {
116                 switch (c) {
117                 // Compression preset (also for decompression if --format=raw)
118                 case '0': case '1': case '2': case '3': case '4':
119                 case '5': case '6': case '7': case '8': case '9':
120                         coder_set_preset(c - '0');
121                         break;
122
123                 // --memory
124                 case 'M':
125                         // On 32-bit systems, SIZE_MAX would make more sense
126                         // than UINT64_MAX. But use UINT64_MAX still so that
127                         // scripts that assume > 4 GiB values don't break.
128                         hardware_memlimit_set(str_to_uint64(
129                                         "memory", optarg, 0, UINT64_MAX));
130                         break;
131
132                 // --suffix
133                 case 'S':
134                         suffix_set(optarg);
135                         break;
136
137                 case 'T':
138                         opt_threads = str_to_uint64("threads", optarg,
139                                         1, SIZE_MAX);
140                         break;
141
142                 // --version
143                 case 'V':
144                         // This doesn't return.
145                         message_version();
146
147                 // --stdout
148                 case 'c':
149                         opt_stdout = true;
150                         break;
151
152                 // --decompress
153                 case 'd':
154                         opt_mode = MODE_DECOMPRESS;
155                         break;
156
157                 // --extreme
158                 case 'e':
159                         coder_set_extreme();
160                         break;
161
162                 // --force
163                 case 'f':
164                         opt_force = true;
165                         break;
166
167                 // --help
168                 case 'h':
169                         // This doesn't return.
170                         message_help(false);
171
172                 // --long-help
173                 case 'H':
174                         // This doesn't return.
175                         message_help(true);
176
177                 // --list
178                 case 'l':
179                         opt_mode = MODE_LIST;
180                         break;
181
182                 // --keep
183                 case 'k':
184                         opt_keep_original = true;
185                         break;
186
187                 // --quiet
188                 case 'q':
189                         message_verbosity_decrease();
190                         break;
191
192                 case 't':
193                         opt_mode = MODE_TEST;
194                         break;
195
196                 // --verbose
197                 case 'v':
198                         message_verbosity_increase();
199                         break;
200
201                 case 'z':
202                         opt_mode = MODE_COMPRESS;
203                         break;
204
205                 // Filter setup
206
207                 case OPT_SUBBLOCK:
208                         coder_add_filter(LZMA_FILTER_SUBBLOCK,
209                                         options_subblock(optarg));
210                         break;
211
212                 case OPT_X86:
213                         coder_add_filter(LZMA_FILTER_X86, NULL);
214                         break;
215
216                 case OPT_POWERPC:
217                         coder_add_filter(LZMA_FILTER_POWERPC, NULL);
218                         break;
219
220                 case OPT_IA64:
221                         coder_add_filter(LZMA_FILTER_IA64, NULL);
222                         break;
223
224                 case OPT_ARM:
225                         coder_add_filter(LZMA_FILTER_ARM, NULL);
226                         break;
227
228                 case OPT_ARMTHUMB:
229                         coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL);
230                         break;
231
232                 case OPT_SPARC:
233                         coder_add_filter(LZMA_FILTER_SPARC, NULL);
234                         break;
235
236                 case OPT_DELTA:
237                         coder_add_filter(LZMA_FILTER_DELTA,
238                                         options_delta(optarg));
239                         break;
240
241                 case OPT_LZMA1:
242                         coder_add_filter(LZMA_FILTER_LZMA1,
243                                         options_lzma(optarg));
244                         break;
245
246                 case OPT_LZMA2:
247                         coder_add_filter(LZMA_FILTER_LZMA2,
248                                         options_lzma(optarg));
249                         break;
250
251                 // Other
252
253                 // --format
254                 case 'F': {
255                         // Just in case, support both "lzma" and "alone" since
256                         // the latter was used for forward compatibility in
257                         // LZMA Utils 4.32.x.
258                         static const struct {
259                                 char str[8];
260                                 enum format_type format;
261                         } types[] = {
262                                 { "auto",   FORMAT_AUTO },
263                                 { "xz",     FORMAT_XZ },
264                                 { "lzma",   FORMAT_LZMA },
265                                 { "alone",  FORMAT_LZMA },
266                                 // { "gzip",   FORMAT_GZIP },
267                                 // { "gz",     FORMAT_GZIP },
268                                 { "raw",    FORMAT_RAW },
269                         };
270
271                         size_t i = 0;
272                         while (strcmp(types[i].str, optarg) != 0)
273                                 if (++i == ARRAY_SIZE(types))
274                                         message_fatal(_("%s: Unknown file "
275                                                         "format type"),
276                                                         optarg);
277
278                         opt_format = types[i].format;
279                         break;
280                 }
281
282                 // --check
283                 case 'C': {
284                         static const struct {
285                                 char str[8];
286                                 lzma_check check;
287                         } types[] = {
288                                 { "none",   LZMA_CHECK_NONE },
289                                 { "crc32",  LZMA_CHECK_CRC32 },
290                                 { "crc64",  LZMA_CHECK_CRC64 },
291                                 { "sha256", LZMA_CHECK_SHA256 },
292                         };
293
294                         size_t i = 0;
295                         while (strcmp(types[i].str, optarg) != 0) {
296                                 if (++i == ARRAY_SIZE(types))
297                                         message_fatal(_("%s: Unsupported "
298                                                         "integrity "
299                                                         "check type"), optarg);
300                         }
301
302                         // Use a separate check in case we are using different
303                         // liblzma than what was used to compile us.
304                         if (!lzma_check_is_supported(types[i].check))
305                                 message_fatal(_("%s: Unsupported integrity "
306                                                 "check type"), optarg);
307
308                         coder_set_check(types[i].check);
309                         break;
310                 }
311
312                 case OPT_FILES:
313                         args->files_delim = '\n';
314
315                 // Fall through
316
317                 case OPT_FILES0:
318                         if (args->files_name != NULL)
319                                 message_fatal(_("Only one file can be "
320                                                 "specified with `--files'"
321                                                 "or `--files0'."));
322
323                         if (optarg == NULL) {
324                                 args->files_name = (char *)stdin_filename;
325                                 args->files_file = stdin;
326                         } else {
327                                 args->files_name = optarg;
328                                 args->files_file = fopen(optarg,
329                                                 c == OPT_FILES ? "r" : "rb");
330                                 if (args->files_file == NULL)
331                                         message_fatal("%s: %s", optarg,
332                                                         strerror(errno));
333                         }
334
335                         break;
336
337                 default:
338                         message_try_help();
339                         my_exit(E_ERROR);
340                 }
341         }
342
343         return;
344 }
345
346
347 static void
348 parse_environment(args_info *args, char *argv0)
349 {
350         char *env = getenv("XZ_OPT");
351         if (env == NULL)
352                 return;
353
354         // We modify the string, so make a copy of it.
355         env = xstrdup(env);
356
357         // Calculate the number of arguments in env. argc stats at one
358         // to include space for the program name.
359         int argc = 1;
360         bool prev_was_space = true;
361         for (size_t i = 0; env[i] != '\0'; ++i) {
362                 if (isspace(env[i])) {
363                         prev_was_space = true;
364                 } else if (prev_was_space) {
365                         prev_was_space = false;
366
367                         // Keep argc small enough to fit into a singed int
368                         // and to keep it usable for memory allocation.
369                         if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
370                                 message_fatal(_("The environment variable "
371                                                 "XZ_OPT contains too many "
372                                                 "arguments"));
373                 }
374         }
375
376         // Allocate memory to hold pointers to the arguments. Add one to get
377         // space for the terminating NULL (if some systems happen to need it).
378         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
379         argv[0] = argv0;
380         argv[argc] = NULL;
381
382         // Go through the string again. Split the arguments using '\0'
383         // characters and add pointers to the resulting strings to argv.
384         argc = 1;
385         prev_was_space = true;
386         for (size_t i = 0; env[i] != '\0'; ++i) {
387                 if (isspace(env[i])) {
388                         prev_was_space = true;
389                         env[i] = '\0';
390                 } else if (prev_was_space) {
391                         prev_was_space = false;
392                         argv[argc++] = env + i;
393                 }
394         }
395
396         // Parse the argument list we got from the environment. All non-option
397         // arguments i.e. filenames are ignored.
398         parse_real(args, argc, argv);
399
400         // Reset the state of the getopt_long() so that we can parse the
401         // command line options too. There are two incompatible ways to
402         // do it.
403 #ifdef HAVE_OPTRESET
404         // BSD
405         optind = 1;
406         optreset = 1;
407 #else
408         // GNU, Solaris
409         optind = 0;
410 #endif
411
412         // We don't need the argument list from environment anymore.
413         free(argv);
414         free(env);
415
416         return;
417 }
418
419
420 extern void
421 args_parse(args_info *args, int argc, char **argv)
422 {
423         // Initialize those parts of *args that we need later.
424         args->files_name = NULL;
425         args->files_file = NULL;
426         args->files_delim = '\0';
427
428         // Type of the file format to use when --format=auto or no --format
429         // was specified.
430         enum format_type format_compress_auto = FORMAT_XZ;
431
432         // Check how we were called.
433         {
434 #ifdef DOSLIKE
435                 // We adjusted argv[0] in the beginning of main() so we don't
436                 // need to do anything here.
437                 const char *name = argv[0];
438 #else
439                 // Remove the leading path name, if any.
440                 const char *name = strrchr(argv[0], '/');
441                 if (name == NULL)
442                         name = argv[0];
443                 else
444                         ++name;
445 #endif
446
447                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
448                 // is weird, but it doesn't matter here.
449
450                 // The default file format is .lzma if the command name
451                 // contains "lz".
452                 if (strstr(name, "lz") != NULL)
453                         format_compress_auto = FORMAT_LZMA;
454
455                 // Operation mode
456                 if (strstr(name, "cat") != NULL) {
457                         // Imply --decompress --stdout
458                         opt_mode = MODE_DECOMPRESS;
459                         opt_stdout = true;
460                 } else if (strstr(name, "un") != NULL) {
461                         // Imply --decompress
462                         opt_mode = MODE_DECOMPRESS;
463                 }
464         }
465
466         // First the flags from environment
467         parse_environment(args, argv[0]);
468
469         // Then from the command line
470         optind = 1;
471         parse_real(args, argc, argv);
472
473         // Never remove the source file when the destination is not on disk.
474         // In test mode the data is written nowhere, but setting opt_stdout
475         // will make the rest of the code behave well.
476         if (opt_stdout || opt_mode == MODE_TEST) {
477                 opt_keep_original = true;
478                 opt_stdout = true;
479         }
480
481         // If no --format flag was used, or it was --format=auto, we need to
482         // decide what is the target file format we are going to use. This
483         // depends on how we were called (checked earlier in this function).
484         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
485                 opt_format = format_compress_auto;
486
487         // Compression settings need to be validated (options themselves and
488         // their memory usage) when compressing to any file format. It has to
489         // be done also when uncompressing raw data, since for raw decoding
490         // the options given on the command line are used to know what kind
491         // of raw data we are supposed to decode.
492         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
493                 coder_set_compression_settings();
494
495         // If no filenames are given, use stdin.
496         if (argv[optind] == NULL && args->files_name == NULL) {
497                 // We don't modify or free() the "-" constant. The caller
498                 // modifies this so don't make the struct itself const.
499                 static char *names_stdin[2] = { (char *)"-", NULL };
500                 args->arg_names = names_stdin;
501                 args->arg_count = 1;
502         } else {
503                 // We got at least one filename from the command line, or
504                 // --files or --files0 was specified.
505                 args->arg_names = argv + optind;
506                 args->arg_count = argc - optind;
507         }
508
509         return;
510 }