]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Updated comments to match renamed files.
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Set by -c/--stdout/--to-stdout. args_parse() also forces this to true
// when the program name contains "cat" and when testing.
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep. args_parse() forces this to true whenever opt_stdout
// is set or the operation mode is MODE_TEST.
bool opt_keep_original = false;

// Name stored in args->files_name when --files or --files0 is given
// without an argument. We don't modify or free() this, but we need to
// assign it to some non-const pointers.
const char *stdin_filename = "(stdin)";
28
29
30 static void
31 parse_real(args_info *args, int argc, char **argv)
32 {
33         enum {
34                 OPT_SUBBLOCK = INT_MIN,
35                 OPT_X86,
36                 OPT_POWERPC,
37                 OPT_IA64,
38                 OPT_ARM,
39                 OPT_ARMTHUMB,
40                 OPT_SPARC,
41                 OPT_DELTA,
42                 OPT_LZMA1,
43                 OPT_LZMA2,
44
45                 OPT_FILES,
46                 OPT_FILES0,
47         };
48
49         static const char short_opts[]
50                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
51
52         static const struct option long_opts[] = {
53                 // Operation mode
54                 { "compress",       no_argument,       NULL,  'z' },
55                 { "decompress",     no_argument,       NULL,  'd' },
56                 { "uncompress",     no_argument,       NULL,  'd' },
57                 { "test",           no_argument,       NULL,  't' },
58                 { "list",           no_argument,       NULL,  'l' },
59
60                 // Operation modifiers
61                 { "keep",           no_argument,       NULL,  'k' },
62                 { "force",          no_argument,       NULL,  'f' },
63                 { "stdout",         no_argument,       NULL,  'c' },
64                 { "to-stdout",      no_argument,       NULL,  'c' },
65                 { "suffix",         required_argument, NULL,  'S' },
66                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
67                 { "files",          optional_argument, NULL,  OPT_FILES },
68                 { "files0",         optional_argument, NULL,  OPT_FILES0 },
69
70                 // Basic compression settings
71                 { "format",         required_argument, NULL,  'F' },
72                 { "check",          required_argument, NULL,  'C' },
73                 { "memory",         required_argument, NULL,  'M' },
74                 { "threads",        required_argument, NULL,  'T' },
75
76                 { "extreme",        no_argument,       NULL,  'e' },
77                 { "fast",           no_argument,       NULL,  '0' },
78                 { "best",           no_argument,       NULL,  '9' },
79
80                 // Filters
81                 { "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
82                 { "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
83                 { "x86",            optional_argument, NULL,  OPT_X86 },
84                 { "powerpc",        optional_argument, NULL,  OPT_POWERPC },
85                 { "ia64",           optional_argument, NULL,  OPT_IA64 },
86                 { "arm",            optional_argument, NULL,  OPT_ARM },
87                 { "armthumb",       optional_argument, NULL,  OPT_ARMTHUMB },
88                 { "sparc",          optional_argument, NULL,  OPT_SPARC },
89                 { "delta",          optional_argument, NULL,  OPT_DELTA },
90                 { "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
91
92                 // Other options
93                 { "quiet",          no_argument,       NULL,  'q' },
94                 { "verbose",        no_argument,       NULL,  'v' },
95                 { "no-warn",        no_argument,       NULL,  'Q' },
96                 { "help",           no_argument,       NULL,  'h' },
97                 { "long-help",      no_argument,       NULL,  'H' },
98                 { "version",        no_argument,       NULL,  'V' },
99
100                 { NULL,                 0,                 NULL,   0 }
101         };
102
103         int c;
104
105         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
106                         != -1) {
107                 switch (c) {
108                 // Compression preset (also for decompression if --format=raw)
109                 case '0': case '1': case '2': case '3': case '4':
110                 case '5': case '6': case '7': case '8': case '9':
111                         coder_set_preset(c - '0');
112                         break;
113
114                 // --memory
115                 case 'M': {
116                         // Support specifying the limit as a percentage of
117                         // installed physical RAM.
118                         size_t len = strlen(optarg);
119                         if (len > 0 && optarg[len - 1] == '%') {
120                                 optarg[len - 1] = '\0';
121                                 hardware_memlimit_set_percentage(
122                                                 str_to_uint64(
123                                                 "memory%", optarg, 1, 100));
124                         } else {
125                                 // On 32-bit systems, SIZE_MAX would make more
126                                 // sense than UINT64_MAX. But use UINT64_MAX
127                                 // still so that scripts that assume > 4 GiB
128                                 // values don't break.
129                                 hardware_memlimit_set(str_to_uint64(
130                                                 "memory", optarg,
131                                                 0, UINT64_MAX));
132                         }
133
134                         break;
135                 }
136
137                 // --suffix
138                 case 'S':
139                         suffix_set(optarg);
140                         break;
141
142                 case 'T':
143                         hardware_threadlimit_set(str_to_uint64(
144                                         "threads", optarg, 0, UINT32_MAX));
145                         break;
146
147                 // --version
148                 case 'V':
149                         // This doesn't return.
150                         message_version();
151
152                 // --stdout
153                 case 'c':
154                         opt_stdout = true;
155                         break;
156
157                 // --decompress
158                 case 'd':
159                         opt_mode = MODE_DECOMPRESS;
160                         break;
161
162                 // --extreme
163                 case 'e':
164                         coder_set_extreme();
165                         break;
166
167                 // --force
168                 case 'f':
169                         opt_force = true;
170                         break;
171
172                 // --help
173                 case 'h':
174                         // This doesn't return.
175                         message_help(false);
176
177                 // --long-help
178                 case 'H':
179                         // This doesn't return.
180                         message_help(true);
181
182                 // --list
183                 case 'l':
184                         opt_mode = MODE_LIST;
185                         break;
186
187                 // --keep
188                 case 'k':
189                         opt_keep_original = true;
190                         break;
191
192                 // --quiet
193                 case 'q':
194                         message_verbosity_decrease();
195                         break;
196
197                 case 'Q':
198                         set_exit_no_warn();
199                         break;
200
201                 case 't':
202                         opt_mode = MODE_TEST;
203                         break;
204
205                 // --verbose
206                 case 'v':
207                         message_verbosity_increase();
208                         break;
209
210                 case 'z':
211                         opt_mode = MODE_COMPRESS;
212                         break;
213
214                 // Filter setup
215
216                 case OPT_SUBBLOCK:
217                         coder_add_filter(LZMA_FILTER_SUBBLOCK,
218                                         options_subblock(optarg));
219                         break;
220
221                 case OPT_X86:
222                         coder_add_filter(LZMA_FILTER_X86,
223                                         options_bcj(optarg));
224                         break;
225
226                 case OPT_POWERPC:
227                         coder_add_filter(LZMA_FILTER_POWERPC,
228                                         options_bcj(optarg));
229                         break;
230
231                 case OPT_IA64:
232                         coder_add_filter(LZMA_FILTER_IA64,
233                                         options_bcj(optarg));
234                         break;
235
236                 case OPT_ARM:
237                         coder_add_filter(LZMA_FILTER_ARM,
238                                         options_bcj(optarg));
239                         break;
240
241                 case OPT_ARMTHUMB:
242                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
243                                         options_bcj(optarg));
244                         break;
245
246                 case OPT_SPARC:
247                         coder_add_filter(LZMA_FILTER_SPARC,
248                                         options_bcj(optarg));
249                         break;
250
251                 case OPT_DELTA:
252                         coder_add_filter(LZMA_FILTER_DELTA,
253                                         options_delta(optarg));
254                         break;
255
256                 case OPT_LZMA1:
257                         coder_add_filter(LZMA_FILTER_LZMA1,
258                                         options_lzma(optarg));
259                         break;
260
261                 case OPT_LZMA2:
262                         coder_add_filter(LZMA_FILTER_LZMA2,
263                                         options_lzma(optarg));
264                         break;
265
266                 // Other
267
268                 // --format
269                 case 'F': {
270                         // Just in case, support both "lzma" and "alone" since
271                         // the latter was used for forward compatibility in
272                         // LZMA Utils 4.32.x.
273                         static const struct {
274                                 char str[8];
275                                 enum format_type format;
276                         } types[] = {
277                                 { "auto",   FORMAT_AUTO },
278                                 { "xz",     FORMAT_XZ },
279                                 { "lzma",   FORMAT_LZMA },
280                                 { "alone",  FORMAT_LZMA },
281                                 // { "gzip",   FORMAT_GZIP },
282                                 // { "gz",     FORMAT_GZIP },
283                                 { "raw",    FORMAT_RAW },
284                         };
285
286                         size_t i = 0;
287                         while (strcmp(types[i].str, optarg) != 0)
288                                 if (++i == ARRAY_SIZE(types))
289                                         message_fatal(_("%s: Unknown file "
290                                                         "format type"),
291                                                         optarg);
292
293                         opt_format = types[i].format;
294                         break;
295                 }
296
297                 // --check
298                 case 'C': {
299                         static const struct {
300                                 char str[8];
301                                 lzma_check check;
302                         } types[] = {
303                                 { "none",   LZMA_CHECK_NONE },
304                                 { "crc32",  LZMA_CHECK_CRC32 },
305                                 { "crc64",  LZMA_CHECK_CRC64 },
306                                 { "sha256", LZMA_CHECK_SHA256 },
307                         };
308
309                         size_t i = 0;
310                         while (strcmp(types[i].str, optarg) != 0) {
311                                 if (++i == ARRAY_SIZE(types))
312                                         message_fatal(_("%s: Unsupported "
313                                                         "integrity "
314                                                         "check type"), optarg);
315                         }
316
317                         // Use a separate check in case we are using different
318                         // liblzma than what was used to compile us.
319                         if (!lzma_check_is_supported(types[i].check))
320                                 message_fatal(_("%s: Unsupported integrity "
321                                                 "check type"), optarg);
322
323                         coder_set_check(types[i].check);
324                         break;
325                 }
326
327                 case OPT_FILES:
328                         args->files_delim = '\n';
329
330                 // Fall through
331
332                 case OPT_FILES0:
333                         if (args->files_name != NULL)
334                                 message_fatal(_("Only one file can be "
335                                                 "specified with `--files'"
336                                                 "or `--files0'."));
337
338                         if (optarg == NULL) {
339                                 args->files_name = (char *)stdin_filename;
340                                 args->files_file = stdin;
341                         } else {
342                                 args->files_name = optarg;
343                                 args->files_file = fopen(optarg,
344                                                 c == OPT_FILES ? "r" : "rb");
345                                 if (args->files_file == NULL)
346                                         message_fatal("%s: %s", optarg,
347                                                         strerror(errno));
348                         }
349
350                         break;
351
352                 default:
353                         message_try_help();
354                         my_exit(E_ERROR);
355                 }
356         }
357
358         return;
359 }
360
361
362 static void
363 parse_environment(args_info *args, char *argv0)
364 {
365         char *env = getenv("XZ_OPT");
366         if (env == NULL)
367                 return;
368
369         // We modify the string, so make a copy of it.
370         env = xstrdup(env);
371
372         // Calculate the number of arguments in env. argc stats at one
373         // to include space for the program name.
374         int argc = 1;
375         bool prev_was_space = true;
376         for (size_t i = 0; env[i] != '\0'; ++i) {
377                 // NOTE: Cast to unsigned char is needed so that correct
378                 // value gets passed to isspace(), which expects
379                 // unsigned char cast to int. Casting to int is done
380                 // automatically due to integer promotion, but we need to
381                 // force char to unsigned char manually. Otherwise 8-bit
382                 // characters would get promoted to wrong value if
383                 // char is signed.
384                 if (isspace((unsigned char)env[i])) {
385                         prev_was_space = true;
386                 } else if (prev_was_space) {
387                         prev_was_space = false;
388
389                         // Keep argc small enough to fit into a singed int
390                         // and to keep it usable for memory allocation.
391                         if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
392                                 message_fatal(_("The environment variable "
393                                                 "XZ_OPT contains too many "
394                                                 "arguments"));
395                 }
396         }
397
398         // Allocate memory to hold pointers to the arguments. Add one to get
399         // space for the terminating NULL (if some systems happen to need it).
400         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
401         argv[0] = argv0;
402         argv[argc] = NULL;
403
404         // Go through the string again. Split the arguments using '\0'
405         // characters and add pointers to the resulting strings to argv.
406         argc = 1;
407         prev_was_space = true;
408         for (size_t i = 0; env[i] != '\0'; ++i) {
409                 if (isspace((unsigned char)env[i])) {
410                         prev_was_space = true;
411                         env[i] = '\0';
412                 } else if (prev_was_space) {
413                         prev_was_space = false;
414                         argv[argc++] = env + i;
415                 }
416         }
417
418         // Parse the argument list we got from the environment. All non-option
419         // arguments i.e. filenames are ignored.
420         parse_real(args, argc, argv);
421
422         // Reset the state of the getopt_long() so that we can parse the
423         // command line options too. There are two incompatible ways to
424         // do it.
425 #ifdef HAVE_OPTRESET
426         // BSD
427         optind = 1;
428         optreset = 1;
429 #else
430         // GNU, Solaris
431         optind = 0;
432 #endif
433
434         // We don't need the argument list from environment anymore.
435         free(argv);
436         free(env);
437
438         return;
439 }
440
441
442 extern void
443 args_parse(args_info *args, int argc, char **argv)
444 {
445         // Initialize those parts of *args that we need later.
446         args->files_name = NULL;
447         args->files_file = NULL;
448         args->files_delim = '\0';
449
450         // Check how we were called.
451         {
452 #ifdef DOSLIKE
453                 // We adjusted argv[0] in the beginning of main() so we don't
454                 // need to do anything here.
455                 const char *name = argv[0];
456 #else
457                 // Remove the leading path name, if any.
458                 const char *name = strrchr(argv[0], '/');
459                 if (name == NULL)
460                         name = argv[0];
461                 else
462                         ++name;
463 #endif
464
465                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
466                 // is weird, but it doesn't matter here.
467
468                 // If the command name contains "lz",
469                 // it implies --format=lzma.
470                 if (strstr(name, "lz") != NULL)
471                         opt_format = FORMAT_LZMA;
472
473                 // Operation mode
474                 if (strstr(name, "cat") != NULL) {
475                         // Imply --decompress --stdout
476                         opt_mode = MODE_DECOMPRESS;
477                         opt_stdout = true;
478                 } else if (strstr(name, "un") != NULL) {
479                         // Imply --decompress
480                         opt_mode = MODE_DECOMPRESS;
481                 }
482         }
483
484         // First the flags from environment
485         parse_environment(args, argv[0]);
486
487         // Then from the command line
488         optind = 1;
489         parse_real(args, argc, argv);
490
491         // Never remove the source file when the destination is not on disk.
492         // In test mode the data is written nowhere, but setting opt_stdout
493         // will make the rest of the code behave well.
494         if (opt_stdout || opt_mode == MODE_TEST) {
495                 opt_keep_original = true;
496                 opt_stdout = true;
497         }
498
499         // When compressing, if no --format flag was used, or it
500         // was --format=auto, we compress to the .xz format.
501         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
502                 opt_format = FORMAT_XZ;
503
504         // Compression settings need to be validated (options themselves and
505         // their memory usage) when compressing to any file format. It has to
506         // be done also when uncompressing raw data, since for raw decoding
507         // the options given on the command line are used to know what kind
508         // of raw data we are supposed to decode.
509         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
510                 coder_set_compression_settings();
511
512         // If no filenames are given, use stdin.
513         if (argv[optind] == NULL && args->files_name == NULL) {
514                 // We don't modify or free() the "-" constant. The caller
515                 // modifies this so don't make the struct itself const.
516                 static char *names_stdin[2] = { (char *)"-", NULL };
517                 args->arg_names = names_stdin;
518                 args->arg_count = 1;
519         } else {
520                 // We got at least one filename from the command line, or
521                 // --files or --files0 was specified.
522                 args->arg_names = argv + optind;
523                 args->arg_count = argc - optind;
524         }
525
526         return;
527 }