]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Add --no-adjust.
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Write output to standard output. Set by -c/--stdout; args_parse() also
// turns it on when invoked as xzcat/lzcat and in test mode.
bool opt_stdout = false;
// Set by -f/--force.
bool opt_force = false;
// Keep the source file. Set by -k/--keep; args_parse() forces it on
// whenever opt_stdout ends up being true.
bool opt_keep_original = false;
// Set by --robot, which also switches LC_NUMERIC to the "C" locale.
bool opt_robot = false;

// Name used in place of a real filename when --files/--files0 is given
// without an argument and the list is read from stdin.
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char *stdin_filename = "(stdin)";
29
30
31 static void
32 parse_real(args_info *args, int argc, char **argv)
33 {
34         enum {
35                 OPT_X86 = INT_MIN,
36                 OPT_POWERPC,
37                 OPT_IA64,
38                 OPT_ARM,
39                 OPT_ARMTHUMB,
40                 OPT_SPARC,
41                 OPT_DELTA,
42                 OPT_LZMA1,
43                 OPT_LZMA2,
44
45                 OPT_NO_SPARSE,
46                 OPT_FILES,
47                 OPT_FILES0,
48                 OPT_NO_ADJUST,
49                 OPT_INFO_MEMORY,
50                 OPT_ROBOT,
51         };
52
53         static const char short_opts[]
54                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
55
56         static const struct option long_opts[] = {
57                 // Operation mode
58                 { "compress",     no_argument,       NULL,  'z' },
59                 { "decompress",   no_argument,       NULL,  'd' },
60                 { "uncompress",   no_argument,       NULL,  'd' },
61                 { "test",         no_argument,       NULL,  't' },
62                 { "list",         no_argument,       NULL,  'l' },
63
64                 // Operation modifiers
65                 { "keep",         no_argument,       NULL,  'k' },
66                 { "force",        no_argument,       NULL,  'f' },
67                 { "stdout",       no_argument,       NULL,  'c' },
68                 { "to-stdout",    no_argument,       NULL,  'c' },
69                 { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
70                 { "suffix",       required_argument, NULL,  'S' },
71                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
72                 { "files",        optional_argument, NULL,  OPT_FILES },
73                 { "files0",       optional_argument, NULL,  OPT_FILES0 },
74
75                 // Basic compression settings
76                 { "format",       required_argument, NULL,  'F' },
77                 { "check",        required_argument, NULL,  'C' },
78                 { "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
79                 { "memory",       required_argument, NULL,  'M' },
80                 { "threads",      required_argument, NULL,  'T' },
81
82                 { "extreme",      no_argument,       NULL,  'e' },
83                 { "fast",         no_argument,       NULL,  '0' },
84                 { "best",         no_argument,       NULL,  '9' },
85
86                 // Filters
87                 { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
88                 { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
89                 { "x86",          optional_argument, NULL,  OPT_X86 },
90                 { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
91                 { "ia64",         optional_argument, NULL,  OPT_IA64 },
92                 { "arm",          optional_argument, NULL,  OPT_ARM },
93                 { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
94                 { "sparc",        optional_argument, NULL,  OPT_SPARC },
95                 { "delta",        optional_argument, NULL,  OPT_DELTA },
96
97                 // Other options
98                 { "quiet",        no_argument,       NULL,  'q' },
99                 { "verbose",      no_argument,       NULL,  'v' },
100                 { "no-warn",      no_argument,       NULL,  'Q' },
101                 { "robot",        no_argument,       NULL,  OPT_ROBOT },
102                 { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
103                 { "help",         no_argument,       NULL,  'h' },
104                 { "long-help",    no_argument,       NULL,  'H' },
105                 { "version",      no_argument,       NULL,  'V' },
106
107                 { NULL,                 0,                 NULL,   0 }
108         };
109
110         int c;
111
112         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
113                         != -1) {
114                 switch (c) {
115                 // Compression preset (also for decompression if --format=raw)
116                 case '0': case '1': case '2': case '3': case '4':
117                 case '5': case '6': case '7': case '8': case '9':
118                         coder_set_preset(c - '0');
119                         break;
120
121                 // --memory
122                 case 'M': {
123                         // Support specifying the limit as a percentage of
124                         // installed physical RAM.
125                         size_t len = strlen(optarg);
126                         if (len > 0 && optarg[len - 1] == '%') {
127                                 optarg[len - 1] = '\0';
128                                 hardware_memlimit_set_percentage(
129                                                 str_to_uint64(
130                                                 "memory%", optarg, 1, 100));
131                         } else {
132                                 // On 32-bit systems, SIZE_MAX would make more
133                                 // sense than UINT64_MAX. But use UINT64_MAX
134                                 // still so that scripts that assume > 4 GiB
135                                 // values don't break.
136                                 hardware_memlimit_set(str_to_uint64(
137                                                 "memory", optarg,
138                                                 0, UINT64_MAX));
139                         }
140
141                         break;
142                 }
143
144                 // --suffix
145                 case 'S':
146                         suffix_set(optarg);
147                         break;
148
149                 case 'T':
150                         hardware_threadlimit_set(str_to_uint64(
151                                         "threads", optarg, 0, UINT32_MAX));
152                         break;
153
154                 // --version
155                 case 'V':
156                         // This doesn't return.
157                         message_version();
158
159                 // --stdout
160                 case 'c':
161                         opt_stdout = true;
162                         break;
163
164                 // --decompress
165                 case 'd':
166                         opt_mode = MODE_DECOMPRESS;
167                         break;
168
169                 // --extreme
170                 case 'e':
171                         coder_set_extreme();
172                         break;
173
174                 // --force
175                 case 'f':
176                         opt_force = true;
177                         break;
178
179                 // --info-memory
180                 case OPT_INFO_MEMORY:
181                         // This doesn't return.
182                         message_memlimit();
183
184                 // --help
185                 case 'h':
186                         // This doesn't return.
187                         message_help(false);
188
189                 // --long-help
190                 case 'H':
191                         // This doesn't return.
192                         message_help(true);
193
194                 // --list
195                 case 'l':
196                         opt_mode = MODE_LIST;
197                         break;
198
199                 // --keep
200                 case 'k':
201                         opt_keep_original = true;
202                         break;
203
204                 // --quiet
205                 case 'q':
206                         message_verbosity_decrease();
207                         break;
208
209                 case 'Q':
210                         set_exit_no_warn();
211                         break;
212
213                 case 't':
214                         opt_mode = MODE_TEST;
215                         break;
216
217                 // --verbose
218                 case 'v':
219                         message_verbosity_increase();
220                         break;
221
222                 // --robot
223                 case OPT_ROBOT:
224                         opt_robot = true;
225
226                         // This is to make sure that floating point numbers
227                         // always have a dot as decimal separator.
228                         setlocale(LC_NUMERIC, "C");
229                         break;
230
231                 case 'z':
232                         opt_mode = MODE_COMPRESS;
233                         break;
234
235                 // Filter setup
236
237                 case OPT_X86:
238                         coder_add_filter(LZMA_FILTER_X86,
239                                         options_bcj(optarg));
240                         break;
241
242                 case OPT_POWERPC:
243                         coder_add_filter(LZMA_FILTER_POWERPC,
244                                         options_bcj(optarg));
245                         break;
246
247                 case OPT_IA64:
248                         coder_add_filter(LZMA_FILTER_IA64,
249                                         options_bcj(optarg));
250                         break;
251
252                 case OPT_ARM:
253                         coder_add_filter(LZMA_FILTER_ARM,
254                                         options_bcj(optarg));
255                         break;
256
257                 case OPT_ARMTHUMB:
258                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
259                                         options_bcj(optarg));
260                         break;
261
262                 case OPT_SPARC:
263                         coder_add_filter(LZMA_FILTER_SPARC,
264                                         options_bcj(optarg));
265                         break;
266
267                 case OPT_DELTA:
268                         coder_add_filter(LZMA_FILTER_DELTA,
269                                         options_delta(optarg));
270                         break;
271
272                 case OPT_LZMA1:
273                         coder_add_filter(LZMA_FILTER_LZMA1,
274                                         options_lzma(optarg));
275                         break;
276
277                 case OPT_LZMA2:
278                         coder_add_filter(LZMA_FILTER_LZMA2,
279                                         options_lzma(optarg));
280                         break;
281
282                 // Other
283
284                 // --format
285                 case 'F': {
286                         // Just in case, support both "lzma" and "alone" since
287                         // the latter was used for forward compatibility in
288                         // LZMA Utils 4.32.x.
289                         static const struct {
290                                 char str[8];
291                                 enum format_type format;
292                         } types[] = {
293                                 { "auto",   FORMAT_AUTO },
294                                 { "xz",     FORMAT_XZ },
295                                 { "lzma",   FORMAT_LZMA },
296                                 { "alone",  FORMAT_LZMA },
297                                 // { "gzip",   FORMAT_GZIP },
298                                 // { "gz",     FORMAT_GZIP },
299                                 { "raw",    FORMAT_RAW },
300                         };
301
302                         size_t i = 0;
303                         while (strcmp(types[i].str, optarg) != 0)
304                                 if (++i == ARRAY_SIZE(types))
305                                         message_fatal(_("%s: Unknown file "
306                                                         "format type"),
307                                                         optarg);
308
309                         opt_format = types[i].format;
310                         break;
311                 }
312
313                 // --check
314                 case 'C': {
315                         static const struct {
316                                 char str[8];
317                                 lzma_check check;
318                         } types[] = {
319                                 { "none",   LZMA_CHECK_NONE },
320                                 { "crc32",  LZMA_CHECK_CRC32 },
321                                 { "crc64",  LZMA_CHECK_CRC64 },
322                                 { "sha256", LZMA_CHECK_SHA256 },
323                         };
324
325                         size_t i = 0;
326                         while (strcmp(types[i].str, optarg) != 0) {
327                                 if (++i == ARRAY_SIZE(types))
328                                         message_fatal(_("%s: Unsupported "
329                                                         "integrity "
330                                                         "check type"), optarg);
331                         }
332
333                         // Use a separate check in case we are using different
334                         // liblzma than what was used to compile us.
335                         if (!lzma_check_is_supported(types[i].check))
336                                 message_fatal(_("%s: Unsupported integrity "
337                                                 "check type"), optarg);
338
339                         coder_set_check(types[i].check);
340                         break;
341                 }
342
343                 case OPT_NO_SPARSE:
344                         io_no_sparse();
345                         break;
346
347                 case OPT_FILES:
348                         args->files_delim = '\n';
349
350                 // Fall through
351
352                 case OPT_FILES0:
353                         if (args->files_name != NULL)
354                                 message_fatal(_("Only one file can be "
355                                                 "specified with `--files' "
356                                                 "or `--files0'."));
357
358                         if (optarg == NULL) {
359                                 args->files_name = (char *)stdin_filename;
360                                 args->files_file = stdin;
361                         } else {
362                                 args->files_name = optarg;
363                                 args->files_file = fopen(optarg,
364                                                 c == OPT_FILES ? "r" : "rb");
365                                 if (args->files_file == NULL)
366                                         message_fatal("%s: %s", optarg,
367                                                         strerror(errno));
368                         }
369
370                         break;
371
372                 case OPT_NO_ADJUST:
373                         opt_auto_adjust = false;
374                         break;
375
376                 default:
377                         message_try_help();
378                         tuklib_exit(E_ERROR, E_ERROR, false);
379                 }
380         }
381
382         return;
383 }
384
385
386 static void
387 parse_environment(args_info *args, char *argv0)
388 {
389         char *env = getenv("XZ_OPT");
390         if (env == NULL)
391                 return;
392
393         // We modify the string, so make a copy of it.
394         env = xstrdup(env);
395
396         // Calculate the number of arguments in env. argc stats at one
397         // to include space for the program name.
398         int argc = 1;
399         bool prev_was_space = true;
400         for (size_t i = 0; env[i] != '\0'; ++i) {
401                 // NOTE: Cast to unsigned char is needed so that correct
402                 // value gets passed to isspace(), which expects
403                 // unsigned char cast to int. Casting to int is done
404                 // automatically due to integer promotion, but we need to
405                 // force char to unsigned char manually. Otherwise 8-bit
406                 // characters would get promoted to wrong value if
407                 // char is signed.
408                 if (isspace((unsigned char)env[i])) {
409                         prev_was_space = true;
410                 } else if (prev_was_space) {
411                         prev_was_space = false;
412
413                         // Keep argc small enough to fit into a singed int
414                         // and to keep it usable for memory allocation.
415                         if (++argc == my_min(
416                                         INT_MAX, SIZE_MAX / sizeof(char *)))
417                                 message_fatal(_("The environment variable "
418                                                 "XZ_OPT contains too many "
419                                                 "arguments"));
420                 }
421         }
422
423         // Allocate memory to hold pointers to the arguments. Add one to get
424         // space for the terminating NULL (if some systems happen to need it).
425         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
426         argv[0] = argv0;
427         argv[argc] = NULL;
428
429         // Go through the string again. Split the arguments using '\0'
430         // characters and add pointers to the resulting strings to argv.
431         argc = 1;
432         prev_was_space = true;
433         for (size_t i = 0; env[i] != '\0'; ++i) {
434                 if (isspace((unsigned char)env[i])) {
435                         prev_was_space = true;
436                         env[i] = '\0';
437                 } else if (prev_was_space) {
438                         prev_was_space = false;
439                         argv[argc++] = env + i;
440                 }
441         }
442
443         // Parse the argument list we got from the environment. All non-option
444         // arguments i.e. filenames are ignored.
445         parse_real(args, argc, argv);
446
447         // Reset the state of the getopt_long() so that we can parse the
448         // command line options too. There are two incompatible ways to
449         // do it.
450 #ifdef HAVE_OPTRESET
451         // BSD
452         optind = 1;
453         optreset = 1;
454 #else
455         // GNU, Solaris
456         optind = 0;
457 #endif
458
459         // We don't need the argument list from environment anymore.
460         free(argv);
461         free(env);
462
463         return;
464 }
465
466
467 extern void
468 args_parse(args_info *args, int argc, char **argv)
469 {
470         // Initialize those parts of *args that we need later.
471         args->files_name = NULL;
472         args->files_file = NULL;
473         args->files_delim = '\0';
474
475         // Check how we were called.
476         {
477                 // Remove the leading path name, if any.
478                 const char *name = strrchr(argv[0], '/');
479                 if (name == NULL)
480                         name = argv[0];
481                 else
482                         ++name;
483
484                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
485                 // is weird, but it doesn't matter here.
486
487                 // Look for full command names instead of substrings like
488                 // "un", "cat", and "lz" to reduce possibility of false
489                 // positives when the programs have been renamed.
490                 if (strstr(name, "xzcat") != NULL) {
491                         opt_mode = MODE_DECOMPRESS;
492                         opt_stdout = true;
493                 } else if (strstr(name, "unxz") != NULL) {
494                         opt_mode = MODE_DECOMPRESS;
495                 } else if (strstr(name, "lzcat") != NULL) {
496                         opt_format = FORMAT_LZMA;
497                         opt_mode = MODE_DECOMPRESS;
498                         opt_stdout = true;
499                 } else if (strstr(name, "unlzma") != NULL) {
500                         opt_format = FORMAT_LZMA;
501                         opt_mode = MODE_DECOMPRESS;
502                 } else if (strstr(name, "lzma") != NULL) {
503                         opt_format = FORMAT_LZMA;
504                 }
505         }
506
507         // First the flags from environment
508         parse_environment(args, argv[0]);
509
510         // Then from the command line
511         parse_real(args, argc, argv);
512
513         // Never remove the source file when the destination is not on disk.
514         // In test mode the data is written nowhere, but setting opt_stdout
515         // will make the rest of the code behave well.
516         if (opt_stdout || opt_mode == MODE_TEST) {
517                 opt_keep_original = true;
518                 opt_stdout = true;
519         }
520
521         // When compressing, if no --format flag was used, or it
522         // was --format=auto, we compress to the .xz format.
523         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
524                 opt_format = FORMAT_XZ;
525
526         // Compression settings need to be validated (options themselves and
527         // their memory usage) when compressing to any file format. It has to
528         // be done also when uncompressing raw data, since for raw decoding
529         // the options given on the command line are used to know what kind
530         // of raw data we are supposed to decode.
531         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
532                 coder_set_compression_settings();
533
534         // If no filenames are given, use stdin.
535         if (argv[optind] == NULL && args->files_name == NULL) {
536                 // We don't modify or free() the "-" constant. The caller
537                 // modifies this so don't make the struct itself const.
538                 static char *names_stdin[2] = { (char *)"-", NULL };
539                 args->arg_names = names_stdin;
540                 args->arg_count = 1;
541         } else {
542                 // We got at least one filename from the command line, or
543                 // --files or --files0 was specified.
544                 args->arg_names = argv + optind;
545                 args->arg_count = argc - optind;
546         }
547
548         return;
549 }