]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Add support for specifying the BCJ filter start offset
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
21 bool opt_stdout = false;
22 bool opt_force = false;
23 bool opt_keep_original = false;
24
25 // We don't modify or free() this, but we need to assign it to some
26 // non-const pointers.
27 const char *stdin_filename = "(stdin)";
28
29
30 static void
31 parse_real(args_info *args, int argc, char **argv)
32 {
33         enum {
34                 OPT_SUBBLOCK = INT_MIN,
35                 OPT_X86,
36                 OPT_POWERPC,
37                 OPT_IA64,
38                 OPT_ARM,
39                 OPT_ARMTHUMB,
40                 OPT_SPARC,
41                 OPT_DELTA,
42                 OPT_LZMA1,
43                 OPT_LZMA2,
44
45                 OPT_FILES,
46                 OPT_FILES0,
47         };
48
49         static const char short_opts[]
50                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
51
52         static const struct option long_opts[] = {
53                 // Operation mode
54                 { "compress",       no_argument,       NULL,  'z' },
55                 { "decompress",     no_argument,       NULL,  'd' },
56                 { "uncompress",     no_argument,       NULL,  'd' },
57                 { "test",           no_argument,       NULL,  't' },
58                 { "list",           no_argument,       NULL,  'l' },
59
60                 // Operation modifiers
61                 { "keep",           no_argument,       NULL,  'k' },
62                 { "force",          no_argument,       NULL,  'f' },
63                 { "stdout",         no_argument,       NULL,  'c' },
64                 { "to-stdout",      no_argument,       NULL,  'c' },
65                 { "suffix",         required_argument, NULL,  'S' },
66                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
67                 { "files",          optional_argument, NULL,  OPT_FILES },
68                 { "files0",         optional_argument, NULL,  OPT_FILES0 },
69
70                 // Basic compression settings
71                 { "format",         required_argument, NULL,  'F' },
72                 { "check",          required_argument, NULL,  'C' },
73                 { "memory",         required_argument, NULL,  'M' },
74                 { "threads",        required_argument, NULL,  'T' },
75
76                 { "extreme",        no_argument,       NULL,  'e' },
77                 { "fast",           no_argument,       NULL,  '0' },
78                 { "best",           no_argument,       NULL,  '9' },
79
80                 // Filters
81                 { "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
82                 { "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
83                 { "x86",            optional_argument, NULL,  OPT_X86 },
84                 { "bcj",            optional_argument, NULL,  OPT_X86 },
85                 { "powerpc",        optional_argument, NULL,  OPT_POWERPC },
86                 { "ppc",            optional_argument, NULL,  OPT_POWERPC },
87                 { "ia64",           optional_argument, NULL,  OPT_IA64 },
88                 { "itanium",        optional_argument, NULL,  OPT_IA64 },
89                 { "arm",            optional_argument, NULL,  OPT_ARM },
90                 { "armthumb",       optional_argument, NULL,  OPT_ARMTHUMB },
91                 { "sparc",          optional_argument, NULL,  OPT_SPARC },
92                 { "delta",          optional_argument, NULL,  OPT_DELTA },
93                 { "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
94
95                 // Other options
96                 { "quiet",          no_argument,       NULL,  'q' },
97                 { "verbose",        no_argument,       NULL,  'v' },
98                 { "no-warn",        no_argument,       NULL,  'Q' },
99                 { "help",           no_argument,       NULL,  'h' },
100                 { "long-help",      no_argument,       NULL,  'H' },
101                 { "version",        no_argument,       NULL,  'V' },
102
103                 { NULL,                 0,                 NULL,   0 }
104         };
105
106         int c;
107
108         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
109                         != -1) {
110                 switch (c) {
111                 // Compression preset (also for decompression if --format=raw)
112                 case '0': case '1': case '2': case '3': case '4':
113                 case '5': case '6': case '7': case '8': case '9':
114                         coder_set_preset(c - '0');
115                         break;
116
117                 // --memory
118                 case 'M': {
119                         // Support specifying the limit as a percentage of
120                         // installed physical RAM.
121                         size_t len = strlen(optarg);
122                         if (len > 0 && optarg[len - 1] == '%') {
123                                 optarg[len - 1] = '\0';
124                                 hardware_memlimit_set_percentage(
125                                                 str_to_uint64(
126                                                 "memory%", optarg, 1, 100));
127                         } else {
128                                 // On 32-bit systems, SIZE_MAX would make more
129                                 // sense than UINT64_MAX. But use UINT64_MAX
130                                 // still so that scripts that assume > 4 GiB
131                                 // values don't break.
132                                 hardware_memlimit_set(str_to_uint64(
133                                                 "memory", optarg,
134                                                 0, UINT64_MAX));
135                         }
136
137                         break;
138                 }
139
140                 // --suffix
141                 case 'S':
142                         suffix_set(optarg);
143                         break;
144
145                 case 'T':
146                         hardware_threadlimit_set(str_to_uint64(
147                                         "threads", optarg, 0, UINT32_MAX));
148                         break;
149
150                 // --version
151                 case 'V':
152                         // This doesn't return.
153                         message_version();
154
155                 // --stdout
156                 case 'c':
157                         opt_stdout = true;
158                         break;
159
160                 // --decompress
161                 case 'd':
162                         opt_mode = MODE_DECOMPRESS;
163                         break;
164
165                 // --extreme
166                 case 'e':
167                         coder_set_extreme();
168                         break;
169
170                 // --force
171                 case 'f':
172                         opt_force = true;
173                         break;
174
175                 // --help
176                 case 'h':
177                         // This doesn't return.
178                         message_help(false);
179
180                 // --long-help
181                 case 'H':
182                         // This doesn't return.
183                         message_help(true);
184
185                 // --list
186                 case 'l':
187                         opt_mode = MODE_LIST;
188                         break;
189
190                 // --keep
191                 case 'k':
192                         opt_keep_original = true;
193                         break;
194
195                 // --quiet
196                 case 'q':
197                         message_verbosity_decrease();
198                         break;
199
200                 case 'Q':
201                         set_exit_no_warn();
202                         break;
203
204                 case 't':
205                         opt_mode = MODE_TEST;
206                         break;
207
208                 // --verbose
209                 case 'v':
210                         message_verbosity_increase();
211                         break;
212
213                 case 'z':
214                         opt_mode = MODE_COMPRESS;
215                         break;
216
217                 // Filter setup
218
219                 case OPT_SUBBLOCK:
220                         coder_add_filter(LZMA_FILTER_SUBBLOCK,
221                                         options_subblock(optarg));
222                         break;
223
224                 case OPT_X86:
225                         coder_add_filter(LZMA_FILTER_X86,
226                                         options_bcj(optarg));
227                         break;
228
229                 case OPT_POWERPC:
230                         coder_add_filter(LZMA_FILTER_POWERPC,
231                                         options_bcj(optarg));
232                         break;
233
234                 case OPT_IA64:
235                         coder_add_filter(LZMA_FILTER_IA64,
236                                         options_bcj(optarg));
237                         break;
238
239                 case OPT_ARM:
240                         coder_add_filter(LZMA_FILTER_ARM,
241                                         options_bcj(optarg));
242                         break;
243
244                 case OPT_ARMTHUMB:
245                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
246                                         options_bcj(optarg));
247                         break;
248
249                 case OPT_SPARC:
250                         coder_add_filter(LZMA_FILTER_SPARC,
251                                         options_bcj(optarg));
252                         break;
253
254                 case OPT_DELTA:
255                         coder_add_filter(LZMA_FILTER_DELTA,
256                                         options_delta(optarg));
257                         break;
258
259                 case OPT_LZMA1:
260                         coder_add_filter(LZMA_FILTER_LZMA1,
261                                         options_lzma(optarg));
262                         break;
263
264                 case OPT_LZMA2:
265                         coder_add_filter(LZMA_FILTER_LZMA2,
266                                         options_lzma(optarg));
267                         break;
268
269                 // Other
270
271                 // --format
272                 case 'F': {
273                         // Just in case, support both "lzma" and "alone" since
274                         // the latter was used for forward compatibility in
275                         // LZMA Utils 4.32.x.
276                         static const struct {
277                                 char str[8];
278                                 enum format_type format;
279                         } types[] = {
280                                 { "auto",   FORMAT_AUTO },
281                                 { "xz",     FORMAT_XZ },
282                                 { "lzma",   FORMAT_LZMA },
283                                 { "alone",  FORMAT_LZMA },
284                                 // { "gzip",   FORMAT_GZIP },
285                                 // { "gz",     FORMAT_GZIP },
286                                 { "raw",    FORMAT_RAW },
287                         };
288
289                         size_t i = 0;
290                         while (strcmp(types[i].str, optarg) != 0)
291                                 if (++i == ARRAY_SIZE(types))
292                                         message_fatal(_("%s: Unknown file "
293                                                         "format type"),
294                                                         optarg);
295
296                         opt_format = types[i].format;
297                         break;
298                 }
299
300                 // --check
301                 case 'C': {
302                         static const struct {
303                                 char str[8];
304                                 lzma_check check;
305                         } types[] = {
306                                 { "none",   LZMA_CHECK_NONE },
307                                 { "crc32",  LZMA_CHECK_CRC32 },
308                                 { "crc64",  LZMA_CHECK_CRC64 },
309                                 { "sha256", LZMA_CHECK_SHA256 },
310                         };
311
312                         size_t i = 0;
313                         while (strcmp(types[i].str, optarg) != 0) {
314                                 if (++i == ARRAY_SIZE(types))
315                                         message_fatal(_("%s: Unsupported "
316                                                         "integrity "
317                                                         "check type"), optarg);
318                         }
319
320                         // Use a separate check in case we are using different
321                         // liblzma than what was used to compile us.
322                         if (!lzma_check_is_supported(types[i].check))
323                                 message_fatal(_("%s: Unsupported integrity "
324                                                 "check type"), optarg);
325
326                         coder_set_check(types[i].check);
327                         break;
328                 }
329
330                 case OPT_FILES:
331                         args->files_delim = '\n';
332
333                 // Fall through
334
335                 case OPT_FILES0:
336                         if (args->files_name != NULL)
337                                 message_fatal(_("Only one file can be "
338                                                 "specified with `--files'"
339                                                 "or `--files0'."));
340
341                         if (optarg == NULL) {
342                                 args->files_name = (char *)stdin_filename;
343                                 args->files_file = stdin;
344                         } else {
345                                 args->files_name = optarg;
346                                 args->files_file = fopen(optarg,
347                                                 c == OPT_FILES ? "r" : "rb");
348                                 if (args->files_file == NULL)
349                                         message_fatal("%s: %s", optarg,
350                                                         strerror(errno));
351                         }
352
353                         break;
354
355                 default:
356                         message_try_help();
357                         my_exit(E_ERROR);
358                 }
359         }
360
361         return;
362 }
363
364
365 static void
366 parse_environment(args_info *args, char *argv0)
367 {
368         char *env = getenv("XZ_OPT");
369         if (env == NULL)
370                 return;
371
372         // We modify the string, so make a copy of it.
373         env = xstrdup(env);
374
375         // Calculate the number of arguments in env. argc stats at one
376         // to include space for the program name.
377         int argc = 1;
378         bool prev_was_space = true;
379         for (size_t i = 0; env[i] != '\0'; ++i) {
380                 if (isspace(env[i])) {
381                         prev_was_space = true;
382                 } else if (prev_was_space) {
383                         prev_was_space = false;
384
385                         // Keep argc small enough to fit into a singed int
386                         // and to keep it usable for memory allocation.
387                         if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
388                                 message_fatal(_("The environment variable "
389                                                 "XZ_OPT contains too many "
390                                                 "arguments"));
391                 }
392         }
393
394         // Allocate memory to hold pointers to the arguments. Add one to get
395         // space for the terminating NULL (if some systems happen to need it).
396         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
397         argv[0] = argv0;
398         argv[argc] = NULL;
399
400         // Go through the string again. Split the arguments using '\0'
401         // characters and add pointers to the resulting strings to argv.
402         argc = 1;
403         prev_was_space = true;
404         for (size_t i = 0; env[i] != '\0'; ++i) {
405                 if (isspace(env[i])) {
406                         prev_was_space = true;
407                         env[i] = '\0';
408                 } else if (prev_was_space) {
409                         prev_was_space = false;
410                         argv[argc++] = env + i;
411                 }
412         }
413
414         // Parse the argument list we got from the environment. All non-option
415         // arguments i.e. filenames are ignored.
416         parse_real(args, argc, argv);
417
418         // Reset the state of the getopt_long() so that we can parse the
419         // command line options too. There are two incompatible ways to
420         // do it.
421 #ifdef HAVE_OPTRESET
422         // BSD
423         optind = 1;
424         optreset = 1;
425 #else
426         // GNU, Solaris
427         optind = 0;
428 #endif
429
430         // We don't need the argument list from environment anymore.
431         free(argv);
432         free(env);
433
434         return;
435 }
436
437
438 extern void
439 args_parse(args_info *args, int argc, char **argv)
440 {
441         // Initialize those parts of *args that we need later.
442         args->files_name = NULL;
443         args->files_file = NULL;
444         args->files_delim = '\0';
445
446         // Check how we were called.
447         {
448 #ifdef DOSLIKE
449                 // We adjusted argv[0] in the beginning of main() so we don't
450                 // need to do anything here.
451                 const char *name = argv[0];
452 #else
453                 // Remove the leading path name, if any.
454                 const char *name = strrchr(argv[0], '/');
455                 if (name == NULL)
456                         name = argv[0];
457                 else
458                         ++name;
459 #endif
460
461                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
462                 // is weird, but it doesn't matter here.
463
464                 // If the command name contains "lz",
465                 // it implies --format=lzma.
466                 if (strstr(name, "lz") != NULL)
467                         opt_format = FORMAT_LZMA;
468
469                 // Operation mode
470                 if (strstr(name, "cat") != NULL) {
471                         // Imply --decompress --stdout
472                         opt_mode = MODE_DECOMPRESS;
473                         opt_stdout = true;
474                 } else if (strstr(name, "un") != NULL) {
475                         // Imply --decompress
476                         opt_mode = MODE_DECOMPRESS;
477                 }
478         }
479
480         // First the flags from environment
481         parse_environment(args, argv[0]);
482
483         // Then from the command line
484         optind = 1;
485         parse_real(args, argc, argv);
486
487         // Never remove the source file when the destination is not on disk.
488         // In test mode the data is written nowhere, but setting opt_stdout
489         // will make the rest of the code behave well.
490         if (opt_stdout || opt_mode == MODE_TEST) {
491                 opt_keep_original = true;
492                 opt_stdout = true;
493         }
494
495         // When compressing, if no --format flag was used, or it
496         // was --format=auto, we compress to the .xz format.
497         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
498                 opt_format = FORMAT_XZ;
499
500         // Compression settings need to be validated (options themselves and
501         // their memory usage) when compressing to any file format. It has to
502         // be done also when uncompressing raw data, since for raw decoding
503         // the options given on the command line are used to know what kind
504         // of raw data we are supposed to decode.
505         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
506                 coder_set_compression_settings();
507
508         // If no filenames are given, use stdin.
509         if (argv[optind] == NULL && args->files_name == NULL) {
510                 // We don't modify or free() the "-" constant. The caller
511                 // modifies this so don't make the struct itself const.
512                 static char *names_stdin[2] = { (char *)"-", NULL };
513                 args->arg_names = names_stdin;
514                 args->arg_count = 1;
515         } else {
516                 // We got at least one filename from the command line, or
517                 // --files or --files0 was specified.
518                 args->arg_names = argv + optind;
519                 args->arg_count = argc - optind;
520         }
521
522         return;
523 }