]> icculus.org git repositories - icculus/xz.git/blob - src/xz/args.c
Make the default memory usage limit 40 % of RAM for both
[icculus/xz.git] / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Global option flags. They are set while parsing the options in this file.

// Set by --stdout (-c). args_parse() also forces this to true in test mode
// and when the program name contains "cat".
bool opt_stdout = false;

// Set by --force (-f).
bool opt_force = false;

// Set by --keep (-k). args_parse() also forces this to true whenever
// opt_stdout is set or the operation mode is MODE_TEST.
bool opt_keep_original = false;

// Name used for standard input, e.g. when --files or --files0 is given
// without an argument.
//
// We don't modify or free() this, but we need to assign it to some
// non-const pointers.
const char *stdin_filename = "(stdin)";
28
29
30 static void
31 parse_real(args_info *args, int argc, char **argv)
32 {
33         enum {
34                 OPT_SUBBLOCK = INT_MIN,
35                 OPT_X86,
36                 OPT_POWERPC,
37                 OPT_IA64,
38                 OPT_ARM,
39                 OPT_ARMTHUMB,
40                 OPT_SPARC,
41                 OPT_DELTA,
42                 OPT_LZMA1,
43                 OPT_LZMA2,
44
45                 OPT_FILES,
46                 OPT_FILES0,
47         };
48
49         static const char short_opts[] = "cC:defF:hHlkM:qrS:tT:vVz0123456789";
50
51         static const struct option long_opts[] = {
52                 // Operation mode
53                 { "compress",       no_argument,       NULL,  'z' },
54                 { "decompress",     no_argument,       NULL,  'd' },
55                 { "uncompress",     no_argument,       NULL,  'd' },
56                 { "test",           no_argument,       NULL,  't' },
57                 { "list",           no_argument,       NULL,  'l' },
58                 { "info",           no_argument,       NULL,  'l' },
59
60                 // Operation modifiers
61                 { "keep",           no_argument,       NULL,  'k' },
62                 { "force",          no_argument,       NULL,  'f' },
63                 { "stdout",         no_argument,       NULL,  'c' },
64                 { "to-stdout",      no_argument,       NULL,  'c' },
65                 { "suffix",         required_argument, NULL,  'S' },
66                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
67                 { "files",          optional_argument, NULL,  OPT_FILES },
68                 { "files0",         optional_argument, NULL,  OPT_FILES0 },
69
70                 // Basic compression settings
71                 { "format",         required_argument, NULL,  'F' },
72                 { "check",          required_argument, NULL,  'C' },
73                 { "memory",         required_argument, NULL,  'M' },
74                 { "threads",        required_argument, NULL,  'T' },
75
76                 { "extreme",        no_argument,       NULL,  'e' },
77                 { "fast",           no_argument,       NULL,  '0' },
78                 { "best",           no_argument,       NULL,  '9' },
79
80                 // Filters
81                 { "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
82                 { "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
83                 { "x86",            no_argument,       NULL,  OPT_X86 },
84                 { "bcj",            no_argument,       NULL,  OPT_X86 },
85                 { "powerpc",        no_argument,       NULL,  OPT_POWERPC },
86                 { "ppc",            no_argument,       NULL,  OPT_POWERPC },
87                 { "ia64",           no_argument,       NULL,  OPT_IA64 },
88                 { "itanium",        no_argument,       NULL,  OPT_IA64 },
89                 { "arm",            no_argument,       NULL,  OPT_ARM },
90                 { "armthumb",       no_argument,       NULL,  OPT_ARMTHUMB },
91                 { "sparc",          no_argument,       NULL,  OPT_SPARC },
92                 { "delta",          optional_argument, NULL,  OPT_DELTA },
93                 { "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
94
95                 // Other options
96                 { "quiet",          no_argument,       NULL,  'q' },
97                 { "verbose",        no_argument,       NULL,  'v' },
98                 { "help",           no_argument,       NULL,  'h' },
99                 { "long-help",      no_argument,       NULL,  'H' },
100                 { "version",        no_argument,       NULL,  'V' },
101
102                 { NULL,                 0,                 NULL,   0 }
103         };
104
105         int c;
106
107         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
108                         != -1) {
109                 switch (c) {
110                 // Compression preset (also for decompression if --format=raw)
111                 case '0': case '1': case '2': case '3': case '4':
112                 case '5': case '6': case '7': case '8': case '9':
113                         coder_set_preset(c - '0');
114                         break;
115
116                 // --memory
117                 case 'M': {
118                         // Support specifying the limit as a percentage of
119                         // installed physical RAM.
120                         size_t len = strlen(optarg);
121                         if (len > 0 && optarg[len - 1] == '%') {
122                                 optarg[len - 1] = '\0';
123                                 hardware_memlimit_set_percentage(
124                                                 str_to_uint64(
125                                                 "memory%", optarg, 1, 100));
126                         } else {
127                                 // On 32-bit systems, SIZE_MAX would make more
128                                 // sense than UINT64_MAX. But use UINT64_MAX
129                                 // still so that scripts that assume > 4 GiB
130                                 // values don't break.
131                                 hardware_memlimit_set(str_to_uint64(
132                                                 "memory", optarg,
133                                                 0, UINT64_MAX));
134                         }
135
136                         break;
137                 }
138
139                 // --suffix
140                 case 'S':
141                         suffix_set(optarg);
142                         break;
143
144                 case 'T':
145                         hardware_threadlimit_set(str_to_uint64(
146                                         "threads", optarg, 0, UINT32_MAX));
147                         break;
148
149                 // --version
150                 case 'V':
151                         // This doesn't return.
152                         message_version();
153
154                 // --stdout
155                 case 'c':
156                         opt_stdout = true;
157                         break;
158
159                 // --decompress
160                 case 'd':
161                         opt_mode = MODE_DECOMPRESS;
162                         break;
163
164                 // --extreme
165                 case 'e':
166                         coder_set_extreme();
167                         break;
168
169                 // --force
170                 case 'f':
171                         opt_force = true;
172                         break;
173
174                 // --help
175                 case 'h':
176                         // This doesn't return.
177                         message_help(false);
178
179                 // --long-help
180                 case 'H':
181                         // This doesn't return.
182                         message_help(true);
183
184                 // --list
185                 case 'l':
186                         opt_mode = MODE_LIST;
187                         break;
188
189                 // --keep
190                 case 'k':
191                         opt_keep_original = true;
192                         break;
193
194                 // --quiet
195                 case 'q':
196                         message_verbosity_decrease();
197                         break;
198
199                 case 't':
200                         opt_mode = MODE_TEST;
201                         break;
202
203                 // --verbose
204                 case 'v':
205                         message_verbosity_increase();
206                         break;
207
208                 case 'z':
209                         opt_mode = MODE_COMPRESS;
210                         break;
211
212                 // Filter setup
213
214                 case OPT_SUBBLOCK:
215                         coder_add_filter(LZMA_FILTER_SUBBLOCK,
216                                         options_subblock(optarg));
217                         break;
218
219                 case OPT_X86:
220                         coder_add_filter(LZMA_FILTER_X86, NULL);
221                         break;
222
223                 case OPT_POWERPC:
224                         coder_add_filter(LZMA_FILTER_POWERPC, NULL);
225                         break;
226
227                 case OPT_IA64:
228                         coder_add_filter(LZMA_FILTER_IA64, NULL);
229                         break;
230
231                 case OPT_ARM:
232                         coder_add_filter(LZMA_FILTER_ARM, NULL);
233                         break;
234
235                 case OPT_ARMTHUMB:
236                         coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL);
237                         break;
238
239                 case OPT_SPARC:
240                         coder_add_filter(LZMA_FILTER_SPARC, NULL);
241                         break;
242
243                 case OPT_DELTA:
244                         coder_add_filter(LZMA_FILTER_DELTA,
245                                         options_delta(optarg));
246                         break;
247
248                 case OPT_LZMA1:
249                         coder_add_filter(LZMA_FILTER_LZMA1,
250                                         options_lzma(optarg));
251                         break;
252
253                 case OPT_LZMA2:
254                         coder_add_filter(LZMA_FILTER_LZMA2,
255                                         options_lzma(optarg));
256                         break;
257
258                 // Other
259
260                 // --format
261                 case 'F': {
262                         // Just in case, support both "lzma" and "alone" since
263                         // the latter was used for forward compatibility in
264                         // LZMA Utils 4.32.x.
265                         static const struct {
266                                 char str[8];
267                                 enum format_type format;
268                         } types[] = {
269                                 { "auto",   FORMAT_AUTO },
270                                 { "xz",     FORMAT_XZ },
271                                 { "lzma",   FORMAT_LZMA },
272                                 { "alone",  FORMAT_LZMA },
273                                 // { "gzip",   FORMAT_GZIP },
274                                 // { "gz",     FORMAT_GZIP },
275                                 { "raw",    FORMAT_RAW },
276                         };
277
278                         size_t i = 0;
279                         while (strcmp(types[i].str, optarg) != 0)
280                                 if (++i == ARRAY_SIZE(types))
281                                         message_fatal(_("%s: Unknown file "
282                                                         "format type"),
283                                                         optarg);
284
285                         opt_format = types[i].format;
286                         break;
287                 }
288
289                 // --check
290                 case 'C': {
291                         static const struct {
292                                 char str[8];
293                                 lzma_check check;
294                         } types[] = {
295                                 { "none",   LZMA_CHECK_NONE },
296                                 { "crc32",  LZMA_CHECK_CRC32 },
297                                 { "crc64",  LZMA_CHECK_CRC64 },
298                                 { "sha256", LZMA_CHECK_SHA256 },
299                         };
300
301                         size_t i = 0;
302                         while (strcmp(types[i].str, optarg) != 0) {
303                                 if (++i == ARRAY_SIZE(types))
304                                         message_fatal(_("%s: Unsupported "
305                                                         "integrity "
306                                                         "check type"), optarg);
307                         }
308
309                         // Use a separate check in case we are using different
310                         // liblzma than what was used to compile us.
311                         if (!lzma_check_is_supported(types[i].check))
312                                 message_fatal(_("%s: Unsupported integrity "
313                                                 "check type"), optarg);
314
315                         coder_set_check(types[i].check);
316                         break;
317                 }
318
319                 case OPT_FILES:
320                         args->files_delim = '\n';
321
322                 // Fall through
323
324                 case OPT_FILES0:
325                         if (args->files_name != NULL)
326                                 message_fatal(_("Only one file can be "
327                                                 "specified with `--files'"
328                                                 "or `--files0'."));
329
330                         if (optarg == NULL) {
331                                 args->files_name = (char *)stdin_filename;
332                                 args->files_file = stdin;
333                         } else {
334                                 args->files_name = optarg;
335                                 args->files_file = fopen(optarg,
336                                                 c == OPT_FILES ? "r" : "rb");
337                                 if (args->files_file == NULL)
338                                         message_fatal("%s: %s", optarg,
339                                                         strerror(errno));
340                         }
341
342                         break;
343
344                 default:
345                         message_try_help();
346                         my_exit(E_ERROR);
347                 }
348         }
349
350         return;
351 }
352
353
354 static void
355 parse_environment(args_info *args, char *argv0)
356 {
357         char *env = getenv("XZ_OPT");
358         if (env == NULL)
359                 return;
360
361         // We modify the string, so make a copy of it.
362         env = xstrdup(env);
363
364         // Calculate the number of arguments in env. argc stats at one
365         // to include space for the program name.
366         int argc = 1;
367         bool prev_was_space = true;
368         for (size_t i = 0; env[i] != '\0'; ++i) {
369                 if (isspace(env[i])) {
370                         prev_was_space = true;
371                 } else if (prev_was_space) {
372                         prev_was_space = false;
373
374                         // Keep argc small enough to fit into a singed int
375                         // and to keep it usable for memory allocation.
376                         if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
377                                 message_fatal(_("The environment variable "
378                                                 "XZ_OPT contains too many "
379                                                 "arguments"));
380                 }
381         }
382
383         // Allocate memory to hold pointers to the arguments. Add one to get
384         // space for the terminating NULL (if some systems happen to need it).
385         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
386         argv[0] = argv0;
387         argv[argc] = NULL;
388
389         // Go through the string again. Split the arguments using '\0'
390         // characters and add pointers to the resulting strings to argv.
391         argc = 1;
392         prev_was_space = true;
393         for (size_t i = 0; env[i] != '\0'; ++i) {
394                 if (isspace(env[i])) {
395                         prev_was_space = true;
396                         env[i] = '\0';
397                 } else if (prev_was_space) {
398                         prev_was_space = false;
399                         argv[argc++] = env + i;
400                 }
401         }
402
403         // Parse the argument list we got from the environment. All non-option
404         // arguments i.e. filenames are ignored.
405         parse_real(args, argc, argv);
406
407         // Reset the state of the getopt_long() so that we can parse the
408         // command line options too. There are two incompatible ways to
409         // do it.
410 #ifdef HAVE_OPTRESET
411         // BSD
412         optind = 1;
413         optreset = 1;
414 #else
415         // GNU, Solaris
416         optind = 0;
417 #endif
418
419         // We don't need the argument list from environment anymore.
420         free(argv);
421         free(env);
422
423         return;
424 }
425
426
427 extern void
428 args_parse(args_info *args, int argc, char **argv)
429 {
430         // Initialize those parts of *args that we need later.
431         args->files_name = NULL;
432         args->files_file = NULL;
433         args->files_delim = '\0';
434
435         // Type of the file format to use when --format=auto or no --format
436         // was specified.
437         enum format_type format_compress_auto = FORMAT_XZ;
438
439         // Check how we were called.
440         {
441 #ifdef DOSLIKE
442                 // We adjusted argv[0] in the beginning of main() so we don't
443                 // need to do anything here.
444                 const char *name = argv[0];
445 #else
446                 // Remove the leading path name, if any.
447                 const char *name = strrchr(argv[0], '/');
448                 if (name == NULL)
449                         name = argv[0];
450                 else
451                         ++name;
452 #endif
453
454                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
455                 // is weird, but it doesn't matter here.
456
457                 // The default file format is .lzma if the command name
458                 // contains "lz".
459                 if (strstr(name, "lz") != NULL)
460                         format_compress_auto = FORMAT_LZMA;
461
462                 // Operation mode
463                 if (strstr(name, "cat") != NULL) {
464                         // Imply --decompress --stdout
465                         opt_mode = MODE_DECOMPRESS;
466                         opt_stdout = true;
467                 } else if (strstr(name, "un") != NULL) {
468                         // Imply --decompress
469                         opt_mode = MODE_DECOMPRESS;
470                 }
471         }
472
473         // First the flags from environment
474         parse_environment(args, argv[0]);
475
476         // Then from the command line
477         optind = 1;
478         parse_real(args, argc, argv);
479
480         // Never remove the source file when the destination is not on disk.
481         // In test mode the data is written nowhere, but setting opt_stdout
482         // will make the rest of the code behave well.
483         if (opt_stdout || opt_mode == MODE_TEST) {
484                 opt_keep_original = true;
485                 opt_stdout = true;
486         }
487
488         // If no --format flag was used, or it was --format=auto, we need to
489         // decide what is the target file format we are going to use. This
490         // depends on how we were called (checked earlier in this function).
491         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
492                 opt_format = format_compress_auto;
493
494         // Compression settings need to be validated (options themselves and
495         // their memory usage) when compressing to any file format. It has to
496         // be done also when uncompressing raw data, since for raw decoding
497         // the options given on the command line are used to know what kind
498         // of raw data we are supposed to decode.
499         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
500                 coder_set_compression_settings();
501
502         // If no filenames are given, use stdin.
503         if (argv[optind] == NULL && args->files_name == NULL) {
504                 // We don't modify or free() the "-" constant. The caller
505                 // modifies this so don't make the struct itself const.
506                 static char *names_stdin[2] = { (char *)"-", NULL };
507                 args->arg_names = names_stdin;
508                 args->arg_count = 1;
509         } else {
510                 // We got at least one filename from the command line, or
511                 // --files or --files0 was specified.
512                 args->arg_names = argv + optind;
513                 args->arg_count = argc - optind;
514         }
515
516         return;
517 }