]> icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
Consistently round up the memory usage limit in messages.
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "sysdefs.h"
14 #include "lzma.h"
15
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <stdio.h>
19 #include <unistd.h>
20
21 #include "getopt.h"
22 #include "tuklib_progname.h"
23 #include "tuklib_exit.h"
24
25 #ifdef TUKLIB_DOSLIKE
26 #       include <fcntl.h>
27 #       include <io.h>
28 #endif
29
30
// Name of the file format handled by this build of the tool. It is pasted
// into the usage text and the version banner via string concatenation.
#if defined(LZMADEC)
#	define TOOL_FORMAT "lzma"
#else
#	define TOOL_FORMAT "xz"
#endif
36
37
/// Memory usage limit in bytes that is handed to the decoder initializer
static uint64_t memlimit;

/// Total amount of physical RAM in bytes
static uint64_t total_ram;

/// Error messages are printed only while this is non-zero. Every --quiet
/// decrements it, so giving --quiet at least twice silences the errors.
static unsigned int display_errors = 2;
47
48
49 static void lzma_attribute((format(printf, 1, 2)))
50 my_errorf(const char *fmt, ...)
51 {
52         va_list ap;
53         va_start(ap, fmt);
54
55         if (display_errors) {
56                 fprintf(stderr, "%s: ", progname);
57                 vfprintf(stderr, fmt, ap);
58                 fprintf(stderr, "\n");
59         }
60
61         va_end(ap);
62         return;
63 }
64
65
66 static void lzma_attribute((noreturn))
67 help(void)
68 {
69         // Round up to the next MiB and do it correctly also with UINT64_MAX.
70         const uint64_t mem_mib = (memlimit >> 20)
71                         + ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0);
72
73         printf(
74 "Usage: %s [OPTION]... [FILE]...\n"
75 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
76 "\n"
77 "  -c, --stdout       (ignored)\n"
78 "  -d, --decompress   (ignored)\n"
79 "  -k, --keep         (ignored)\n"
80 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default)\n"
81 "  -q, --quiet        specify *twice* to suppress errors\n"
82 "  -Q, --no-warn      (ignored)\n"
83 "  -h, --help         display this help and exit\n"
84 "  -V, --version      display the version number and exit\n"
85 "\n"
86 "With no FILE, or when FILE is -, read standard input.\n"
87 "\n"
88 "On this system and configuration, this program will use a maximum of roughly\n"
89 "%" PRIu64 " MiB RAM.\n"
90 "\n"
91 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
92 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib);
93         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
94 }
95
96
97 static void lzma_attribute((noreturn))
98 version(void)
99 {
100         printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
101                         "liblzma %s\n", lzma_version_string());
102
103         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
104 }
105
106
107 /// Find out the amount of physical memory (RAM) in the system, and set
108 /// the memory usage limit to the given percentage of RAM.
109 static void
110 memlimit_set_percentage(uint32_t percentage)
111 {
112         memlimit = percentage * total_ram / 100;
113         return;
114 }
115
116
117 /// Set the memory usage limit to give number of bytes. Zero is a special
118 /// value to indicate the default limit.
119 static void
120 memlimit_set(uint64_t new_memlimit)
121 {
122         if (new_memlimit != 0) {
123                 memlimit = new_memlimit;
124         } else {
125                 memlimit = 40 * total_ram / 100;
126                 if (memlimit < UINT64_C(80) * 1024 * 1024) {
127                         memlimit = 80 * total_ram / 100;
128                         if (memlimit > UINT64_C(80) * 1024 * 1024)
129                                 memlimit = UINT64_C(80) * 1024 * 1024;
130                 }
131         }
132
133         return;
134 }
135
136
137 /// Get the total amount of physical RAM and set the memory usage limit
138 /// to the default value.
139 static void
140 memlimit_init(void)
141 {
142         // If we cannot determine the amount of RAM, use the assumption
143         // defined by the configure script.
144         total_ram = lzma_physmem();
145         if (total_ram == 0)
146                 total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
147
148         memlimit_set(0);
149         return;
150 }
151
152
/// \brief      Convert a string to uint64_t
///
/// The string must be either the word "max" or a decimal integer that may
/// be followed by one multiplier suffix: k, kB, M, MB, G, GB (powers of
/// 1000) or Ki, KiB, Mi, MiB, Gi, GiB (powers of 1024). On a malformed
/// number or an unknown suffix an error is reported with my_errorf() and
/// the program exits with EXIT_FAILURE.
///
/// This is a modified copy of the code in src/xz/util.c.
///
/// \param      value   String to parse
/// \param      max     Return value when the string "max" was specified.
///
/// \return     The parsed value. Values too big for uint64_t give
///             UINT64_MAX; when the digits alone are already too big,
///             UINT64_MAX is returned without looking at what follows.
///
static uint64_t
str_to_uint64(const char *value, uint64_t max)
{
	// Accept special value "max".
	if (strcmp(value, "max") == 0)
		return max;

	// There has to be at least one digit in the beginning.
	if (!(*value >= '0' && *value <= '9')) {
		my_errorf("%s: Value is not a non-negative decimal integer",
				value);
		exit(EXIT_FAILURE);
	}

	uint64_t number = 0;
	const char *pos = value;

	for (; *pos >= '0' && *pos <= '9'; ++pos) {
		// Another digit wouldn't be guaranteed to fit anymore.
		if (number > (UINT64_MAX - 9) / 10)
			return UINT64_MAX;

		number = number * 10 + (uint64_t)(*pos - '0');
	}

	// No suffix means that the number is used as is.
	if (*pos == '\0')
		return number;

	// Whatever follows the digits must match exactly one known suffix.
	static const struct {
		const char name[4];
		uint32_t multiplier;
	} suffixes[] = {
		{ "k",   1000 },
		{ "kB",  1000 },
		{ "M",   1000000 },
		{ "MB",  1000000 },
		{ "G",   1000000000 },
		{ "GB",  1000000000 },
		{ "Ki",  1024 },
		{ "KiB", 1024 },
		{ "Mi",  1048576 },
		{ "MiB", 1048576 },
		{ "Gi",  1073741824 },
		{ "GiB", 1073741824 }
	};

	// Zero isn't a valid multiplier, so it doubles as "not found".
	uint32_t factor = 0;
	for (size_t i = 0; i < ARRAY_SIZE(suffixes) && factor == 0; ++i) {
		if (strcmp(pos, suffixes[i].name) == 0)
			factor = suffixes[i].multiplier;
	}

	if (factor == 0) {
		my_errorf("%s: Invalid suffix", pos);
		exit(EXIT_FAILURE);
	}

	// Saturate instead of overflowing.
	return number > UINT64_MAX / factor ? UINT64_MAX : number * factor;
}
226
227
228 /// Parses command line options.
229 static void
230 parse_options(int argc, char **argv)
231 {
232         static const char short_opts[] = "cdkM:hqQV";
233         static const struct option long_opts[] = {
234                 { "stdout",       no_argument,         NULL, 'c' },
235                 { "to-stdout",    no_argument,         NULL, 'c' },
236                 { "decompress",   no_argument,         NULL, 'd' },
237                 { "uncompress",   no_argument,         NULL, 'd' },
238                 { "keep",         no_argument,         NULL, 'k' },
239                 { "memory",       required_argument,   NULL, 'M' },
240                 { "quiet",        no_argument,         NULL, 'q' },
241                 { "no-warn",      no_argument,         NULL, 'Q' },
242                 { "help",         no_argument,         NULL, 'h' },
243                 { "version",      no_argument,         NULL, 'V' },
244                 { NULL,           0,                   NULL, 0   }
245         };
246
247         int c;
248
249         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
250                         != -1) {
251                 switch (c) {
252                 case 'c':
253                 case 'd':
254                 case 'k':
255                 case 'Q':
256                         break;
257
258                 case 'M': {
259                         // Support specifying the limit as a percentage of
260                         // installed physical RAM.
261                         const size_t len = strlen(optarg);
262                         if (len > 0 && optarg[len - 1] == '%') {
263                                 // Memory limit is a percentage of total
264                                 // installed RAM.
265                                 optarg[len - 1] = '\0';
266                                 const uint64_t percentage
267                                                 = str_to_uint64(optarg, 100);
268                                 if (percentage < 1 || percentage > 100) {
269                                         my_errorf("Percentage must be in "
270                                                         "the range [1, 100]");
271                                         exit(EXIT_FAILURE);
272                                 }
273
274                                 memlimit_set_percentage(percentage);
275                         } else {
276                                 memlimit_set(str_to_uint64(
277                                                 optarg, UINT64_MAX));
278                         }
279
280                         break;
281                 }
282
283                 case 'q':
284                         if (display_errors > 0)
285                                 --display_errors;
286
287                         break;
288
289                 case 'h':
290                         help();
291
292                 case 'V':
293                         version();
294
295                 default:
296                         exit(EXIT_FAILURE);
297                 }
298         }
299
300         return;
301 }
302
303
304 static void
305 uncompress(lzma_stream *strm, FILE *file, const char *filename)
306 {
307         lzma_ret ret;
308
309         // Initialize the decoder
310 #ifdef LZMADEC
311         ret = lzma_alone_decoder(strm, memlimit);
312 #else
313         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
314 #endif
315
316         // The only reasonable error here is LZMA_MEM_ERROR.
317         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
318         if (ret != LZMA_OK) {
319                 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
320                                 : "Internal error (bug)");
321                 exit(EXIT_FAILURE);
322         }
323
324         // Input and output buffers
325         uint8_t in_buf[BUFSIZ];
326         uint8_t out_buf[BUFSIZ];
327
328         strm->avail_in = 0;
329         strm->next_out = out_buf;
330         strm->avail_out = BUFSIZ;
331
332         lzma_action action = LZMA_RUN;
333
334         while (true) {
335                 if (strm->avail_in == 0) {
336                         strm->next_in = in_buf;
337                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
338
339                         if (ferror(file)) {
340                                 // POSIX says that fread() sets errno if
341                                 // an error occurred. ferror() doesn't
342                                 // touch errno.
343                                 my_errorf("%s: Error reading input file: %s",
344                                                 filename, strerror(errno));
345                                 exit(EXIT_FAILURE);
346                         }
347
348 #ifndef LZMADEC
349                         // When using LZMA_CONCATENATED, we need to tell
350                         // liblzma when it has got all the input.
351                         if (feof(file))
352                                 action = LZMA_FINISH;
353 #endif
354                 }
355
356                 ret = lzma_code(strm, action);
357
358                 // Write and check write error before checking decoder error.
359                 // This way as much data as possible gets written to output
360                 // even if decoder detected an error.
361                 if (strm->avail_out == 0 || ret != LZMA_OK) {
362                         const size_t write_size = BUFSIZ - strm->avail_out;
363
364                         if (fwrite(out_buf, 1, write_size, stdout)
365                                         != write_size) {
366                                 // Wouldn't be a surprise if writing to stderr
367                                 // would fail too but at least try to show an
368                                 // error message.
369                                 my_errorf("Cannot write to standard output: "
370                                                 "%s", strerror(errno));
371                                 exit(EXIT_FAILURE);
372                         }
373
374                         strm->next_out = out_buf;
375                         strm->avail_out = BUFSIZ;
376                 }
377
378                 if (ret != LZMA_OK) {
379                         if (ret == LZMA_STREAM_END) {
380 #ifdef LZMADEC
381                                 // Check that there's no trailing garbage.
382                                 if (strm->avail_in != 0
383                                                 || fread(in_buf, 1, 1, file)
384                                                         != 0
385                                                 || !feof(file))
386                                         ret = LZMA_DATA_ERROR;
387                                 else
388                                         return;
389 #else
390                                 // lzma_stream_decoder() already guarantees
391                                 // that there's no trailing garbage.
392                                 assert(strm->avail_in == 0);
393                                 assert(action == LZMA_FINISH);
394                                 assert(feof(file));
395                                 return;
396 #endif
397                         }
398
399                         const char *msg;
400                         switch (ret) {
401                         case LZMA_MEM_ERROR:
402                                 msg = strerror(ENOMEM);
403                                 break;
404
405                         case LZMA_MEMLIMIT_ERROR:
406                                 msg = "Memory usage limit reached";
407                                 break;
408
409                         case LZMA_FORMAT_ERROR:
410                                 msg = "File format not recognized";
411                                 break;
412
413                         case LZMA_OPTIONS_ERROR:
414                                 // FIXME: Better message?
415                                 msg = "Unsupported compression options";
416                                 break;
417
418                         case LZMA_DATA_ERROR:
419                                 msg = "File is corrupt";
420                                 break;
421
422                         case LZMA_BUF_ERROR:
423                                 msg = "Unexpected end of input";
424                                 break;
425
426                         default:
427                                 msg = "Internal error (bug)";
428                                 break;
429                         }
430
431                         my_errorf("%s: %s", filename, msg);
432                         exit(EXIT_FAILURE);
433                 }
434         }
435 }
436
437
438 int
439 main(int argc, char **argv)
440 {
441         // Initialize progname which we will be used in error messages.
442         tuklib_progname_init(argv);
443
444         // Set the default memory usage limit. This is needed before parsing
445         // the command line arguments.
446         memlimit_init();
447
448         // Parse the command line options.
449         parse_options(argc, argv);
450
451         // The same lzma_stream is used for all files that we decode. This way
452         // we don't need to reallocate memory for every file if they use same
453         // compression settings.
454         lzma_stream strm = LZMA_STREAM_INIT;
455
456         // Some systems require setting stdin and stdout to binary mode.
457 #ifdef TUKLIB_DOSLIKE
458         setmode(fileno(stdin), O_BINARY);
459         setmode(fileno(stdout), O_BINARY);
460 #endif
461
462         if (optind == argc) {
463                 // No filenames given, decode from stdin.
464                 uncompress(&strm, stdin, "(stdin)");
465         } else {
466                 // Loop through the filenames given on the command line.
467                 do {
468                         // "-" indicates stdin.
469                         if (strcmp(argv[optind], "-") == 0) {
470                                 uncompress(&strm, stdin, "(stdin)");
471                         } else {
472                                 FILE *file = fopen(argv[optind], "rb");
473                                 if (file == NULL) {
474                                         my_errorf("%s: %s", argv[optind],
475                                                         strerror(errno));
476                                         exit(EXIT_FAILURE);
477                                 }
478
479                                 uncompress(&strm, file, argv[optind]);
480                                 fclose(file);
481                         }
482                 } while (++optind < argc);
483         }
484
485 #ifndef NDEBUG
486         // Free the memory only when debugging. Freeing wastes some time,
487         // but allows detecting possible memory leaks with Valgrind.
488         lzma_end(&strm);
489 #endif
490
491         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
492 }