icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
Add support for --enable-assume-ram=SIZE.
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "sysdefs.h"
14 #include "lzma.h"
15
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <stdio.h>
19 #include <unistd.h>
20
21 #include "getopt.h"
22 #include "tuklib_progname.h"
23 #include "tuklib_exit.h"
24 #include "tuklib_physmem.h"
25
26 #ifdef TUKLIB_DOSLIKE
27 #       include <fcntl.h>
28 #       include <io.h>
29 #endif
30
31
32 #ifdef LZMADEC
33 #       define TOOL_FORMAT "lzma"
34 #else
35 #       define TOOL_FORMAT "xz"
36 #endif
37
38
/// Memory usage limit in bytes that is handed to the liblzma decoder.
/// It is assigned by memlimit_set() and memlimit_set_percentage().
static uint64_t memlimit = 0;

/// Error messages are printed only while this is non-zero. Every --quiet
/// on the command line decrements it (never below zero), so giving
/// --quiet at least twice suppresses the messages.
static unsigned display_errors = 2;
45
46
47 static void lzma_attribute((format(printf, 1, 2)))
48 my_errorf(const char *fmt, ...)
49 {
50         va_list ap;
51         va_start(ap, fmt);
52
53         if (display_errors) {
54                 fprintf(stderr, "%s: ", progname);
55                 vfprintf(stderr, fmt, ap);
56                 fprintf(stderr, "\n");
57         }
58
59         va_end(ap);
60         return;
61 }
62
63
64 static void lzma_attribute((noreturn))
65 help(void)
66 {
67         printf(
68 "Usage: %s [OPTION]... [FILE]...\n"
69 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
70 "\n"
71 "  -c, --stdout       (ignored)\n"
72 "  -d, --decompress   (ignored)\n"
73 "  -k, --keep         (ignored)\n"
74 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default)\n"
75 "  -q, --quiet        specify *twice* to suppress errors\n"
76 "  -Q, --no-warn      (ignored)\n"
77 "  -h, --help         display this help and exit\n"
78 "  -V, --version      display the version number and exit\n"
79 "\n"
80 "With no FILE, or when FILE is -, read standard input.\n"
81 "\n"
82 "On this system and configuration, this program will use a maximum of roughly\n"
83 "%" PRIu64 " MiB RAM.\n"
84 "\n"
85 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
86 PACKAGE_NAME " home page: <" PACKAGE_HOMEPAGE ">\n",
87                 progname, memlimit / (1024 * 1024));
88         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
89 }
90
91
92 static void lzma_attribute((noreturn))
93 version(void)
94 {
95         printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
96                         "liblzma %s\n", lzma_version_string());
97
98         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
99 }
100
101
102 /// Find out the amount of physical memory (RAM) in the system, and set
103 /// the memory usage limit to the given percentage of RAM.
104 static void
105 memlimit_set_percentage(uint32_t percentage)
106 {
107         uint64_t mem = tuklib_physmem();
108
109         // If we cannot determine the amount of RAM, use the assumption
110         // set by the configure script.
111         if (mem == 0)
112                 mem = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
113
114         memlimit = percentage * mem / 100;
115         return;
116 }
117
118
119 /// Set the memory usage limit to give number of bytes. Zero is a special
120 /// value to indicate the default limit.
121 static void
122 memlimit_set(uint64_t new_memlimit)
123 {
124         if (new_memlimit == 0)
125                 memlimit_set_percentage(40);
126         else
127                 memlimit = new_memlimit;
128
129         return;
130 }
131
132
/// \brief      Convert a string to uint64_t
///
/// This is a modified copy of the corresponding function in src/xz/util.c.
///
/// The string must start with a decimal digit. The digits may be followed
/// by one of the suffixes k, kB, M, MB, G, GB (powers of 1000) or
/// Ki, KiB, Mi, MiB, Gi, GiB (powers of 1024). Values too big for uint64_t
/// are saturated to UINT64_MAX. On a syntax error, an error message is
/// printed and the program exits with EXIT_FAILURE.
///
/// \param      value   String to convert
/// \param      max     Return value when the string "max" was specified.
///
/// \return     The parsed value, max, or UINT64_MAX on overflow.
///
static uint64_t
str_to_uint64(const char *value, uint64_t max)
{
        // The special value "max" maps to whatever the caller asked for.
        if (strcmp(value, "max") == 0)
                return max;

        // At least one leading decimal digit is required.
        if (!(*value >= '0' && *value <= '9')) {
                my_errorf("%s: Value is not a non-negative decimal integer",
                                value);
                exit(EXIT_FAILURE);
        }

        // Biggest accumulator value that is still accepted before
        // appending one more digit.
        const uint64_t digit_limit = (UINT64_MAX - 9) / 10;

        uint64_t number = 0;
        for (; *value >= '0' && *value <= '9'; ++value) {
                // Saturate right away; the rest of the string is ignored.
                if (number > digit_limit)
                        return UINT64_MAX;

                number = number * 10 + (uint64_t)(*value - '0');
        }

        // No suffix: the digits alone are the result.
        if (*value == '\0')
                return number;

        // Whatever follows the digits must match one of these exactly.
        static const struct {
                const char name[4];
                uint32_t multiplier;
        } suffixes[] = {
                { "k",   1000 },
                { "kB",  1000 },
                { "M",   1000000 },
                { "MB",  1000000 },
                { "G",   1000000000 },
                { "GB",  1000000000 },
                { "Ki",  1024 },
                { "KiB", 1024 },
                { "Mi",  1048576 },
                { "MiB", 1048576 },
                { "Gi",  1073741824 },
                { "GiB", 1073741824 }
        };

        // Zero doubles as the "no match" marker because no real
        // multiplier is zero.
        uint32_t scale = 0;
        for (size_t idx = 0; idx < ARRAY_SIZE(suffixes); ++idx) {
                if (strcmp(value, suffixes[idx].name) != 0)
                        continue;

                scale = suffixes[idx].multiplier;
                break;
        }

        if (scale == 0) {
                my_errorf("%s: Invalid suffix", value);
                exit(EXIT_FAILURE);
        }

        // Saturate instead of wrapping around when applying the suffix.
        return number > UINT64_MAX / scale ? UINT64_MAX : number * scale;
}
206
207
208 /// Parses command line options.
209 static void
210 parse_options(int argc, char **argv)
211 {
212         static const char short_opts[] = "cdkM:hqQV";
213         static const struct option long_opts[] = {
214                 { "stdout",       no_argument,         NULL, 'c' },
215                 { "to-stdout",    no_argument,         NULL, 'c' },
216                 { "decompress",   no_argument,         NULL, 'd' },
217                 { "uncompress",   no_argument,         NULL, 'd' },
218                 { "keep",         no_argument,         NULL, 'k' },
219                 { "memory",       required_argument,   NULL, 'M' },
220                 { "quiet",        no_argument,         NULL, 'q' },
221                 { "no-warn",      no_argument,         NULL, 'Q' },
222                 { "help",         no_argument,         NULL, 'h' },
223                 { "version",      no_argument,         NULL, 'V' },
224                 { NULL,           0,                   NULL, 0   }
225         };
226
227         int c;
228
229         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
230                         != -1) {
231                 switch (c) {
232                 case 'c':
233                 case 'd':
234                 case 'k':
235                 case 'Q':
236                         break;
237
238                 case 'M': {
239                         // Support specifying the limit as a percentage of
240                         // installed physical RAM.
241                         const size_t len = strlen(optarg);
242                         if (len > 0 && optarg[len - 1] == '%') {
243                                 // Memory limit is a percentage of total
244                                 // installed RAM.
245                                 optarg[len - 1] = '\0';
246                                 const uint64_t percentage
247                                                 = str_to_uint64(optarg, 100);
248                                 if (percentage < 1 || percentage > 100) {
249                                         my_errorf("Percentage must be in "
250                                                         "the range [1, 100]");
251                                         exit(EXIT_FAILURE);
252                                 }
253
254                                 memlimit_set_percentage(percentage);
255                         } else {
256                                 memlimit_set(str_to_uint64(
257                                                 optarg, UINT64_MAX));
258                         }
259
260                         break;
261                 }
262
263                 case 'q':
264                         if (display_errors > 0)
265                                 --display_errors;
266
267                         break;
268
269                 case 'h':
270                         help();
271
272                 case 'V':
273                         version();
274
275                 default:
276                         exit(EXIT_FAILURE);
277                 }
278         }
279
280         return;
281 }
282
283
284 static void
285 uncompress(lzma_stream *strm, FILE *file, const char *filename)
286 {
287         lzma_ret ret;
288
289         // Initialize the decoder
290 #ifdef LZMADEC
291         ret = lzma_alone_decoder(strm, memlimit);
292 #else
293         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
294 #endif
295
296         // The only reasonable error here is LZMA_MEM_ERROR.
297         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
298         if (ret != LZMA_OK) {
299                 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
300                                 : "Internal error (bug)");
301                 exit(EXIT_FAILURE);
302         }
303
304         // Input and output buffers
305         uint8_t in_buf[BUFSIZ];
306         uint8_t out_buf[BUFSIZ];
307
308         strm->avail_in = 0;
309         strm->next_out = out_buf;
310         strm->avail_out = BUFSIZ;
311
312         lzma_action action = LZMA_RUN;
313
314         while (true) {
315                 if (strm->avail_in == 0) {
316                         strm->next_in = in_buf;
317                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
318
319                         if (ferror(file)) {
320                                 // POSIX says that fread() sets errno if
321                                 // an error occurred. ferror() doesn't
322                                 // touch errno.
323                                 my_errorf("%s: Error reading input file: %s",
324                                                 filename, strerror(errno));
325                                 exit(EXIT_FAILURE);
326                         }
327
328 #ifndef LZMADEC
329                         // When using LZMA_CONCATENATED, we need to tell
330                         // liblzma when it has got all the input.
331                         if (feof(file))
332                                 action = LZMA_FINISH;
333 #endif
334                 }
335
336                 ret = lzma_code(strm, action);
337
338                 // Write and check write error before checking decoder error.
339                 // This way as much data as possible gets written to output
340                 // even if decoder detected an error.
341                 if (strm->avail_out == 0 || ret != LZMA_OK) {
342                         const size_t write_size = BUFSIZ - strm->avail_out;
343
344                         if (fwrite(out_buf, 1, write_size, stdout)
345                                         != write_size) {
346                                 // Wouldn't be a surprise if writing to stderr
347                                 // would fail too but at least try to show an
348                                 // error message.
349                                 my_errorf("Cannot write to standard output: "
350                                                 "%s", strerror(errno));
351                                 exit(EXIT_FAILURE);
352                         }
353
354                         strm->next_out = out_buf;
355                         strm->avail_out = BUFSIZ;
356                 }
357
358                 if (ret != LZMA_OK) {
359                         if (ret == LZMA_STREAM_END) {
360 #ifdef LZMADEC
361                                 // Check that there's no trailing garbage.
362                                 if (strm->avail_in != 0
363                                                 || fread(in_buf, 1, 1, file)
364                                                         != 0
365                                                 || !feof(file))
366                                         ret = LZMA_DATA_ERROR;
367                                 else
368                                         return;
369 #else
370                                 // lzma_stream_decoder() already guarantees
371                                 // that there's no trailing garbage.
372                                 assert(strm->avail_in == 0);
373                                 assert(action == LZMA_FINISH);
374                                 assert(feof(file));
375                                 return;
376 #endif
377                         }
378
379                         const char *msg;
380                         switch (ret) {
381                         case LZMA_MEM_ERROR:
382                                 msg = strerror(ENOMEM);
383                                 break;
384
385                         case LZMA_MEMLIMIT_ERROR:
386                                 msg = "Memory usage limit reached";
387                                 break;
388
389                         case LZMA_FORMAT_ERROR:
390                                 msg = "File format not recognized";
391                                 break;
392
393                         case LZMA_OPTIONS_ERROR:
394                                 // FIXME: Better message?
395                                 msg = "Unsupported compression options";
396                                 break;
397
398                         case LZMA_DATA_ERROR:
399                                 msg = "File is corrupt";
400                                 break;
401
402                         case LZMA_BUF_ERROR:
403                                 msg = "Unexpected end of input";
404                                 break;
405
406                         default:
407                                 msg = "Internal error (bug)";
408                                 break;
409                         }
410
411                         my_errorf("%s: %s", filename, msg);
412                         exit(EXIT_FAILURE);
413                 }
414         }
415 }
416
417
418 int
419 main(int argc, char **argv)
420 {
421         // Initialize progname which we will be used in error messages.
422         tuklib_progname_init(argv);
423
424         // Set the default memory usage limit. This is needed before parsing
425         // the command line arguments.
426         memlimit_set(0);
427
428         // Parse the command line options.
429         parse_options(argc, argv);
430
431         // The same lzma_stream is used for all files that we decode. This way
432         // we don't need to reallocate memory for every file if they use same
433         // compression settings.
434         lzma_stream strm = LZMA_STREAM_INIT;
435
436         // Some systems require setting stdin and stdout to binary mode.
437 #ifdef TUKLIB_DOSLIKE
438         setmode(fileno(stdin), O_BINARY);
439         setmode(fileno(stdout), O_BINARY);
440 #endif
441
442         if (optind == argc) {
443                 // No filenames given, decode from stdin.
444                 uncompress(&strm, stdin, "(stdin)");
445         } else {
446                 // Loop through the filenames given on the command line.
447                 do {
448                         // "-" indicates stdin.
449                         if (strcmp(argv[optind], "-") == 0) {
450                                 uncompress(&strm, stdin, "(stdin)");
451                         } else {
452                                 FILE *file = fopen(argv[optind], "rb");
453                                 if (file == NULL) {
454                                         my_errorf("%s: %s", argv[optind],
455                                                         strerror(errno));
456                                         exit(EXIT_FAILURE);
457                                 }
458
459                                 uncompress(&strm, file, argv[optind]);
460                                 fclose(file);
461                         }
462                 } while (++optind < argc);
463         }
464
465 #ifndef NDEBUG
466         // Free the memory only when debugging. Freeing wastes some time,
467         // but allows detecting possible memory leaks with Valgrind.
468         lzma_end(&strm);
469 #endif
470
471         tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
472 }