]> icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
Define PACKAGE_HOMEPAGE in configure.ac and use it in
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "sysdefs.h"
14 #include "lzma.h"
15
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <stdio.h>
19 #include <unistd.h>
20
21 #ifdef DOSLIKE
22 #       include <fcntl.h>
23 #       include <io.h>
24 #endif
25
26 #include "getopt.h"
27 #include "physmem.h"
28
29
30 #ifdef LZMADEC
31 #       define TOOL_FORMAT "lzma"
32 #else
33 #       define TOOL_FORMAT "xz"
34 #endif
35
36
37 /// Maximum number of bytes of memory the decoder is allowed to use
38 static uint64_t memlimit;
39
40 /// Error messages are suppressed if this is zero, which is the case when
41 /// --quiet has been given at least twice.
42 static unsigned int display_errors = 2;
43
44 /// Program name to be shown in error messages
45 static const char *argv0;
46
47
48 static void lzma_attribute((format(printf, 1, 2)))
49 my_errorf(const char *fmt, ...)
50 {
51         va_list ap;
52         va_start(ap, fmt);
53
54         if (display_errors) {
55                 fprintf(stderr, "%s: ", argv0);
56                 vfprintf(stderr, fmt, ap);
57                 fprintf(stderr, "\n");
58         }
59
60         va_end(ap);
61         return;
62 }
63
64
65 static void lzma_attribute((noreturn))
66 my_exit(void)
67 {
68         int status = EXIT_SUCCESS;
69
70         // Close stdout. We don't care about stderr, because we write to it
71         // only when an error has already occurred.
72         const int ferror_err = ferror(stdout);
73         const int fclose_err = fclose(stdout);
74
75         if (ferror_err || fclose_err) {
76                 // If it was fclose() that failed, we have the reason
77                 // in errno. If only ferror() indicated an error,
78                 // we have no idea what the reason was.
79                 my_errorf("Cannot write to standard output: %s", fclose_err
80                                 ? strerror(errno) : "Unknown error");
81                 status = EXIT_FAILURE;
82         }
83
84         exit(status);
85 }
86
87
88 static void lzma_attribute((noreturn))
89 help(void)
90 {
91         printf(
92 "Usage: %s [OPTION]... [FILE]...\n"
93 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
94 "\n"
95 "  -c, --stdout       (ignored)\n"
96 "  -d, --decompress   (ignored)\n"
97 "  -k, --keep         (ignored)\n"
98 "  -f, --force        (ignored)\n"
99 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default)\n"
100 "  -q, --quiet        specify *twice* to suppress errors\n"
101 "  -Q, --no-warn      (ignored)\n"
102 "  -h, --help         display this help and exit\n"
103 "  -V, --version      display the version number and exit\n"
104 "\n"
105 "With no FILE, or when FILE is -, read standard input.\n"
106 "\n"
107 "On this system and configuration, this program will use at maximum of roughly\n"
108 "%" PRIu64 " MiB RAM.\n"
109 "\n"
110 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
111 PACKAGE_NAME " home page: <" PACKAGE_HOMEPAGE ">\n",
112                 argv0, memlimit / (1024 * 1024));
113         my_exit();
114 }
115
116
117 static void lzma_attribute((noreturn))
118 version(void)
119 {
120         printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
121                         "liblzma %s\n", lzma_version_string());
122
123         my_exit();
124 }
125
126
127 /// Find out the amount of physical memory (RAM) in the system, and set
128 /// the memory usage limit to the given percentage of RAM.
129 static void
130 memlimit_set_percentage(uint32_t percentage)
131 {
132         uint64_t mem = physmem();
133
134         // If we cannot determine the amount of RAM, assume 32 MiB.
135         if (mem == 0)
136                 mem = UINT64_C(32) * 1024 * 1024;
137
138         memlimit = percentage * mem / 100;
139         return;
140 }
141
142
143 /// Set the memory usage limit to give number of bytes. Zero is a special
144 /// value to indicate the default limit.
145 static void
146 memlimit_set(uint64_t new_memlimit)
147 {
148         if (new_memlimit == 0)
149                 memlimit_set_percentage(40);
150         else
151                 memlimit = new_memlimit;
152
153         return;
154 }
155
156
/// \brief      Convert a string to uint64_t
///
/// This is rudely copied from src/xz/util.c and modified a little. :-(
///
/// The accepted input is either the literal string "max" or a
/// non-negative decimal integer, optionally followed by one of the
/// multiplier suffixes k, kB, M, MB, G, GB (powers of 1000) or
/// Ki, KiB, Mi, MiB, Gi, GiB (powers of 1024). Values that do not fit
/// into uint64_t saturate to UINT64_MAX. Saturation does not stop
/// validation: the whole string is always checked, so trailing garbage
/// after an over-long number is still rejected.
///
/// On malformed input an error is printed with my_errorf() and the
/// process exits with EXIT_FAILURE.
///
/// \param      value   NUL-terminated string to parse.
/// \param      max     Return value when the string "max" was specified.
///
/// \return     The parsed value, saturated to UINT64_MAX on overflow.
///
static uint64_t
str_to_uint64(const char *value, uint64_t max)
{
        // Accept special value "max".
        if (strcmp(value, "max") == 0)
                return max;

        if (*value < '0' || *value > '9') {
                my_errorf("%s: Value is not a non-negative decimal integer",
                                value);
                exit(EXIT_FAILURE);
        }

        uint64_t result = 0;
        bool saturated = false;

        do {
                const uint64_t digit = (uint64_t)(*value - '0');

                // Once saturated, keep consuming digits so that whatever
                // follows the number is still validated below.
                if (!saturated) {
                        // Exact test for result * 10 + digit > UINT64_MAX.
                        if (result > (UINT64_MAX - digit) / 10) {
                                result = UINT64_MAX;
                                saturated = true;
                        } else {
                                result = result * 10 + digit;
                        }
                }

                ++value;
        } while (*value >= '0' && *value <= '9');

        // No suffix: the number is used as is.
        if (*value == '\0')
                return result;

        // Look for suffix.
        static const struct {
                const char name[4];
                uint32_t multiplier;
        } suffixes[] = {
                { "k",   1000 },
                { "kB",  1000 },
                { "M",   1000000 },
                { "MB",  1000000 },
                { "G",   1000000000 },
                { "GB",  1000000000 },
                { "Ki",  1024 },
                { "KiB", 1024 },
                { "Mi",  1048576 },
                { "MiB", 1048576 },
                { "Gi",  1073741824 },
                { "GiB", 1073741824 }
        };

        uint32_t multiplier = 0;
        for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
                if (strcmp(value, suffixes[i].name) == 0) {
                        multiplier = suffixes[i].multiplier;
                        break;
                }
        }

        if (multiplier == 0) {
                my_errorf("%s: Invalid suffix", value);
                exit(EXIT_FAILURE);
        }

        // Don't overflow here either. A value that already saturated
        // stays at UINT64_MAX.
        if (result > UINT64_MAX / multiplier)
                result = UINT64_MAX;
        else
                result *= multiplier;

        return result;
}
230
231
232 /// Parses command line options.
233 static void
234 parse_options(int argc, char **argv)
235 {
236         static const char short_opts[] = "cdkfM:hqQV";
237         static const struct option long_opts[] = {
238                 { "stdout",       no_argument,         NULL, 'c' },
239                 { "to-stdout",    no_argument,         NULL, 'c' },
240                 { "decompress",   no_argument,         NULL, 'd' },
241                 { "uncompress",   no_argument,         NULL, 'd' },
242                 { "force",        no_argument,         NULL, 'f' },
243                 { "keep",         no_argument,         NULL, 'k' },
244                 { "memory",       required_argument,   NULL, 'M' },
245                 { "quiet",        no_argument,         NULL, 'q' },
246                 { "no-warn",      no_argument,         NULL, 'Q' },
247                 { "help",         no_argument,         NULL, 'h' },
248                 { "version",      no_argument,         NULL, 'V' },
249                 { NULL,           0,                   NULL, 0   }
250         };
251
252         int c;
253
254         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
255                         != -1) {
256                 switch (c) {
257                 case 'c':
258                 case 'd':
259                 case 'f':
260                 case 'k':
261                 case 'Q':
262                         break;
263
264                 case 'M': {
265                         // Support specifying the limit as a percentage of
266                         // installed physical RAM.
267                         const size_t len = strlen(optarg);
268                         if (len > 0 && optarg[len - 1] == '%') {
269                                 // Memory limit is a percentage of total
270                                 // installed RAM.
271                                 optarg[len - 1] = '\0';
272                                 const uint64_t percentage
273                                                 = str_to_uint64(optarg, 100);
274                                 if (percentage < 1 || percentage > 100) {
275                                         my_errorf("Percentage must be in "
276                                                         "the range [1, 100]");
277                                         exit(EXIT_FAILURE);
278                                 }
279
280                                 memlimit_set_percentage(percentage);
281                         } else {
282                                 memlimit_set(str_to_uint64(
283                                                 optarg, UINT64_MAX));
284                         }
285
286                         break;
287                 }
288
289                 case 'q':
290                         if (display_errors > 0)
291                                 --display_errors;
292
293                         break;
294
295                 case 'h':
296                         help();
297
298                 case 'V':
299                         version();
300
301                 default:
302                         exit(EXIT_FAILURE);
303                 }
304         }
305
306         return;
307 }
308
309
310 static void
311 uncompress(lzma_stream *strm, FILE *file, const char *filename)
312 {
313         lzma_ret ret;
314
315         // Initialize the decoder
316 #ifdef LZMADEC
317         ret = lzma_alone_decoder(strm, memlimit);
318 #else
319         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
320 #endif
321
322         // The only reasonable error here is LZMA_MEM_ERROR.
323         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
324         if (ret != LZMA_OK) {
325                 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
326                                 : "Internal program error (bug)");
327                 exit(EXIT_FAILURE);
328         }
329
330         // Input and output buffers
331         uint8_t in_buf[BUFSIZ];
332         uint8_t out_buf[BUFSIZ];
333
334         strm->avail_in = 0;
335         strm->next_out = out_buf;
336         strm->avail_out = BUFSIZ;
337
338         lzma_action action = LZMA_RUN;
339
340         while (true) {
341                 if (strm->avail_in == 0) {
342                         strm->next_in = in_buf;
343                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
344
345                         if (ferror(file)) {
346                                 // POSIX says that fread() sets errno if
347                                 // an error occurred. ferror() doesn't
348                                 // touch errno.
349                                 my_errorf("%s: Error reading input file: %s",
350                                                 filename, strerror(errno));
351                                 exit(EXIT_FAILURE);
352                         }
353
354 #ifndef LZMADEC
355                         // When using LZMA_CONCATENATED, we need to tell
356                         // liblzma when it has got all the input.
357                         if (feof(file))
358                                 action = LZMA_FINISH;
359 #endif
360                 }
361
362                 ret = lzma_code(strm, action);
363
364                 // Write and check write error before checking decoder error.
365                 // This way as much data as possible gets written to output
366                 // even if decoder detected an error.
367                 if (strm->avail_out == 0 || ret != LZMA_OK) {
368                         const size_t write_size = BUFSIZ - strm->avail_out;
369
370                         if (fwrite(out_buf, 1, write_size, stdout)
371                                         != write_size) {
372                                 // Wouldn't be a surprise if writing to stderr
373                                 // would fail too but at least try to show an
374                                 // error message.
375                                 my_errorf("Cannot write to standard output: "
376                                                 "%s", strerror(errno));
377                                 exit(EXIT_FAILURE);
378                         }
379
380                         strm->next_out = out_buf;
381                         strm->avail_out = BUFSIZ;
382                 }
383
384                 if (ret != LZMA_OK) {
385                         if (ret == LZMA_STREAM_END) {
386 #ifdef LZMADEC
387                                 // Check that there's no trailing garbage.
388                                 if (strm->avail_in != 0
389                                                 || fread(in_buf, 1, 1, file)
390                                                         != 0
391                                                 || !feof(file))
392                                         ret = LZMA_DATA_ERROR;
393                                 else
394                                         return;
395 #else
396                                 // lzma_stream_decoder() already guarantees
397                                 // that there's no trailing garbage.
398                                 assert(strm->avail_in == 0);
399                                 assert(action == LZMA_FINISH);
400                                 assert(feof(file));
401                                 return;
402 #endif
403                         }
404
405                         const char *msg;
406                         switch (ret) {
407                         case LZMA_MEM_ERROR:
408                                 msg = strerror(ENOMEM);
409                                 break;
410
411                         case LZMA_MEMLIMIT_ERROR:
412                                 msg = "Memory usage limit reached";
413                                 break;
414
415                         case LZMA_FORMAT_ERROR:
416                                 msg = "File format not recognized";
417                                 break;
418
419                         case LZMA_OPTIONS_ERROR:
420                                 // FIXME: Better message?
421                                 msg = "Unsupported compression options";
422                                 break;
423
424                         case LZMA_DATA_ERROR:
425                                 msg = "File is corrupt";
426                                 break;
427
428                         case LZMA_BUF_ERROR:
429                                 msg = "Unexpected end of input";
430                                 break;
431
432                         default:
433                                 msg = "Internal program error (bug)";
434                                 break;
435                         }
436
437                         my_errorf("%s: %s", filename, msg);
438                         exit(EXIT_FAILURE);
439                 }
440         }
441 }
442
443
444 int
445 main(int argc, char **argv)
446 {
447         // Set the argv0 global so that we can print the command name in
448         // error and help messages.
449         argv0 = argv[0];
450
451         // Set the default memory usage limit. This is needed before parsing
452         // the command line arguments.
453         memlimit_set(0);
454
455         // Parse the command line options.
456         parse_options(argc, argv);
457
458         // The same lzma_stream is used for all files that we decode. This way
459         // we don't need to reallocate memory for every file if they use same
460         // compression settings.
461         lzma_stream strm = LZMA_STREAM_INIT;
462
463         // Some systems require setting stdin and stdout to binary mode.
464 #ifdef DOSLIKE
465         setmode(fileno(stdin), O_BINARY);
466         setmode(fileno(stdout), O_BINARY);
467 #endif
468
469         if (optind == argc) {
470                 // No filenames given, decode from stdin.
471                 uncompress(&strm, stdin, "(stdin)");
472         } else {
473                 // Loop through the filenames given on the command line.
474                 do {
475                         // "-" indicates stdin.
476                         if (strcmp(argv[optind], "-") == 0) {
477                                 uncompress(&strm, stdin, "(stdin)");
478                         } else {
479                                 FILE *file = fopen(argv[optind], "rb");
480                                 if (file == NULL) {
481                                         my_errorf("%s: %s", argv[optind],
482                                                         strerror(errno));
483                                         exit(EXIT_FAILURE);
484                                 }
485
486                                 uncompress(&strm, file, argv[optind]);
487                                 fclose(file);
488                         }
489                 } while (++optind < argc);
490         }
491
492 #ifndef NDEBUG
493         // Free the memory only when debugging. Freeing wastes some time,
494         // but allows detecting possible memory leaks with Valgrind.
495         lzma_end(&strm);
496 #endif
497
498         my_exit();
499 }