]> icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
a8d05859446347e20cc1e3e34caa8821c29ff334
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This program is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This program is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "sysdefs.h"
21
22 #include <errno.h>
23 #include <stdio.h>
24 #include <unistd.h>
25
26 #ifdef WIN32
27 #       include <fcntl.h>
28 #endif
29
30 #include "getopt.h"
31 #include "physmem.h"
32
33
// Name of the file format this build of the tool handles. It is pasted
// into the help text and the version banner. Building with LZMADEC defined
// selects "lzma"; otherwise the tool is built for "xz".
#ifdef LZMADEC
#       define TOOL_FORMAT "lzma"
#else
#       define TOOL_FORMAT "xz"
#endif
39
40
/// Memory usage limit in bytes. Initialized by set_default_memlimit(),
/// optionally overridden with -M/--memory, and passed to the liblzma
/// decoder initializer.
static uint64_t memlimit;

/// Program name (argv[0]) to be shown in error and help messages
static const char *argv0;
46
47
48 static void lzma_attribute((noreturn))
49 my_exit(void)
50 {
51         int status = EXIT_SUCCESS;
52
53         // Close stdout. We don't care about stderr, because we write to it
54         // only when an error has already occurred.
55         const int ferror_err = ferror(stdout);
56         const int fclose_err = fclose(stdout);
57
58         if (ferror_err || fclose_err) {
59                 // If it was fclose() that failed, we have the reason
60                 // in errno. If only ferror() indicated an error,
61                 // we have no idea what the reason was.
62                 fprintf(stderr, "%s: Cannot write to standard output: %s\n",
63                                 argv0, fclose_err
64                                         ? strerror(errno) : "Unknown error");
65                 status = EXIT_FAILURE;
66         }
67
68         exit(status);
69 }
70
71
72 static void lzma_attribute((noreturn))
73 help(void)
74 {
75         printf(
76 "Usage: %s [OPTION]... [FILE]...\n"
77 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
78 "\n"
79 "  -c, --stdout       (ignored)\n"
80 "  -d, --decompress   (ignored)\n"
81 "  -k, --keep         (ignored)\n"
82 "  -f, --force        (ignored)\n"
83 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default);\n"
84 "                     the suffixes k, M, G, Ki, Mi, and Gi are supported.\n"
85 "  -h, --help         display this help and exit\n"
86 "  -V, --version      display version and license information and exit\n"
87 "\n"
88 "With no FILE, or when FILE is -, read standard input.\n"
89 "\n"
90 "On this configuration, the tool will use about %" PRIu64
91                 " MiB of memory at maximum.\n"
92 "\n"
93 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n",
94                 argv0, (memlimit + 512 * 1024) / (1024 * 1024));
95         my_exit();
96 }
97
98
99 static void lzma_attribute((noreturn))
100 version(void)
101 {
102         printf(TOOL_FORMAT "dec " PACKAGE_VERSION "\n"
103                         "liblzma %s\n", lzma_version_string());
104
105         my_exit();
106 }
107
108
109 /// Finds out the amount of physical memory in the system, and sets
110 /// a default memory usage limit.
111 static void
112 set_default_memlimit(void)
113 {
114         const uint64_t mem = physmem();
115
116         if (mem == 0)
117                 // Cannot autodetect, use 10 MiB as the default limit.
118                 memlimit = (1U << 23) + (1U << 21);
119         else
120                 // Limit is 33 % of RAM.
121                 memlimit = mem / 3;
122
123         return;
124 }
125
126
127 /// \brief      Converts a string to uint64_t
128 ///
129 /// This is rudely copied from src/xz/util.c and modified a little. :-(
130 ///
131 static uint64_t
132 str_to_uint64(const char *value)
133 {
134         uint64_t result = 0;
135
136         if (*value < '0' || *value > '9') {
137                 fprintf(stderr, "%s: %s: Not a number", argv0, value);
138                 exit(EXIT_FAILURE);
139         }
140
141         do {
142                 // Don't overflow.
143                 if (result > (UINT64_MAX - 9) / 10)
144                         return UINT64_MAX;
145
146                 result *= 10;
147                 result += *value - '0';
148                 ++value;
149         } while (*value >= '0' && *value <= '9');
150
151         if (*value != '\0') {
152                 // Look for suffix.
153                 static const struct {
154                         const char name[4];
155                         uint32_t multiplier;
156                 } suffixes[] = {
157                         { "k",   1000 },
158                         { "kB",  1000 },
159                         { "M",   1000000 },
160                         { "MB",  1000000 },
161                         { "G",   1000000000 },
162                         { "GB",  1000000000 },
163                         { "Ki",  1024 },
164                         { "KiB", 1024 },
165                         { "Mi",  1048576 },
166                         { "MiB", 1048576 },
167                         { "Gi",  1073741824 },
168                         { "GiB", 1073741824 }
169                 };
170
171                 uint32_t multiplier = 0;
172                 for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
173                         if (strcmp(value, suffixes[i].name) == 0) {
174                                 multiplier = suffixes[i].multiplier;
175                                 break;
176                         }
177                 }
178
179                 if (multiplier == 0) {
180                         fprintf(stderr, "%s: %s: Invalid suffix",
181                                         argv0, value);
182                         exit(EXIT_FAILURE);
183                 }
184
185                 // Don't overflow here either.
186                 if (result > UINT64_MAX / multiplier)
187                         result = UINT64_MAX;
188                 else
189                         result *= multiplier;
190         }
191
192         return result;
193 }
194
195
196 /// Parses command line options.
197 static void
198 parse_options(int argc, char **argv)
199 {
200         static const char short_opts[] = "cdkfM:hV";
201         static const struct option long_opts[] = {
202                 { "stdout",       no_argument,         NULL, 'c' },
203                 { "to-stdout",    no_argument,         NULL, 'c' },
204                 { "decompress",   no_argument,         NULL, 'd' },
205                 { "uncompress",   no_argument,         NULL, 'd' },
206                 { "force",        no_argument,         NULL, 'f' },
207                 { "keep",         no_argument,         NULL, 'k' },
208                 { "memory",       required_argument,   NULL, 'M' },
209                 { "help",         no_argument,         NULL, 'h' },
210                 { "version",      no_argument,         NULL, 'V' },
211                 { NULL,           0,                   NULL, 0   }
212         };
213
214         int c;
215
216         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
217                         != -1) {
218                 switch (c) {
219                 case 'c':
220                 case 'd':
221                 case 'f':
222                 case 'k':
223                         break;
224
225                 case 'M':
226                         memlimit = str_to_uint64(optarg);
227                         if (memlimit == 0)
228                                 set_default_memlimit();
229
230                         break;
231
232                 case 'h':
233                         help();
234
235                 case 'V':
236                         version();
237
238                 default:
239                         exit(EXIT_FAILURE);
240                 }
241         }
242
243         return;
244 }
245
246
247 static void
248 uncompress(lzma_stream *strm, FILE *file, const char *filename)
249 {
250         lzma_ret ret;
251
252         // Initialize the decoder
253 #ifdef LZMADEC
254         ret = lzma_alone_decoder(strm, memlimit);
255 #else
256         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
257 #endif
258
259         // The only reasonable error here is LZMA_MEM_ERROR.
260         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
261         if (ret != LZMA_OK) {
262                 fprintf(stderr, "%s: ", argv0);
263
264                 if (ret == LZMA_MEM_ERROR)
265                         fprintf(stderr, "%s\n", strerror(ENOMEM));
266                 else
267                         fprintf(stderr, "Internal program error (bug)\n");
268
269                 exit(EXIT_FAILURE);
270         }
271
272         // Input and output buffers
273         uint8_t in_buf[BUFSIZ];
274         uint8_t out_buf[BUFSIZ];
275
276         strm->avail_in = 0;
277         strm->next_out = out_buf;
278         strm->avail_out = BUFSIZ;
279
280         lzma_action action = LZMA_RUN;
281
282         while (true) {
283                 if (strm->avail_in == 0) {
284                         strm->next_in = in_buf;
285                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
286
287                         if (ferror(file)) {
288                                 // POSIX says that fread() sets errno if
289                                 // an error occurred. ferror() doesn't
290                                 // touch errno.
291                                 fprintf(stderr, "%s: %s: Error reading "
292                                                 "input file: %s\n",
293                                                 argv0, filename,
294                                                 strerror(errno));
295                                 exit(EXIT_FAILURE);
296                         }
297
298 #ifndef LZMADEC
299                         // When using LZMA_CONCATENATED, we need to tell
300                         // liblzma when it has got all the input.
301                         if (feof(file))
302                                 action = LZMA_FINISH;
303 #endif
304                 }
305
306                 ret = lzma_code(strm, action);
307
308                 // Write and check write error before checking decoder error.
309                 // This way as much data as possible gets written to output
310                 // even if decoder detected an error.
311                 if (strm->avail_out == 0 || ret != LZMA_OK) {
312                         const size_t write_size = BUFSIZ - strm->avail_out;
313
314                         if (fwrite(out_buf, 1, write_size, stdout)
315                                         != write_size) {
316                                 // Wouldn't be a surprise if writing to stderr
317                                 // would fail too but at least try to show an
318                                 // error message.
319                                 fprintf(stderr, "%s: Cannot write to "
320                                                 "standard output: %s\n", argv0,
321                                                 strerror(errno));
322                                 exit(EXIT_FAILURE);
323                         }
324
325                         strm->next_out = out_buf;
326                         strm->avail_out = BUFSIZ;
327                 }
328
329                 if (ret != LZMA_OK) {
330                         if (ret == LZMA_STREAM_END) {
331 #ifdef LZMADEC
332                                 // Check that there's no trailing garbage.
333                                 if (strm->avail_in != 0
334                                                 || fread(in_buf, 1, 1, file)
335                                                         != 0
336                                                 || !feof(file))
337                                         ret = LZMA_DATA_ERROR;
338                                 else
339                                         return;
340 #else
341                                 // lzma_stream_decoder() already guarantees
342                                 // that there's no trailing garbage.
343                                 assert(strm->avail_in == 0);
344                                 assert(action == LZMA_FINISH);
345                                 assert(feof(file));
346                                 return;
347 #endif
348                         }
349
350                         const char *msg;
351                         switch (ret) {
352                         case LZMA_MEM_ERROR:
353                                 msg = strerror(ENOMEM);
354                                 break;
355
356                         case LZMA_MEMLIMIT_ERROR:
357                                 msg = "Memory usage limit reached";
358                                 break;
359
360                         case LZMA_FORMAT_ERROR:
361                                 msg = "File format not recognized";
362                                 break;
363
364                         case LZMA_OPTIONS_ERROR:
365                                 // FIXME: Better message?
366                                 msg = "Unsupported compression options";
367                                 break;
368
369                         case LZMA_DATA_ERROR:
370                                 msg = "File is corrupt";
371                                 break;
372
373                         case LZMA_BUF_ERROR:
374                                 msg = "Unexpected end of input";
375                                 break;
376
377                         default:
378                                 msg = "Internal program error (bug)";
379                                 break;
380                         }
381
382                         fprintf(stderr, "%s: %s: %s", argv0, filename, msg);
383
384                         exit(EXIT_FAILURE);
385                 }
386         }
387 }
388
389
390 int
391 main(int argc, char **argv)
392 {
393         // Set the argv0 global so that we can print the command name in
394         // error and help messages.
395         argv0 = argv[0];
396
397         // Detect amount of installed RAM and set the memory usage limit.
398         // This is needed before parsing the command line arguments.
399         set_default_memlimit();
400
401         // Parse the command line options.
402         parse_options(argc, argv);
403
404         // The same lzma_stream is used for all files that we decode. This way
405         // we don't need to reallocate memory for every file if they use same
406         // compression settings.
407         lzma_stream strm = LZMA_STREAM_INIT;
408
409         // Some systems require setting stdin and stdout to binary mode.
410 #ifdef WIN32
411         setmode(fileno(stdin), O_BINARY);
412         setmode(fileno(stdout), O_BINARY);
413 #endif
414
415         if (optind == argc) {
416                 // No filenames given, decode from stdin.
417                 uncompress(&strm, stdin, "(stdin)");
418         } else {
419                 // Loop through the filenames given on the command line.
420                 do {
421                         // "-" indicates stdin.
422                         if (strcmp(argv[optind], "-") == 0) {
423                                 uncompress(&strm, stdin, "(stdin)");
424                         } else {
425                                 FILE *file = fopen(argv[optind], "rb");
426                                 if (file == NULL) {
427                                         fprintf(stderr, "%s: %s: %s\n",
428                                                         argv0, argv[optind],
429                                                         strerror(errno));
430                                         exit(EXIT_FAILURE);
431                                 }
432
433                                 uncompress(&strm, file, argv[optind]);
434                                 fclose(file);
435                         }
436                 } while (++optind < argc);
437         }
438
439 #ifndef NDEBUG
440         // Free the memory only when debugging. Freeing wastes some time,
441         // but allows detecting possible memory leaks with Valgrind.
442         lzma_end(&strm);
443 #endif
444
445         my_exit();
446 }