icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
Improve support for DOS-like systems.
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This program is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This program is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "sysdefs.h"
21 #include "lzma.h"
22
23 #include <errno.h>
24 #include <stdio.h>
25 #include <unistd.h>
26
27 #ifdef DOSLIKE
28 #       include <fcntl.h>
29 #       include <io.h>
30 #endif
31
32 #include "getopt.h"
33 #include "physmem.h"
34
35
// Name of the file format this build of the tool handles. It is used in
// the help text and in the program name printed by --version.
#if defined(LZMADEC)
#	define TOOL_FORMAT "lzma"
#else
#	define TOOL_FORMAT "xz"
#endif


/// Program name (argv[0]) shown as the prefix of error and help messages
static const char *argv0;

/// Memory usage limit for the decoder, in bytes
static uint64_t memlimit;
48
49
50 static void lzma_attribute((noreturn))
51 my_exit(void)
52 {
53         int status = EXIT_SUCCESS;
54
55         // Close stdout. We don't care about stderr, because we write to it
56         // only when an error has already occurred.
57         const int ferror_err = ferror(stdout);
58         const int fclose_err = fclose(stdout);
59
60         if (ferror_err || fclose_err) {
61                 // If it was fclose() that failed, we have the reason
62                 // in errno. If only ferror() indicated an error,
63                 // we have no idea what the reason was.
64                 fprintf(stderr, "%s: Cannot write to standard output: %s\n",
65                                 argv0, fclose_err
66                                         ? strerror(errno) : "Unknown error");
67                 status = EXIT_FAILURE;
68         }
69
70         exit(status);
71 }
72
73
74 static void lzma_attribute((noreturn))
75 help(void)
76 {
77         printf(
78 "Usage: %s [OPTION]... [FILE]...\n"
79 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
80 "\n"
81 "  -c, --stdout       (ignored)\n"
82 "  -d, --decompress   (ignored)\n"
83 "  -k, --keep         (ignored)\n"
84 "  -f, --force        (ignored)\n"
85 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default);\n"
86 "                     the suffixes k, M, G, Ki, Mi, and Gi are supported.\n"
87 "  -h, --help         display this help and exit\n"
88 "  -V, --version      display version and license information and exit\n"
89 "\n"
90 "With no FILE, or when FILE is -, read standard input.\n"
91 "\n"
92 "On this configuration, the tool will use about %" PRIu64
93                 " MiB of memory at maximum.\n"
94 "\n"
95 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n",
96                 argv0, (memlimit + 512 * 1024) / (1024 * 1024));
97         my_exit();
98 }
99
100
101 static void lzma_attribute((noreturn))
102 version(void)
103 {
104         printf(TOOL_FORMAT "dec " PACKAGE_VERSION "\n"
105                         "liblzma %s\n", lzma_version_string());
106
107         my_exit();
108 }
109
110
111 /// Finds out the amount of physical memory in the system, and sets
112 /// a default memory usage limit.
113 static void
114 set_default_memlimit(void)
115 {
116         const uint64_t mem = physmem();
117
118         if (mem == 0)
119                 // Cannot autodetect, use 10 MiB as the default limit.
120                 memlimit = (1U << 23) + (1U << 21);
121         else
122                 // Limit is 33 % of RAM.
123                 memlimit = mem / 3;
124
125         return;
126 }
127
128
129 /// \brief      Converts a string to uint64_t
130 ///
131 /// This is rudely copied from src/xz/util.c and modified a little. :-(
132 ///
133 static uint64_t
134 str_to_uint64(const char *value)
135 {
136         uint64_t result = 0;
137
138         if (*value < '0' || *value > '9') {
139                 fprintf(stderr, "%s: %s: Not a number\n", argv0, value);
140                 exit(EXIT_FAILURE);
141         }
142
143         do {
144                 // Don't overflow.
145                 if (result > (UINT64_MAX - 9) / 10)
146                         return UINT64_MAX;
147
148                 result *= 10;
149                 result += *value - '0';
150                 ++value;
151         } while (*value >= '0' && *value <= '9');
152
153         if (*value != '\0') {
154                 // Look for suffix.
155                 static const struct {
156                         const char name[4];
157                         uint32_t multiplier;
158                 } suffixes[] = {
159                         { "k",   1000 },
160                         { "kB",  1000 },
161                         { "M",   1000000 },
162                         { "MB",  1000000 },
163                         { "G",   1000000000 },
164                         { "GB",  1000000000 },
165                         { "Ki",  1024 },
166                         { "KiB", 1024 },
167                         { "Mi",  1048576 },
168                         { "MiB", 1048576 },
169                         { "Gi",  1073741824 },
170                         { "GiB", 1073741824 }
171                 };
172
173                 uint32_t multiplier = 0;
174                 for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
175                         if (strcmp(value, suffixes[i].name) == 0) {
176                                 multiplier = suffixes[i].multiplier;
177                                 break;
178                         }
179                 }
180
181                 if (multiplier == 0) {
182                         fprintf(stderr, "%s: %s: Invalid suffix\n",
183                                         argv0, value);
184                         exit(EXIT_FAILURE);
185                 }
186
187                 // Don't overflow here either.
188                 if (result > UINT64_MAX / multiplier)
189                         result = UINT64_MAX;
190                 else
191                         result *= multiplier;
192         }
193
194         return result;
195 }
196
197
198 /// Parses command line options.
199 static void
200 parse_options(int argc, char **argv)
201 {
202         static const char short_opts[] = "cdkfM:hV";
203         static const struct option long_opts[] = {
204                 { "stdout",       no_argument,         NULL, 'c' },
205                 { "to-stdout",    no_argument,         NULL, 'c' },
206                 { "decompress",   no_argument,         NULL, 'd' },
207                 { "uncompress",   no_argument,         NULL, 'd' },
208                 { "force",        no_argument,         NULL, 'f' },
209                 { "keep",         no_argument,         NULL, 'k' },
210                 { "memory",       required_argument,   NULL, 'M' },
211                 { "help",         no_argument,         NULL, 'h' },
212                 { "version",      no_argument,         NULL, 'V' },
213                 { NULL,           0,                   NULL, 0   }
214         };
215
216         int c;
217
218         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
219                         != -1) {
220                 switch (c) {
221                 case 'c':
222                 case 'd':
223                 case 'f':
224                 case 'k':
225                         break;
226
227                 case 'M':
228                         memlimit = str_to_uint64(optarg);
229                         if (memlimit == 0)
230                                 set_default_memlimit();
231
232                         break;
233
234                 case 'h':
235                         help();
236
237                 case 'V':
238                         version();
239
240                 default:
241                         exit(EXIT_FAILURE);
242                 }
243         }
244
245         return;
246 }
247
248
249 static void
250 uncompress(lzma_stream *strm, FILE *file, const char *filename)
251 {
252         lzma_ret ret;
253
254         // Initialize the decoder
255 #ifdef LZMADEC
256         ret = lzma_alone_decoder(strm, memlimit);
257 #else
258         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
259 #endif
260
261         // The only reasonable error here is LZMA_MEM_ERROR.
262         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
263         if (ret != LZMA_OK) {
264                 fprintf(stderr, "%s: ", argv0);
265
266                 if (ret == LZMA_MEM_ERROR)
267                         fprintf(stderr, "%s\n", strerror(ENOMEM));
268                 else
269                         fprintf(stderr, "Internal program error (bug)\n");
270
271                 exit(EXIT_FAILURE);
272         }
273
274         // Input and output buffers
275         uint8_t in_buf[BUFSIZ];
276         uint8_t out_buf[BUFSIZ];
277
278         strm->avail_in = 0;
279         strm->next_out = out_buf;
280         strm->avail_out = BUFSIZ;
281
282         lzma_action action = LZMA_RUN;
283
284         while (true) {
285                 if (strm->avail_in == 0) {
286                         strm->next_in = in_buf;
287                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
288
289                         if (ferror(file)) {
290                                 // POSIX says that fread() sets errno if
291                                 // an error occurred. ferror() doesn't
292                                 // touch errno.
293                                 fprintf(stderr, "%s: %s: Error reading "
294                                                 "input file: %s\n",
295                                                 argv0, filename,
296                                                 strerror(errno));
297                                 exit(EXIT_FAILURE);
298                         }
299
300 #ifndef LZMADEC
301                         // When using LZMA_CONCATENATED, we need to tell
302                         // liblzma when it has got all the input.
303                         if (feof(file))
304                                 action = LZMA_FINISH;
305 #endif
306                 }
307
308                 ret = lzma_code(strm, action);
309
310                 // Write and check write error before checking decoder error.
311                 // This way as much data as possible gets written to output
312                 // even if decoder detected an error.
313                 if (strm->avail_out == 0 || ret != LZMA_OK) {
314                         const size_t write_size = BUFSIZ - strm->avail_out;
315
316                         if (fwrite(out_buf, 1, write_size, stdout)
317                                         != write_size) {
318                                 // Wouldn't be a surprise if writing to stderr
319                                 // would fail too but at least try to show an
320                                 // error message.
321                                 fprintf(stderr, "%s: Cannot write to "
322                                                 "standard output: %s\n", argv0,
323                                                 strerror(errno));
324                                 exit(EXIT_FAILURE);
325                         }
326
327                         strm->next_out = out_buf;
328                         strm->avail_out = BUFSIZ;
329                 }
330
331                 if (ret != LZMA_OK) {
332                         if (ret == LZMA_STREAM_END) {
333 #ifdef LZMADEC
334                                 // Check that there's no trailing garbage.
335                                 if (strm->avail_in != 0
336                                                 || fread(in_buf, 1, 1, file)
337                                                         != 0
338                                                 || !feof(file))
339                                         ret = LZMA_DATA_ERROR;
340                                 else
341                                         return;
342 #else
343                                 // lzma_stream_decoder() already guarantees
344                                 // that there's no trailing garbage.
345                                 assert(strm->avail_in == 0);
346                                 assert(action == LZMA_FINISH);
347                                 assert(feof(file));
348                                 return;
349 #endif
350                         }
351
352                         const char *msg;
353                         switch (ret) {
354                         case LZMA_MEM_ERROR:
355                                 msg = strerror(ENOMEM);
356                                 break;
357
358                         case LZMA_MEMLIMIT_ERROR:
359                                 msg = "Memory usage limit reached";
360                                 break;
361
362                         case LZMA_FORMAT_ERROR:
363                                 msg = "File format not recognized";
364                                 break;
365
366                         case LZMA_OPTIONS_ERROR:
367                                 // FIXME: Better message?
368                                 msg = "Unsupported compression options";
369                                 break;
370
371                         case LZMA_DATA_ERROR:
372                                 msg = "File is corrupt";
373                                 break;
374
375                         case LZMA_BUF_ERROR:
376                                 msg = "Unexpected end of input";
377                                 break;
378
379                         default:
380                                 msg = "Internal program error (bug)";
381                                 break;
382                         }
383
384                         fprintf(stderr, "%s: %s: %s\n", argv0, filename, msg);
385
386                         exit(EXIT_FAILURE);
387                 }
388         }
389 }
390
391
392 int
393 main(int argc, char **argv)
394 {
395         // Set the argv0 global so that we can print the command name in
396         // error and help messages.
397         argv0 = argv[0];
398
399         // Detect amount of installed RAM and set the memory usage limit.
400         // This is needed before parsing the command line arguments.
401         set_default_memlimit();
402
403         // Parse the command line options.
404         parse_options(argc, argv);
405
406         // The same lzma_stream is used for all files that we decode. This way
407         // we don't need to reallocate memory for every file if they use same
408         // compression settings.
409         lzma_stream strm = LZMA_STREAM_INIT;
410
411         // Some systems require setting stdin and stdout to binary mode.
412 #ifdef DOSLIKE
413         setmode(fileno(stdin), O_BINARY);
414         setmode(fileno(stdout), O_BINARY);
415 #endif
416
417         if (optind == argc) {
418                 // No filenames given, decode from stdin.
419                 uncompress(&strm, stdin, "(stdin)");
420         } else {
421                 // Loop through the filenames given on the command line.
422                 do {
423                         // "-" indicates stdin.
424                         if (strcmp(argv[optind], "-") == 0) {
425                                 uncompress(&strm, stdin, "(stdin)");
426                         } else {
427                                 FILE *file = fopen(argv[optind], "rb");
428                                 if (file == NULL) {
429                                         fprintf(stderr, "%s: %s: %s\n",
430                                                         argv0, argv[optind],
431                                                         strerror(errno));
432                                         exit(EXIT_FAILURE);
433                                 }
434
435                                 uncompress(&strm, file, argv[optind]);
436                                 fclose(file);
437                         }
438                 } while (++optind < argc);
439         }
440
441 #ifndef NDEBUG
442         // Free the memory only when debugging. Freeing wastes some time,
443         // but allows detecting possible memory leaks with Valgrind.
444         lzma_end(&strm);
445 #endif
446
447         my_exit();
448 }