]> icculus.org git repositories - icculus/xz.git/blob - src/xzdec/xzdec.c
Support special value "max" where xz and xzdec accept an integer.
[icculus/xz.git] / src / xzdec / xzdec.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "sysdefs.h"
14 #include "lzma.h"
15
16 #include <errno.h>
17 #include <stdio.h>
18 #include <unistd.h>
19
20 #ifdef DOSLIKE
21 #       include <fcntl.h>
22 #       include <io.h>
23 #endif
24
25 #include "getopt.h"
26 #include "physmem.h"
27
28
29 #ifdef LZMADEC
30 #       define TOOL_FORMAT "lzma"
31 #else
32 #       define TOOL_FORMAT "xz"
33 #endif
34
35
/// Maximum number of bytes of memory the decoder is allowed to use
37 static uint64_t memlimit;
38
39 /// Program name to be shown in error messages
40 static const char *argv0;
41
42
43 static void lzma_attribute((noreturn))
44 my_exit(void)
45 {
46         int status = EXIT_SUCCESS;
47
48         // Close stdout. We don't care about stderr, because we write to it
49         // only when an error has already occurred.
50         const int ferror_err = ferror(stdout);
51         const int fclose_err = fclose(stdout);
52
53         if (ferror_err || fclose_err) {
54                 // If it was fclose() that failed, we have the reason
55                 // in errno. If only ferror() indicated an error,
56                 // we have no idea what the reason was.
57                 fprintf(stderr, "%s: Cannot write to standard output: %s\n",
58                                 argv0, fclose_err
59                                         ? strerror(errno) : "Unknown error");
60                 status = EXIT_FAILURE;
61         }
62
63         exit(status);
64 }
65
66
67 static void lzma_attribute((noreturn))
68 help(void)
69 {
70         printf(
71 "Usage: %s [OPTION]... [FILE]...\n"
72 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
73 "\n"
74 "  -c, --stdout       (ignored)\n"
75 "  -d, --decompress   (ignored)\n"
76 "  -k, --keep         (ignored)\n"
77 "  -f, --force        (ignored)\n"
78 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default);\n"
79 "                     the suffixes k, M, G, Ki, Mi, and Gi are supported.\n"
80 "  -h, --help         display this help and exit\n"
81 "  -V, --version      display version and license information and exit\n"
82 "\n"
83 "With no FILE, or when FILE is -, read standard input.\n"
84 "\n"
85 "On this configuration, the tool will use about %" PRIu64
86                 " MiB of memory at maximum.\n"
87 "\n"
88 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n",
89                 argv0, memlimit / (1024 * 1024));
90         my_exit();
91 }
92
93
94 static void lzma_attribute((noreturn))
95 version(void)
96 {
97         printf(TOOL_FORMAT "dec " LZMA_VERSION_STRING "\n"
98                         "liblzma %s\n", lzma_version_string());
99
100         my_exit();
101 }
102
103
104 /// Finds out the amount of physical memory in the system, and sets
105 /// a default memory usage limit.
106 static void
107 set_default_memlimit(void)
108 {
109         const uint64_t mem = physmem();
110
111         if (mem == 0)
112                 // Cannot autodetect, use 10 MiB as the default limit.
113                 memlimit = (1U << 23) + (1U << 21);
114         else
115                 // Limit is 33 % of RAM.
116                 memlimit = mem / 3;
117
118         return;
119 }
120
121
122 /// \brief      Converts a string to uint64_t
123 ///
124 /// This is rudely copied from src/xz/util.c and modified a little. :-(
125 ///
126 static uint64_t
127 str_to_uint64(const char *value)
128 {
129         uint64_t result = 0;
130
131         // Accept special value "max".
132         if (strcmp(value, "max") == 0)
133                 return UINT64_MAX;
134
135         if (*value < '0' || *value > '9') {
136                 fprintf(stderr, "%s: %s: Not a number\n", argv0, value);
137                 exit(EXIT_FAILURE);
138         }
139
140         do {
141                 // Don't overflow.
142                 if (result > (UINT64_MAX - 9) / 10)
143                         return UINT64_MAX;
144
145                 result *= 10;
146                 result += *value - '0';
147                 ++value;
148         } while (*value >= '0' && *value <= '9');
149
150         if (*value != '\0') {
151                 // Look for suffix.
152                 static const struct {
153                         const char name[4];
154                         uint32_t multiplier;
155                 } suffixes[] = {
156                         { "k",   1000 },
157                         { "kB",  1000 },
158                         { "M",   1000000 },
159                         { "MB",  1000000 },
160                         { "G",   1000000000 },
161                         { "GB",  1000000000 },
162                         { "Ki",  1024 },
163                         { "KiB", 1024 },
164                         { "Mi",  1048576 },
165                         { "MiB", 1048576 },
166                         { "Gi",  1073741824 },
167                         { "GiB", 1073741824 }
168                 };
169
170                 uint32_t multiplier = 0;
171                 for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
172                         if (strcmp(value, suffixes[i].name) == 0) {
173                                 multiplier = suffixes[i].multiplier;
174                                 break;
175                         }
176                 }
177
178                 if (multiplier == 0) {
179                         fprintf(stderr, "%s: %s: Invalid suffix\n",
180                                         argv0, value);
181                         exit(EXIT_FAILURE);
182                 }
183
184                 // Don't overflow here either.
185                 if (result > UINT64_MAX / multiplier)
186                         result = UINT64_MAX;
187                 else
188                         result *= multiplier;
189         }
190
191         return result;
192 }
193
194
195 /// Parses command line options.
196 static void
197 parse_options(int argc, char **argv)
198 {
199         static const char short_opts[] = "cdkfM:hV";
200         static const struct option long_opts[] = {
201                 { "stdout",       no_argument,         NULL, 'c' },
202                 { "to-stdout",    no_argument,         NULL, 'c' },
203                 { "decompress",   no_argument,         NULL, 'd' },
204                 { "uncompress",   no_argument,         NULL, 'd' },
205                 { "force",        no_argument,         NULL, 'f' },
206                 { "keep",         no_argument,         NULL, 'k' },
207                 { "memory",       required_argument,   NULL, 'M' },
208                 { "help",         no_argument,         NULL, 'h' },
209                 { "version",      no_argument,         NULL, 'V' },
210                 { NULL,           0,                   NULL, 0   }
211         };
212
213         int c;
214
215         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
216                         != -1) {
217                 switch (c) {
218                 case 'c':
219                 case 'd':
220                 case 'f':
221                 case 'k':
222                         break;
223
224                 case 'M':
225                         memlimit = str_to_uint64(optarg);
226                         if (memlimit == 0)
227                                 set_default_memlimit();
228
229                         break;
230
231                 case 'h':
232                         help();
233
234                 case 'V':
235                         version();
236
237                 default:
238                         exit(EXIT_FAILURE);
239                 }
240         }
241
242         return;
243 }
244
245
246 static void
247 uncompress(lzma_stream *strm, FILE *file, const char *filename)
248 {
249         lzma_ret ret;
250
251         // Initialize the decoder
252 #ifdef LZMADEC
253         ret = lzma_alone_decoder(strm, memlimit);
254 #else
255         ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
256 #endif
257
258         // The only reasonable error here is LZMA_MEM_ERROR.
259         // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
260         if (ret != LZMA_OK) {
261                 fprintf(stderr, "%s: ", argv0);
262
263                 if (ret == LZMA_MEM_ERROR)
264                         fprintf(stderr, "%s\n", strerror(ENOMEM));
265                 else
266                         fprintf(stderr, "Internal program error (bug)\n");
267
268                 exit(EXIT_FAILURE);
269         }
270
271         // Input and output buffers
272         uint8_t in_buf[BUFSIZ];
273         uint8_t out_buf[BUFSIZ];
274
275         strm->avail_in = 0;
276         strm->next_out = out_buf;
277         strm->avail_out = BUFSIZ;
278
279         lzma_action action = LZMA_RUN;
280
281         while (true) {
282                 if (strm->avail_in == 0) {
283                         strm->next_in = in_buf;
284                         strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
285
286                         if (ferror(file)) {
287                                 // POSIX says that fread() sets errno if
288                                 // an error occurred. ferror() doesn't
289                                 // touch errno.
290                                 fprintf(stderr, "%s: %s: Error reading "
291                                                 "input file: %s\n",
292                                                 argv0, filename,
293                                                 strerror(errno));
294                                 exit(EXIT_FAILURE);
295                         }
296
297 #ifndef LZMADEC
298                         // When using LZMA_CONCATENATED, we need to tell
299                         // liblzma when it has got all the input.
300                         if (feof(file))
301                                 action = LZMA_FINISH;
302 #endif
303                 }
304
305                 ret = lzma_code(strm, action);
306
307                 // Write and check write error before checking decoder error.
308                 // This way as much data as possible gets written to output
309                 // even if decoder detected an error.
310                 if (strm->avail_out == 0 || ret != LZMA_OK) {
311                         const size_t write_size = BUFSIZ - strm->avail_out;
312
313                         if (fwrite(out_buf, 1, write_size, stdout)
314                                         != write_size) {
315                                 // Wouldn't be a surprise if writing to stderr
316                                 // would fail too but at least try to show an
317                                 // error message.
318                                 fprintf(stderr, "%s: Cannot write to "
319                                                 "standard output: %s\n", argv0,
320                                                 strerror(errno));
321                                 exit(EXIT_FAILURE);
322                         }
323
324                         strm->next_out = out_buf;
325                         strm->avail_out = BUFSIZ;
326                 }
327
328                 if (ret != LZMA_OK) {
329                         if (ret == LZMA_STREAM_END) {
330 #ifdef LZMADEC
331                                 // Check that there's no trailing garbage.
332                                 if (strm->avail_in != 0
333                                                 || fread(in_buf, 1, 1, file)
334                                                         != 0
335                                                 || !feof(file))
336                                         ret = LZMA_DATA_ERROR;
337                                 else
338                                         return;
339 #else
340                                 // lzma_stream_decoder() already guarantees
341                                 // that there's no trailing garbage.
342                                 assert(strm->avail_in == 0);
343                                 assert(action == LZMA_FINISH);
344                                 assert(feof(file));
345                                 return;
346 #endif
347                         }
348
349                         const char *msg;
350                         switch (ret) {
351                         case LZMA_MEM_ERROR:
352                                 msg = strerror(ENOMEM);
353                                 break;
354
355                         case LZMA_MEMLIMIT_ERROR:
356                                 msg = "Memory usage limit reached";
357                                 break;
358
359                         case LZMA_FORMAT_ERROR:
360                                 msg = "File format not recognized";
361                                 break;
362
363                         case LZMA_OPTIONS_ERROR:
364                                 // FIXME: Better message?
365                                 msg = "Unsupported compression options";
366                                 break;
367
368                         case LZMA_DATA_ERROR:
369                                 msg = "File is corrupt";
370                                 break;
371
372                         case LZMA_BUF_ERROR:
373                                 msg = "Unexpected end of input";
374                                 break;
375
376                         default:
377                                 msg = "Internal program error (bug)";
378                                 break;
379                         }
380
381                         fprintf(stderr, "%s: %s: %s\n", argv0, filename, msg);
382
383                         exit(EXIT_FAILURE);
384                 }
385         }
386 }
387
388
389 int
390 main(int argc, char **argv)
391 {
392         // Set the argv0 global so that we can print the command name in
393         // error and help messages.
394         argv0 = argv[0];
395
396         // Detect amount of installed RAM and set the memory usage limit.
397         // This is needed before parsing the command line arguments.
398         set_default_memlimit();
399
400         // Parse the command line options.
401         parse_options(argc, argv);
402
403         // The same lzma_stream is used for all files that we decode. This way
404         // we don't need to reallocate memory for every file if they use same
405         // compression settings.
406         lzma_stream strm = LZMA_STREAM_INIT;
407
408         // Some systems require setting stdin and stdout to binary mode.
409 #ifdef DOSLIKE
410         setmode(fileno(stdin), O_BINARY);
411         setmode(fileno(stdout), O_BINARY);
412 #endif
413
414         if (optind == argc) {
415                 // No filenames given, decode from stdin.
416                 uncompress(&strm, stdin, "(stdin)");
417         } else {
418                 // Loop through the filenames given on the command line.
419                 do {
420                         // "-" indicates stdin.
421                         if (strcmp(argv[optind], "-") == 0) {
422                                 uncompress(&strm, stdin, "(stdin)");
423                         } else {
424                                 FILE *file = fopen(argv[optind], "rb");
425                                 if (file == NULL) {
426                                         fprintf(stderr, "%s: %s: %s\n",
427                                                         argv0, argv[optind],
428                                                         strerror(errno));
429                                         exit(EXIT_FAILURE);
430                                 }
431
432                                 uncompress(&strm, file, argv[optind]);
433                                 fclose(file);
434                         }
435                 } while (++optind < argc);
436         }
437
438 #ifndef NDEBUG
439         // Free the memory only when debugging. Freeing wastes some time,
440         // but allows detecting possible memory leaks with Valgrind.
441         lzma_end(&strm);
442 #endif
443
444         my_exit();
445 }