]> icculus.org git repositories - icculus/xz.git/blob - src/lzma/list.c
Replaced the range decoder optimization that used arithmetic
[icculus/xz.git] / src / lzma / list.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       list.c
4 /// \brief      Listing information about .lzma files
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This program is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This program is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "private.h"
21
22
23 /*
24
25 1. Check the file type: native, alone, unknown
26
27 Alone:
28 1. Show info about header. Don't look for concatenated parts.
29
30 Native:
31 1. Check that Stream Header is valid.
32 2. Seek to the end of the file.
33 3. Skip padding.
34 4. Reverse decode Stream Footer.
35 5. Seek Backward Size bytes.
36 6.
37
38 */
39
40
41 static void
42 unsupported_file(file_handle *handle)
43 {
44         errmsg(V_ERROR, "%s: Unsupported file type", handle->name);
45         set_exit_status(ERROR);
46         (void)io_close(handle);
47         return;
48 }
49
50
/// Writes the NUL-terminated byte string \a str to standard output.
/// ASCII control characters (0x00-0x1F and 0x7F) are written as "\xHH"
/// with two uppercase hexadecimal digits; every other byte, including
/// the backslash itself, is written unchanged.
static void
print_escaped(const uint8_t *str)
{
        for (const uint8_t *p = str; *p != '\0'; ++p) {
                const unsigned int byte = *p;

                if (byte >= 0x20 && byte != 0x7F)
                        putchar((int)byte);
                else
                        printf("\\x%02X", byte);
        }
}
66
67
/// NOTE(review): Unfinished stub. It initializes a Stream Header decoder on
/// a local lzma_stream, but never inspects \c ret, never uses \c flags or
/// \c handle, and never runs the decoder. A second static function named
/// list_native(), taking a listing_handle pointer, is defined later in this
/// file; the two definitions conflict, so this one looks like a leftover
/// that should be removed -- confirm before deleting.
68 static void
69 list_native(file_handle *handle)
70 {
71         lzma_stream strm = LZMA_STREAM_INIT;
72         lzma_stream_flags flags;
73         lzma_ret ret = lzma_stream_header_decoder(&strm, &flags);
74
75 }
76
77
78 static void
79 list_alone(const listing_handle *handle)
80 {
81         if (handle->buffer[0] > (4 * 5 + 4) * 9 + 8) {
82                 unsupported_file(handle);
83                 return;
84         }
85
86         const unsigned int pb = handle->buffer[0] / (9 * 5);
87         handle->buffer[0] -= pb * 9 * 5;
88         const unsigned int lp = handle->buffer[0] / 9;
89         const unsigned int lc = handle->buffer[0] - lp * 9;
90
91         uint32_t dict = 0;
92         for (size_t i = 1; i < 5; ++i) {
93                 dict <<= 8;
94                 dict |= header[i];
95         }
96
97         if (dict > LZMA_DICTIONARY_SIZE_MAX) {
98                 unsupported_file(handle);
99                 return;
100         }
101
102         uint64_t uncompressed_size = 0;
103         for (size_t i = 5; i < 13; ++i) {
104                 uncompressed_size <<= 8;
105                 uncompressed_size |= header[i];
106         }
107
108         // Reject files with uncompressed size of 256 GiB or more. It's
109         // an arbitrary limit trying to avoid at least some false positives.
110         if (uncompressed_size != UINT64_MAX
111                         && uncompressed_size >= (UINT64_C(1) << 38)) {
112                 unsupported_file(handle);
113                 return;
114         }
115
116         if (verbosity < V_WARNING) {
117                 printf("name=");
118                 print_escaped(handle->name);
119                 printf("\nformat=alone\n");
120
121                 if (uncompressed_size == UINT64_MAX)
122                         printf("uncompressed_size=unknown\n");
123                 else
124                         printf("uncompressed_size=%" PRIu64 "\n",
125                                         uncompressed_size);
126
127                 printf("dict=%" PRIu32 "\n", dict);
128
129                 printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb);
130
131         } else {
132                 printf("File name:                   ");
133                 print_escaped(handle->name);
134                 printf("\nFile format:                 LZMA_Alone\n")
135
136                 printf("Uncompressed size:           ");
137                 if (uncompressed_size == UINT64_MAX)
138                         printf("unknown\n");
139                 else
140                         printf("%," PRIu64 " bytes (%" PRIu64 " MiB)\n",
141                                         uncompressed_size,
142                                         (uncompressed_size + 1024 * 512)
143                                                 / (1024 * 1024));
144
145                 printf("Dictionary size:             %," PRIu32 " bytes "
146                                 "(%" PRIu32 " MiB)\n",
147                                 dict, (dict + 1024 * 512) / (1024 * 1024));
148
149                 printf("Literal context bits (lc):   %u\n", lc);
150                 printf("Literal position bits (lc):  %u\n", lp);
151                 printf("Position bits (pb):          %u\n", pb);
152         }
153
154         return;
155 }
156
157
158
159
/// State of the one file being listed; filled in by list() and passed to
/// the parsing and listing functions in this file.
160 typedef struct {
            /// File name as given to list(); used as the prefix of messages.
161         const char *filename;
            /// Result of fstat() on fd. st_size bounds the offsets that
            /// listing_pread() accepts.
162         struct stat st;
            /// Descriptor opened read-only by list().
163         int fd;
164
            /// Decoder state handed to lzma_stream_header_decoder(),
            /// lzma_stream_tail_decoder(), and lzma_code().
165         lzma_stream strm;
            /// Output target given to the Stream Header decoder in
            /// parse_stream_header().
166         lzma_stream_flags stream_flags;
            /// NOTE(review): not referenced by any code visible in this
            /// file; purpose to be confirmed.
167         lzma_info *info;
168
            /// Decoded by parse_stream_tail() with lzma_vli_reverse_decode().
169         lzma_vli backward_size;
            /// Decoded by parse_stream_tail() when the Stream is not
            /// multi-Block; LZMA_VLI_VALUE_UNKNOWN if that decoding fails.
170         lzma_vli uncompressed_size;
171
            /// Number of valid bytes in buffer. Set by listing_pread() and
            /// reduced by parse_stream_tail() as it consumes bytes from
            /// the end.
172         size_t buffer_size;
            /// Data most recently read by listing_pread().
173         uint8_t buffer[IO_BUFFER_SIZE];
174 } listing_handle;
175
176
177 static bool
178 listing_pread(listing_handle *handle, uint64_t offset)
179 {
180         if (offset >= (uint64_t)(handle->st.st_size)) {
181                 errmsg(V_ERROR, "%s: Trying to read past the end of "
182                                 "the file.", handle->filename);
183                 return true;
184         }
185
186 #ifdef HAVE_PREAD
187         const ssize_t ret = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE,
188                         (off_t)(offset));
189 #else
190         // Use lseek() + read() since we don't have pread(). We don't care
191         // to which offset the reading position is left.
192         if (lseek(handle->fd, (off_t)(offset), SEEK_SET) == -1) {
193                 errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
194                 return true;
195         }
196
197         const ssize_t ret = read(handle->fd, handle->buffer, IO_BUFFER_SIZE);
198 #endif
199
200         if (ret == -1) {
201                 errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
202                 return true;
203         }
204
205         if (ret == 0) {
206                 errmsg(V_ERROR, "%s: Trying to read past the end of "
207                                 "the file.", handle->filename);
208                 return true;
209         }
210
211         handle->buffer_size = (size_t)(ret);
212         return false;
213 }
214
215
216
217 static bool
218 parse_stream_header(listing_handle *handle)
219 {
220         if (listing_pread(handle, 0))
221                 return true;
222
223         // TODO Got enough input?
224
225         lzma_ret ret = lzma_stream_header_decoder(
226                         &handle->strm, &handle->stream_flags);
227         if (ret != LZMA_OK) {
228                 errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
229                 return true;
230         }
231
232         handle->strm.next_in = handle->buffer;
233         handle->strm.avail_in = handle->buffer_size;
234         ret = lzma_code(&handle->strm, LZMA_RUN);
235         if (ret != LZMA_STREAM_END) {
236                 assert(ret != LZMA_OK);
237                 errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
238                 return true;
239         }
240
241         return false;
242 }
243
244
245 static bool
246 parse_stream_tail(listing_handle *handle)
247 {
248         uint64_t offset = (uint64_t)(handle->st.st_size);
249
250         // Skip padding
251         do {
252                 if (offset == 0) {
253                         errmsg(V_ERROR, "%s: %s", handle->name,
254                                         str_strm_error(LZMA_DATA_ERROR));
255                         return true;
256                 }
257
258                 if (offset < IO_BUFFER_SIZE)
259                         offset = 0;
260                 else
261                         offset -= IO_BUFFER_SIZE;
262
263                 if (listing_pread(handle, offset))
264                         return true;
265
266                 while (handle->buffer_size > 0
267                                 && handle->buffer[handle->buffer_size - 1]
268                                         == '\0')
269                         --handle->buffer_size;
270
271         } while (handle->buffer_size == 0);
272
273         if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) {
274                 // TODO
275         }
276
277         lzma_stream_flags stream_flags;
278         lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags);
279         if (ret != LZMA_OK) {
280                 errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
281                 return true;
282         }
283
284         handle->strm.next_in = handle->buffer + handle->buffer_size
285                         - LZMA_STREAM_TAIL_SIZE;
286         handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE;
287         handle->buffer_size -= LZMA_STREAM_TAIL_SIZE;
288         ret = lzma_code(&handle->strm, LZMA_RUN);
289         if (ret != LZMA_OK) {
290                 assert(ret != LZMA_OK);
291                 errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
292                 return true;
293         }
294
295         if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) {
296                 // TODO
297                 // Possibly corrupt, possibly concatenated file.
298         }
299
300         handle->backward_size = 0;
301         ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer,
302                         &handle->buffer_size);
303         if (ret != LZMA_OK) {
304                 // It may be LZMA_BUF_ERROR too, but it doesn't make sense
305                 // as an error message displayed to the user.
306                 errmsg(V_ERROR, "%s: %s", handle->name,
307                                 str_strm_error(LZMA_DATA_ERROR));
308                 return true;
309         }
310
311         if (!stream_flags.is_multi) {
312                 handle->uncompressed_size = 0;
313                 size_t tmp = handle->buffer_size;
314                 ret = lzma_vli_reverse_decode(&handle->uncompressed_size,
315                                 handle->buffer, &tmp);
316                 if (ret != LZMA_OK)
317                         handle->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
318         }
319
320         // Calculate the Header Metadata Block start offset.
321
322
323         return false;
324 }
325
326
327
/// Lists information about a file in the native format (work in progress).
///
/// NOTE(review): This function does not compile as written. Several
/// statements are cut off: the errmsg() call, the initializer of `info`,
/// the my_seek() condition, and the assignment to index->total_size. The
/// parse_metadata() condition in the multi-Block branch lacks a closing
/// parenthesis, and there is an `if ()` without a condition. The code also
/// reads handle->block_options, which is not a member of listing_handle as
/// declared in this file, and it calls parse_block_header(),
/// parse_metadata(), parse_stream_footer(), and my_seek(), none of which
/// are defined in the visible part of the file.
328 static void
329 list_native(listing_handle *handle)
330 {
331         lzma_memory_limitter *limitter
332                         = lzma_memory_limitter_create(opt_memory);
333         if (limitter == NULL) {
                    // NOTE(review): the call below is incomplete.
334                 errmsg(V_ERROR,
335         }
            // NOTE(review): the initializer of `info` is missing.
336         lzma_info *info =
337
338
339         // Parse Stream Header
340         //
341         // Single-Block Stream:
342         //  - Parse Block Header
343         //  - Parse Stream Footer
344         //  - If Backward Size doesn't match, error out
345         //
346         // Multi-Block Stream:
347         //  - Parse Header Metadata Block, if any
348         //  - Parse Footer Metadata Block
349         //  - Parse Stream Footer
350         //  - If Footer Metadata Block doesn't match the Stream, error out
351         //
352         // In other words, we don't support concatenated files.
353         if (parse_stream_header(handle))
354                 return;
355
356         if (parse_block_header(handle))
357                 return;
358
359         if (handle->stream_flags.is_multi) {
360                 if (handle->block_options.is_metadata) {
361                         if (parse_metadata(handle)
362                                 return;
363                 }
364
                    // NOTE(review): the seek target is missing.
365                 if (my_seek(handle,
366
367         } else {
                    // A Metadata Block in a Stream that is not flagged as
                    // multi-Block is treated as corruption.
368                 if (handle->block_options.is_metadata) {
369                         FILE_IS_CORRUPT();
370                         return;
371                 }
372
373                 if (parse_stream_footer(handle))
374                         return;
375
376                 // If Uncompressed Size isn't present in Block Header,
377                 // it must be present in Stream Footer.
378                 if (handle->block_options.uncompressed_size
379                                         == LZMA_VLI_VALUE_UNKNOWN
380                                 && handle->stream_flags.uncompressed_size
381                                         == LZMA_VLI_VALUE_UNKNOWN) {
382                         FILE_IS_CORRUPT();
383                         return;
384                 }
385
386                 // Construct a single-Record Index.
387                 lzma_index *index = malloc(sizeof(lzma_index));
388                 if (index == NULL) {
389                         out_of_memory();
390                         return;
391                 }
392
393                 // Thoughts:
394                 // If the Block coder took care of Uncompressed Size and
395                 // Backward Size, index->total_size could be set directly
                    // to Backward Size.
                    // NOTE(review): the assignment below is incomplete, and
                    // `index` is not freed on any path visible here.
396                 index->total_size =
397
398                 if () {
399
400                 }
401         }
402
403
404         if (handle->block_options.is_metadata) {
405                 if (!handle->stream_flags.is_multi) {
406                         FILE_IS_CORRUPT();
407                         return;
408                 }
409
410                 if (parse_metadata(handle))
411                         return;
412
413         }
414 }
415
416
417
418 extern void
419 list(const char *filename)
420 {
421         if (strcmp(filename, "-") == 0) {
422                 errmsg(V_ERROR, "%s: --list does not support reading from "
423                                 "standard input", filename);
424                 return;
425         }
426
427         if (is_empty_filename(filename))
428                 return;
429
430         listing_handle handle;
431         handle.filename = filename;
432
433         handle.fd = open(filename, O_RDONLY | O_NOCTTY);
434         if (handle.fd == -1) {
435                 errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
436                 return;
437         }
438
439         if (fstat(handle.fd, &handle.st)) {
440                 errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
441                 goto out;
442         }
443
444         if (!S_ISREG(handle.st.st_mode)) {
445                 errmsg(V_WARNING, _("%s: Not a regular file, skipping"),
446                                 filename);
447                 goto out;
448         }
449
450         if (handle.st.st_size <= 0) {
451                 errmsg(V_ERROR, _("%s: File is empty"), filename);
452                 goto out;
453         }
454
455         if (listing_pread(&handle, 0))
456                 goto out;
457
458         if (handle.buffer[0] == 0xFF) {
459                 if (opt_header == HEADER_ALONE) {
460                         errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
461                         goto out;
462                 }
463
464                 list_native(&handle);
465         } else {
466                 if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) {
467                         errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
468                         goto out;
469                 }
470
471                 list_alone(&handle);
472         }
473
474 out:
475         (void)close(fd);
476         return;
477 }