1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Listing information about .xz files
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
14 #include "tuklib_integer.h"
17 /// Totals that are displayed if there was more than one file.
18 /// The "files" counter is also used in print_info_adv() to show
// Sum of lzma_index_file_size() over the listed files (see update_totals()).
24 uint64_t compressed_size;
// Sum of lzma_index_uncompressed_size() over the listed files
// (see update_totals()).
25 uint64_t uncompressed_size;
// All six members start at zero.
27 } totals = { 0, 0, 0, 0, 0, 0 };
30 /// \brief Parse the Index(es) from the given .xz file
32 /// \param idx If decoding is successful, *idx will be set to point
33 /// to an lzma_index containing the decoded information.
34 /// On error, *idx is not modified.
35 /// \param pair Input file
37 /// \return On success, false is returned. On error, true is returned.
39 // TODO: This function is pretty big. liblzma should have a function that
40 // takes a callback function to parse the Index(es) from a .xz file to make
41 // it easy for applications.
43 parse_indexes(lzma_index **idx, file_pair *pair)
45 if (pair->src_st.st_size <= 0) {
46 message_error(_("%s: File is empty"), pair->src_name);
50 if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
51 message_error(_("%s: Too small to be a valid .xz file"),
57 lzma_stream_flags header_flags;
58 lzma_stream_flags footer_flags;
61 // lzma_stream for the Index decoder
62 lzma_stream strm = LZMA_STREAM_INIT;
64 // All Indexes decoded so far
65 lzma_index *combined_index = NULL;
67 // The Index currently being decoded
68 lzma_index *this_index = NULL;
70 // Current position in the file. We parse the file backwards so
71 // initialize it to point to the end of the file.
72 off_t pos = pair->src_st.st_size;
74 // Each loop iteration decodes one Index.
76 // Check that there is enough data left to contain at least
77 // the Stream Header and Stream Footer. This check cannot
78 // fail in the first pass of this loop because the file size
// was already compared against the same limit above.
79 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
80 message_error("%s: %s", pair->src_name,
81 message_strm(LZMA_DATA_ERROR));
85 pos -= LZMA_STREAM_HEADER_SIZE;
86 lzma_vli stream_padding = 0;
88 // Locate the Stream Footer. There may be Stream Padding which
89 // we must skip when reading backwards.
91 if (pos < LZMA_STREAM_HEADER_SIZE) {
92 message_error("%s: %s", pair->src_name,
98 if (io_pread(pair, &buf,
99 LZMA_STREAM_HEADER_SIZE, pos))
102 // Stream Padding is always a multiple of four bytes.
107 // To avoid calling io_pread() for every four bytes
108 // of Stream Padding, take advantage of the fact that we
109 // have already read 12 bytes (LZMA_STREAM_HEADER_SIZE)
110 // and check them too before calling io_pread() again.
115 } while (i >= 0 && buf.u32[i] == 0);
118 // Decode the Stream Footer.
119 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
120 if (ret != LZMA_OK) {
121 message_error("%s: %s", pair->src_name,
126 // Check that the size of the Index field looks sane.
127 lzma_vli index_size = footer_flags.backward_size;
128 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
129 message_error("%s: %s", pair->src_name,
130 message_strm(LZMA_DATA_ERROR));
134 // Set pos to the beginning of the Index.
137 // See how much memory we can use for decoding this Index.
138 uint64_t memlimit = hardware_memlimit_get();
139 uint64_t memused = 0;
140 if (combined_index != NULL) {
141 memused = lzma_index_memused(combined_index);
142 if (memused > memlimit)
149 ret = lzma_index_decoder(&strm, &this_index, memlimit);
150 if (ret != LZMA_OK) {
151 message_error("%s: %s", pair->src_name,
157 // Don't give the decoder more input than the
159 strm.avail_in = MIN(IO_BUFFER_SIZE, index_size);
160 if (io_pread(pair, &buf, strm.avail_in, pos))
// Track how much of the Index has been read: pos moves forward
// and index_size counts down the bytes still to be fed.
163 pos += strm.avail_in;
164 index_size -= strm.avail_in;
166 strm.next_in = buf.u8;
167 ret = lzma_code(&strm, LZMA_RUN);
169 } while (ret == LZMA_OK);
171 // If the decoding seems to be successful, check also that
172 // the Index decoder consumed as much input as indicated
173 // by the Backward Size field.
174 if (ret == LZMA_STREAM_END)
175 if (index_size != 0 || strm.avail_in != 0)
176 ret = LZMA_DATA_ERROR;
178 if (ret != LZMA_STREAM_END) {
179 // LZMA_BUFFER_ERROR means that the Index decoder
180 // would have liked more input than what the Index
181 // size should be according to Stream Footer.
182 // The message for LZMA_DATA_ERROR makes more
183 // sense in that case.
184 if (ret == LZMA_BUF_ERROR)
185 ret = LZMA_DATA_ERROR;
187 message_error("%s: %s", pair->src_name,
190 // If the error was a too low memory usage limit,
191 // also show how much memory would have been needed.
192 if (ret == LZMA_MEMLIMIT_ERROR) {
193 uint64_t needed = lzma_memusage(&strm);
// Overflow guard for needed + memused.
194 if (UINT64_MAX - needed < memused)
199 message_mem_needed(V_ERROR, needed);
205 // Decode the Stream Header and check that its Stream Flags
206 // match the Stream Footer.
// pos was advanced over the Index while feeding the decoder, so step
// back over the Index (Backward Size) plus one header-sized field.
207 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
// The total size reported by the decoded Index must fit before pos.
208 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
209 message_error("%s: %s", pair->src_name,
210 message_strm(LZMA_DATA_ERROR));
214 pos -= lzma_index_total_size(this_index);
215 if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos))
218 ret = lzma_stream_header_decode(&header_flags, buf.u8);
219 if (ret != LZMA_OK) {
220 message_error("%s: %s", pair->src_name,
225 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
226 if (ret != LZMA_OK) {
227 message_error("%s: %s", pair->src_name,
232 // Store the decoded Stream Flags into this_index. This is
233 // needed so that we can print which Check is used in each
235 ret = lzma_index_stream_flags(this_index, &footer_flags);
239 // Store also the size of the Stream Padding field. It is
240 // needed to show the offsets of the Streams correctly.
241 ret = lzma_index_stream_padding(this_index, stream_padding);
245 if (combined_index != NULL) {
246 // Append the earlier decoded Indexes
// to this one. The file is parsed backwards, so this_index
// describes data that comes before combined_index in the file.
248 ret = lzma_index_cat(
249 this_index, combined_index, NULL);
250 if (ret != LZMA_OK) {
251 message_error("%s: %s", pair->src_name,
257 combined_index = this_index;
264 // All OK. Make combined_index available to the caller.
265 *idx = combined_index;
269 // Something went wrong, free the allocated memory.
271 lzma_index_end(combined_index, NULL);
272 lzma_index_end(this_index, NULL);
277 /// \brief Get the compression ratio
279 /// This has a slightly different format than the one used in message.c.
281 get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
// Checked first so that the division below never divides by zero.
283 if (uncompressed_size == 0)
286 const double ratio = (double)(compressed_size)
287 / (double)(uncompressed_size);
// Format the ratio with three decimals.
292 snprintf(buf, sizeof(buf), "%.3f", ratio);
// Check names indexed by Check ID: LZMA_CHECK_ID_MAX + 1 entries, each
// holding at most 11 characters plus the terminating '\0'.
297 static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
317 /// \brief Get a comma-separated list of Check names
319 /// \param checks Bit mask of Checks to print
320 /// \param space_after_comma
321 /// It's better to not use spaces in table-like listings,
322 /// but in more verbose formats a space after a comma
323 /// is good for readability.
325 get_check_names(uint32_t checks, bool space_after_comma)
// Static storage: the buffer persists after this function returns.
329 static char buf[sizeof(check_names)];
331 size_t left = sizeof(buf);
333 const char *sep = space_after_comma ? ", " : ",";
// Bit i of the mask selects check_names[i].
336 for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
337 if (checks & (UINT32_C(1) << i)) {
338 my_snprintf(&pos, &left, "%s%s",
339 comma ? sep : "", check_names[i]);
348 /// \brief Read the Check value from the .xz file and print it
350 /// Since this requires a seek, listing all Check values for all Blocks can
353 /// \param pair Input file
354 /// \param iter Location of the Block whose Check value should
357 /// \return False on success, true on I/O error.
359 print_check_value(file_pair *pair, const lzma_index_iter *iter)
361 // Don't read anything from the file if there is no integrity Check.
362 if (iter->stream.flags->check == LZMA_CHECK_NONE) {
367 // Locate and read the Check field.
// It occupies the last `size` bytes of the Block.
368 const uint32_t size = lzma_check_size(iter->stream.flags->check);
369 const off_t offset = iter->block.compressed_file_offset
370 + iter->block.total_size - size;
372 if (io_pread(pair, &buf, size, offset))
375 // CRC32 and CRC64 are in little endian. Guess that all the future
376 // 32-bit and 64-bit Check values are little endian too. It shouldn't
377 // be too big a problem if this guess is wrong.
379 printf("%08" PRIx32, conv32le(buf.u32[0]));
380 } else if (size == 8) {
381 printf("%016" PRIx64, conv64le(buf.u64[0]));
// Any other size is printed byte by byte as two hex digits each.
383 for (size_t i = 0; i < size; ++i)
384 printf("%02x", buf.u8[i]);
// Print one table row summarizing the whole file described by idx.
392 print_info_basic(const lzma_index *idx, file_pair *pair)
// Static flag: the column headings are printed only on the first call.
394 static bool headings_displayed = false;
395 if (!headings_displayed) {
396 headings_displayed = true;
397 // TRANSLATORS: These are column titles. From Strms (Streams)
398 // to Ratio, the columns are right aligned. Check and Filename
399 // are left aligned. If you need longer words, it's OK to
400 // use two lines here. Test with xz --list.
401 puts(_("Strms Blocks Compressed Uncompressed Ratio "
405 printf("%5s %7s %11s %11s %5s %-7s %s\n",
406 uint64_to_str(lzma_index_stream_count(idx), 0),
407 uint64_to_str(lzma_index_block_count(idx), 1),
408 uint64_to_nicestr(lzma_index_file_size(idx),
409 NICESTR_B, NICESTR_TIB, false, 2),
410 uint64_to_nicestr(lzma_index_uncompressed_size(idx),
411 NICESTR_B, NICESTR_TIB, false, 3),
412 get_ratio(lzma_index_file_size(idx),
413 lzma_index_uncompressed_size(idx)),
414 get_check_names(lzma_index_checks(idx), false),
// Print the labeled summary lines (counts, sizes, ratio and Checks).
// Used by both print_info_adv() and print_totals_adv().
422 print_adv_helper(uint64_t stream_count, uint64_t block_count,
423 uint64_t compressed_size, uint64_t uncompressed_size,
426 printf(_(" Stream count: %s\n"),
427 uint64_to_str(stream_count, 0));
428 printf(_(" Block count: %s\n"),
429 uint64_to_str(block_count, 0));
430 printf(_(" Compressed size: %s\n"),
431 uint64_to_nicestr(compressed_size,
432 NICESTR_B, NICESTR_TIB, true, 0));
433 printf(_(" Uncompressed size: %s\n"),
434 uint64_to_nicestr(uncompressed_size,
435 NICESTR_B, NICESTR_TIB, true, 0));
436 printf(_(" Ratio: %s\n"),
437 get_ratio(compressed_size, uncompressed_size));
// Verbose format: a space is used after each comma in the Check list.
438 printf(_(" Check: %s\n"),
439 get_check_names(checks, true));
// Print the verbose listing for one file: the overall summary, followed by
// per-Stream and per-Block tables when they add information.
445 print_info_adv(const lzma_index *idx, file_pair *pair)
447 // Print the overall information.
448 print_adv_helper(lzma_index_stream_count(idx),
449 lzma_index_block_count(idx),
450 lzma_index_file_size(idx),
451 lzma_index_uncompressed_size(idx),
452 lzma_index_checks(idx));
454 // TODO: The rest of this function needs some work. Currently
455 // the offsets are not printed, which could be useful even when
456 // printed in a less accurate format. On the other hand, maybe
457 // this should print the information with exact byte values,
458 // or maybe there should be at least an option to do that.
460 // We could also display some other info. E.g. it could be useful
461 // to quickly see how big is the biggest Block (uncompressed size)
462 // and if all Blocks have Compressed Size and Uncompressed Size
463 // fields present, which can be used e.g. for multithreaded
466 // Avoid printing Stream and Block lists when they wouldn't be useful.
467 bool show_blocks = false;
// The Stream table is printed only when there is more than one Stream.
468 if (lzma_index_stream_count(idx) > 1) {
469 puts(_(" Streams:"));
470 puts(_(" Number Blocks Compressed "
471 "Uncompressed Ratio Check"));
473 lzma_index_iter iter;
474 lzma_index_iter_init(&iter, idx);
475 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
476 if (iter.stream.block_count > 1)
479 printf(" %8s %10s %11s %11s %5s %s\n",
480 uint64_to_str(iter.stream.number, 0),
481 uint64_to_str(iter.stream.block_count, 1),
483 iter.stream.compressed_size,
484 NICESTR_B, NICESTR_TIB, false, 2),
486 iter.stream.uncompressed_size,
487 NICESTR_B, NICESTR_TIB, false, 3),
488 get_ratio(iter.stream.compressed_size,
489 iter.stream.uncompressed_size),
490 check_names[iter.stream.flags->check]);
// The Block table is printed if show_blocks is set, if there are more
// Blocks than Streams, or at debug verbosity.
494 if (show_blocks || lzma_index_block_count(idx)
495 > lzma_index_stream_count(idx)
496 || message_verbosity_get() >= V_DEBUG) {
498 // FIXME: Number in Stream/file, which one is better?
499 puts(_(" Stream Number Compressed "
500 "Uncompressed Ratio Check"));
502 lzma_index_iter iter;
503 lzma_index_iter_init(&iter, idx);
504 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
505 printf(" %8s %10s %11s %11s %5s %-7s",
506 uint64_to_str(iter.stream.number, 0),
507 uint64_to_str(iter.block.number_in_stream, 1),
508 uint64_to_nicestr(iter.block.total_size,
509 NICESTR_B, NICESTR_TIB, false, 2),
511 iter.block.uncompressed_size,
512 NICESTR_B, NICESTR_TIB, false, 3),
513 get_ratio(iter.block.total_size,
514 iter.block.uncompressed_size),
515 check_names[iter.stream.flags->check]);
// The Check value is read from the file only at debug verbosity.
517 if (message_verbosity_get() >= V_DEBUG)
518 if (print_check_value(pair, &iter))
// Print the machine-readable listing for one file as tab-separated lines.
528 print_info_robot(const lzma_index *idx, file_pair *pair)
// One "file" line summarizing the whole file.
530 printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
532 lzma_index_stream_count(idx),
533 lzma_index_block_count(idx),
534 lzma_index_file_size(idx),
535 lzma_index_uncompressed_size(idx),
536 get_ratio(lzma_index_file_size(idx),
537 lzma_index_uncompressed_size(idx)),
538 get_check_names(lzma_index_checks(idx), false),
// "stream" and "block" lines are printed only at verbose verbosity
// or higher.
541 if (message_verbosity_get() >= V_VERBOSE) {
542 lzma_index_iter iter;
543 lzma_index_iter_init(&iter, idx);
545 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM))
546 printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
547 "\t%" PRIu64 "\t%" PRIu64
548 "\t%s\t%" PRIu64 "\t%s\n",
550 iter.stream.compressed_offset,
551 iter.stream.uncompressed_offset,
552 iter.stream.compressed_size,
553 iter.stream.uncompressed_size,
554 get_ratio(iter.stream.compressed_size,
555 iter.stream.uncompressed_size),
557 check_names[iter.stream.flags->check]);
// Restart the same iterator to walk the Index again, now by Block.
559 lzma_index_iter_rewind(&iter);
560 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
561 printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
562 "\t%" PRIu64 "\t%" PRIu64
563 "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s",
565 iter.block.number_in_stream,
566 iter.block.number_in_file,
567 iter.block.compressed_file_offset,
568 iter.block.uncompressed_file_offset,
569 iter.block.total_size,
570 iter.block.uncompressed_size,
571 get_ratio(iter.block.total_size,
572 iter.block.uncompressed_size),
573 check_names[iter.stream.flags->check]);
// The Check value is read from the file only at debug verbosity.
575 if (message_verbosity_get() >= V_DEBUG) {
577 if (print_check_value(pair, &iter))
// Add the figures of one decoded Index to the running totals.
590 update_totals(const lzma_index *idx)
592 // TODO: Integer overflow checks
594 totals.streams += lzma_index_stream_count(idx);
595 totals.blocks += lzma_index_block_count(idx);
596 totals.compressed_size += lzma_index_file_size(idx);
597 totals.uncompressed_size += lzma_index_uncompressed_size(idx);
// Checks form a bit mask, so they are OR'ed together instead of summed.
598 totals.checks |= lzma_index_checks(idx);
// Print the totals row of the basic (table-like) listing: a separator made
// of dashes, the same columns as the per-file rows, and the number of files.
604 print_totals_basic(void)
606 // Print a separator line.
608 memset(line, '-', sizeof(line));
609 line[sizeof(line) - 1] = '\0';
612 // Print the totals except the file count, which needs
614 printf("%5s %7s %11s %11s %5s %-7s ",
615 uint64_to_str(totals.streams, 0),
616 uint64_to_str(totals.blocks, 1),
617 uint64_to_nicestr(totals.compressed_size,
618 NICESTR_B, NICESTR_TIB, false, 2),
619 uint64_to_nicestr(totals.uncompressed_size,
620 NICESTR_B, NICESTR_TIB, false, 3),
621 get_ratio(totals.compressed_size,
622 totals.uncompressed_size),
623 get_check_names(totals.checks, false));
// Both plural forms must end in a newline. Translations are keyed on the
// singular string, and languages whose plural rules map some counts of two
// or more onto the first form would otherwise leave the line unterminated.
625 // Since we print totals only when there are at least two files,
626 // the English message will always use "%s files". But some other
627 // languages need different forms for different plurals so we
628 // have to translate this string still.
630 // TRANSLATORS: This simply indicates the number of files shown
631 // by --list even though the format string uses %s.
632 printf(N_("%s file\n", "%s files\n",
633 totals.files <= ULONG_MAX ? totals.files
634 : (totals.files % 1000000) + 1000000),
635 uint64_to_str(totals.files, 0));
// Print the totals in the verbose format: the number of files followed by
// the same labeled lines that print_adv_helper() prints for a single file.
642 print_totals_adv(void)
646 printf(_(" Number of files: %s\n"),
647 uint64_to_str(totals.files, 0));
648 print_adv_helper(totals.streams, totals.blocks,
649 totals.compressed_size, totals.uncompressed_size,
// Print the totals as one tab-separated "totals" line.
657 print_totals_robot(void)
659 printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
660 "\t%s\t%s\t%" PRIu64 "\n",
663 totals.compressed_size,
664 totals.uncompressed_size,
665 get_ratio(totals.compressed_size,
666 totals.uncompressed_size),
// No space after commas in the Check list.
667 get_check_names(totals.checks, false),
678 // Always print totals in --robot mode. It can be convenient
679 // in some cases and doesn't complicate usage of the
680 // single-file case much.
681 print_totals_robot();
683 } else if (totals.files > 1) {
684 // For non-robot mode, totals are printed only if there
685 // is more than one file.
// At warning verbosity or lower the basic (table-like) totals are printed.
686 if (message_verbosity_get() <= V_WARNING)
687 print_totals_basic();
// List one .xz file: parse its Index(es) and print the information in the
// format selected by the robot and verbosity settings.
697 list_file(const char *filename)
699 if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO)
700 message_fatal(_("--list works only on .xz files "
701 "(--format=xz or --format=auto)"));
703 message_filename(filename);
// Pointer comparison: standard input is recognized by the stdin_filename
// pointer itself, not by comparing string contents.
705 if (filename == stdin_filename) {
706 message_error(_("--list does not support reading from "
711 // Unset opt_stdout so that io_open_src() won't accept special files.
712 // Set opt_force so that io_open_src() will follow symlinks.
715 file_pair *pair = io_open_src(filename);
// parse_indexes() returns false on success.
720 if (!parse_indexes(&idx, pair)) {
721 // Update the totals that are displayed after all
722 // the individual files have been listed.
725 // We have three main modes:
726 // - --robot, which has submodes if --verbose is specified
728 // - Normal --list without --verbose
729 // - --list with one or two --verbose
731 print_info_robot(idx, pair);
732 else if (message_verbosity_get() <= V_WARNING)
733 print_info_basic(idx, pair);
735 print_info_adv(idx, pair);
// The Index is freed once it has been printed.
737 lzma_index_end(idx, NULL);
740 io_close(pair, false);