1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file metadata_decoder.c
4 /// \brief Decodes metadata stored in Metadata Blocks
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
20 #include "metadata_decoder.h"
21 #include "block_decoder.h"
24 /// Maximum size of a single Extra Record. Again, this is mostly to make
25 /// sure that the parsed lzma_vli fits into size_t. Still, maybe this should
/// be smaller.
27 #define EXTRA_SIZE_MAX (SIZE_MAX / 4)
/// State in which process() reads the Size of Header Metadata field.
33 SEQ_HEADER_METADATA_SIZE,
/// State in which process() reads the Uncompressed Size field.
35 SEQ_UNCOMPRESSED_SIZE,
/// State in which process() reads the Uncompressed Size of each Index Record.
39 SEQ_INDEX_UNCOMPRESSED_SIZE,
/// State in which process() re-initializes extra_dummy before parsing the
/// next Extra Record in verify-only mode.
46 SEQ_EXTRA_DUMMY_ALLOC,
52 /// Number of "things" left to be parsed. If we hit end of input
53 /// when this isn't zero, we have corrupt Metadata Block.
56 /// Position in variable-length integers
59 /// Temporary variable needed to decode variables whose type
60 /// is size_t instead of lzma_vli.
63 /// Pointer to target structure to hold the parsed results.
64 lzma_metadata *metadata;
66 /// The Index Record we currently are parsing
67 lzma_index *index_current;
69 /// Number of Records in Index
72 /// Sum of Total Size fields in the Index
73 lzma_vli index_total_size;
75 /// Sum of Uncompressed Size fields in the Index
76 lzma_vli index_uncompressed_size;
78 /// True if Extra is present.
81 /// True if we have been requested to store the Extra to *metadata.
84 /// Pointer to the end of the Extra Record list.
85 lzma_extra *extra_tail;
87 /// Dummy Extra Record used when only verifying integrity of Extra
88 /// (not storing it to RAM).
89 lzma_extra extra_dummy;
/// Nested decoder whose code() callback fills `buffer` in metadata_decode().
92 lzma_next_coder block_decoder;
94 /// buffer[buffer_pos] is the next byte to process.
97 /// buffer[buffer_size] is the first byte to not process.
100 /// Temporary buffer to which encoded Metadata is read before
/// it is parsed by process().
102 uint8_t buffer[LZMA_BUFFER_SIZE];
106 /// Reads a variable-length integer from coder->buffer into `num`.
/// Callers pass the complete destination expression (for example
/// coder->tmp or coder->metadata->total_size), not a bare member name.
/// A separate branch is taken whenever lzma_vli_decode() returns anything
/// other than LZMA_STREAM_END.
107 #define read_vli(num) \
109 const lzma_ret ret = lzma_vli_decode( \
111 coder->buffer, &coder->buffer_pos, \
112 coder->buffer_size); \
113 if (ret != LZMA_STREAM_END) \
/// Parses the bytes currently held in coder->buffer.
///
/// Runs while coder->buffer_pos < coder->buffer_size, dispatching on
/// coder->sequence, and stores the parsed fields into *coder->metadata
/// (sizes, the Index list and, if requested, the Extra list).
///
/// Error returns visible in this function:
///  - LZMA_HEADER_ERROR: reserved bits (0x70) are set in the flags byte.
///  - LZMA_DATA_ERROR: a field has an invalid value (zero where zero is not
///    allowed, sums that exceed LZMA_VLI_VALUE_MAX or do not match the
///    stored totals, trailing bytes when Extra is not flagged as present,
///    or an Extra Record larger than EXTRA_SIZE_MAX).
///  - LZMA_MEM_ERROR: allocating an Index or Extra Record failed.
///  - LZMA_PROG_ERROR: NOTE(review): presumably the fallback for an
///    unexpected coder->sequence value -- confirm.
121 process(lzma_coder *coder, lzma_allocator *allocator)
123 while (coder->buffer_pos < coder->buffer_size)
124 switch (coder->sequence) {
126 // Reserved bits must be unset.
127 if (coder->buffer[coder->buffer_pos] & 0x70)
128 return LZMA_HEADER_ERROR;
130 // If Size of Header Metadata is present, prepare the
131 // variable for variable-length integer decoding. Otherwise
132 // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
133 // field isn't present.
134 if (coder->buffer[coder->buffer_pos] & 0x01) {
135 coder->metadata->header_metadata_size = 0;
// Same preparation for Total Size (flag 0x02).
139 if (coder->buffer[coder->buffer_pos] & 0x02) {
140 coder->metadata->total_size = 0;
// Same preparation for Uncompressed Size (flag 0x04).
144 if (coder->buffer[coder->buffer_pos] & 0x04) {
145 coder->metadata->uncompressed_size = 0;
149 if (coder->buffer[coder->buffer_pos] & 0x08) {
150 // Setting index_count to 1 is just to indicate that
151 // Index is present. The real size is parsed later.
152 coder->index_count = 1;
// Flag 0x80 tells whether Extra is present.
156 coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
160 coder->sequence = SEQ_HEADER_METADATA_SIZE;
163 case SEQ_HEADER_METADATA_SIZE:
164 if (coder->metadata->header_metadata_size
165 != LZMA_VLI_VALUE_UNKNOWN) {
166 read_vli(coder->metadata->header_metadata_size);
168 if (coder->metadata->header_metadata_size == 0)
169 return LZMA_DATA_ERROR;
174 coder->sequence = SEQ_TOTAL_SIZE;
178 if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
179 read_vli(coder->metadata->total_size);
181 if (coder->metadata->total_size == 0)
182 return LZMA_DATA_ERROR;
187 coder->sequence = SEQ_UNCOMPRESSED_SIZE;
190 case SEQ_UNCOMPRESSED_SIZE:
191 if (coder->metadata->uncompressed_size
192 != LZMA_VLI_VALUE_UNKNOWN) {
193 read_vli(coder->metadata->uncompressed_size);
197 coder->sequence = SEQ_INDEX_COUNT;
200 case SEQ_INDEX_COUNT:
// index_count is still zero when the Index flag (0x08) was not set.
201 if (coder->index_count == 0) {
202 coder->sequence = SEQ_EXTRA_PREPARE;
206 read_vli(coder->tmp);
208 // Index must not be empty nor far too big (wouldn't fit
// in memory).
210 if (coder->tmp == 0 || coder->tmp
211 >= SIZE_MAX / sizeof(lzma_index))
212 return LZMA_DATA_ERROR;
214 coder->index_count = (size_t)(coder->tmp);
217 coder->sequence = SEQ_INDEX_ALLOC;
220 case SEQ_INDEX_ALLOC: {
221 lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
223 return LZMA_MEM_ERROR;
226 i->uncompressed_size = 0;
// Append the new Record: it becomes the list head if the list is empty.
229 if (coder->metadata->index == NULL)
230 coder->metadata->index = i;
232 coder->index_current->next = i;
234 coder->index_current = i;
236 coder->sequence = SEQ_INDEX_TOTAL_SIZE;
241 case SEQ_INDEX_TOTAL_SIZE: {
242 read_vli(coder->index_current->total_size);
// Keep a running sum and reject it once it exceeds LZMA_VLI_VALUE_MAX.
244 coder->index_total_size += coder->index_current->total_size;
245 if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
246 return LZMA_DATA_ERROR;
248 // No Block can have Total Size of zero bytes.
249 if (coder->index_current->total_size == 0)
250 return LZMA_DATA_ERROR;
252 if (--coder->index_count == 0) {
253 // If Total Size is present, it must match the sum
254 // of Total Sizes in Index.
255 if (coder->metadata->total_size
256 != LZMA_VLI_VALUE_UNKNOWN
257 && coder->metadata->total_size
258 != coder->index_total_size)
259 return LZMA_DATA_ERROR;
// Rewind to the first Index Record; the Uncompressed Size fields are
// read in a second pass over the same list.
261 coder->index_current = coder->metadata->index;
262 coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
264 coder->sequence = SEQ_INDEX_ALLOC;
270 case SEQ_INDEX_UNCOMPRESSED_SIZE: {
271 read_vli(coder->index_current->uncompressed_size);
273 coder->index_uncompressed_size
274 += coder->index_current->uncompressed_size;
275 if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
276 return LZMA_DATA_ERROR;
278 coder->index_current = coder->index_current->next;
// End of the list: if Uncompressed Size is present, it must match the
// sum of Uncompressed Sizes in Index.
279 if (coder->index_current == NULL) {
280 if (coder->metadata->uncompressed_size
281 != LZMA_VLI_VALUE_UNKNOWN
282 && coder->metadata->uncompressed_size
283 != coder->index_uncompressed_size)
284 return LZMA_DATA_ERROR;
287 coder->sequence = SEQ_EXTRA_PREPARE;
293 case SEQ_EXTRA_PREPARE:
294 assert(coder->todo_count == 0);
296 // If we get here, we have at least one byte of input left.
297 // If "Extra is present" flag is unset in Metadata Flags,
298 // it means that there is some garbage and we return an error.
299 if (!coder->has_extra)
300 return LZMA_DATA_ERROR;
// Verify-only mode: every Record is parsed into the reusable extra_dummy.
302 if (!coder->want_extra) {
303 coder->extra_tail = &coder->extra_dummy;
304 coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
308 coder->sequence = SEQ_EXTRA_ALLOC;
312 case SEQ_EXTRA_ALLOC: {
313 lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
315 return LZMA_MEM_ERROR;
// Append the new Record: it becomes the list head if the list is empty.
322 if (coder->metadata->extra == NULL)
323 coder->metadata->extra = e;
325 coder->extra_tail->next = e;
327 coder->extra_tail = e;
329 coder->todo_count = 1;
330 coder->sequence = SEQ_EXTRA_ID;
336 case SEQ_EXTRA_DUMMY_ID:
337 read_vli(coder->extra_tail->id);
339 if (coder->extra_tail->id == 0) {
340 coder->extra_tail->size = 0;
341 coder->extra_tail->data = NULL;
342 coder->todo_count = 0;
351 case SEQ_EXTRA_DUMMY_SIZE:
352 read_vli(coder->tmp);
354 if (coder->tmp == 0) {
355 // We have no Data in the Extra Record. Don't
356 // allocate any memory for it. Go back to
357 // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
359 coder->sequence -= 2;
360 coder->todo_count = 0;
367 case SEQ_EXTRA_DATA_ALLOC: {
368 if (coder->tmp > EXTRA_SIZE_MAX)
369 return LZMA_DATA_ERROR;
371 coder->extra_tail->size = (size_t)(coder->tmp);
374 // We reserve space for the trailing '\0' too.
375 uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
378 return LZMA_MEM_ERROR;
380 coder->extra_tail->data = d;
381 coder->sequence = SEQ_EXTRA_DATA_COPY;
386 case SEQ_EXTRA_DATA_COPY:
387 bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
388 coder->extra_tail->data, &coder->pos,
389 (size_t)(coder->extra_tail->size));
// Once coder->pos reaches the Record size, terminate the data with '\0'
// and move on to the next Record.
391 if ((size_t)(coder->extra_tail->size) == coder->pos) {
392 coder->extra_tail->data[coder->pos] = '\0';
394 coder->todo_count = 0;
395 coder->sequence = SEQ_EXTRA_ALLOC;
400 case SEQ_EXTRA_DUMMY_ALLOC:
401 // Not really alloc, just initialize the dummy entry.
402 coder->extra_dummy = (lzma_extra){
409 coder->todo_count = 1;
410 coder->sequence = SEQ_EXTRA_DUMMY_ID;
413 case SEQ_EXTRA_DUMMY_COPY: {
414 // Simply skip as many bytes as indicated by Extra Record Size.
415 // We don't check EXTRA_SIZE_MAX because we don't
416 // allocate any memory to hold the data.
417 const size_t in_avail = coder->buffer_size - coder->buffer_pos;
// Never skip past the end of the buffered input.
418 const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
419 coder->buffer_pos += skip;
422 if (coder->tmp == 0) {
423 coder->todo_count = 0;
424 coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
431 return LZMA_PROG_ERROR;
/// Coder callback (installed as next->code by metadata_decoder_init()).
///
/// When coder->buffer has been fully consumed, refills it by calling
/// coder->block_decoder.code() with the caller's input, then passes the
/// buffer to process(). After the Block decoder has reported
/// LZMA_STREAM_END, returns LZMA_STREAM_END if nothing is left half-parsed
/// (coder->todo_count == 0) and LZMA_DATA_ERROR otherwise.
///
/// The out, out_pos, out_size and action parameters are marked unused.
439 metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
440 const uint8_t *restrict in, size_t *restrict in_pos,
441 size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
442 size_t *restrict out_pos lzma_attribute((unused)),
443 size_t out_size lzma_attribute((unused)),
444 lzma_action action lzma_attribute((unused)))
446 bool end_was_reached = false;
449 // Fill the buffer if it is empty.
450 if (coder->buffer_pos == coder->buffer_size) {
451 coder->buffer_pos = 0;
452 coder->buffer_size = 0;
454 const lzma_ret ret = coder->block_decoder.code(
455 coder->block_decoder.coder, allocator,
456 in, in_pos, in_size, coder->buffer,
457 &coder->buffer_size, LZMA_BUFFER_SIZE,
462 // Return immediately if we got no new data.
463 if (coder->buffer_size == 0)
// Remember that the Block decoder finished so that the end-of-input
// checks below are run after the last buffer has been processed.
468 case LZMA_STREAM_END:
469 end_was_reached = true;
477 // Process coder->buffer.
478 const lzma_ret ret = process(coder, allocator);
482 // On success, process() eats all the input.
483 assert(coder->buffer_pos == coder->buffer_size);
485 if (end_was_reached) {
486 // Check that the sequence is not in the
487 // middle of anything.
488 if (coder->todo_count != 0)
489 return LZMA_DATA_ERROR;
491 // If Size of Header Metadata Block was not
492 // present, we use zero as its size instead
493 // of LZMA_VLI_VALUE_UNKNOWN.
494 if (coder->metadata->header_metadata_size
495 == LZMA_VLI_VALUE_UNKNOWN)
496 coder->metadata->header_metadata_size = 0;
498 return LZMA_STREAM_END;
/// Cleanup callback (installed as next->end by metadata_decoder_init()):
/// ends the nested Block decoder and then frees the coder with the same
/// allocator.
///
/// NOTE(review): the Index and Extra lists reachable through
/// coder->metadata are not released here; presumably the caller that
/// supplied *metadata owns them -- confirm.
505 metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
507 lzma_next_coder_end(&coder->block_decoder, allocator);
508 lzma_free(coder, allocator);
/// Sets up *next as a Metadata decoder that writes its results to *metadata.
///
/// Returns LZMA_PROG_ERROR if options or metadata is NULL and
/// LZMA_MEM_ERROR if the coder cannot be allocated; otherwise returns
/// whatever lzma_block_decoder_init() returns for the nested Block decoder.
///
/// The coder is allocated, and the code/end callbacks installed, only when
/// next->coder is NULL. The size fields of *metadata are set to
/// LZMA_VLI_VALUE_UNKNOWN, its index and extra pointers to NULL, and the
/// parser state is reset to SEQ_FLAGS with an empty buffer.
514 metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
515 lzma_options_block *options, lzma_metadata *metadata,
518 if (options == NULL || metadata == NULL)
519 return LZMA_PROG_ERROR;
521 if (next->coder == NULL) {
522 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
523 if (next->coder == NULL)
524 return LZMA_MEM_ERROR;
526 next->code = &metadata_decode;
527 next->end = &metadata_decoder_end;
528 next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
// Start from "field not present"; process() overwrites these for the
// fields whose flag bits are set.
531 metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
532 metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
533 metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
534 metadata->index = NULL;
535 metadata->extra = NULL;
537 next->coder->sequence = SEQ_FLAGS;
538 next->coder->todo_count = 0;
539 next->coder->pos = 0;
540 next->coder->tmp = 0;
541 next->coder->metadata = metadata;
542 next->coder->index_current = NULL;
543 next->coder->index_count = 0;
544 next->coder->index_total_size = 0;
545 next->coder->index_uncompressed_size = 0;
546 next->coder->want_extra = want_extra;
547 next->coder->extra_tail = NULL;
548 next->coder->buffer_pos = 0;
549 next->coder->buffer_size = 0;
551 return lzma_block_decoder_init(
552 &next->coder->block_decoder, allocator, options);
/// Entry point taking a lzma_next_coder: hands metadata_decoder_init and
/// the arguments options, metadata and want_extra to lzma_next_coder_init.
557 lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
558 lzma_options_block *options, lzma_metadata *metadata,
561 lzma_next_coder_init(metadata_decoder_init, next, allocator,
562 options, metadata, want_extra);
/// Public entry point taking a lzma_stream: initializes *strm through
/// lzma_next_strm_init with lzma_metadata_decoder_init, then marks LZMA_RUN
/// as a supported action for the stream.
566 extern LZMA_API lzma_ret
567 lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
568 lzma_metadata *metadata, lzma_bool want_extra)
570 lzma_next_strm_init(strm, lzma_metadata_decoder_init,
571 options, metadata, want_extra);
573 strm->internal->supported_actions[LZMA_RUN] = true;