1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file subblock_decoder.c
4 /// \brief Decoder of the Subblock filter
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "raw_decoder.h"
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
49 /// Number of bytes left in the current Subblock Data field.
52 /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
53 lzma_vli uncompressed_size;
55 /// Number of consecutive Subblocks with Subblock Type Padding
58 /// True when .next.code() has returned LZMA_STREAM_END.
61 /// True when the Subblock decoder has detected End of Payload Marker.
62 /// This may become true before next_finished becomes true.
65 /// True if Subfilters are allowed.
66 bool allow_subfilters;
68 /// Indicates if at least one byte of decoded output has been
69 /// produced after enabling Subfilter.
70 bool got_output_with_subfilter;
72 /// Possible subfilter
73 lzma_next_coder subfilter;
75 /// Filter Flags decoder is needed to parse the ID and Properties
77 lzma_next_coder filter_flags_decoder;
79 /// The filter_flags_decoder stores its results here.
80 lzma_options_filter filter_flags;
82 /// Options for the Subblock decoder helper. This is used to tell
83 /// the helper when it should return LZMA_STREAM_END to the subfilter.
84 lzma_options_subblock_helper helper;
87 /// How many times buffer should be repeated
90 /// Size of the buffer
93 /// Position in the buffer
96 /// Buffer to hold the data to be repeated
97 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
100 /// Temporary buffer needed when the Subblock filter is not the last
101 /// filter in the chain. The output of the next filter is first
102 /// decoded into buffer[], which is then used as input for the actual
103 /// Subblock decoder.
107 uint8_t buffer[LZMA_BUFFER_SIZE];
112 /// Values of valid Subblock Flags
123 /// Substracts size from coder->uncompressed_size uncompressed size is known
124 /// and size isn't bigger than coder->uncompressed_size.
126 update_uncompressed_size(lzma_coder *coder, size_t size)
128 if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
129 if ((lzma_vli)(size) > coder->uncompressed_size)
132 coder->uncompressed_size -= size;
139 /// Calls the subfilter and updates coder->uncompressed_size.
141 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
142 const uint8_t *in, size_t *in_pos,
143 size_t in_size, uint8_t *restrict out,
144 size_t *restrict out_pos, size_t out_size, lzma_action action)
146 assert(coder->subfilter.code != NULL);
148 const size_t out_start = *out_pos;
150 // Call the subfilter.
151 const lzma_ret ret = coder->subfilter.code(
152 coder->subfilter.coder, allocator,
153 in, in_pos, in_size, out, out_pos, out_size, action);
155 // Update uncompressed_size.
156 if (update_uncompressed_size(coder, *out_pos - out_start))
157 return LZMA_DATA_ERROR;
164 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
165 const uint8_t *in, size_t *in_pos,
166 size_t in_size, uint8_t *restrict out,
167 size_t *restrict out_pos, size_t out_size, lzma_action action)
169 while (*out_pos < out_size && (*in_pos < in_size
170 || coder->sequence == SEQ_DATA))
171 switch (coder->sequence) {
173 if ((in[*in_pos] >> 4) != FLAG_PADDING)
176 // Do the correct action depending on the Subblock Type.
177 switch (in[*in_pos] >> 4) {
179 // Only check that reserved bits are zero.
180 if (++coder->padding > PADDING_MAX
181 || in[*in_pos] & 0x0F)
182 return LZMA_DATA_ERROR;
187 // Check that reserved bits are zero.
188 if (in[*in_pos] & 0x0F)
189 return LZMA_DATA_ERROR;
191 // There must be no Subfilter enabled.
192 if (coder->subfilter.code != NULL)
193 return LZMA_DATA_ERROR;
195 // End of Payload Marker must not be used if
196 // uncompressed size is known.
197 if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
198 return LZMA_DATA_ERROR;
201 return LZMA_STREAM_END;
204 // First four bits of the Subblock Data size.
205 coder->size = in[*in_pos] & 0x0F;
207 coder->got_output_with_subfilter = true;
208 coder->sequence = SEQ_SIZE_1;
212 // First four bits of the Repeat Count. We use
213 // coder->size as a temporary place for it.
214 coder->size = in[*in_pos] & 0x0F;
216 coder->got_output_with_subfilter = true;
217 coder->sequence = SEQ_REPEAT_COUNT_1;
220 case FLAG_SET_SUBFILTER: {
221 if ((in[*in_pos] & 0x0F)
222 || coder->subfilter.code != NULL
223 || !coder->allow_subfilters)
224 return LZMA_DATA_ERROR;
226 assert(coder->filter_flags.options == NULL);
227 return_if_error(lzma_filter_flags_decoder_init(
228 &coder->filter_flags_decoder,
229 allocator, &coder->filter_flags));
231 coder->got_output_with_subfilter = false;
234 coder->sequence = SEQ_FILTER_FLAGS;
238 case FLAG_END_SUBFILTER:
239 if (coder->subfilter.code == NULL
240 || !coder->got_output_with_subfilter)
241 return LZMA_DATA_ERROR;
243 // Tell the helper filter to indicate End of Input
245 coder->helper.end_was_reached = true;
248 const lzma_ret ret = subfilter_decode(coder, allocator,
249 NULL, &dummy, 0, out, out_pos,out_size,
252 // If we didn't reach the end of the subfilter's output
253 // yet, return to the application. On the next call we
254 // will get to this same switch-case again, because we
255 // haven't updated *in_pos yet.
256 if (ret != LZMA_STREAM_END)
259 // Free Subfilter's memory. This is a bit debatable,
260 // since we could avoid some malloc()/free() calls
261 // if the same Subfilter gets used soon again. But
262 // if Subfilter isn't used again, we could leave
263 // a memory-hogging filter dangling until someone
264 // frees Subblock filter itself.
265 lzma_next_coder_end(&coder->subfilter, allocator);
267 // Free memory used for subfilter options. This is
268 // safe, because we don't support any Subfilter that
269 // would allow pointers in the options structure.
270 lzma_free(coder->filter_flags.options, allocator);
271 coder->filter_flags.options = NULL;
275 if (coder->uncompressed_size == 0)
276 return LZMA_STREAM_END;
281 return LZMA_DATA_ERROR;
288 case SEQ_REPEAT_COUNT_1:
289 // We use the same code to parse
290 // - the Size (28 bits) in Subblocks of type Data; and
291 // - the Repeat count (28 bits) in Subblocks of type
293 coder->size |= (size_t)(in[*in_pos]) << 4;
299 case SEQ_REPEAT_COUNT_2:
300 coder->size |= (size_t)(in[*in_pos]) << 12;
306 case SEQ_REPEAT_COUNT_3:
307 coder->size |= (size_t)(in[*in_pos]) << 20;
309 // The real value is the stored value plus one.
316 case SEQ_REPEAT_SIZE:
317 // Move the Repeat Count to the correct variable and parse
318 // the Size of the Data to be repeated.
319 coder->repeat.count = coder->size;
320 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
321 coder->repeat.pos = 0;
323 coder->sequence = SEQ_REPEAT_READ_DATA;
326 case SEQ_REPEAT_READ_DATA: {
327 // Fill coder->repeat.buffer[].
328 const size_t in_avail = in_size - *in_pos;
329 const size_t out_avail
330 = coder->repeat.size - coder->repeat.pos;
331 const size_t copy_size = MIN(in_avail, out_avail);
333 memcpy(coder->repeat.buffer + coder->repeat.pos,
334 in + *in_pos, copy_size);
335 *in_pos += copy_size;
336 coder->repeat.pos += copy_size;
338 if (coder->repeat.pos == coder->repeat.size) {
339 coder->repeat.pos = 0;
341 if (coder->repeat.size == 1
342 && coder->subfilter.code == NULL)
343 coder->sequence = SEQ_REPEAT_FAST;
345 coder->sequence = SEQ_REPEAT_NORMAL;
351 case SEQ_REPEAT_FAST: {
352 // Optimization for cases when there is only one byte to
353 // repeat and no Subfilter.
354 const size_t out_avail = out_size - *out_pos;
355 const size_t copy_size = MIN(coder->repeat.count, out_avail);
357 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
359 *out_pos += copy_size;
360 coder->repeat.count -= copy_size;
362 if (update_uncompressed_size(coder, copy_size))
363 return LZMA_DATA_ERROR;
365 if (coder->repeat.count == 0) {
366 if (coder->uncompressed_size == 0)
367 return LZMA_STREAM_END;
372 coder->sequence = SEQ_FLAGS;
376 case SEQ_REPEAT_NORMAL:
378 // Cycle the repeat buffer if needed.
379 if (coder->repeat.pos == coder->repeat.size) {
380 if (--coder->repeat.count == 0) {
381 coder->sequence = SEQ_FLAGS;
385 coder->repeat.pos = 0;
388 if (coder->subfilter.code == NULL) {
389 const size_t copy_size = bufcpy(
390 coder->repeat.buffer,
393 out, out_pos, out_size);
395 if (update_uncompressed_size(coder, copy_size))
396 return LZMA_DATA_ERROR;
399 const lzma_ret ret = subfilter_decode(
401 coder->repeat.buffer,
404 out, out_pos, out_size,
407 if (ret == LZMA_STREAM_END) {
408 // End of Subfilter can occur only at
409 // a Subblock boundary.
410 if (coder->repeat.pos
411 != coder->repeat.size
414 return LZMA_DATA_ERROR;
416 // We need a Subblock with Unset
417 // Subfilter before more data.
418 coder->sequence = SEQ_FILTER_END;
421 } else if (ret != LZMA_OK) {
425 } while (*out_pos < out_size);
430 // Limit the amount of input to match the available
431 // Subblock Data size.
433 if (in_size - *in_pos > coder->size)
434 in_limit = *in_pos + coder->size;
438 if (coder->subfilter.code == NULL) {
439 const size_t copy_size = bufcpy(
440 in, in_pos, in_limit,
441 out, out_pos, out_size);
443 coder->size -= copy_size;
445 if (update_uncompressed_size(coder, copy_size))
446 return LZMA_DATA_ERROR;
449 const size_t in_start = *in_pos;
450 const lzma_ret ret = subfilter_decode(
452 in, in_pos, in_limit,
453 out, out_pos, out_size,
456 // Update the number of unprocessed bytes left in
457 // this Subblock. This assert() is true because
458 // in_limit prevents *in_pos getting too big.
459 assert(*in_pos - in_start <= coder->size);
460 coder->size -= *in_pos - in_start;
462 if (ret == LZMA_STREAM_END) {
463 // End of Subfilter can occur only at
464 // a Subblock boundary.
465 if (coder->size != 0)
466 return LZMA_DATA_ERROR;
468 // We need a Subblock with Unset
469 // Subfilter before more data.
470 coder->sequence = SEQ_FILTER_END;
478 // If we couldn't process the whole Subblock Data yet, return.
482 // Check if we have decoded all the data.
483 if (coder->uncompressed_size == 0
484 && coder->subfilter.code == NULL)
485 return LZMA_STREAM_END;
487 coder->sequence = SEQ_FLAGS;
491 case SEQ_FILTER_FLAGS: {
492 const lzma_ret ret = coder->filter_flags_decoder.code(
493 coder->filter_flags_decoder.coder, allocator,
494 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
495 if (ret != LZMA_STREAM_END)
496 return ret == LZMA_HEADER_ERROR
497 ? LZMA_DATA_ERROR : ret;
499 // Don't free the filter_flags_decoder. It doesn't take much
500 // memory and we may need it again.
502 // Initialize the Subfilter. Subblock and Copy filters are
504 if (coder->filter_flags.id == LZMA_FILTER_COPY
505 || coder->filter_flags.id
506 == LZMA_FILTER_SUBBLOCK)
507 return LZMA_DATA_ERROR;
509 coder->helper.end_was_reached = false;
511 lzma_options_filter filters[3] = {
513 .id = coder->filter_flags.id,
514 .options = coder->filter_flags.options,
516 .id = LZMA_FILTER_SUBBLOCK_HELPER,
517 .options = &coder->helper,
519 .id = LZMA_VLI_VALUE_UNKNOWN,
524 // Optimization: We know that LZMA uses End of Payload Marker
525 // (not End of Input), so we can omit the helper filter.
526 if (filters[0].id == LZMA_FILTER_LZMA)
527 filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
529 return_if_error(lzma_raw_decoder_init(
530 &coder->subfilter, allocator,
531 filters, LZMA_VLI_VALUE_UNKNOWN, false));
533 coder->sequence = SEQ_FLAGS;
538 // We are in the beginning of a Subblock. The next Subblock
539 // whose type is not Padding, must indicate end of Subfilter.
540 if (in[*in_pos] == (FLAG_PADDING << 4)) {
545 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
546 return LZMA_DATA_ERROR;
548 coder->sequence = SEQ_FLAGS;
552 return LZMA_PROG_ERROR;
560 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
561 const uint8_t *restrict in, size_t *restrict in_pos,
562 size_t in_size, uint8_t *restrict out,
563 size_t *restrict out_pos, size_t out_size, lzma_action action)
565 if (coder->next.code == NULL)
566 return decode_buffer(coder, allocator, in, in_pos, in_size,
567 out, out_pos, out_size, action);
569 while (*out_pos < out_size) {
570 if (!coder->next_finished
571 && coder->temp.pos == coder->temp.size) {
573 coder->temp.size = 0;
575 const lzma_ret ret = coder->next.code(
577 allocator, in, in_pos, in_size,
578 coder->temp.buffer, &coder->temp.size,
579 LZMA_BUFFER_SIZE, action);
581 if (ret == LZMA_STREAM_END)
582 coder->next_finished = true;
583 else if (coder->temp.size == 0 || ret != LZMA_OK)
587 if (coder->this_finished) {
588 if (coder->temp.pos != coder->temp.size)
589 return LZMA_DATA_ERROR;
591 if (coder->next_finished)
592 return LZMA_STREAM_END;
597 const lzma_ret ret = decode_buffer(coder, allocator,
598 coder->temp.buffer, &coder->temp.pos,
600 out, out_pos, out_size, action);
602 if (ret == LZMA_STREAM_END)
603 // The next coder in the chain hasn't finished
604 // yet. If the input data is valid, there
605 // must be no more output coming, but the
606 // next coder may still need a litle more
607 // input to detect End of Payload Marker.
608 coder->this_finished = true;
609 else if (ret != LZMA_OK)
611 else if (coder->next_finished && *out_pos < out_size)
612 return LZMA_DATA_ERROR;
620 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
622 lzma_next_coder_end(&coder->next, allocator);
623 lzma_next_coder_end(&coder->subfilter, allocator);
624 lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
625 lzma_free(coder->filter_flags.options, allocator);
626 lzma_free(coder, allocator);
632 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
633 const lzma_filter_info *filters)
635 if (next->coder == NULL) {
636 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
637 if (next->coder == NULL)
638 return LZMA_MEM_ERROR;
640 next->code = &subblock_decode;
641 next->end = &subblock_decoder_end;
643 next->coder->next = LZMA_NEXT_CODER_INIT;
644 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
645 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
648 lzma_next_coder_end(&next->coder->subfilter, allocator);
649 lzma_free(next->coder->filter_flags.options, allocator);
652 next->coder->filter_flags.options = NULL;
654 next->coder->sequence = SEQ_FLAGS;
655 next->coder->uncompressed_size = filters[0].uncompressed_size;
656 next->coder->padding = 0;
657 next->coder->next_finished = false;
658 next->coder->this_finished = false;
659 next->coder->temp.pos = 0;
660 next->coder->temp.size = 0;
662 if (filters[0].options != NULL)
663 next->coder->allow_subfilters = ((lzma_options_subblock *)(
664 filters[0].options))->allow_subfilters;
666 next->coder->allow_subfilters = false;
668 return lzma_next_filter_init(
669 &next->coder->next, allocator, filters + 1);