1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file subblock_decoder.c
4 /// \brief Decoder of the Subblock filter
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "raw_decoder.h"
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
33 // These require that there is at least one input
45 SEQ_SIZE_3, // This must be right before SEQ_DATA.
47 // These don't require any input to be available.
53 /// Number of bytes left in the current Subblock Data field.
56 /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
57 lzma_vli uncompressed_size;
59 /// Number of consecutive Subblocks with Subblock Type Padding
62 /// True when .next.code() has returned LZMA_STREAM_END.
65 /// True when the Subblock decoder has detected End of Payload Marker.
66 /// This may become true before next_finished becomes true.
69 /// True if Subfilters are allowed.
70 bool allow_subfilters;
72 /// Indicates if at least one byte of decoded output has been
73 /// produced after enabling Subfilter.
74 bool got_output_with_subfilter;
76 /// Possible subfilter
77 lzma_next_coder subfilter;
79 /// Filter Flags decoder is needed to parse the ID and Properties
81 lzma_next_coder filter_flags_decoder;
83 /// The filter_flags_decoder stores its results here.
84 lzma_options_filter filter_flags;
86 /// Options for the Subblock decoder helper. This is used to tell
87 /// the helper when it should return LZMA_STREAM_END to the subfilter.
88 lzma_options_subblock_helper helper;
91 /// How many times buffer should be repeated
94 /// Size of the buffer
97 /// Position in the buffer
100 /// Buffer to hold the data to be repeated
101 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
104 /// Temporary buffer needed when the Subblock filter is not the last
105 /// filter in the chain. The output of the next filter is first
106 /// decoded into buffer[], which is then used as input for the actual
107 /// Subblock decoder.
111 uint8_t buffer[LZMA_BUFFER_SIZE];
116 /// Values of valid Subblock Flags
127 /// Substracts size from coder->uncompressed_size uncompressed size is known
128 /// and size isn't bigger than coder->uncompressed_size.
130 update_uncompressed_size(lzma_coder *coder, size_t size)
132 if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
133 if ((lzma_vli)(size) > coder->uncompressed_size)
136 coder->uncompressed_size -= size;
143 /// Calls the subfilter and updates coder->uncompressed_size.
145 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
146 const uint8_t *in, size_t *in_pos,
147 size_t in_size, uint8_t *restrict out,
148 size_t *restrict out_pos, size_t out_size, lzma_action action)
150 assert(coder->subfilter.code != NULL);
152 const size_t out_start = *out_pos;
154 // Call the subfilter.
155 const lzma_ret ret = coder->subfilter.code(
156 coder->subfilter.coder, allocator,
157 in, in_pos, in_size, out, out_pos, out_size, action);
159 // Update uncompressed_size.
160 if (update_uncompressed_size(coder, *out_pos - out_start))
161 return LZMA_DATA_ERROR;
168 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
169 const uint8_t *in, size_t *in_pos,
170 size_t in_size, uint8_t *restrict out,
171 size_t *restrict out_pos, size_t out_size, lzma_action action)
173 while (*out_pos < out_size && (*in_pos < in_size
174 || coder->sequence >= SEQ_DATA))
175 switch (coder->sequence) {
177 if ((in[*in_pos] >> 4) != FLAG_PADDING)
180 // Do the correct action depending on the Subblock Type.
181 switch (in[*in_pos] >> 4) {
183 // Only check that reserved bits are zero.
184 if (++coder->padding > PADDING_MAX
185 || in[*in_pos] & 0x0F)
186 return LZMA_DATA_ERROR;
191 // Check that reserved bits are zero.
192 if (in[*in_pos] & 0x0F)
193 return LZMA_DATA_ERROR;
195 // There must be no Subfilter enabled.
196 if (coder->subfilter.code != NULL)
197 return LZMA_DATA_ERROR;
199 // End of Payload Marker must not be used if
200 // uncompressed size is known.
201 if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
202 return LZMA_DATA_ERROR;
205 return LZMA_STREAM_END;
208 // First four bits of the Subblock Data size.
209 coder->size = in[*in_pos] & 0x0F;
211 coder->got_output_with_subfilter = true;
212 coder->sequence = SEQ_SIZE_1;
216 // First four bits of the Repeat Count. We use
217 // coder->size as a temporary place for it.
218 coder->size = in[*in_pos] & 0x0F;
220 coder->got_output_with_subfilter = true;
221 coder->sequence = SEQ_REPEAT_COUNT_1;
224 case FLAG_SET_SUBFILTER: {
225 if ((in[*in_pos] & 0x0F)
226 || coder->subfilter.code != NULL
227 || !coder->allow_subfilters)
228 return LZMA_DATA_ERROR;
230 assert(coder->filter_flags.options == NULL);
231 return_if_error(lzma_filter_flags_decoder_init(
232 &coder->filter_flags_decoder,
233 allocator, &coder->filter_flags));
235 coder->got_output_with_subfilter = false;
238 coder->sequence = SEQ_FILTER_FLAGS;
242 case FLAG_END_SUBFILTER:
243 if (coder->subfilter.code == NULL
244 || !coder->got_output_with_subfilter)
245 return LZMA_DATA_ERROR;
247 // Tell the helper filter to indicate End of Input
249 coder->helper.end_was_reached = true;
252 const lzma_ret ret = subfilter_decode(coder, allocator,
253 NULL, &dummy, 0, out, out_pos,out_size,
256 // If we didn't reach the end of the subfilter's output
257 // yet, return to the application. On the next call we
258 // will get to this same switch-case again, because we
259 // haven't updated *in_pos yet.
260 if (ret != LZMA_STREAM_END)
263 // Free Subfilter's memory. This is a bit debatable,
264 // since we could avoid some malloc()/free() calls
265 // if the same Subfilter gets used soon again. But
266 // if Subfilter isn't used again, we could leave
267 // a memory-hogging filter dangling until someone
268 // frees Subblock filter itself.
269 lzma_next_coder_end(&coder->subfilter, allocator);
271 // Free memory used for subfilter options. This is
272 // safe, because we don't support any Subfilter that
273 // would allow pointers in the options structure.
274 lzma_free(coder->filter_flags.options, allocator);
275 coder->filter_flags.options = NULL;
279 if (coder->uncompressed_size == 0)
280 return LZMA_STREAM_END;
285 return LZMA_DATA_ERROR;
291 case SEQ_FILTER_FLAGS: {
292 const lzma_ret ret = coder->filter_flags_decoder.code(
293 coder->filter_flags_decoder.coder, allocator,
294 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
295 if (ret != LZMA_STREAM_END)
296 return ret == LZMA_HEADER_ERROR
297 ? LZMA_DATA_ERROR : ret;
299 // Don't free the filter_flags_decoder. It doesn't take much
300 // memory and we may need it again.
302 // Initialize the Subfilter. Subblock and Copy filters are
304 if (coder->filter_flags.id == LZMA_FILTER_COPY
305 || coder->filter_flags.id
306 == LZMA_FILTER_SUBBLOCK)
307 return LZMA_DATA_ERROR;
309 coder->helper.end_was_reached = false;
311 lzma_options_filter filters[3] = {
313 .id = coder->filter_flags.id,
314 .options = coder->filter_flags.options,
316 .id = LZMA_FILTER_SUBBLOCK_HELPER,
317 .options = &coder->helper,
319 .id = LZMA_VLI_VALUE_UNKNOWN,
324 // Optimization: We know that LZMA uses End of Payload Marker
325 // (not End of Input), so we can omit the helper filter.
326 if (filters[0].id == LZMA_FILTER_LZMA)
327 filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
329 return_if_error(lzma_raw_decoder_init(
330 &coder->subfilter, allocator,
331 filters, LZMA_VLI_VALUE_UNKNOWN, false));
333 coder->sequence = SEQ_FLAGS;
338 // We are in the beginning of a Subblock. The next Subblock
339 // whose type is not Padding, must indicate end of Subfilter.
340 if (in[*in_pos] == (FLAG_PADDING << 4)) {
345 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
346 return LZMA_DATA_ERROR;
348 coder->sequence = SEQ_FLAGS;
351 case SEQ_REPEAT_COUNT_1:
353 // We use the same code to parse
354 // - the Size (28 bits) in Subblocks of type Data; and
355 // - the Repeat count (28 bits) in Subblocks of type
357 coder->size |= (size_t)(in[*in_pos]) << 4;
362 case SEQ_REPEAT_COUNT_2:
364 coder->size |= (size_t)(in[*in_pos]) << 12;
369 case SEQ_REPEAT_COUNT_3:
371 coder->size |= (size_t)(in[*in_pos]) << 20;
374 // The real value is the stored value plus one.
377 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
378 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
382 case SEQ_REPEAT_SIZE:
383 // Move the Repeat Count to the correct variable and parse
384 // the Size of the Data to be repeated.
385 coder->repeat.count = coder->size;
386 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
387 coder->repeat.pos = 0;
389 coder->sequence = SEQ_REPEAT_READ_DATA;
392 case SEQ_REPEAT_READ_DATA: {
393 // Fill coder->repeat.buffer[].
394 const size_t in_avail = in_size - *in_pos;
395 const size_t out_avail
396 = coder->repeat.size - coder->repeat.pos;
397 const size_t copy_size = MIN(in_avail, out_avail);
399 memcpy(coder->repeat.buffer + coder->repeat.pos,
400 in + *in_pos, copy_size);
401 *in_pos += copy_size;
402 coder->repeat.pos += copy_size;
404 if (coder->repeat.pos == coder->repeat.size) {
405 coder->repeat.pos = 0;
407 if (coder->repeat.size == 1
408 && coder->subfilter.code == NULL)
409 coder->sequence = SEQ_REPEAT_FAST;
411 coder->sequence = SEQ_REPEAT_NORMAL;
418 // Limit the amount of input to match the available
419 // Subblock Data size.
421 if (in_size - *in_pos > coder->size)
422 in_limit = *in_pos + coder->size;
426 if (coder->subfilter.code == NULL) {
427 const size_t copy_size = bufcpy(
428 in, in_pos, in_limit,
429 out, out_pos, out_size);
431 coder->size -= copy_size;
433 if (update_uncompressed_size(coder, copy_size))
434 return LZMA_DATA_ERROR;
437 const size_t in_start = *in_pos;
438 const lzma_ret ret = subfilter_decode(
440 in, in_pos, in_limit,
441 out, out_pos, out_size,
444 // Update the number of unprocessed bytes left in
445 // this Subblock. This assert() is true because
446 // in_limit prevents *in_pos getting too big.
447 assert(*in_pos - in_start <= coder->size);
448 coder->size -= *in_pos - in_start;
450 if (ret == LZMA_STREAM_END) {
451 // End of Subfilter can occur only at
452 // a Subblock boundary.
453 if (coder->size != 0)
454 return LZMA_DATA_ERROR;
456 // We need a Subblock with Unset
457 // Subfilter before more data.
458 coder->sequence = SEQ_FILTER_END;
466 // If we couldn't process the whole Subblock Data yet, return.
470 // Check if we have decoded all the data.
471 if (coder->uncompressed_size == 0
472 && coder->subfilter.code == NULL)
473 return LZMA_STREAM_END;
475 coder->sequence = SEQ_FLAGS;
479 case SEQ_REPEAT_FAST: {
480 // Optimization for cases when there is only one byte to
481 // repeat and no Subfilter.
482 const size_t out_avail = out_size - *out_pos;
483 const size_t copy_size = MIN(coder->repeat.count, out_avail);
485 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
487 *out_pos += copy_size;
488 coder->repeat.count -= copy_size;
490 if (update_uncompressed_size(coder, copy_size))
491 return LZMA_DATA_ERROR;
493 if (coder->repeat.count == 0) {
494 assert(coder->subfilter.code == NULL);
495 if (coder->uncompressed_size == 0)
496 return LZMA_STREAM_END;
501 coder->sequence = SEQ_FLAGS;
505 case SEQ_REPEAT_NORMAL:
507 // Cycle the repeat buffer if needed.
508 if (coder->repeat.pos == coder->repeat.size) {
509 if (--coder->repeat.count == 0) {
510 coder->sequence = SEQ_FLAGS;
514 coder->repeat.pos = 0;
517 if (coder->subfilter.code == NULL) {
518 const size_t copy_size = bufcpy(
519 coder->repeat.buffer,
522 out, out_pos, out_size);
524 if (update_uncompressed_size(coder, copy_size))
525 return LZMA_DATA_ERROR;
528 const lzma_ret ret = subfilter_decode(
530 coder->repeat.buffer,
533 out, out_pos, out_size,
536 if (ret == LZMA_STREAM_END) {
537 // End of Subfilter can occur only at
538 // a Subblock boundary.
539 if (coder->repeat.pos
540 != coder->repeat.size
543 return LZMA_DATA_ERROR;
545 // We need a Subblock with Unset
546 // Subfilter before more data.
547 coder->sequence = SEQ_FILTER_END;
550 } else if (ret != LZMA_OK) {
554 } while (*out_pos < out_size);
556 // Check if we have decoded all the data.
557 if (coder->uncompressed_size == 0
558 && coder->subfilter.code == NULL)
559 return LZMA_STREAM_END;
564 return LZMA_PROG_ERROR;
572 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
573 const uint8_t *restrict in, size_t *restrict in_pos,
574 size_t in_size, uint8_t *restrict out,
575 size_t *restrict out_pos, size_t out_size, lzma_action action)
577 if (coder->next.code == NULL)
578 return decode_buffer(coder, allocator, in, in_pos, in_size,
579 out, out_pos, out_size, action);
581 while (*out_pos < out_size) {
582 if (!coder->next_finished
583 && coder->temp.pos == coder->temp.size) {
585 coder->temp.size = 0;
587 const lzma_ret ret = coder->next.code(
589 allocator, in, in_pos, in_size,
590 coder->temp.buffer, &coder->temp.size,
591 LZMA_BUFFER_SIZE, action);
593 if (ret == LZMA_STREAM_END)
594 coder->next_finished = true;
595 else if (coder->temp.size == 0 || ret != LZMA_OK)
599 if (coder->this_finished) {
600 if (coder->temp.pos != coder->temp.size)
601 return LZMA_DATA_ERROR;
603 if (coder->next_finished)
604 return LZMA_STREAM_END;
609 const lzma_ret ret = decode_buffer(coder, allocator,
610 coder->temp.buffer, &coder->temp.pos,
612 out, out_pos, out_size, action);
614 if (ret == LZMA_STREAM_END)
615 // The next coder in the chain hasn't finished
616 // yet. If the input data is valid, there
617 // must be no more output coming, but the
618 // next coder may still need a litle more
619 // input to detect End of Payload Marker.
620 coder->this_finished = true;
621 else if (ret != LZMA_OK)
623 else if (coder->next_finished && *out_pos < out_size)
624 return LZMA_DATA_ERROR;
632 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
634 lzma_next_coder_end(&coder->next, allocator);
635 lzma_next_coder_end(&coder->subfilter, allocator);
636 lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
637 lzma_free(coder->filter_flags.options, allocator);
638 lzma_free(coder, allocator);
644 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
645 const lzma_filter_info *filters)
647 if (next->coder == NULL) {
648 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
649 if (next->coder == NULL)
650 return LZMA_MEM_ERROR;
652 next->code = &subblock_decode;
653 next->end = &subblock_decoder_end;
655 next->coder->next = LZMA_NEXT_CODER_INIT;
656 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
657 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
660 lzma_next_coder_end(&next->coder->subfilter, allocator);
661 lzma_free(next->coder->filter_flags.options, allocator);
664 next->coder->filter_flags.options = NULL;
666 next->coder->sequence = SEQ_FLAGS;
667 next->coder->uncompressed_size = filters[0].uncompressed_size;
668 next->coder->padding = 0;
669 next->coder->next_finished = false;
670 next->coder->this_finished = false;
671 next->coder->temp.pos = 0;
672 next->coder->temp.size = 0;
674 if (filters[0].options != NULL)
675 next->coder->allow_subfilters = ((lzma_options_subblock *)(
676 filters[0].options))->allow_subfilters;
678 next->coder->allow_subfilters = false;
680 return lzma_next_filter_init(
681 &next->coder->next, allocator, filters + 1);