1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file subblock_decoder.c
4 /// \brief Decoder of the Subblock filter
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "filter_decoder.h"
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
33 // These require that there is at least one input
45 SEQ_SIZE_3, // This must be right before SEQ_DATA.
47 // These don't require any input to be available.
53 /// Number of bytes left in the current Subblock Data field.
56 /// Number of consecutive Subblocks with Subblock Type Padding
59 /// True when .next.code() has returned LZMA_STREAM_END.
62 /// True when the Subblock decoder has detected End of Payload Marker.
63 /// This may become true before next_finished becomes true.
66 /// True if Subfilters are allowed.
67 bool allow_subfilters;
69 /// Indicates if at least one byte of decoded output has been
70 /// produced after enabling Subfilter.
71 bool got_output_with_subfilter;
73 /// Possible subfilter
74 lzma_next_coder subfilter;
76 /// Filter Flags decoder is needed to parse the ID and Properties
78 lzma_next_coder filter_flags_decoder;
80 /// The filter_flags_decoder stores its results here.
81 lzma_filter filter_flags;
83 /// Options for the Subblock decoder helper. This is used to tell
84 /// the helper when it should return LZMA_STREAM_END to the subfilter.
85 lzma_options_subblock_helper helper;
88 /// How many times buffer should be repeated
91 /// Size of the buffer
94 /// Position in the buffer
97 /// Buffer to hold the data to be repeated
98 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
101 /// Temporary buffer needed when the Subblock filter is not the last
102 /// filter in the chain. The output of the next filter is first
103 /// decoded into buffer[], which is then used as input for the actual
104 /// Subblock decoder.
108 uint8_t buffer[LZMA_BUFFER_SIZE];
113 /// Values of valid Subblock Flags
124 /// Calls the subfilter and updates coder->uncompressed_size.
126 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
127 const uint8_t *in, size_t *in_pos,
128 size_t in_size, uint8_t *restrict out,
129 size_t *restrict out_pos, size_t out_size, lzma_action action)
131 assert(coder->subfilter.code != NULL);
133 // Call the subfilter.
134 const lzma_ret ret = coder->subfilter.code(
135 coder->subfilter.coder, allocator,
136 in, in_pos, in_size, out, out_pos, out_size, action);
143 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
144 const uint8_t *in, size_t *in_pos,
145 size_t in_size, uint8_t *restrict out,
146 size_t *restrict out_pos, size_t out_size, lzma_action action)
148 while (*out_pos < out_size && (*in_pos < in_size
149 || coder->sequence >= SEQ_DATA))
150 switch (coder->sequence) {
152 // Do the correct action depending on the Subblock Type.
153 switch (in[*in_pos] >> 4) {
155 // Only check that reserved bits are zero.
156 if (++coder->padding > PADDING_MAX
157 || in[*in_pos] & 0x0F)
158 return LZMA_DATA_ERROR;
163 // There must be no Padding before EOPM.
164 if (coder->padding != 0)
165 return LZMA_DATA_ERROR;
167 // Check that reserved bits are zero.
168 if (in[*in_pos] & 0x0F)
169 return LZMA_DATA_ERROR;
171 // There must be no Subfilter enabled.
172 if (coder->subfilter.code != NULL)
173 return LZMA_DATA_ERROR;
176 return LZMA_STREAM_END;
179 // First four bits of the Subblock Data size.
180 coder->size = in[*in_pos] & 0x0F;
182 coder->got_output_with_subfilter = true;
183 coder->sequence = SEQ_SIZE_1;
187 // First four bits of the Repeat Count. We use
188 // coder->size as a temporary place for it.
189 coder->size = in[*in_pos] & 0x0F;
191 coder->got_output_with_subfilter = true;
192 coder->sequence = SEQ_REPEAT_COUNT_1;
195 case FLAG_SET_SUBFILTER: {
196 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
197 || coder->subfilter.code != NULL
198 || !coder->allow_subfilters)
199 return LZMA_DATA_ERROR;
201 assert(coder->filter_flags.options == NULL);
203 // return_if_error(lzma_filter_flags_decoder_init(
204 // &coder->filter_flags_decoder,
205 // allocator, &coder->filter_flags));
207 coder->got_output_with_subfilter = false;
210 coder->sequence = SEQ_FILTER_FLAGS;
214 case FLAG_END_SUBFILTER: {
215 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
216 || coder->subfilter.code == NULL
217 || !coder->got_output_with_subfilter)
218 return LZMA_DATA_ERROR;
220 // Tell the helper filter to indicate End of Input
222 coder->helper.end_was_reached = true;
225 const lzma_ret ret = subfilter_decode(coder, allocator,
226 NULL, &dummy, 0, out, out_pos,out_size,
229 // If we didn't reach the end of the subfilter's output
230 // yet, return to the application. On the next call we
231 // will get to this same switch-case again, because we
232 // haven't updated *in_pos yet.
233 if (ret != LZMA_STREAM_END)
236 // Free Subfilter's memory. This is a bit debatable,
237 // since we could avoid some malloc()/free() calls
238 // if the same Subfilter gets used soon again. But
239 // if Subfilter isn't used again, we could leave
240 // a memory-hogging filter dangling until someone
241 // frees Subblock filter itself.
242 lzma_next_end(&coder->subfilter, allocator);
244 // Free memory used for subfilter options. This is
245 // safe, because we don't support any Subfilter that
246 // would allow pointers in the options structure.
247 lzma_free(coder->filter_flags.options, allocator);
248 coder->filter_flags.options = NULL;
256 return LZMA_DATA_ERROR;
262 case SEQ_FILTER_FLAGS: {
263 const lzma_ret ret = coder->filter_flags_decoder.code(
264 coder->filter_flags_decoder.coder, allocator,
265 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
266 if (ret != LZMA_STREAM_END)
267 return ret == LZMA_OPTIONS_ERROR
268 ? LZMA_DATA_ERROR : ret;
270 // Don't free the filter_flags_decoder. It doesn't take much
271 // memory and we may need it again.
273 // Initialize the Subfilter. Subblock and Copy filters are
275 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
276 return LZMA_DATA_ERROR;
278 coder->helper.end_was_reached = false;
280 lzma_filter filters[3] = {
282 .id = coder->filter_flags.id,
283 .options = coder->filter_flags.options,
285 .id = LZMA_FILTER_SUBBLOCK_HELPER,
286 .options = &coder->helper,
288 .id = LZMA_VLI_UNKNOWN,
293 // Optimization: We know that LZMA uses End of Payload Marker
294 // (not End of Input), so we can omit the helper filter.
295 if (filters[0].id == LZMA_FILTER_LZMA1)
296 filters[1].id = LZMA_VLI_UNKNOWN;
298 return_if_error(lzma_raw_decoder_init(
299 &coder->subfilter, allocator, filters));
301 coder->sequence = SEQ_FLAGS;
306 // We are in the beginning of a Subblock. The next Subblock
307 // whose type is not Padding, must indicate end of Subfilter.
308 if (in[*in_pos] == (FLAG_PADDING << 4)) {
313 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
314 return LZMA_DATA_ERROR;
316 coder->sequence = SEQ_FLAGS;
319 case SEQ_REPEAT_COUNT_1:
321 // We use the same code to parse
322 // - the Size (28 bits) in Subblocks of type Data; and
323 // - the Repeat count (28 bits) in Subblocks of type
325 coder->size |= (size_t)(in[*in_pos]) << 4;
330 case SEQ_REPEAT_COUNT_2:
332 coder->size |= (size_t)(in[*in_pos]) << 12;
337 case SEQ_REPEAT_COUNT_3:
339 coder->size |= (size_t)(in[*in_pos]) << 20;
342 // The real value is the stored value plus one.
345 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
346 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
350 case SEQ_REPEAT_SIZE:
351 // Move the Repeat Count to the correct variable and parse
352 // the Size of the Data to be repeated.
353 coder->repeat.count = coder->size;
354 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
355 coder->repeat.pos = 0;
357 // The size of the Data field must be bigger than the number
358 // of Padding bytes before this Subblock.
359 if (coder->repeat.size <= coder->padding)
360 return LZMA_DATA_ERROR;
364 coder->sequence = SEQ_REPEAT_READ_DATA;
367 case SEQ_REPEAT_READ_DATA: {
368 // Fill coder->repeat.buffer[].
369 const size_t in_avail = in_size - *in_pos;
370 const size_t out_avail
371 = coder->repeat.size - coder->repeat.pos;
372 const size_t copy_size = MIN(in_avail, out_avail);
374 memcpy(coder->repeat.buffer + coder->repeat.pos,
375 in + *in_pos, copy_size);
376 *in_pos += copy_size;
377 coder->repeat.pos += copy_size;
379 if (coder->repeat.pos == coder->repeat.size) {
380 coder->repeat.pos = 0;
382 if (coder->repeat.size == 1
383 && coder->subfilter.code == NULL)
384 coder->sequence = SEQ_REPEAT_FAST;
386 coder->sequence = SEQ_REPEAT_NORMAL;
393 // The size of the Data field must be bigger than the number
394 // of Padding bytes before this Subblock.
395 assert(coder->size > 0);
396 if (coder->size <= coder->padding)
397 return LZMA_DATA_ERROR;
401 // Limit the amount of input to match the available
402 // Subblock Data size.
404 if (in_size - *in_pos > coder->size)
405 in_limit = *in_pos + coder->size;
409 if (coder->subfilter.code == NULL) {
410 const size_t copy_size = lzma_bufcpy(
411 in, in_pos, in_limit,
412 out, out_pos, out_size);
414 coder->size -= copy_size;
416 const size_t in_start = *in_pos;
417 const lzma_ret ret = subfilter_decode(
419 in, in_pos, in_limit,
420 out, out_pos, out_size,
423 // Update the number of unprocessed bytes left in
424 // this Subblock. This assert() is true because
425 // in_limit prevents *in_pos getting too big.
426 assert(*in_pos - in_start <= coder->size);
427 coder->size -= *in_pos - in_start;
429 if (ret == LZMA_STREAM_END) {
430 // End of Subfilter can occur only at
431 // a Subblock boundary.
432 if (coder->size != 0)
433 return LZMA_DATA_ERROR;
435 // We need a Subblock with Unset
436 // Subfilter before more data.
437 coder->sequence = SEQ_FILTER_END;
445 // If we couldn't process the whole Subblock Data yet, return.
449 coder->sequence = SEQ_FLAGS;
453 case SEQ_REPEAT_FAST: {
454 // Optimization for cases when there is only one byte to
455 // repeat and no Subfilter.
456 const size_t out_avail = out_size - *out_pos;
457 const size_t copy_size = MIN(coder->repeat.count, out_avail);
459 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
461 *out_pos += copy_size;
462 coder->repeat.count -= copy_size;
464 if (coder->repeat.count != 0)
467 coder->sequence = SEQ_FLAGS;
471 case SEQ_REPEAT_NORMAL:
473 // Cycle the repeat buffer if needed.
474 if (coder->repeat.pos == coder->repeat.size) {
475 if (--coder->repeat.count == 0) {
476 coder->sequence = SEQ_FLAGS;
480 coder->repeat.pos = 0;
483 if (coder->subfilter.code == NULL) {
484 lzma_bufcpy(coder->repeat.buffer,
487 out, out_pos, out_size);
489 const lzma_ret ret = subfilter_decode(
491 coder->repeat.buffer,
494 out, out_pos, out_size,
497 if (ret == LZMA_STREAM_END) {
498 // End of Subfilter can occur only at
499 // a Subblock boundary.
500 if (coder->repeat.pos
501 != coder->repeat.size
504 return LZMA_DATA_ERROR;
506 // We need a Subblock with Unset
507 // Subfilter before more data.
508 coder->sequence = SEQ_FILTER_END;
511 } else if (ret != LZMA_OK) {
515 } while (*out_pos < out_size);
520 return LZMA_PROG_ERROR;
528 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
529 const uint8_t *restrict in, size_t *restrict in_pos,
530 size_t in_size, uint8_t *restrict out,
531 size_t *restrict out_pos, size_t out_size, lzma_action action)
533 if (coder->next.code == NULL)
534 return decode_buffer(coder, allocator, in, in_pos, in_size,
535 out, out_pos, out_size, action);
537 while (*out_pos < out_size) {
538 if (!coder->next_finished
539 && coder->temp.pos == coder->temp.size) {
541 coder->temp.size = 0;
543 const lzma_ret ret = coder->next.code(
545 allocator, in, in_pos, in_size,
546 coder->temp.buffer, &coder->temp.size,
547 LZMA_BUFFER_SIZE, action);
549 if (ret == LZMA_STREAM_END)
550 coder->next_finished = true;
551 else if (coder->temp.size == 0 || ret != LZMA_OK)
555 if (coder->this_finished) {
556 if (coder->temp.pos != coder->temp.size)
557 return LZMA_DATA_ERROR;
559 if (coder->next_finished)
560 return LZMA_STREAM_END;
565 const lzma_ret ret = decode_buffer(coder, allocator,
566 coder->temp.buffer, &coder->temp.pos,
568 out, out_pos, out_size, action);
570 if (ret == LZMA_STREAM_END)
571 // The next coder in the chain hasn't finished
572 // yet. If the input data is valid, there
573 // must be no more output coming, but the
574 // next coder may still need a litle more
575 // input to detect End of Payload Marker.
576 coder->this_finished = true;
577 else if (ret != LZMA_OK)
579 else if (coder->next_finished && *out_pos < out_size)
580 return LZMA_DATA_ERROR;
588 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
590 lzma_next_end(&coder->next, allocator);
591 lzma_next_end(&coder->subfilter, allocator);
592 lzma_next_end(&coder->filter_flags_decoder, allocator);
593 lzma_free(coder->filter_flags.options, allocator);
594 lzma_free(coder, allocator);
600 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
601 const lzma_filter_info *filters)
603 if (next->coder == NULL) {
604 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
605 if (next->coder == NULL)
606 return LZMA_MEM_ERROR;
608 next->code = &subblock_decode;
609 next->end = &subblock_decoder_end;
611 next->coder->next = LZMA_NEXT_CODER_INIT;
612 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
613 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
616 lzma_next_end(&next->coder->subfilter, allocator);
617 lzma_free(next->coder->filter_flags.options, allocator);
620 next->coder->filter_flags.options = NULL;
622 next->coder->sequence = SEQ_FLAGS;
623 next->coder->padding = 0;
624 next->coder->next_finished = false;
625 next->coder->this_finished = false;
626 next->coder->temp.pos = 0;
627 next->coder->temp.size = 0;
629 if (filters[0].options != NULL)
630 next->coder->allow_subfilters = ((lzma_options_subblock *)(
631 filters[0].options))->allow_subfilters;
633 next->coder->allow_subfilters = false;
635 return lzma_next_filter_init(
636 &next->coder->next, allocator, filters + 1);