1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file subblock_decoder.c
4 /// \brief Decoder of the Subblock filter
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "raw_decoder.h"
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
33 // These require that there is at least one input
45 SEQ_SIZE_3, // This must be right before SEQ_DATA.
47 // These don't require any input to be available.
53 /// Number of bytes left in the current Subblock Data field.
56 /// Number of consecutive Subblocks with Subblock Type Padding
59 /// True when .next.code() has returned LZMA_STREAM_END.
62 /// True when the Subblock decoder has detected End of Payload Marker.
63 /// This may become true before next_finished becomes true.
66 /// True if Subfilters are allowed.
67 bool allow_subfilters;
69 /// Indicates if at least one byte of decoded output has been
70 /// produced after enabling Subfilter.
71 bool got_output_with_subfilter;
73 /// Possible subfilter
74 lzma_next_coder subfilter;
76 /// Filter Flags decoder is needed to parse the ID and Properties
78 lzma_next_coder filter_flags_decoder;
80 /// The filter_flags_decoder stores its results here.
81 lzma_options_filter filter_flags;
83 /// Options for the Subblock decoder helper. This is used to tell
84 /// the helper when it should return LZMA_STREAM_END to the subfilter.
85 lzma_options_subblock_helper helper;
88 /// How many times buffer should be repeated
91 /// Size of the buffer
94 /// Position in the buffer
97 /// Buffer to hold the data to be repeated
98 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
101 /// Temporary buffer needed when the Subblock filter is not the last
102 /// filter in the chain. The output of the next filter is first
103 /// decoded into buffer[], which is then used as input for the actual
104 /// Subblock decoder.
108 uint8_t buffer[LZMA_BUFFER_SIZE];
113 /// Values of valid Subblock Flags
124 /// Calls the subfilter and updates coder->uncompressed_size.
126 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
127 const uint8_t *in, size_t *in_pos,
128 size_t in_size, uint8_t *restrict out,
129 size_t *restrict out_pos, size_t out_size, lzma_action action)
131 assert(coder->subfilter.code != NULL);
133 // Call the subfilter.
134 const lzma_ret ret = coder->subfilter.code(
135 coder->subfilter.coder, allocator,
136 in, in_pos, in_size, out, out_pos, out_size, action);
143 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
144 const uint8_t *in, size_t *in_pos,
145 size_t in_size, uint8_t *restrict out,
146 size_t *restrict out_pos, size_t out_size, lzma_action action)
148 while (*out_pos < out_size && (*in_pos < in_size
149 || coder->sequence >= SEQ_DATA))
150 switch (coder->sequence) {
152 // Do the correct action depending on the Subblock Type.
153 switch (in[*in_pos] >> 4) {
155 // Only check that reserved bits are zero.
156 if (++coder->padding > PADDING_MAX
157 || in[*in_pos] & 0x0F)
158 return LZMA_DATA_ERROR;
163 // There must be no Padding before EOPM.
164 if (coder->padding != 0)
165 return LZMA_DATA_ERROR;
167 // Check that reserved bits are zero.
168 if (in[*in_pos] & 0x0F)
169 return LZMA_DATA_ERROR;
171 // There must be no Subfilter enabled.
172 if (coder->subfilter.code != NULL)
173 return LZMA_DATA_ERROR;
176 return LZMA_STREAM_END;
179 // First four bits of the Subblock Data size.
180 coder->size = in[*in_pos] & 0x0F;
182 coder->got_output_with_subfilter = true;
183 coder->sequence = SEQ_SIZE_1;
187 // First four bits of the Repeat Count. We use
188 // coder->size as a temporary place for it.
189 coder->size = in[*in_pos] & 0x0F;
191 coder->got_output_with_subfilter = true;
192 coder->sequence = SEQ_REPEAT_COUNT_1;
195 case FLAG_SET_SUBFILTER: {
196 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
197 || coder->subfilter.code != NULL
198 || !coder->allow_subfilters)
199 return LZMA_DATA_ERROR;
201 assert(coder->filter_flags.options == NULL);
203 // return_if_error(lzma_filter_flags_decoder_init(
204 // &coder->filter_flags_decoder,
205 // allocator, &coder->filter_flags));
207 coder->got_output_with_subfilter = false;
210 coder->sequence = SEQ_FILTER_FLAGS;
214 case FLAG_END_SUBFILTER:
215 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
216 || coder->subfilter.code == NULL
217 || !coder->got_output_with_subfilter)
218 return LZMA_DATA_ERROR;
220 // Tell the helper filter to indicate End of Input
222 coder->helper.end_was_reached = true;
225 const lzma_ret ret = subfilter_decode(coder, allocator,
226 NULL, &dummy, 0, out, out_pos,out_size,
229 // If we didn't reach the end of the subfilter's output
230 // yet, return to the application. On the next call we
231 // will get to this same switch-case again, because we
232 // haven't updated *in_pos yet.
233 if (ret != LZMA_STREAM_END)
236 // Free Subfilter's memory. This is a bit debatable,
237 // since we could avoid some malloc()/free() calls
238 // if the same Subfilter gets used soon again. But
239 // if Subfilter isn't used again, we could leave
240 // a memory-hogging filter dangling until someone
241 // frees Subblock filter itself.
242 lzma_next_coder_end(&coder->subfilter, allocator);
244 // Free memory used for subfilter options. This is
245 // safe, because we don't support any Subfilter that
246 // would allow pointers in the options structure.
247 lzma_free(coder->filter_flags.options, allocator);
248 coder->filter_flags.options = NULL;
255 return LZMA_DATA_ERROR;
261 case SEQ_FILTER_FLAGS: {
262 const lzma_ret ret = coder->filter_flags_decoder.code(
263 coder->filter_flags_decoder.coder, allocator,
264 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
265 if (ret != LZMA_STREAM_END)
266 return ret == LZMA_HEADER_ERROR
267 ? LZMA_DATA_ERROR : ret;
269 // Don't free the filter_flags_decoder. It doesn't take much
270 // memory and we may need it again.
272 // Initialize the Subfilter. Subblock and Copy filters are
274 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
275 return LZMA_DATA_ERROR;
277 coder->helper.end_was_reached = false;
279 lzma_options_filter filters[3] = {
281 .id = coder->filter_flags.id,
282 .options = coder->filter_flags.options,
284 .id = LZMA_FILTER_SUBBLOCK_HELPER,
285 .options = &coder->helper,
287 .id = LZMA_VLI_VALUE_UNKNOWN,
292 // Optimization: We know that LZMA uses End of Payload Marker
293 // (not End of Input), so we can omit the helper filter.
294 if (filters[0].id == LZMA_FILTER_LZMA)
295 filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
297 return_if_error(lzma_raw_decoder_init(
298 &coder->subfilter, allocator, filters));
300 coder->sequence = SEQ_FLAGS;
305 // We are in the beginning of a Subblock. The next Subblock
306 // whose type is not Padding, must indicate end of Subfilter.
307 if (in[*in_pos] == (FLAG_PADDING << 4)) {
312 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
313 return LZMA_DATA_ERROR;
315 coder->sequence = SEQ_FLAGS;
318 case SEQ_REPEAT_COUNT_1:
320 // We use the same code to parse
321 // - the Size (28 bits) in Subblocks of type Data; and
322 // - the Repeat count (28 bits) in Subblocks of type
324 coder->size |= (size_t)(in[*in_pos]) << 4;
329 case SEQ_REPEAT_COUNT_2:
331 coder->size |= (size_t)(in[*in_pos]) << 12;
336 case SEQ_REPEAT_COUNT_3:
338 coder->size |= (size_t)(in[*in_pos]) << 20;
341 // The real value is the stored value plus one.
344 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
345 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
349 case SEQ_REPEAT_SIZE:
350 // Move the Repeat Count to the correct variable and parse
351 // the Size of the Data to be repeated.
352 coder->repeat.count = coder->size;
353 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
354 coder->repeat.pos = 0;
356 // The size of the Data field must be bigger than the number
357 // of Padding bytes before this Subblock.
358 if (coder->repeat.size <= coder->padding)
359 return LZMA_DATA_ERROR;
363 coder->sequence = SEQ_REPEAT_READ_DATA;
366 case SEQ_REPEAT_READ_DATA: {
367 // Fill coder->repeat.buffer[].
368 const size_t in_avail = in_size - *in_pos;
369 const size_t out_avail
370 = coder->repeat.size - coder->repeat.pos;
371 const size_t copy_size = MIN(in_avail, out_avail);
373 memcpy(coder->repeat.buffer + coder->repeat.pos,
374 in + *in_pos, copy_size);
375 *in_pos += copy_size;
376 coder->repeat.pos += copy_size;
378 if (coder->repeat.pos == coder->repeat.size) {
379 coder->repeat.pos = 0;
381 if (coder->repeat.size == 1
382 && coder->subfilter.code == NULL)
383 coder->sequence = SEQ_REPEAT_FAST;
385 coder->sequence = SEQ_REPEAT_NORMAL;
392 // The size of the Data field must be bigger than the number
393 // of Padding bytes before this Subblock.
394 assert(coder->size > 0);
395 if (coder->size <= coder->padding)
396 return LZMA_DATA_ERROR;
400 // Limit the amount of input to match the available
401 // Subblock Data size.
403 if (in_size - *in_pos > coder->size)
404 in_limit = *in_pos + coder->size;
408 if (coder->subfilter.code == NULL) {
409 const size_t copy_size = bufcpy(
410 in, in_pos, in_limit,
411 out, out_pos, out_size);
413 coder->size -= copy_size;
415 const size_t in_start = *in_pos;
416 const lzma_ret ret = subfilter_decode(
418 in, in_pos, in_limit,
419 out, out_pos, out_size,
422 // Update the number of unprocessed bytes left in
423 // this Subblock. This assert() is true because
424 // in_limit prevents *in_pos getting too big.
425 assert(*in_pos - in_start <= coder->size);
426 coder->size -= *in_pos - in_start;
428 if (ret == LZMA_STREAM_END) {
429 // End of Subfilter can occur only at
430 // a Subblock boundary.
431 if (coder->size != 0)
432 return LZMA_DATA_ERROR;
434 // We need a Subblock with Unset
435 // Subfilter before more data.
436 coder->sequence = SEQ_FILTER_END;
444 // If we couldn't process the whole Subblock Data yet, return.
448 coder->sequence = SEQ_FLAGS;
452 case SEQ_REPEAT_FAST: {
453 // Optimization for cases when there is only one byte to
454 // repeat and no Subfilter.
455 const size_t out_avail = out_size - *out_pos;
456 const size_t copy_size = MIN(coder->repeat.count, out_avail);
458 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
460 *out_pos += copy_size;
461 coder->repeat.count -= copy_size;
463 if (coder->repeat.count != 0)
466 coder->sequence = SEQ_FLAGS;
470 case SEQ_REPEAT_NORMAL:
472 // Cycle the repeat buffer if needed.
473 if (coder->repeat.pos == coder->repeat.size) {
474 if (--coder->repeat.count == 0) {
475 coder->sequence = SEQ_FLAGS;
479 coder->repeat.pos = 0;
482 if (coder->subfilter.code == NULL) {
483 bufcpy(coder->repeat.buffer,
486 out, out_pos, out_size);
488 const lzma_ret ret = subfilter_decode(
490 coder->repeat.buffer,
493 out, out_pos, out_size,
496 if (ret == LZMA_STREAM_END) {
497 // End of Subfilter can occur only at
498 // a Subblock boundary.
499 if (coder->repeat.pos
500 != coder->repeat.size
503 return LZMA_DATA_ERROR;
505 // We need a Subblock with Unset
506 // Subfilter before more data.
507 coder->sequence = SEQ_FILTER_END;
510 } else if (ret != LZMA_OK) {
514 } while (*out_pos < out_size);
519 return LZMA_PROG_ERROR;
527 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
528 const uint8_t *restrict in, size_t *restrict in_pos,
529 size_t in_size, uint8_t *restrict out,
530 size_t *restrict out_pos, size_t out_size, lzma_action action)
532 if (coder->next.code == NULL)
533 return decode_buffer(coder, allocator, in, in_pos, in_size,
534 out, out_pos, out_size, action);
536 while (*out_pos < out_size) {
537 if (!coder->next_finished
538 && coder->temp.pos == coder->temp.size) {
540 coder->temp.size = 0;
542 const lzma_ret ret = coder->next.code(
544 allocator, in, in_pos, in_size,
545 coder->temp.buffer, &coder->temp.size,
546 LZMA_BUFFER_SIZE, action);
548 if (ret == LZMA_STREAM_END)
549 coder->next_finished = true;
550 else if (coder->temp.size == 0 || ret != LZMA_OK)
554 if (coder->this_finished) {
555 if (coder->temp.pos != coder->temp.size)
556 return LZMA_DATA_ERROR;
558 if (coder->next_finished)
559 return LZMA_STREAM_END;
564 const lzma_ret ret = decode_buffer(coder, allocator,
565 coder->temp.buffer, &coder->temp.pos,
567 out, out_pos, out_size, action);
569 if (ret == LZMA_STREAM_END)
570 // The next coder in the chain hasn't finished
571 // yet. If the input data is valid, there
572 // must be no more output coming, but the
573 // next coder may still need a litle more
574 // input to detect End of Payload Marker.
575 coder->this_finished = true;
576 else if (ret != LZMA_OK)
578 else if (coder->next_finished && *out_pos < out_size)
579 return LZMA_DATA_ERROR;
587 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
589 lzma_next_coder_end(&coder->next, allocator);
590 lzma_next_coder_end(&coder->subfilter, allocator);
591 lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
592 lzma_free(coder->filter_flags.options, allocator);
593 lzma_free(coder, allocator);
599 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
600 const lzma_filter_info *filters)
602 if (next->coder == NULL) {
603 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
604 if (next->coder == NULL)
605 return LZMA_MEM_ERROR;
607 next->code = &subblock_decode;
608 next->end = &subblock_decoder_end;
610 next->coder->next = LZMA_NEXT_CODER_INIT;
611 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
612 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
615 lzma_next_coder_end(&next->coder->subfilter, allocator);
616 lzma_free(next->coder->filter_flags.options, allocator);
619 next->coder->filter_flags.options = NULL;
621 next->coder->sequence = SEQ_FLAGS;
622 next->coder->padding = 0;
623 next->coder->next_finished = false;
624 next->coder->this_finished = false;
625 next->coder->temp.pos = 0;
626 next->coder->temp.size = 0;
628 if (filters[0].options != NULL)
629 next->coder->allow_subfilters = ((lzma_options_subblock *)(
630 filters[0].options))->allow_subfilters;
632 next->coder->allow_subfilters = false;
634 return lzma_next_filter_init(
635 &next->coder->next, allocator, filters + 1);