]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/subblock/subblock_decoder.c
Subblock decoder: Don't exit the main loop in decode_buffer()
[icculus/xz.git] / src / liblzma / subblock / subblock_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       subblock_decoder.c
4 /// \brief      Decoder of the Subblock filter
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "raw_decoder.h"
23
24
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
27
28
29 struct lzma_coder_s {
30         lzma_next_coder next;
31
32         enum {
33                 // These require that there is at least one input
34                 // byte available.
35                 SEQ_FLAGS,
36                 SEQ_FILTER_FLAGS,
37                 SEQ_FILTER_END,
38                 SEQ_REPEAT_COUNT_1,
39                 SEQ_REPEAT_COUNT_2,
40                 SEQ_REPEAT_COUNT_3,
41                 SEQ_REPEAT_SIZE,
42                 SEQ_REPEAT_READ_DATA,
43                 SEQ_SIZE_1,
44                 SEQ_SIZE_2,
45                 SEQ_SIZE_3, // This must be right before SEQ_DATA.
46
47                 // These don't require any input to be available.
48                 SEQ_DATA,
49                 SEQ_REPEAT_FAST,
50                 SEQ_REPEAT_NORMAL,
51         } sequence;
52
53         /// Number of bytes left in the current Subblock Data field.
54         size_t size;
55
56         /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
57         lzma_vli uncompressed_size;
58
59         /// Number of consecutive Subblocks with Subblock Type Padding
60         uint32_t padding;
61
62         /// True when .next.code() has returned LZMA_STREAM_END.
63         bool next_finished;
64
65         /// True when the Subblock decoder has detected End of Payload Marker.
66         /// This may become true before next_finished becomes true.
67         bool this_finished;
68
69         /// True if Subfilters are allowed.
70         bool allow_subfilters;
71
72         /// Indicates if at least one byte of decoded output has been
73         /// produced after enabling Subfilter.
74         bool got_output_with_subfilter;
75
76         /// Possible subfilter
77         lzma_next_coder subfilter;
78
79         /// Filter Flags decoder is needed to parse the ID and Properties
80         /// of the subfilter.
81         lzma_next_coder filter_flags_decoder;
82
83         /// The filter_flags_decoder stores its results here.
84         lzma_options_filter filter_flags;
85
86         /// Options for the Subblock decoder helper. This is used to tell
87         /// the helper when it should return LZMA_STREAM_END to the subfilter.
88         lzma_options_subblock_helper helper;
89
90         struct {
91                 /// How many times buffer should be repeated
92                 size_t count;
93
94                 /// Size of the buffer
95                 size_t size;
96
97                 /// Position in the buffer
98                 size_t pos;
99
100                 /// Buffer to hold the data to be repeated
101                 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
102         } repeat;
103
104         /// Temporary buffer needed when the Subblock filter is not the last
105         /// filter in the chain. The output of the next filter is first
106         /// decoded into buffer[], which is then used as input for the actual
107         /// Subblock decoder.
108         struct {
109                 size_t pos;
110                 size_t size;
111                 uint8_t buffer[LZMA_BUFFER_SIZE];
112         } temp;
113 };
114
115
116 /// Values of valid Subblock Flags
117 enum {
118         FLAG_PADDING,
119         FLAG_EOPM,
120         FLAG_DATA,
121         FLAG_REPEAT,
122         FLAG_SET_SUBFILTER,
123         FLAG_END_SUBFILTER,
124 };
125
126
127 /// Substracts size from coder->uncompressed_size uncompressed size is known
128 /// and size isn't bigger than coder->uncompressed_size.
129 static inline bool
130 update_uncompressed_size(lzma_coder *coder, size_t size)
131 {
132         if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
133                 if ((lzma_vli)(size) > coder->uncompressed_size)
134                         return true;
135
136                 coder->uncompressed_size -= size;
137         }
138
139         return false;
140 }
141
142
143 /// Calls the subfilter and updates coder->uncompressed_size.
144 static lzma_ret
145 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
146                 const uint8_t *in, size_t *in_pos,
147                 size_t in_size, uint8_t *restrict out,
148                 size_t *restrict out_pos, size_t out_size, lzma_action action)
149 {
150         assert(coder->subfilter.code != NULL);
151
152         const size_t out_start = *out_pos;
153
154         // Call the subfilter.
155         const lzma_ret ret = coder->subfilter.code(
156                         coder->subfilter.coder, allocator,
157                         in, in_pos, in_size, out, out_pos, out_size, action);
158
159         // Update uncompressed_size.
160         if (update_uncompressed_size(coder, *out_pos - out_start))
161                 return LZMA_DATA_ERROR;
162
163         return ret;
164 }
165
166
167 static lzma_ret
168 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
169                 const uint8_t *in, size_t *in_pos,
170                 size_t in_size, uint8_t *restrict out,
171                 size_t *restrict out_pos, size_t out_size, lzma_action action)
172 {
173         while (*out_pos < out_size && (*in_pos < in_size
174                         || coder->sequence >= SEQ_DATA))
175         switch (coder->sequence) {
176         case SEQ_FLAGS: {
177                 if ((in[*in_pos] >> 4) != FLAG_PADDING)
178                         coder->padding = 0;
179
180                 // Do the correct action depending on the Subblock Type.
181                 switch (in[*in_pos] >> 4) {
182                 case FLAG_PADDING:
183                         // Only check that reserved bits are zero.
184                         if (++coder->padding > PADDING_MAX
185                                         || in[*in_pos] & 0x0F)
186                                 return LZMA_DATA_ERROR;
187                         ++*in_pos;
188                         break;
189
190                 case FLAG_EOPM:
191                         // Check that reserved bits are zero.
192                         if (in[*in_pos] & 0x0F)
193                                 return LZMA_DATA_ERROR;
194
195                         // There must be no Subfilter enabled.
196                         if (coder->subfilter.code != NULL)
197                                 return LZMA_DATA_ERROR;
198
199                         // End of Payload Marker must not be used if
200                         // uncompressed size is known.
201                         if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
202                                 return LZMA_DATA_ERROR;
203
204                         ++*in_pos;
205                         return LZMA_STREAM_END;
206
207                 case FLAG_DATA:
208                         // First four bits of the Subblock Data size.
209                         coder->size = in[*in_pos] & 0x0F;
210                         ++*in_pos;
211                         coder->got_output_with_subfilter = true;
212                         coder->sequence = SEQ_SIZE_1;
213                         break;
214
215                 case FLAG_REPEAT:
216                         // First four bits of the Repeat Count. We use
217                         // coder->size as a temporary place for it.
218                         coder->size = in[*in_pos] & 0x0F;
219                         ++*in_pos;
220                         coder->got_output_with_subfilter = true;
221                         coder->sequence = SEQ_REPEAT_COUNT_1;
222                         break;
223
224                 case FLAG_SET_SUBFILTER: {
225                         if ((in[*in_pos] & 0x0F)
226                                         || coder->subfilter.code != NULL
227                                         || !coder->allow_subfilters)
228                                 return LZMA_DATA_ERROR;
229
230                         assert(coder->filter_flags.options == NULL);
231                         return_if_error(lzma_filter_flags_decoder_init(
232                                         &coder->filter_flags_decoder,
233                                         allocator, &coder->filter_flags));
234
235                         coder->got_output_with_subfilter = false;
236
237                         ++*in_pos;
238                         coder->sequence = SEQ_FILTER_FLAGS;
239                         break;
240                 }
241
242                 case FLAG_END_SUBFILTER:
243                         if (coder->subfilter.code == NULL
244                                         || !coder->got_output_with_subfilter)
245                                 return LZMA_DATA_ERROR;
246
247                         // Tell the helper filter to indicate End of Input
248                         // to our subfilter.
249                         coder->helper.end_was_reached = true;
250
251                         size_t dummy = 0;
252                         const lzma_ret ret = subfilter_decode(coder, allocator,
253                                         NULL, &dummy, 0, out, out_pos,out_size,
254                                         action);
255
256                         // If we didn't reach the end of the subfilter's output
257                         // yet, return to the application. On the next call we
258                         // will get to this same switch-case again, because we
259                         // haven't updated *in_pos yet.
260                         if (ret != LZMA_STREAM_END)
261                                 return ret;
262
263                         // Free Subfilter's memory. This is a bit debatable,
264                         // since we could avoid some malloc()/free() calls
265                         // if the same Subfilter gets used soon again. But
266                         // if Subfilter isn't used again, we could leave
267                         // a memory-hogging filter dangling until someone
268                         // frees Subblock filter itself.
269                         lzma_next_coder_end(&coder->subfilter, allocator);
270
271                         // Free memory used for subfilter options. This is
272                         // safe, because we don't support any Subfilter that
273                         // would allow pointers in the options structure.
274                         lzma_free(coder->filter_flags.options, allocator);
275                         coder->filter_flags.options = NULL;
276
277                         ++*in_pos;
278
279                         if (coder->uncompressed_size == 0)
280                                 return LZMA_STREAM_END;
281
282                         break;
283
284                 default:
285                         return LZMA_DATA_ERROR;
286                 }
287
288                 break;
289         }
290
291         case SEQ_FILTER_FLAGS: {
292                 const lzma_ret ret = coder->filter_flags_decoder.code(
293                                 coder->filter_flags_decoder.coder, allocator,
294                                 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
295                 if (ret != LZMA_STREAM_END)
296                         return ret == LZMA_HEADER_ERROR
297                                         ? LZMA_DATA_ERROR : ret;
298
299                 // Don't free the filter_flags_decoder. It doesn't take much
300                 // memory and we may need it again.
301
302                 // Initialize the Subfilter. Subblock and Copy filters are
303                 // not allowed.
304                 if (coder->filter_flags.id == LZMA_FILTER_COPY
305                                 || coder->filter_flags.id
306                                         == LZMA_FILTER_SUBBLOCK)
307                         return LZMA_DATA_ERROR;
308
309                 coder->helper.end_was_reached = false;
310
311                 lzma_options_filter filters[3] = {
312                         {
313                                 .id = coder->filter_flags.id,
314                                 .options = coder->filter_flags.options,
315                         }, {
316                                 .id = LZMA_FILTER_SUBBLOCK_HELPER,
317                                 .options = &coder->helper,
318                         }, {
319                                 .id = LZMA_VLI_VALUE_UNKNOWN,
320                                 .options = NULL,
321                         }
322                 };
323
324                 // Optimization: We know that LZMA uses End of Payload Marker
325                 // (not End of Input), so we can omit the helper filter.
326                 if (filters[0].id == LZMA_FILTER_LZMA)
327                         filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
328
329                 return_if_error(lzma_raw_decoder_init(
330                                 &coder->subfilter, allocator,
331                                 filters, LZMA_VLI_VALUE_UNKNOWN, false));
332
333                 coder->sequence = SEQ_FLAGS;
334                 break;
335         }
336
337         case SEQ_FILTER_END:
338                 // We are in the beginning of a Subblock. The next Subblock
339                 // whose type is not Padding, must indicate end of Subfilter.
340                 if (in[*in_pos] == (FLAG_PADDING << 4)) {
341                         ++*in_pos;
342                         break;
343                 }
344
345                 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
346                         return LZMA_DATA_ERROR;
347
348                 coder->sequence = SEQ_FLAGS;
349                 break;
350
351         case SEQ_REPEAT_COUNT_1:
352         case SEQ_SIZE_1:
353                 // We use the same code to parse
354                 //  - the Size (28 bits) in Subblocks of type Data; and
355                 //  - the Repeat count (28 bits) in Subblocks of type
356                 //    Repeating Data.
357                 coder->size |= (size_t)(in[*in_pos]) << 4;
358                 ++*in_pos;
359                 ++coder->sequence;
360                 break;
361
362         case SEQ_REPEAT_COUNT_2:
363         case SEQ_SIZE_2:
364                 coder->size |= (size_t)(in[*in_pos]) << 12;
365                 ++*in_pos;
366                 ++coder->sequence;
367                 break;
368
369         case SEQ_REPEAT_COUNT_3:
370         case SEQ_SIZE_3:
371                 coder->size |= (size_t)(in[*in_pos]) << 20;
372                 ++*in_pos;
373
374                 // The real value is the stored value plus one.
375                 ++coder->size;
376
377                 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
378                 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
379                 ++coder->sequence;
380                 break;
381
382         case SEQ_REPEAT_SIZE:
383                 // Move the Repeat Count to the correct variable and parse
384                 // the Size of the Data to be repeated.
385                 coder->repeat.count = coder->size;
386                 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
387                 coder->repeat.pos = 0;
388                 ++*in_pos;
389                 coder->sequence = SEQ_REPEAT_READ_DATA;
390                 break;
391
392         case SEQ_REPEAT_READ_DATA: {
393                 // Fill coder->repeat.buffer[].
394                 const size_t in_avail = in_size - *in_pos;
395                 const size_t out_avail
396                                 = coder->repeat.size - coder->repeat.pos;
397                 const size_t copy_size = MIN(in_avail, out_avail);
398
399                 memcpy(coder->repeat.buffer + coder->repeat.pos,
400                                 in + *in_pos, copy_size);
401                 *in_pos += copy_size;
402                 coder->repeat.pos += copy_size;
403
404                 if (coder->repeat.pos == coder->repeat.size) {
405                         coder->repeat.pos = 0;
406
407                         if (coder->repeat.size == 1
408                                         && coder->subfilter.code == NULL)
409                                 coder->sequence = SEQ_REPEAT_FAST;
410                         else
411                                 coder->sequence = SEQ_REPEAT_NORMAL;
412                 }
413
414                 break;
415         }
416
417         case SEQ_DATA: {
418                 // Limit the amount of input to match the available
419                 // Subblock Data size.
420                 size_t in_limit;
421                 if (in_size - *in_pos > coder->size)
422                         in_limit = *in_pos + coder->size;
423                 else
424                         in_limit = in_size;
425
426                 if (coder->subfilter.code == NULL) {
427                         const size_t copy_size = bufcpy(
428                                         in, in_pos, in_limit,
429                                         out, out_pos, out_size);
430
431                         coder->size -= copy_size;
432
433                         if (update_uncompressed_size(coder, copy_size))
434                                 return LZMA_DATA_ERROR;
435
436                 } else {
437                         const size_t in_start = *in_pos;
438                         const lzma_ret ret = subfilter_decode(
439                                         coder, allocator,
440                                         in, in_pos, in_limit,
441                                         out, out_pos, out_size,
442                                         action);
443
444                         // Update the number of unprocessed bytes left in
445                         // this Subblock. This assert() is true because
446                         // in_limit prevents *in_pos getting too big.
447                         assert(*in_pos - in_start <= coder->size);
448                         coder->size -= *in_pos - in_start;
449
450                         if (ret == LZMA_STREAM_END) {
451                                 // End of Subfilter can occur only at
452                                 // a Subblock boundary.
453                                 if (coder->size != 0)
454                                         return LZMA_DATA_ERROR;
455
456                                 // We need a Subblock with Unset
457                                 // Subfilter before more data.
458                                 coder->sequence = SEQ_FILTER_END;
459                                 break;
460                         }
461
462                         if (ret != LZMA_OK)
463                                 return ret;
464                 }
465
466                 // If we couldn't process the whole Subblock Data yet, return.
467                 if (coder->size > 0)
468                         return LZMA_OK;
469
470                 // Check if we have decoded all the data.
471                 if (coder->uncompressed_size == 0
472                                 && coder->subfilter.code == NULL)
473                         return LZMA_STREAM_END;
474
475                 coder->sequence = SEQ_FLAGS;
476                 break;
477         }
478
479         case SEQ_REPEAT_FAST: {
480                 // Optimization for cases when there is only one byte to
481                 // repeat and no Subfilter.
482                 const size_t out_avail = out_size - *out_pos;
483                 const size_t copy_size = MIN(coder->repeat.count, out_avail);
484
485                 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
486
487                 *out_pos += copy_size;
488                 coder->repeat.count -= copy_size;
489
490                 if (update_uncompressed_size(coder, copy_size))
491                         return LZMA_DATA_ERROR;
492
493                 if (coder->repeat.count == 0) {
494                         assert(coder->subfilter.code == NULL);
495                         if (coder->uncompressed_size == 0)
496                                 return LZMA_STREAM_END;
497                 } else {
498                         return LZMA_OK;
499                 }
500
501                 coder->sequence = SEQ_FLAGS;
502                 break;
503         }
504
505         case SEQ_REPEAT_NORMAL:
506                 do {
507                         // Cycle the repeat buffer if needed.
508                         if (coder->repeat.pos == coder->repeat.size) {
509                                 if (--coder->repeat.count == 0) {
510                                         coder->sequence = SEQ_FLAGS;
511                                         break;
512                                 }
513
514                                 coder->repeat.pos = 0;
515                         }
516
517                         if (coder->subfilter.code == NULL) {
518                                 const size_t copy_size = bufcpy(
519                                                 coder->repeat.buffer,
520                                                 &coder->repeat.pos,
521                                                 coder->repeat.size,
522                                                 out, out_pos, out_size);
523
524                                 if (update_uncompressed_size(coder, copy_size))
525                                         return LZMA_DATA_ERROR;
526
527                         } else {
528                                 const lzma_ret ret = subfilter_decode(
529                                                 coder, allocator,
530                                                 coder->repeat.buffer,
531                                                 &coder->repeat.pos,
532                                                 coder->repeat.size,
533                                                 out, out_pos, out_size,
534                                                 action);
535
536                                 if (ret == LZMA_STREAM_END) {
537                                         // End of Subfilter can occur only at
538                                         // a Subblock boundary.
539                                         if (coder->repeat.pos
540                                                         != coder->repeat.size
541                                                         || --coder->repeat
542                                                                 .count != 0)
543                                                 return LZMA_DATA_ERROR;
544
545                                         // We need a Subblock with Unset
546                                         // Subfilter before more data.
547                                         coder->sequence = SEQ_FILTER_END;
548                                         break;
549
550                                 } else if (ret != LZMA_OK) {
551                                         return ret;
552                                 }
553                         }
554                 } while (*out_pos < out_size);
555
556                 // Check if we have decoded all the data.
557                 if (coder->uncompressed_size == 0
558                                 && coder->subfilter.code == NULL)
559                         return LZMA_STREAM_END;
560
561                 break;
562
563         default:
564                 return LZMA_PROG_ERROR;
565         }
566
567         return LZMA_OK;
568 }
569
570
571 static lzma_ret
572 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
573                 const uint8_t *restrict in, size_t *restrict in_pos,
574                 size_t in_size, uint8_t *restrict out,
575                 size_t *restrict out_pos, size_t out_size, lzma_action action)
576 {
577         if (coder->next.code == NULL)
578                 return decode_buffer(coder, allocator, in, in_pos, in_size,
579                                 out, out_pos, out_size, action);
580
581         while (*out_pos < out_size) {
582                 if (!coder->next_finished
583                                 && coder->temp.pos == coder->temp.size) {
584                         coder->temp.pos = 0;
585                         coder->temp.size = 0;
586
587                         const lzma_ret ret = coder->next.code(
588                                         coder->next.coder,
589                                         allocator, in, in_pos, in_size,
590                                         coder->temp.buffer, &coder->temp.size,
591                                         LZMA_BUFFER_SIZE, action);
592
593                         if (ret == LZMA_STREAM_END)
594                                 coder->next_finished = true;
595                         else if (coder->temp.size == 0 || ret != LZMA_OK)
596                                 return ret;
597                 }
598
599                 if (coder->this_finished) {
600                         if (coder->temp.pos != coder->temp.size)
601                                 return LZMA_DATA_ERROR;
602
603                         if (coder->next_finished)
604                                 return LZMA_STREAM_END;
605
606                         return LZMA_OK;
607                 }
608
609                 const lzma_ret ret = decode_buffer(coder, allocator,
610                                 coder->temp.buffer, &coder->temp.pos,
611                                 coder->temp.size,
612                                 out, out_pos, out_size, action);
613
614                 if (ret == LZMA_STREAM_END)
615                         // The next coder in the chain hasn't finished
616                         // yet. If the input data is valid, there
617                         // must be no more output coming, but the
618                         // next coder may still need a litle more
619                         // input to detect End of Payload Marker.
620                         coder->this_finished = true;
621                 else if (ret != LZMA_OK)
622                         return ret;
623                 else if (coder->next_finished && *out_pos < out_size)
624                         return LZMA_DATA_ERROR;
625         }
626
627         return LZMA_OK;
628 }
629
630
631 static void
632 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
633 {
634         lzma_next_coder_end(&coder->next, allocator);
635         lzma_next_coder_end(&coder->subfilter, allocator);
636         lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
637         lzma_free(coder->filter_flags.options, allocator);
638         lzma_free(coder, allocator);
639         return;
640 }
641
642
643 extern lzma_ret
644 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
645                 const lzma_filter_info *filters)
646 {
647         if (next->coder == NULL) {
648                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
649                 if (next->coder == NULL)
650                         return LZMA_MEM_ERROR;
651
652                 next->code = &subblock_decode;
653                 next->end = &subblock_decoder_end;
654
655                 next->coder->next = LZMA_NEXT_CODER_INIT;
656                 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
657                 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
658
659         } else {
660                 lzma_next_coder_end(&next->coder->subfilter, allocator);
661                 lzma_free(next->coder->filter_flags.options, allocator);
662         }
663
664         next->coder->filter_flags.options = NULL;
665
666         next->coder->sequence = SEQ_FLAGS;
667         next->coder->uncompressed_size = filters[0].uncompressed_size;
668         next->coder->padding = 0;
669         next->coder->next_finished = false;
670         next->coder->this_finished = false;
671         next->coder->temp.pos = 0;
672         next->coder->temp.size = 0;
673
674         if (filters[0].options != NULL)
675                 next->coder->allow_subfilters = ((lzma_options_subblock *)(
676                                 filters[0].options))->allow_subfilters;
677         else
678                 next->coder->allow_subfilters = false;
679
680         return lzma_next_filter_init(
681                         &next->coder->next, allocator, filters + 1);
682 }