]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/subblock/subblock_decoder.c
Take advantage of return_if_error() macro in more places.
[icculus/xz.git] / src / liblzma / subblock / subblock_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       subblock_decoder.c
4 /// \brief      Decoder of the Subblock filter
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "raw_decoder.h"
23
24
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
27
28
29 struct lzma_coder_s {
30         lzma_next_coder next;
31
32         enum {
33                 SEQ_FLAGS,
34                 SEQ_SIZE_1,
35                 SEQ_SIZE_2,
36                 SEQ_SIZE_3,
37                 SEQ_DATA,
38                 SEQ_REPEAT_COUNT_1,
39                 SEQ_REPEAT_COUNT_2,
40                 SEQ_REPEAT_COUNT_3,
41                 SEQ_REPEAT_SIZE,
42                 SEQ_REPEAT_READ_DATA,
43                 SEQ_REPEAT_FAST,
44                 SEQ_REPEAT_NORMAL,
45                 SEQ_FILTER_FLAGS,
46                 SEQ_FILTER_END,
47         } sequence;
48
49         /// Number of bytes left in the current Subblock Data field.
50         size_t size;
51
52         /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
53         lzma_vli uncompressed_size;
54
55         /// Number of consecutive Subblocks with Subblock Type Padding
56         uint32_t padding;
57
58         /// True when .next.code() has returned LZMA_STREAM_END.
59         bool next_finished;
60
61         /// True when the Subblock decoder has detected End of Payload Marker.
62         /// This may become true before next_finished becomes true.
63         bool this_finished;
64
65         /// True if Subfilters are allowed.
66         bool allow_subfilters;
67
68         /// Indicates if at least one byte of decoded output has been
69         /// produced after enabling Subfilter.
70         bool got_output_with_subfilter;
71
72         /// Possible subfilter
73         lzma_next_coder subfilter;
74
75         /// Filter Flags decoder is needed to parse the ID and Properties
76         /// of the subfilter.
77         lzma_next_coder filter_flags_decoder;
78
79         /// The filter_flags_decoder stores its results here.
80         lzma_options_filter filter_flags;
81
82         /// Options for the Subblock decoder helper. This is used to tell
83         /// the helper when it should return LZMA_STREAM_END to the subfilter.
84         lzma_options_subblock_helper helper;
85
86         struct {
87                 /// How many times buffer should be repeated
88                 size_t count;
89
90                 /// Size of the buffer
91                 size_t size;
92
93                 /// Position in the buffer
94                 size_t pos;
95
96                 /// Buffer to hold the data to be repeated
97                 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
98         } repeat;
99
100         /// Temporary buffer needed when the Subblock filter is not the last
101         /// filter in the chain. The output of the next filter is first
102         /// decoded into buffer[], which is then used as input for the actual
103         /// Subblock decoder.
104         struct {
105                 size_t pos;
106                 size_t size;
107                 uint8_t buffer[LZMA_BUFFER_SIZE];
108         } temp;
109 };
110
111
112 /// Values of valid Subblock Flags
113 enum {
114         FLAG_PADDING,
115         FLAG_EOPM,
116         FLAG_DATA,
117         FLAG_REPEAT,
118         FLAG_SET_SUBFILTER,
119         FLAG_END_SUBFILTER,
120 };
121
122
123 /// Substracts size from coder->uncompressed_size uncompressed size is known
124 /// and size isn't bigger than coder->uncompressed_size.
125 static inline bool
126 update_uncompressed_size(lzma_coder *coder, size_t size)
127 {
128         if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
129                 if ((lzma_vli)(size) > coder->uncompressed_size)
130                         return true;
131
132                 coder->uncompressed_size -= size;
133         }
134
135         return false;
136 }
137
138
139 /// Calls the subfilter and updates coder->uncompressed_size.
140 static lzma_ret
141 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
142                 const uint8_t *in, size_t *in_pos,
143                 size_t in_size, uint8_t *restrict out,
144                 size_t *restrict out_pos, size_t out_size, lzma_action action)
145 {
146         assert(coder->subfilter.code != NULL);
147
148         const size_t out_start = *out_pos;
149
150         // Call the subfilter.
151         const lzma_ret ret = coder->subfilter.code(
152                         coder->subfilter.coder, allocator,
153                         in, in_pos, in_size, out, out_pos, out_size, action);
154
155         // Update uncompressed_size.
156         if (update_uncompressed_size(coder, *out_pos - out_start))
157                 return LZMA_DATA_ERROR;
158
159         return ret;
160 }
161
162
163 static lzma_ret
164 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
165                 const uint8_t *in, size_t *in_pos,
166                 size_t in_size, uint8_t *restrict out,
167                 size_t *restrict out_pos, size_t out_size, lzma_action action)
168 {
169         while (*out_pos < out_size && (*in_pos < in_size
170                         || coder->sequence == SEQ_DATA))
171         switch (coder->sequence) {
172         case SEQ_FLAGS: {
173                 if ((in[*in_pos] >> 4) != FLAG_PADDING)
174                         coder->padding = 0;
175
176                 // Do the correct action depending on the Subblock Type.
177                 switch (in[*in_pos] >> 4) {
178                 case FLAG_PADDING:
179                         // Only check that reserved bits are zero.
180                         if (++coder->padding > PADDING_MAX
181                                         || in[*in_pos] & 0x0F)
182                                 return LZMA_DATA_ERROR;
183                         ++*in_pos;
184                         break;
185
186                 case FLAG_EOPM:
187                         // Check that reserved bits are zero.
188                         if (in[*in_pos] & 0x0F)
189                                 return LZMA_DATA_ERROR;
190
191                         // There must be no Subfilter enabled.
192                         if (coder->subfilter.code != NULL)
193                                 return LZMA_DATA_ERROR;
194
195                         // End of Payload Marker must not be used if
196                         // uncompressed size is known.
197                         if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
198                                 return LZMA_DATA_ERROR;
199
200                         ++*in_pos;
201                         return LZMA_STREAM_END;
202
203                 case FLAG_DATA:
204                         // First four bits of the Subblock Data size.
205                         coder->size = in[*in_pos] & 0x0F;
206                         ++*in_pos;
207                         coder->got_output_with_subfilter = true;
208                         coder->sequence = SEQ_SIZE_1;
209                         break;
210
211                 case FLAG_REPEAT:
212                         // First four bits of the Repeat Count. We use
213                         // coder->size as a temporary place for it.
214                         coder->size = in[*in_pos] & 0x0F;
215                         ++*in_pos;
216                         coder->got_output_with_subfilter = true;
217                         coder->sequence = SEQ_REPEAT_COUNT_1;
218                         break;
219
220                 case FLAG_SET_SUBFILTER: {
221                         if ((in[*in_pos] & 0x0F)
222                                         || coder->subfilter.code != NULL
223                                         || !coder->allow_subfilters)
224                                 return LZMA_DATA_ERROR;
225
226                         assert(coder->filter_flags.options == NULL);
227                         return_if_error(lzma_filter_flags_decoder_init(
228                                         &coder->filter_flags_decoder,
229                                         allocator, &coder->filter_flags));
230
231                         coder->got_output_with_subfilter = false;
232
233                         ++*in_pos;
234                         coder->sequence = SEQ_FILTER_FLAGS;
235                         break;
236                 }
237
238                 case FLAG_END_SUBFILTER:
239                         if (coder->subfilter.code == NULL
240                                         || !coder->got_output_with_subfilter)
241                                 return LZMA_DATA_ERROR;
242
243                         // Tell the helper filter to indicate End of Input
244                         // to our subfilter.
245                         coder->helper.end_was_reached = true;
246
247                         size_t dummy = 0;
248                         const lzma_ret ret = subfilter_decode(coder, allocator,
249                                         NULL, &dummy, 0, out, out_pos,out_size,
250                                         action);
251
252                         // If we didn't reach the end of the subfilter's output
253                         // yet, return to the application. On the next call we
254                         // will get to this same switch-case again, because we
255                         // haven't updated *in_pos yet.
256                         if (ret != LZMA_STREAM_END)
257                                 return ret;
258
259                         // Free Subfilter's memory. This is a bit debatable,
260                         // since we could avoid some malloc()/free() calls
261                         // if the same Subfilter gets used soon again. But
262                         // if Subfilter isn't used again, we could leave
263                         // a memory-hogging filter dangling until someone
264                         // frees Subblock filter itself.
265                         lzma_next_coder_end(&coder->subfilter, allocator);
266
267                         // Free memory used for subfilter options. This is
268                         // safe, because we don't support any Subfilter that
269                         // would allow pointers in the options structure.
270                         lzma_free(coder->filter_flags.options, allocator);
271                         coder->filter_flags.options = NULL;
272
273                         ++*in_pos;
274
275                         if (coder->uncompressed_size == 0)
276                                 return LZMA_STREAM_END;
277
278                         break;
279
280                 default:
281                         return LZMA_DATA_ERROR;
282                 }
283
284                 break;
285         }
286
287         case SEQ_SIZE_1:
288         case SEQ_REPEAT_COUNT_1:
289                 // We use the same code to parse
290                 //  - the Size (28 bits) in Subblocks of type Data; and
291                 //  - the Repeat count (28 bits) in Subblocks of type
292                 //    Repeating Data.
293                 coder->size |= (size_t)(in[*in_pos]) << 4;
294                 ++*in_pos;
295                 ++coder->sequence;
296                 break;
297
298         case SEQ_SIZE_2:
299         case SEQ_REPEAT_COUNT_2:
300                 coder->size |= (size_t)(in[*in_pos]) << 12;
301                 ++*in_pos;
302                 ++coder->sequence;
303                 break;
304
305         case SEQ_SIZE_3:
306         case SEQ_REPEAT_COUNT_3:
307                 coder->size |= (size_t)(in[*in_pos]) << 20;
308
309                 // The real value is the stored value plus one.
310                 ++coder->size;
311
312                 ++*in_pos;
313                 ++coder->sequence;
314                 break;
315
316         case SEQ_REPEAT_SIZE:
317                 // Move the Repeat Count to the correct variable and parse
318                 // the Size of the Data to be repeated.
319                 coder->repeat.count = coder->size;
320                 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
321                 coder->repeat.pos = 0;
322                 ++*in_pos;
323                 coder->sequence = SEQ_REPEAT_READ_DATA;
324                 break;
325
326         case SEQ_REPEAT_READ_DATA: {
327                 // Fill coder->repeat.buffer[].
328                 const size_t in_avail = in_size - *in_pos;
329                 const size_t out_avail
330                                 = coder->repeat.size - coder->repeat.pos;
331                 const size_t copy_size = MIN(in_avail, out_avail);
332
333                 memcpy(coder->repeat.buffer + coder->repeat.pos,
334                                 in + *in_pos, copy_size);
335                 *in_pos += copy_size;
336                 coder->repeat.pos += copy_size;
337
338                 if (coder->repeat.pos == coder->repeat.size) {
339                         coder->repeat.pos = 0;
340
341                         if (coder->repeat.size == 1
342                                         && coder->subfilter.code == NULL)
343                                 coder->sequence = SEQ_REPEAT_FAST;
344                         else
345                                 coder->sequence = SEQ_REPEAT_NORMAL;
346                 }
347
348                 break;
349         }
350
351         case SEQ_REPEAT_FAST: {
352                 // Optimization for cases when there is only one byte to
353                 // repeat and no Subfilter.
354                 const size_t out_avail = out_size - *out_pos;
355                 const size_t copy_size = MIN(coder->repeat.count, out_avail);
356
357                 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
358
359                 *out_pos += copy_size;
360                 coder->repeat.count -= copy_size;
361
362                 if (update_uncompressed_size(coder, copy_size))
363                         return LZMA_DATA_ERROR;
364
365                 if (coder->repeat.count == 0) {
366                         if (coder->uncompressed_size == 0)
367                                 return LZMA_STREAM_END;
368                 } else {
369                         return LZMA_OK;
370                 }
371
372                 coder->sequence = SEQ_FLAGS;
373                 break;
374         }
375
376         case SEQ_REPEAT_NORMAL:
377                 do {
378                         // Cycle the repeat buffer if needed.
379                         if (coder->repeat.pos == coder->repeat.size) {
380                                 if (--coder->repeat.count == 0) {
381                                         coder->sequence = SEQ_FLAGS;
382                                         break;
383                                 }
384
385                                 coder->repeat.pos = 0;
386                         }
387
388                         if (coder->subfilter.code == NULL) {
389                                 const size_t copy_size = bufcpy(
390                                                 coder->repeat.buffer,
391                                                 &coder->repeat.pos,
392                                                 coder->repeat.size,
393                                                 out, out_pos, out_size);
394
395                                 if (update_uncompressed_size(coder, copy_size))
396                                         return LZMA_DATA_ERROR;
397
398                         } else {
399                                 const lzma_ret ret = subfilter_decode(
400                                                 coder, allocator,
401                                                 coder->repeat.buffer,
402                                                 &coder->repeat.pos,
403                                                 coder->repeat.size,
404                                                 out, out_pos, out_size,
405                                                 action);
406
407                                 if (ret == LZMA_STREAM_END) {
408                                         // End of Subfilter can occur only at
409                                         // a Subblock boundary.
410                                         if (coder->repeat.pos
411                                                         != coder->repeat.size
412                                                         || --coder->repeat
413                                                                 .count != 0)
414                                                 return LZMA_DATA_ERROR;
415
416                                         // We need a Subblock with Unset
417                                         // Subfilter before more data.
418                                         coder->sequence = SEQ_FILTER_END;
419                                         break;
420
421                                 } else if (ret != LZMA_OK) {
422                                         return ret;
423                                 }
424                         }
425                 } while (*out_pos < out_size);
426
427                 break;
428
429         case SEQ_DATA: {
430                 // Limit the amount of input to match the available
431                 // Subblock Data size.
432                 size_t in_limit;
433                 if (in_size - *in_pos > coder->size)
434                         in_limit = *in_pos + coder->size;
435                 else
436                         in_limit = in_size;
437
438                 if (coder->subfilter.code == NULL) {
439                         const size_t copy_size = bufcpy(
440                                         in, in_pos, in_limit,
441                                         out, out_pos, out_size);
442
443                         coder->size -= copy_size;
444
445                         if (update_uncompressed_size(coder, copy_size))
446                                 return LZMA_DATA_ERROR;
447
448                 } else {
449                         const size_t in_start = *in_pos;
450                         const lzma_ret ret = subfilter_decode(
451                                         coder, allocator,
452                                         in, in_pos, in_limit,
453                                         out, out_pos, out_size,
454                                         action);
455
456                         // Update the number of unprocessed bytes left in
457                         // this Subblock. This assert() is true because
458                         // in_limit prevents *in_pos getting too big.
459                         assert(*in_pos - in_start <= coder->size);
460                         coder->size -= *in_pos - in_start;
461
462                         if (ret == LZMA_STREAM_END) {
463                                 // End of Subfilter can occur only at
464                                 // a Subblock boundary.
465                                 if (coder->size != 0)
466                                         return LZMA_DATA_ERROR;
467
468                                 // We need a Subblock with Unset
469                                 // Subfilter before more data.
470                                 coder->sequence = SEQ_FILTER_END;
471                                 break;
472                         }
473
474                         if (ret != LZMA_OK)
475                                 return ret;
476                 }
477
478                 // If we couldn't process the whole Subblock Data yet, return.
479                 if (coder->size > 0)
480                         return LZMA_OK;
481
482                 // Check if we have decoded all the data.
483                 if (coder->uncompressed_size == 0
484                                 && coder->subfilter.code == NULL)
485                         return LZMA_STREAM_END;
486
487                 coder->sequence = SEQ_FLAGS;
488                 break;
489         }
490
491         case SEQ_FILTER_FLAGS: {
492                 const lzma_ret ret = coder->filter_flags_decoder.code(
493                                 coder->filter_flags_decoder.coder, allocator,
494                                 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
495                 if (ret != LZMA_STREAM_END)
496                         return ret == LZMA_HEADER_ERROR
497                                         ? LZMA_DATA_ERROR : ret;
498
499                 // Don't free the filter_flags_decoder. It doesn't take much
500                 // memory and we may need it again.
501
502                 // Initialize the Subfilter. Subblock and Copy filters are
503                 // not allowed.
504                 if (coder->filter_flags.id == LZMA_FILTER_COPY
505                                 || coder->filter_flags.id
506                                         == LZMA_FILTER_SUBBLOCK)
507                         return LZMA_DATA_ERROR;
508
509                 coder->helper.end_was_reached = false;
510
511                 lzma_options_filter filters[3] = {
512                         {
513                                 .id = coder->filter_flags.id,
514                                 .options = coder->filter_flags.options,
515                         }, {
516                                 .id = LZMA_FILTER_SUBBLOCK_HELPER,
517                                 .options = &coder->helper,
518                         }, {
519                                 .id = LZMA_VLI_VALUE_UNKNOWN,
520                                 .options = NULL,
521                         }
522                 };
523
524                 // Optimization: We know that LZMA uses End of Payload Marker
525                 // (not End of Input), so we can omit the helper filter.
526                 if (filters[0].id == LZMA_FILTER_LZMA)
527                         filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
528
529                 return_if_error(lzma_raw_decoder_init(
530                                 &coder->subfilter, allocator,
531                                 filters, LZMA_VLI_VALUE_UNKNOWN, false));
532
533                 coder->sequence = SEQ_FLAGS;
534                 break;
535         }
536
537         case SEQ_FILTER_END:
538                 // We are in the beginning of a Subblock. The next Subblock
539                 // whose type is not Padding, must indicate end of Subfilter.
540                 if (in[*in_pos] == (FLAG_PADDING << 4)) {
541                         ++*in_pos;
542                         break;
543                 }
544
545                 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
546                         return LZMA_DATA_ERROR;
547
548                 coder->sequence = SEQ_FLAGS;
549                 break;
550
551         default:
552                 return LZMA_PROG_ERROR;
553         }
554
555         return LZMA_OK;
556 }
557
558
559 static lzma_ret
560 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
561                 const uint8_t *restrict in, size_t *restrict in_pos,
562                 size_t in_size, uint8_t *restrict out,
563                 size_t *restrict out_pos, size_t out_size, lzma_action action)
564 {
565         if (coder->next.code == NULL)
566                 return decode_buffer(coder, allocator, in, in_pos, in_size,
567                                 out, out_pos, out_size, action);
568
569         while (*out_pos < out_size) {
570                 if (!coder->next_finished
571                                 && coder->temp.pos == coder->temp.size) {
572                         coder->temp.pos = 0;
573                         coder->temp.size = 0;
574
575                         const lzma_ret ret = coder->next.code(
576                                         coder->next.coder,
577                                         allocator, in, in_pos, in_size,
578                                         coder->temp.buffer, &coder->temp.size,
579                                         LZMA_BUFFER_SIZE, action);
580
581                         if (ret == LZMA_STREAM_END)
582                                 coder->next_finished = true;
583                         else if (coder->temp.size == 0 || ret != LZMA_OK)
584                                 return ret;
585                 }
586
587                 if (coder->this_finished) {
588                         if (coder->temp.pos != coder->temp.size)
589                                 return LZMA_DATA_ERROR;
590
591                         if (coder->next_finished)
592                                 return LZMA_STREAM_END;
593
594                         return LZMA_OK;
595                 }
596
597                 const lzma_ret ret = decode_buffer(coder, allocator,
598                                 coder->temp.buffer, &coder->temp.pos,
599                                 coder->temp.size,
600                                 out, out_pos, out_size, action);
601
602                 if (ret == LZMA_STREAM_END)
603                         // The next coder in the chain hasn't finished
604                         // yet. If the input data is valid, there
605                         // must be no more output coming, but the
606                         // next coder may still need a litle more
607                         // input to detect End of Payload Marker.
608                         coder->this_finished = true;
609                 else if (ret != LZMA_OK)
610                         return ret;
611                 else if (coder->next_finished && *out_pos < out_size)
612                         return LZMA_DATA_ERROR;
613         }
614
615         return LZMA_OK;
616 }
617
618
619 static void
620 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
621 {
622         lzma_next_coder_end(&coder->next, allocator);
623         lzma_next_coder_end(&coder->subfilter, allocator);
624         lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
625         lzma_free(coder->filter_flags.options, allocator);
626         lzma_free(coder, allocator);
627         return;
628 }
629
630
631 extern lzma_ret
632 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
633                 const lzma_filter_info *filters)
634 {
635         if (next->coder == NULL) {
636                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
637                 if (next->coder == NULL)
638                         return LZMA_MEM_ERROR;
639
640                 next->code = &subblock_decode;
641                 next->end = &subblock_decoder_end;
642
643                 next->coder->next = LZMA_NEXT_CODER_INIT;
644                 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
645                 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
646
647         } else {
648                 lzma_next_coder_end(&next->coder->subfilter, allocator);
649                 lzma_free(next->coder->filter_flags.options, allocator);
650         }
651
652         next->coder->filter_flags.options = NULL;
653
654         next->coder->sequence = SEQ_FLAGS;
655         next->coder->uncompressed_size = filters[0].uncompressed_size;
656         next->coder->padding = 0;
657         next->coder->next_finished = false;
658         next->coder->this_finished = false;
659         next->coder->temp.pos = 0;
660         next->coder->temp.size = 0;
661
662         if (filters[0].options != NULL)
663                 next->coder->allow_subfilters = ((lzma_options_subblock *)(
664                                 filters[0].options))->allow_subfilters;
665         else
666                 next->coder->allow_subfilters = false;
667
668         return lzma_next_filter_init(
669                         &next->coder->next, allocator, filters + 1);
670 }