]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/subblock/subblock_decoder.c
Renamed constants:
[icculus/xz.git] / src / liblzma / subblock / subblock_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       subblock_decoder.c
4 /// \brief      Decoder of the Subblock filter
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "filter_decoder.h"
23
24
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
27
28
29 struct lzma_coder_s {
30         lzma_next_coder next;
31
32         enum {
33                 // These require that there is at least one input
34                 // byte available.
35                 SEQ_FLAGS,
36                 SEQ_FILTER_FLAGS,
37                 SEQ_FILTER_END,
38                 SEQ_REPEAT_COUNT_1,
39                 SEQ_REPEAT_COUNT_2,
40                 SEQ_REPEAT_COUNT_3,
41                 SEQ_REPEAT_SIZE,
42                 SEQ_REPEAT_READ_DATA,
43                 SEQ_SIZE_1,
44                 SEQ_SIZE_2,
45                 SEQ_SIZE_3, // This must be right before SEQ_DATA.
46
47                 // These don't require any input to be available.
48                 SEQ_DATA,
49                 SEQ_REPEAT_FAST,
50                 SEQ_REPEAT_NORMAL,
51         } sequence;
52
53         /// Number of bytes left in the current Subblock Data field.
54         size_t size;
55
56         /// Number of consecutive Subblocks with Subblock Type Padding
57         uint32_t padding;
58
59         /// True when .next.code() has returned LZMA_STREAM_END.
60         bool next_finished;
61
62         /// True when the Subblock decoder has detected End of Payload Marker.
63         /// This may become true before next_finished becomes true.
64         bool this_finished;
65
66         /// True if Subfilters are allowed.
67         bool allow_subfilters;
68
69         /// Indicates if at least one byte of decoded output has been
70         /// produced after enabling Subfilter.
71         bool got_output_with_subfilter;
72
73         /// Possible subfilter
74         lzma_next_coder subfilter;
75
76         /// Filter Flags decoder is needed to parse the ID and Properties
77         /// of the subfilter.
78         lzma_next_coder filter_flags_decoder;
79
80         /// The filter_flags_decoder stores its results here.
81         lzma_filter filter_flags;
82
83         /// Options for the Subblock decoder helper. This is used to tell
84         /// the helper when it should return LZMA_STREAM_END to the subfilter.
85         lzma_options_subblock_helper helper;
86
87         struct {
88                 /// How many times buffer should be repeated
89                 size_t count;
90
91                 /// Size of the buffer
92                 size_t size;
93
94                 /// Position in the buffer
95                 size_t pos;
96
97                 /// Buffer to hold the data to be repeated
98                 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
99         } repeat;
100
101         /// Temporary buffer needed when the Subblock filter is not the last
102         /// filter in the chain. The output of the next filter is first
103         /// decoded into buffer[], which is then used as input for the actual
104         /// Subblock decoder.
105         struct {
106                 size_t pos;
107                 size_t size;
108                 uint8_t buffer[LZMA_BUFFER_SIZE];
109         } temp;
110 };
111
112
113 /// Values of valid Subblock Flags
114 enum {
115         FLAG_PADDING,
116         FLAG_EOPM,
117         FLAG_DATA,
118         FLAG_REPEAT,
119         FLAG_SET_SUBFILTER,
120         FLAG_END_SUBFILTER,
121 };
122
123
124 /// Calls the subfilter and updates coder->uncompressed_size.
125 static lzma_ret
126 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
127                 const uint8_t *in, size_t *in_pos,
128                 size_t in_size, uint8_t *restrict out,
129                 size_t *restrict out_pos, size_t out_size, lzma_action action)
130 {
131         assert(coder->subfilter.code != NULL);
132
133         // Call the subfilter.
134         const lzma_ret ret = coder->subfilter.code(
135                         coder->subfilter.coder, allocator,
136                         in, in_pos, in_size, out, out_pos, out_size, action);
137
138         return ret;
139 }
140
141
142 static lzma_ret
143 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
144                 const uint8_t *in, size_t *in_pos,
145                 size_t in_size, uint8_t *restrict out,
146                 size_t *restrict out_pos, size_t out_size, lzma_action action)
147 {
148         while (*out_pos < out_size && (*in_pos < in_size
149                         || coder->sequence >= SEQ_DATA))
150         switch (coder->sequence) {
151         case SEQ_FLAGS: {
152                 // Do the correct action depending on the Subblock Type.
153                 switch (in[*in_pos] >> 4) {
154                 case FLAG_PADDING:
155                         // Only check that reserved bits are zero.
156                         if (++coder->padding > PADDING_MAX
157                                         || in[*in_pos] & 0x0F)
158                                 return LZMA_DATA_ERROR;
159                         ++*in_pos;
160                         break;
161
162                 case FLAG_EOPM:
163                         // There must be no Padding before EOPM.
164                         if (coder->padding != 0)
165                                 return LZMA_DATA_ERROR;
166
167                         // Check that reserved bits are zero.
168                         if (in[*in_pos] & 0x0F)
169                                 return LZMA_DATA_ERROR;
170
171                         // There must be no Subfilter enabled.
172                         if (coder->subfilter.code != NULL)
173                                 return LZMA_DATA_ERROR;
174
175                         ++*in_pos;
176                         return LZMA_STREAM_END;
177
178                 case FLAG_DATA:
179                         // First four bits of the Subblock Data size.
180                         coder->size = in[*in_pos] & 0x0F;
181                         ++*in_pos;
182                         coder->got_output_with_subfilter = true;
183                         coder->sequence = SEQ_SIZE_1;
184                         break;
185
186                 case FLAG_REPEAT:
187                         // First four bits of the Repeat Count. We use
188                         // coder->size as a temporary place for it.
189                         coder->size = in[*in_pos] & 0x0F;
190                         ++*in_pos;
191                         coder->got_output_with_subfilter = true;
192                         coder->sequence = SEQ_REPEAT_COUNT_1;
193                         break;
194
195                 case FLAG_SET_SUBFILTER: {
196                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
197                                         || coder->subfilter.code != NULL
198                                         || !coder->allow_subfilters)
199                                 return LZMA_DATA_ERROR;
200
201                         assert(coder->filter_flags.options == NULL);
202                         abort();
203 //                      return_if_error(lzma_filter_flags_decoder_init(
204 //                                      &coder->filter_flags_decoder,
205 //                                      allocator, &coder->filter_flags));
206
207                         coder->got_output_with_subfilter = false;
208
209                         ++*in_pos;
210                         coder->sequence = SEQ_FILTER_FLAGS;
211                         break;
212                 }
213
214                 case FLAG_END_SUBFILTER:
215                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
216                                         || coder->subfilter.code == NULL
217                                         || !coder->got_output_with_subfilter)
218                                 return LZMA_DATA_ERROR;
219
220                         // Tell the helper filter to indicate End of Input
221                         // to our subfilter.
222                         coder->helper.end_was_reached = true;
223
224                         size_t dummy = 0;
225                         const lzma_ret ret = subfilter_decode(coder, allocator,
226                                         NULL, &dummy, 0, out, out_pos,out_size,
227                                         action);
228
229                         // If we didn't reach the end of the subfilter's output
230                         // yet, return to the application. On the next call we
231                         // will get to this same switch-case again, because we
232                         // haven't updated *in_pos yet.
233                         if (ret != LZMA_STREAM_END)
234                                 return ret;
235
236                         // Free Subfilter's memory. This is a bit debatable,
237                         // since we could avoid some malloc()/free() calls
238                         // if the same Subfilter gets used soon again. But
239                         // if Subfilter isn't used again, we could leave
240                         // a memory-hogging filter dangling until someone
241                         // frees Subblock filter itself.
242                         lzma_next_end(&coder->subfilter, allocator);
243
244                         // Free memory used for subfilter options. This is
245                         // safe, because we don't support any Subfilter that
246                         // would allow pointers in the options structure.
247                         lzma_free(coder->filter_flags.options, allocator);
248                         coder->filter_flags.options = NULL;
249
250                         ++*in_pos;
251
252                         break;
253
254                 default:
255                         return LZMA_DATA_ERROR;
256                 }
257
258                 break;
259         }
260
261         case SEQ_FILTER_FLAGS: {
262                 const lzma_ret ret = coder->filter_flags_decoder.code(
263                                 coder->filter_flags_decoder.coder, allocator,
264                                 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
265                 if (ret != LZMA_STREAM_END)
266                         return ret == LZMA_OPTIONS_ERROR
267                                         ? LZMA_DATA_ERROR : ret;
268
269                 // Don't free the filter_flags_decoder. It doesn't take much
270                 // memory and we may need it again.
271
272                 // Initialize the Subfilter. Subblock and Copy filters are
273                 // not allowed.
274                 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
275                         return LZMA_DATA_ERROR;
276
277                 coder->helper.end_was_reached = false;
278
279                 lzma_filter filters[3] = {
280                         {
281                                 .id = coder->filter_flags.id,
282                                 .options = coder->filter_flags.options,
283                         }, {
284                                 .id = LZMA_FILTER_SUBBLOCK_HELPER,
285                                 .options = &coder->helper,
286                         }, {
287                                 .id = LZMA_VLI_UNKNOWN,
288                                 .options = NULL,
289                         }
290                 };
291
292                 // Optimization: We know that LZMA uses End of Payload Marker
293                 // (not End of Input), so we can omit the helper filter.
294                 if (filters[0].id == LZMA_FILTER_LZMA)
295                         filters[1].id = LZMA_VLI_UNKNOWN;
296
297                 return_if_error(lzma_raw_decoder_init(
298                                 &coder->subfilter, allocator, filters));
299
300                 coder->sequence = SEQ_FLAGS;
301                 break;
302         }
303
304         case SEQ_FILTER_END:
305                 // We are in the beginning of a Subblock. The next Subblock
306                 // whose type is not Padding, must indicate end of Subfilter.
307                 if (in[*in_pos] == (FLAG_PADDING << 4)) {
308                         ++*in_pos;
309                         break;
310                 }
311
312                 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
313                         return LZMA_DATA_ERROR;
314
315                 coder->sequence = SEQ_FLAGS;
316                 break;
317
318         case SEQ_REPEAT_COUNT_1:
319         case SEQ_SIZE_1:
320                 // We use the same code to parse
321                 //  - the Size (28 bits) in Subblocks of type Data; and
322                 //  - the Repeat count (28 bits) in Subblocks of type
323                 //    Repeating Data.
324                 coder->size |= (size_t)(in[*in_pos]) << 4;
325                 ++*in_pos;
326                 ++coder->sequence;
327                 break;
328
329         case SEQ_REPEAT_COUNT_2:
330         case SEQ_SIZE_2:
331                 coder->size |= (size_t)(in[*in_pos]) << 12;
332                 ++*in_pos;
333                 ++coder->sequence;
334                 break;
335
336         case SEQ_REPEAT_COUNT_3:
337         case SEQ_SIZE_3:
338                 coder->size |= (size_t)(in[*in_pos]) << 20;
339                 ++*in_pos;
340
341                 // The real value is the stored value plus one.
342                 ++coder->size;
343
344                 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
345                 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
346                 ++coder->sequence;
347                 break;
348
349         case SEQ_REPEAT_SIZE:
350                 // Move the Repeat Count to the correct variable and parse
351                 // the Size of the Data to be repeated.
352                 coder->repeat.count = coder->size;
353                 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
354                 coder->repeat.pos = 0;
355
356                 // The size of the Data field must be bigger than the number
357                 // of Padding bytes before this Subblock.
358                 if (coder->repeat.size <= coder->padding)
359                         return LZMA_DATA_ERROR;
360
361                 ++*in_pos;
362                 coder->padding = 0;
363                 coder->sequence = SEQ_REPEAT_READ_DATA;
364                 break;
365
366         case SEQ_REPEAT_READ_DATA: {
367                 // Fill coder->repeat.buffer[].
368                 const size_t in_avail = in_size - *in_pos;
369                 const size_t out_avail
370                                 = coder->repeat.size - coder->repeat.pos;
371                 const size_t copy_size = MIN(in_avail, out_avail);
372
373                 memcpy(coder->repeat.buffer + coder->repeat.pos,
374                                 in + *in_pos, copy_size);
375                 *in_pos += copy_size;
376                 coder->repeat.pos += copy_size;
377
378                 if (coder->repeat.pos == coder->repeat.size) {
379                         coder->repeat.pos = 0;
380
381                         if (coder->repeat.size == 1
382                                         && coder->subfilter.code == NULL)
383                                 coder->sequence = SEQ_REPEAT_FAST;
384                         else
385                                 coder->sequence = SEQ_REPEAT_NORMAL;
386                 }
387
388                 break;
389         }
390
391         case SEQ_DATA: {
392                 // The size of the Data field must be bigger than the number
393                 // of Padding bytes before this Subblock.
394                 assert(coder->size > 0);
395                 if (coder->size <= coder->padding)
396                         return LZMA_DATA_ERROR;
397
398                 coder->padding = 0;
399
400                 // Limit the amount of input to match the available
401                 // Subblock Data size.
402                 size_t in_limit;
403                 if (in_size - *in_pos > coder->size)
404                         in_limit = *in_pos + coder->size;
405                 else
406                         in_limit = in_size;
407
408                 if (coder->subfilter.code == NULL) {
409                         const size_t copy_size = lzma_bufcpy(
410                                         in, in_pos, in_limit,
411                                         out, out_pos, out_size);
412
413                         coder->size -= copy_size;
414                 } else {
415                         const size_t in_start = *in_pos;
416                         const lzma_ret ret = subfilter_decode(
417                                         coder, allocator,
418                                         in, in_pos, in_limit,
419                                         out, out_pos, out_size,
420                                         action);
421
422                         // Update the number of unprocessed bytes left in
423                         // this Subblock. This assert() is true because
424                         // in_limit prevents *in_pos getting too big.
425                         assert(*in_pos - in_start <= coder->size);
426                         coder->size -= *in_pos - in_start;
427
428                         if (ret == LZMA_STREAM_END) {
429                                 // End of Subfilter can occur only at
430                                 // a Subblock boundary.
431                                 if (coder->size != 0)
432                                         return LZMA_DATA_ERROR;
433
434                                 // We need a Subblock with Unset
435                                 // Subfilter before more data.
436                                 coder->sequence = SEQ_FILTER_END;
437                                 break;
438                         }
439
440                         if (ret != LZMA_OK)
441                                 return ret;
442                 }
443
444                 // If we couldn't process the whole Subblock Data yet, return.
445                 if (coder->size > 0)
446                         return LZMA_OK;
447
448                 coder->sequence = SEQ_FLAGS;
449                 break;
450         }
451
452         case SEQ_REPEAT_FAST: {
453                 // Optimization for cases when there is only one byte to
454                 // repeat and no Subfilter.
455                 const size_t out_avail = out_size - *out_pos;
456                 const size_t copy_size = MIN(coder->repeat.count, out_avail);
457
458                 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
459
460                 *out_pos += copy_size;
461                 coder->repeat.count -= copy_size;
462
463                 if (coder->repeat.count != 0)
464                         return LZMA_OK;
465
466                 coder->sequence = SEQ_FLAGS;
467                 break;
468         }
469
470         case SEQ_REPEAT_NORMAL:
471                 do {
472                         // Cycle the repeat buffer if needed.
473                         if (coder->repeat.pos == coder->repeat.size) {
474                                 if (--coder->repeat.count == 0) {
475                                         coder->sequence = SEQ_FLAGS;
476                                         break;
477                                 }
478
479                                 coder->repeat.pos = 0;
480                         }
481
482                         if (coder->subfilter.code == NULL) {
483                                 lzma_bufcpy(coder->repeat.buffer,
484                                                 &coder->repeat.pos,
485                                                 coder->repeat.size,
486                                                 out, out_pos, out_size);
487                         } else {
488                                 const lzma_ret ret = subfilter_decode(
489                                                 coder, allocator,
490                                                 coder->repeat.buffer,
491                                                 &coder->repeat.pos,
492                                                 coder->repeat.size,
493                                                 out, out_pos, out_size,
494                                                 action);
495
496                                 if (ret == LZMA_STREAM_END) {
497                                         // End of Subfilter can occur only at
498                                         // a Subblock boundary.
499                                         if (coder->repeat.pos
500                                                         != coder->repeat.size
501                                                         || --coder->repeat
502                                                                 .count != 0)
503                                                 return LZMA_DATA_ERROR;
504
505                                         // We need a Subblock with Unset
506                                         // Subfilter before more data.
507                                         coder->sequence = SEQ_FILTER_END;
508                                         break;
509
510                                 } else if (ret != LZMA_OK) {
511                                         return ret;
512                                 }
513                         }
514                 } while (*out_pos < out_size);
515
516                 break;
517
518         default:
519                 return LZMA_PROG_ERROR;
520         }
521
522         return LZMA_OK;
523 }
524
525
526 static lzma_ret
527 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
528                 const uint8_t *restrict in, size_t *restrict in_pos,
529                 size_t in_size, uint8_t *restrict out,
530                 size_t *restrict out_pos, size_t out_size, lzma_action action)
531 {
532         if (coder->next.code == NULL)
533                 return decode_buffer(coder, allocator, in, in_pos, in_size,
534                                 out, out_pos, out_size, action);
535
536         while (*out_pos < out_size) {
537                 if (!coder->next_finished
538                                 && coder->temp.pos == coder->temp.size) {
539                         coder->temp.pos = 0;
540                         coder->temp.size = 0;
541
542                         const lzma_ret ret = coder->next.code(
543                                         coder->next.coder,
544                                         allocator, in, in_pos, in_size,
545                                         coder->temp.buffer, &coder->temp.size,
546                                         LZMA_BUFFER_SIZE, action);
547
548                         if (ret == LZMA_STREAM_END)
549                                 coder->next_finished = true;
550                         else if (coder->temp.size == 0 || ret != LZMA_OK)
551                                 return ret;
552                 }
553
554                 if (coder->this_finished) {
555                         if (coder->temp.pos != coder->temp.size)
556                                 return LZMA_DATA_ERROR;
557
558                         if (coder->next_finished)
559                                 return LZMA_STREAM_END;
560
561                         return LZMA_OK;
562                 }
563
564                 const lzma_ret ret = decode_buffer(coder, allocator,
565                                 coder->temp.buffer, &coder->temp.pos,
566                                 coder->temp.size,
567                                 out, out_pos, out_size, action);
568
569                 if (ret == LZMA_STREAM_END)
570                         // The next coder in the chain hasn't finished
571                         // yet. If the input data is valid, there
572                         // must be no more output coming, but the
573                         // next coder may still need a litle more
574                         // input to detect End of Payload Marker.
575                         coder->this_finished = true;
576                 else if (ret != LZMA_OK)
577                         return ret;
578                 else if (coder->next_finished && *out_pos < out_size)
579                         return LZMA_DATA_ERROR;
580         }
581
582         return LZMA_OK;
583 }
584
585
586 static void
587 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
588 {
589         lzma_next_end(&coder->next, allocator);
590         lzma_next_end(&coder->subfilter, allocator);
591         lzma_next_end(&coder->filter_flags_decoder, allocator);
592         lzma_free(coder->filter_flags.options, allocator);
593         lzma_free(coder, allocator);
594         return;
595 }
596
597
598 extern lzma_ret
599 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
600                 const lzma_filter_info *filters)
601 {
602         if (next->coder == NULL) {
603                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
604                 if (next->coder == NULL)
605                         return LZMA_MEM_ERROR;
606
607                 next->code = &subblock_decode;
608                 next->end = &subblock_decoder_end;
609
610                 next->coder->next = LZMA_NEXT_CODER_INIT;
611                 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
612                 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
613
614         } else {
615                 lzma_next_end(&next->coder->subfilter, allocator);
616                 lzma_free(next->coder->filter_flags.options, allocator);
617         }
618
619         next->coder->filter_flags.options = NULL;
620
621         next->coder->sequence = SEQ_FLAGS;
622         next->coder->padding = 0;
623         next->coder->next_finished = false;
624         next->coder->this_finished = false;
625         next->coder->temp.pos = 0;
626         next->coder->temp.size = 0;
627
628         if (filters[0].options != NULL)
629                 next->coder->allow_subfilters = ((lzma_options_subblock *)(
630                                 filters[0].options))->allow_subfilters;
631         else
632                 next->coder->allow_subfilters = false;
633
634         return lzma_next_filter_init(
635                         &next->coder->next, allocator, filters + 1);
636 }