]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/subblock/subblock_decoder.c
Oh well, big messy commit again. Some highlights:
[icculus/xz.git] / src / liblzma / subblock / subblock_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       subblock_decoder.c
4 /// \brief      Decoder of the Subblock filter
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "subblock_decoder.h"
21 #include "subblock_decoder_helper.h"
22 #include "filter_decoder.h"
23
24
25 /// Maximum number of consecutive Subblocks with Subblock Type Padding
26 #define PADDING_MAX 31
27
28
29 struct lzma_coder_s {
30         lzma_next_coder next;
31
32         enum {
33                 // These require that there is at least one input
34                 // byte available.
35                 SEQ_FLAGS,
36                 SEQ_FILTER_FLAGS,
37                 SEQ_FILTER_END,
38                 SEQ_REPEAT_COUNT_1,
39                 SEQ_REPEAT_COUNT_2,
40                 SEQ_REPEAT_COUNT_3,
41                 SEQ_REPEAT_SIZE,
42                 SEQ_REPEAT_READ_DATA,
43                 SEQ_SIZE_1,
44                 SEQ_SIZE_2,
45                 SEQ_SIZE_3, // This must be right before SEQ_DATA.
46
47                 // These don't require any input to be available.
48                 SEQ_DATA,
49                 SEQ_REPEAT_FAST,
50                 SEQ_REPEAT_NORMAL,
51         } sequence;
52
53         /// Number of bytes left in the current Subblock Data field.
54         size_t size;
55
56         /// Number of consecutive Subblocks with Subblock Type Padding
57         uint32_t padding;
58
59         /// True when .next.code() has returned LZMA_STREAM_END.
60         bool next_finished;
61
62         /// True when the Subblock decoder has detected End of Payload Marker.
63         /// This may become true before next_finished becomes true.
64         bool this_finished;
65
66         /// True if Subfilters are allowed.
67         bool allow_subfilters;
68
69         /// Indicates if at least one byte of decoded output has been
70         /// produced after enabling Subfilter.
71         bool got_output_with_subfilter;
72
73         /// Possible subfilter
74         lzma_next_coder subfilter;
75
76         /// Filter Flags decoder is needed to parse the ID and Properties
77         /// of the subfilter.
78         lzma_next_coder filter_flags_decoder;
79
80         /// The filter_flags_decoder stores its results here.
81         lzma_filter filter_flags;
82
83         /// Options for the Subblock decoder helper. This is used to tell
84         /// the helper when it should return LZMA_STREAM_END to the subfilter.
85         lzma_options_subblock_helper helper;
86
87         struct {
88                 /// How many times buffer should be repeated
89                 size_t count;
90
91                 /// Size of the buffer
92                 size_t size;
93
94                 /// Position in the buffer
95                 size_t pos;
96
97                 /// Buffer to hold the data to be repeated
98                 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
99         } repeat;
100
101         /// Temporary buffer needed when the Subblock filter is not the last
102         /// filter in the chain. The output of the next filter is first
103         /// decoded into buffer[], which is then used as input for the actual
104         /// Subblock decoder.
105         struct {
106                 size_t pos;
107                 size_t size;
108                 uint8_t buffer[LZMA_BUFFER_SIZE];
109         } temp;
110 };
111
112
113 /// Values of valid Subblock Flags
114 enum {
115         FLAG_PADDING,
116         FLAG_EOPM,
117         FLAG_DATA,
118         FLAG_REPEAT,
119         FLAG_SET_SUBFILTER,
120         FLAG_END_SUBFILTER,
121 };
122
123
124 /// Calls the subfilter and updates coder->uncompressed_size.
125 static lzma_ret
126 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
127                 const uint8_t *in, size_t *in_pos,
128                 size_t in_size, uint8_t *restrict out,
129                 size_t *restrict out_pos, size_t out_size, lzma_action action)
130 {
131         assert(coder->subfilter.code != NULL);
132
133         // Call the subfilter.
134         const lzma_ret ret = coder->subfilter.code(
135                         coder->subfilter.coder, allocator,
136                         in, in_pos, in_size, out, out_pos, out_size, action);
137
138         return ret;
139 }
140
141
142 static lzma_ret
143 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
144                 const uint8_t *in, size_t *in_pos,
145                 size_t in_size, uint8_t *restrict out,
146                 size_t *restrict out_pos, size_t out_size, lzma_action action)
147 {
148         while (*out_pos < out_size && (*in_pos < in_size
149                         || coder->sequence >= SEQ_DATA))
150         switch (coder->sequence) {
151         case SEQ_FLAGS: {
152                 // Do the correct action depending on the Subblock Type.
153                 switch (in[*in_pos] >> 4) {
154                 case FLAG_PADDING:
155                         // Only check that reserved bits are zero.
156                         if (++coder->padding > PADDING_MAX
157                                         || in[*in_pos] & 0x0F)
158                                 return LZMA_DATA_ERROR;
159                         ++*in_pos;
160                         break;
161
162                 case FLAG_EOPM:
163                         // There must be no Padding before EOPM.
164                         if (coder->padding != 0)
165                                 return LZMA_DATA_ERROR;
166
167                         // Check that reserved bits are zero.
168                         if (in[*in_pos] & 0x0F)
169                                 return LZMA_DATA_ERROR;
170
171                         // There must be no Subfilter enabled.
172                         if (coder->subfilter.code != NULL)
173                                 return LZMA_DATA_ERROR;
174
175                         ++*in_pos;
176                         return LZMA_STREAM_END;
177
178                 case FLAG_DATA:
179                         // First four bits of the Subblock Data size.
180                         coder->size = in[*in_pos] & 0x0F;
181                         ++*in_pos;
182                         coder->got_output_with_subfilter = true;
183                         coder->sequence = SEQ_SIZE_1;
184                         break;
185
186                 case FLAG_REPEAT:
187                         // First four bits of the Repeat Count. We use
188                         // coder->size as a temporary place for it.
189                         coder->size = in[*in_pos] & 0x0F;
190                         ++*in_pos;
191                         coder->got_output_with_subfilter = true;
192                         coder->sequence = SEQ_REPEAT_COUNT_1;
193                         break;
194
195                 case FLAG_SET_SUBFILTER: {
196                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
197                                         || coder->subfilter.code != NULL
198                                         || !coder->allow_subfilters)
199                                 return LZMA_DATA_ERROR;
200
201                         assert(coder->filter_flags.options == NULL);
202                         abort();
203 //                      return_if_error(lzma_filter_flags_decoder_init(
204 //                                      &coder->filter_flags_decoder,
205 //                                      allocator, &coder->filter_flags));
206
207                         coder->got_output_with_subfilter = false;
208
209                         ++*in_pos;
210                         coder->sequence = SEQ_FILTER_FLAGS;
211                         break;
212                 }
213
214                 case FLAG_END_SUBFILTER: {
215                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
216                                         || coder->subfilter.code == NULL
217                                         || !coder->got_output_with_subfilter)
218                                 return LZMA_DATA_ERROR;
219
220                         // Tell the helper filter to indicate End of Input
221                         // to our subfilter.
222                         coder->helper.end_was_reached = true;
223
224                         size_t dummy = 0;
225                         const lzma_ret ret = subfilter_decode(coder, allocator,
226                                         NULL, &dummy, 0, out, out_pos,out_size,
227                                         action);
228
229                         // If we didn't reach the end of the subfilter's output
230                         // yet, return to the application. On the next call we
231                         // will get to this same switch-case again, because we
232                         // haven't updated *in_pos yet.
233                         if (ret != LZMA_STREAM_END)
234                                 return ret;
235
236                         // Free Subfilter's memory. This is a bit debatable,
237                         // since we could avoid some malloc()/free() calls
238                         // if the same Subfilter gets used soon again. But
239                         // if Subfilter isn't used again, we could leave
240                         // a memory-hogging filter dangling until someone
241                         // frees Subblock filter itself.
242                         lzma_next_end(&coder->subfilter, allocator);
243
244                         // Free memory used for subfilter options. This is
245                         // safe, because we don't support any Subfilter that
246                         // would allow pointers in the options structure.
247                         lzma_free(coder->filter_flags.options, allocator);
248                         coder->filter_flags.options = NULL;
249
250                         ++*in_pos;
251
252                         break;
253                 }
254
255                 default:
256                         return LZMA_DATA_ERROR;
257                 }
258
259                 break;
260         }
261
262         case SEQ_FILTER_FLAGS: {
263                 const lzma_ret ret = coder->filter_flags_decoder.code(
264                                 coder->filter_flags_decoder.coder, allocator,
265                                 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
266                 if (ret != LZMA_STREAM_END)
267                         return ret == LZMA_OPTIONS_ERROR
268                                         ? LZMA_DATA_ERROR : ret;
269
270                 // Don't free the filter_flags_decoder. It doesn't take much
271                 // memory and we may need it again.
272
273                 // Initialize the Subfilter. Subblock and Copy filters are
274                 // not allowed.
275                 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
276                         return LZMA_DATA_ERROR;
277
278                 coder->helper.end_was_reached = false;
279
280                 lzma_filter filters[3] = {
281                         {
282                                 .id = coder->filter_flags.id,
283                                 .options = coder->filter_flags.options,
284                         }, {
285                                 .id = LZMA_FILTER_SUBBLOCK_HELPER,
286                                 .options = &coder->helper,
287                         }, {
288                                 .id = LZMA_VLI_UNKNOWN,
289                                 .options = NULL,
290                         }
291                 };
292
293                 // Optimization: We know that LZMA uses End of Payload Marker
294                 // (not End of Input), so we can omit the helper filter.
295                 if (filters[0].id == LZMA_FILTER_LZMA1)
296                         filters[1].id = LZMA_VLI_UNKNOWN;
297
298                 return_if_error(lzma_raw_decoder_init(
299                                 &coder->subfilter, allocator, filters));
300
301                 coder->sequence = SEQ_FLAGS;
302                 break;
303         }
304
305         case SEQ_FILTER_END:
306                 // We are in the beginning of a Subblock. The next Subblock
307                 // whose type is not Padding, must indicate end of Subfilter.
308                 if (in[*in_pos] == (FLAG_PADDING << 4)) {
309                         ++*in_pos;
310                         break;
311                 }
312
313                 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
314                         return LZMA_DATA_ERROR;
315
316                 coder->sequence = SEQ_FLAGS;
317                 break;
318
319         case SEQ_REPEAT_COUNT_1:
320         case SEQ_SIZE_1:
321                 // We use the same code to parse
322                 //  - the Size (28 bits) in Subblocks of type Data; and
323                 //  - the Repeat count (28 bits) in Subblocks of type
324                 //    Repeating Data.
325                 coder->size |= (size_t)(in[*in_pos]) << 4;
326                 ++*in_pos;
327                 ++coder->sequence;
328                 break;
329
330         case SEQ_REPEAT_COUNT_2:
331         case SEQ_SIZE_2:
332                 coder->size |= (size_t)(in[*in_pos]) << 12;
333                 ++*in_pos;
334                 ++coder->sequence;
335                 break;
336
337         case SEQ_REPEAT_COUNT_3:
338         case SEQ_SIZE_3:
339                 coder->size |= (size_t)(in[*in_pos]) << 20;
340                 ++*in_pos;
341
342                 // The real value is the stored value plus one.
343                 ++coder->size;
344
345                 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
346                 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
347                 ++coder->sequence;
348                 break;
349
350         case SEQ_REPEAT_SIZE:
351                 // Move the Repeat Count to the correct variable and parse
352                 // the Size of the Data to be repeated.
353                 coder->repeat.count = coder->size;
354                 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
355                 coder->repeat.pos = 0;
356
357                 // The size of the Data field must be bigger than the number
358                 // of Padding bytes before this Subblock.
359                 if (coder->repeat.size <= coder->padding)
360                         return LZMA_DATA_ERROR;
361
362                 ++*in_pos;
363                 coder->padding = 0;
364                 coder->sequence = SEQ_REPEAT_READ_DATA;
365                 break;
366
367         case SEQ_REPEAT_READ_DATA: {
368                 // Fill coder->repeat.buffer[].
369                 const size_t in_avail = in_size - *in_pos;
370                 const size_t out_avail
371                                 = coder->repeat.size - coder->repeat.pos;
372                 const size_t copy_size = MIN(in_avail, out_avail);
373
374                 memcpy(coder->repeat.buffer + coder->repeat.pos,
375                                 in + *in_pos, copy_size);
376                 *in_pos += copy_size;
377                 coder->repeat.pos += copy_size;
378
379                 if (coder->repeat.pos == coder->repeat.size) {
380                         coder->repeat.pos = 0;
381
382                         if (coder->repeat.size == 1
383                                         && coder->subfilter.code == NULL)
384                                 coder->sequence = SEQ_REPEAT_FAST;
385                         else
386                                 coder->sequence = SEQ_REPEAT_NORMAL;
387                 }
388
389                 break;
390         }
391
392         case SEQ_DATA: {
393                 // The size of the Data field must be bigger than the number
394                 // of Padding bytes before this Subblock.
395                 assert(coder->size > 0);
396                 if (coder->size <= coder->padding)
397                         return LZMA_DATA_ERROR;
398
399                 coder->padding = 0;
400
401                 // Limit the amount of input to match the available
402                 // Subblock Data size.
403                 size_t in_limit;
404                 if (in_size - *in_pos > coder->size)
405                         in_limit = *in_pos + coder->size;
406                 else
407                         in_limit = in_size;
408
409                 if (coder->subfilter.code == NULL) {
410                         const size_t copy_size = lzma_bufcpy(
411                                         in, in_pos, in_limit,
412                                         out, out_pos, out_size);
413
414                         coder->size -= copy_size;
415                 } else {
416                         const size_t in_start = *in_pos;
417                         const lzma_ret ret = subfilter_decode(
418                                         coder, allocator,
419                                         in, in_pos, in_limit,
420                                         out, out_pos, out_size,
421                                         action);
422
423                         // Update the number of unprocessed bytes left in
424                         // this Subblock. This assert() is true because
425                         // in_limit prevents *in_pos getting too big.
426                         assert(*in_pos - in_start <= coder->size);
427                         coder->size -= *in_pos - in_start;
428
429                         if (ret == LZMA_STREAM_END) {
430                                 // End of Subfilter can occur only at
431                                 // a Subblock boundary.
432                                 if (coder->size != 0)
433                                         return LZMA_DATA_ERROR;
434
435                                 // We need a Subblock with Unset
436                                 // Subfilter before more data.
437                                 coder->sequence = SEQ_FILTER_END;
438                                 break;
439                         }
440
441                         if (ret != LZMA_OK)
442                                 return ret;
443                 }
444
445                 // If we couldn't process the whole Subblock Data yet, return.
446                 if (coder->size > 0)
447                         return LZMA_OK;
448
449                 coder->sequence = SEQ_FLAGS;
450                 break;
451         }
452
453         case SEQ_REPEAT_FAST: {
454                 // Optimization for cases when there is only one byte to
455                 // repeat and no Subfilter.
456                 const size_t out_avail = out_size - *out_pos;
457                 const size_t copy_size = MIN(coder->repeat.count, out_avail);
458
459                 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
460
461                 *out_pos += copy_size;
462                 coder->repeat.count -= copy_size;
463
464                 if (coder->repeat.count != 0)
465                         return LZMA_OK;
466
467                 coder->sequence = SEQ_FLAGS;
468                 break;
469         }
470
471         case SEQ_REPEAT_NORMAL:
472                 do {
473                         // Cycle the repeat buffer if needed.
474                         if (coder->repeat.pos == coder->repeat.size) {
475                                 if (--coder->repeat.count == 0) {
476                                         coder->sequence = SEQ_FLAGS;
477                                         break;
478                                 }
479
480                                 coder->repeat.pos = 0;
481                         }
482
483                         if (coder->subfilter.code == NULL) {
484                                 lzma_bufcpy(coder->repeat.buffer,
485                                                 &coder->repeat.pos,
486                                                 coder->repeat.size,
487                                                 out, out_pos, out_size);
488                         } else {
489                                 const lzma_ret ret = subfilter_decode(
490                                                 coder, allocator,
491                                                 coder->repeat.buffer,
492                                                 &coder->repeat.pos,
493                                                 coder->repeat.size,
494                                                 out, out_pos, out_size,
495                                                 action);
496
497                                 if (ret == LZMA_STREAM_END) {
498                                         // End of Subfilter can occur only at
499                                         // a Subblock boundary.
500                                         if (coder->repeat.pos
501                                                         != coder->repeat.size
502                                                         || --coder->repeat
503                                                                 .count != 0)
504                                                 return LZMA_DATA_ERROR;
505
506                                         // We need a Subblock with Unset
507                                         // Subfilter before more data.
508                                         coder->sequence = SEQ_FILTER_END;
509                                         break;
510
511                                 } else if (ret != LZMA_OK) {
512                                         return ret;
513                                 }
514                         }
515                 } while (*out_pos < out_size);
516
517                 break;
518
519         default:
520                 return LZMA_PROG_ERROR;
521         }
522
523         return LZMA_OK;
524 }
525
526
527 static lzma_ret
528 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
529                 const uint8_t *restrict in, size_t *restrict in_pos,
530                 size_t in_size, uint8_t *restrict out,
531                 size_t *restrict out_pos, size_t out_size, lzma_action action)
532 {
533         if (coder->next.code == NULL)
534                 return decode_buffer(coder, allocator, in, in_pos, in_size,
535                                 out, out_pos, out_size, action);
536
537         while (*out_pos < out_size) {
538                 if (!coder->next_finished
539                                 && coder->temp.pos == coder->temp.size) {
540                         coder->temp.pos = 0;
541                         coder->temp.size = 0;
542
543                         const lzma_ret ret = coder->next.code(
544                                         coder->next.coder,
545                                         allocator, in, in_pos, in_size,
546                                         coder->temp.buffer, &coder->temp.size,
547                                         LZMA_BUFFER_SIZE, action);
548
549                         if (ret == LZMA_STREAM_END)
550                                 coder->next_finished = true;
551                         else if (coder->temp.size == 0 || ret != LZMA_OK)
552                                 return ret;
553                 }
554
555                 if (coder->this_finished) {
556                         if (coder->temp.pos != coder->temp.size)
557                                 return LZMA_DATA_ERROR;
558
559                         if (coder->next_finished)
560                                 return LZMA_STREAM_END;
561
562                         return LZMA_OK;
563                 }
564
565                 const lzma_ret ret = decode_buffer(coder, allocator,
566                                 coder->temp.buffer, &coder->temp.pos,
567                                 coder->temp.size,
568                                 out, out_pos, out_size, action);
569
570                 if (ret == LZMA_STREAM_END)
571                         // The next coder in the chain hasn't finished
572                         // yet. If the input data is valid, there
573                         // must be no more output coming, but the
574                         // next coder may still need a litle more
575                         // input to detect End of Payload Marker.
576                         coder->this_finished = true;
577                 else if (ret != LZMA_OK)
578                         return ret;
579                 else if (coder->next_finished && *out_pos < out_size)
580                         return LZMA_DATA_ERROR;
581         }
582
583         return LZMA_OK;
584 }
585
586
587 static void
588 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
589 {
590         lzma_next_end(&coder->next, allocator);
591         lzma_next_end(&coder->subfilter, allocator);
592         lzma_next_end(&coder->filter_flags_decoder, allocator);
593         lzma_free(coder->filter_flags.options, allocator);
594         lzma_free(coder, allocator);
595         return;
596 }
597
598
599 extern lzma_ret
600 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
601                 const lzma_filter_info *filters)
602 {
603         if (next->coder == NULL) {
604                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
605                 if (next->coder == NULL)
606                         return LZMA_MEM_ERROR;
607
608                 next->code = &subblock_decode;
609                 next->end = &subblock_decoder_end;
610
611                 next->coder->next = LZMA_NEXT_CODER_INIT;
612                 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
613                 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
614
615         } else {
616                 lzma_next_end(&next->coder->subfilter, allocator);
617                 lzma_free(next->coder->filter_flags.options, allocator);
618         }
619
620         next->coder->filter_flags.options = NULL;
621
622         next->coder->sequence = SEQ_FLAGS;
623         next->coder->padding = 0;
624         next->coder->next_finished = false;
625         next->coder->this_finished = false;
626         next->coder->temp.pos = 0;
627         next->coder->temp.size = 0;
628
629         if (filters[0].options != NULL)
630                 next->coder->allow_subfilters = ((lzma_options_subblock *)(
631                                 filters[0].options))->allow_subfilters;
632         else
633                 next->coder->allow_subfilters = false;
634
635         return lzma_next_filter_init(
636                         &next->coder->next, allocator, filters + 1);
637 }