]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/metadata_decoder.c
Fix decoding of empty Metadata Blocks, that don't have
[icculus/xz.git] / src / liblzma / common / metadata_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       metadata_decoder.c
4 /// \brief      Decodes metadata stored in Metadata Blocks
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "metadata_decoder.h"
21 #include "block_decoder.h"
22
23
24 /// Maximum size of the Data of a single Extra Record that process() agrees
25 /// to store in memory. A bigger Extra Record Size makes process() return
26 /// LZMA_DATA_ERROR before anything is allocated for the Data.
/// The limit is mostly here to make sure that the parsed lzma_vli fits into
/// size_t (one extra byte is allocated for the trailing '\0'). Still, maybe
/// this should be smaller.
27 #define EXTRA_SIZE_MAX (SIZE_MAX / 4)
28
29
/// State of the Metadata decoder. It is allocated in metadata_decoder_init()
/// (as sizeof(lzma_coder)) and freed in metadata_decoder_end().
30 struct lzma_coder_s {
            /// Current state of the parser in process().
            ///
            /// NOTE: The order of the SEQ_EXTRA_* values matters. process()
            /// moves between them with ++, -- and -= 2, and the *_ID and
            /// *_SIZE states of the storing and the dummy variant share
            /// the same code.
31         enum {
                    // Metadata Flags byte
32                 SEQ_FLAGS,
                    // Optional fields; each one is skipped if its flag
                    // wasn't set in Metadata Flags.
33                 SEQ_HEADER_METADATA_SIZE,
34                 SEQ_TOTAL_SIZE,
35                 SEQ_UNCOMPRESSED_SIZE,
                    // Index: Record count, then for every Record a list
                    // node with Total Size, and finally the Uncompressed
                    // Sizes for the same nodes.
36                 SEQ_INDEX_COUNT,
37                 SEQ_INDEX_ALLOC,
38                 SEQ_INDEX_TOTAL_SIZE,
39                 SEQ_INDEX_UNCOMPRESSED_SIZE,
                    // Selects between the storing and the dummy variant
                    // of Extra parsing.
40                 SEQ_EXTRA_PREPARE,
                    // Extra Records stored to metadata->extra
41                 SEQ_EXTRA_ALLOC,
42                 SEQ_EXTRA_ID,
43                 SEQ_EXTRA_SIZE,
44                 SEQ_EXTRA_DATA_ALLOC,
45                 SEQ_EXTRA_DATA_COPY,
                    // Extra Records parsed into extra_dummy and skipped
46                 SEQ_EXTRA_DUMMY_ALLOC,
47                 SEQ_EXTRA_DUMMY_ID,
48                 SEQ_EXTRA_DUMMY_SIZE,
49                 SEQ_EXTRA_DUMMY_COPY,
50         } sequence;
51
52         /// Number of "things" left to be parsed. If we hit end of input
53         /// when this isn't zero, we have corrupt Metadata Block.
            /// It starts as one in metadata_decoder_init(), so that a Block
            /// without even the Metadata Flags byte is detected too.
54         size_t todo_count;
55
56         /// Position in variable-length integers
            /// (reset to zero by read_vli() after every complete integer).
            /// It is also given to bufcpy() as the output position when
            /// the Data of an Extra Record is being copied.
57         size_t pos;
58
59         /// Temporary variable needed to decode variables whose type
60         /// is size_t instead of lzma_vli.
            /// In SEQ_EXTRA_DUMMY_COPY it holds the number of bytes of
            /// Extra Record Data still to be skipped.
61         lzma_vli tmp;
62
63         /// Pointer to target structure to hold the parsed results.
64         lzma_metadata *metadata;
65
66         /// The Index Record we currently are parsing
67         lzma_index *index_current;
68
69         /// Number of Records in Index
            /// (counted down while the Total Size fields are parsed).
            /// SEQ_FLAGS sets this to one just to tell that Index is
            /// present; zero means that there is no Index.
70         size_t index_count;
71
72         /// Sum of Total Size fields in the Index
73         lzma_vli index_total_size;
74
75         /// Sum of Uncompressed Size fields in the Index
76         lzma_vli index_uncompressed_size;
77
78         /// True if Extra is present.
            /// This is set in SEQ_FLAGS from the bit 0x80 of Metadata Flags.
79         bool has_extra;
80
81         /// True if we have been requested to store the Extra to *metadata.
82         bool want_extra;
83
84         /// Pointer to the end of the Extra Record list.
            /// It points to extra_dummy when the Extra isn't being stored.
85         lzma_extra *extra_tail;
86
87         /// Dummy Extra Record used when only verifying integrity of Extra
88         /// (not storing it to RAM).
89         lzma_extra extra_dummy;
90
91         /// Block decoder
            /// whose output is the encoded Metadata read into buffer[].
92         lzma_next_coder block_decoder;
93
94         /// buffer[buffer_pos] is the next byte to process.
95         size_t buffer_pos;
96
97         /// buffer[buffer_size] is the first byte to not process.
98         size_t buffer_size;
99
100         /// Temporary buffer to which encoded Metadata is read before
101         /// it is parsed.
102         uint8_t buffer[LZMA_BUFFER_SIZE];
103 };
104
105
/// Decodes a variable-length integer from coder->buffer into the lvalue
/// given as `num' (for example coder->tmp or a member of *coder->metadata).
///
/// This can be used only in functions that have `lzma_coder *coder' in scope
/// and that return lzma_ret: unless lzma_vli_decode() returns
/// LZMA_STREAM_END, its return value is returned from the calling function
/// as is. coder->pos is left untouched in that case, and it is reset to zero
/// only once the integer has been completely decoded.
#define read_vli(num) \
do { \
	const lzma_ret vli_ret = lzma_vli_decode( \
			&(num), &coder->pos, \
			coder->buffer, &coder->buffer_pos, \
			coder->buffer_size); \
	if (vli_ret != LZMA_STREAM_END) \
		return vli_ret; \
	\
	coder->pos = 0; \
} while (0)
118
119
/// \brief      Parses the encoded Metadata that currently is in coder->buffer
///
/// Runs the coder->sequence state machine over the bytes between
/// coder->buffer_pos and coder->buffer_size, and stores the parsed fields
/// to *coder->metadata. All the state is kept in *coder, so the parsing
/// continues from where it stopped when this function is called again after
/// the buffer has been refilled.
///
/// \param      coder       Decoder state
/// \param      allocator   Passed to lzma_alloc() when Index and Extra
///                         Records are allocated
///
/// \return     - LZMA_OK: The loop ended because buffer_pos reached
///               buffer_size. Return values of lzma_vli_decode() other
///               than LZMA_STREAM_END are returned as is via read_vli().
///             - LZMA_HEADER_ERROR: Reserved bits are set in Metadata Flags.
///             - LZMA_DATA_ERROR: A field has an invalid value, or the
///               fields are inconsistent with each other.
///             - LZMA_MEM_ERROR: lzma_alloc() returned NULL.
///             - LZMA_PROG_ERROR: coder->sequence has an unknown value.
120 static lzma_ret
121 process(lzma_coder *coder, lzma_allocator *allocator)
122 {
            // Every state is entered only when there is at least one byte
            // of input available. States that don't consume input just
            // change coder->sequence and let the loop test run again.
123         while (coder->buffer_pos < coder->buffer_size)
124         switch (coder->sequence) {
125         case SEQ_FLAGS:
126                 // Reserved bits must be unset.
127                 if (coder->buffer[coder->buffer_pos] & 0x70)
128                         return LZMA_HEADER_ERROR;
129
                    // Metadata Flags itself has now been seen. From here
                    // on todo_count counts the flagged fields that are
                    // still to be parsed.
130                 coder->todo_count = 0;
131
132                 // If Size of Header Metadata is present, prepare the
133                 // variable for variable-length integer decoding. Otherwise
134                 // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
135                 // field isn't present.
                    // (The fields have already been set to
                    // LZMA_VLI_VALUE_UNKNOWN in metadata_decoder_init(),
                    // so here we only need to zero those that are present.)
136                 if (coder->buffer[coder->buffer_pos] & 0x01) {
137                         coder->metadata->header_metadata_size = 0;
138                         ++coder->todo_count;
139                 }
140
141                 if (coder->buffer[coder->buffer_pos] & 0x02) {
142                         coder->metadata->total_size = 0;
143                         ++coder->todo_count;
144                 }
145
146                 if (coder->buffer[coder->buffer_pos] & 0x04) {
147                         coder->metadata->uncompressed_size = 0;
148                         ++coder->todo_count;
149                 }
150
151                 if (coder->buffer[coder->buffer_pos] & 0x08) {
152                         // Setting index_count to 1 is just to indicate that
153                         // Index is present. The real size is parsed later.
154                         coder->index_count = 1;
155                         ++coder->todo_count;
156                 }
157
                    // Extra isn't counted in todo_count here. The Extra
                    // states set todo_count themselves while they are in
                    // the middle of an Extra Record.
158                 coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
159                                 != 0;
160
161                 ++coder->buffer_pos;
162                 coder->sequence = SEQ_HEADER_METADATA_SIZE;
163                 break;
164
165         case SEQ_HEADER_METADATA_SIZE:
166                 if (coder->metadata->header_metadata_size
167                                 != LZMA_VLI_VALUE_UNKNOWN) {
168                         read_vli(coder->metadata->header_metadata_size);
169
                            // If the field is present, it must not be zero.
170                         if (coder->metadata->header_metadata_size == 0)
171                                 return LZMA_DATA_ERROR;
172
173                         --coder->todo_count;
174                 }
175
176                 coder->sequence = SEQ_TOTAL_SIZE;
177                 break;
178
179         case SEQ_TOTAL_SIZE:
180                 if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
181                         read_vli(coder->metadata->total_size);
182
183                         if (coder->metadata->total_size == 0)
184                                 return LZMA_DATA_ERROR;
185
186                         --coder->todo_count;
187                 }
188
189                 coder->sequence = SEQ_UNCOMPRESSED_SIZE;
190                 break;
191
192         case SEQ_UNCOMPRESSED_SIZE:
                    // Unlike the two fields above, zero is accepted here.
193                 if (coder->metadata->uncompressed_size
194                                 != LZMA_VLI_VALUE_UNKNOWN) {
195                         read_vli(coder->metadata->uncompressed_size);
196                         --coder->todo_count;
197                 }
198
199                 coder->sequence = SEQ_INDEX_COUNT;
200                 break;
201
202         case SEQ_INDEX_COUNT:
                    // Zero means that the Index flag wasn't set.
203                 if (coder->index_count == 0) {
204                         coder->sequence = SEQ_EXTRA_PREPARE;
205                         break;
206                 }
207
208                 read_vli(coder->tmp);
209
210                 // Index must not be empty nor far too big (wouldn't fit
211                 // in RAM).
212                 if (coder->tmp == 0 || coder->tmp
213                                 >= SIZE_MAX / sizeof(lzma_index))
214                         return LZMA_DATA_ERROR;
215
216                 coder->index_count = (size_t)(coder->tmp);
217                 coder->tmp = 0;
218
219                 coder->sequence = SEQ_INDEX_ALLOC;
220                 break;
221
222         case SEQ_INDEX_ALLOC: {
                    // Allocate one list node per Index Record and append it
                    // to the list at coder->metadata->index.
223                 lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
224                 if (i == NULL)
225                         return LZMA_MEM_ERROR;
226
227                 i->total_size = 0;
228                 i->uncompressed_size = 0;
229                 i->next = NULL;
230
231                 if (coder->metadata->index == NULL)
232                         coder->metadata->index = i;
233                 else
234                         coder->index_current->next = i;
235
236                 coder->index_current = i;
237
                    // The state is changed before falling through so that
                    // we won't allocate again if read_vli() returns in
                    // the middle of the integer.
238                 coder->sequence = SEQ_INDEX_TOTAL_SIZE;
239         }
240
241         // Fall through
242
243         case SEQ_INDEX_TOTAL_SIZE: {
244                 read_vli(coder->index_current->total_size);
245
246                 coder->index_total_size += coder->index_current->total_size;
247                 if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
248                         return LZMA_DATA_ERROR;
249
250                 // No Block can have Total Size of zero bytes.
251                 if (coder->index_current->total_size == 0)
252                         return LZMA_DATA_ERROR;
253
254                 if (--coder->index_count == 0) {
255                         // If Total Size is present, it must match the sum
256                         // of Total Sizes in Index.
257                         if (coder->metadata->total_size
258                                                 != LZMA_VLI_VALUE_UNKNOWN
259                                         && coder->metadata->total_size
260                                                 != coder->index_total_size)
261                                 return LZMA_DATA_ERROR;
262
                            // All the Total Sizes have been read. Go back
                            // to the head of the list to fill in the
                            // Uncompressed Sizes.
263                         coder->index_current = coder->metadata->index;
264                         coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
265                 } else {
266                         coder->sequence = SEQ_INDEX_ALLOC;
267                 }
268
269                 break;
270         }
271
272         case SEQ_INDEX_UNCOMPRESSED_SIZE: {
273                 read_vli(coder->index_current->uncompressed_size);
274
275                 coder->index_uncompressed_size
276                                 += coder->index_current->uncompressed_size;
277                 if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
278                         return LZMA_DATA_ERROR;
279
280                 coder->index_current = coder->index_current->next;
281                 if (coder->index_current == NULL) {
                            // If Uncompressed Size is present, it must
                            // match the sum of Uncompressed Sizes in Index.
282                         if (coder->metadata->uncompressed_size
283                                                 != LZMA_VLI_VALUE_UNKNOWN
284                                         && coder->metadata->uncompressed_size
285                                         != coder->index_uncompressed_size)
286                                 return LZMA_DATA_ERROR;
287
                            // The whole Index is counted as one "thing".
288                         --coder->todo_count;
289                         coder->sequence = SEQ_EXTRA_PREPARE;
290                 }
291
292                 break;
293         }
294
295         case SEQ_EXTRA_PREPARE:
296                 assert(coder->todo_count == 0);
297
298                 // If we get here, we have at least one byte of input left.
299                 // If "Extra is present" flag is unset in Metadata Flags,
300                 // it means that there is some garbage and we return an error.
301                 if (!coder->has_extra)
302                         return LZMA_DATA_ERROR;
303
                    // If the caller doesn't want the Extra, parse it into
                    // extra_dummy using the SEQ_EXTRA_DUMMY_* states.
304                 if (!coder->want_extra) {
305                         coder->extra_tail = &coder->extra_dummy;
306                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
307                         break;
308                 }
309
310                 coder->sequence = SEQ_EXTRA_ALLOC;
311
312         // Fall through
313
314         case SEQ_EXTRA_ALLOC: {
                    // Allocate a new Extra Record and append it to the list
                    // at coder->metadata->extra.
315                 lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
316                 if (e == NULL)
317                         return LZMA_MEM_ERROR;
318
319                 e->next = NULL;
320                 e->id = 0;
321                 e->size = 0;
322                 e->data = NULL;
323
324                 if (coder->metadata->extra == NULL)
325                         coder->metadata->extra = e;
326                 else
327                         coder->extra_tail->next = e;
328
329                 coder->extra_tail = e;
330
                    // We are now in the middle of an Extra Record. If the
                    // input ends before todo_count is zeroed again,
                    // metadata_decode() returns LZMA_DATA_ERROR.
331                 coder->todo_count = 1;
332                 coder->sequence = SEQ_EXTRA_ID;
333         }
334
335         // Fall through
336
337         case SEQ_EXTRA_ID:
338         case SEQ_EXTRA_DUMMY_ID:
339                 read_vli(coder->extra_tail->id);
340
341                 if (coder->extra_tail->id == 0) {
                            // ID of zero: this Record has no Size and no
                            // Data. Step back to SEQ_EXTRA_ALLOC or
                            // SEQ_EXTRA_DUMMY_ALLOC, whichever precedes
                            // the current state in the enumeration.
342                         coder->extra_tail->size = 0;
343                         coder->extra_tail->data = NULL;
344                         coder->todo_count = 0;
345                         --coder->sequence;
346                 } else {
                            // Go to SEQ_EXTRA_SIZE or SEQ_EXTRA_DUMMY_SIZE.
347                         ++coder->sequence;
348                 }
349
350                 break;
351
352         case SEQ_EXTRA_SIZE:
353         case SEQ_EXTRA_DUMMY_SIZE:
354                 read_vli(coder->tmp);
355
356                 if (coder->tmp == 0) {
357                         // We have no Data in the Extra Record. Don't
358                         // allocate any memory for it. Go back to
359                         // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
360                         coder->tmp = 0;
361                         coder->sequence -= 2;
362                         coder->todo_count = 0;
363                 } else {
                            // Go to SEQ_EXTRA_DATA_ALLOC or
                            // SEQ_EXTRA_DUMMY_COPY.
364                         ++coder->sequence;
365                 }
366
367                 break;
368
369         case SEQ_EXTRA_DATA_ALLOC: {
                    // Validate the size before it is cast to size_t.
370                 if (coder->tmp > EXTRA_SIZE_MAX)
371                         return LZMA_DATA_ERROR;
372
373                 coder->extra_tail->size = (size_t)(coder->tmp);
374                 coder->tmp = 0;
375
376                 // We reserve space for the trailing '\0' too.
377                 uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
378                                 allocator);
379                 if (d == NULL)
380                         return LZMA_MEM_ERROR;
381
382                 coder->extra_tail->data = d;
383                 coder->sequence = SEQ_EXTRA_DATA_COPY;
384         }
385
386         // Fall through
387
388         case SEQ_EXTRA_DATA_COPY:
                    // coder->pos is used as the output position here; it is
                    // zero when the copying of a Record begins.
389                 bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
390                                 coder->extra_tail->data, &coder->pos,
391                                 (size_t)(coder->extra_tail->size));
392
393                 if ((size_t)(coder->extra_tail->size) == coder->pos) {
                            // All the Data has been copied. Terminate it
                            // and get ready for the next Extra Record.
394                         coder->extra_tail->data[coder->pos] = '\0';
395                         coder->pos = 0;
396                         coder->todo_count = 0;
397                         coder->sequence = SEQ_EXTRA_ALLOC;
398                 }
399
400                 break;
401
402         case SEQ_EXTRA_DUMMY_ALLOC:
403                 // Not really alloc, just initialize the dummy entry.
404                 coder->extra_dummy = (lzma_extra){
405                         .next = NULL,
406                         .id = 0,
407                         .size = 0,
408                         .data = NULL,
409                 };
410
411                 coder->todo_count = 1;
412                 coder->sequence = SEQ_EXTRA_DUMMY_ID;
413                 break;
414
415         case SEQ_EXTRA_DUMMY_COPY: {
416                 // Simply skip as many bytes as indicated by Extra Record Size.
417                 // We don't check EXTRA_SIZE_MAX because we don't
418                 // allocate any memory to hold the data.
                    // coder->tmp holds the number of bytes left to skip.
419                 const size_t in_avail = coder->buffer_size - coder->buffer_pos;
420                 const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
421                 coder->buffer_pos += skip;
422                 coder->tmp -= skip;
423
424                 if (coder->tmp == 0) {
425                         coder->todo_count = 0;
426                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
427                 }
428
429                 break;
430         }
431
432         default:
433                 return LZMA_PROG_ERROR;
434         }
435
436         return LZMA_OK;
437 }
438
439
440 static lzma_ret
441 metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
442                 const uint8_t *restrict in, size_t *restrict in_pos,
443                 size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
444                 size_t *restrict out_pos lzma_attribute((unused)),
445                 size_t out_size lzma_attribute((unused)),
446                 lzma_action action lzma_attribute((unused)))
447 {
448         bool end_was_reached = false;
449
450         while (true) {
451                 // Fill the buffer if it is empty.
452                 if (coder->buffer_pos == coder->buffer_size) {
453                         coder->buffer_pos = 0;
454                         coder->buffer_size = 0;
455
456                         const lzma_ret ret = coder->block_decoder.code(
457                                         coder->block_decoder.coder, allocator,
458                                         in, in_pos, in_size, coder->buffer,
459                                         &coder->buffer_size, LZMA_BUFFER_SIZE,
460                                         LZMA_RUN);
461
462                         switch (ret) {
463                         case LZMA_OK:
464                                 // Return immediatelly if we got no new data.
465                                 if (coder->buffer_size == 0)
466                                         return LZMA_OK;
467
468                                 break;
469
470                         case LZMA_STREAM_END:
471                                 end_was_reached = true;
472                                 break;
473
474                         default:
475                                 return ret;
476                         }
477                 }
478
479                 // Process coder->buffer.
480                 const lzma_ret ret = process(coder, allocator);
481                 if (ret != LZMA_OK)
482                         return ret;
483
484                 // On success, process() eats all the input.
485                 assert(coder->buffer_pos == coder->buffer_size);
486
487                 if (end_was_reached) {
488                         // Check that the sequence is not in the
489                         // middle of anything.
490                         if (coder->todo_count != 0)
491                                 return LZMA_DATA_ERROR;
492
493                         // If Size of Header Metadata Block was not
494                         // present, we use zero as its size instead
495                         // of LZMA_VLI_VALUE_UNKNOWN.
496                         if (coder->metadata->header_metadata_size
497                                         == LZMA_VLI_VALUE_UNKNOWN)
498                                 coder->metadata->header_metadata_size = 0;
499
500                         return LZMA_STREAM_END;
501                 }
502         }
503 }
504
505
506 static void
507 metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
508 {
509         lzma_next_coder_end(&coder->block_decoder, allocator);
510         lzma_free(coder, allocator);
511         return;
512 }
513
514
515 static lzma_ret
516 metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
517                 lzma_options_block *options, lzma_metadata *metadata,
518                 bool want_extra)
519 {
520         if (options == NULL || metadata == NULL)
521                 return LZMA_PROG_ERROR;
522
523         if (next->coder == NULL) {
524                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
525                 if (next->coder == NULL)
526                         return LZMA_MEM_ERROR;
527
528                 next->code = &metadata_decode;
529                 next->end = &metadata_decoder_end;
530                 next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
531         }
532
533         metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
534         metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
535         metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
536         metadata->index = NULL;
537         metadata->extra = NULL;
538
539         next->coder->sequence = SEQ_FLAGS;
540         next->coder->todo_count = 1;
541         next->coder->pos = 0;
542         next->coder->tmp = 0;
543         next->coder->metadata = metadata;
544         next->coder->index_current = NULL;
545         next->coder->index_count = 0;
546         next->coder->index_total_size = 0;
547         next->coder->index_uncompressed_size = 0;
548         next->coder->want_extra = want_extra;
549         next->coder->extra_tail = NULL;
550         next->coder->buffer_pos = 0;
551         next->coder->buffer_size = 0;
552
553         return lzma_block_decoder_init(
554                         &next->coder->block_decoder, allocator, options);
555 }
556
557
/// \brief      Initializes *next to be a Metadata decoder
///
/// The arguments are passed to lzma_next_coder_init() together with
/// metadata_decoder_init(), which does the actual work.
///
/// NOTE(review): There is no return statement in this function, so
/// lzma_next_coder_init presumably is a macro that provides the return
/// value. Its definition is not in this file; confirm from the header
/// that defines it.
558 extern lzma_ret
559 lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
560                 lzma_options_block *options, lzma_metadata *metadata,
561                 bool want_extra)
562 {
563         lzma_next_coder_init(metadata_decoder_init, next, allocator,
564                         options, metadata, want_extra);
565 }
566
567
/// \brief      Initializes *strm to decode a Metadata Block
///
/// \param      strm        Stream to initialize
/// \param      options     Block options; given to the Block decoder
/// \param      metadata    Structure to which the parsed Metadata is stored
/// \param      want_extra  True if Extra Records should be stored to
///                         *metadata instead of being only parsed and
///                         skipped
///
/// \return     LZMA_OK when this function runs to its end.
///             NOTE(review): lzma_next_strm_init is not defined in this
///             file; presumably it is a macro that returns an error code
///             from this function if the initialization fails. Confirm
///             from its definition.
568 extern LZMA_API lzma_ret
569 lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
570                 lzma_metadata *metadata, lzma_bool want_extra)
571 {
572         lzma_next_strm_init(strm, lzma_metadata_decoder_init,
573                         options, metadata, want_extra);
574
            // Enable LZMA_RUN in the table of supported actions.
575         strm->internal->supported_actions[LZMA_RUN] = true;
576
577         return LZMA_OK;
578 }