icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/metadata_decoder.c
Fix Size of Header Metadata Block handling. Now
[icculus/xz.git] / src / liblzma / common / metadata_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       metadata_decoder.c
4 /// \brief      Decodes metadata stored in Metadata Blocks
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "metadata_decoder.h"
21 #include "block_decoder.h"
22
23
/// Upper limit for the Size field of one Extra Record. The decoder compares
/// the parsed lzma_vli against this limit before casting it to size_t, so
/// the main purpose is to guarantee that the cast is lossless. A smaller
/// limit might be more appropriate.
#define EXTRA_SIZE_MAX (SIZE_MAX / 4)
28
29
30 struct lzma_coder_s {
31         enum {
32                 SEQ_FLAGS,
33                 SEQ_HEADER_METADATA_SIZE,
34                 SEQ_TOTAL_SIZE,
35                 SEQ_UNCOMPRESSED_SIZE,
36                 SEQ_INDEX_COUNT,
37                 SEQ_INDEX_ALLOC,
38                 SEQ_INDEX_TOTAL_SIZE,
39                 SEQ_INDEX_UNCOMPRESSED_SIZE,
40                 SEQ_EXTRA_PREPARE,
41                 SEQ_EXTRA_ALLOC,
42                 SEQ_EXTRA_ID,
43                 SEQ_EXTRA_SIZE,
44                 SEQ_EXTRA_DATA_ALLOC,
45                 SEQ_EXTRA_DATA_COPY,
46                 SEQ_EXTRA_DUMMY_ALLOC,
47                 SEQ_EXTRA_DUMMY_ID,
48                 SEQ_EXTRA_DUMMY_SIZE,
49                 SEQ_EXTRA_DUMMY_COPY,
50         } sequence;
51
52         /// Number of "things" left to be parsed. If we hit end of input
53         /// when this isn't zero, we have corrupt Metadata Block.
54         size_t todo_count;
55
56         /// Position in variable-length integers
57         size_t pos;
58
59         /// Temporary variable needed to decode variables whose type
60         /// is size_t instead of lzma_vli.
61         lzma_vli tmp;
62
63         /// Pointer to target structure to hold the parsed results.
64         lzma_metadata *metadata;
65
66         /// The Index Record we currently are parsing
67         lzma_index *index_current;
68
69         /// Number of Records in Index
70         size_t index_count;
71
72         /// Sum of Total Size fields in the Index
73         lzma_vli index_total_size;
74
75         /// Sum of Uncompressed Size fields in the Index
76         lzma_vli index_uncompressed_size;
77
78         /// True if Extra is present.
79         bool has_extra;
80
81         /// True if we have been requested to store the Extra to *metadata.
82         bool want_extra;
83
84         /// Pointer to the end of the Extra Record list.
85         lzma_extra *extra_tail;
86
87         /// Dummy Extra Record used when only verifying integrity of Extra
88         /// (not storing it to RAM).
89         lzma_extra extra_dummy;
90
91         /// Block decoder
92         lzma_next_coder block_decoder;
93
94         /// buffer[buffer_pos] is the next byte to process.
95         size_t buffer_pos;
96
97         /// buffer[buffer_size] is the first byte to not process.
98         size_t buffer_size;
99
100         /// Temporary buffer to which encoded Metadata is read before
101         /// it is parsed.
102         uint8_t buffer[LZMA_BUFFER_SIZE];
103 };
104
105
/// Decodes one variable-length integer from coder->buffer into `num`.
///
/// `num` is an lvalue expression whose address is passed to
/// lzma_vli_decode() (for example coder->tmp). The macro expects a variable
/// named `coder` to be in scope and must be used inside a function that
/// returns lzma_ret: whenever lzma_vli_decode() returns something other
/// than LZMA_STREAM_END, that value is returned from the *enclosing
/// function*. On LZMA_STREAM_END the integer is complete and coder->pos is
/// reset so that the next integer starts from a clean state.
#define read_vli(num) \
do { \
        const lzma_ret ret = lzma_vli_decode( \
                        &(num), &coder->pos, \
                        coder->buffer, &coder->buffer_pos, \
                        coder->buffer_size); \
        if (ret != LZMA_STREAM_END) \
                return ret; \
        \
        coder->pos = 0; \
} while (0)
118
119
120 static lzma_ret
121 process(lzma_coder *coder, lzma_allocator *allocator)
122 {
123         while (coder->buffer_pos < coder->buffer_size)
124         switch (coder->sequence) {
125         case SEQ_FLAGS:
126                 // Reserved bits must be unset.
127                 if (coder->buffer[coder->buffer_pos] & 0x70)
128                         return LZMA_HEADER_ERROR;
129
130                 // If Size of Header Metadata is present, prepare the
131                 // variable for variable-length integer decoding. Otherwise
132                 // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
133                 // field isn't present.
134                 if (coder->buffer[coder->buffer_pos] & 0x01) {
135                         coder->metadata->header_metadata_size = 0;
136                         ++coder->todo_count;
137                 }
138
139                 if (coder->buffer[coder->buffer_pos] & 0x02) {
140                         coder->metadata->total_size = 0;
141                         ++coder->todo_count;
142                 }
143
144                 if (coder->buffer[coder->buffer_pos] & 0x04) {
145                         coder->metadata->uncompressed_size = 0;
146                         ++coder->todo_count;
147                 }
148
149                 if (coder->buffer[coder->buffer_pos] & 0x08) {
150                         // Setting index_count to 1 is just to indicate that
151                         // Index is present. The real size is parsed later.
152                         coder->index_count = 1;
153                         ++coder->todo_count;
154                 }
155
156                 coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
157                                 != 0;
158
159                 ++coder->buffer_pos;
160                 coder->sequence = SEQ_HEADER_METADATA_SIZE;
161                 break;
162
163         case SEQ_HEADER_METADATA_SIZE:
164                 if (coder->metadata->header_metadata_size
165                                 != LZMA_VLI_VALUE_UNKNOWN) {
166                         read_vli(coder->metadata->header_metadata_size);
167
168                         if (coder->metadata->header_metadata_size == 0)
169                                 return LZMA_DATA_ERROR;
170
171                         --coder->todo_count;
172                 } else {
173                         // Zero indicates that Size of Header Metadata Block
174                         // is not present. That is, after successful Metadata
175                         // decoding, metadata->header_metadata_size is
176                         // never LZMA_VLI_VALUE_UNKNOWN.
177                         coder->metadata->header_metadata_size = 0;
178                 }
179
180                 coder->sequence = SEQ_TOTAL_SIZE;
181                 break;
182
183         case SEQ_TOTAL_SIZE:
184                 if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
185                         read_vli(coder->metadata->total_size);
186
187                         if (coder->metadata->total_size == 0)
188                                 return LZMA_DATA_ERROR;
189
190                         --coder->todo_count;
191                 }
192
193                 coder->sequence = SEQ_UNCOMPRESSED_SIZE;
194                 break;
195
196         case SEQ_UNCOMPRESSED_SIZE:
197                 if (coder->metadata->uncompressed_size
198                                 != LZMA_VLI_VALUE_UNKNOWN) {
199                         read_vli(coder->metadata->uncompressed_size);
200                         --coder->todo_count;
201                 }
202
203                 coder->sequence = SEQ_INDEX_COUNT;
204                 break;
205
206         case SEQ_INDEX_COUNT:
207                 if (coder->index_count == 0) {
208                         coder->sequence = SEQ_EXTRA_PREPARE;
209                         break;
210                 }
211
212                 read_vli(coder->tmp);
213
214                 // Index must not be empty nor far too big (wouldn't fit
215                 // in RAM).
216                 if (coder->tmp == 0 || coder->tmp
217                                 >= SIZE_MAX / sizeof(lzma_index))
218                         return LZMA_DATA_ERROR;
219
220                 coder->index_count = (size_t)(coder->tmp);
221                 coder->tmp = 0;
222
223                 coder->sequence = SEQ_INDEX_ALLOC;
224                 break;
225
226         case SEQ_INDEX_ALLOC: {
227                 lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
228                 if (i == NULL)
229                         return LZMA_MEM_ERROR;
230
231                 i->total_size = 0;
232                 i->uncompressed_size = 0;
233                 i->next = NULL;
234
235                 if (coder->metadata->index == NULL)
236                         coder->metadata->index = i;
237                 else
238                         coder->index_current->next = i;
239
240                 coder->index_current = i;
241
242                 coder->sequence = SEQ_INDEX_TOTAL_SIZE;
243         }
244
245         // Fall through
246
247         case SEQ_INDEX_TOTAL_SIZE: {
248                 read_vli(coder->index_current->total_size);
249
250                 coder->index_total_size += coder->index_current->total_size;
251                 if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
252                         return LZMA_DATA_ERROR;
253
254                 // No Block can have Total Size of zero bytes.
255                 if (coder->index_current->total_size == 0)
256                         return LZMA_DATA_ERROR;
257
258                 if (--coder->index_count == 0) {
259                         // If Total Size is present, it must match the sum
260                         // of Total Sizes in Index.
261                         if (coder->metadata->total_size
262                                                 != LZMA_VLI_VALUE_UNKNOWN
263                                         && coder->metadata->total_size
264                                                 != coder->index_total_size)
265                                 return LZMA_DATA_ERROR;
266
267                         coder->index_current = coder->metadata->index;
268                         coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
269                 } else {
270                         coder->sequence = SEQ_INDEX_ALLOC;
271                 }
272
273                 break;
274         }
275
276         case SEQ_INDEX_UNCOMPRESSED_SIZE: {
277                 read_vli(coder->index_current->uncompressed_size);
278
279                 coder->index_uncompressed_size
280                                 += coder->index_current->uncompressed_size;
281                 if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
282                         return LZMA_DATA_ERROR;
283
284                 coder->index_current = coder->index_current->next;
285                 if (coder->index_current == NULL) {
286                         if (coder->metadata->uncompressed_size
287                                                 != LZMA_VLI_VALUE_UNKNOWN
288                                         && coder->metadata->uncompressed_size
289                                         != coder->index_uncompressed_size)
290                                 return LZMA_DATA_ERROR;
291
292                         --coder->todo_count;
293                         coder->sequence = SEQ_EXTRA_PREPARE;
294                 }
295
296                 break;
297         }
298
299         case SEQ_EXTRA_PREPARE:
300                 assert(coder->todo_count == 0);
301
302                 // If we get here, we have at least one byte of input left.
303                 // If "Extra is present" flag is unset in Metadata Flags,
304                 // it means that there is some garbage and we return an error.
305                 if (!coder->has_extra)
306                         return LZMA_DATA_ERROR;
307
308                 if (!coder->want_extra) {
309                         coder->extra_tail = &coder->extra_dummy;
310                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
311                         break;
312                 }
313
314                 coder->sequence = SEQ_EXTRA_ALLOC;
315
316         // Fall through
317
318         case SEQ_EXTRA_ALLOC: {
319                 lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
320                 if (e == NULL)
321                         return LZMA_MEM_ERROR;
322
323                 e->next = NULL;
324                 e->id = 0;
325                 e->size = 0;
326                 e->data = NULL;
327
328                 if (coder->metadata->extra == NULL)
329                         coder->metadata->extra = e;
330                 else
331                         coder->extra_tail->next = e;
332
333                 coder->extra_tail = e;
334
335                 coder->todo_count = 1;
336                 coder->sequence = SEQ_EXTRA_ID;
337         }
338
339         // Fall through
340
341         case SEQ_EXTRA_ID:
342         case SEQ_EXTRA_DUMMY_ID:
343                 read_vli(coder->extra_tail->id);
344
345                 if (coder->extra_tail->id == 0) {
346                         coder->extra_tail->size = 0;
347                         coder->extra_tail->data = NULL;
348                         coder->todo_count = 0;
349                         --coder->sequence;
350                 } else {
351                         ++coder->sequence;
352                 }
353
354                 break;
355
356         case SEQ_EXTRA_SIZE:
357         case SEQ_EXTRA_DUMMY_SIZE:
358                 read_vli(coder->tmp);
359                 ++coder->sequence;
360                 break;
361
362         case SEQ_EXTRA_DATA_ALLOC: {
363                 if (coder->tmp > EXTRA_SIZE_MAX)
364                         return LZMA_DATA_ERROR;
365
366                 coder->extra_tail->size = (size_t)(coder->tmp);
367                 coder->tmp = 0;
368
369                 uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size),
370                                 allocator);
371                 if (d == NULL)
372                         return LZMA_MEM_ERROR;
373
374                 coder->extra_tail->data = d;
375                 coder->sequence = SEQ_EXTRA_DATA_COPY;
376         }
377
378         // Fall through
379
380         case SEQ_EXTRA_DATA_COPY:
381                 bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
382                                 coder->extra_tail->data, &coder->pos,
383                                 (size_t)(coder->extra_tail->size));
384
385                 if ((size_t)(coder->extra_tail->size) == coder->pos) {
386                         coder->pos = 0;
387                         coder->todo_count = 0;
388                         coder->sequence = SEQ_EXTRA_ALLOC;
389                 }
390
391                 break;
392
393         case SEQ_EXTRA_DUMMY_ALLOC:
394                 // Not really alloc, just initialize the dummy entry.
395                 coder->extra_dummy = (lzma_extra){
396                         .next = NULL,
397                         .id = 0,
398                         .size = 0,
399                         .data = NULL,
400                 };
401
402                 coder->todo_count = 1;
403                 coder->sequence = SEQ_EXTRA_DUMMY_ID;
404                 break;
405
406         case SEQ_EXTRA_DUMMY_COPY: {
407                 // Simply skip as many bytes as indicated by Extra Record Size.
408                 // We don't check lzma_extra_size_max because we don't
409                 // allocate any memory to hold the data.
410                 const size_t in_avail = coder->buffer_size - coder->buffer_pos;
411                 const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
412                 coder->buffer_pos += skip;
413                 coder->tmp -= skip;
414
415                 if (coder->tmp == 0) {
416                         coder->todo_count = 0;
417                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
418                 }
419
420                 break;
421         }
422
423         default:
424                 return LZMA_PROG_ERROR;
425         }
426
427         return LZMA_OK;
428 }
429
430
431 static lzma_ret
432 metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
433                 const uint8_t *restrict in, size_t *restrict in_pos,
434                 size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
435                 size_t *restrict out_pos lzma_attribute((unused)),
436                 size_t out_size lzma_attribute((unused)),
437                 lzma_action action lzma_attribute((unused)))
438 {
439         bool end_was_reached = false;
440
441         while (true) {
442                 // Fill the buffer if it is empty.
443                 if (coder->buffer_pos == coder->buffer_size) {
444                         coder->buffer_pos = 0;
445                         coder->buffer_size = 0;
446
447                         const lzma_ret ret = coder->block_decoder.code(
448                                         coder->block_decoder.coder, allocator,
449                                         in, in_pos, in_size, coder->buffer,
450                                         &coder->buffer_size, LZMA_BUFFER_SIZE,
451                                         LZMA_RUN);
452
453                         switch (ret) {
454                         case LZMA_OK:
455                                 // Return immediatelly if we got no new data.
456                                 if (coder->buffer_size == 0)
457                                         return LZMA_OK;
458
459                                 break;
460
461                         case LZMA_STREAM_END:
462                                 end_was_reached = true;
463                                 break;
464
465                         default:
466                                 return ret;
467                         }
468                 }
469
470                 // Process coder->buffer.
471                 const lzma_ret ret = process(coder, allocator);
472                 if (ret != LZMA_OK)
473                         return ret;
474
475                 // On success, process() eats all the input.
476                 assert(coder->buffer_pos == coder->buffer_size);
477
478                 if (end_was_reached) {
479                         // Check that the sequence is not in the
480                         // middle of anything.
481                         if (coder->todo_count != 0)
482                                 return LZMA_DATA_ERROR;
483
484                         return LZMA_STREAM_END;
485                 }
486         }
487 }
488
489
490 static void
491 metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
492 {
493         lzma_next_coder_end(&coder->block_decoder, allocator);
494         lzma_free(coder, allocator);
495         return;
496 }
497
498
499 static lzma_ret
500 metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
501                 lzma_options_block *options, lzma_metadata *metadata,
502                 bool want_extra)
503 {
504         if (options == NULL || metadata == NULL)
505                 return LZMA_PROG_ERROR;
506
507         if (next->coder == NULL) {
508                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
509                 if (next->coder == NULL)
510                         return LZMA_MEM_ERROR;
511
512                 next->code = &metadata_decode;
513                 next->end = &metadata_decoder_end;
514                 next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
515         }
516
517         metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
518         metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
519         metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
520         metadata->index = NULL;
521         metadata->extra = NULL;
522
523         next->coder->sequence = SEQ_FLAGS;
524         next->coder->todo_count = 0;
525         next->coder->pos = 0;
526         next->coder->tmp = 0;
527         next->coder->metadata = metadata;
528         next->coder->index_current = NULL;
529         next->coder->index_count = 0;
530         next->coder->index_total_size = 0;
531         next->coder->index_uncompressed_size = 0;
532         next->coder->want_extra = want_extra;
533         next->coder->extra_tail = NULL;
534         next->coder->buffer_pos = 0;
535         next->coder->buffer_size = 0;
536
537         return lzma_block_decoder_init(
538                         &next->coder->block_decoder, allocator, options);
539 }
540
541
542 extern lzma_ret
543 lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
544                 lzma_options_block *options, lzma_metadata *metadata,
545                 bool want_extra)
546 {
547         lzma_next_coder_init(metadata_decoder_init, next, allocator,
548                         options, metadata, want_extra);
549 }
550
551
552 extern LZMA_API lzma_ret
553 lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
554                 lzma_metadata *metadata, lzma_bool want_extra)
555 {
556         lzma_next_strm_init(strm, lzma_metadata_decoder_init,
557                         options, metadata, want_extra);
558
559         strm->internal->supported_actions[LZMA_RUN] = true;
560
561         return LZMA_OK;
562 }