icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/metadata_decoder.c
Fix a bug related to 99e12af4e2b866c011fe0106cd1e0bfdcc8fe9c6.
[icculus/xz.git] / src / liblzma / common / metadata_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       metadata_decoder.c
4 /// \brief      Decodes metadata stored in Metadata Blocks
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "metadata_decoder.h"
21 #include "block_decoder.h"
22
23
/// Upper limit for the Size field of one Extra Record. process() rejects
/// larger values with LZMA_DATA_ERROR before casting the decoded lzma_vli
/// to size_t and allocating size + 1 bytes for the Record's Data. The limit
/// is generous; a smaller value might be more appropriate.
#define EXTRA_SIZE_MAX (SIZE_MAX / 4)
28
29
30 struct lzma_coder_s {
31         enum {
32                 SEQ_FLAGS,
33                 SEQ_HEADER_METADATA_SIZE,
34                 SEQ_TOTAL_SIZE,
35                 SEQ_UNCOMPRESSED_SIZE,
36                 SEQ_INDEX_COUNT,
37                 SEQ_INDEX_ALLOC,
38                 SEQ_INDEX_TOTAL_SIZE,
39                 SEQ_INDEX_UNCOMPRESSED_SIZE,
40                 SEQ_EXTRA_PREPARE,
41                 SEQ_EXTRA_ALLOC,
42                 SEQ_EXTRA_ID,
43                 SEQ_EXTRA_SIZE,
44                 SEQ_EXTRA_DATA_ALLOC,
45                 SEQ_EXTRA_DATA_COPY,
46                 SEQ_EXTRA_DUMMY_ALLOC,
47                 SEQ_EXTRA_DUMMY_ID,
48                 SEQ_EXTRA_DUMMY_SIZE,
49                 SEQ_EXTRA_DUMMY_COPY,
50         } sequence;
51
52         /// Number of "things" left to be parsed. If we hit end of input
53         /// when this isn't zero, we have corrupt Metadata Block.
54         size_t todo_count;
55
56         /// Position in variable-length integers
57         size_t pos;
58
59         /// Temporary variable needed to decode variables whose type
60         /// is size_t instead of lzma_vli.
61         lzma_vli tmp;
62
63         /// Pointer to target structure to hold the parsed results.
64         lzma_metadata *metadata;
65
66         /// The Index Record we currently are parsing
67         lzma_index *index_current;
68
69         /// Number of Records in Index
70         size_t index_count;
71
72         /// Sum of Total Size fields in the Index
73         lzma_vli index_total_size;
74
75         /// Sum of Uncompressed Size fields in the Index
76         lzma_vli index_uncompressed_size;
77
78         /// True if Extra is present.
79         bool has_extra;
80
81         /// True if we have been requested to store the Extra to *metadata.
82         bool want_extra;
83
84         /// Pointer to the end of the Extra Record list.
85         lzma_extra *extra_tail;
86
87         /// Dummy Extra Record used when only verifying integrity of Extra
88         /// (not storing it to RAM).
89         lzma_extra extra_dummy;
90
91         /// Block decoder
92         lzma_next_coder block_decoder;
93
94         /// buffer[buffer_pos] is the next byte to process.
95         size_t buffer_pos;
96
97         /// buffer[buffer_size] is the first byte to not process.
98         size_t buffer_size;
99
100         /// Temporary buffer to which encoded Metadata is read before
101         /// it is parsed.
102         uint8_t buffer[LZMA_BUFFER_SIZE];
103 };
104
105
/// Decodes a variable-length integer from coder->buffer into the lvalue
/// given as `num` (for example coder->tmp), using coder->pos as the
/// decoding position and advancing coder->buffer_pos.
///
/// This must be expanded inside a function that returns lzma_ret and has
/// a `coder` pointer in scope. Unless lzma_vli_decode() returns
/// LZMA_STREAM_END, its return value is returned from the enclosing
/// function right away. On LZMA_STREAM_END coder->pos is reset to zero
/// and execution continues after the macro.
///
/// The argument is parenthesized so that any lvalue expression can be
/// passed safely, and the temporary has a macro-specific name so that it
/// cannot shadow a variable of the calling function.
#define read_vli(num) \
do { \
	const lzma_ret read_vli_ret_ = lzma_vli_decode( \
			&(num), &coder->pos, \
			coder->buffer, &coder->buffer_pos, \
			coder->buffer_size); \
	if (read_vli_ret_ != LZMA_STREAM_END) \
		return read_vli_ret_; \
	\
	coder->pos = 0; \
} while (0)
118
119
120 static lzma_ret
121 process(lzma_coder *coder, lzma_allocator *allocator)
122 {
123         while (coder->buffer_pos < coder->buffer_size)
124         switch (coder->sequence) {
125         case SEQ_FLAGS:
126                 // Reserved bits must be unset.
127                 if (coder->buffer[coder->buffer_pos] & 0x70)
128                         return LZMA_HEADER_ERROR;
129
130                 // If Size of Header Metadata is present, prepare the
131                 // variable for variable-length integer decoding. Otherwise
132                 // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
133                 // field isn't present.
134                 if (coder->buffer[coder->buffer_pos] & 0x01) {
135                         coder->metadata->header_metadata_size = 0;
136                         ++coder->todo_count;
137                 }
138
139                 if (coder->buffer[coder->buffer_pos] & 0x02) {
140                         coder->metadata->total_size = 0;
141                         ++coder->todo_count;
142                 }
143
144                 if (coder->buffer[coder->buffer_pos] & 0x04) {
145                         coder->metadata->uncompressed_size = 0;
146                         ++coder->todo_count;
147                 }
148
149                 if (coder->buffer[coder->buffer_pos] & 0x08) {
150                         // Setting index_count to 1 is just to indicate that
151                         // Index is present. The real size is parsed later.
152                         coder->index_count = 1;
153                         ++coder->todo_count;
154                 }
155
156                 coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
157                                 != 0;
158
159                 ++coder->buffer_pos;
160                 coder->sequence = SEQ_HEADER_METADATA_SIZE;
161                 break;
162
163         case SEQ_HEADER_METADATA_SIZE:
164                 if (coder->metadata->header_metadata_size
165                                 != LZMA_VLI_VALUE_UNKNOWN) {
166                         read_vli(coder->metadata->header_metadata_size);
167
168                         if (coder->metadata->header_metadata_size == 0)
169                                 return LZMA_DATA_ERROR;
170
171                         --coder->todo_count;
172                 }
173
174                 coder->sequence = SEQ_TOTAL_SIZE;
175                 break;
176
177         case SEQ_TOTAL_SIZE:
178                 if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
179                         read_vli(coder->metadata->total_size);
180
181                         if (coder->metadata->total_size == 0)
182                                 return LZMA_DATA_ERROR;
183
184                         --coder->todo_count;
185                 }
186
187                 coder->sequence = SEQ_UNCOMPRESSED_SIZE;
188                 break;
189
190         case SEQ_UNCOMPRESSED_SIZE:
191                 if (coder->metadata->uncompressed_size
192                                 != LZMA_VLI_VALUE_UNKNOWN) {
193                         read_vli(coder->metadata->uncompressed_size);
194                         --coder->todo_count;
195                 }
196
197                 coder->sequence = SEQ_INDEX_COUNT;
198                 break;
199
200         case SEQ_INDEX_COUNT:
201                 if (coder->index_count == 0) {
202                         coder->sequence = SEQ_EXTRA_PREPARE;
203                         break;
204                 }
205
206                 read_vli(coder->tmp);
207
208                 // Index must not be empty nor far too big (wouldn't fit
209                 // in RAM).
210                 if (coder->tmp == 0 || coder->tmp
211                                 >= SIZE_MAX / sizeof(lzma_index))
212                         return LZMA_DATA_ERROR;
213
214                 coder->index_count = (size_t)(coder->tmp);
215                 coder->tmp = 0;
216
217                 coder->sequence = SEQ_INDEX_ALLOC;
218                 break;
219
220         case SEQ_INDEX_ALLOC: {
221                 lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
222                 if (i == NULL)
223                         return LZMA_MEM_ERROR;
224
225                 i->total_size = 0;
226                 i->uncompressed_size = 0;
227                 i->next = NULL;
228
229                 if (coder->metadata->index == NULL)
230                         coder->metadata->index = i;
231                 else
232                         coder->index_current->next = i;
233
234                 coder->index_current = i;
235
236                 coder->sequence = SEQ_INDEX_TOTAL_SIZE;
237         }
238
239         // Fall through
240
241         case SEQ_INDEX_TOTAL_SIZE: {
242                 read_vli(coder->index_current->total_size);
243
244                 coder->index_total_size += coder->index_current->total_size;
245                 if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
246                         return LZMA_DATA_ERROR;
247
248                 // No Block can have Total Size of zero bytes.
249                 if (coder->index_current->total_size == 0)
250                         return LZMA_DATA_ERROR;
251
252                 if (--coder->index_count == 0) {
253                         // If Total Size is present, it must match the sum
254                         // of Total Sizes in Index.
255                         if (coder->metadata->total_size
256                                                 != LZMA_VLI_VALUE_UNKNOWN
257                                         && coder->metadata->total_size
258                                                 != coder->index_total_size)
259                                 return LZMA_DATA_ERROR;
260
261                         coder->index_current = coder->metadata->index;
262                         coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
263                 } else {
264                         coder->sequence = SEQ_INDEX_ALLOC;
265                 }
266
267                 break;
268         }
269
270         case SEQ_INDEX_UNCOMPRESSED_SIZE: {
271                 read_vli(coder->index_current->uncompressed_size);
272
273                 coder->index_uncompressed_size
274                                 += coder->index_current->uncompressed_size;
275                 if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
276                         return LZMA_DATA_ERROR;
277
278                 coder->index_current = coder->index_current->next;
279                 if (coder->index_current == NULL) {
280                         if (coder->metadata->uncompressed_size
281                                                 != LZMA_VLI_VALUE_UNKNOWN
282                                         && coder->metadata->uncompressed_size
283                                         != coder->index_uncompressed_size)
284                                 return LZMA_DATA_ERROR;
285
286                         --coder->todo_count;
287                         coder->sequence = SEQ_EXTRA_PREPARE;
288                 }
289
290                 break;
291         }
292
293         case SEQ_EXTRA_PREPARE:
294                 assert(coder->todo_count == 0);
295
296                 // If we get here, we have at least one byte of input left.
297                 // If "Extra is present" flag is unset in Metadata Flags,
298                 // it means that there is some garbage and we return an error.
299                 if (!coder->has_extra)
300                         return LZMA_DATA_ERROR;
301
302                 if (!coder->want_extra) {
303                         coder->extra_tail = &coder->extra_dummy;
304                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
305                         break;
306                 }
307
308                 coder->sequence = SEQ_EXTRA_ALLOC;
309
310         // Fall through
311
312         case SEQ_EXTRA_ALLOC: {
313                 lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
314                 if (e == NULL)
315                         return LZMA_MEM_ERROR;
316
317                 e->next = NULL;
318                 e->id = 0;
319                 e->size = 0;
320                 e->data = NULL;
321
322                 if (coder->metadata->extra == NULL)
323                         coder->metadata->extra = e;
324                 else
325                         coder->extra_tail->next = e;
326
327                 coder->extra_tail = e;
328
329                 coder->todo_count = 1;
330                 coder->sequence = SEQ_EXTRA_ID;
331         }
332
333         // Fall through
334
335         case SEQ_EXTRA_ID:
336         case SEQ_EXTRA_DUMMY_ID:
337                 read_vli(coder->extra_tail->id);
338
339                 if (coder->extra_tail->id == 0) {
340                         coder->extra_tail->size = 0;
341                         coder->extra_tail->data = NULL;
342                         coder->todo_count = 0;
343                         --coder->sequence;
344                 } else {
345                         ++coder->sequence;
346                 }
347
348                 break;
349
350         case SEQ_EXTRA_SIZE:
351         case SEQ_EXTRA_DUMMY_SIZE:
352                 read_vli(coder->tmp);
353
354                 if (coder->tmp == 0) {
355                         // We have no Data in the Extra Record. Don't
356                         // allocate any memory for it. Go back to
357                         // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
358                         coder->tmp = 0;
359                         coder->sequence -= 2;
360                         coder->todo_count = 0;
361                 } else {
362                         ++coder->sequence;
363                 }
364
365                 break;
366
367         case SEQ_EXTRA_DATA_ALLOC: {
368                 if (coder->tmp > EXTRA_SIZE_MAX)
369                         return LZMA_DATA_ERROR;
370
371                 coder->extra_tail->size = (size_t)(coder->tmp);
372                 coder->tmp = 0;
373
374                 // We reserve space for the trailing '\0' too.
375                 uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
376                                 allocator);
377                 if (d == NULL)
378                         return LZMA_MEM_ERROR;
379
380                 coder->extra_tail->data = d;
381                 coder->sequence = SEQ_EXTRA_DATA_COPY;
382         }
383
384         // Fall through
385
386         case SEQ_EXTRA_DATA_COPY:
387                 bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
388                                 coder->extra_tail->data, &coder->pos,
389                                 (size_t)(coder->extra_tail->size));
390
391                 if ((size_t)(coder->extra_tail->size) == coder->pos) {
392                         coder->extra_tail->data[coder->pos] = '\0';
393                         coder->pos = 0;
394                         coder->todo_count = 0;
395                         coder->sequence = SEQ_EXTRA_ALLOC;
396                 }
397
398                 break;
399
400         case SEQ_EXTRA_DUMMY_ALLOC:
401                 // Not really alloc, just initialize the dummy entry.
402                 coder->extra_dummy = (lzma_extra){
403                         .next = NULL,
404                         .id = 0,
405                         .size = 0,
406                         .data = NULL,
407                 };
408
409                 coder->todo_count = 1;
410                 coder->sequence = SEQ_EXTRA_DUMMY_ID;
411                 break;
412
413         case SEQ_EXTRA_DUMMY_COPY: {
414                 // Simply skip as many bytes as indicated by Extra Record Size.
415                 // We don't check lzma_extra_size_max because we don't
416                 // allocate any memory to hold the data.
417                 const size_t in_avail = coder->buffer_size - coder->buffer_pos;
418                 const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
419                 coder->buffer_pos += skip;
420                 coder->tmp -= skip;
421
422                 if (coder->tmp == 0) {
423                         coder->todo_count = 0;
424                         coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
425                 }
426
427                 break;
428         }
429
430         default:
431                 return LZMA_PROG_ERROR;
432         }
433
434         return LZMA_OK;
435 }
436
437
438 static lzma_ret
439 metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
440                 const uint8_t *restrict in, size_t *restrict in_pos,
441                 size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
442                 size_t *restrict out_pos lzma_attribute((unused)),
443                 size_t out_size lzma_attribute((unused)),
444                 lzma_action action lzma_attribute((unused)))
445 {
446         bool end_was_reached = false;
447
448         while (true) {
449                 // Fill the buffer if it is empty.
450                 if (coder->buffer_pos == coder->buffer_size) {
451                         coder->buffer_pos = 0;
452                         coder->buffer_size = 0;
453
454                         const lzma_ret ret = coder->block_decoder.code(
455                                         coder->block_decoder.coder, allocator,
456                                         in, in_pos, in_size, coder->buffer,
457                                         &coder->buffer_size, LZMA_BUFFER_SIZE,
458                                         LZMA_RUN);
459
460                         switch (ret) {
461                         case LZMA_OK:
462                                 // Return immediatelly if we got no new data.
463                                 if (coder->buffer_size == 0)
464                                         return LZMA_OK;
465
466                                 break;
467
468                         case LZMA_STREAM_END:
469                                 end_was_reached = true;
470                                 break;
471
472                         default:
473                                 return ret;
474                         }
475                 }
476
477                 // Process coder->buffer.
478                 const lzma_ret ret = process(coder, allocator);
479                 if (ret != LZMA_OK)
480                         return ret;
481
482                 // On success, process() eats all the input.
483                 assert(coder->buffer_pos == coder->buffer_size);
484
485                 if (end_was_reached) {
486                         // Check that the sequence is not in the
487                         // middle of anything.
488                         if (coder->todo_count != 0)
489                                 return LZMA_DATA_ERROR;
490
491                         // If Size of Header Metadata Block was not
492                         // present, we use zero as its size instead
493                         // of LZMA_VLI_VALUE_UNKNOWN.
494                         if (coder->metadata->header_metadata_size
495                                         == LZMA_VLI_VALUE_UNKNOWN)
496                                 coder->metadata->header_metadata_size = 0;
497
498                         return LZMA_STREAM_END;
499                 }
500         }
501 }
502
503
504 static void
505 metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
506 {
507         lzma_next_coder_end(&coder->block_decoder, allocator);
508         lzma_free(coder, allocator);
509         return;
510 }
511
512
513 static lzma_ret
514 metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
515                 lzma_options_block *options, lzma_metadata *metadata,
516                 bool want_extra)
517 {
518         if (options == NULL || metadata == NULL)
519                 return LZMA_PROG_ERROR;
520
521         if (next->coder == NULL) {
522                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
523                 if (next->coder == NULL)
524                         return LZMA_MEM_ERROR;
525
526                 next->code = &metadata_decode;
527                 next->end = &metadata_decoder_end;
528                 next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
529         }
530
531         metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
532         metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
533         metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
534         metadata->index = NULL;
535         metadata->extra = NULL;
536
537         next->coder->sequence = SEQ_FLAGS;
538         next->coder->todo_count = 0;
539         next->coder->pos = 0;
540         next->coder->tmp = 0;
541         next->coder->metadata = metadata;
542         next->coder->index_current = NULL;
543         next->coder->index_count = 0;
544         next->coder->index_total_size = 0;
545         next->coder->index_uncompressed_size = 0;
546         next->coder->want_extra = want_extra;
547         next->coder->extra_tail = NULL;
548         next->coder->buffer_pos = 0;
549         next->coder->buffer_size = 0;
550
551         return lzma_block_decoder_init(
552                         &next->coder->block_decoder, allocator, options);
553 }
554
555
556 extern lzma_ret
557 lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
558                 lzma_options_block *options, lzma_metadata *metadata,
559                 bool want_extra)
560 {
561         lzma_next_coder_init(metadata_decoder_init, next, allocator,
562                         options, metadata, want_extra);
563 }
564
565
566 extern LZMA_API lzma_ret
567 lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
568                 lzma_metadata *metadata, lzma_bool want_extra)
569 {
570         lzma_next_strm_init(strm, lzma_metadata_decoder_init,
571                         options, metadata, want_extra);
572
573         strm->internal->supported_actions[LZMA_RUN] = true;
574
575         return LZMA_OK;
576 }