]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/index_decoder.c
Index decoder fixes.
[icculus/xz.git] / src / liblzma / common / index_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "index.h"
14 #include "check.h"
15
16
17 struct lzma_coder_s {
18         enum {
19                 SEQ_INDICATOR,
20                 SEQ_COUNT,
21                 SEQ_MEMUSAGE,
22                 SEQ_UNPADDED,
23                 SEQ_UNCOMPRESSED,
24                 SEQ_PADDING_INIT,
25                 SEQ_PADDING,
26                 SEQ_CRC32,
27         } sequence;
28
29         /// Memory usage limit
30         uint64_t memlimit;
31
32         /// Target Index
33         lzma_index *index;
34
35         /// Pointer give by the application, which is set after
36         /// successful decoding.
37         lzma_index **index_ptr;
38
39         /// Number of Records left to decode.
40         lzma_vli count;
41
42         /// The most recent Unpadded Size field
43         lzma_vli unpadded_size;
44
45         /// The most recent Uncompressed Size field
46         lzma_vli uncompressed_size;
47
48         /// Position in integers
49         size_t pos;
50
51         /// CRC32 of the List of Records field
52         uint32_t crc32;
53 };
54
55
56 static lzma_ret
57 index_decode(lzma_coder *coder, lzma_allocator *allocator,
58                 const uint8_t *restrict in, size_t *restrict in_pos,
59                 size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
60                 size_t *restrict out_pos lzma_attribute((unused)),
61                 size_t out_size lzma_attribute((unused)),
62                 lzma_action action lzma_attribute((unused)))
63 {
64         // Similar optimization as in index_encoder.c
65         const size_t in_start = *in_pos;
66         lzma_ret ret = LZMA_OK;
67
68         while (*in_pos < in_size)
69         switch (coder->sequence) {
70         case SEQ_INDICATOR:
71                 // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
72                 // LZMA_FORMAT_ERROR, because a typical usage case for Index
73                 // decoder is when parsing the Stream backwards. If seeking
74                 // backward from the Stream Footer gives us something that
75                 // doesn't begin with Index Indicator, the file is considered
76                 // corrupt, not "programming error" or "unrecognized file
77                 // format". One could argue that the application should
78                 // verify the Index Indicator before trying to decode the
79                 // Index, but well, I suppose it is simpler this way.
80                 if (in[(*in_pos)++] != 0x00)
81                         return LZMA_DATA_ERROR;
82
83                 coder->sequence = SEQ_COUNT;
84                 break;
85
86         case SEQ_COUNT:
87                 ret = lzma_vli_decode(&coder->count, &coder->pos,
88                                 in, in_pos, in_size);
89                 if (ret != LZMA_STREAM_END)
90                         goto out;
91
92                 coder->pos = 0;
93                 coder->sequence = SEQ_MEMUSAGE;
94
95         // Fall through
96
97         case SEQ_MEMUSAGE:
98                 if (lzma_index_memusage(coder->count) > coder->memlimit) {
99                         ret = LZMA_MEMLIMIT_ERROR;
100                         goto out;
101                 }
102
103                 ret = LZMA_OK;
104                 coder->sequence = coder->count == 0
105                                 ? SEQ_PADDING_INIT : SEQ_UNPADDED;
106                 break;
107
108         case SEQ_UNPADDED:
109         case SEQ_UNCOMPRESSED: {
110                 lzma_vli *size = coder->sequence == SEQ_UNPADDED
111                                 ? &coder->unpadded_size
112                                 : &coder->uncompressed_size;
113
114                 ret = lzma_vli_decode(size, &coder->pos,
115                                 in, in_pos, in_size);
116                 if (ret != LZMA_STREAM_END)
117                         goto out;
118
119                 ret = LZMA_OK;
120                 coder->pos = 0;
121
122                 if (coder->sequence == SEQ_UNPADDED) {
123                         // Validate that encoded Unpadded Size isn't too small
124                         // or too big.
125                         if (coder->unpadded_size < UNPADDED_SIZE_MIN
126                                         || coder->unpadded_size
127                                                 > UNPADDED_SIZE_MAX)
128                                 return LZMA_DATA_ERROR;
129
130                         coder->sequence = SEQ_UNCOMPRESSED;
131                 } else {
132                         // Add the decoded Record to the Index.
133                         return_if_error(lzma_index_append(
134                                         coder->index, allocator,
135                                         coder->unpadded_size,
136                                         coder->uncompressed_size));
137
138                         // Check if this was the last Record.
139                         coder->sequence = --coder->count == 0
140                                         ? SEQ_PADDING_INIT
141                                         : SEQ_UNPADDED;
142                 }
143
144                 break;
145         }
146
147         case SEQ_PADDING_INIT:
148                 coder->pos = lzma_index_padding_size(coder->index);
149                 coder->sequence = SEQ_PADDING;
150
151         // Fall through
152
153         case SEQ_PADDING:
154                 if (coder->pos > 0) {
155                         --coder->pos;
156                         if (in[(*in_pos)++] != 0x00)
157                                 return LZMA_DATA_ERROR;
158
159                         break;
160                 }
161
162                 // Finish the CRC32 calculation.
163                 coder->crc32 = lzma_crc32(in + in_start,
164                                 *in_pos - in_start, coder->crc32);
165
166                 coder->sequence = SEQ_CRC32;
167
168         // Fall through
169
170         case SEQ_CRC32:
171                 do {
172                         if (*in_pos == in_size)
173                                 return LZMA_OK;
174
175                         if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
176                                         != in[(*in_pos)++])
177                                 return LZMA_DATA_ERROR;
178
179                 } while (++coder->pos < 4);
180
181                 // Decoding was successful, now we can let the application
182                 // see the decoded Index.
183                 *coder->index_ptr = coder->index;
184
185                 // Make index NULL so we don't free it unintentionally.
186                 coder->index = NULL;
187
188                 return LZMA_STREAM_END;
189
190         default:
191                 assert(0);
192                 return LZMA_PROG_ERROR;
193         }
194
195 out:
196         // Update the CRC32,
197         coder->crc32 = lzma_crc32(in + in_start,
198                         *in_pos - in_start, coder->crc32);
199
200         return ret;
201 }
202
203
204 static void
205 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
206 {
207         lzma_index_end(coder->index, allocator);
208         lzma_free(coder, allocator);
209         return;
210 }
211
212
213 static lzma_ret
214 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
215                 uint64_t *old_memlimit, uint64_t new_memlimit)
216 {
217         *memusage = lzma_index_memusage(coder->count);
218
219         if (new_memlimit != 0 && new_memlimit < *memusage)
220                 return LZMA_MEMLIMIT_ERROR;
221
222         *old_memlimit = coder->memlimit;
223         coder->memlimit = new_memlimit;
224
225         return LZMA_OK;
226 }
227
228
229 static lzma_ret
230 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
231                 lzma_index **i, uint64_t memlimit)
232 {
233         // Remember the pointer given by the application. We will set it
234         // to point to the decoded Index only if decoding is successful.
235         // Before that, keep it NULL so that applications can always safely
236         // pass it to lzma_index_end() no matter did decoding succeed or not.
237         coder->index_ptr = i;
238         *i = NULL;
239
240         // We always allocate a new lzma_index.
241         coder->index = lzma_index_init(NULL, allocator);
242         if (coder->index == NULL)
243                 return LZMA_MEM_ERROR;
244
245         // Initialize the rest.
246         coder->sequence = SEQ_INDICATOR;
247         coder->memlimit = memlimit;
248         coder->count = 0; // Needs to be initialized due to _memconfig().
249         coder->pos = 0;
250         coder->crc32 = 0;
251
252         return LZMA_OK;
253 }
254
255
256 static lzma_ret
257 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
258                 lzma_index **i, uint64_t memlimit)
259 {
260         lzma_next_coder_init(&index_decoder_init, next, allocator);
261
262         if (i == NULL || memlimit == 0)
263                 return LZMA_PROG_ERROR;
264
265         if (next->coder == NULL) {
266                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
267                 if (next->coder == NULL)
268                         return LZMA_MEM_ERROR;
269
270                 next->code = &index_decode;
271                 next->end = &index_decoder_end;
272                 next->memconfig = &index_decoder_memconfig;
273                 next->coder->index = NULL;
274         } else {
275                 lzma_index_end(next->coder->index, allocator);
276         }
277
278         return index_decoder_reset(next->coder, allocator, i, memlimit);
279 }
280
281
282 extern LZMA_API(lzma_ret)
283 lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
284 {
285         lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
286
287         strm->internal->supported_actions[LZMA_RUN] = true;
288
289         return LZMA_OK;
290 }
291
292
293 extern LZMA_API(lzma_ret)
294 lzma_index_buffer_decode(
295                 lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator,
296                 const uint8_t *in, size_t *in_pos, size_t in_size)
297 {
298         // Sanity checks
299         if (i == NULL || memlimit == NULL
300                         || in == NULL || in_pos == NULL || *in_pos > in_size)
301                 return LZMA_PROG_ERROR;
302
303         // Initialize the decoder.
304         lzma_coder coder;
305         return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
306
307         // Store the input start position so that we can restore it in case
308         // of an error.
309         const size_t in_start = *in_pos;
310
311         // Do the actual decoding.
312         lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
313                         NULL, NULL, 0, LZMA_RUN);
314
315         if (ret == LZMA_STREAM_END) {
316                 ret = LZMA_OK;
317         } else {
318                 // Something went wrong, free the Index structure and restore
319                 // the input position.
320                 lzma_index_end(coder.index, allocator);
321                 *in_pos = in_start;
322
323                 if (ret == LZMA_OK) {
324                         // The input is truncated or otherwise corrupt.
325                         // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
326                         // like lzma_vli_decode() does in single-call mode.
327                         ret = LZMA_DATA_ERROR;
328
329                 } else if (ret == LZMA_MEMLIMIT_ERROR) {
330                         // Tell the caller how much memory would have
331                         // been needed.
332                         *memlimit = lzma_index_memusage(coder.count);
333                 }
334         }
335
336         return ret;
337 }