icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/index_decoder.c
Put the interesting parts of XZ Utils into the public domain.
[icculus/xz.git] / src / liblzma / common / index_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index_decoder.c
4 /// \brief      Decodes the Index field
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "index.h"
14 #include "check.h"
15
16
17 struct lzma_coder_s {
18         enum {
19                 SEQ_INDICATOR,
20                 SEQ_COUNT,
21                 SEQ_MEMUSAGE,
22                 SEQ_UNPADDED,
23                 SEQ_UNCOMPRESSED,
24                 SEQ_PADDING_INIT,
25                 SEQ_PADDING,
26                 SEQ_CRC32,
27         } sequence;
28
29         /// Memory usage limit
30         uint64_t memlimit;
31
32         /// Target Index
33         lzma_index *index;
34
35         /// Number of Records left to decode.
36         lzma_vli count;
37
38         /// The most recent Unpadded Size field
39         lzma_vli unpadded_size;
40
41         /// The most recent Uncompressed Size field
42         lzma_vli uncompressed_size;
43
44         /// Position in integers
45         size_t pos;
46
47         /// CRC32 of the List of Records field
48         uint32_t crc32;
49 };
50
51
/// \brief      Decode as much of the Index field as the available input allows
///
/// This is a resumable state machine driven by coder->sequence. Input is
/// consumed from in[*in_pos] up to in[in_size - 1]; the output buffer
/// arguments and the action are unused.
///
/// \param      coder       Decoder state
/// \param      allocator   Allocator passed on to lzma_index_append()
/// \param      in          Input buffer
/// \param      in_pos      Read position in the input buffer; advanced as
///                         bytes are consumed
/// \param      in_size     Size of the input buffer
///
/// \return     - LZMA_OK: All available input was consumed but the Index
///               isn't complete yet.
///             - LZMA_STREAM_END: The whole Index was decoded and its CRC32
///               matched. coder->index has been set to NULL.
///             - LZMA_DATA_ERROR: Invalid Index Indicator, Unpadded Size out
///               of range, non-zero Index Padding, or CRC32 mismatch.
///             - LZMA_MEMLIMIT_ERROR: The memory usage calculated from the
///               Record count exceeds coder->memlimit.
///             - LZMA_PROG_ERROR: coder->sequence holds an unknown value.
///             - Anything else returned by lzma_vli_decode() or
///               lzma_index_append() is passed through.
static lzma_ret
index_decode(lzma_coder *coder, lzma_allocator *allocator,
                const uint8_t *restrict in, size_t *restrict in_pos,
                size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
                size_t *restrict out_pos lzma_attribute((unused)),
                size_t out_size lzma_attribute((unused)),
                lzma_action action lzma_attribute((unused)))
{
        // Similar optimization as in index_encoder.c
        //
        // The CRC32 isn't updated byte by byte. Instead, the starting
        // position is remembered here and the CRC32 of everything consumed
        // during this call is calculated with a single lzma_crc32() call
        // (at "out" or right before SEQ_CRC32).
        const size_t in_start = *in_pos;
        lzma_ret ret = LZMA_OK;

        // Every case that reads in[*in_pos] directly relies on this loop
        // condition to guarantee that at least one input byte is available.
        while (*in_pos < in_size)
        switch (coder->sequence) {
        case SEQ_INDICATOR:
                // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
                // LZMA_FORMAT_ERROR, because a typical usage case for Index
                // decoder is when parsing the Stream backwards. If seeking
                // backward from the Stream Footer gives us something that
                // doesn't begin with Index Indicator, the file is considered
                // corrupt, not "programming error" or "unrecognized file
                // format". One could argue that the application should
                // verify the Index Indicator before trying to decode the
                // Index, but well, I suppose it is simpler this way.
                if (in[(*in_pos)++] != 0x00)
                        return LZMA_DATA_ERROR;

                coder->sequence = SEQ_COUNT;
                break;

        case SEQ_COUNT:
                // LZMA_STREAM_END means that the whole integer has been
                // decoded. Any other value (more input needed or an error)
                // is returned to the caller after updating the CRC32.
                ret = lzma_vli_decode(&coder->count, &coder->pos,
                                in, in_pos, in_size);
                if (ret != LZMA_STREAM_END)
                        goto out;

                coder->pos = 0;
                coder->sequence = SEQ_MEMUSAGE;

        // Fall through

        case SEQ_MEMUSAGE:
                // This is a separate state: coder->sequence stays at
                // SEQ_MEMUSAGE when the limit is exceeded, so the next call
                // redoes only this check and not the decoding of the count.
                if (lzma_index_memusage(coder->count) > coder->memlimit) {
                        ret = LZMA_MEMLIMIT_ERROR;
                        goto out;
                }

                // ret may still hold LZMA_STREAM_END from SEQ_COUNT. Reset
                // it so that it isn't returned if the input ends here.
                ret = LZMA_OK;

                // An Index with no Records continues directly with
                // the Index Padding.
                coder->sequence = coder->count == 0
                                ? SEQ_PADDING_INIT : SEQ_UNPADDED;
                break;

        case SEQ_UNPADDED:
        case SEQ_UNCOMPRESSED: {
                // Both fields of a Record are variable-length integers, so
                // they share the decoding code; only the target differs.
                lzma_vli *size = coder->sequence == SEQ_UNPADDED
                                ? &coder->unpadded_size
                                : &coder->uncompressed_size;

                ret = lzma_vli_decode(size, &coder->pos,
                                in, in_pos, in_size);
                if (ret != LZMA_STREAM_END)
                        goto out;

                // The field is complete. Reset the state used by
                // lzma_vli_decode() for the next integer.
                ret = LZMA_OK;
                coder->pos = 0;

                if (coder->sequence == SEQ_UNPADDED) {
                        // Validate that encoded Unpadded Size isn't too small
                        // or too big.
                        if (coder->unpadded_size < UNPADDED_SIZE_MIN
                                        || coder->unpadded_size
                                                > UNPADDED_SIZE_MAX)
                                return LZMA_DATA_ERROR;

                        coder->sequence = SEQ_UNCOMPRESSED;
                } else {
                        // Add the decoded Record to the Index.
                        return_if_error(lzma_index_append(
                                        coder->index, allocator,
                                        coder->unpadded_size,
                                        coder->uncompressed_size));

                        // Check if this was the last Record.
                        coder->sequence = --coder->count == 0
                                        ? SEQ_PADDING_INIT
                                        : SEQ_UNPADDED;
                }

                break;
        }

        case SEQ_PADDING_INIT:
                // From here on coder->pos counts the remaining Index Padding
                // bytes.
                coder->pos = lzma_index_padding_size(coder->index);
                coder->sequence = SEQ_PADDING;

        // Fall through

        case SEQ_PADDING:
                if (coder->pos > 0) {
                        --coder->pos;
                        if (in[(*in_pos)++] != 0x00)
                                return LZMA_DATA_ERROR;

                        break;
                }

                // Finish the CRC32 calculation.
                coder->crc32 = lzma_crc32(in + in_start,
                                *in_pos - in_start, coder->crc32);

                coder->sequence = SEQ_CRC32;

        // Fall through

        case SEQ_CRC32:
                // coder->pos is zero when this state is entered and is used
                // as the index of the next CRC32 byte to verify. The bytes
                // are compared starting from the least significant one.
                do {
                        // Return directly instead of going to "out": the
                        // CRC32 bytes themselves must not be included in
                        // the CRC32 calculation.
                        if (*in_pos == in_size)
                                return LZMA_OK;

                        if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
                                        != in[(*in_pos)++])
                                return LZMA_DATA_ERROR;

                } while (++coder->pos < 4);

                // Make index NULL so we don't free it unintentionally.
                coder->index = NULL;

                return LZMA_STREAM_END;

        default:
                assert(0);
                return LZMA_PROG_ERROR;
        }

out:
        // Update the CRC32 with the input consumed during this call.
        coder->crc32 = lzma_crc32(in + in_start,
                        *in_pos - in_start, coder->crc32);

        return ret;
}
194
195
196 static void
197 index_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
198 {
199         lzma_index_end(coder->index, allocator);
200         lzma_free(coder, allocator);
201         return;
202 }
203
204
205 static lzma_ret
206 index_decoder_memconfig(lzma_coder *coder, uint64_t *memusage,
207                 uint64_t *old_memlimit, uint64_t new_memlimit)
208 {
209         *memusage = lzma_index_memusage(coder->count);
210
211         if (new_memlimit != 0 && new_memlimit < *memusage)
212                 return LZMA_MEMLIMIT_ERROR;
213
214         *old_memlimit = coder->memlimit;
215         coder->memlimit = new_memlimit;
216
217         return LZMA_OK;
218 }
219
220
221 static lzma_ret
222 index_decoder_reset(lzma_coder *coder, lzma_allocator *allocator,
223                 lzma_index **i, uint64_t memlimit)
224 {
225         // We always allocate a new lzma_index.
226         *i = lzma_index_init(NULL, allocator);
227         if (*i == NULL)
228                 return LZMA_MEM_ERROR;
229
230         // Initialize the rest.
231         coder->sequence = SEQ_INDICATOR;
232         coder->memlimit = memlimit;
233         coder->index = *i;
234         coder->count = 0; // Needs to be initialized due to _memconfig().
235         coder->pos = 0;
236         coder->crc32 = 0;
237
238         return LZMA_OK;
239 }
240
241
242 static lzma_ret
243 index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
244                 lzma_index **i, uint64_t memlimit)
245 {
246         lzma_next_coder_init(index_decoder_init, next, allocator);
247
248         if (i == NULL || memlimit == 0)
249                 return LZMA_PROG_ERROR;
250
251         if (next->coder == NULL) {
252                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
253                 if (next->coder == NULL)
254                         return LZMA_MEM_ERROR;
255
256                 next->code = &index_decode;
257                 next->end = &index_decoder_end;
258                 next->memconfig = &index_decoder_memconfig;
259                 next->coder->index = NULL;
260         } else {
261                 lzma_index_end(next->coder->index, allocator);
262         }
263
264         return index_decoder_reset(next->coder, allocator, i, memlimit);
265 }
266
267
/// \brief      Initialize \a strm as a multi-call Index decoder
///
/// \param      strm        Stream to initialize
/// \param      i           Where to store the pointer to the decoded Index
/// \param      memlimit    Memory usage limit; index_decoder_init() rejects
///                         zero with LZMA_PROG_ERROR
///
/// \return     LZMA_OK when this function runs to its end.
///             NOTE(review): lzma_next_strm_init() is defined outside this
///             file; presumably it returns the error code from this function
///             if initialization fails -- confirm against its definition.
extern LZMA_API(lzma_ret)
lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
{
        lzma_next_strm_init(index_decoder_init, strm, i, memlimit);

        // LZMA_RUN is the only action marked as supported here;
        // index_decode() ignores its action argument.
        strm->internal->supported_actions[LZMA_RUN] = true;

        return LZMA_OK;
}
277
278
279 extern LZMA_API(lzma_ret)
280 lzma_index_buffer_decode(
281                 lzma_index **i, uint64_t *memlimit, lzma_allocator *allocator,
282                 const uint8_t *in, size_t *in_pos, size_t in_size)
283 {
284         // Sanity checks
285         if (i == NULL || in == NULL || in_pos == NULL || *in_pos > in_size)
286                 return LZMA_PROG_ERROR;
287
288         // Initialize the decoder.
289         lzma_coder coder;
290         return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
291
292         // Store the input start position so that we can restore it in case
293         // of an error.
294         const size_t in_start = *in_pos;
295
296         // Do the actual decoding.
297         lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
298                         NULL, NULL, 0, LZMA_RUN);
299
300         if (ret == LZMA_STREAM_END) {
301                 ret = LZMA_OK;
302         } else {
303                 // Something went wrong, free the Index structure and restore
304                 // the input position.
305                 lzma_index_end(*i, allocator);
306                 *i = NULL;
307                 *in_pos = in_start;
308
309                 if (ret == LZMA_OK) {
310                         // The input is truncated or otherwise corrupt.
311                         // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
312                         // like lzma_vli_decode() does in single-call mode.
313                         ret = LZMA_DATA_ERROR;
314
315                 } else if (ret == LZMA_MEMLIMIT_ERROR) {
316                         // Tell the caller how much memory would have
317                         // been needed.
318                         *memlimit = lzma_index_memusage(coder.count);
319                 }
320         }
321
322         return ret;
323 }