]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/simple/simple_coder.c
Do uncompressed size validation in raw encoder. This way
[icculus/xz.git] / src / liblzma / simple / simple_coder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       simple_coder.c
4 /// \brief      Wrapper for simple filters
5 ///
6 /// Simple filters don't change the size of the data i.e. number of bytes
7 /// in equals the number of bytes out.
8 //
9 //  Copyright (C) 2007 Lasse Collin
10 //
11 //  This library is free software; you can redistribute it and/or
12 //  modify it under the terms of the GNU Lesser General Public
13 //  License as published by the Free Software Foundation; either
14 //  version 2.1 of the License, or (at your option) any later version.
15 //
16 //  This library is distributed in the hope that it will be useful,
17 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 //  Lesser General Public License for more details.
20 //
21 ///////////////////////////////////////////////////////////////////////////////
22
23 #include "simple_private.h"
24
25
26 /// Copied or encodes/decodes more data to out[]. Checks and updates
27 /// uncompressed_size when we are the last coder in the chain.
28 /// If we aren't the last filter in the chain, we don't need to care about
29 /// uncompressed size, since we don't change it; the next filter in the
30 /// chain will check it anyway.
31 static lzma_ret
32 copy_or_code(lzma_coder *coder, lzma_allocator *allocator,
33                 const uint8_t *restrict in, size_t *restrict in_pos,
34                 size_t in_size, uint8_t *restrict out,
35                 size_t *restrict out_pos, size_t out_size, lzma_action action)
36 {
37         assert(!coder->end_was_reached);
38
39         if (coder->next.code == NULL) {
40                 const size_t in_avail = in_size - *in_pos;
41
42                 if (!coder->is_encoder) {
43                         // Limit in_size so that we don't copy too much.
44                         if ((lzma_vli)(in_avail) > coder->uncompressed_size)
45                                 in_size = *in_pos + (size_t)(
46                                                 coder->uncompressed_size);
47                 }
48
49                 const size_t out_start = *out_pos;
50                 bufcpy(in, in_pos, in_size, out, out_pos, out_size);
51
52                 // Check if end of stream was reached.
53                 if (coder->is_encoder) {
54                         if (action == LZMA_FINISH && *in_pos == in_size)
55                                 coder->end_was_reached = true;
56                 } else if (coder->uncompressed_size
57                                 != LZMA_VLI_VALUE_UNKNOWN) {
58                         coder->uncompressed_size -= *out_pos - out_start;
59                         if (coder->uncompressed_size == 0)
60                                 coder->end_was_reached = true;
61                 }
62
63         } else {
64                 // Call the next coder in the chain to provide us some data.
65                 // We don't care about uncompressed_size here, because
66                 // the next filter in the chain will do it for us (since
67                 // we don't change the size of the data).
68                 const lzma_ret ret = coder->next.code(
69                                 coder->next.coder, allocator,
70                                 in, in_pos, in_size,
71                                 out, out_pos, out_size, action);
72
73                 if (ret == LZMA_STREAM_END) {
74                         assert(!coder->is_encoder
75                                         || action == LZMA_FINISH);
76                         coder->end_was_reached = true;
77
78                 } else if (ret != LZMA_OK) {
79                         return ret;
80                 }
81         }
82
83         return LZMA_OK;
84 }
85
86
87 static size_t
88 call_filter(lzma_coder *coder, uint8_t *buffer, size_t size)
89 {
90         const size_t filtered = coder->filter(coder->simple,
91                         coder->now_pos, coder->is_encoder,
92                         buffer, size);
93         coder->now_pos += filtered;
94         return filtered;
95 }
96
97
98 static lzma_ret
99 simple_code(lzma_coder *coder, lzma_allocator *allocator,
100                 const uint8_t *restrict in, size_t *restrict in_pos,
101                 size_t in_size, uint8_t *restrict out,
102                 size_t *restrict out_pos, size_t out_size, lzma_action action)
103 {
104         // TODO: Add partial support for LZMA_SYNC_FLUSH. We can support it
105         // in cases when the filter is able to filter everything. With most
106         // simple filters it can be done at offset that is a multiple of 2,
107         // 4, or 16. With x86 filter, it needs good luck, and thus cannot
108         // be made to work predictably.
109         if (action == LZMA_SYNC_FLUSH)
110                 return LZMA_HEADER_ERROR;
111
112         // Flush already filtered data from coder->buffer[] to out[].
113         if (coder->pos < coder->filtered) {
114                 bufcpy(coder->buffer, &coder->pos, coder->filtered,
115                                 out, out_pos, out_size);
116
117                 // If we couldn't flush all the filtered data, return to
118                 // application immediatelly.
119                 if (coder->pos < coder->filtered)
120                         return LZMA_OK;
121
122                 if (coder->end_was_reached) {
123                         assert(coder->filtered == coder->size);
124                         return LZMA_STREAM_END;
125                 }
126         }
127
128         // If we get here, there is no filtered data left in the buffer.
129         coder->filtered = 0;
130
131         assert(!coder->end_was_reached);
132
133         // If there is more output space left than there is unfiltered data
134         // in coder->buffer[], flush coder->buffer[] to out[], and copy/code
135         // more data to out[] hopefully filling it completely. Then filter
136         // the data in out[]. This step is where most of the data gets
137         // filtered if the buffer sizes used by the application are reasonable.
138         const size_t out_avail = out_size - *out_pos;
139         const size_t buf_avail = coder->size - coder->pos;
140         if (out_avail > buf_avail) {
141                 // Store the old position so that we know from which byte
142                 // to start filtering.
143                 const size_t out_start = *out_pos;
144
145                 // Flush data from coder->buffer[] to out[], but don't reset
146                 // coder->pos and coder->size yet. This way the coder can be
147                 // restarted if the next filter in the chain returns e.g.
148                 // LZMA_MEM_ERROR.
149                 memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail);
150                 *out_pos += buf_avail;
151
152                 // Copy/Encode/Decode more data to out[].
153                 {
154                         const lzma_ret ret = copy_or_code(coder, allocator,
155                                         in, in_pos, in_size,
156                                         out, out_pos, out_size, action);
157                         assert(ret != LZMA_STREAM_END);
158                         if (ret != LZMA_OK)
159                                 return ret;
160                 }
161
162                 // Filter out[].
163                 const size_t size = *out_pos - out_start;
164                 const size_t filtered = call_filter(
165                                 coder, out + out_start, size);
166
167                 const size_t unfiltered = size - filtered;
168                 assert(unfiltered <= coder->allocated / 2);
169
170                 // Now we can update coder->pos and coder->size, because
171                 // the next coder in the chain (if any) was successful.
172                 coder->pos = 0;
173                 coder->size = unfiltered;
174
175                 if (coder->end_was_reached) {
176                         // The last byte has been copied to out[] already.
177                         // They are left as is.
178                         coder->size = 0;
179
180                 } else if (unfiltered > 0) {
181                         // There is unfiltered data left in out[]. Copy it to
182                         // coder->buffer[] and rewind *out_pos appropriately.
183                         *out_pos -= unfiltered;
184                         memcpy(coder->buffer, out + *out_pos, unfiltered);
185                 }
186         } else if (coder->pos > 0) {
187                 memmove(coder->buffer, coder->buffer + coder->pos, buf_avail);
188                 coder->size -= coder->pos;
189                 coder->pos = 0;
190         }
191
192         assert(coder->pos == 0);
193
194         // If coder->buffer[] isn't empty, try to fill it by copying/decoding
195         // more data. Then filter coder->buffer[] and copy the successfully
196         // filtered data to out[]. It is probable, that some filtered and
197         // unfiltered data will be left to coder->buffer[].
198         if (coder->size > 0) {
199                 {
200                         const lzma_ret ret = copy_or_code(coder, allocator,
201                                         in, in_pos, in_size,
202                                         coder->buffer, &coder->size,
203                                         coder->allocated, action);
204                         assert(ret != LZMA_STREAM_END);
205                         if (ret != LZMA_OK)
206                                 return ret;
207                 }
208
209                 coder->filtered = call_filter(
210                                 coder, coder->buffer, coder->size);
211
212                 // Everything is considered to be filtered if coder->buffer[]
213                 // contains the last bytes of the data.
214                 if (coder->end_was_reached)
215                         coder->filtered = coder->size;
216
217                 // Flush as much as possible.
218                 bufcpy(coder->buffer, &coder->pos, coder->filtered,
219                                 out, out_pos, out_size);
220         }
221
222         // Check if we got everything done.
223         if (coder->end_was_reached && coder->pos == coder->size)
224                 return LZMA_STREAM_END;
225
226         return LZMA_OK;
227 }
228
229
230 static void
231 simple_coder_end(lzma_coder *coder, lzma_allocator *allocator)
232 {
233         lzma_next_coder_end(&coder->next, allocator);
234         lzma_free(coder->simple, allocator);
235         lzma_free(coder, allocator);
236         return;
237 }
238
239
240 extern lzma_ret
241 lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
242                 const lzma_filter_info *filters,
243                 size_t (*filter)(lzma_simple *simple, uint32_t now_pos,
244                         bool is_encoder, uint8_t *buffer, size_t size),
245                 size_t simple_size, size_t unfiltered_max, bool is_encoder)
246 {
247         // Allocate memory for the lzma_coder structure if needed.
248         if (next->coder == NULL) {
249                 // Here we allocate space also for the temporary buffer. We
250                 // need twice the size of unfiltered_max, because then it
251                 // is always possible to filter at least unfiltered_max bytes
252                 // more data in coder->buffer[] if it can be filled completely.
253                 next->coder = lzma_alloc(sizeof(lzma_coder)
254                                 + 2 * unfiltered_max, allocator);
255                 if (next->coder == NULL)
256                         return LZMA_MEM_ERROR;
257
258                 next->code = &simple_code;
259                 next->end = &simple_coder_end;
260
261                 next->coder->next = LZMA_NEXT_CODER_INIT;
262                 next->coder->filter = filter;
263                 next->coder->allocated = 2 * unfiltered_max;
264
265                 // Allocate memory for filter-specific data structure.
266                 if (simple_size > 0) {
267                         next->coder->simple = lzma_alloc(
268                                         simple_size, allocator);
269                         if (next->coder->simple == NULL)
270                                 return LZMA_MEM_ERROR;
271                 } else {
272                         next->coder->simple = NULL;
273                 }
274         }
275
276         if (filters[0].options != NULL) {
277                 const lzma_options_simple *simple = filters[0].options;
278                 next->coder->now_pos = simple->start_offset;
279         } else {
280                 next->coder->now_pos = 0;
281         }
282
283         // Reset variables.
284         next->coder->is_encoder = is_encoder;
285         next->coder->end_was_reached = false;
286         next->coder->uncompressed_size = filters[0].uncompressed_size;
287         next->coder->pos = 0;
288         next->coder->filtered = 0;
289         next->coder->size = 0;
290
291         return lzma_next_filter_init(
292                         &next->coder->next, allocator, filters + 1);
293 }