1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file lzma2_encoder.c
4 /// \brief LZMA2 encoder
6 // Copyright (C) 1999-2008 Igor Pavlov
7 // Copyright (C) 2008 Lasse Collin
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 ///////////////////////////////////////////////////////////////////////////////
21 #include "lz_encoder.h"
22 #include "lzma_encoder.h"
24 #include "lzma2_encoder.h"
/// Copying the header of an uncompressed chunk from buf[] to the output
32 SEQ_UNCOMPRESSED_HEADER,
/// Copying the uncompressed chunk data from the dictionary to the output
33 SEQ_UNCOMPRESSED_COPY,
39 /// If this is not NULL, we will check new options from this
40 /// structure when starting a new chunk.
41 const lzma_options_lzma *opt_new;
43 /// LZMA options currently in use.
44 lzma_options_lzma opt_cur;
/// If true, lzma_lzma_encoder_reset() is called before the next chunk
/// is encoded, and an LZMA chunk header uses the control byte
/// 0x80 + (1 << 5) instead of plain 0x80. Set when lc/lp/pb change
/// and after an uncompressed chunk has been produced.
47 bool need_state_reset;
/// If true, the next chunk header is written in its dictionary-reset
/// form. Set to true when the encoder is initialized and cleared once
/// a chunk header has been written.
48 bool need_dictionary_reset;
50 /// Uncompressed size of a chunk
51 size_t uncompressed_size;
53 /// Compressed size of a chunk (excluding headers); this is also used
54 /// to indicate the end of buf[] in SEQ_LZMA_COPY.
55 size_t compressed_size;
57 /// Read position in buf[]
60 /// Buffer to hold the chunk header and LZMA compressed data
61 uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
/// Write the header of an LZMA-compressed chunk into coder->buf.
///
/// The header consists of a control byte, the uncompressed size minus
/// one, the compressed size minus one and, if coder->need_properties is
/// set, the properties encoded by lzma_lzma_lclppb_encode(). After the
/// header has been written, the need_properties, need_state_reset and
/// need_dictionary_reset flags are cleared and coder->compressed_size is
/// increased by LZMA2_HEADER_MAX.
66 lzma2_header_lzma(lzma_coder *coder)
68 assert(coder->uncompressed_size > 0);
69 assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
70 assert(coder->compressed_size > 0);
71 assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
// Control byte: bit 7 (0x80) is always set, and the value placed in
// bits 5-6 depends on whether properties are written and on which
// reset flag is set.
75 if (coder->need_properties) {
78 if (coder->need_dictionary_reset)
79 coder->buf[pos] = 0x80 + (3 << 5);
81 coder->buf[pos] = 0x80 + (2 << 5);
85 if (coder->need_state_reset)
86 coder->buf[pos] = 0x80 + (1 << 5);
88 coder->buf[pos] = 0x80;
91 // Set the start position for copying.
// Uncompressed size minus one: the bits above the low 16 are added to
// the control byte stored above, then the low 16 bits follow with the
// high byte first.
95 size_t size = coder->uncompressed_size - 1;
96 coder->buf[pos++] += size >> 16;
97 coder->buf[pos++] = (size >> 8) & 0xFF;
98 coder->buf[pos++] = size & 0xFF;
// Compressed size minus one as two bytes, high byte first.
101 size = coder->compressed_size - 1;
102 coder->buf[pos++] = size >> 8;
103 coder->buf[pos++] = size & 0xFF;
105 // Properties, if needed
106 if (coder->need_properties)
107 lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
109 coder->need_properties = false;
110 coder->need_state_reset = false;
111 coder->need_dictionary_reset = false;
113 // The copying code uses coder->compressed_size to indicate the end
114 // of coder->buf[], so we need to add the maximum size of the header here.
115 coder->compressed_size += LZMA2_HEADER_MAX;
/// Write the header of an uncompressed chunk to the beginning of
/// coder->buf.
///
/// buf[1] and buf[2] receive coder->uncompressed_size - 1 as a 16-bit
/// value with the high byte first. coder->need_dictionary_reset is
/// checked here and is false when the function is done.
122 lzma2_header_uncompressed(lzma_coder *coder)
124 assert(coder->uncompressed_size > 0);
125 assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
127 // If this is the first chunk, we need to include dictionary
129 if (coder->need_dictionary_reset)
134 coder->need_dictionary_reset = false;
// Size field: uncompressed size minus one, high byte first.
137 coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
138 coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
140 // Set the start position for copying.
/// Encode data available from the match finder into LZMA2 chunks.
///
/// This is a state machine driven by coder->sequence that runs for as
/// long as there is room in the output buffer (*out_pos < out_size):
/// a chunk is started, the LZMA encoder is run into coder->buf, and then
/// either the LZMA chunk with its header or an uncompressed-chunk header
/// followed by the raw data is copied to out.
///
/// When a new chunk would be started but the match finder has no
/// unencoded data, LZMA_OK is returned if mf->action is LZMA_RUN and
/// LZMA_STREAM_END otherwise.
147 lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
148 uint8_t *restrict out, size_t *restrict out_pos,
151 while (*out_pos < out_size)
152 switch (coder->sequence) {
154 // If there's no input left and we are flushing or finishing,
155 // don't start a new chunk.
156 if (mf_unencoded(mf) == 0) {
157 // Write end of payload marker if finishing.
158 if (mf->action == LZMA_FINISH)
159 out[(*out_pos)++] = 0;
161 return mf->action == LZMA_RUN
162 ? LZMA_OK : LZMA_STREAM_END;
165 // Look if there are new options. At least for now,
166 // only lc/lp/pb can be changed.
167 if (coder->opt_new != NULL
168 && (coder->opt_cur.lc != coder->opt_new->lc
169 || coder->opt_cur.lp != coder->opt_new->lp
170 || coder->opt_cur.pb != coder->opt_new->pb)) {
171 // Options have been changed, copy them to opt_cur.
172 // These get validated as part of
173 // lzma_lzma_encoder_reset() below.
174 coder->opt_cur.lc = coder->opt_new->lc;
175 coder->opt_cur.lp = coder->opt_new->lp;
176 coder->opt_cur.pb = coder->opt_new->pb;
178 // We need to write the new options and reset
179 // the encoder state.
180 coder->need_properties = true;
181 coder->need_state_reset = true;
184 if (coder->need_state_reset)
185 return_if_error(lzma_lzma_encoder_reset(
186 coder->lzma, &coder->opt_cur));
// Start the new chunk with empty size counters.
188 coder->uncompressed_size = 0;
189 coder->compressed_size = 0;
190 coder->sequence = SEQ_LZMA_ENCODE;
194 case SEQ_LZMA_ENCODE: {
195 // Calculate how much more uncompressed data this chunk
197 const uint32_t left = LZMA2_UNCOMPRESSED_MAX
198 - coder->uncompressed_size;
201 if (left < mf->match_len_max) {
202 // Must flush immediately since the next LZMA symbol
203 // could make the uncompressed size of the chunk too
207 // Calculate maximum read_limit that is OK from point
208 // of view of LZMA2 chunk size.
209 limit = mf->read_pos - mf->read_ahead
210 + left - mf->match_len_max;
213 // Save the start position so that we can update
214 // coder->uncompressed_size.
215 const uint32_t read_start = mf->read_pos - mf->read_ahead;
217 // Call the LZMA encoder until the chunk is finished.
// The compressed data is written after the space reserved for the
// header (LZMA2_HEADER_MAX bytes) in coder->buf.
218 const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
219 coder->buf + LZMA2_HEADER_MAX,
220 &coder->compressed_size,
221 LZMA2_CHUNK_MAX, limit);
223 coder->uncompressed_size += mf->read_pos - mf->read_ahead
226 assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
227 assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
229 if (ret != LZMA_STREAM_END)
232 // See if the chunk compressed. If it didn't, we encode it
233 // as uncompressed chunk. This saves a few bytes of space
234 // and makes decoding faster.
235 if (coder->compressed_size >= coder->uncompressed_size) {
236 coder->uncompressed_size += mf->read_ahead;
237 assert(coder->uncompressed_size
238 <= LZMA2_UNCOMPRESSED_MAX);
240 lzma2_header_uncompressed(coder);
// Request lzma_lzma_encoder_reset() before the next chunk is encoded.
241 coder->need_state_reset = true;
242 coder->sequence = SEQ_UNCOMPRESSED_HEADER;
246 // The chunk did compress at least by one byte, so we store
247 // the chunk as LZMA.
248 lzma2_header_lzma(coder);
250 coder->sequence = SEQ_LZMA_COPY;
256 // Copy the compressed chunk along its headers to the
258 lzma_bufcpy(coder->buf, &coder->buf_pos,
259 coder->compressed_size,
260 out, out_pos, out_size);
// Not everything has been copied yet if buf_pos has not reached the
// end marker stored in compressed_size.
261 if (coder->buf_pos != coder->compressed_size)
// The whole chunk has been copied; the next chunk can be started.
264 coder->sequence = SEQ_INIT;
267 case SEQ_UNCOMPRESSED_HEADER:
268 // Copy the three-byte header to indicate uncompressed chunk.
269 lzma_bufcpy(coder->buf, &coder->buf_pos,
270 LZMA2_HEADER_UNCOMPRESSED,
271 out, out_pos, out_size);
272 if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
275 coder->sequence = SEQ_UNCOMPRESSED_COPY;
279 case SEQ_UNCOMPRESSED_COPY:
280 // Copy the uncompressed data as is from the dictionary
281 // to the output buffer.
// NOTE(review): mf_read() presumably decreases
// coder->uncompressed_size by the number of bytes it copied, so a
// nonzero value means the copy is not finished - confirm against
// mf_read().
282 mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
283 if (coder->uncompressed_size != 0)
286 coder->sequence = SEQ_INIT;
/// Free the LZMA encoder held in coder->lzma and then the LZMA2 coder
/// structure itself, both via lzma_free() with the given allocator.
295 lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
297 lzma_free(coder->lzma, allocator);
298 lzma_free(coder, allocator);
/// Set up the LZMA2 coder stored in lz->coder.
///
/// The coder structure is allocated if lz->coder is NULL (LZMA_MEM_ERROR
/// is returned if that allocation fails). The state is then reset to
/// SEQ_INIT, the options are copied into opt_cur, the LZMA encoder is
/// created with lzma_lzma_encoder_create() (wrapped in return_if_error()),
/// and lz_options->before_size is raised if needed so that
/// before_size + dict_size is at least LZMA2_CHUNK_MAX.
304 lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
305 const void *options, lzma_lz_options *lz_options)
308 return LZMA_PROG_ERROR;
310 if (lz->coder == NULL) {
311 lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
312 if (lz->coder == NULL)
313 return LZMA_MEM_ERROR;
315 lz->code = &lzma2_encode;
316 lz->end = &lzma2_encoder_end;
318 lz->coder->lzma = NULL;
// Initial state: properties are needed and the dictionary reset flag
// is set.
321 lz->coder->sequence = SEQ_INIT;
322 lz->coder->need_properties = true;
323 lz->coder->need_state_reset = false;
324 lz->coder->need_dictionary_reset = true;
// Keep a private copy of the options in opt_cur.
326 lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
327 lz->coder->opt_new = lz->coder->opt_cur.persistent
330 // Initialize LZMA encoder
331 return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
332 &lz->coder->opt_cur, lz_options));
334 // Make sure that we will always have enough history available in
335 // case we need to use uncompressed chunks. They are used when the
336 // compressed size of a chunk is not smaller than the uncompressed
337 // size, so we need to have at least LZMA2_CHUNK_MAX bytes
338 // of history available.
339 if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX)
340 lz_options->before_size
341 = LZMA2_CHUNK_MAX - lz_options->dict_size;
/// Initialize the LZMA2 encoder by delegating to lzma_lz_encoder_init(),
/// passing lzma2_encoder_init as the encoder-specific init function.
348 lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
349 const lzma_filter_info *filters)
351 return lzma_lz_encoder_init(
352 next, allocator, filters, &lzma2_encoder_init);
/// Calculate the memory usage of the LZMA2 encoder for the given options:
/// sizeof(lzma_coder) plus the value reported by
/// lzma_lzma_encoder_memusage(). A result of UINT64_MAX from that
/// function is checked for before the sum is formed.
357 lzma_lzma2_encoder_memusage(const void *options)
359 const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options);
360 if (lzma_mem == UINT64_MAX)
363 return sizeof(lzma_coder) + lzma_mem;
/// Encode the dictionary size from the given LZMA options into out[0].
/// Sizes below LZMA_DICT_SIZE_MIN are treated as LZMA_DICT_SIZE_MIN.
368 lzma_lzma2_props_encode(const void *options, uint8_t *out)
370 const lzma_options_lzma *const opt = options;
371 uint32_t d = MAX(opt->dict_size, LZMA_DICT_SIZE_MIN);
373 // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
374 // on which one is the next:
382 // Get the highest two bits using the proper encoding:
386 out[0] = get_pos_slot(d + 1) - 24;