src/liblzma/lz/lz_decoder.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       lz_decoder.c
   4 /// \brief      LZ out window
   5 //
   6 //  Copyright (C) 1999-2006 Igor Pavlov
   7 //  Copyright (C) 2007 Lasse Collin
   8 //
   9 //  This library is free software; you can redistribute it and/or
  10 //  modify it under the terms of the GNU Lesser General Public
  11 //  License as published by the Free Software Foundation; either
  12 //  version 2.1 of the License, or (at your option) any later version.
  13 //
  14 //  This library is distributed in the hope that it will be useful,
  15 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 //  Lesser General Public License for more details.
  18 //
  19 ///////////////////////////////////////////////////////////////////////////////
  20
  21 // liblzma supports multiple LZ77-based filters. The LZ part is shared
  22 // between these filters. The LZ code takes care of dictionary handling
  23 // and passing the data between filters in the chain. The filter-specific
  24 // part decodes from the input buffer to the dictionary.
  25
  26
  27 #include "lz_decoder.h"
  28
  29
  30 struct lzma_coder_s {
  31         /// Dictionary (history buffer)
  32         lzma_dict dict;
  33
  34         /// The actual LZ-based decoder e.g. LZMA
  35         lzma_lz_decoder lz;
  36
  37         /// Next filter in the chain, if any. Note that LZMA and LZMA2 are
  38         /// only allowed as the last filter, but the long-range filter in
  39         /// future can be in the middle of the chain.
  40         lzma_next_coder next;
  41
  42         /// True if the next filter in the chain has returned LZMA_STREAM_END.
  43         bool next_finished;
  44
  45         /// True if the LZ decoder (e.g. LZMA) has detected end of payload
  46         /// marker. This may become true before next_finished becomes true.
  47         bool this_finished;
  48
  49         /// Temporary buffer needed when the LZ-based filter is not the last
  50         /// filter in the chain. The output of the next filter is first
  51         /// decoded into buffer[], which is then used as input for the actual
  52         /// LZ-based decoder.
  53         struct {
  54                 size_t pos;
  55                 size_t size;
  56                 uint8_t buffer[LZMA_BUFFER_SIZE];
  57         } temp;
  58 };
  59
  60
  61 static lzma_ret
  62 decode_buffer(lzma_coder *coder,
  63                 const uint8_t *restrict in, size_t *restrict in_pos,
  64                 size_t in_size, uint8_t *restrict out,
  65                 size_t *restrict out_pos, size_t out_size)
  66 {
  67         while (true) {
  68                 // Wrap the dictionary if needed.
  69                 if (coder->dict.pos == coder->dict.size)
  70                         coder->dict.pos = 0;
  71
  72                 // Store the current dictionary position. It is needed to know
  73                 // where to start copying to the out[] buffer.
  74                 const size_t dict_start = coder->dict.pos;
  75
  76                 // Calculate how much we allow the process() function to
  77                 // decode. It must not decode past the end of the dictionary
  78                 // buffer, and we don't want it to decode more than is
  79                 // actually needed to fill the out[] buffer.
  80                 coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos,
  81                                 coder->dict.size - coder->dict.pos);
  82
  83                 // Call the process() function to do the actual decoding.
  84                 const lzma_ret ret = coder->lz.code(
  85                                 coder->lz.coder, &coder->dict,
  86                                 in, in_pos, in_size);
  87
  88                 // Copy the decoded data from the dictionary to the out[]
  89                 // buffer.
  90                 const size_t copy_size = coder->dict.pos - dict_start;
  91                 assert(copy_size <= out_size - *out_pos);
  92                 memcpy(out + *out_pos, coder->dict.buf + dict_start,
  93                                 copy_size);
  94                 *out_pos += copy_size;
  95
  96                 // Return if everything got decoded or an error occurred, or
  97                 // if there's no more data to decode.
  98                 if (ret != LZMA_OK || *out_pos == out_size
  99                                 || coder->dict.pos < coder->dict.size)
 100                         return ret;
 101         }
 102 }
 103
 104
 105 static lzma_ret
 106 lz_decode(lzma_coder *coder,
 107                 lzma_allocator *allocator lzma_attribute((unused)),
 108                 const uint8_t *restrict in, size_t *restrict in_pos,
 109                 size_t in_size, uint8_t *restrict out,
 110                 size_t *restrict out_pos, size_t out_size,
 111                 lzma_action action)
 112 {
 113         if (coder->next.code == NULL)
 114                 return decode_buffer(coder, in, in_pos, in_size,
 115                                 out, out_pos, out_size);
 116
 117         // We aren't the last coder in the chain, we need to decode
 118         // our input to a temporary buffer.
 119         while (*out_pos < out_size) {
 120                 // Fill the temporary buffer if it is empty.
 121                 if (!coder->next_finished
 122                                 && coder->temp.pos == coder->temp.size) {
 123                         coder->temp.pos = 0;
 124                         coder->temp.size = 0;
 125
 126                         const lzma_ret ret = coder->next.code(
 127                                         coder->next.coder,
 128                                         allocator, in, in_pos, in_size,
 129                                         coder->temp.buffer, &coder->temp.size,
 130                                         LZMA_BUFFER_SIZE, action);
 131
 132                         if (ret == LZMA_STREAM_END)
 133                                 coder->next_finished = true;
 134                         else if (ret != LZMA_OK || coder->temp.size == 0)
 135                                 return ret;
 136                 }
 137
 138                 if (coder->this_finished) {
 139                         if (coder->temp.size != 0)
 140                                 return LZMA_DATA_ERROR;
 141
 142                         if (coder->next_finished)
 143                                 return LZMA_STREAM_END;
 144
 145                         return LZMA_OK;
 146                 }
 147
 148                 const lzma_ret ret = decode_buffer(coder, coder->temp.buffer,
 149                                 &coder->temp.pos, coder->temp.size,
 150                                 out, out_pos, out_size);
 151
 152                 if (ret == LZMA_STREAM_END)
 153                         coder->this_finished = true;
 154                 else if (ret != LZMA_OK)
 155                         return ret;
 156                 else if (coder->next_finished && *out_pos < out_size)
 157                         return LZMA_DATA_ERROR;
 158         }
 159
 160         return LZMA_OK;
 161 }
 162
 163
 164 static void
 165 lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
 166 {
 167         lzma_next_end(&coder->next, allocator);
 168         lzma_free(coder->dict.buf, allocator);
 169
 170         if (coder->lz.end != NULL)
 171                 coder->lz.end(coder->lz.coder, allocator);
 172         else
 173                 lzma_free(coder->lz.coder, allocator);
 174
 175         lzma_free(coder, allocator);
 176         return;
 177 }
 178
 179
 180 extern lzma_ret
 181 lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 182                 const lzma_filter_info *filters,
 183                 lzma_ret (*lz_init)(lzma_lz_decoder *lz,
 184                         lzma_allocator *allocator, const void *options,
 185                         size_t *dict_size))
 186 {
 187         // Allocate the base structure if it isn't already allocated.
 188         if (next->coder == NULL) {
 189                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
 190                 if (next->coder == NULL)
 191                         return LZMA_MEM_ERROR;
 192
 193                 next->code = &lz_decode;
 194                 next->end = &lz_decoder_end;
 195
 196                 next->coder->dict.buf = NULL;
 197                 next->coder->dict.size = 0;
 198                 next->coder->lz = LZMA_LZ_DECODER_INIT;
 199                 next->coder->next = LZMA_NEXT_CODER_INIT;
 200         }
 201
 202         // Allocate and initialize the LZ-based decoder. It will also give
 203         // us the dictionary size.
 204         size_t dict_size;
 205         return_if_error(lz_init(&next->coder->lz, allocator,
 206                         filters[0].options, &dict_size));
 207
 208         // If the dictionary size is very small, increase it to 4096 bytes.
 209         // This is to prevent constant wrapping of the dictionary, which
 210         // would slow things down. The downside is that since we don't check
 211         // separately for the real dictionary size, we may happily accept
 212         // corrupt files.
 213         if (dict_size < 4096)
 214                 dict_size = 4096;
 215
 216         // Make dictionary size a multipe of 16. Some LZ-based decoders like
 217         // LZMA use the lowest bits lzma_dict.pos to know the alignment of the
 218         // data. Aligned buffer is also good when memcpying from the
 219         // dictionary to the output buffer, since applications are
 220         // recommended to give aligned buffers to liblzma.
 221         //
 222         // Avoid integer overflow. FIXME Should the return value be
 223         // LZMA_HEADER_ERROR or LZMA_MEM_ERROR?
 224         if (dict_size > SIZE_MAX - 15)
 225                 return LZMA_MEM_ERROR;
 226
 227         dict_size = (dict_size + 15) & (SIZE_MAX - 15);
 228
 229         // Allocate and initialize the dictionary.
 230         if (next->coder->dict.size != dict_size) {
 231                 lzma_free(next->coder->dict.buf, allocator);
 232                 next->coder->dict.buf = lzma_alloc(dict_size, allocator);
 233                 if (next->coder->dict.buf == NULL)
 234                         return LZMA_MEM_ERROR;
 235
 236                 next->coder->dict.size = dict_size;
 237         }
 238
 239         dict_reset(&next->coder->dict);
 240
 241         // Miscellaneous initializations
 242         next->coder->next_finished = false;
 243         next->coder->this_finished = false;
 244         next->coder->temp.pos = 0;
 245         next->coder->temp.size = 0;
 246
 247         // Initialize the next filter in the chain, if any.
 248         return lzma_next_filter_init(&next->coder->next, allocator,
 249                         filters + 1);
 250 }
 251
 252
 253 extern uint64_t
 254 lzma_lz_decoder_memusage(size_t dictionary_size)
 255 {
 256         return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
 257 }
 258
 259
 260 extern void
 261 lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
 262 {
 263         coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
 264 }