src/liblzma/lz/lz_decoder.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       lz_decoder.h
   4 /// \brief      LZ out window
   5 //
   6 //  Copyright (C) 1999-2006 Igor Pavlov
   7 //  Copyright (C) 2007 Lasse Collin
   8 //
   9 //  This library is free software; you can redistribute it and/or
  10 //  modify it under the terms of the GNU Lesser General Public
  11 //  License as published by the Free Software Foundation; either
  12 //  version 2.1 of the License, or (at your option) any later version.
  13 //
  14 //  This library is distributed in the hope that it will be useful,
  15 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 //  Lesser General Public License for more details.
  18 //
  19 ///////////////////////////////////////////////////////////////////////////////
  20
  21 #ifndef LZMA_LZ_DECODER_H
  22 #define LZMA_LZ_DECODER_H
  23
  24 #include "common.h"
  25
  26
  27 typedef struct {
  28         /// Pointer to the dictionary buffer. It can be an allocated buffer
  29         /// internal to liblzma, or it can a be a buffer given by the
  30         /// application when in single-call mode (not implemented yet).
  31         uint8_t *buf;
  32
  33         /// Write position in dictionary. The next byte will be written to
  34         /// buf[pos].
  35         size_t pos;
  36
  37         /// Indicates how full the dictionary is. This is used by
  38         /// dict_is_distance_valid() to detect corrupt files that would
  39         /// read beyond the beginning of the dictionary.
  40         size_t full;
  41
  42         /// Write limit
  43         size_t limit;
  44
  45         /// Size of the dictionary
  46         size_t size;
  47
  48         /// True when dictionary should be reset before decoding more data.
  49         bool need_reset;
  50
  51 } lzma_dict;
  52
  53
  54 typedef struct {
  55         size_t dict_size;
  56         const uint8_t *preset_dict;
  57         size_t preset_dict_size;
  58 } lzma_lz_options;
  59
  60
  61 typedef struct {
  62         /// Data specific to the LZ-based decoder
  63         lzma_coder *coder;
  64
  65         /// Function to decode from in[] to *dict
  66         lzma_ret (*code)(lzma_coder *restrict coder,
  67                         lzma_dict *restrict dict, const uint8_t *restrict in,
  68                         size_t *restrict in_pos, size_t in_size);
  69
  70         void (*reset)(lzma_coder *coder, const void *options);
  71
  72         /// Set the uncompressed size
  73         void (*set_uncompressed)(lzma_coder *coder,
  74                         lzma_vli uncompressed_size);
  75
  76         /// Free allocated resources
  77         void (*end)(lzma_coder *coder, lzma_allocator *allocator);
  78
  79 } lzma_lz_decoder;
  80
  81
  82 #define LZMA_LZ_DECODER_INIT \
  83         (lzma_lz_decoder){ \
  84                 .coder = NULL, \
  85                 .code = NULL, \
  86                 .reset = NULL, \
  87                 .set_uncompressed = NULL, \
  88                 .end = NULL, \
  89         }
  90
  91
  92 extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
  93                 lzma_allocator *allocator, const lzma_filter_info *filters,
  94                 lzma_ret (*lz_init)(lzma_lz_decoder *lz,
  95                         lzma_allocator *allocator, const void *options,
  96                         lzma_lz_options *lz_options));
  97
  98 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
  99
 100 extern void lzma_lz_decoder_uncompressed(
 101                 lzma_coder *coder, lzma_vli uncompressed_size);
 102
 103
 104 //////////////////////
 105 // Inline functions //
 106 //////////////////////
 107
 108 /// Get a byte from the history buffer.
 109 static inline uint8_t
 110 dict_get(const lzma_dict *const dict, const uint32_t distance)
 111 {
 112         return dict->buf[dict->pos - distance - 1
 113                         + (distance < dict->pos ? 0 : dict->size)];
 114 }
 115
 116
 117 /// Test if dictionary is empty.
 118 static inline bool
 119 dict_is_empty(const lzma_dict *const dict)
 120 {
 121         return dict->full == 0;
 122 }
 123
 124
 125 /// Validate the match distance
 126 static inline bool
 127 dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
 128 {
 129         return dict->full >= distance;
 130 }
 131
 132
 133 /// Repeat *len bytes at distance.
 134 static inline bool
 135 dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
 136 {
 137         // Don't write past the end of the dictionary.
 138         const size_t dict_avail = dict->limit - dict->pos;
 139         uint32_t left = MIN(dict_avail, *len);
 140         *len -= left;
 141
 142         // Repeat a block of data from the history. Because memcpy() is faster
 143         // than copying byte by byte in a loop, the copying process gets split
 144         // into three cases.
 145         if (distance < left) {
 146                 // Source and target areas overlap, thus we can't use
 147                 // memcpy() nor even memmove() safely.
 148                 do {
 149                         dict->buf[dict->pos] = dict_get(dict, distance);
 150                         ++dict->pos;
 151                 } while (--left > 0);
 152
 153         } else if (distance < dict->pos) {
 154                 // The easiest and fastest case
 155                 memcpy(dict->buf + dict->pos,
 156                                 dict->buf + dict->pos - distance - 1,
 157                                 left);
 158                 dict->pos += left;
 159
 160         } else {
 161                 // The bigger the dictionary, the more rare this
 162                 // case occurs. We need to "wrap" the dict, thus
 163                 // we might need two memcpy() to copy all the data.
 164                 assert(dict->full == dict->size);
 165                 const uint32_t copy_pos
 166                                 = dict->pos - distance - 1 + dict->size;
 167                 uint32_t copy_size = dict->size - copy_pos;
 168
 169                 if (copy_size < left) {
 170                         memmove(dict->buf + dict->pos, dict->buf + copy_pos,
 171                                         copy_size);
 172                         dict->pos += copy_size;
 173                         copy_size = left - copy_size;
 174                         memcpy(dict->buf + dict->pos, dict->buf, copy_size);
 175                         dict->pos += copy_size;
 176                 } else {
 177                         memmove(dict->buf + dict->pos, dict->buf + copy_pos,
 178                                         left);
 179                         dict->pos += left;
 180                 }
 181         }
 182
 183         // Update how full the dictionary is.
 184         if (dict->full < dict->pos)
 185                 dict->full = dict->pos;
 186
 187         return unlikely(*len != 0);
 188 }
 189
 190
 191 /// Puts one byte into the dictionary. Returns true if the dictionary was
 192 /// already full and the byte couldn't be added.
 193 static inline bool
 194 dict_put(lzma_dict *dict, uint8_t byte)
 195 {
 196         if (unlikely(dict->pos == dict->limit))
 197                 return true;
 198
 199         dict->buf[dict->pos++] = byte;
 200
 201         if (dict->pos > dict->full)
 202                 dict->full = dict->pos;
 203
 204         return false;
 205 }
 206
 207
 208 /// Copies arbitrary amount of data into the dictionary.
 209 static inline void
 210 dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
 211                 size_t *restrict in_pos, size_t in_size,
 212                 size_t *restrict left)
 213 {
 214         // NOTE: If we are being given more data than the size of the
 215         // dictionary, it could be possible to optimize the LZ decoder
 216         // so that not everything needs to go through the dictionary.
 217         // This shouldn't be very common thing in practice though, and
 218         // the slowdown of one extra memcpy() isn't bad compared to how
 219         // much time it would have taken if the data were compressed.
 220
 221         if (in_size - *in_pos > *left)
 222                 in_size = *in_pos + *left;
 223
 224         *left -= lzma_bufcpy(in, in_pos, in_size,
 225                         dict->buf, &dict->pos, dict->limit);
 226
 227         if (dict->pos > dict->full)
 228                 dict->full = dict->pos;
 229
 230         return;
 231 }
 232
 233
 234 static inline void
 235 dict_reset(lzma_dict *dict)
 236 {
 237         dict->need_reset = true;
 238         return;
 239 }
 240
 241 #endif