src/liblzma/lz/lz_decoder.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       lz_decoder.h
   4 /// \brief      LZ out window
   5 //
   6 //  Copyright (C) 1999-2006 Igor Pavlov
   7 //  Copyright (C) 2007 Lasse Collin
   8 //
   9 //  This library is free software; you can redistribute it and/or
  10 //  modify it under the terms of the GNU Lesser General Public
  11 //  License as published by the Free Software Foundation; either
  12 //  version 2.1 of the License, or (at your option) any later version.
  13 //
  14 //  This library is distributed in the hope that it will be useful,
  15 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 //  Lesser General Public License for more details.
  18 //
  19 ///////////////////////////////////////////////////////////////////////////////
  20
  21 #ifndef LZMA_LZ_OUT_H
  22 #define LZMA_LZ_OUT_H
  23
  24 #include "common.h"
  25
  26
  27 /// Get a byte from the history buffer.
  28 #define lz_get_byte(lz, distance) \
  29         ((distance) < (lz).pos \
  30                 ? (lz).dict[(lz).pos - (distance) - 1] \
  31                 : (lz).dict[(lz).pos - (distance) - 1 + (lz).end])
  32
  33
  34 #define LZMA_LZ_DECODER_INIT \
  35         (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }
  36
  37
  38 typedef struct {
  39         /// Function to do the actual decoding (LZMA or Inflate)
  40         bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
  41                         size_t *restrict in_pos, size_t size_in,
  42                         bool has_safe_buffer);
  43
  44         /// Pointer to dictionary (history) buffer.
  45         /// \note Not 'restrict' because can alias next_out.
  46         uint8_t *dict;
  47
  48         /// Next write goes to dict[pos].
  49         size_t pos;
  50
  51         /// Next byte to flush is buffer[start].
  52         size_t start;
  53
  54         /// First byte to not flush is buffer[end].
  55         size_t end;
  56
  57         /// First position to which data must not be written.
  58         size_t limit;
  59
  60         /// True if dictionary has needed wrapping.
  61         bool is_full;
  62
  63         /// True if process() has detected End of Payload Marker.
  64         bool eopm_detected;
  65
  66         /// True if the next coder in the chain has returned LZMA_STREAM_END.
  67         bool next_finished;
  68
  69         /// True if the LZ decoder (e.g. LZMA) has detected End of Payload
  70         /// Marker. This may become true before next_finished becomes true.
  71         bool this_finished;
  72
  73         /// When pos >= must_flush_pos, we must not call process().
  74         size_t must_flush_pos;
  75
  76         /// Maximum number of bytes that a single decoding loop inside
  77         /// process() can produce data into dict. This amount is kept
  78         /// always available at dict + pos i.e. it is safe to write a byte
  79         /// to dict[pos + match_max_len - 1].
  80         size_t match_max_len;
  81
  82         /// Number of bytes allocated to dict.
  83         size_t size;
  84
  85         /// Requested size of the dictionary. This is needed because we avoid
  86         /// using extremely tiny history buffers.
  87         size_t requested_size;
  88
  89         /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
  90         lzma_vli uncompressed_size;
  91
  92         /// Number of bytes currently in temp[].
  93         size_t temp_size;
  94
  95         /// Temporary buffer needed when
  96         /// 1) we cannot make the input buffer completely empty; or
  97         /// 2) we are not the last filter in the chain.
  98         uint8_t temp[LZMA_BUFFER_SIZE];
  99
 100 } lzma_lz_decoder;
 101
 102
 103 /////////////////////////
 104 // Function prototypes //
 105 /////////////////////////
 106
 107 extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
 108                 lzma_allocator *allocator, bool (*process)(
 109                         lzma_coder *restrict coder, const uint8_t *restrict in,
 110                         size_t *restrict in_pos, size_t in_size,
 111                         bool has_safe_buffer),
 112                 lzma_vli uncompressed_size,
 113                 size_t history_size, size_t match_max_len);
 114
 115 extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
 116                 const uint8_t *restrict in, size_t *restrict in_pos,
 117                 size_t in_size, uint8_t *restrict out,
 118                 size_t *restrict out_pos, size_t out_size,
 119                 lzma_action action);
 120
 121 /// Deallocates the history buffer if one exists.
 122 extern void lzma_lz_decoder_end(
 123                 lzma_lz_decoder *lz, lzma_allocator *allocator);
 124
 125 //////////////////////
 126 // Inline functions //
 127 //////////////////////
 128
 129 // Repeat a block of data from the history. Because memcpy() is faster
 130 // than copying byte by byte in a loop, the copying process gets split
 131 // into three cases:
 132 // 1. distance < length
 133 //    Source and target areas overlap, thus we can't use memcpy()
 134 //    (nor memmove()) safely.
 135 //    TODO: If this is common enough, it might be worth optimizing this
 136 //    more e.g. by checking if distance > sizeof(uint8_t*) and using
 137 //    memcpy in small chunks.
 138 // 2. distance < pos
 139 //    This is the easiest and the fastest case. The block being copied
 140 //    is a contiguous piece in the history buffer. The buffer offset
 141 //    doesn't need wrapping.
 142 // 3. distance >= pos
 143 //    We need to wrap the position, because otherwise we would try copying
 144 //    behind the first byte of the allocated buffer. It is possible that
 145 //    the block is fragmeneted into two pieces, thus we might need to call
 146 //    memcpy() twice.
 147 // NOTE: The function using this macro must ensure that length is positive
 148 // and that distance is FIXME
 149 static inline bool
 150 lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
 151 {
 152         // Validate offset of the block to be repeated. It doesn't
 153         // make sense to copy data behind the beginning of the stream.
 154         // Leaving this check away would lead to a security problem,
 155         // in which e.g. the data of the previously decoded file(s)
 156         // would be leaked (or whatever happens to be in unused
 157         // part of the dictionary buffer).
 158         if (distance >= lz->pos && !lz->is_full)
 159                 return false;
 160
 161         // It also doesn't make sense to copy data farer than
 162         // the dictionary size.
 163         if (distance >= lz->requested_size)
 164                 return false;
 165
 166         // The caller must have checked these!
 167         assert(distance <= lz->size);
 168         assert(length > 0);
 169         assert(length <= lz->match_max_len);
 170
 171         // Copy the amount of data requested by the decoder.
 172         if (distance < length) {
 173                 // Source and target areas overlap, thus we can't use
 174                 // memcpy() nor even memmove() safely. :-(
 175                 // TODO: Copying byte by byte is slow. It might be
 176                 // worth optimizing this more if this case is common.
 177                 do {
 178                         lz->dict[lz->pos] = lz_get_byte(*lz, distance);
 179                         ++lz->pos;
 180                 } while (--length > 0);
 181
 182         } else if (distance < lz->pos) {
 183                 // The easiest and fastest case
 184                 memcpy(lz->dict + lz->pos,
 185                                 lz->dict + lz->pos - distance - 1,
 186                                 length);
 187                 lz->pos += length;
 188
 189         } else {
 190                 // The bigger the dictionary, the more rare this
 191                 // case occurs. We need to "wrap" the dict, thus
 192                 // we might need two memcpy() to copy all the data.
 193                 assert(lz->is_full);
 194                 const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;
 195                 uint32_t copy_size = lz->end - copy_pos;
 196
 197                 if (copy_size < length) {
 198                         memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
 199                                         copy_size);
 200                         lz->pos += copy_size;
 201                         copy_size = length - copy_size;
 202                         memcpy(lz->dict + lz->pos, lz->dict, copy_size);
 203                         lz->pos += copy_size;
 204                 } else {
 205                         memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
 206                                         length);
 207                         lz->pos += length;
 208                 }
 209         }
 210
 211         return true;
 212 }
 213
 214 #endif