1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Template for different match finders
6 /// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
7 /// sets slighly different #defines, resulting the different match finders.
9 // Copyright (C) 1999-2006 Igor Pavlov
10 // Copyright (C) 2007 Lasse Collin
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
22 ///////////////////////////////////////////////////////////////////////////////
35 #define START_MAX_LEN 1
38 # define NUM_HASH_DIRECT_BYTES 0
39 # define HASH_2_SIZE (1 << 10)
41 # define NUM_HASH_BYTES 4
42 # define HASH_3_SIZE (1 << 16)
43 # define HASH_3_OFFSET HASH_2_SIZE
44 # define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
46 # define NUM_HASH_BYTES 3
47 # define FIX_HASH_SIZE HASH_2_SIZE
50 # define MIN_MATCH_CHECK NUM_HASH_BYTES
52 # define NUM_HASH_DIRECT_BYTES 2
53 # define NUM_HASH_BYTES 2
54 # define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
55 # define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
56 # define FIX_HASH_SIZE 0
66 # define HASH_CALC() \
68 const uint32_t temp = lzma_crc32_table[0][ \
70 hash_2_value = temp & (HASH_2_SIZE - 1); \
71 hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
72 & (HASH_3_SIZE - 1); \
73 hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
74 ^ (lzma_crc32_table[0][cur[3]] << 5)) \
78 # define HASH_CALC() \
80 const uint32_t temp = lzma_crc32_table[0][ \
82 hash_2_value = temp & (HASH_2_SIZE - 1); \
83 hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
88 # define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
92 // Moves the current read position forward by one byte. In LZMA SDK,
93 // CLZInWindow::MovePos() can read more input data if needed, because of
94 // the callback style API. In liblzma we must have ensured earlier, that
95 // there is enough data available in lz->buffer.
98 if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
99 lz->cyclic_buffer_pos = 0; \
101 assert(lz->read_pos <= lz->write_pos); \
102 if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
103 lzma_lz_encoder_normalize(lz); \
107 //////////////////////
108 // Global constants //
109 //////////////////////
111 LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
112 LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
119 LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
122 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
123 len_limit = lz->match_max_len;
125 assert(lz->stream_end_was_reached);
126 len_limit = lz->write_pos - lz->read_pos;
127 if (len_limit < MIN_MATCH_CHECK) {
135 const uint32_t match_min_pos
136 = lz->read_pos + lz->offset > lz->cyclic_buffer_size
137 ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
139 const uint8_t *cur = lz->buffer + lz->read_pos;
140 uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
143 uint32_t hash_2_value;
145 uint32_t hash_3_value;
151 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
153 uint32_t cur_match2 = lz->hash[hash_2_value];
155 uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
157 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
159 if (cur_match2 > match_min_pos) {
160 if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
162 distances[offset++] = 2;
163 distances[offset++] = lz->read_pos + lz->offset
169 lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
170 if (cur_match3 > match_min_pos) {
171 if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
172 if (cur_match3 == cur_match2)
176 distances[offset++] = 3;
177 distances[offset++] = lz->read_pos + lz->offset
179 cur_match2 = cur_match3;
184 if (offset != 1 && cur_match2 == cur_match) {
186 max_len = START_MAX_LEN;
190 lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
193 lz->son[lz->cyclic_buffer_pos] = cur_match;
195 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
196 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
198 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
199 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
202 #if NUM_HASH_DIRECT_BYTES != 0
203 if (cur_match > match_min_pos) {
204 if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
205 != cur[NUM_HASH_DIRECT_BYTES]) {
206 max_len = NUM_HASH_DIRECT_BYTES;
207 distances[offset++] = NUM_HASH_DIRECT_BYTES;
208 distances[offset++] = lz->read_pos + lz->offset
214 uint32_t count = lz->cut_value;
217 if (cur_match <= match_min_pos || count-- == 0) {
218 #ifndef IS_HASH_CHAIN
219 *ptr0 = EMPTY_HASH_VALUE;
220 *ptr1 = EMPTY_HASH_VALUE;
225 const uint32_t delta = lz->read_pos + lz->offset - cur_match;
226 const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
227 ? lz->cyclic_buffer_pos - delta
228 : lz->cyclic_buffer_pos - delta
229 + lz->cyclic_buffer_size;
230 uint32_t *pair = lz->son +
237 const uint8_t *pb = lz->buffer + cur_match - lz->offset;
240 NUM_HASH_DIRECT_BYTES;
241 if (pb[max_len] == cur[max_len])
246 if (pb[len] == cur[len]) {
247 while (++len != len_limit)
248 if (pb[len] != cur[len])
253 distances[offset++] = len;
254 distances[offset++] = delta - 1;
255 if (len == len_limit) {
256 #ifndef IS_HASH_CHAIN
268 if (pb[len] < cur[len]) {
282 distances[0] = offset - 1;
290 LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
294 if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
300 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
301 len_limit = lz->match_max_len;
303 assert(lz->stream_end_was_reached == true);
304 len_limit = lz->write_pos - lz->read_pos;
305 if (len_limit < MIN_MATCH_CHECK) {
310 const uint32_t match_min_pos
311 = lz->read_pos + lz->offset > lz->cyclic_buffer_size
312 ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
316 const uint8_t *cur = lz->buffer + lz->read_pos;
319 uint32_t hash_2_value;
321 uint32_t hash_3_value;
324 lz->hash[HASH_3_OFFSET + hash_3_value]
325 = lz->read_pos + lz->offset;
330 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
336 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
337 lz->hash[FIX_HASH_SIZE + hash_value]
338 = lz->read_pos + lz->offset;
341 lz->son[lz->cyclic_buffer_pos] = cur_match;
343 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
344 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
346 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
347 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
348 uint32_t count = lz->cut_value;
351 if (cur_match <= match_min_pos || count-- == 0) {
352 *ptr0 = EMPTY_HASH_VALUE;
353 *ptr1 = EMPTY_HASH_VALUE;
357 const uint32_t delta = lz->read_pos
358 + lz->offset - cur_match;
359 const uint32_t cyclic_pos
360 = delta <= lz->cyclic_buffer_pos
361 ? lz->cyclic_buffer_pos - delta
362 : lz->cyclic_buffer_pos - delta
363 + lz->cyclic_buffer_size;
364 uint32_t *pair = lz->son + (cyclic_pos << 1);
366 const uint8_t *pb = lz->buffer + cur_match
368 uint32_t len = MIN(len0, len1);
370 if (pb[len] == cur[len]) {
371 while (++len != len_limit)
372 if (pb[len] != cur[len])
375 if (len == len_limit) {
382 if (pb[len] < cur[len]) {
398 } while (--num != 0);