1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Template for different match finders
6 /// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
7 /// sets slighly different #defines, resulting the different match finders.
9 // Copyright (C) 1999-2006 Igor Pavlov
10 // Copyright (C) 2007 Lasse Collin
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
22 ///////////////////////////////////////////////////////////////////////////////
35 #define START_MAX_LEN 1
38 # define NUM_HASH_DIRECT_BYTES 0
39 # define HASH_2_SIZE (1 << 10)
41 # define NUM_HASH_BYTES 4
42 # define HASH_3_SIZE (1 << 16)
43 # define HASH_3_OFFSET HASH_2_SIZE
44 # define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
46 # define NUM_HASH_BYTES 3
47 # define FIX_HASH_SIZE HASH_2_SIZE
50 # define MIN_MATCH_CHECK NUM_HASH_BYTES
52 # define NUM_HASH_DIRECT_BYTES 2
53 # define NUM_HASH_BYTES 2
54 # define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
55 # define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
56 # define FIX_HASH_SIZE 0
66 # define HASH_CALC() \
68 const uint32_t temp = lzma_crc32_table[0][ \
70 hash_2_value = temp & (HASH_2_SIZE - 1); \
71 hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
72 & (HASH_3_SIZE - 1); \
73 hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
74 ^ (lzma_crc32_table[0][cur[3]] << 5)) \
78 # define HASH_CALC() \
80 const uint32_t temp = lzma_crc32_table[0][ \
82 hash_2_value = temp & (HASH_2_SIZE - 1); \
83 hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
88 # define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
92 // Moves the current read position forward by one byte. In LZMA SDK,
93 // CLZInWindow::MovePos() can read more input data if needed, because of
94 // the callback style API. In liblzma we must have ensured earlier, that
95 // there is enough data available in lz->buffer.
98 if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
99 lz->cyclic_buffer_pos = 0; \
101 assert(lz->read_pos <= lz->write_pos); \
102 if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
103 lzma_lz_encoder_normalize(lz); \
107 //////////////////////
108 // Global constants //
109 //////////////////////
111 LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
112 LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
119 LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
122 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
123 len_limit = lz->match_max_len;
125 len_limit = lz->write_pos - lz->read_pos;
126 if (len_limit < MIN_MATCH_CHECK) {
134 const uint32_t match_min_pos
135 = lz->read_pos + lz->offset > lz->cyclic_buffer_size
136 ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
138 const uint8_t *cur = lz->buffer + lz->read_pos;
139 uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
142 uint32_t hash_2_value;
144 uint32_t hash_3_value;
150 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
152 uint32_t cur_match2 = lz->hash[hash_2_value];
154 uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
156 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
158 if (cur_match2 > match_min_pos) {
159 if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
161 distances[offset++] = 2;
162 distances[offset++] = lz->read_pos + lz->offset
168 lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
169 if (cur_match3 > match_min_pos) {
170 if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
171 if (cur_match3 == cur_match2)
175 distances[offset++] = 3;
176 distances[offset++] = lz->read_pos + lz->offset
178 cur_match2 = cur_match3;
183 if (offset != 1 && cur_match2 == cur_match) {
185 max_len = START_MAX_LEN;
189 lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
192 lz->son[lz->cyclic_buffer_pos] = cur_match;
194 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
195 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
197 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
198 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
201 #if NUM_HASH_DIRECT_BYTES != 0
202 if (cur_match > match_min_pos) {
203 if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
204 != cur[NUM_HASH_DIRECT_BYTES]) {
205 max_len = NUM_HASH_DIRECT_BYTES;
206 distances[offset++] = NUM_HASH_DIRECT_BYTES;
207 distances[offset++] = lz->read_pos + lz->offset
213 uint32_t count = lz->cut_value;
216 if (cur_match <= match_min_pos || count-- == 0) {
217 #ifndef IS_HASH_CHAIN
218 *ptr0 = EMPTY_HASH_VALUE;
219 *ptr1 = EMPTY_HASH_VALUE;
224 const uint32_t delta = lz->read_pos + lz->offset - cur_match;
225 const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
226 ? lz->cyclic_buffer_pos - delta
227 : lz->cyclic_buffer_pos - delta
228 + lz->cyclic_buffer_size;
229 uint32_t *pair = lz->son +
236 const uint8_t *pb = lz->buffer + cur_match - lz->offset;
239 NUM_HASH_DIRECT_BYTES;
240 if (pb[max_len] == cur[max_len])
245 if (pb[len] == cur[len]) {
246 while (++len != len_limit)
247 if (pb[len] != cur[len])
252 distances[offset++] = len;
253 distances[offset++] = delta - 1;
254 if (len == len_limit) {
255 #ifndef IS_HASH_CHAIN
267 if (pb[len] < cur[len]) {
281 distances[0] = offset - 1;
289 LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
293 if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
299 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
300 len_limit = lz->match_max_len;
302 len_limit = lz->write_pos - lz->read_pos;
303 if (len_limit < MIN_MATCH_CHECK) {
308 const uint32_t match_min_pos
309 = lz->read_pos + lz->offset > lz->cyclic_buffer_size
310 ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
314 const uint8_t *cur = lz->buffer + lz->read_pos;
317 uint32_t hash_2_value;
319 uint32_t hash_3_value;
322 lz->hash[HASH_3_OFFSET + hash_3_value]
323 = lz->read_pos + lz->offset;
328 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
334 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
335 lz->hash[FIX_HASH_SIZE + hash_value]
336 = lz->read_pos + lz->offset;
339 lz->son[lz->cyclic_buffer_pos] = cur_match;
341 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
342 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
344 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
345 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
346 uint32_t count = lz->cut_value;
349 if (cur_match <= match_min_pos || count-- == 0) {
350 *ptr0 = EMPTY_HASH_VALUE;
351 *ptr1 = EMPTY_HASH_VALUE;
355 const uint32_t delta = lz->read_pos
356 + lz->offset - cur_match;
357 const uint32_t cyclic_pos
358 = delta <= lz->cyclic_buffer_pos
359 ? lz->cyclic_buffer_pos - delta
360 : lz->cyclic_buffer_pos - delta
361 + lz->cyclic_buffer_size;
362 uint32_t *pair = lz->son + (cyclic_pos << 1);
364 const uint8_t *pb = lz->buffer + cur_match
366 uint32_t len = MIN(len0, len1);
368 if (pb[len] == cur[len]) {
369 while (++len != len_limit)
370 if (pb[len] != cur[len])
373 if (len == len_limit) {
380 if (pb[len] < cur[len]) {
396 } while (--num != 0);