]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/lz/match_c.h
Don't memzero() the history buffer when initializing LZ
[icculus/xz.git] / src / liblzma / lz / match_c.h
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       match_c.h
4 /// \brief      Template for different match finders
5 ///
6 /// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
7 /// sets slighly different #defines, resulting the different match finders.
8 //
9 //  Copyright (C) 1999-2006 Igor Pavlov
10 //  Copyright (C) 2007 Lasse Collin
11 //
12 //  This library is free software; you can redistribute it and/or
13 //  modify it under the terms of the GNU Lesser General Public
14 //  License as published by the Free Software Foundation; either
15 //  version 2.1 of the License, or (at your option) any later version.
16 //
17 //  This library is distributed in the hope that it will be useful,
18 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
19 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 //  Lesser General Public License for more details.
21 //
22 ///////////////////////////////////////////////////////////////////////////////
23
24 //////////////
25 // Includes //
26 //////////////
27
28 #include "check.h"
29
30
31 ///////////////
32 // Constants //
33 ///////////////
34
35 #define START_MAX_LEN 1
36
37 #ifdef HASH_ARRAY_2
38 #       define NUM_HASH_DIRECT_BYTES 0
39 #       define HASH_2_SIZE (1 << 10)
40 #       ifdef HASH_ARRAY_3
41 #               define NUM_HASH_BYTES 4
42 #               define HASH_3_SIZE (1 << 16)
43 #               define HASH_3_OFFSET HASH_2_SIZE
44 #               define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
45 #       else
46 #               define NUM_HASH_BYTES 3
47 #               define FIX_HASH_SIZE HASH_2_SIZE
48 #       endif
49 #       define HASH_SIZE 0
50 #       define MIN_MATCH_CHECK NUM_HASH_BYTES
51 #else
52 #       define NUM_HASH_DIRECT_BYTES 2
53 #       define NUM_HASH_BYTES 2
54 #       define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
55 #       define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
56 #       define FIX_HASH_SIZE 0
57 #endif
58
59
60 ////////////
61 // Macros //
62 ////////////
63
64 #ifdef HASH_ARRAY_2
65 #       ifdef HASH_ARRAY_3
66 #               define HASH_CALC() \
67                 do { \
68                         const uint32_t temp = lzma_crc32_table[0][ \
69                                         cur[0]] ^ cur[1]; \
70                         hash_2_value = temp & (HASH_2_SIZE - 1); \
71                         hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
72                                         & (HASH_3_SIZE - 1); \
73                         hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
74                                         ^ (lzma_crc32_table[0][cur[3]] << 5)) \
75                                         & lz->hash_mask; \
76                 } while (0)
77 #       else
78 #               define HASH_CALC() \
79                 do { \
80                         const uint32_t temp = lzma_crc32_table[0][ \
81                                         cur[0]] ^ cur[1]; \
82                         hash_2_value = temp & (HASH_2_SIZE - 1); \
83                         hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
84                                         & lz->hash_mask; \
85                 } while (0)
86 #       endif
87 #else
88 #       define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
89 #endif
90
91
92 // Moves the current read position forward by one byte. In LZMA SDK,
93 // CLZInWindow::MovePos() can read more input data if needed, because of
94 // the callback style API. In liblzma we must have ensured earlier, that
95 // there is enough data available in lz->buffer.
96 #define move_pos() \
97 do { \
98         if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
99                 lz->cyclic_buffer_pos = 0; \
100         ++lz->read_pos; \
101         assert(lz->read_pos <= lz->write_pos); \
102         if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
103                 lzma_lz_encoder_normalize(lz); \
104 } while (0)
105
106
107 //////////////////////
108 // Global constants //
109 //////////////////////
110
111 LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
112 LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
113
114
115 ///////////////////
116 // API functions //
117 ///////////////////
118
119 LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
120 {
121         uint32_t len_limit;
122         if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
123                 len_limit = lz->match_max_len;
124         } else {
125                 len_limit = lz->write_pos - lz->read_pos;
126                 if (len_limit < MIN_MATCH_CHECK) {
127                         distances[0] = 0;
128                         move_pos();
129                         return;
130                 }
131         }
132
133         int32_t offset = 1;
134         const uint32_t match_min_pos
135                         = lz->read_pos + lz->offset > lz->cyclic_buffer_size
136                         ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
137                         : 0;
138         const uint8_t *cur = lz->buffer + lz->read_pos;
139         uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
140
141 #ifdef HASH_ARRAY_2
142         uint32_t hash_2_value;
143 #       ifdef HASH_ARRAY_3
144         uint32_t hash_3_value;
145 #       endif
146 #endif
147         uint32_t hash_value;
148         HASH_CALC();
149
150         uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
151 #ifdef HASH_ARRAY_2
152         uint32_t cur_match2 = lz->hash[hash_2_value];
153 #       ifdef HASH_ARRAY_3
154         uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
155 #       endif
156         lz->hash[hash_2_value] = lz->read_pos + lz->offset;
157
158         if (cur_match2 > match_min_pos) {
159                 if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
160                         max_len = 2;
161                         distances[offset++] = 2;
162                         distances[offset++] = lz->read_pos + lz->offset
163                                         - cur_match2 - 1;
164                 }
165         }
166
167 #       ifdef HASH_ARRAY_3
168         lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
169         if (cur_match3 > match_min_pos) {
170                 if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
171                         if (cur_match3 == cur_match2)
172                                 offset -= 2;
173
174                         max_len = 3;
175                         distances[offset++] = 3;
176                         distances[offset++] = lz->read_pos + lz->offset
177                                         - cur_match3 - 1;
178                         cur_match2 = cur_match3;
179                 }
180         }
181 #       endif
182
183         if (offset != 1 && cur_match2 == cur_match) {
184                 offset -= 2;
185                 max_len = START_MAX_LEN;
186         }
187 #endif
188
189         lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
190
191 #ifdef IS_HASH_CHAIN
192         lz->son[lz->cyclic_buffer_pos] = cur_match;
193 #else
194         uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
195         uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
196
197         uint32_t len0 = NUM_HASH_DIRECT_BYTES;
198         uint32_t len1 = NUM_HASH_DIRECT_BYTES;
199 #endif
200
201 #if NUM_HASH_DIRECT_BYTES != 0
202         if (cur_match > match_min_pos) {
203                 if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
204                                 != cur[NUM_HASH_DIRECT_BYTES]) {
205                         max_len = NUM_HASH_DIRECT_BYTES;
206                         distances[offset++] = NUM_HASH_DIRECT_BYTES;
207                         distances[offset++] = lz->read_pos + lz->offset
208                                         - cur_match - 1;
209                 }
210         }
211 #endif
212
213         uint32_t count = lz->cut_value;
214
215         while (true) {
216                 if (cur_match <= match_min_pos || count-- == 0) {
217 #ifndef IS_HASH_CHAIN
218                         *ptr0 = EMPTY_HASH_VALUE;
219                         *ptr1 = EMPTY_HASH_VALUE;
220 #endif
221                         break;
222                 }
223
224                 const uint32_t delta = lz->read_pos + lz->offset - cur_match;
225                 const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
226                                 ? lz->cyclic_buffer_pos - delta
227                                 : lz->cyclic_buffer_pos - delta
228                                         + lz->cyclic_buffer_size;
229                 uint32_t *pair = lz->son +
230 #ifdef IS_HASH_CHAIN
231                                 cyclic_pos;
232 #else
233                                 (cyclic_pos << 1);
234 #endif
235
236                 const uint8_t *pb = lz->buffer + cur_match - lz->offset;
237                 uint32_t len =
238 #ifdef IS_HASH_CHAIN
239                                 NUM_HASH_DIRECT_BYTES;
240                 if (pb[max_len] == cur[max_len])
241 #else
242                                 MIN(len0, len1);
243 #endif
244
245                 if (pb[len] == cur[len]) {
246                         while (++len != len_limit)
247                                 if (pb[len] != cur[len])
248                                         break;
249
250                         if (max_len < len) {
251                                 max_len = len;
252                                 distances[offset++] = len;
253                                 distances[offset++] = delta - 1;
254                                 if (len == len_limit) {
255 #ifndef IS_HASH_CHAIN
256                                         *ptr1 = pair[0];
257                                         *ptr0 = pair[1];
258 #endif
259                                         break;
260                                 }
261                         }
262                 }
263
264 #ifdef IS_HASH_CHAIN
265                 cur_match = *pair;
266 #else
267                 if (pb[len] < cur[len]) {
268                         *ptr1 = cur_match;
269                         ptr1 = pair + 1;
270                         cur_match = *ptr1;
271                         len1 = len;
272                 } else {
273                         *ptr0 = cur_match;
274                         ptr0 = pair;
275                         cur_match = *ptr0;
276                         len0 = len;
277                 }
278 #endif
279         }
280
281         distances[0] = offset - 1;
282
283         move_pos();
284
285         return;
286 }
287
288
289 LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
290 {
291         do {
292 #ifdef IS_HASH_CHAIN
293                 if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
294                         move_pos();
295                         continue;
296                 }
297 #else
298                 uint32_t len_limit;
299                 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
300                         len_limit = lz->match_max_len;
301                 } else {
302                         len_limit = lz->write_pos - lz->read_pos;
303                         if (len_limit < MIN_MATCH_CHECK) {
304                                 move_pos();
305                                 continue;
306                         }
307                 }
308                 const uint32_t match_min_pos
309                         = lz->read_pos + lz->offset > lz->cyclic_buffer_size
310                         ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
311                         : 0;
312 #endif
313
314                 const uint8_t *cur = lz->buffer + lz->read_pos;
315
316 #ifdef HASH_ARRAY_2
317                 uint32_t hash_2_value;
318 #       ifdef HASH_ARRAY_3
319                 uint32_t hash_3_value;
320                 uint32_t hash_value;
321                 HASH_CALC();
322                 lz->hash[HASH_3_OFFSET + hash_3_value]
323                                 = lz->read_pos + lz->offset;
324 #       else
325                 uint32_t hash_value;
326                 HASH_CALC();
327 #       endif
328                 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
329 #else
330                 uint32_t hash_value;
331                 HASH_CALC();
332 #endif
333
334                 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
335                 lz->hash[FIX_HASH_SIZE + hash_value]
336                                 = lz->read_pos + lz->offset;
337
338 #ifdef IS_HASH_CHAIN
339                 lz->son[lz->cyclic_buffer_pos] = cur_match;
340 #else
341                 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
342                 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
343
344                 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
345                 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
346                 uint32_t count = lz->cut_value;
347
348                 while (true) {
349                         if (cur_match <= match_min_pos || count-- == 0) {
350                                 *ptr0 = EMPTY_HASH_VALUE;
351                                 *ptr1 = EMPTY_HASH_VALUE;
352                                 break;
353                         }
354
355                         const uint32_t delta = lz->read_pos
356                                         + lz->offset - cur_match;
357                         const uint32_t cyclic_pos
358                                         = delta <= lz->cyclic_buffer_pos
359                                         ? lz->cyclic_buffer_pos - delta
360                                         : lz->cyclic_buffer_pos - delta
361                                                 + lz->cyclic_buffer_size;
362                         uint32_t *pair = lz->son + (cyclic_pos << 1);
363
364                         const uint8_t *pb = lz->buffer + cur_match
365                                         - lz->offset;
366                         uint32_t len = MIN(len0, len1);
367
368                         if (pb[len] == cur[len]) {
369                                 while (++len != len_limit)
370                                         if (pb[len] != cur[len])
371                                                 break;
372
373                                 if (len == len_limit) {
374                                         *ptr1 = pair[0];
375                                         *ptr0 = pair[1];
376                                         break;
377                                 }
378                         }
379
380                         if (pb[len] < cur[len]) {
381                                 *ptr1 = cur_match;
382                                 ptr1 = pair + 1;
383                                 cur_match = *ptr1;
384                                 len1 = len;
385                         } else {
386                                 *ptr0 = cur_match;
387                                 ptr0 = pair;
388                                 cur_match = *ptr0;
389                                 len0 = len;
390                         }
391                 }
392 #endif
393
394                 move_pos();
395
396         } while (--num != 0);
397
398         return;
399 }