]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/lz/match_c.h
Imported to git.
[icculus/xz.git] / src / liblzma / lz / match_c.h
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       match_c.h
4 /// \brief      Template for different match finders
5 ///
6 /// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
7 /// sets slighly different #defines, resulting the different match finders.
8 //
9 //  Copyright (C) 1999-2006 Igor Pavlov
10 //  Copyright (C) 2007 Lasse Collin
11 //
12 //  This library is free software; you can redistribute it and/or
13 //  modify it under the terms of the GNU Lesser General Public
14 //  License as published by the Free Software Foundation; either
15 //  version 2.1 of the License, or (at your option) any later version.
16 //
17 //  This library is distributed in the hope that it will be useful,
18 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
19 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 //  Lesser General Public License for more details.
21 //
22 ///////////////////////////////////////////////////////////////////////////////
23
24 //////////////
25 // Includes //
26 //////////////
27
28 #include "check.h"
29
30
31 ///////////////
32 // Constants //
33 ///////////////
34
35 #define START_MAX_LEN 1
36
37 #ifdef HASH_ARRAY_2
38 #       define NUM_HASH_DIRECT_BYTES 0
39 #       define HASH_2_SIZE (1 << 10)
40 #       ifdef HASH_ARRAY_3
41 #               define NUM_HASH_BYTES 4
42 #               define HASH_3_SIZE (1 << 16)
43 #               define HASH_3_OFFSET HASH_2_SIZE
44 #               define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
45 #       else
46 #               define NUM_HASH_BYTES 3
47 #               define FIX_HASH_SIZE HASH_2_SIZE
48 #       endif
49 #       define HASH_SIZE 0
50 #       define MIN_MATCH_CHECK NUM_HASH_BYTES
51 #else
52 #       define NUM_HASH_DIRECT_BYTES 2
53 #       define NUM_HASH_BYTES 2
54 #       define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
55 #       define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
56 #       define FIX_HASH_SIZE 0
57 #endif
58
59
60 ////////////
61 // Macros //
62 ////////////
63
64 #ifdef HASH_ARRAY_2
65 #       ifdef HASH_ARRAY_3
66 #               define HASH_CALC() \
67                 do { \
68                         const uint32_t temp = lzma_crc32_table[0][ \
69                                         cur[0]] ^ cur[1]; \
70                         hash_2_value = temp & (HASH_2_SIZE - 1); \
71                         hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
72                                         & (HASH_3_SIZE - 1); \
73                         hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
74                                         ^ (lzma_crc32_table[0][cur[3]] << 5)) \
75                                         & lz->hash_mask; \
76                 } while (0)
77 #       else
78 #               define HASH_CALC() \
79                 do { \
80                         const uint32_t temp = lzma_crc32_table[0][ \
81                                         cur[0]] ^ cur[1]; \
82                         hash_2_value = temp & (HASH_2_SIZE - 1); \
83                         hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
84                                         & lz->hash_mask; \
85                 } while (0)
86 #       endif
87 #else
88 #       define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
89 #endif
90
91
92 // Moves the current read position forward by one byte. In LZMA SDK,
93 // CLZInWindow::MovePos() can read more input data if needed, because of
94 // the callback style API. In liblzma we must have ensured earlier, that
95 // there is enough data available in lz->buffer.
96 #define move_pos() \
97 do { \
98         if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
99                 lz->cyclic_buffer_pos = 0; \
100         ++lz->read_pos; \
101         assert(lz->read_pos <= lz->write_pos); \
102         if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
103                 lzma_lz_encoder_normalize(lz); \
104 } while (0)
105
106
107 //////////////////////
108 // Global constants //
109 //////////////////////
110
111 LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
112 LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
113
114
115 ///////////////////
116 // API functions //
117 ///////////////////
118
119 LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
120 {
121         uint32_t len_limit;
122         if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
123                 len_limit = lz->match_max_len;
124         } else {
125                 assert(lz->stream_end_was_reached);
126                 len_limit = lz->write_pos - lz->read_pos;
127                 if (len_limit < MIN_MATCH_CHECK) {
128                         distances[0] = 0;
129                         move_pos();
130                         return;
131                 }
132         }
133
134         int32_t offset = 1;
135         const uint32_t match_min_pos
136                         = lz->read_pos + lz->offset > lz->cyclic_buffer_size
137                         ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
138                         : 0;
139         const uint8_t *cur = lz->buffer + lz->read_pos;
140         uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
141
142 #ifdef HASH_ARRAY_2
143         uint32_t hash_2_value;
144 #       ifdef HASH_ARRAY_3
145         uint32_t hash_3_value;
146 #       endif
147 #endif
148         uint32_t hash_value;
149         HASH_CALC();
150
151         uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
152 #ifdef HASH_ARRAY_2
153         uint32_t cur_match2 = lz->hash[hash_2_value];
154 #       ifdef HASH_ARRAY_3
155         uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
156 #       endif
157         lz->hash[hash_2_value] = lz->read_pos + lz->offset;
158
159         if (cur_match2 > match_min_pos) {
160                 if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
161                         max_len = 2;
162                         distances[offset++] = 2;
163                         distances[offset++] = lz->read_pos + lz->offset
164                                         - cur_match2 - 1;
165                 }
166         }
167
168 #       ifdef HASH_ARRAY_3
169         lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
170         if (cur_match3 > match_min_pos) {
171                 if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
172                         if (cur_match3 == cur_match2)
173                                 offset -= 2;
174
175                         max_len = 3;
176                         distances[offset++] = 3;
177                         distances[offset++] = lz->read_pos + lz->offset
178                                         - cur_match3 - 1;
179                         cur_match2 = cur_match3;
180                 }
181         }
182 #       endif
183
184         if (offset != 1 && cur_match2 == cur_match) {
185                 offset -= 2;
186                 max_len = START_MAX_LEN;
187         }
188 #endif
189
190         lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
191
192 #ifdef IS_HASH_CHAIN
193         lz->son[lz->cyclic_buffer_pos] = cur_match;
194 #else
195         uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
196         uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
197
198         uint32_t len0 = NUM_HASH_DIRECT_BYTES;
199         uint32_t len1 = NUM_HASH_DIRECT_BYTES;
200 #endif
201
202 #if NUM_HASH_DIRECT_BYTES != 0
203         if (cur_match > match_min_pos) {
204                 if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
205                                 != cur[NUM_HASH_DIRECT_BYTES]) {
206                         max_len = NUM_HASH_DIRECT_BYTES;
207                         distances[offset++] = NUM_HASH_DIRECT_BYTES;
208                         distances[offset++] = lz->read_pos + lz->offset
209                                         - cur_match - 1;
210                 }
211         }
212 #endif
213
214         uint32_t count = lz->cut_value;
215
216         while (true) {
217                 if (cur_match <= match_min_pos || count-- == 0) {
218 #ifndef IS_HASH_CHAIN
219                         *ptr0 = EMPTY_HASH_VALUE;
220                         *ptr1 = EMPTY_HASH_VALUE;
221 #endif
222                         break;
223                 }
224
225                 const uint32_t delta = lz->read_pos + lz->offset - cur_match;
226                 const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
227                                 ? lz->cyclic_buffer_pos - delta
228                                 : lz->cyclic_buffer_pos - delta
229                                         + lz->cyclic_buffer_size;
230                 uint32_t *pair = lz->son +
231 #ifdef IS_HASH_CHAIN
232                                 cyclic_pos;
233 #else
234                                 (cyclic_pos << 1);
235 #endif
236
237                 const uint8_t *pb = lz->buffer + cur_match - lz->offset;
238                 uint32_t len =
239 #ifdef IS_HASH_CHAIN
240                                 NUM_HASH_DIRECT_BYTES;
241                 if (pb[max_len] == cur[max_len])
242 #else
243                                 MIN(len0, len1);
244 #endif
245
246                 if (pb[len] == cur[len]) {
247                         while (++len != len_limit)
248                                 if (pb[len] != cur[len])
249                                         break;
250
251                         if (max_len < len) {
252                                 max_len = len;
253                                 distances[offset++] = len;
254                                 distances[offset++] = delta - 1;
255                                 if (len == len_limit) {
256 #ifndef IS_HASH_CHAIN
257                                         *ptr1 = pair[0];
258                                         *ptr0 = pair[1];
259 #endif
260                                         break;
261                                 }
262                         }
263                 }
264
265 #ifdef IS_HASH_CHAIN
266                 cur_match = *pair;
267 #else
268                 if (pb[len] < cur[len]) {
269                         *ptr1 = cur_match;
270                         ptr1 = pair + 1;
271                         cur_match = *ptr1;
272                         len1 = len;
273                 } else {
274                         *ptr0 = cur_match;
275                         ptr0 = pair;
276                         cur_match = *ptr0;
277                         len0 = len;
278                 }
279 #endif
280         }
281
282         distances[0] = offset - 1;
283
284         move_pos();
285
286         return;
287 }
288
289
290 LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
291 {
292         do {
293 #ifdef IS_HASH_CHAIN
294                 if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
295                         move_pos();
296                         continue;
297                 }
298 #else
299                 uint32_t len_limit;
300                 if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
301                         len_limit = lz->match_max_len;
302                 } else {
303                         assert(lz->stream_end_was_reached == true);
304                         len_limit = lz->write_pos - lz->read_pos;
305                         if (len_limit < MIN_MATCH_CHECK) {
306                                 move_pos();
307                                 continue;
308                         }
309                 }
310                 const uint32_t match_min_pos
311                         = lz->read_pos + lz->offset > lz->cyclic_buffer_size
312                         ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
313                         : 0;
314 #endif
315
316                 const uint8_t *cur = lz->buffer + lz->read_pos;
317
318 #ifdef HASH_ARRAY_2
319                 uint32_t hash_2_value;
320 #       ifdef HASH_ARRAY_3
321                 uint32_t hash_3_value;
322                 uint32_t hash_value;
323                 HASH_CALC();
324                 lz->hash[HASH_3_OFFSET + hash_3_value]
325                                 = lz->read_pos + lz->offset;
326 #       else
327                 uint32_t hash_value;
328                 HASH_CALC();
329 #       endif
330                 lz->hash[hash_2_value] = lz->read_pos + lz->offset;
331 #else
332                 uint32_t hash_value;
333                 HASH_CALC();
334 #endif
335
336                 uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
337                 lz->hash[FIX_HASH_SIZE + hash_value]
338                                 = lz->read_pos + lz->offset;
339
340 #ifdef IS_HASH_CHAIN
341                 lz->son[lz->cyclic_buffer_pos] = cur_match;
342 #else
343                 uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
344                 uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
345
346                 uint32_t len0 = NUM_HASH_DIRECT_BYTES;
347                 uint32_t len1 = NUM_HASH_DIRECT_BYTES;
348                 uint32_t count = lz->cut_value;
349
350                 while (true) {
351                         if (cur_match <= match_min_pos || count-- == 0) {
352                                 *ptr0 = EMPTY_HASH_VALUE;
353                                 *ptr1 = EMPTY_HASH_VALUE;
354                                 break;
355                         }
356
357                         const uint32_t delta = lz->read_pos
358                                         + lz->offset - cur_match;
359                         const uint32_t cyclic_pos
360                                         = delta <= lz->cyclic_buffer_pos
361                                         ? lz->cyclic_buffer_pos - delta
362                                         : lz->cyclic_buffer_pos - delta
363                                                 + lz->cyclic_buffer_size;
364                         uint32_t *pair = lz->son + (cyclic_pos << 1);
365
366                         const uint8_t *pb = lz->buffer + cur_match
367                                         - lz->offset;
368                         uint32_t len = MIN(len0, len1);
369
370                         if (pb[len] == cur[len]) {
371                                 while (++len != len_limit)
372                                         if (pb[len] != cur[len])
373                                                 break;
374
375                                 if (len == len_limit) {
376                                         *ptr1 = pair[0];
377                                         *ptr0 = pair[1];
378                                         break;
379                                 }
380                         }
381
382                         if (pb[len] < cur[len]) {
383                                 *ptr1 = cur_match;
384                                 ptr1 = pair + 1;
385                                 cur_match = *ptr1;
386                                 len1 = len;
387                         } else {
388                                 *ptr0 = cur_match;
389                                 ptr0 = pair;
390                                 cur_match = *ptr0;
391                                 len0 = len;
392                         }
393                 }
394 #endif
395
396                 move_pos();
397
398         } while (--num != 0);
399
400         return;
401 }