]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/index.c
Remove some redundant code from LZMA encoder.
[icculus/xz.git] / src / liblzma / common / index.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index.c
4 /// \brief      Handling of Index
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "index.h"
21
22
23 /// Number of Records to allocate at once.
24 #define INDEX_GROUP_SIZE 256
25
26
27 typedef struct lzma_index_group_s lzma_index_group;
28 struct lzma_index_group_s {
29         /// Next group
30         lzma_index_group *prev;
31
32         /// Previous group
33         lzma_index_group *next;
34
35         /// Index of the last Record in this group
36         size_t last;
37
38         /// Total Size fields as cumulative sum relative to the beginning
39         /// of the group. The total size of the group is total_sums[last].
40         lzma_vli total_sums[INDEX_GROUP_SIZE];
41
42         /// Uncompressed Size fields as cumulative sum relative to the
43         /// beginning of the group. The uncompressed size of the group is
44         /// uncompressed_sums[last].
45         lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
46
47         /// True if the Record is padding
48         bool paddings[INDEX_GROUP_SIZE];
49 };
50
51
52 struct lzma_index_s {
53         /// Total size of the Blocks and padding
54         lzma_vli total_size;
55
56         /// Uncompressed size of the Stream
57         lzma_vli uncompressed_size;
58
59         /// Number of non-padding records. This is needed by Index encoder.
60         lzma_vli count;
61
62         /// Size of the List of Records field; this is updated every time
63         /// a new non-padding Record is added.
64         lzma_vli index_list_size;
65
66         /// This is zero if no Indexes have been combined with
67         /// lzma_index_cat(). With combined Indexes, this contains the sizes
68         /// of all but latest the Streams, including possible Stream Padding
69         /// fields.
70         lzma_vli padding_size;
71
72         /// First group of Records
73         lzma_index_group *head;
74
75         /// Last group of Records
76         lzma_index_group *tail;
77
78         /// Tracking the read position
79         struct {
80                 /// Group where the current read position is.
81                 lzma_index_group *group;
82
83                 /// The most recently read record in *group
84                 lzma_vli record;
85
86                 /// Uncompressed offset of the beginning of *group relative
87                 /// to the beginning of the Stream
88                 lzma_vli uncompressed_offset;
89
90                 /// Compressed offset of the beginning of *group relative
91                 /// to the beginning of the Stream
92                 lzma_vli stream_offset;
93         } current;
94
95         /// Information about earlier Indexes when multiple Indexes have
96         /// been combined.
97         struct {
98                 /// Sum of the Record counts of the all but the last Stream.
99                 lzma_vli count;
100
101                 /// Sum of the List of Records fields of all but the last
102                 /// Stream. This is needed when a new Index is concatenated
103                 /// to this lzma_index structure.
104                 lzma_vli index_list_size;
105         } old;
106 };
107
108
109 static void
110 free_index_list(lzma_index *i, lzma_allocator *allocator)
111 {
112         lzma_index_group *g = i->head;
113
114         while (g != NULL) {
115                 lzma_index_group *tmp = g->next;
116                 lzma_free(g, allocator);
117                 g = tmp;
118         }
119
120         return;
121 }
122
123
124 extern LZMA_API lzma_index *
125 lzma_index_init(lzma_index *i, lzma_allocator *allocator)
126 {
127         if (i == NULL) {
128                 i = lzma_alloc(sizeof(lzma_index), allocator);
129                 if (i == NULL)
130                         return NULL;
131         } else {
132                 free_index_list(i, allocator);
133         }
134
135         i->total_size = 0;
136         i->uncompressed_size = 0;
137         i->count = 0;
138         i->index_list_size = 0;
139         i->padding_size = 0;
140         i->head = NULL;
141         i->tail = NULL;
142         i->current.group = NULL;
143         i->old.count = 0;
144         i->old.index_list_size = 0;
145
146         return i;
147 }
148
149
150 extern LZMA_API void
151 lzma_index_end(lzma_index *i, lzma_allocator *allocator)
152 {
153         if (i != NULL) {
154                 free_index_list(i, allocator);
155                 lzma_free(i, allocator);
156         }
157
158         return;
159 }
160
161
162 extern LZMA_API lzma_vli
163 lzma_index_count(const lzma_index *i)
164 {
165         return i->count;
166 }
167
168
169 extern LZMA_API lzma_vli
170 lzma_index_size(const lzma_index *i)
171 {
172         return index_size(i->count, i->index_list_size);
173 }
174
175
176 extern LZMA_API lzma_vli
177 lzma_index_total_size(const lzma_index *i)
178 {
179         return i->total_size;
180 }
181
182
183 extern LZMA_API lzma_vli
184 lzma_index_stream_size(const lzma_index *i)
185 {
186         // Stream Header + Blocks + Index + Stream Footer
187         return LZMA_STREAM_HEADER_SIZE + i->total_size
188                         + index_size(i->count, i->index_list_size)
189                         + LZMA_STREAM_HEADER_SIZE;
190 }
191
192
193 extern LZMA_API lzma_vli
194 lzma_index_file_size(const lzma_index *i)
195 {
196         // If multiple Streams are concatenated, the Stream Header, Index,
197         // and Stream Footer fields of all but the last Stream are already
198         // included in padding_size. Thus, we need to calculate only the
199         // size of the last Index, not all Indexes.
200         return i->total_size + i->padding_size
201                         + index_size(i->count - i->old.count,
202                                 i->index_list_size - i->old.index_list_size)
203                         + LZMA_STREAM_HEADER_SIZE * 2;
204 }
205
206
207 extern LZMA_API lzma_vli
208 lzma_index_uncompressed_size(const lzma_index *i)
209 {
210         return i->uncompressed_size;
211 }
212
213
214 extern uint32_t
215 lzma_index_padding_size(const lzma_index *i)
216 {
217         return (LZMA_VLI_C(4)
218                 - index_size_unpadded(i->count, i->index_list_size)) & 3;
219 }
220
221
222 /// Helper function for index_append()
223 static lzma_ret
224 index_append_real(lzma_index *i, lzma_allocator *allocator,
225                 lzma_vli total_size, lzma_vli uncompressed_size,
226                 bool is_padding)
227 {
228         // Add the new record.
229         if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
230                 // Allocate a new group.
231                 lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
232                                 allocator);
233                 if (g == NULL)
234                         return LZMA_MEM_ERROR;
235
236                 // Initialize the group and set its first record.
237                 g->prev = i->tail;
238                 g->next = NULL;
239                 g->last = 0;
240                 g->total_sums[0] = total_size;
241                 g->uncompressed_sums[0] = uncompressed_size;
242                 g->paddings[0] = is_padding;
243
244                 // If this is the first group, make it the head.
245                 if (i->head == NULL)
246                         i->head = g;
247                 else
248                         i->tail->next = g;
249
250                 // Make it the new tail.
251                 i->tail = g;
252
253         } else {
254                 // i->tail has space left for at least one record.
255                 i->tail->total_sums[i->tail->last + 1]
256                                 = i->tail->total_sums[i->tail->last]
257                                         + total_size;
258                 i->tail->uncompressed_sums[i->tail->last + 1]
259                                 = i->tail->uncompressed_sums[i->tail->last]
260                                         + uncompressed_size;
261                 i->tail->paddings[i->tail->last + 1] = is_padding;
262                 ++i->tail->last;
263         }
264
265         return LZMA_OK;
266 }
267
268
269 static lzma_ret
270 index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
271                 lzma_vli uncompressed_size, bool is_padding)
272 {
273         if (total_size > LZMA_VLI_VALUE_MAX
274                         || uncompressed_size > LZMA_VLI_VALUE_MAX)
275                 return LZMA_DATA_ERROR;
276
277         // This looks a bit ugly. We want to first validate that the Index
278         // and Stream stay in valid limits after adding this Record. After
279         // validating, we may need to allocate a new lzma_index_group (it's
280         // slightly more correct to validate before allocating, YMMV).
281         lzma_ret ret;
282
283         if (is_padding) {
284                 assert(uncompressed_size == 0);
285
286                 // First update the info so we can validate it.
287                 i->padding_size += total_size;
288
289                 if (i->padding_size > LZMA_VLI_VALUE_MAX
290                                 || lzma_index_file_size(i)
291                                         > LZMA_VLI_VALUE_MAX)
292                         ret = LZMA_DATA_ERROR; // Would grow past the limits.
293                 else
294                         ret = index_append_real(i, allocator,
295                                         total_size, uncompressed_size, true);
296
297                 // If something went wrong, undo the updated value.
298                 if (ret != LZMA_OK)
299                         i->padding_size -= total_size;
300
301         } else {
302                 // First update the overall info so we can validate it.
303                 const lzma_vli index_list_size_add
304                                 = lzma_vli_size(total_size / 4 - 1)
305                                 + lzma_vli_size(uncompressed_size);
306
307                 i->total_size += total_size;
308                 i->uncompressed_size += uncompressed_size;
309                 ++i->count;
310                 i->index_list_size += index_list_size_add;
311
312                 if (i->total_size > LZMA_VLI_VALUE_MAX
313                                 || i->uncompressed_size > LZMA_VLI_VALUE_MAX
314                                 || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
315                                 || lzma_index_file_size(i)
316                                         > LZMA_VLI_VALUE_MAX)
317                         ret = LZMA_DATA_ERROR; // Would grow past the limits.
318                 else
319                         ret = index_append_real(i, allocator,
320                                         total_size, uncompressed_size, false);
321
322                 if (ret != LZMA_OK) {
323                         // Something went wrong. Undo the updates.
324                         i->total_size -= total_size;
325                         i->uncompressed_size -= uncompressed_size;
326                         --i->count;
327                         i->index_list_size -= index_list_size_add;
328                 }
329         }
330
331         return ret;
332 }
333
334
335 extern LZMA_API lzma_ret
336 lzma_index_append(lzma_index *i, lzma_allocator *allocator,
337                 lzma_vli total_size, lzma_vli uncompressed_size)
338 {
339         return index_append(i, allocator,
340                         total_size, uncompressed_size, false);
341 }
342
343
344 /// Initialize i->current to point to the first Record.
345 static bool
346 init_current(lzma_index *i)
347 {
348         if (i->head == NULL) {
349                 assert(i->count == 0);
350                 return true;
351         }
352
353         assert(i->count > 0);
354
355         i->current.group = i->head;
356         i->current.record = 0;
357         i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
358         i->current.uncompressed_offset = 0;
359
360         return false;
361 }
362
363
364 /// Go backward to the previous group.
365 static void
366 previous_group(lzma_index *i)
367 {
368         assert(i->current.group->prev != NULL);
369
370         // Go to the previous group first.
371         i->current.group = i->current.group->prev;
372         i->current.record = i->current.group->last;
373
374         // Then update the offsets.
375         i->current.stream_offset -= i->current.group
376                         ->total_sums[i->current.group->last];
377         i->current.uncompressed_offset -= i->current.group
378                         ->uncompressed_sums[i->current.group->last];
379
380         return;
381 }
382
383
384 /// Go forward to the next group.
385 static void
386 next_group(lzma_index *i)
387 {
388         assert(i->current.group->next != NULL);
389
390         // Update the offsets first.
391         i->current.stream_offset += i->current.group
392                         ->total_sums[i->current.group->last];
393         i->current.uncompressed_offset += i->current.group
394                         ->uncompressed_sums[i->current.group->last];
395
396         // Then go to the next group.
397         i->current.record = 0;
398         i->current.group = i->current.group->next;
399
400         return;
401 }
402
403
404 /// Set *info from i->current.
405 static void
406 set_info(const lzma_index *i, lzma_index_record *info)
407 {
408         info->total_size = i->current.group->total_sums[i->current.record];
409         info->uncompressed_size = i->current.group->uncompressed_sums[
410                         i->current.record];
411
412         info->stream_offset = i->current.stream_offset;
413         info->uncompressed_offset = i->current.uncompressed_offset;
414
415         // If it's not the first Record in this group, we need to do some
416         // adjustements.
417         if (i->current.record > 0) {
418                 // _sums[] are cumulative, thus we need to substract the
419                 // _previous _sums[] to get the sizes of this Record.
420                 info->total_size -= i->current.group
421                                 ->total_sums[i->current.record - 1];
422                 info->uncompressed_size -= i->current.group
423                                 ->uncompressed_sums[i->current.record - 1];
424
425                 // i->current.{total,uncompressed}_offsets have the offset
426                 // of the beginning of the group, thus we need to add the
427                 // appropriate amount to get the offsetes of this Record.
428                 info->stream_offset += i->current.group
429                                 ->total_sums[i->current.record - 1];
430                 info->uncompressed_offset += i->current.group
431                                 ->uncompressed_sums[i->current.record - 1];
432         }
433
434         return;
435 }
436
437
438 extern LZMA_API lzma_bool
439 lzma_index_read(lzma_index *i, lzma_index_record *info)
440 {
441         if (i->current.group == NULL) {
442                 // We are at the beginning of the Record list. Set up
443                 // i->current point at the first Record. Return if there
444                 // are no Records.
445                 if (init_current(i))
446                         return true;
447         } else do {
448                 // Try to go the next Record.
449                 if (i->current.record < i->current.group->last)
450                         ++i->current.record;
451                 else if (i->current.group->next == NULL)
452                         return true;
453                 else
454                         next_group(i);
455         } while (i->current.group->paddings[i->current.record]);
456
457         // We found a new Record. Set the information to *info.
458         set_info(i, info);
459
460         return false;
461 }
462
463
464 extern LZMA_API void
465 lzma_index_rewind(lzma_index *i)
466 {
467         i->current.group = NULL;
468         return;
469 }
470
471
472 extern LZMA_API lzma_bool
473 lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
474 {
475         // Check if it is possible to fullfill the request.
476         if (target >= i->uncompressed_size)
477                 return true;
478
479         // Now we know that we will have an answer. Initialize the current
480         // read position if needed.
481         if (i->current.group == NULL && init_current(i))
482                 return true;
483
484         // Locate the group where the wanted Block is. First search forward.
485         while (i->current.uncompressed_offset <= target) {
486                 // If the first uncompressed byte of the next group is past
487                 // the target offset, it has to be this or an earlier group.
488                 if (i->current.uncompressed_offset + i->current.group
489                                 ->uncompressed_sums[i->current.group->last]
490                                 > target)
491                         break;
492
493                 // Go forward to the next group.
494                 next_group(i);
495         }
496
497         // Then search backward.
498         while (i->current.uncompressed_offset > target)
499                 previous_group(i);
500
501         // Now the target Block is somewhere in i->current.group. Offsets
502         // in groups are relative to the beginning of the group, thus
503         // we must adjust the target before starting the search loop.
504         assert(target >= i->current.uncompressed_offset);
505         target -= i->current.uncompressed_offset;
506
507         // Use binary search to locate the exact Record. It is the first
508         // Record whose uncompressed_sums[] value is greater than target.
509         // This is because we want the rightmost Record that fullfills the
510         // search criterion. It is possible that there are empty Blocks or
511         // padding, we don't want to return them.
512         size_t left = 0;
513         size_t right = i->current.group->last;
514
515         while (left < right) {
516                 const size_t pos = left + (right - left) / 2;
517                 if (i->current.group->uncompressed_sums[pos] <= target)
518                         left = pos + 1;
519                 else
520                         right = pos;
521         }
522
523         i->current.record = left;
524
525 #ifndef NDEBUG
526         // The found Record must not be padding or have zero uncompressed size.
527         assert(!i->current.group->paddings[i->current.record]);
528
529         if (i->current.record == 0)
530                 assert(i->current.group->uncompressed_sums[0] > 0);
531         else
532                 assert(i->current.group->uncompressed_sums[i->current.record]
533                                 - i->current.group->uncompressed_sums[
534                                         i->current.record - 1] > 0);
535 #endif
536
537         set_info(i, info);
538
539         return false;
540 }
541
542
543 extern LZMA_API lzma_ret
544 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
545                 lzma_allocator *allocator, lzma_vli padding)
546 {
547         if (dest == NULL || src == NULL || dest == src
548                         || padding > LZMA_VLI_VALUE_MAX)
549                 return LZMA_PROG_ERROR;
550
551         // Check that the combined size of the Indexes stays within limits.
552         {
553                 const lzma_vli dest_size = lzma_index_file_size(dest);
554                 const lzma_vli src_size = lzma_index_file_size(src);
555                 if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN
556                                 || dest_size + src_size + padding
557                                         > LZMA_VLI_VALUE_UNKNOWN)
558                         return LZMA_DATA_ERROR;
559         }
560
561         // Add a padding Record to take into account the size of
562         // Index + Stream Footer + Stream Padding + Stream Header.
563         //
564         // NOTE: This cannot overflow, because Index Size is always
565         // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs
566         // (Index Size and padding) doesn't overflow. It may become
567         // an invalid VLI if padding is huge, but that is caught by
568         // index_append().
569         padding += index_size(dest->count - dest->old.count,
570                                 dest->index_list_size
571                                         - dest->old.index_list_size)
572                         + LZMA_STREAM_HEADER_SIZE * 2;
573
574         // Add the padding Record.
575         return_if_error(index_append(
576                         dest, allocator, padding, 0, true));
577
578         // Avoid wasting lots of memory if src->head has only a few records
579         // that fit into dest->tail. That is, combine two groups if possible.
580         //
581         // NOTE: We know that dest->tail != NULL since we just appended
582         // a padding Record. But we don't know about src->head.
583         if (src->head != NULL && src->head->last + 1
584                         <= INDEX_GROUP_SIZE - dest->tail->last - 1) {
585                 // Copy the first Record.
586                 dest->tail->total_sums[dest->tail->last + 1]
587                         = dest->tail->total_sums[dest->tail->last]
588                                 + src->head->total_sums[0];
589
590                 dest->tail->uncompressed_sums[dest->tail->last + 1]
591                         = dest->tail->uncompressed_sums[dest->tail->last]
592                                 + src->head->uncompressed_sums[0];
593
594                 dest->tail->paddings[dest->tail->last + 1]
595                                 = src->head->paddings[0];
596
597                 ++dest->tail->last;
598
599                 // Copy the rest.
600                 for (size_t i = 1; i < src->head->last; ++i) {
601                         dest->tail->total_sums[dest->tail->last + 1]
602                                 = dest->tail->total_sums[dest->tail->last]
603                                         + src->head->total_sums[i + 1]
604                                         - src->head->total_sums[i];
605
606                         dest->tail->uncompressed_sums[dest->tail->last + 1]
607                                 = dest->tail->uncompressed_sums[
608                                                 dest->tail->last]
609                                         + src->head->uncompressed_sums[i + 1]
610                                         - src->head->uncompressed_sums[i];
611
612                         dest->tail->paddings[dest->tail->last + 1]
613                                 = src->head->paddings[i + 1];
614
615                         ++dest->tail->last;
616                 }
617
618                 // Free the head group of *src. Don't bother updating prev
619                 // pointers since those won't be used for anything before
620                 // we deallocate the whole *src structure.
621                 lzma_index_group *tmp = src->head;
622                 src->head = src->head->next;
623                 lzma_free(tmp, allocator);
624         }
625
626         // If there are groups left in *src, join them as is. Note that if we
627         // are combining already combined Indexes, src->head can be non-NULL
628         // even if we just combined the old src->head to dest->tail.
629         if (src->head != NULL) {
630                 src->head->prev = dest->tail;
631                 dest->tail->next = src->head;
632                 dest->tail = src->tail;
633         }
634
635         // Update information about earlier Indexes. Only the last Index
636         // from *src won't be counted in dest->old. The last Index is left
637         // open and can be even appended with lzma_index_append().
638         dest->old.count = dest->count + src->old.count;
639         dest->old.index_list_size
640                         = dest->index_list_size + src->old.index_list_size;
641
642         // Update overall information.
643         dest->total_size += src->total_size;
644         dest->uncompressed_size += src->uncompressed_size;
645         dest->count += src->count;
646         dest->index_list_size += src->index_list_size;
647         dest->padding_size += src->padding_size;
648
649         // *src has nothing left but the base structure.
650         lzma_free(src, allocator);
651
652         return LZMA_OK;
653 }
654
655
656 extern LZMA_API lzma_index *
657 lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
658 {
659         lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
660         if (dest == NULL)
661                 return NULL;
662
663         // Copy the base structure except the pointers.
664         *dest = *src;
665         dest->head = NULL;
666         dest->tail = NULL;
667         dest->current.group = NULL;
668
669         // Copy the Records.
670         const lzma_index_group *src_group = src->head;
671         while (src_group != NULL) {
672                 // Allocate a new group.
673                 lzma_index_group *dest_group = lzma_alloc(
674                                 sizeof(lzma_index_group), allocator);
675                 if (dest_group == NULL) {
676                         lzma_index_end(dest, allocator);
677                         return NULL;
678                 }
679
680                 // Set the pointers.
681                 dest_group->prev = dest->tail;
682                 dest_group->next = NULL;
683
684                 if (dest->head == NULL)
685                         dest->head = dest_group;
686                 else
687                         dest->tail->next = dest_group;
688
689                 dest->tail = dest_group;
690
691                 dest_group->last = src_group->last;
692
693                 // Copy the arrays so that we don't read uninitialized memory.
694                 const size_t count = src_group->last + 1;
695                 memcpy(dest_group->total_sums, src_group->total_sums,
696                                 sizeof(lzma_vli) * count);
697                 memcpy(dest_group->uncompressed_sums,
698                                 src_group->uncompressed_sums,
699                                 sizeof(lzma_vli) * count);
700                 memcpy(dest_group->paddings, src_group->paddings,
701                                 sizeof(bool) * count);
702
703                 // Copy also the read position.
704                 if (src_group == src->current.group)
705                         dest->current.group = dest->tail;
706
707                 src_group = src_group->next;
708         }
709
710         return dest;
711 }
712
713
714 extern LZMA_API lzma_bool
715 lzma_index_equal(const lzma_index *a, const lzma_index *b)
716 {
717         // No point to compare more if the pointers are the same.
718         if (a == b)
719                 return true;
720
721         // Compare the basic properties.
722         if (a->total_size != b->total_size
723                         || a->uncompressed_size != b->uncompressed_size
724                         || a->index_list_size != b->index_list_size
725                         || a->count != b->count)
726                 return false;
727
728         // Compare the Records.
729         const lzma_index_group *ag = a->head;
730         const lzma_index_group *bg = b->head;
731         while (ag != NULL && bg != NULL) {
732                 const size_t count = ag->last + 1;
733                 if (ag->last != bg->last
734                                 || memcmp(ag->total_sums,
735                                         bg->total_sums,
736                                         sizeof(lzma_vli) * count) != 0
737                                 || memcmp(ag->uncompressed_sums,
738                                         bg->uncompressed_sums,
739                                         sizeof(lzma_vli) * count) != 0
740                                 || memcmp(ag->paddings, bg->paddings,
741                                         sizeof(bool) * count) != 0)
742                         return false;
743
744                 ag = ag->next;
745                 bg = bg->next;
746         }
747
748         return ag == NULL && bg == NULL;
749 }