1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Handling of Index
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
23 /// Number of Records to allocate at once.
24 #define INDEX_GROUP_SIZE 256
27 typedef struct lzma_index_group_s lzma_index_group;
28 struct lzma_index_group_s {
30 lzma_index_group *prev;
33 lzma_index_group *next;
35 /// Index of the last Record in this group
38 /// Total Size fields as cumulative sum relative to the beginning
39 /// of the group. The total size of the group is total_sums[last].
40 lzma_vli total_sums[INDEX_GROUP_SIZE];
42 /// Uncompressed Size fields as cumulative sum relative to the
43 /// beginning of the group. The uncompressed size of the group is
44 /// uncompressed_sums[last].
45 lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
47 /// True if the Record is padding
48 bool paddings[INDEX_GROUP_SIZE];
53 /// Total size of the Blocks and padding
56 /// Uncompressed size of the Stream
57 lzma_vli uncompressed_size;
59 /// Number of non-padding records. This is needed by Index encoder.
62 /// Size of the List of Records field; this is updated every time
63 /// a new non-padding Record is added.
64 lzma_vli index_list_size;
66 /// This is zero if no Indexes have been combined with
67 /// lzma_index_cat(). With combined Indexes, this contains the sizes
68 /// of all but latest the Streams, including possible Stream Padding
70 lzma_vli padding_size;
72 /// First group of Records
73 lzma_index_group *head;
75 /// Last group of Records
76 lzma_index_group *tail;
78 /// Tracking the read position
80 /// Group where the current read position is.
81 lzma_index_group *group;
83 /// The most recently read record in *group
86 /// Uncompressed offset of the beginning of *group relative
87 /// to the beginning of the Stream
88 lzma_vli uncompressed_offset;
90 /// Compressed offset of the beginning of *group relative
91 /// to the beginning of the Stream
92 lzma_vli stream_offset;
95 /// Information about earlier Indexes when multiple Indexes have
98 /// Sum of the Record counts of the all but the last Stream.
101 /// Sum of the List of Records fields of all but the last
102 /// Stream. This is needed when a new Index is concatenated
103 /// to this lzma_index structure.
104 lzma_vli index_list_size;
110 free_index_list(lzma_index *i, lzma_allocator *allocator)
112 lzma_index_group *g = i->head;
115 lzma_index_group *tmp = g->next;
116 lzma_free(g, allocator);
124 extern LZMA_API lzma_index *
125 lzma_index_init(lzma_index *i, lzma_allocator *allocator)
128 i = lzma_alloc(sizeof(lzma_index), allocator);
132 free_index_list(i, allocator);
136 i->uncompressed_size = 0;
138 i->index_list_size = 0;
142 i->current.group = NULL;
144 i->old.index_list_size = 0;
151 lzma_index_end(lzma_index *i, lzma_allocator *allocator)
154 free_index_list(i, allocator);
155 lzma_free(i, allocator);
162 extern LZMA_API lzma_vli
163 lzma_index_count(const lzma_index *i)
169 extern LZMA_API lzma_vli
170 lzma_index_size(const lzma_index *i)
172 return index_size(i->count, i->index_list_size);
176 extern LZMA_API lzma_vli
177 lzma_index_total_size(const lzma_index *i)
179 return i->total_size;
183 extern LZMA_API lzma_vli
184 lzma_index_stream_size(const lzma_index *i)
186 // Stream Header + Blocks + Index + Stream Footer
187 return LZMA_STREAM_HEADER_SIZE + i->total_size
188 + index_size(i->count, i->index_list_size)
189 + LZMA_STREAM_HEADER_SIZE;
193 extern LZMA_API lzma_vli
194 lzma_index_file_size(const lzma_index *i)
196 // If multiple Streams are concatenated, the Stream Header, Index,
197 // and Stream Footer fields of all but the last Stream are already
198 // included in padding_size. Thus, we need to calculate only the
199 // size of the last Index, not all Indexes.
200 return i->total_size + i->padding_size
201 + index_size(i->count - i->old.count,
202 i->index_list_size - i->old.index_list_size)
203 + LZMA_STREAM_HEADER_SIZE * 2;
207 extern LZMA_API lzma_vli
208 lzma_index_uncompressed_size(const lzma_index *i)
210 return i->uncompressed_size;
215 lzma_index_padding_size(const lzma_index *i)
217 return (LZMA_VLI_C(4)
218 - index_size_unpadded(i->count, i->index_list_size)) & 3;
222 /// Helper function for index_append()
224 index_append_real(lzma_index *i, lzma_allocator *allocator,
225 lzma_vli total_size, lzma_vli uncompressed_size,
228 // Add the new record.
229 if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
230 // Allocate a new group.
231 lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
234 return LZMA_MEM_ERROR;
236 // Initialize the group and set its first record.
240 g->total_sums[0] = total_size;
241 g->uncompressed_sums[0] = uncompressed_size;
242 g->paddings[0] = is_padding;
244 // If this is the first group, make it the head.
250 // Make it the new tail.
254 // i->tail has space left for at least one record.
255 i->tail->total_sums[i->tail->last + 1]
256 = i->tail->total_sums[i->tail->last]
258 i->tail->uncompressed_sums[i->tail->last + 1]
259 = i->tail->uncompressed_sums[i->tail->last]
261 i->tail->paddings[i->tail->last + 1] = is_padding;
270 index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
271 lzma_vli uncompressed_size, bool is_padding)
273 if (total_size > LZMA_VLI_VALUE_MAX
274 || uncompressed_size > LZMA_VLI_VALUE_MAX)
275 return LZMA_DATA_ERROR;
277 // This looks a bit ugly. We want to first validate that the Index
278 // and Stream stay in valid limits after adding this Record. After
279 // validating, we may need to allocate a new lzma_index_group (it's
280 // slightly more correct to validate before allocating, YMMV).
284 assert(uncompressed_size == 0);
286 // First update the info so we can validate it.
287 i->padding_size += total_size;
289 if (i->padding_size > LZMA_VLI_VALUE_MAX
290 || lzma_index_file_size(i)
291 > LZMA_VLI_VALUE_MAX)
292 ret = LZMA_DATA_ERROR; // Would grow past the limits.
294 ret = index_append_real(i, allocator,
295 total_size, uncompressed_size, true);
297 // If something went wrong, undo the updated value.
299 i->padding_size -= total_size;
302 // First update the overall info so we can validate it.
303 const lzma_vli index_list_size_add
304 = lzma_vli_size(total_size / 4 - 1)
305 + lzma_vli_size(uncompressed_size);
307 i->total_size += total_size;
308 i->uncompressed_size += uncompressed_size;
310 i->index_list_size += index_list_size_add;
312 if (i->total_size > LZMA_VLI_VALUE_MAX
313 || i->uncompressed_size > LZMA_VLI_VALUE_MAX
314 || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
315 || lzma_index_file_size(i)
316 > LZMA_VLI_VALUE_MAX)
317 ret = LZMA_DATA_ERROR; // Would grow past the limits.
319 ret = index_append_real(i, allocator,
320 total_size, uncompressed_size, false);
322 if (ret != LZMA_OK) {
323 // Something went wrong. Undo the updates.
324 i->total_size -= total_size;
325 i->uncompressed_size -= uncompressed_size;
327 i->index_list_size -= index_list_size_add;
335 extern LZMA_API lzma_ret
336 lzma_index_append(lzma_index *i, lzma_allocator *allocator,
337 lzma_vli total_size, lzma_vli uncompressed_size)
339 return index_append(i, allocator,
340 total_size, uncompressed_size, false);
344 /// Initialize i->current to point to the first Record.
346 init_current(lzma_index *i)
348 if (i->head == NULL) {
349 assert(i->count == 0);
353 assert(i->count > 0);
355 i->current.group = i->head;
356 i->current.record = 0;
357 i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
358 i->current.uncompressed_offset = 0;
364 /// Go backward to the previous group.
366 previous_group(lzma_index *i)
368 assert(i->current.group->prev != NULL);
370 // Go to the previous group first.
371 i->current.group = i->current.group->prev;
372 i->current.record = i->current.group->last;
374 // Then update the offsets.
375 i->current.stream_offset -= i->current.group
376 ->total_sums[i->current.group->last];
377 i->current.uncompressed_offset -= i->current.group
378 ->uncompressed_sums[i->current.group->last];
384 /// Go forward to the next group.
386 next_group(lzma_index *i)
388 assert(i->current.group->next != NULL);
390 // Update the offsets first.
391 i->current.stream_offset += i->current.group
392 ->total_sums[i->current.group->last];
393 i->current.uncompressed_offset += i->current.group
394 ->uncompressed_sums[i->current.group->last];
396 // Then go to the next group.
397 i->current.record = 0;
398 i->current.group = i->current.group->next;
404 /// Set *info from i->current.
406 set_info(const lzma_index *i, lzma_index_record *info)
408 info->total_size = i->current.group->total_sums[i->current.record];
409 info->uncompressed_size = i->current.group->uncompressed_sums[
412 info->stream_offset = i->current.stream_offset;
413 info->uncompressed_offset = i->current.uncompressed_offset;
415 // If it's not the first Record in this group, we need to do some
417 if (i->current.record > 0) {
418 // _sums[] are cumulative, thus we need to substract the
419 // _previous _sums[] to get the sizes of this Record.
420 info->total_size -= i->current.group
421 ->total_sums[i->current.record - 1];
422 info->uncompressed_size -= i->current.group
423 ->uncompressed_sums[i->current.record - 1];
425 // i->current.{total,uncompressed}_offsets have the offset
426 // of the beginning of the group, thus we need to add the
427 // appropriate amount to get the offsetes of this Record.
428 info->stream_offset += i->current.group
429 ->total_sums[i->current.record - 1];
430 info->uncompressed_offset += i->current.group
431 ->uncompressed_sums[i->current.record - 1];
438 extern LZMA_API lzma_bool
439 lzma_index_read(lzma_index *i, lzma_index_record *info)
441 if (i->current.group == NULL) {
442 // We are at the beginning of the Record list. Set up
443 // i->current point at the first Record. Return if there
448 // Try to go the next Record.
449 if (i->current.record < i->current.group->last)
451 else if (i->current.group->next == NULL)
455 } while (i->current.group->paddings[i->current.record]);
457 // We found a new Record. Set the information to *info.
465 lzma_index_rewind(lzma_index *i)
467 i->current.group = NULL;
472 extern LZMA_API lzma_bool
473 lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
475 // Check if it is possible to fullfill the request.
476 if (target >= i->uncompressed_size)
479 // Now we know that we will have an answer. Initialize the current
480 // read position if needed.
481 if (i->current.group == NULL && init_current(i))
484 // Locate the group where the wanted Block is. First search forward.
485 while (i->current.uncompressed_offset <= target) {
486 // If the first uncompressed byte of the next group is past
487 // the target offset, it has to be this or an earlier group.
488 if (i->current.uncompressed_offset + i->current.group
489 ->uncompressed_sums[i->current.group->last]
493 // Go forward to the next group.
497 // Then search backward.
498 while (i->current.uncompressed_offset > target)
501 // Now the target Block is somewhere in i->current.group. Offsets
502 // in groups are relative to the beginning of the group, thus
503 // we must adjust the target before starting the search loop.
504 assert(target >= i->current.uncompressed_offset);
505 target -= i->current.uncompressed_offset;
507 // Use binary search to locate the exact Record. It is the first
508 // Record whose uncompressed_sums[] value is greater than target.
509 // This is because we want the rightmost Record that fullfills the
510 // search criterion. It is possible that there are empty Blocks or
511 // padding, we don't want to return them.
513 size_t right = i->current.group->last;
515 while (left < right) {
516 const size_t pos = left + (right - left) / 2;
517 if (i->current.group->uncompressed_sums[pos] <= target)
523 i->current.record = left;
526 // The found Record must not be padding or have zero uncompressed size.
527 assert(!i->current.group->paddings[i->current.record]);
529 if (i->current.record == 0)
530 assert(i->current.group->uncompressed_sums[0] > 0);
532 assert(i->current.group->uncompressed_sums[i->current.record]
533 - i->current.group->uncompressed_sums[
534 i->current.record - 1] > 0);
543 extern LZMA_API lzma_ret
544 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
545 lzma_allocator *allocator, lzma_vli padding)
547 if (dest == NULL || src == NULL || dest == src
548 || padding > LZMA_VLI_VALUE_MAX)
549 return LZMA_PROG_ERROR;
551 // Check that the combined size of the Indexes stays within limits.
553 const lzma_vli dest_size = lzma_index_file_size(dest);
554 const lzma_vli src_size = lzma_index_file_size(src);
555 if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN
556 || dest_size + src_size + padding
557 > LZMA_VLI_VALUE_UNKNOWN)
558 return LZMA_DATA_ERROR;
561 // Add a padding Record to take into account the size of
562 // Index + Stream Footer + Stream Padding + Stream Header.
564 // NOTE: This cannot overflow, because Index Size is always
565 // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs
566 // (Index Size and padding) doesn't overflow. It may become
567 // an invalid VLI if padding is huge, but that is caught by
569 padding += index_size(dest->count - dest->old.count,
570 dest->index_list_size
571 - dest->old.index_list_size)
572 + LZMA_STREAM_HEADER_SIZE * 2;
574 // Add the padding Record.
575 return_if_error(index_append(
576 dest, allocator, padding, 0, true));
578 // Avoid wasting lots of memory if src->head has only a few records
579 // that fit into dest->tail. That is, combine two groups if possible.
581 // NOTE: We know that dest->tail != NULL since we just appended
582 // a padding Record. But we don't know about src->head.
583 if (src->head != NULL && src->head->last + 1
584 <= INDEX_GROUP_SIZE - dest->tail->last - 1) {
585 // Copy the first Record.
586 dest->tail->total_sums[dest->tail->last + 1]
587 = dest->tail->total_sums[dest->tail->last]
588 + src->head->total_sums[0];
590 dest->tail->uncompressed_sums[dest->tail->last + 1]
591 = dest->tail->uncompressed_sums[dest->tail->last]
592 + src->head->uncompressed_sums[0];
594 dest->tail->paddings[dest->tail->last + 1]
595 = src->head->paddings[0];
600 for (size_t i = 1; i < src->head->last; ++i) {
601 dest->tail->total_sums[dest->tail->last + 1]
602 = dest->tail->total_sums[dest->tail->last]
603 + src->head->total_sums[i + 1]
604 - src->head->total_sums[i];
606 dest->tail->uncompressed_sums[dest->tail->last + 1]
607 = dest->tail->uncompressed_sums[
609 + src->head->uncompressed_sums[i + 1]
610 - src->head->uncompressed_sums[i];
612 dest->tail->paddings[dest->tail->last + 1]
613 = src->head->paddings[i + 1];
618 // Free the head group of *src. Don't bother updating prev
619 // pointers since those won't be used for anything before
620 // we deallocate the whole *src structure.
621 lzma_index_group *tmp = src->head;
622 src->head = src->head->next;
623 lzma_free(tmp, allocator);
626 // If there are groups left in *src, join them as is. Note that if we
627 // are combining already combined Indexes, src->head can be non-NULL
628 // even if we just combined the old src->head to dest->tail.
629 if (src->head != NULL) {
630 src->head->prev = dest->tail;
631 dest->tail->next = src->head;
632 dest->tail = src->tail;
635 // Update information about earlier Indexes. Only the last Index
636 // from *src won't be counted in dest->old. The last Index is left
637 // open and can be even appended with lzma_index_append().
638 dest->old.count = dest->count + src->old.count;
639 dest->old.index_list_size
640 = dest->index_list_size + src->old.index_list_size;
642 // Update overall information.
643 dest->total_size += src->total_size;
644 dest->uncompressed_size += src->uncompressed_size;
645 dest->count += src->count;
646 dest->index_list_size += src->index_list_size;
647 dest->padding_size += src->padding_size;
649 // *src has nothing left but the base structure.
650 lzma_free(src, allocator);
656 extern LZMA_API lzma_index *
657 lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
659 lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
663 // Copy the base structure except the pointers.
667 dest->current.group = NULL;
670 const lzma_index_group *src_group = src->head;
671 while (src_group != NULL) {
672 // Allocate a new group.
673 lzma_index_group *dest_group = lzma_alloc(
674 sizeof(lzma_index_group), allocator);
675 if (dest_group == NULL) {
676 lzma_index_end(dest, allocator);
681 dest_group->prev = dest->tail;
682 dest_group->next = NULL;
684 if (dest->head == NULL)
685 dest->head = dest_group;
687 dest->tail->next = dest_group;
689 dest->tail = dest_group;
691 dest_group->last = src_group->last;
693 // Copy the arrays so that we don't read uninitialized memory.
694 const size_t count = src_group->last + 1;
695 memcpy(dest_group->total_sums, src_group->total_sums,
696 sizeof(lzma_vli) * count);
697 memcpy(dest_group->uncompressed_sums,
698 src_group->uncompressed_sums,
699 sizeof(lzma_vli) * count);
700 memcpy(dest_group->paddings, src_group->paddings,
701 sizeof(bool) * count);
703 // Copy also the read position.
704 if (src_group == src->current.group)
705 dest->current.group = dest->tail;
707 src_group = src_group->next;
714 extern LZMA_API lzma_bool
715 lzma_index_equal(const lzma_index *a, const lzma_index *b)
717 // No point to compare more if the pointers are the same.
721 // Compare the basic properties.
722 if (a->total_size != b->total_size
723 || a->uncompressed_size != b->uncompressed_size
724 || a->index_list_size != b->index_list_size
725 || a->count != b->count)
728 // Compare the Records.
729 const lzma_index_group *ag = a->head;
730 const lzma_index_group *bg = b->head;
731 while (ag != NULL && bg != NULL) {
732 const size_t count = ag->last + 1;
733 if (ag->last != bg->last
734 || memcmp(ag->total_sums,
736 sizeof(lzma_vli) * count) != 0
737 || memcmp(ag->uncompressed_sums,
738 bg->uncompressed_sums,
739 sizeof(lzma_vli) * count) != 0
740 || memcmp(ag->paddings, bg->paddings,
741 sizeof(bool) * count) != 0)
748 return ag == NULL && bg == NULL;