///////////////////////////////////////////////////////////////////////////////
//
/// \file       index.c
/// \brief      Handling of Index
//
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
/// Number of Records to allocate at once in the unrolled list.
#define INDEX_GROUP_SIZE 256
27 typedef struct lzma_index_group_s lzma_index_group;
28 struct lzma_index_group_s {
30 lzma_index_group *prev;
33 lzma_index_group *next;
35 /// Index of the last Record in this group
38 /// Unpadded Size fields as special cumulative sum relative to the
39 /// beginning of the group. It's special in sense that the previous
40 /// value is rounded up the next multiple of four with before
41 /// calculating the new value. The total encoded size of the Blocks
42 /// in the group is unpadded_sums[last] rounded up to the next
45 /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored
46 /// values are 39, 97 (40 + 57), and 181 (100 + 181). The total
47 /// encoded size of these Blocks is 184.
49 /// This encoding is nice from point of view of lzma_index_locate().
50 lzma_vli unpadded_sums[INDEX_GROUP_SIZE];
52 /// Uncompressed Size fields as cumulative sum relative to the
53 /// beginning of the group. The uncompressed size of the group is
54 /// uncompressed_sums[last].
55 lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
57 /// True if the Record is padding
58 bool paddings[INDEX_GROUP_SIZE];
63 /// Total size of the Blocks and padding
66 /// Uncompressed size of the Stream
67 lzma_vli uncompressed_size;
69 /// Number of non-padding records. This is needed for Index encoder.
72 /// Size of the List of Records field; this is updated every time
73 /// a new non-padding Record is added.
74 lzma_vli index_list_size;
76 /// First group of Records
77 lzma_index_group *head;
79 /// Last group of Records
80 lzma_index_group *tail;
82 /// Tracking the read position
84 /// Group where the current read position is.
85 lzma_index_group *group;
87 /// The most recently read Record in *group
90 /// Uncompressed offset of the beginning of *group relative
91 /// to the beginning of the Stream
92 lzma_vli uncompressed_offset;
94 /// Compressed offset of the beginning of *group relative
95 /// to the beginning of the Stream
96 lzma_vli stream_offset;
99 /// Information about earlier Indexes when multiple Indexes have
102 /// Sum of the Record counts of the all but the last Stream.
105 /// Sum of the List of Records fields of all but the last
106 /// Stream. This is needed when a new Index is concatenated
107 /// to this lzma_index structure.
108 lzma_vli index_list_size;
110 /// Total size of all but the last Stream and all Stream
112 lzma_vli streams_size;
117 extern LZMA_API(lzma_vli)
118 lzma_index_memusage(lzma_vli count)
120 if (count > LZMA_VLI_MAX)
123 return sizeof(lzma_index) + (count + INDEX_GROUP_SIZE - 1)
124 / INDEX_GROUP_SIZE * sizeof(lzma_index_group);
129 free_index_list(lzma_index *i, lzma_allocator *allocator)
131 lzma_index_group *g = i->head;
134 lzma_index_group *tmp = g->next;
135 lzma_free(g, allocator);
143 extern LZMA_API(lzma_index *)
144 lzma_index_init(lzma_index *i, lzma_allocator *allocator)
147 i = lzma_alloc(sizeof(lzma_index), allocator);
151 free_index_list(i, allocator);
155 i->uncompressed_size = 0;
157 i->index_list_size = 0;
160 i->current.group = NULL;
162 i->old.index_list_size = 0;
163 i->old.streams_size = 0;
169 extern LZMA_API(void)
170 lzma_index_end(lzma_index *i, lzma_allocator *allocator)
173 free_index_list(i, allocator);
174 lzma_free(i, allocator);
181 extern LZMA_API(lzma_vli)
182 lzma_index_count(const lzma_index *i)
188 extern LZMA_API(lzma_vli)
189 lzma_index_size(const lzma_index *i)
191 return index_size(i->count, i->index_list_size);
195 extern LZMA_API(lzma_vli)
196 lzma_index_total_size(const lzma_index *i)
198 return i->total_size;
202 extern LZMA_API(lzma_vli)
203 lzma_index_stream_size(const lzma_index *i)
205 // Stream Header + Blocks + Index + Stream Footer
206 return LZMA_STREAM_HEADER_SIZE + i->total_size
207 + index_size(i->count, i->index_list_size)
208 + LZMA_STREAM_HEADER_SIZE;
212 extern LZMA_API(lzma_vli)
213 lzma_index_file_size(const lzma_index *i)
215 // If multiple Streams are concatenated, the Stream Header, Index,
216 // and Stream Footer fields of all but the last Stream are already
217 // included in old.streams_size. Thus, we need to calculate only the
218 // size of the last Index, not all Indexes.
219 return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size
220 + index_size(i->count - i->old.count,
221 i->index_list_size - i->old.index_list_size)
222 + LZMA_STREAM_HEADER_SIZE;
226 extern LZMA_API(lzma_vli)
227 lzma_index_uncompressed_size(const lzma_index *i)
229 return i->uncompressed_size;
234 lzma_index_padding_size(const lzma_index *i)
236 return (LZMA_VLI_C(4)
237 - index_size_unpadded(i->count, i->index_list_size)) & 3;
241 /// Appends a new Record to the Index. If needed, this allocates a new
244 index_append_real(lzma_index *i, lzma_allocator *allocator,
245 lzma_vli unpadded_size, lzma_vli uncompressed_size,
248 // Add the new record.
249 if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
250 // Allocate a new group.
251 lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
254 return LZMA_MEM_ERROR;
256 // Initialize the group and set its first record.
260 g->unpadded_sums[0] = unpadded_size;
261 g->uncompressed_sums[0] = uncompressed_size;
262 g->paddings[0] = is_padding;
264 // If this is the first group, make it the head.
270 // Make it the new tail.
274 // i->tail has space left for at least one record.
275 i->tail->unpadded_sums[i->tail->last + 1]
276 = unpadded_size + vli_ceil4(
277 i->tail->unpadded_sums[i->tail->last]);
278 i->tail->uncompressed_sums[i->tail->last + 1]
279 = i->tail->uncompressed_sums[i->tail->last]
281 i->tail->paddings[i->tail->last + 1] = is_padding;
289 extern LZMA_API(lzma_ret)
290 lzma_index_append(lzma_index *i, lzma_allocator *allocator,
291 lzma_vli unpadded_size, lzma_vli uncompressed_size)
293 if (unpadded_size < UNPADDED_SIZE_MIN
294 || unpadded_size > UNPADDED_SIZE_MAX
295 || uncompressed_size > LZMA_VLI_MAX)
296 return LZMA_PROG_ERROR;
298 // This looks a bit ugly. We want to first validate that the Index
299 // and Stream stay in valid limits after adding this Record. After
300 // validating, we may need to allocate a new lzma_index_group (it's
301 // slightly more correct to validate before allocating, YMMV).
304 // First update the overall info so we can validate it.
305 const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size)
306 + lzma_vli_size(uncompressed_size);
308 const lzma_vli total_size = vli_ceil4(unpadded_size);
310 i->total_size += total_size;
311 i->uncompressed_size += uncompressed_size;
313 i->index_list_size += index_list_size_add;
315 if (i->total_size > LZMA_VLI_MAX
316 || i->uncompressed_size > LZMA_VLI_MAX
317 || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
318 || lzma_index_file_size(i) > LZMA_VLI_MAX)
319 ret = LZMA_DATA_ERROR; // Would grow past the limits.
321 ret = index_append_real(i, allocator, unpadded_size,
322 uncompressed_size, false);
324 if (ret != LZMA_OK) {
325 // Something went wrong. Undo the updates.
326 i->total_size -= total_size;
327 i->uncompressed_size -= uncompressed_size;
329 i->index_list_size -= index_list_size_add;
336 /// Initialize i->current to point to the first Record.
338 init_current(lzma_index *i)
340 if (i->head == NULL) {
341 assert(i->count == 0);
345 assert(i->count > 0);
347 i->current.group = i->head;
348 i->current.record = 0;
349 i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
350 i->current.uncompressed_offset = 0;
356 /// Go backward to the previous group.
358 previous_group(lzma_index *i)
360 assert(i->current.group->prev != NULL);
362 // Go to the previous group first.
363 i->current.group = i->current.group->prev;
364 i->current.record = i->current.group->last;
366 // Then update the offsets.
367 i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[
368 i->current.group->last]);
369 i->current.uncompressed_offset -= i->current.group->uncompressed_sums[
370 i->current.group->last];
376 /// Go forward to the next group.
378 next_group(lzma_index *i)
380 assert(i->current.group->next != NULL);
382 // Update the offsets first.
383 i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[
384 i->current.group->last]);
385 i->current.uncompressed_offset += i->current.group
386 ->uncompressed_sums[i->current.group->last];
388 // Then go to the next group.
389 i->current.record = 0;
390 i->current.group = i->current.group->next;
396 /// Set *info from i->current.
398 set_info(const lzma_index *i, lzma_index_record *info)
400 // First copy the cumulative sizes from the current Record of the
403 = i->current.group->unpadded_sums[i->current.record];
404 info->total_size = vli_ceil4(info->unpadded_size);
405 info->uncompressed_size = i->current.group->uncompressed_sums[
408 // Copy the start offsets of this group.
409 info->stream_offset = i->current.stream_offset;
410 info->uncompressed_offset = i->current.uncompressed_offset;
412 // If it's not the first Record in this group, we need to do some
414 if (i->current.record > 0) {
415 // Since the _sums[] are cumulative, we substract the sums of
416 // the previous Record to get the sizes of the current Record,
417 // and add the sums of the previous Record to the offsets.
418 // With unpadded_sums[] we need to take into account that it
419 // uses a bit weird way to do the cumulative summing
420 const lzma_vli total_sum
421 = vli_ceil4(i->current.group->unpadded_sums[
422 i->current.record - 1]);
424 const lzma_vli uncompressed_sum = i->current.group
425 ->uncompressed_sums[i->current.record - 1];
427 info->total_size -= total_sum;
428 info->unpadded_size -= total_sum;
429 info->uncompressed_size -= uncompressed_sum;
431 info->stream_offset += total_sum;
432 info->uncompressed_offset += uncompressed_sum;
439 extern LZMA_API(lzma_bool)
440 lzma_index_read(lzma_index *i, lzma_index_record *info)
442 if (i->current.group == NULL) {
443 // We are at the beginning of the Record list. Set up
444 // i->current point at the first Record. Return if there
449 // Try to go the next Record.
450 if (i->current.record < i->current.group->last)
452 else if (i->current.group->next == NULL)
456 } while (i->current.group->paddings[i->current.record]);
458 // We found a new Record. Set the information to *info.
465 extern LZMA_API(void)
466 lzma_index_rewind(lzma_index *i)
468 i->current.group = NULL;
473 extern LZMA_API(lzma_bool)
474 lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
476 // Check if it is possible to fullfill the request.
477 if (target >= i->uncompressed_size)
480 // Now we know that we will have an answer. Initialize the current
481 // read position if needed.
482 if (i->current.group == NULL && init_current(i))
485 // Locate the group where the wanted Block is. First search forward.
486 while (i->current.uncompressed_offset <= target) {
487 // If the first uncompressed byte of the next group is past
488 // the target offset, it has to be this or an earlier group.
489 if (i->current.uncompressed_offset + i->current.group
490 ->uncompressed_sums[i->current.group->last]
494 // Go forward to the next group.
498 // Then search backward.
499 while (i->current.uncompressed_offset > target)
502 // Now the target Block is somewhere in i->current.group. Offsets
503 // in groups are relative to the beginning of the group, thus
504 // we must adjust the target before starting the search loop.
505 assert(target >= i->current.uncompressed_offset);
506 target -= i->current.uncompressed_offset;
508 // Use binary search to locate the exact Record. It is the first
509 // Record whose uncompressed_sums[] value is greater than target.
510 // This is because we want the rightmost Record that fullfills the
511 // search criterion. It is possible that there are empty Blocks or
512 // padding, we don't want to return them.
514 size_t right = i->current.group->last;
516 while (left < right) {
517 const size_t pos = left + (right - left) / 2;
518 if (i->current.group->uncompressed_sums[pos] <= target)
524 i->current.record = left;
527 // The found Record must not be padding or have zero uncompressed size.
528 assert(!i->current.group->paddings[i->current.record]);
530 if (i->current.record == 0)
531 assert(i->current.group->uncompressed_sums[0] > 0);
533 assert(i->current.group->uncompressed_sums[i->current.record]
534 - i->current.group->uncompressed_sums[
535 i->current.record - 1] > 0);
544 extern LZMA_API(lzma_ret)
545 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
546 lzma_allocator *allocator, lzma_vli padding)
548 if (dest == NULL || src == NULL || dest == src
549 || padding > LZMA_VLI_MAX)
550 return LZMA_PROG_ERROR;
552 // Check that the combined size of the Indexes stays within limits.
554 const lzma_vli dest_size = index_size_unpadded(
555 dest->count, dest->index_list_size);
556 const lzma_vli src_size = index_size_unpadded(
557 src->count, src->index_list_size);
558 if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
559 return LZMA_DATA_ERROR;
562 // Check that the combined size of the "files" (combined total
563 // encoded sizes) stays within limits.
565 const lzma_vli dest_size = lzma_index_file_size(dest);
566 const lzma_vli src_size = lzma_index_file_size(src);
567 if (dest_size + src_size > LZMA_VLI_MAX
568 || dest_size + src_size + padding
570 return LZMA_DATA_ERROR;
573 // Add a padding Record to take into account the size of
574 // Index + Stream Footer + Stream Padding + Stream Header.
576 // NOTE: This cannot overflow, because Index Size is always
577 // far smaller than LZMA_VLI_MAX, and adding two VLIs
578 // (Index Size and padding) doesn't overflow.
579 padding += index_size(dest->count - dest->old.count,
580 dest->index_list_size
581 - dest->old.index_list_size)
582 + LZMA_STREAM_HEADER_SIZE * 2;
584 // While the above cannot overflow, but it may become an invalid VLI.
585 if (padding > LZMA_VLI_MAX)
586 return LZMA_DATA_ERROR;
588 // Add the padding Record.
592 // First update the info so we can validate it.
593 dest->old.streams_size += padding;
595 if (dest->old.streams_size > LZMA_VLI_MAX
596 || lzma_index_file_size(dest) > LZMA_VLI_MAX)
597 ret = LZMA_DATA_ERROR; // Would grow past the limits.
599 ret = index_append_real(dest, allocator,
602 // If something went wrong, undo the updated value and return
604 if (ret != LZMA_OK) {
605 dest->old.streams_size -= padding;
610 // Avoid wasting lots of memory if src->head has only a few records
611 // that fit into dest->tail. That is, combine two groups if possible.
613 // NOTE: We know that dest->tail != NULL since we just appended
614 // a padding Record. But we don't know about src->head.
615 if (src->head != NULL && src->head->last + 1
616 <= INDEX_GROUP_SIZE - dest->tail->last - 1) {
617 // Copy the first Record.
618 dest->tail->unpadded_sums[dest->tail->last + 1]
619 = vli_ceil4(dest->tail->unpadded_sums[
621 + src->head->unpadded_sums[0];
623 dest->tail->uncompressed_sums[dest->tail->last + 1]
624 = dest->tail->uncompressed_sums[dest->tail->last]
625 + src->head->uncompressed_sums[0];
627 dest->tail->paddings[dest->tail->last + 1]
628 = src->head->paddings[0];
633 for (size_t i = 1; i < src->head->last; ++i) {
634 dest->tail->unpadded_sums[dest->tail->last + 1]
635 = vli_ceil4(dest->tail->unpadded_sums[
637 + src->head->unpadded_sums[i + 1]
638 - src->head->unpadded_sums[i];
640 dest->tail->uncompressed_sums[dest->tail->last + 1]
641 = dest->tail->uncompressed_sums[
643 + src->head->uncompressed_sums[i + 1]
644 - src->head->uncompressed_sums[i];
646 dest->tail->paddings[dest->tail->last + 1]
647 = src->head->paddings[i + 1];
652 // Free the head group of *src. Don't bother updating prev
653 // pointers since those won't be used for anything before
654 // we deallocate the whole *src structure.
655 lzma_index_group *tmp = src->head;
656 src->head = src->head->next;
657 lzma_free(tmp, allocator);
660 // If there are groups left in *src, join them as is. Note that if we
661 // are combining already combined Indexes, src->head can be non-NULL
662 // even if we just combined the old src->head to dest->tail.
663 if (src->head != NULL) {
664 src->head->prev = dest->tail;
665 dest->tail->next = src->head;
666 dest->tail = src->tail;
669 // Update information about earlier Indexes. Only the last Index
670 // from *src won't be counted in dest->old. The last Index is left
671 // open and can be even appended with lzma_index_append().
672 dest->old.count = dest->count + src->old.count;
673 dest->old.index_list_size
674 = dest->index_list_size + src->old.index_list_size;
675 dest->old.streams_size += src->old.streams_size;
677 // Update overall information.
678 dest->total_size += src->total_size;
679 dest->uncompressed_size += src->uncompressed_size;
680 dest->count += src->count;
681 dest->index_list_size += src->index_list_size;
683 // *src has nothing left but the base structure.
684 lzma_free(src, allocator);
690 extern LZMA_API(lzma_index *)
691 lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
693 lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
697 // Copy the base structure except the pointers.
701 dest->current.group = NULL;
704 const lzma_index_group *src_group = src->head;
705 while (src_group != NULL) {
706 // Allocate a new group.
707 lzma_index_group *dest_group = lzma_alloc(
708 sizeof(lzma_index_group), allocator);
709 if (dest_group == NULL) {
710 lzma_index_end(dest, allocator);
715 dest_group->prev = dest->tail;
716 dest_group->next = NULL;
718 if (dest->head == NULL)
719 dest->head = dest_group;
721 dest->tail->next = dest_group;
723 dest->tail = dest_group;
725 dest_group->last = src_group->last;
727 // Copy the arrays so that we don't read uninitialized memory.
728 const size_t count = src_group->last + 1;
729 memcpy(dest_group->unpadded_sums, src_group->unpadded_sums,
730 sizeof(lzma_vli) * count);
731 memcpy(dest_group->uncompressed_sums,
732 src_group->uncompressed_sums,
733 sizeof(lzma_vli) * count);
734 memcpy(dest_group->paddings, src_group->paddings,
735 sizeof(bool) * count);
737 // Copy also the read position.
738 if (src_group == src->current.group)
739 dest->current.group = dest->tail;
741 src_group = src_group->next;
748 extern LZMA_API(lzma_bool)
749 lzma_index_equal(const lzma_index *a, const lzma_index *b)
751 // No point to compare more if the pointers are the same.
755 // Compare the basic properties.
756 if (a->total_size != b->total_size
757 || a->uncompressed_size != b->uncompressed_size
758 || a->index_list_size != b->index_list_size
759 || a->count != b->count)
762 // Compare the Records.
763 const lzma_index_group *ag = a->head;
764 const lzma_index_group *bg = b->head;
765 while (ag != NULL && bg != NULL) {
766 const size_t count = ag->last + 1;
767 if (ag->last != bg->last
768 || memcmp(ag->unpadded_sums,
770 sizeof(lzma_vli) * count) != 0
771 || memcmp(ag->uncompressed_sums,
772 bg->uncompressed_sums,
773 sizeof(lzma_vli) * count) != 0
774 || memcmp(ag->paddings, bg->paddings,
775 sizeof(bool) * count) != 0)
782 return ag == NULL && bg == NULL;