1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Handling of Index
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
16 /// Number of Records to allocate at once in the unrolled list.
17 #define INDEX_GROUP_SIZE 256
/// Node in an unrolled doubly-linked list; each node stores up to
/// INDEX_GROUP_SIZE Records.
typedef struct lzma_index_group_s lzma_index_group;
struct lzma_index_group_s {
	/// Previous group in the list (NULL if this is the first group)
	lzma_index_group *prev;

	/// Next group in the list (NULL if this is the last group)
	lzma_index_group *next;

	/// Index of the last Record in this group

	/// Unpadded Size fields as special cumulative sum relative to the
	/// beginning of the group. It's special in sense that the previous
	/// value is rounded up to the next multiple of four before
	/// calculating the new value. The total encoded size of the Blocks
	/// in the group is unpadded_sums[last] rounded up to the next
	/// multiple of four.
	///
	/// For example, if the Unpadded Sizes are 39, 57, and 81, the stored
	/// values are 39, 97 (40 + 57), and 181 (100 + 81). The total
	/// encoded size of these Blocks is 184.
	///
	/// This encoding is nice from point of view of lzma_index_locate().
	lzma_vli unpadded_sums[INDEX_GROUP_SIZE];

	/// Uncompressed Size fields as cumulative sum relative to the
	/// beginning of the group. The uncompressed size of the group is
	/// uncompressed_sums[last].
	lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];

	/// True if the Record is padding
	bool paddings[INDEX_GROUP_SIZE];
	/// Total size of the Blocks and padding

	/// Uncompressed size of the Stream
	lzma_vli uncompressed_size;

	/// Number of non-padding records. This is needed for Index encoder.

	/// Size of the List of Records field; this is updated every time
	/// a new non-padding Record is added.
	lzma_vli index_list_size;

	/// First group of Records
	lzma_index_group *head;

	/// Last group of Records
	lzma_index_group *tail;

	/// Tracking the read position

		/// Group where the current read position is.
		lzma_index_group *group;

		/// The most recently read Record in *group

		/// Uncompressed offset of the beginning of *group relative
		/// to the beginning of the Stream
		lzma_vli uncompressed_offset;

		/// Compressed offset of the beginning of *group relative
		/// to the beginning of the Stream
		lzma_vli stream_offset;

	/// Information about earlier Indexes when multiple Indexes have
	/// been concatenated

		/// Sum of the Record counts of all but the last Stream.

		/// Sum of the List of Records fields of all but the last
		/// Stream. This is needed when a new Index is concatenated
		/// to this lzma_index structure.
		lzma_vli index_list_size;

		/// Total size of all but the last Stream and all Stream
		/// Padding fields
		lzma_vli streams_size;
/// \brief      Rough memory usage of an Index holding `count' Records
extern LZMA_API(lzma_vli)
lzma_index_memusage(lzma_vli count)
	// More Records than LZMA_VLI_MAX cannot occur in a valid Index.
	if (count > LZMA_VLI_MAX)

	// Base structure plus one group per INDEX_GROUP_SIZE Records,
	// rounding the group count upwards.
	return sizeof(lzma_index) + (count + INDEX_GROUP_SIZE - 1)
			/ INDEX_GROUP_SIZE * sizeof(lzma_index_group);
free_index_list(lzma_index *i, lzma_allocator *allocator)
	// Walk the unrolled list from head to tail, freeing every group.
	lzma_index_group *g = i->head;
		// Save the next pointer before freeing the current group.
		lzma_index_group *tmp = g->next;
		lzma_free(g, allocator);
extern LZMA_API(lzma_index *)
lzma_index_init(lzma_index *i, lzma_allocator *allocator)
		// No existing structure was given; allocate a fresh one.
		i = lzma_alloc(sizeof(lzma_index), allocator);
		// Reuse the caller's structure: release its old group list.
		free_index_list(i, allocator);

	// Reset the totals, the read position, and the information about
	// earlier concatenated Streams.
	i->uncompressed_size = 0;
	i->index_list_size = 0;
	i->current.group = NULL;
	i->old.index_list_size = 0;
	i->old.streams_size = 0;
extern LZMA_API(void)
lzma_index_end(lzma_index *i, lzma_allocator *allocator)
	// Free the group list first, then the base structure itself.
	free_index_list(i, allocator);
	lzma_free(i, allocator);
/// \brief      Number of Records in the Index
extern LZMA_API(lzma_vli)
lzma_index_count(const lzma_index *i)
/// \brief      Size of the Index field (including padding) when encoded
extern LZMA_API(lzma_vli)
lzma_index_size(const lzma_index *i)
	return index_size(i->count, i->index_list_size);
/// \brief      Total size of the Blocks (including their padding)
extern LZMA_API(lzma_vli)
lzma_index_total_size(const lzma_index *i)
	return i->total_size;
/// \brief      Size of the Stream described by this Index
extern LZMA_API(lzma_vli)
lzma_index_stream_size(const lzma_index *i)
	// Stream Header + Blocks + Index + Stream Footer
	return LZMA_STREAM_HEADER_SIZE + i->total_size
			+ index_size(i->count, i->index_list_size)
			+ LZMA_STREAM_HEADER_SIZE;
/// \brief      Total size of the file: all earlier Streams plus the
///             Stream described by the (still open) last Index
extern LZMA_API(lzma_vli)
lzma_index_file_size(const lzma_index *i)
	// If multiple Streams are concatenated, the Stream Header, Index,
	// and Stream Footer fields of all but the last Stream are already
	// included in old.streams_size. Thus, we need to calculate only the
	// size of the last Index, not all Indexes.
	return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size
			+ index_size(i->count - i->old.count,
				i->index_list_size - i->old.index_list_size)
			+ LZMA_STREAM_HEADER_SIZE;
/// \brief      Uncompressed size of the Stream
extern LZMA_API(lzma_vli)
lzma_index_uncompressed_size(const lzma_index *i)
	return i->uncompressed_size;
lzma_index_padding_size(const lzma_index *i)
	// Number of bytes needed to round the unpadded Index size up to
	// the next multiple of four; "& 3" maps a result of four to zero.
	return (LZMA_VLI_C(4)
			- index_size_unpadded(i->count, i->index_list_size)) & 3;
/// Appends a new Record to the Index. If needed, this allocates a new
/// group. The caller is responsible for validating sizes and updating
/// the overall totals in *i.
index_append_real(lzma_index *i, lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size,
	// Add the new record.
	if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
		// Allocate a new group.
		lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
			return LZMA_MEM_ERROR;

		// Initialize the group and set its first record.
		g->unpadded_sums[0] = unpadded_size;
		g->uncompressed_sums[0] = uncompressed_size;
		g->paddings[0] = is_padding;

		// If this is the first group, make it the head.

		// Make it the new tail.

		// i->tail has space left for at least one record.
		// unpadded_sums[] is a "rounded-up" cumulative sum: the
		// previous value is rounded up to a multiple of four
		// before the new Unpadded Size is added to it.
		i->tail->unpadded_sums[i->tail->last + 1]
				= unpadded_size + vli_ceil4(
					i->tail->unpadded_sums[i->tail->last]);
		i->tail->uncompressed_sums[i->tail->last + 1]
				= i->tail->uncompressed_sums[i->tail->last]
		i->tail->paddings[i->tail->last + 1] = is_padding;
extern LZMA_API(lzma_ret)
lzma_index_append(lzma_index *i, lzma_allocator *allocator,
		lzma_vli unpadded_size, lzma_vli uncompressed_size)
	// Validate the arguments before touching any state.
	if (unpadded_size < UNPADDED_SIZE_MIN
			|| unpadded_size > UNPADDED_SIZE_MAX
			|| uncompressed_size > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// This looks a bit ugly. We want to first validate that the Index
	// and Stream stay in valid limits after adding this Record. After
	// validating, we may need to allocate a new lzma_index_group (it's
	// slightly more correct to validate before allocating, YMMV).

	// First update the overall info so we can validate it.
	const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size)
			+ lzma_vli_size(uncompressed_size);

	const lzma_vli total_size = vli_ceil4(unpadded_size);

	i->total_size += total_size;
	i->uncompressed_size += uncompressed_size;
	i->index_list_size += index_list_size_add;

	if (i->total_size > LZMA_VLI_MAX
			|| i->uncompressed_size > LZMA_VLI_MAX
			|| lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
			|| lzma_index_file_size(i) > LZMA_VLI_MAX)
		ret = LZMA_DATA_ERROR; // Would grow past the limits.
		ret = index_append_real(i, allocator, unpadded_size,
				uncompressed_size, false);

	if (ret != LZMA_OK) {
		// Something went wrong. Undo the updates.
		i->total_size -= total_size;
		i->uncompressed_size -= uncompressed_size;
		i->index_list_size -= index_list_size_add;
/// Initialize i->current to point to the first Record.
init_current(lzma_index *i)
	if (i->head == NULL) {
		// There are no Records at all.
		assert(i->count == 0);

	assert(i->count > 0);

	i->current.group = i->head;
	i->current.record = 0;
	// The first Block starts right after the Stream Header.
	i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
	i->current.uncompressed_offset = 0;
/// Go backward to the previous group.
previous_group(lzma_index *i)
	assert(i->current.group->prev != NULL);

	// Go to the previous group first.
	i->current.group = i->current.group->prev;
	i->current.record = i->current.group->last;

	// Then update the offsets by subtracting the total encoded size
	// and the uncompressed size of the (now current) group.
	i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[
			i->current.group->last]);
	i->current.uncompressed_offset -= i->current.group->uncompressed_sums[
			i->current.group->last];
/// Go forward to the next group.
next_group(lzma_index *i)
	assert(i->current.group->next != NULL);

	// Update the offsets first, using the sizes of the group we are
	// about to leave.
	i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[
			i->current.group->last]);
	i->current.uncompressed_offset += i->current.group
			->uncompressed_sums[i->current.group->last];

	// Then go to the next group.
	i->current.record = 0;
	i->current.group = i->current.group->next;
/// Set *info from i->current.
set_info(const lzma_index *i, lzma_index_record *info)
	// First copy the cumulative sizes from the current Record of the
	// current group.
			= i->current.group->unpadded_sums[i->current.record];
	info->total_size = vli_ceil4(info->unpadded_size);
	info->uncompressed_size = i->current.group->uncompressed_sums[

	// Copy the start offsets of this group.
	info->stream_offset = i->current.stream_offset;
	info->uncompressed_offset = i->current.uncompressed_offset;

	// If it's not the first Record in this group, we need to do some
	// adjustments.
	if (i->current.record > 0) {
		// Since the _sums[] are cumulative, we subtract the sums of
		// the previous Record to get the sizes of the current Record,
		// and add the sums of the previous Record to the offsets.
		// With unpadded_sums[] we need to take into account that it
		// uses a slightly unusual way to do the cumulative summing
		// (the previous value is rounded up to a multiple of four).
		const lzma_vli total_sum
				= vli_ceil4(i->current.group->unpadded_sums[
					i->current.record - 1]);

		const lzma_vli uncompressed_sum = i->current.group
				->uncompressed_sums[i->current.record - 1];

		info->total_size -= total_sum;
		info->unpadded_size -= total_sum;
		info->uncompressed_size -= uncompressed_sum;

		info->stream_offset += total_sum;
		info->uncompressed_offset += uncompressed_sum;
extern LZMA_API(lzma_bool)
lzma_index_read(lzma_index *i, lzma_index_record *info)
	if (i->current.group == NULL) {
		// We are at the beginning of the Record list. Set up
		// i->current to point at the first Record. Return if
		// there are no Records at all.

	// Try to go to the next Record.
	if (i->current.record < i->current.group->last)
	else if (i->current.group->next == NULL)

	// Skip padding Records; only real Blocks are reported.
	} while (i->current.group->paddings[i->current.record]);

	// We found a new Record. Set the information to *info.
extern LZMA_API(void)
lzma_index_rewind(lzma_index *i)
	// Invalidate the read position; the next lzma_index_read() will
	// start from the first Record again.
	i->current.group = NULL;
extern LZMA_API(lzma_bool)
lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
	// Check if it is possible to fulfill the request.
	if (target >= i->uncompressed_size)

	// Now we know that we will have an answer. Initialize the current
	// read position if needed.
	if (i->current.group == NULL && init_current(i))

	// Locate the group where the wanted Block is. First search forward.
	while (i->current.uncompressed_offset <= target) {
		// If the first uncompressed byte of the next group is past
		// the target offset, it has to be this or an earlier group.
		if (i->current.uncompressed_offset + i->current.group
				->uncompressed_sums[i->current.group->last]

		// Go forward to the next group.

	// Then search backward.
	while (i->current.uncompressed_offset > target)

	// Now the target Block is somewhere in i->current.group. Offsets
	// in groups are relative to the beginning of the group, thus
	// we must adjust the target before starting the search loop.
	assert(target >= i->current.uncompressed_offset);
	target -= i->current.uncompressed_offset;

	// Use binary search to locate the exact Record. It is the first
	// Record whose uncompressed_sums[] value is greater than target.
	// This is because we want the rightmost Record that fulfills the
	// search criterion. It is possible that there are empty Blocks or
	// padding, we don't want to return them.
	size_t right = i->current.group->last;

	while (left < right) {
		const size_t pos = left + (right - left) / 2;
		if (i->current.group->uncompressed_sums[pos] <= target)

	i->current.record = left;

	// The found Record must not be padding or have zero uncompressed size.
	assert(!i->current.group->paddings[i->current.record]);

	if (i->current.record == 0)
		assert(i->current.group->uncompressed_sums[0] > 0);
		assert(i->current.group->uncompressed_sums[i->current.record]
				- i->current.group->uncompressed_sums[
					i->current.record - 1] > 0);
extern LZMA_API(lzma_ret)
lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
		lzma_allocator *allocator, lzma_vli padding)
	if (dest == NULL || src == NULL || dest == src
			|| padding > LZMA_VLI_MAX)
		return LZMA_PROG_ERROR;

	// Check that the combined size of the Indexes stays within limits.
		const lzma_vli dest_size = index_size_unpadded(
				dest->count, dest->index_list_size);
		const lzma_vli src_size = index_size_unpadded(
				src->count, src->index_list_size);
		if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
			return LZMA_DATA_ERROR;

	// Check that the combined size of the "files" (combined total
	// encoded sizes) stays within limits.
		const lzma_vli dest_size = lzma_index_file_size(dest);
		const lzma_vli src_size = lzma_index_file_size(src);
		if (dest_size + src_size > LZMA_VLI_MAX
				|| dest_size + src_size + padding
			return LZMA_DATA_ERROR;

	// Add a padding Record to take into account the size of
	// Index + Stream Footer + Stream Padding + Stream Header.
	//
	// NOTE: This cannot overflow, because Index Size is always
	// far smaller than LZMA_VLI_MAX, and adding two VLIs
	// (Index Size and padding) doesn't overflow.
	padding += index_size(dest->count - dest->old.count,
				dest->index_list_size
					- dest->old.index_list_size)
			+ LZMA_STREAM_HEADER_SIZE * 2;

	// While the above cannot overflow, the result may still be an
	// invalid VLI.
	if (padding > LZMA_VLI_MAX)
		return LZMA_DATA_ERROR;

	// Add the padding Record.

	// First update the info so we can validate it.
	dest->old.streams_size += padding;

	if (dest->old.streams_size > LZMA_VLI_MAX
			|| lzma_index_file_size(dest) > LZMA_VLI_MAX)
		ret = LZMA_DATA_ERROR; // Would grow past the limits.
		ret = index_append_real(dest, allocator,

	// If something went wrong, undo the updated value and return
	// the error.
	if (ret != LZMA_OK) {
		dest->old.streams_size -= padding;

	// Avoid wasting lots of memory if src->head has only a few records
	// that fit into dest->tail. That is, combine two groups if possible.
	//
	// NOTE: We know that dest->tail != NULL since we just appended
	// a padding Record. But we don't know about src->head.
	if (src->head != NULL && src->head->last + 1
			<= INDEX_GROUP_SIZE - dest->tail->last - 1) {
		// Copy the first Record.
		dest->tail->unpadded_sums[dest->tail->last + 1]
				= vli_ceil4(dest->tail->unpadded_sums[
				+ src->head->unpadded_sums[0];

		dest->tail->uncompressed_sums[dest->tail->last + 1]
				= dest->tail->uncompressed_sums[dest->tail->last]
				+ src->head->uncompressed_sums[0];

		dest->tail->paddings[dest->tail->last + 1]
				= src->head->paddings[0];

		// Copy the rest of the Records, converting the cumulative
		// sums of src->head to be relative to dest->tail.
		for (size_t i = 1; i < src->head->last; ++i) {
			dest->tail->unpadded_sums[dest->tail->last + 1]
					= vli_ceil4(dest->tail->unpadded_sums[
					+ src->head->unpadded_sums[i + 1]
					- src->head->unpadded_sums[i];

			dest->tail->uncompressed_sums[dest->tail->last + 1]
					= dest->tail->uncompressed_sums[
					+ src->head->uncompressed_sums[i + 1]
					- src->head->uncompressed_sums[i];

			dest->tail->paddings[dest->tail->last + 1]
					= src->head->paddings[i + 1];

		// Free the head group of *src. Don't bother updating prev
		// pointers since those won't be used for anything before
		// we deallocate the whole *src structure.
		lzma_index_group *tmp = src->head;
		src->head = src->head->next;
		lzma_free(tmp, allocator);

	// If there are groups left in *src, join them as is. Note that if we
	// are combining already combined Indexes, src->head can be non-NULL
	// even if we just combined the old src->head to dest->tail.
	if (src->head != NULL) {
		src->head->prev = dest->tail;
		dest->tail->next = src->head;
		dest->tail = src->tail;

	// Update information about earlier Indexes. Only the last Index
	// from *src won't be counted in dest->old. The last Index is left
	// open and can be even appended with lzma_index_append().
	dest->old.count = dest->count + src->old.count;
	dest->old.index_list_size
			= dest->index_list_size + src->old.index_list_size;
	dest->old.streams_size += src->old.streams_size;

	// Update overall information.
	dest->total_size += src->total_size;
	dest->uncompressed_size += src->uncompressed_size;
	dest->count += src->count;
	dest->index_list_size += src->index_list_size;

	// *src has nothing left but the base structure.
	lzma_free(src, allocator);
extern LZMA_API(lzma_index *)
lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
	lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);

	// Copy the base structure except the pointers.
	dest->current.group = NULL;

	// Copy the Records group by group.
	const lzma_index_group *src_group = src->head;
	while (src_group != NULL) {
		// Allocate a new group.
		lzma_index_group *dest_group = lzma_alloc(
				sizeof(lzma_index_group), allocator);
		if (dest_group == NULL) {
			// Out of memory: release everything copied so far.
			lzma_index_end(dest, allocator);

		// Link the new group to the end of dest's list.
		dest_group->prev = dest->tail;
		dest_group->next = NULL;

		if (dest->head == NULL)
			dest->head = dest_group;
			dest->tail->next = dest_group;

		dest->tail = dest_group;

		dest_group->last = src_group->last;

		// Copy the arrays so that we don't read uninitialized memory.
		const size_t count = src_group->last + 1;
		memcpy(dest_group->unpadded_sums, src_group->unpadded_sums,
				sizeof(lzma_vli) * count);
		memcpy(dest_group->uncompressed_sums,
				src_group->uncompressed_sums,
				sizeof(lzma_vli) * count);
		memcpy(dest_group->paddings, src_group->paddings,
				sizeof(bool) * count);

		// Copy also the read position.
		if (src_group == src->current.group)
			dest->current.group = dest->tail;

		src_group = src_group->next;
extern LZMA_API(lzma_bool)
lzma_index_equal(const lzma_index *a, const lzma_index *b)
	// No point to compare more if the pointers are the same.

	// Compare the basic properties.
	if (a->total_size != b->total_size
			|| a->uncompressed_size != b->uncompressed_size
			|| a->index_list_size != b->index_list_size
			|| a->count != b->count)

	// Compare the Records group by group.
	const lzma_index_group *ag = a->head;
	const lzma_index_group *bg = b->head;
	while (ag != NULL && bg != NULL) {
		const size_t count = ag->last + 1;
		if (ag->last != bg->last
				|| memcmp(ag->unpadded_sums,
					sizeof(lzma_vli) * count) != 0
				|| memcmp(ag->uncompressed_sums,
					bg->uncompressed_sums,
					sizeof(lzma_vli) * count) != 0
				|| memcmp(ag->paddings, bg->paddings,
					sizeof(bool) * count) != 0)

	// Equal only if both group lists ended at the same time.
	return ag == NULL && bg == NULL;