1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Handling of Index
6 // Copyright (C) 2007 Lasse Collin
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
18 ///////////////////////////////////////////////////////////////////////////////
23 /// Number of Records to allocate at once.
// This is the capacity of the total_sums[], uncompressed_sums[] and
// paddings[] arrays in every lzma_index_group. index_append_real() starts
// a new group once the tail group's last index reaches INDEX_GROUP_SIZE - 1.
24 #define INDEX_GROUP_SIZE 256
// One fixed-capacity chunk of Records. Groups are chained through prev/next,
// and the owning Index keeps head/tail pointers into that chain. Sizes are
// stored as running sums, so the totals of a group sit at index `last`.
// NOTE(review): the `last` member that other code reads (e.g. i->tail->last)
// is not declared on any line visible in this view; confirm it follows the
// "Index of the last Record" comment below.
27 typedef struct lzma_index_group_s lzma_index_group;
28 struct lzma_index_group_s {
// Link to the preceding group; previous_group() steps through this pointer.
30 lzma_index_group *prev;
// Link to the following group; next_group(), free_index_list() and
// lzma_index_dup() walk the chain through this pointer.
33 lzma_index_group *next;
35 /// Index of the last Record in this group
38 /// Total Size fields as cumulative sum relative to the beginning
39 /// of the group. The total size of the group is total_sums[last].
40 lzma_vli total_sums[INDEX_GROUP_SIZE];
42 /// Uncompressed Size fields as cumulative sum relative to the
43 /// beginning of the group. The uncompressed size of the group is
44 /// uncompressed_sums[last].
45 lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
47 /// True if the Record is padding
48 bool paddings[INDEX_GROUP_SIZE];
// Members of the structure that the rest of this file accesses through
// `lzma_index *`. The members below "Tracking the read position" are reached
// as i->current.*, and the ones below "Information about earlier Indexes" as
// i->old.*.
// NOTE(review): the structure header and the declarations of total_size,
// count, current.record and old.count are not visible in this view, although
// other code here reads and writes them; confirm against the full file.
53 /// Total size of the Blocks and padding
56 /// Uncompressed size of the Stream
57 lzma_vli uncompressed_size;
59 /// Number of non-padding records. This is needed by Index encoder.
62 /// Size of the List of Records field; this is updated every time
63 /// a new non-padding Record is added.
64 lzma_vli index_list_size;
66 /// This is zero if no Indexes have been combined with
67 /// lzma_index_cat(). With combined Indexes, this contains the sizes
68 /// of all but the latest Streams, including possible Stream Padding
70 lzma_vli padding_size;
72 /// First group of Records
73 lzma_index_group *head;
75 /// Last group of Records
76 lzma_index_group *tail;
78 /// Tracking the read position
80 /// Group where the current read position is. NULL means the read
// position has not been initialized yet (see lzma_index_rewind()).
81 lzma_index_group *group;
83 /// The most recently read record in *group
86 /// Uncompressed offset of the beginning of *group relative
87 /// to the beginning of the Stream
88 lzma_vli uncompressed_offset;
90 /// Compressed offset of the beginning of *group relative
91 /// to the beginning of the Stream
92 lzma_vli stream_offset;
95 /// Information about earlier Indexes when multiple Indexes have
98 /// Sum of the Record counts of all but the last Stream.
101 /// Sum of the List of Records fields of all but the last
102 /// Stream. This is needed when a new Index is concatenated
103 /// to this lzma_index structure.
104 lzma_vli index_list_size;
// Release the Record groups reachable from i->head using the given allocator.
// On the visible lines only groups are freed, never *i itself;
// lzma_index_end() frees the structure separately after calling this.
// NOTE(review): the loop header and the statement advancing g are not visible
// in this view.
110 free_index_list(lzma_index *i, lzma_allocator *allocator)
112 lzma_index_group *g = i->head;
// Fetch the successor before g is released; g must not be read afterwards.
115 lzma_index_group *tmp = g->next;
116 lzma_free(g, allocator);
// Produce an empty Index and return a pointer to it.
//
// The visible lines show two alternatives: allocating a fresh lzma_index
// with the allocator, and releasing the Record groups of the passed-in
// Index so that it can be reused. After that the bookkeeping fields are
// reset to the empty state.
// NOTE(review): the condition choosing between the two paths, the handling
// of a failed allocation and the return statement are not visible here;
// presumably i == NULL selects allocation. Confirm against the full file.
124 extern LZMA_API lzma_index *
125 lzma_index_init(lzma_index *i, lzma_allocator *allocator)
128 i = lzma_alloc(sizeof(lzma_index), allocator);
132 free_index_list(i, allocator);
136 i->uncompressed_size = 0;
138 i->index_list_size = 0;
// A NULL group marks the read position as "not started"; lzma_index_read()
// and lzma_index_locate() check for it before using i->current.
142 i->current.group = NULL;
144 i->old.index_list_size = 0;
// Destroy an Index: free all of its Record groups, then the structure itself,
// both through the given allocator.
// NOTE(review): whether a NULL i is tolerated cannot be told from the visible
// lines (free_index_list() dereferences i); confirm against the full file.
151 lzma_index_end(lzma_index *i, lzma_allocator *allocator)
154 free_index_list(i, allocator);
155 lzma_free(i, allocator);
// Accessor returning an lzma_vli for the given Index.
// NOTE(review): the body is not visible in this view; judging by the name it
// presumably returns i->count (the number of non-padding Records). Confirm.
162 extern LZMA_API lzma_vli
163 lzma_index_count(const lzma_index *i)
// Return the value index_size() computes from the Record count and the
// accumulated List of Records size of this Index. index_size() is defined
// outside this view; index_append() compares this result against
// LZMA_BACKWARD_SIZE_MAX when validating a new Record.
169 extern LZMA_API lzma_vli
170 lzma_index_size(const lzma_index *i)
172 return index_size(i->count, i->index_list_size);
// Return the accumulated total_size field of the Index. index_append() adds
// the Total Size of each non-padding Record to it, and lzma_index_cat() adds
// the total_size of the concatenated Index.
176 extern LZMA_API lzma_vli
177 lzma_index_total_size(const lzma_index *i)
179 return i->total_size;
// Return total_size plus the index_size() of all Records plus
// LZMA_STREAM_HEADER_SIZE twice. Unlike lzma_index_file_size(), this does not
// add padding_size and does not subtract the i->old.* counters.
// NOTE(review): the same constant is used for the Stream Header and the
// Stream Footer, i.e. the two are treated as equally sized here.
183 extern LZMA_API lzma_vli
184 lzma_index_stream_size(const lzma_index *i)
186 // Stream Header + Blocks + Index + Stream Footer
187 return LZMA_STREAM_HEADER_SIZE + i->total_size
188 + index_size(i->count, i->index_list_size)
189 + LZMA_STREAM_HEADER_SIZE;
// Return the size of the whole file described by this Index, including any
// Streams that were concatenated into it with lzma_index_cat().
//
// The i->old.* counters hold the Record count and List of Records size of
// the earlier Streams, so subtracting them leaves the figures of the last
// Stream only; its Index size and one header-sized field on each side are
// added on top of total_size and padding_size.
193 extern LZMA_API lzma_vli
194 lzma_index_file_size(const lzma_index *i)
196 // If multiple Streams are concatenated, the Stream Header, Index,
197 // and Stream Footer fields of all but the last Stream are already
198 // included in padding_size. Thus, we need to calculate only the
199 // size of the last Index, not all Indexes.
200 return i->total_size + i->padding_size
201 + index_size(i->count - i->old.count,
202 i->index_list_size - i->old.index_list_size)
203 + LZMA_STREAM_HEADER_SIZE * 2;
// Return the accumulated uncompressed_size field of the Index.
// lzma_index_locate() uses the same field as the exclusive upper bound for a
// valid target offset.
207 extern LZMA_API lzma_vli
208 lzma_index_uncompressed_size(const lzma_index *i)
210 return i->uncompressed_size;
// Return how many bytes (0-3) are needed to round the value of
// index_size_unpadded() up to the next multiple of four: (4 - size) & 3.
// NOTE(review): the return type line is not visible in this view, and the
// arithmetic assumes lzma_vli is an unsigned type; confirm both.
215 lzma_index_padding_size(const lzma_index *i)
217 return (LZMA_VLI_C(4)
218 - index_size_unpadded(i->count, i->index_list_size)) & 3;
222 /// Helper function for index_append()
// Store one Record in the group list without touching the Index-wide totals
// (index_append() maintains those). A new group is allocated when there is
// no tail group yet or the tail group is full; otherwise the Record goes
// into the next free slot of the tail group as a running sum.
//
// Returns LZMA_MEM_ERROR when allocating a new group fails.
// NOTE(review): the remaining parameter (used below as is_padding), the
// success return value, the increments of `last`/`count` and the addends of
// the running sums are on lines not visible in this view.
224 index_append_real(lzma_index *i, lzma_allocator *allocator,
225 lzma_vli total_size, lzma_vli uncompressed_size,
228 // Add the new record.
// "Full" means the last used index is the final slot of the arrays.
229 if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
230 // Allocate a new group.
231 lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
234 return LZMA_MEM_ERROR;
236 // Initialize the group and set its first record.
// Sums are relative to the start of the group, so slot 0 holds the plain
// sizes of the Record.
240 g->total_sums[0] = total_size;
241 g->uncompressed_sums[0] = uncompressed_size;
242 g->paddings[0] = is_padding;
244 // If this is the first group, make it the head.
250 // Make it the new tail.
254 // i->tail has space left for at least one record.
// The new slot is built from the previous cumulative value.
255 i->tail->total_sums[i->tail->last + 1]
256 = i->tail->total_sums[i->tail->last]
258 i->tail->uncompressed_sums[i->tail->last + 1]
259 = i->tail->uncompressed_sums[i->tail->last]
261 i->tail->paddings[i->tail->last + 1] = is_padding;
// Validate and add one Record (a real Block Record or a padding Record).
//
// Both sizes must be at most LZMA_VLI_MAX, otherwise LZMA_DATA_ERROR is
// returned. The Index-wide totals are updated first and then checked against
// the limits; only if they pass is index_append_real() called to store the
// Record. If either step fails, the totals are rolled back.
//
// Padding Records only grow padding_size and must have uncompressed_size 0
// (asserted). Other Records grow total_size, uncompressed_size and
// index_list_size.
// NOTE(review): the branch on is_padding, the declaration of `ret`, the
// count update and the final return are on lines not visible in this view.
270 index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
271 lzma_vli uncompressed_size, bool is_padding)
273 if (total_size > LZMA_VLI_MAX
274 || uncompressed_size > LZMA_VLI_MAX)
275 return LZMA_DATA_ERROR;
277 // This looks a bit ugly. We want to first validate that the Index
278 // and Stream stay in valid limits after adding this Record. After
279 // validating, we may need to allocate a new lzma_index_group (it's
280 // slightly more correct to validate before allocating, YMMV).
284 assert(uncompressed_size == 0);
286 // First update the info so we can validate it.
287 i->padding_size += total_size;
289 if (i->padding_size > LZMA_VLI_MAX
290 || lzma_index_file_size(i) > LZMA_VLI_MAX)
291 ret = LZMA_DATA_ERROR; // Would grow past the limits.
293 ret = index_append_real(i, allocator,
294 total_size, uncompressed_size, true);
296 // If something went wrong, undo the updated value.
298 i->padding_size -= total_size;
301 // First update the overall info so we can validate it.
// Bytes this Record adds to the List of Records: the encoded sizes of its
// two fields as reported by lzma_vli_size().
// NOTE(review): no lower bound on total_size is checked on the visible
// lines. If lzma_vli is unsigned, total_size < 4 makes total_size / 4 - 1
// wrap around; confirm callers cannot pass such a value or that
// lzma_vli_size() handles it.
302 const lzma_vli index_list_size_add
303 = lzma_vli_size(total_size / 4 - 1)
304 + lzma_vli_size(uncompressed_size);
306 i->total_size += total_size;
307 i->uncompressed_size += uncompressed_size;
309 i->index_list_size += index_list_size_add;
311 if (i->total_size > LZMA_VLI_MAX
312 || i->uncompressed_size > LZMA_VLI_MAX
313 || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
314 || lzma_index_file_size(i) > LZMA_VLI_MAX)
315 ret = LZMA_DATA_ERROR; // Would grow past the limits.
317 ret = index_append_real(i, allocator,
318 total_size, uncompressed_size, false);
320 if (ret != LZMA_OK) {
321 // Something went wrong. Undo the updates.
322 i->total_size -= total_size;
323 i->uncompressed_size -= uncompressed_size;
325 i->index_list_size -= index_list_size_add;
// Public entry point for adding a Record to the Index. It forwards to
// index_append() with is_padding = false, so padding Records cannot be added
// through this function. The lzma_ret of index_append() is returned as is.
333 extern LZMA_API lzma_ret
334 lzma_index_append(lzma_index *i, lzma_allocator *allocator,
335 lzma_vli total_size, lzma_vli uncompressed_size)
337 return index_append(i, allocator,
338 total_size, uncompressed_size, false);
342 /// Initialize i->current to point to the first Record.
// An Index without groups is expected to have count == 0 and one with groups
// count > 0 (both asserted). With groups present, the read position is set to
// Record 0 of the head group.
// NOTE(review): the return type and return statements are not visible in
// this view. lzma_index_locate() uses the result as a condition, so a truthy
// value presumably signals "no Records"; confirm against the full file.
344 init_current(lzma_index *i)
346 if (i->head == NULL) {
347 assert(i->count == 0);
351 assert(i->count > 0);
353 i->current.group = i->head;
354 i->current.record = 0;
// The first group starts LZMA_STREAM_HEADER_SIZE bytes into the Stream.
355 i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
356 i->current.uncompressed_offset = 0;
362 /// Go backward to the previous group.
// Requires that a previous group exists (asserted). The read position lands
// on the last Record of that group, and both group offsets are moved back by
// the totals of the group just entered.
364 previous_group(lzma_index *i)
366 assert(i->current.group->prev != NULL);
368 // Go to the previous group first.
369 i->current.group = i->current.group->prev;
370 i->current.record = i->current.group->last;
372 // Then update the offsets.
// i->current.group already refers to the previous group here, so the values
// subtracted are the cumulative totals of that group.
373 i->current.stream_offset -= i->current.group
374 ->total_sums[i->current.group->last];
375 i->current.uncompressed_offset -= i->current.group
376 ->uncompressed_sums[i->current.group->last];
382 /// Go forward to the next group.
// Requires that a next group exists (asserted). Both group offsets are
// advanced by the totals of the group being left, and the read position
// lands on Record 0 of the following group.
384 next_group(lzma_index *i)
386 assert(i->current.group->next != NULL);
388 // Update the offsets first.
// This must happen while i->current.group still refers to the group being
// left, because its cumulative totals are what gets added.
389 i->current.stream_offset += i->current.group
390 ->total_sums[i->current.group->last];
391 i->current.uncompressed_offset += i->current.group
392 ->uncompressed_sums[i->current.group->last];
394 // Then go to the next group.
395 i->current.record = 0;
396 i->current.group = i->current.group->next;
402 /// Set *info from i->current.
// Fills info->total_size, info->uncompressed_size, info->stream_offset and
// info->uncompressed_offset for the Record at i->current.record. The group
// arrays hold running sums and i->current.*_offset refer to the start of the
// group, so for any Record other than the first the previous cumulative
// entry is subtracted from the sizes and added to the offsets.
404 set_info(const lzma_index *i, lzma_index_record *info)
406 info->total_size = i->current.group->total_sums[i->current.record];
407 info->uncompressed_size = i->current.group->uncompressed_sums[
410 info->stream_offset = i->current.stream_offset;
411 info->uncompressed_offset = i->current.uncompressed_offset;
413 // If it's not the first Record in this group, we need to do some
415 if (i->current.record > 0) {
416 // _sums[] are cumulative, thus we need to subtract the
417 // previous _sums[] entry to get the sizes of this Record.
418 info->total_size -= i->current.group
419 ->total_sums[i->current.record - 1];
420 info->uncompressed_size -= i->current.group
421 ->uncompressed_sums[i->current.record - 1];
423 // i->current.{stream,uncompressed}_offset hold the offset
424 // of the beginning of the group, thus we need to add the
425 // appropriate amount to get the offsets of this Record.
426 info->stream_offset += i->current.group
427 ->total_sums[i->current.record - 1];
428 info->uncompressed_offset += i->current.group
429 ->uncompressed_sums[i->current.record - 1];
// Advance the read position to the next non-padding Record and report it
// through *info.
//
// A NULL i->current.group means reading has not started, so the position is
// first set up at the beginning of the Record list. Otherwise the position
// moves forward within the current group or on to the next group, repeating
// for as long as the Record it lands on is marked as padding.
// NOTE(review): the statements executed in each branch and all return
// statements are on lines not visible in this view, so the meaning of the
// lzma_bool result cannot be stated from here; confirm against the full file.
436 extern LZMA_API lzma_bool
437 lzma_index_read(lzma_index *i, lzma_index_record *info)
439 if (i->current.group == NULL) {
440 // We are at the beginning of the Record list. Set up
441 // i->current to point at the first Record. Return if there
446 // Try to go to the next Record.
447 if (i->current.record < i->current.group->last)
449 else if (i->current.group->next == NULL)
// Padding Records are skipped; keep advancing until a real Record is found.
453 } while (i->current.group->paddings[i->current.record]);
455 // We found a new Record. Set the information to *info.
// Reset the read position. Clearing i->current.group makes the next
// lzma_index_read() or lzma_index_locate() call re-initialize the position,
// since both check for a NULL group before using i->current.
463 lzma_index_rewind(lzma_index *i)
465 i->current.group = NULL;
// Find the Record whose uncompressed data contains the offset `target` and
// make it the current read position.
//
// Targets at or beyond i->uncompressed_size cannot be satisfied. Otherwise
// the group is found by walking forward and then backward from the current
// read position, and the Record inside the group by binary search over the
// cumulative uncompressed_sums[].
// NOTE(review): the early returns, the calls that move between groups, the
// declaration of `left`, the updates of left/right, and the final
// set_info/return are on lines not visible in this view; the meaning of the
// lzma_bool result should be confirmed against the full file.
470 extern LZMA_API lzma_bool
471 lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
473 // Check if it is possible to fulfill the request.
474 if (target >= i->uncompressed_size)
477 // Now we know that we will have an answer. Initialize the current
478 // read position if needed.
479 if (i->current.group == NULL && init_current(i))
482 // Locate the group where the wanted Block is. First search forward.
483 while (i->current.uncompressed_offset <= target) {
484 // If the first uncompressed byte of the next group is past
485 // the target offset, it has to be this or an earlier group.
486 if (i->current.uncompressed_offset + i->current.group
487 ->uncompressed_sums[i->current.group->last]
491 // Go forward to the next group.
495 // Then search backward.
496 while (i->current.uncompressed_offset > target)
499 // Now the target Block is somewhere in i->current.group. Offsets
500 // in groups are relative to the beginning of the group, thus
501 // we must adjust the target before starting the search loop.
502 assert(target >= i->current.uncompressed_offset);
503 target -= i->current.uncompressed_offset;
505 // Use binary search to locate the exact Record. It is the first
506 // Record whose uncompressed_sums[] value is greater than target.
507 // This is because we want the rightmost Record that fulfills the
508 // search criterion. It is possible that there are empty Blocks or
509 // padding, we don't want to return them.
511 size_t right = i->current.group->last;
513 while (left < right) {
// Midpoint computed as left + (right - left) / 2.
514 const size_t pos = left + (right - left) / 2;
515 if (i->current.group->uncompressed_sums[pos] <= target)
521 i->current.record = left;
524 // The found Record must not be padding or have zero uncompressed size.
525 assert(!i->current.group->paddings[i->current.record]);
// The size of a Record is its cumulative sum minus the previous one; Record 0
// has no predecessor, so its sum is checked directly.
527 if (i->current.record == 0)
528 assert(i->current.group->uncompressed_sums[0] > 0);
530 assert(i->current.group->uncompressed_sums[i->current.record]
531 - i->current.group->uncompressed_sums[
532 i->current.record - 1] > 0);
// Concatenate the Index *src to the end of *dest, with `padding` bytes of
// Stream Padding between the two Streams.
//
// Returns LZMA_PROG_ERROR if dest or src is NULL, if they are the same
// object, or if padding > LZMA_VLI_MAX; LZMA_DATA_ERROR if the size check
// below fails; and any error from index_append() when adding the padding
// Record. On the visible success path the groups of *src are moved into
// *dest and the *src structure itself is freed, so the caller must not use
// src afterwards.
// NOTE(review): the final return statement is not visible in this view.
541 extern LZMA_API lzma_ret
542 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
543 lzma_allocator *allocator, lzma_vli padding)
545 if (dest == NULL || src == NULL || dest == src
546 || padding > LZMA_VLI_MAX)
547 return LZMA_PROG_ERROR;
549 // Check that the combined size of the Indexes stays within limits.
// NOTE(review): every other limit check in this file compares against
// LZMA_VLI_MAX. If LZMA_VLI_UNKNOWN is the all-ones sentinel of lzma_vli
// (to be confirmed in the VLI header), "x > LZMA_VLI_UNKNOWN" can never be
// true and this check rejects nothing; LZMA_VLI_MAX is probably what was
// meant, here and in the second comparison.
551 const lzma_vli dest_size = lzma_index_file_size(dest);
552 const lzma_vli src_size = lzma_index_file_size(src);
553 if (dest_size + src_size > LZMA_VLI_UNKNOWN
554 || dest_size + src_size + padding
556 return LZMA_DATA_ERROR;
559 // Add a padding Record to take into account the size of
560 // Index + Stream Footer + Stream Padding + Stream Header.
562 // NOTE: This cannot overflow, because Index Size is always
563 // far smaller than LZMA_VLI_MAX, and adding two VLIs
564 // (Index Size and padding) doesn't overflow. It may become
565 // an invalid VLI if padding is huge, but that is caught by
567 padding += index_size(dest->count - dest->old.count,
568 dest->index_list_size
569 - dest->old.index_list_size)
570 + LZMA_STREAM_HEADER_SIZE * 2;
572 // Add the padding Record.
573 return_if_error(index_append(
574 dest, allocator, padding, 0, true));
576 // Avoid wasting lots of memory if src->head has only a few records
577 // that fit into dest->tail. That is, combine two groups if possible.
579 // NOTE: We know that dest->tail != NULL since we just appended
580 // a padding Record. But we don't know about src->head.
// Left side: number of Records in src->head. Right side: free slots left in
// dest->tail.
581 if (src->head != NULL && src->head->last + 1
582 <= INDEX_GROUP_SIZE - dest->tail->last - 1) {
583 // Copy the first Record.
// The sums in src->head are relative to its own start, so they are rebased
// onto the running sums of dest->tail.
584 dest->tail->total_sums[dest->tail->last + 1]
585 = dest->tail->total_sums[dest->tail->last]
586 + src->head->total_sums[0];
588 dest->tail->uncompressed_sums[dest->tail->last + 1]
589 = dest->tail->uncompressed_sums[dest->tail->last]
590 + src->head->uncompressed_sums[0];
592 dest->tail->paddings[dest->tail->last + 1]
593 = src->head->paddings[0];
// NOTE(review): each iteration copies Record i + 1 using the difference
// _sums[i + 1] - _sums[i]. Starting at i = 1 therefore begins with Record 2,
// and the delta of Record 1 (_sums[1] - _sums[0]) is never read on any
// visible line. Unless a non-visible line handles Record 1, it is dropped
// whenever src->head holds two or more Records, leaving the group sums out
// of step with the totals added below. The loop probably should start at
// i = 0; confirm against the full file and fix.
598 for (size_t i = 1; i < src->head->last; ++i) {
599 dest->tail->total_sums[dest->tail->last + 1]
600 = dest->tail->total_sums[dest->tail->last]
601 + src->head->total_sums[i + 1]
602 - src->head->total_sums[i];
604 dest->tail->uncompressed_sums[dest->tail->last + 1]
605 = dest->tail->uncompressed_sums[
607 + src->head->uncompressed_sums[i + 1]
608 - src->head->uncompressed_sums[i];
610 dest->tail->paddings[dest->tail->last + 1]
611 = src->head->paddings[i + 1];
616 // Free the head group of *src. Don't bother updating prev
617 // pointers since those won't be used for anything before
618 // we deallocate the whole *src structure.
619 lzma_index_group *tmp = src->head;
620 src->head = src->head->next;
621 lzma_free(tmp, allocator);
624 // If there are groups left in *src, join them as is. Note that if we
625 // are combining already combined Indexes, src->head can be non-NULL
626 // even if we just combined the old src->head to dest->tail.
627 if (src->head != NULL) {
628 src->head->prev = dest->tail;
629 dest->tail->next = src->head;
630 dest->tail = src->tail;
633 // Update information about earlier Indexes. Only the last Index
634 // from *src won't be counted in dest->old. The last Index is left
635 // open and can be even appended with lzma_index_append().
// These two assignments must run before dest->count and
// dest->index_list_size are increased below, because they read the
// pre-concatenation values.
636 dest->old.count = dest->count + src->old.count;
637 dest->old.index_list_size
638 = dest->index_list_size + src->old.index_list_size;
640 // Update overall information.
641 dest->total_size += src->total_size;
642 dest->uncompressed_size += src->uncompressed_size;
643 dest->count += src->count;
644 dest->index_list_size += src->index_list_size;
645 dest->padding_size += src->padding_size;
647 // *src has nothing left but the base structure.
648 lzma_free(src, allocator);
// Create a deep copy of *src: a new lzma_index plus a new chain of groups,
// all allocated with the given allocator. Returns the new Index.
//
// If allocating a group fails, the partially built copy is destroyed with
// lzma_index_end().
// NOTE(review): the handling of a failed first allocation, the statements
// copying the base structure and clearing dest->head/dest->tail, and the
// return statements are on lines not visible in this view; presumably NULL
// is returned on failure. Confirm against the full file.
654 extern LZMA_API lzma_index *
655 lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
657 lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
661 // Copy the base structure except the pointers.
// The read position must not point into the groups of *src; it is re-pointed
// further down if *src had one.
665 dest->current.group = NULL;
668 const lzma_index_group *src_group = src->head;
669 while (src_group != NULL) {
670 // Allocate a new group.
671 lzma_index_group *dest_group = lzma_alloc(
672 sizeof(lzma_index_group), allocator);
673 if (dest_group == NULL) {
674 lzma_index_end(dest, allocator);
// Append the new group to the end of the chain in dest.
679 dest_group->prev = dest->tail;
680 dest_group->next = NULL;
682 if (dest->head == NULL)
683 dest->head = dest_group;
685 dest->tail->next = dest_group;
687 dest->tail = dest_group;
689 dest_group->last = src_group->last;
691 // Copy the arrays so that we don't read uninitialized memory.
// Only the used slots, indexes 0..last, are copied.
692 const size_t count = src_group->last + 1;
693 memcpy(dest_group->total_sums, src_group->total_sums,
694 sizeof(lzma_vli) * count);
695 memcpy(dest_group->uncompressed_sums,
696 src_group->uncompressed_sums,
697 sizeof(lzma_vli) * count);
698 memcpy(dest_group->paddings, src_group->paddings,
699 sizeof(bool) * count);
701 // Copy also the read position.
// dest->tail is the copy of src_group at this point.
702 if (src_group == src->current.group)
703 dest->current.group = dest->tail;
705 src_group = src_group->next;
// Compare two Indexes. The summary fields (total_size, uncompressed_size,
// index_list_size, count) are compared first, then the groups pairwise: the
// number of used slots and the used portions of all three arrays must match.
// The final result is true only if both chains end at the same time.
//
// On the visible lines the comparison is per group, so two Indexes holding
// the same Records split differently across groups would not match, and
// padding_size and the old.* counters are not compared.
// NOTE(review): the early returns and the statements advancing ag/bg are on
// lines not visible in this view.
712 extern LZMA_API lzma_bool
713 lzma_index_equal(const lzma_index *a, const lzma_index *b)
715 // No point to compare more if the pointers are the same.
719 // Compare the basic properties.
720 if (a->total_size != b->total_size
721 || a->uncompressed_size != b->uncompressed_size
722 || a->index_list_size != b->index_list_size
723 || a->count != b->count)
726 // Compare the Records.
727 const lzma_index_group *ag = a->head;
728 const lzma_index_group *bg = b->head;
729 while (ag != NULL && bg != NULL) {
// Number of used slots in ag. The memcmp() calls below run only when
// bg->last is the same, because || short-circuits after the first test.
730 const size_t count = ag->last + 1;
731 if (ag->last != bg->last
732 || memcmp(ag->total_sums,
734 sizeof(lzma_vli) * count) != 0
735 || memcmp(ag->uncompressed_sums,
736 bg->uncompressed_sums,
737 sizeof(lzma_vli) * count) != 0
738 || memcmp(ag->paddings, bg->paddings,
739 sizeof(bool) * count) != 0)
// Equal only if both chains were exhausted together.
746 return ag == NULL && bg == NULL;