]> icculus.org git repositories - icculus/xz.git/blob - src/liblzma/common/index.c
Renamed constants:
[icculus/xz.git] / src / liblzma / common / index.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       index.c
4 /// \brief      Handling of Index
5 //
6 //  Copyright (C) 2007 Lasse Collin
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2.1 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 ///////////////////////////////////////////////////////////////////////////////
19
20 #include "index.h"
21
22
23 /// Number of Records held by one lzma_index_group, i.e. allocated at once.
24 #define INDEX_GROUP_SIZE 256
25
26
/// One node in the doubly linked list of Record groups. A group stores up to
/// INDEX_GROUP_SIZE Records as cumulative sums.
27 typedef struct lzma_index_group_s lzma_index_group;
28 struct lzma_index_group_s {
29         /// Previous group
30         lzma_index_group *prev;
31
32         /// Next group
33         lzma_index_group *next;
34
35         /// Index of the last Record in this group
36         size_t last;
37
38         /// Total Size fields as cumulative sum relative to the beginning
39         /// of the group. The total size of the group is total_sums[last].
40         lzma_vli total_sums[INDEX_GROUP_SIZE];
41
42         /// Uncompressed Size fields as cumulative sum relative to the
43         /// beginning of the group. The uncompressed size of the group is
44         /// uncompressed_sums[last].
45         lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
46
47         /// True if the Record at the same array index is padding
48         bool paddings[INDEX_GROUP_SIZE];
49 };
50
51
/// Overall totals of an Index, the linked list of Record groups, and the
/// current read position.
52 struct lzma_index_s {
53         /// Total size of the Blocks and padding
54         lzma_vli total_size;
55
56         /// Uncompressed size of the Stream
57         lzma_vli uncompressed_size;
58
59         /// Number of non-padding records. This is needed by Index encoder.
60         lzma_vli count;
61
62         /// Size of the List of Records field; this is updated every time
63         /// a new non-padding Record is added.
64         lzma_vli index_list_size;
65
66         /// This is zero if no Indexes have been combined with
67         /// lzma_index_cat(). With combined Indexes, this is the sum of the
68         /// sizes of the padding Records that lzma_index_cat() has added
69         /// between the combined Streams.
70         lzma_vli padding_size;
71
72         /// First group of Records
73         lzma_index_group *head;
74
75         /// Last group of Records
76         lzma_index_group *tail;
77
78         /// Tracking the read position
79         struct {
80                 /// Group where the current read position is.
81                 lzma_index_group *group;
82
83                 /// The most recently read record in *group
84                 lzma_vli record;
85
86                 /// Uncompressed offset of the beginning of *group relative
87                 /// to the beginning of the Stream
88                 lzma_vli uncompressed_offset;
89
90                 /// Compressed offset of the beginning of *group relative
91                 /// to the beginning of the Stream
92                 lzma_vli stream_offset;
93         } current;
94
95         /// Information about earlier Indexes when multiple Indexes have
96         /// been combined.
97         struct {
98                 /// Sum of the Record counts of all but the last Stream.
99                 lzma_vli count;
100
101                 /// Sum of the List of Records fields of all but the last
102                 /// Stream. This is needed when a new Index is concatenated
103                 /// to this lzma_index structure.
104                 lzma_vli index_list_size;
105         } old;
106 };
107
108
109 static void
110 free_index_list(lzma_index *i, lzma_allocator *allocator)
111 {
112         lzma_index_group *g = i->head;
113
114         while (g != NULL) {
115                 lzma_index_group *tmp = g->next;
116                 lzma_free(g, allocator);
117                 g = tmp;
118         }
119
120         return;
121 }
122
123
124 extern LZMA_API lzma_index *
125 lzma_index_init(lzma_index *i, lzma_allocator *allocator)
126 {
127         if (i == NULL) {
128                 i = lzma_alloc(sizeof(lzma_index), allocator);
129                 if (i == NULL)
130                         return NULL;
131         } else {
132                 free_index_list(i, allocator);
133         }
134
135         i->total_size = 0;
136         i->uncompressed_size = 0;
137         i->count = 0;
138         i->index_list_size = 0;
139         i->padding_size = 0;
140         i->head = NULL;
141         i->tail = NULL;
142         i->current.group = NULL;
143         i->old.count = 0;
144         i->old.index_list_size = 0;
145
146         return i;
147 }
148
149
150 extern LZMA_API void
151 lzma_index_end(lzma_index *i, lzma_allocator *allocator)
152 {
153         if (i != NULL) {
154                 free_index_list(i, allocator);
155                 lzma_free(i, allocator);
156         }
157
158         return;
159 }
160
161
162 extern LZMA_API lzma_vli
163 lzma_index_count(const lzma_index *i)
164 {
165         return i->count;
166 }
167
168
169 extern LZMA_API lzma_vli
170 lzma_index_size(const lzma_index *i)
171 {
172         return index_size(i->count, i->index_list_size);
173 }
174
175
176 extern LZMA_API lzma_vli
177 lzma_index_total_size(const lzma_index *i)
178 {
179         return i->total_size;
180 }
181
182
183 extern LZMA_API lzma_vli
184 lzma_index_stream_size(const lzma_index *i)
185 {
186         // Stream Header + Blocks + Index + Stream Footer
187         return LZMA_STREAM_HEADER_SIZE + i->total_size
188                         + index_size(i->count, i->index_list_size)
189                         + LZMA_STREAM_HEADER_SIZE;
190 }
191
192
193 extern LZMA_API lzma_vli
194 lzma_index_file_size(const lzma_index *i)
195 {
196         // If multiple Streams are concatenated, the Stream Header, Index,
197         // and Stream Footer fields of all but the last Stream are already
198         // included in padding_size. Thus, we need to calculate only the
199         // size of the last Index, not all Indexes.
200         return i->total_size + i->padding_size
201                         + index_size(i->count - i->old.count,
202                                 i->index_list_size - i->old.index_list_size)
203                         + LZMA_STREAM_HEADER_SIZE * 2;
204 }
205
206
207 extern LZMA_API lzma_vli
208 lzma_index_uncompressed_size(const lzma_index *i)
209 {
210         return i->uncompressed_size;
211 }
212
213
214 extern uint32_t
215 lzma_index_padding_size(const lzma_index *i)
216 {
217         return (LZMA_VLI_C(4)
218                 - index_size_unpadded(i->count, i->index_list_size)) & 3;
219 }
220
221
222 /// Helper function for index_append()
223 static lzma_ret
224 index_append_real(lzma_index *i, lzma_allocator *allocator,
225                 lzma_vli total_size, lzma_vli uncompressed_size,
226                 bool is_padding)
227 {
228         // Add the new record.
229         if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
230                 // Allocate a new group.
231                 lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
232                                 allocator);
233                 if (g == NULL)
234                         return LZMA_MEM_ERROR;
235
236                 // Initialize the group and set its first record.
237                 g->prev = i->tail;
238                 g->next = NULL;
239                 g->last = 0;
240                 g->total_sums[0] = total_size;
241                 g->uncompressed_sums[0] = uncompressed_size;
242                 g->paddings[0] = is_padding;
243
244                 // If this is the first group, make it the head.
245                 if (i->head == NULL)
246                         i->head = g;
247                 else
248                         i->tail->next = g;
249
250                 // Make it the new tail.
251                 i->tail = g;
252
253         } else {
254                 // i->tail has space left for at least one record.
255                 i->tail->total_sums[i->tail->last + 1]
256                                 = i->tail->total_sums[i->tail->last]
257                                         + total_size;
258                 i->tail->uncompressed_sums[i->tail->last + 1]
259                                 = i->tail->uncompressed_sums[i->tail->last]
260                                         + uncompressed_size;
261                 i->tail->paddings[i->tail->last + 1] = is_padding;
262                 ++i->tail->last;
263         }
264
265         return LZMA_OK;
266 }
267
268
269 static lzma_ret
270 index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
271                 lzma_vli uncompressed_size, bool is_padding)
272 {
273         if (total_size > LZMA_VLI_MAX
274                         || uncompressed_size > LZMA_VLI_MAX)
275                 return LZMA_DATA_ERROR;
276
277         // This looks a bit ugly. We want to first validate that the Index
278         // and Stream stay in valid limits after adding this Record. After
279         // validating, we may need to allocate a new lzma_index_group (it's
280         // slightly more correct to validate before allocating, YMMV).
281         lzma_ret ret;
282
283         if (is_padding) {
284                 assert(uncompressed_size == 0);
285
286                 // First update the info so we can validate it.
287                 i->padding_size += total_size;
288
289                 if (i->padding_size > LZMA_VLI_MAX
290                                 || lzma_index_file_size(i) > LZMA_VLI_MAX)
291                         ret = LZMA_DATA_ERROR; // Would grow past the limits.
292                 else
293                         ret = index_append_real(i, allocator,
294                                         total_size, uncompressed_size, true);
295
296                 // If something went wrong, undo the updated value.
297                 if (ret != LZMA_OK)
298                         i->padding_size -= total_size;
299
300         } else {
301                 // First update the overall info so we can validate it.
302                 const lzma_vli index_list_size_add
303                                 = lzma_vli_size(total_size / 4 - 1)
304                                 + lzma_vli_size(uncompressed_size);
305
306                 i->total_size += total_size;
307                 i->uncompressed_size += uncompressed_size;
308                 ++i->count;
309                 i->index_list_size += index_list_size_add;
310
311                 if (i->total_size > LZMA_VLI_MAX
312                                 || i->uncompressed_size > LZMA_VLI_MAX
313                                 || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
314                                 || lzma_index_file_size(i) > LZMA_VLI_MAX)
315                         ret = LZMA_DATA_ERROR; // Would grow past the limits.
316                 else
317                         ret = index_append_real(i, allocator,
318                                         total_size, uncompressed_size, false);
319
320                 if (ret != LZMA_OK) {
321                         // Something went wrong. Undo the updates.
322                         i->total_size -= total_size;
323                         i->uncompressed_size -= uncompressed_size;
324                         --i->count;
325                         i->index_list_size -= index_list_size_add;
326                 }
327         }
328
329         return ret;
330 }
331
332
333 extern LZMA_API lzma_ret
334 lzma_index_append(lzma_index *i, lzma_allocator *allocator,
335                 lzma_vli total_size, lzma_vli uncompressed_size)
336 {
337         return index_append(i, allocator,
338                         total_size, uncompressed_size, false);
339 }
340
341
342 /// Initialize i->current to point to the first Record.
343 static bool
344 init_current(lzma_index *i)
345 {
346         if (i->head == NULL) {
347                 assert(i->count == 0);
348                 return true;
349         }
350
351         assert(i->count > 0);
352
353         i->current.group = i->head;
354         i->current.record = 0;
355         i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
356         i->current.uncompressed_offset = 0;
357
358         return false;
359 }
360
361
362 /// Go backward to the previous group.
363 static void
364 previous_group(lzma_index *i)
365 {
366         assert(i->current.group->prev != NULL);
367
368         // Go to the previous group first.
369         i->current.group = i->current.group->prev;
370         i->current.record = i->current.group->last;
371
372         // Then update the offsets.
373         i->current.stream_offset -= i->current.group
374                         ->total_sums[i->current.group->last];
375         i->current.uncompressed_offset -= i->current.group
376                         ->uncompressed_sums[i->current.group->last];
377
378         return;
379 }
380
381
382 /// Go forward to the next group.
383 static void
384 next_group(lzma_index *i)
385 {
386         assert(i->current.group->next != NULL);
387
388         // Update the offsets first.
389         i->current.stream_offset += i->current.group
390                         ->total_sums[i->current.group->last];
391         i->current.uncompressed_offset += i->current.group
392                         ->uncompressed_sums[i->current.group->last];
393
394         // Then go to the next group.
395         i->current.record = 0;
396         i->current.group = i->current.group->next;
397
398         return;
399 }
400
401
402 /// Set *info from i->current.
403 static void
404 set_info(const lzma_index *i, lzma_index_record *info)
405 {
406         info->total_size = i->current.group->total_sums[i->current.record];
407         info->uncompressed_size = i->current.group->uncompressed_sums[
408                         i->current.record];
409
410         info->stream_offset = i->current.stream_offset;
411         info->uncompressed_offset = i->current.uncompressed_offset;
412
413         // If it's not the first Record in this group, we need to do some
414         // adjustements.
415         if (i->current.record > 0) {
416                 // _sums[] are cumulative, thus we need to substract the
417                 // _previous _sums[] to get the sizes of this Record.
418                 info->total_size -= i->current.group
419                                 ->total_sums[i->current.record - 1];
420                 info->uncompressed_size -= i->current.group
421                                 ->uncompressed_sums[i->current.record - 1];
422
423                 // i->current.{total,uncompressed}_offsets have the offset
424                 // of the beginning of the group, thus we need to add the
425                 // appropriate amount to get the offsetes of this Record.
426                 info->stream_offset += i->current.group
427                                 ->total_sums[i->current.record - 1];
428                 info->uncompressed_offset += i->current.group
429                                 ->uncompressed_sums[i->current.record - 1];
430         }
431
432         return;
433 }
434
435
436 extern LZMA_API lzma_bool
437 lzma_index_read(lzma_index *i, lzma_index_record *info)
438 {
439         if (i->current.group == NULL) {
440                 // We are at the beginning of the Record list. Set up
441                 // i->current point at the first Record. Return if there
442                 // are no Records.
443                 if (init_current(i))
444                         return true;
445         } else do {
446                 // Try to go the next Record.
447                 if (i->current.record < i->current.group->last)
448                         ++i->current.record;
449                 else if (i->current.group->next == NULL)
450                         return true;
451                 else
452                         next_group(i);
453         } while (i->current.group->paddings[i->current.record]);
454
455         // We found a new Record. Set the information to *info.
456         set_info(i, info);
457
458         return false;
459 }
460
461
462 extern LZMA_API void
463 lzma_index_rewind(lzma_index *i)
464 {
465         i->current.group = NULL;
466         return;
467 }
468
469
470 extern LZMA_API lzma_bool
471 lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
472 {
473         // Check if it is possible to fullfill the request.
474         if (target >= i->uncompressed_size)
475                 return true;
476
477         // Now we know that we will have an answer. Initialize the current
478         // read position if needed.
479         if (i->current.group == NULL && init_current(i))
480                 return true;
481
482         // Locate the group where the wanted Block is. First search forward.
483         while (i->current.uncompressed_offset <= target) {
484                 // If the first uncompressed byte of the next group is past
485                 // the target offset, it has to be this or an earlier group.
486                 if (i->current.uncompressed_offset + i->current.group
487                                 ->uncompressed_sums[i->current.group->last]
488                                 > target)
489                         break;
490
491                 // Go forward to the next group.
492                 next_group(i);
493         }
494
495         // Then search backward.
496         while (i->current.uncompressed_offset > target)
497                 previous_group(i);
498
499         // Now the target Block is somewhere in i->current.group. Offsets
500         // in groups are relative to the beginning of the group, thus
501         // we must adjust the target before starting the search loop.
502         assert(target >= i->current.uncompressed_offset);
503         target -= i->current.uncompressed_offset;
504
505         // Use binary search to locate the exact Record. It is the first
506         // Record whose uncompressed_sums[] value is greater than target.
507         // This is because we want the rightmost Record that fullfills the
508         // search criterion. It is possible that there are empty Blocks or
509         // padding, we don't want to return them.
510         size_t left = 0;
511         size_t right = i->current.group->last;
512
513         while (left < right) {
514                 const size_t pos = left + (right - left) / 2;
515                 if (i->current.group->uncompressed_sums[pos] <= target)
516                         left = pos + 1;
517                 else
518                         right = pos;
519         }
520
521         i->current.record = left;
522
523 #ifndef NDEBUG
524         // The found Record must not be padding or have zero uncompressed size.
525         assert(!i->current.group->paddings[i->current.record]);
526
527         if (i->current.record == 0)
528                 assert(i->current.group->uncompressed_sums[0] > 0);
529         else
530                 assert(i->current.group->uncompressed_sums[i->current.record]
531                                 - i->current.group->uncompressed_sums[
532                                         i->current.record - 1] > 0);
533 #endif
534
535         set_info(i, info);
536
537         return false;
538 }
539
540
541 extern LZMA_API lzma_ret
542 lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
543                 lzma_allocator *allocator, lzma_vli padding)
544 {
545         if (dest == NULL || src == NULL || dest == src
546                         || padding > LZMA_VLI_MAX)
547                 return LZMA_PROG_ERROR;
548
549         // Check that the combined size of the Indexes stays within limits.
550         {
551                 const lzma_vli dest_size = lzma_index_file_size(dest);
552                 const lzma_vli src_size = lzma_index_file_size(src);
553                 if (dest_size + src_size > LZMA_VLI_UNKNOWN
554                                 || dest_size + src_size + padding
555                                         > LZMA_VLI_UNKNOWN)
556                         return LZMA_DATA_ERROR;
557         }
558
559         // Add a padding Record to take into account the size of
560         // Index + Stream Footer + Stream Padding + Stream Header.
561         //
562         // NOTE: This cannot overflow, because Index Size is always
563         // far smaller than LZMA_VLI_MAX, and adding two VLIs
564         // (Index Size and padding) doesn't overflow. It may become
565         // an invalid VLI if padding is huge, but that is caught by
566         // index_append().
567         padding += index_size(dest->count - dest->old.count,
568                                 dest->index_list_size
569                                         - dest->old.index_list_size)
570                         + LZMA_STREAM_HEADER_SIZE * 2;
571
572         // Add the padding Record.
573         return_if_error(index_append(
574                         dest, allocator, padding, 0, true));
575
576         // Avoid wasting lots of memory if src->head has only a few records
577         // that fit into dest->tail. That is, combine two groups if possible.
578         //
579         // NOTE: We know that dest->tail != NULL since we just appended
580         // a padding Record. But we don't know about src->head.
581         if (src->head != NULL && src->head->last + 1
582                         <= INDEX_GROUP_SIZE - dest->tail->last - 1) {
583                 // Copy the first Record.
584                 dest->tail->total_sums[dest->tail->last + 1]
585                         = dest->tail->total_sums[dest->tail->last]
586                                 + src->head->total_sums[0];
587
588                 dest->tail->uncompressed_sums[dest->tail->last + 1]
589                         = dest->tail->uncompressed_sums[dest->tail->last]
590                                 + src->head->uncompressed_sums[0];
591
592                 dest->tail->paddings[dest->tail->last + 1]
593                                 = src->head->paddings[0];
594
595                 ++dest->tail->last;
596
597                 // Copy the rest.
598                 for (size_t i = 1; i < src->head->last; ++i) {
599                         dest->tail->total_sums[dest->tail->last + 1]
600                                 = dest->tail->total_sums[dest->tail->last]
601                                         + src->head->total_sums[i + 1]
602                                         - src->head->total_sums[i];
603
604                         dest->tail->uncompressed_sums[dest->tail->last + 1]
605                                 = dest->tail->uncompressed_sums[
606                                                 dest->tail->last]
607                                         + src->head->uncompressed_sums[i + 1]
608                                         - src->head->uncompressed_sums[i];
609
610                         dest->tail->paddings[dest->tail->last + 1]
611                                 = src->head->paddings[i + 1];
612
613                         ++dest->tail->last;
614                 }
615
616                 // Free the head group of *src. Don't bother updating prev
617                 // pointers since those won't be used for anything before
618                 // we deallocate the whole *src structure.
619                 lzma_index_group *tmp = src->head;
620                 src->head = src->head->next;
621                 lzma_free(tmp, allocator);
622         }
623
624         // If there are groups left in *src, join them as is. Note that if we
625         // are combining already combined Indexes, src->head can be non-NULL
626         // even if we just combined the old src->head to dest->tail.
627         if (src->head != NULL) {
628                 src->head->prev = dest->tail;
629                 dest->tail->next = src->head;
630                 dest->tail = src->tail;
631         }
632
633         // Update information about earlier Indexes. Only the last Index
634         // from *src won't be counted in dest->old. The last Index is left
635         // open and can be even appended with lzma_index_append().
636         dest->old.count = dest->count + src->old.count;
637         dest->old.index_list_size
638                         = dest->index_list_size + src->old.index_list_size;
639
640         // Update overall information.
641         dest->total_size += src->total_size;
642         dest->uncompressed_size += src->uncompressed_size;
643         dest->count += src->count;
644         dest->index_list_size += src->index_list_size;
645         dest->padding_size += src->padding_size;
646
647         // *src has nothing left but the base structure.
648         lzma_free(src, allocator);
649
650         return LZMA_OK;
651 }
652
653
654 extern LZMA_API lzma_index *
655 lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
656 {
657         lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
658         if (dest == NULL)
659                 return NULL;
660
661         // Copy the base structure except the pointers.
662         *dest = *src;
663         dest->head = NULL;
664         dest->tail = NULL;
665         dest->current.group = NULL;
666
667         // Copy the Records.
668         const lzma_index_group *src_group = src->head;
669         while (src_group != NULL) {
670                 // Allocate a new group.
671                 lzma_index_group *dest_group = lzma_alloc(
672                                 sizeof(lzma_index_group), allocator);
673                 if (dest_group == NULL) {
674                         lzma_index_end(dest, allocator);
675                         return NULL;
676                 }
677
678                 // Set the pointers.
679                 dest_group->prev = dest->tail;
680                 dest_group->next = NULL;
681
682                 if (dest->head == NULL)
683                         dest->head = dest_group;
684                 else
685                         dest->tail->next = dest_group;
686
687                 dest->tail = dest_group;
688
689                 dest_group->last = src_group->last;
690
691                 // Copy the arrays so that we don't read uninitialized memory.
692                 const size_t count = src_group->last + 1;
693                 memcpy(dest_group->total_sums, src_group->total_sums,
694                                 sizeof(lzma_vli) * count);
695                 memcpy(dest_group->uncompressed_sums,
696                                 src_group->uncompressed_sums,
697                                 sizeof(lzma_vli) * count);
698                 memcpy(dest_group->paddings, src_group->paddings,
699                                 sizeof(bool) * count);
700
701                 // Copy also the read position.
702                 if (src_group == src->current.group)
703                         dest->current.group = dest->tail;
704
705                 src_group = src_group->next;
706         }
707
708         return dest;
709 }
710
711
712 extern LZMA_API lzma_bool
713 lzma_index_equal(const lzma_index *a, const lzma_index *b)
714 {
715         // No point to compare more if the pointers are the same.
716         if (a == b)
717                 return true;
718
719         // Compare the basic properties.
720         if (a->total_size != b->total_size
721                         || a->uncompressed_size != b->uncompressed_size
722                         || a->index_list_size != b->index_list_size
723                         || a->count != b->count)
724                 return false;
725
726         // Compare the Records.
727         const lzma_index_group *ag = a->head;
728         const lzma_index_group *bg = b->head;
729         while (ag != NULL && bg != NULL) {
730                 const size_t count = ag->last + 1;
731                 if (ag->last != bg->last
732                                 || memcmp(ag->total_sums,
733                                         bg->total_sums,
734                                         sizeof(lzma_vli) * count) != 0
735                                 || memcmp(ag->uncompressed_sums,
736                                         bg->uncompressed_sums,
737                                         sizeof(lzma_vli) * count) != 0
738                                 || memcmp(ag->paddings, bg->paddings,
739                                         sizeof(bool) * count) != 0)
740                         return false;
741
742                 ag = ag->next;
743                 bg = bg->next;
744         }
745
746         return ag == NULL && bg == NULL;
747 }