Diffstat (limited to 'src/liblzma/common/index.c')
-rw-r--r--   src/liblzma/common/index.c   773
1 file changed, 691 insertions, 82 deletions
diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c
index 6816b37a..f01206de 100644
--- a/src/liblzma/common/index.c
+++ b/src/liblzma/common/index.c
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       index.c
-/// \brief      Handling of Index in Metadata
+/// \brief      Handling of Index
 //
 //  Copyright (C) 2007 Lasse Collin
 //
@@ -17,124 +17,733 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "common.h"
+#include "index.h"
 
 
-/**
- * \brief Duplicates an Index list
- *
- * \return A copy of the Index list, or NULL if memory allocation
- *         failed or the original Index was empty.
- */
-extern LZMA_API lzma_index *
-lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator)
+/// Number of Records to allocate at once.
+#define INDEX_GROUP_SIZE 256
+
+
+typedef struct lzma_index_group_s lzma_index_group;
+struct lzma_index_group_s {
+	/// Previous group
+	lzma_index_group *prev;
+
+	/// Next group
+	lzma_index_group *next;
+
+	/// Index of the last Record in this group
+	size_t last;
+
+	/// Total Size fields as cumulative sum relative to the beginning
+	/// of the group. The total size of the group is total_sums[last].
+	lzma_vli total_sums[INDEX_GROUP_SIZE];
+
+	/// Uncompressed Size fields as cumulative sum relative to the
+	/// beginning of the group. The uncompressed size of the group is
+	/// uncompressed_sums[last].
+	lzma_vli uncompressed_sums[INDEX_GROUP_SIZE];
+
+	/// True if the Record is padding
+	bool paddings[INDEX_GROUP_SIZE];
+};
+
+
+struct lzma_index_s {
+	/// Total size of the Blocks and padding
+	lzma_vli total_size;
+
+	/// Uncompressed size of the Stream
+	lzma_vli uncompressed_size;
+
+	/// Number of non-padding records. This is needed by the Index encoder.
+	lzma_vli count;
+
+	/// Size of the List of Records field; this is updated every time
+	/// a new non-padding Record is added.
+	lzma_vli index_list_size;
+
+	/// This is zero if no Indexes have been combined with
+	/// lzma_index_cat(). With combined Indexes, this contains the sizes
+	/// of all but the latest Stream, including possible Stream Padding
+	/// fields.
+	lzma_vli padding_size;
+
+	/// First group of Records
+	lzma_index_group *head;
+
+	/// Last group of Records
+	lzma_index_group *tail;
+
+	/// Tracking the read position
+	struct {
+		/// Group where the current read position is.
+		lzma_index_group *group;
+
+		/// The most recently read record in *group
+		lzma_vli record;
+
+		/// Uncompressed offset of the beginning of *group relative
+		/// to the beginning of the Stream
+		lzma_vli uncompressed_offset;
+
+		/// Compressed offset of the beginning of *group relative
+		/// to the beginning of the Stream
+		lzma_vli stream_offset;
+	} current;
+
+	/// Information about earlier Indexes when multiple Indexes have
+	/// been combined.
+	struct {
+		/// Sum of the Record counts of all but the last Stream.
+		lzma_vli count;
+
+		/// Sum of the List of Records fields of all but the last
+		/// Stream. This is needed when a new Index is concatenated
+		/// to this lzma_index structure.
+		lzma_vli index_list_size;
+	} old;
+};
+
+
+static void
+free_index_list(lzma_index *i, lzma_allocator *allocator)
 {
-	lzma_index *new_head = NULL;
-	lzma_index *new_current = NULL;
+	lzma_index_group *g = i->head;
 
-	while (old_current != NULL) {
-		lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
-		if (i == NULL) {
-			lzma_index_free(new_head, allocator);
-			return NULL;
-		}
+	while (g != NULL) {
+		lzma_index_group *tmp = g->next;
+		lzma_free(g, allocator);
+		g = tmp;
+	}
 
-		i->total_size = old_current->total_size;
-		i->uncompressed_size = old_current->uncompressed_size;
-		i->next = NULL;
+	return;
+}
 
-		if (new_head == NULL)
-			new_head = i;
-		else
-			new_current->next = i;
-		new_current = i;
-		old_current = old_current->next;
+
+extern LZMA_API lzma_index *
+lzma_index_init(lzma_index *i, lzma_allocator *allocator)
+{
+	if (i == NULL) {
+		i = lzma_alloc(sizeof(lzma_index), allocator);
+		if (i == NULL)
+			return NULL;
+	} else {
+		free_index_list(i, allocator);
 	}
 
-	return new_head;
+	i->total_size = 0;
+	i->uncompressed_size = 0;
+	i->count = 0;
+	i->index_list_size = 0;
+	i->padding_size = 0;
+	i->head = NULL;
+	i->tail = NULL;
+	i->current.group = NULL;
+	i->old.count = 0;
+	i->old.index_list_size = 0;
+
+	return i;
 }
 
 
-/**
- * \brief Frees an Index list
- *
- * All Index Recors in the list are freed. This function is convenient when
- * getting rid of lzma_metadata structures containing an Index.
- */
 extern LZMA_API void
-lzma_index_free(lzma_index *i, lzma_allocator *allocator)
+lzma_index_end(lzma_index *i, lzma_allocator *allocator)
 {
-	while (i != NULL) {
-		lzma_index *tmp = i->next;
+	if (i != NULL) {
+		free_index_list(i, allocator);
 		lzma_free(i, allocator);
-		i = tmp;
 	}
 
 	return;
 }
 
 
-/**
- * \brief Calculates properties of an Index list
- *
- *
- */
-extern LZMA_API lzma_ret
-lzma_index_count(const lzma_index *i, size_t *count,
-		lzma_vli *lzma_restrict total_size,
-		lzma_vli *lzma_restrict uncompressed_size)
-{
-	*count = 0;
-	*total_size = 0;
-	*uncompressed_size = 0;
-
-	while (i != NULL) {
-		if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) {
-			*total_size = LZMA_VLI_VALUE_UNKNOWN;
-		} else if (i->total_size > LZMA_VLI_VALUE_MAX) {
-			return LZMA_PROG_ERROR;
-		} else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) {
-			*total_size += i->total_size;
-			if (*total_size > LZMA_VLI_VALUE_MAX)
-				return LZMA_PROG_ERROR;
+extern LZMA_API lzma_vli
+lzma_index_count(const lzma_index *i)
+{
+	return i->count;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_size(const lzma_index *i)
+{
+	return index_size(i->count, i->index_list_size);
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_total_size(const lzma_index *i)
+{
+	return i->total_size;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_stream_size(const lzma_index *i)
+{
+	// Stream Header + Blocks + Index + Stream Footer
+	return LZMA_STREAM_HEADER_SIZE + i->total_size
+			+ index_size(i->count, i->index_list_size)
+			+ LZMA_STREAM_HEADER_SIZE;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_file_size(const lzma_index *i)
+{
+	// If multiple Streams are concatenated, the Stream Header, Index,
+	// and Stream Footer fields of all but the last Stream are already
+	// included in padding_size. Thus, we need to calculate only the
+	// size of the last Index, not all Indexes.
+	return i->total_size + i->padding_size
+			+ index_size(i->count - i->old.count,
+				i->index_list_size - i->old.index_list_size)
+			+ LZMA_STREAM_HEADER_SIZE * 2;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_uncompressed_size(const lzma_index *i)
+{
+	return i->uncompressed_size;
+}
+
+
+extern uint32_t
+lzma_index_padding_size(const lzma_index *i)
+{
+	return (LZMA_VLI_C(4)
+			- index_size_unpadded(i->count, i->index_list_size)) & 3;
+}
+
+
+/// Helper function for index_append()
+static lzma_ret
+index_append_real(lzma_index *i, lzma_allocator *allocator,
+		lzma_vli total_size, lzma_vli uncompressed_size,
+		bool is_padding)
+{
+	// Add the new record.
+	if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) {
+		// Allocate a new group.
+		lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group),
+				allocator);
+		if (g == NULL)
+			return LZMA_MEM_ERROR;
+
+		// Initialize the group and set its first record.
+		g->prev = i->tail;
+		g->next = NULL;
+		g->last = 0;
+		g->total_sums[0] = total_size;
+		g->uncompressed_sums[0] = uncompressed_size;
+		g->paddings[0] = is_padding;
+
+		// If this is the first group, make it the head.
+		if (i->head == NULL)
+			i->head = g;
+		else
+			i->tail->next = g;
+
+		// Make it the new tail.
+		i->tail = g;
+
+	} else {
+		// i->tail has space left for at least one record.
+		i->tail->total_sums[i->tail->last + 1]
+				= i->tail->total_sums[i->tail->last]
+				+ total_size;
+		i->tail->uncompressed_sums[i->tail->last + 1]
+				= i->tail->uncompressed_sums[i->tail->last]
+				+ uncompressed_size;
+		i->tail->paddings[i->tail->last + 1] = is_padding;
+		++i->tail->last;
+	}
+
+	return LZMA_OK;
+}
+
+
+static lzma_ret
+index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
+		lzma_vli uncompressed_size, bool is_padding)
+{
+	if (total_size > LZMA_VLI_VALUE_MAX
+			|| uncompressed_size > LZMA_VLI_VALUE_MAX)
+		return LZMA_DATA_ERROR;
+
+	// This looks a bit ugly. We want to first validate that the Index
+	// and Stream stay in valid limits after adding this Record. After
+	// validating, we may need to allocate a new lzma_index_group (it's
+	// slightly more correct to validate before allocating, YMMV).
+	lzma_ret ret;
+
+	if (is_padding) {
+		assert(uncompressed_size == 0);
+
+		// First update the info so we can validate it.
+		i->padding_size += total_size;
+
+		if (i->padding_size > LZMA_VLI_VALUE_MAX
+				|| lzma_index_file_size(i)
+					> LZMA_VLI_VALUE_MAX)
+			ret = LZMA_DATA_ERROR; // Would grow past the limits.
+		else
+			ret = index_append_real(i, allocator,
+					total_size, uncompressed_size, true);
+
+		// If something went wrong, undo the updated value.
+		if (ret != LZMA_OK)
+			i->padding_size -= total_size;
+
+	} else {
+		// First update the overall info so we can validate it.
+		const lzma_vli index_list_size_add
+				= lzma_vli_size(total_size / 4 - 1)
+				+ lzma_vli_size(uncompressed_size);
+
+		i->total_size += total_size;
+		i->uncompressed_size += uncompressed_size;
+		++i->count;
+		i->index_list_size += index_list_size_add;
+
+		if (i->total_size > LZMA_VLI_VALUE_MAX
+				|| i->uncompressed_size > LZMA_VLI_VALUE_MAX
+				|| lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
+				|| lzma_index_file_size(i)
+					> LZMA_VLI_VALUE_MAX)
+			ret = LZMA_DATA_ERROR; // Would grow past the limits.
+		else
+			ret = index_append_real(i, allocator,
+					total_size, uncompressed_size, false);
+
+		if (ret != LZMA_OK) {
+			// Something went wrong. Undo the updates.
+			i->total_size -= total_size;
+			i->uncompressed_size -= uncompressed_size;
+			--i->count;
+			i->index_list_size -= index_list_size_add;
 		}
+	}
+
+	return ret;
+}
+
+
+extern LZMA_API lzma_ret
+lzma_index_append(lzma_index *i, lzma_allocator *allocator,
+		lzma_vli total_size, lzma_vli uncompressed_size)
+{
+	return index_append(i, allocator,
+			total_size, uncompressed_size, false);
+}
+
+
+/// Initialize i->current to point to the first Record.
+static bool
+init_current(lzma_index *i)
+{
+	if (i->head == NULL) {
+		assert(i->count == 0);
+		return true;
+	}
+
+	assert(i->count > 0);
+
+	i->current.group = i->head;
+	i->current.record = 0;
+	i->current.stream_offset = LZMA_STREAM_HEADER_SIZE;
+	i->current.uncompressed_offset = 0;
 
-		if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) {
-			*uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
-		} else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) {
-			return LZMA_PROG_ERROR;
-		} else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
-			*uncompressed_size += i->uncompressed_size;
-			if (*uncompressed_size > LZMA_VLI_VALUE_MAX)
-				return LZMA_PROG_ERROR;
+	return false;
+}
+
+
+/// Go backward to the previous group.
+static void
+previous_group(lzma_index *i)
+{
+	assert(i->current.group->prev != NULL);
+
+	// Go to the previous group first.
+	i->current.group = i->current.group->prev;
+	i->current.record = i->current.group->last;
+
+	// Then update the offsets.
+	i->current.stream_offset -= i->current.group
+			->total_sums[i->current.group->last];
+	i->current.uncompressed_offset -= i->current.group
+			->uncompressed_sums[i->current.group->last];
+
+	return;
+}
+
+
+/// Go forward to the next group.
+static void
+next_group(lzma_index *i)
+{
+	assert(i->current.group->next != NULL);
+
+	// Update the offsets first.
+	i->current.stream_offset += i->current.group
+			->total_sums[i->current.group->last];
+	i->current.uncompressed_offset += i->current.group
+			->uncompressed_sums[i->current.group->last];
+
+	// Then go to the next group.
+	i->current.record = 0;
+	i->current.group = i->current.group->next;
+
+	return;
+}
+
+
+/// Set *info from i->current.
+static void
+set_info(const lzma_index *i, lzma_index_record *info)
+{
+	info->total_size = i->current.group->total_sums[i->current.record];
+	info->uncompressed_size = i->current.group->uncompressed_sums[
+			i->current.record];
+
+	info->stream_offset = i->current.stream_offset;
+	info->uncompressed_offset = i->current.uncompressed_offset;
+
+	// If it's not the first Record in this group, we need to do some
+	// adjustments.
+	if (i->current.record > 0) {
+		// _sums[] are cumulative, thus we need to subtract the
+		// previous _sums[] to get the sizes of this Record.
+		info->total_size -= i->current.group
+				->total_sums[i->current.record - 1];
+		info->uncompressed_size -= i->current.group
+				->uncompressed_sums[i->current.record - 1];
+
+		// i->current.{stream,uncompressed}_offset have the offset
+		// of the beginning of the group, thus we need to add the
+		// appropriate amount to get the offsets of this Record.
+		info->stream_offset += i->current.group
+				->total_sums[i->current.record - 1];
+		info->uncompressed_offset += i->current.group
+				->uncompressed_sums[i->current.record - 1];
+	}
+
+	return;
+}
+
+
+extern LZMA_API lzma_bool
+lzma_index_read(lzma_index *i, lzma_index_record *info)
+{
+	if (i->current.group == NULL) {
+		// We are at the beginning of the Record list. Set up
+		// i->current to point to the first Record. Return true
+		// if there are no Records.
+		if (init_current(i))
+			return true;
+	} else do {
+		// Try to go to the next Record.
+		if (i->current.record < i->current.group->last)
+			++i->current.record;
+		else if (i->current.group->next == NULL)
+			return true;
+		else
+			next_group(i);
+	} while (i->current.group->paddings[i->current.record]);
+
+	// We found a new Record. Set the information to *info.
+	set_info(i, info);
+
+	return false;
+}
+
+
+extern LZMA_API void
+lzma_index_rewind(lzma_index *i)
+{
+	i->current.group = NULL;
+	return;
+}
+
+
+extern LZMA_API lzma_bool
+lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target)
+{
+	// Check if it is possible to fulfill the request.
+	if (target >= i->uncompressed_size)
+		return true;
+
+	// Now we know that we will have an answer. Initialize the current
+	// read position if needed.
+	if (i->current.group == NULL && init_current(i))
+		return true;
+
+	// Locate the group where the wanted Block is. First search forward.
+	while (i->current.uncompressed_offset <= target) {
+		// If the first uncompressed byte of the next group is past
+		// the target offset, it has to be this or an earlier group.
+		if (i->current.uncompressed_offset + i->current.group
+				->uncompressed_sums[i->current.group->last]
+				> target)
+			break;
+
+		// Go forward to the next group.
+		next_group(i);
+	}
+
+	// Then search backward.
+	while (i->current.uncompressed_offset > target)
+		previous_group(i);
+
+	// Now the target Block is somewhere in i->current.group. Offsets
+	// in groups are relative to the beginning of the group, thus
+	// we must adjust the target before starting the search loop.
+	assert(target >= i->current.uncompressed_offset);
+	target -= i->current.uncompressed_offset;
+
+	// Use binary search to locate the exact Record. It is the first
+	// Record whose uncompressed_sums[] value is greater than target.
+	// This is because we want the rightmost Record that fulfills the
+	// search criterion. It is possible that there are empty Blocks or
+	// padding; we don't want to return them.
+	size_t left = 0;
+	size_t right = i->current.group->last;
+
+	while (left < right) {
+		const size_t pos = left + (right - left) / 2;
+		if (i->current.group->uncompressed_sums[pos] <= target)
+			left = pos + 1;
+		else
+			right = pos;
+	}
+
+	i->current.record = left;
+
+#ifndef NDEBUG
+	// The found Record must not be padding or have zero uncompressed size.
+	assert(!i->current.group->paddings[i->current.record]);
+
+	if (i->current.record == 0)
+		assert(i->current.group->uncompressed_sums[0] > 0);
+	else
+		assert(i->current.group->uncompressed_sums[i->current.record]
+				- i->current.group->uncompressed_sums[
+				i->current.record - 1] > 0);
+#endif
+
+	set_info(i, info);
+
+	return false;
+}
+
+
+extern LZMA_API lzma_ret
+lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
+		lzma_allocator *allocator, lzma_vli padding)
+{
+	if (dest == NULL || src == NULL || dest == src
+			|| padding > LZMA_VLI_VALUE_MAX)
+		return LZMA_PROG_ERROR;
+
+	// Check that the combined size of the Indexes stays within limits.
+	{
+		const lzma_vli dest_size = lzma_index_file_size(dest);
+		const lzma_vli src_size = lzma_index_file_size(src);
+		if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN
+				|| dest_size + src_size + padding
+					> LZMA_VLI_VALUE_UNKNOWN)
+			return LZMA_DATA_ERROR;
+	}
+
+	// Add a padding Record to take into account the size of
+	// Index + Stream Footer + Stream Padding + Stream Header.
+	//
+	// NOTE: This cannot overflow, because Index Size is always
+	// far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs
+	// (Index Size and padding) doesn't overflow. It may become
+	// an invalid VLI if padding is huge, but that is caught by
+	// index_append().
+	padding += index_size(dest->count - dest->old.count,
+				dest->index_list_size
+					- dest->old.index_list_size)
+			+ LZMA_STREAM_HEADER_SIZE * 2;
+
+	// Add the padding Record.
+	return_if_error(index_append(
+			dest, allocator, padding, 0, true));
+
+	// Avoid wasting lots of memory if src->head has only a few records
+	// that fit into dest->tail. That is, combine two groups if possible.
+	//
+	// NOTE: We know that dest->tail != NULL since we just appended
+	// a padding Record. But we don't know about src->head.
+	if (src->head != NULL && src->head->last + 1
+			<= INDEX_GROUP_SIZE - dest->tail->last - 1) {
+		// Copy the first Record.
+		dest->tail->total_sums[dest->tail->last + 1]
+				= dest->tail->total_sums[dest->tail->last]
+				+ src->head->total_sums[0];
+
+		dest->tail->uncompressed_sums[dest->tail->last + 1]
+				= dest->tail->uncompressed_sums[dest->tail->last]
+				+ src->head->uncompressed_sums[0];
+
+		dest->tail->paddings[dest->tail->last + 1]
+				= src->head->paddings[0];
+
+		++dest->tail->last;
+
+		// Copy the rest.
+		for (size_t i = 1; i < src->head->last; ++i) {
+			dest->tail->total_sums[dest->tail->last + 1]
+					= dest->tail->total_sums[dest->tail->last]
+					+ src->head->total_sums[i + 1]
+					- src->head->total_sums[i];
+
+			dest->tail->uncompressed_sums[dest->tail->last + 1]
+					= dest->tail->uncompressed_sums[
+						dest->tail->last]
+					+ src->head->uncompressed_sums[i + 1]
+					- src->head->uncompressed_sums[i];
+
+			dest->tail->paddings[dest->tail->last + 1]
+					= src->head->paddings[i + 1];
+
+			++dest->tail->last;
 		}
 
-		++*count;
-		i = i->next;
+		// Free the head group of *src. Don't bother updating prev
+		// pointers since those won't be used for anything before
+		// we deallocate the whole *src structure.
+		lzma_index_group *tmp = src->head;
+		src->head = src->head->next;
+		lzma_free(tmp, allocator);
+	}
+
+	// If there are groups left in *src, join them as is. Note that if we
+	// are combining already combined Indexes, src->head can be non-NULL
+	// even if we just combined the old src->head to dest->tail.
+	if (src->head != NULL) {
+		src->head->prev = dest->tail;
+		dest->tail->next = src->head;
+		dest->tail = src->tail;
 	}
 
-	// FIXME ?
-	if (*total_size == LZMA_VLI_VALUE_UNKNOWN
-			|| *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN)
-		return LZMA_HEADER_ERROR;
+	// Update information about earlier Indexes. Only the last Index
+	// from *src won't be counted in dest->old. The last Index is left
+	// open and can even be appended to with lzma_index_append().
+	dest->old.count = dest->count + src->old.count;
+	dest->old.index_list_size
+			= dest->index_list_size + src->old.index_list_size;
+
+	// Update overall information.
+	dest->total_size += src->total_size;
+	dest->uncompressed_size += src->uncompressed_size;
+	dest->count += src->count;
+	dest->index_list_size += src->index_list_size;
+	dest->padding_size += src->padding_size;
+
+	// *src has nothing left but the base structure.
+	lzma_free(src, allocator);
 
 	return LZMA_OK;
 }
 
 
+extern LZMA_API lzma_index *
+lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
+{
+	lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator);
+	if (dest == NULL)
+		return NULL;
+
+	// Copy the base structure except the pointers.
+	*dest = *src;
+	dest->head = NULL;
+	dest->tail = NULL;
+	dest->current.group = NULL;
+
+	// Copy the Records.
+	const lzma_index_group *src_group = src->head;
+	while (src_group != NULL) {
+		// Allocate a new group.
+		lzma_index_group *dest_group = lzma_alloc(
+				sizeof(lzma_index_group), allocator);
+		if (dest_group == NULL) {
+			lzma_index_end(dest, allocator);
+			return NULL;
+		}
+
+		// Set the pointers.
+		dest_group->prev = dest->tail;
+		dest_group->next = NULL;
+
+		if (dest->head == NULL)
+			dest->head = dest_group;
+		else
+			dest->tail->next = dest_group;
+
+		dest->tail = dest_group;
+
+		dest_group->last = src_group->last;
+
+		// Copy the arrays so that we don't read uninitialized memory.
+		const size_t count = src_group->last + 1;
+		memcpy(dest_group->total_sums, src_group->total_sums,
+				sizeof(lzma_vli) * count);
+		memcpy(dest_group->uncompressed_sums,
+				src_group->uncompressed_sums,
+				sizeof(lzma_vli) * count);
+		memcpy(dest_group->paddings, src_group->paddings,
+				sizeof(bool) * count);
+
+		// Copy also the read position.
+		if (src_group == src->current.group)
+			dest->current.group = dest->tail;
+
+		src_group = src_group->next;
+	}
+
+	return dest;
+}
+
 
 extern LZMA_API lzma_bool
-lzma_index_is_equal(const lzma_index *a, const lzma_index *b)
+lzma_index_equal(const lzma_index *a, const lzma_index *b)
 {
-	while (a != NULL && b != NULL) {
-		if (a->total_size != b->total_size || a->uncompressed_size
-				!= b->uncompressed_size)
+	// No point to compare more if the pointers are the same.
+	if (a == b)
+		return true;
+
+	// Compare the basic properties.
+	if (a->total_size != b->total_size
+			|| a->uncompressed_size != b->uncompressed_size
+			|| a->index_list_size != b->index_list_size
+			|| a->count != b->count)
+		return false;
+
+	// Compare the Records.
+	const lzma_index_group *ag = a->head;
+	const lzma_index_group *bg = b->head;
+	while (ag != NULL && bg != NULL) {
+		const size_t count = ag->last + 1;
+		if (ag->last != bg->last
+				|| memcmp(ag->total_sums,
+					bg->total_sums,
+					sizeof(lzma_vli) * count) != 0
+				|| memcmp(ag->uncompressed_sums,
+					bg->uncompressed_sums,
+					sizeof(lzma_vli) * count) != 0
+				|| memcmp(ag->paddings, bg->paddings,
+					sizeof(bool) * count) != 0)
 			return false;
 
-		a = a->next;
-		b = b->next;
+		ag = ag->next;
+		bg = bg->next;
 	}
 
-	return a == b;
+	return ag == NULL && bg == NULL;
 }
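
For orientation, here is a small usage sketch of the rewritten API; it is not part of the commit. It assumes that this revision's public header declares lzma_index, lzma_index_record, and the lzma_index_* functions added above, and that a NULL lzma_allocator falls back to plain malloc()/free() as elsewhere in liblzma. The Block sizes and the <lzma.h> include are illustrative only.

/* Hypothetical caller, not part of the commit: build a three-Record Index
 * with lzma_index_append(), walk it with lzma_index_read(), and look up an
 * uncompressed offset with lzma_index_locate(). */
#include <stdio.h>
#include <lzma.h>	/* assumed to declare the lzma_index_* API of this revision */

int main(void)
{
	/* NULL base pointer: allocate a fresh lzma_index.
	 * NULL allocator: use malloc()/free(). */
	lzma_index *idx = lzma_index_init(NULL, NULL);
	if (idx == NULL)
		return 1;

	/* One Record per Block: (Total Size, Uncompressed Size). Total Sizes
	 * are multiples of four, since index_append() encodes total_size / 4 - 1. */
	const lzma_vli total[3] = { 1024, 2048, 512 };
	const lzma_vli uncomp[3] = { 4096, 8192, 1024 };

	for (size_t i = 0; i < 3; ++i) {
		if (lzma_index_append(idx, NULL, total[i], uncomp[i])
				!= LZMA_OK) {
			lzma_index_end(idx, NULL);
			return 1;
		}
	}

	/* lzma_index_read() returns false as long as it produced a Record. */
	lzma_index_record rec;
	while (!lzma_index_read(idx, &rec))
		printf("Block at stream offset %llu, %llu bytes uncompressed\n",
				(unsigned long long)(rec.stream_offset),
				(unsigned long long)(rec.uncompressed_size));

	/* Find the Block containing uncompressed offset 5000; with the sizes
	 * above this is the second Block, starting at offset 4096. */
	if (!lzma_index_locate(idx, &rec, 5000))
		printf("offset 5000 is in the Block starting at %llu\n",
				(unsigned long long)(rec.uncompressed_offset));

	printf("Records: %llu, Stream size: %llu\n",
			(unsigned long long)(lzma_index_count(idx)),
			(unsigned long long)(lzma_index_stream_size(idx)));

	lzma_index_end(idx, NULL);
	return 0;
}

Note that lzma_index_read() and lzma_index_locate() follow the same convention as the other boolean returns above: false means a Record was produced, true means nothing could be returned.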