diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
commit | e114502b2bc371e4a45449832cb69be036360722 (patch) | |
tree | 449c41d0408f99926de202611091747f1fbe2f85 /src/liblzma/common/index.c | |
parent | Fixed the test that should have been fixed as part (diff) | |
download | xz-e114502b2bc371e4a45449832cb69be036360722.tar.xz |
Oh well, big messy commit again. Some highlights:
- Updated to the latest, probably final file format version.
- Command line tool reworked to not use threads anymore.
Threading will probably go into liblzma anyway.
- Memory usage limit is now about 30 % for uncompression
and about 90 % for compression.
- Progress indicator with --verbose
- Simplified --help and full --long-help
- Upgraded to the last LGPLv2.1+ getopt_long from gnulib.
- Some bug fixes
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/common/index.c | 259 |
1 files changed, 142 insertions, 117 deletions
diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index f965749f..1fe65650 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -20,24 +20,34 @@ #include "index.h" -/// Number of Records to allocate at once. +/// Number of Records to allocate at once in the unrolled list. #define INDEX_GROUP_SIZE 256 typedef struct lzma_index_group_s lzma_index_group; struct lzma_index_group_s { - /// Next group + /// Previous group lzma_index_group *prev; - /// Previous group + /// Next group lzma_index_group *next; /// Index of the last Record in this group size_t last; - /// Total Size fields as cumulative sum relative to the beginning - /// of the group. The total size of the group is total_sums[last]. - lzma_vli total_sums[INDEX_GROUP_SIZE]; + /// Unpadded Size fields as special cumulative sum relative to the + /// beginning of the group. It's special in sense that the previous + /// value is rounded up the next multiple of four with before + /// calculating the new value. The total encoded size of the Blocks + /// in the group is unpadded_sums[last] rounded up to the next + /// multiple of four. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored + /// values are 39, 97 (40 + 57), and 181 (100 + 181). The total + /// encoded size of these Blocks is 184. + /// + /// This encoding is nice from point of view of lzma_index_locate(). + lzma_vli unpadded_sums[INDEX_GROUP_SIZE]; /// Uncompressed Size fields as cumulative sum relative to the /// beginning of the group. The uncompressed size of the group is @@ -56,19 +66,13 @@ struct lzma_index_s { /// Uncompressed size of the Stream lzma_vli uncompressed_size; - /// Number of non-padding records. This is needed by Index encoder. + /// Number of non-padding records. This is needed for Index encoder. lzma_vli count; /// Size of the List of Records field; this is updated every time /// a new non-padding Record is added. lzma_vli index_list_size; - /// This is zero if no Indexes have been combined with - /// lzma_index_cat(). With combined Indexes, this contains the sizes - /// of all but latest the Streams, including possible Stream Padding - /// fields. - lzma_vli padding_size; - /// First group of Records lzma_index_group *head; @@ -80,8 +84,8 @@ struct lzma_index_s { /// Group where the current read position is. lzma_index_group *group; - /// The most recently read record in *group - lzma_vli record; + /// The most recently read Record in *group + size_t record; /// Uncompressed offset of the beginning of *group relative /// to the beginning of the Stream @@ -102,6 +106,10 @@ struct lzma_index_s { /// Stream. This is needed when a new Index is concatenated /// to this lzma_index structure. lzma_vli index_list_size; + + /// Total size of all but the last Stream and all Stream + /// Padding fields. + lzma_vli streams_size; } old; }; @@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator) i->uncompressed_size = 0; i->count = 0; i->index_list_size = 0; - i->padding_size = 0; i->head = NULL; i->tail = NULL; i->current.group = NULL; i->old.count = 0; i->old.index_list_size = 0; + i->old.streams_size = 0; return i; } @@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i) { // If multiple Streams are concatenated, the Stream Header, Index, // and Stream Footer fields of all but the last Stream are already - // included in padding_size. Thus, we need to calculate only the + // included in old.streams_size. Thus, we need to calculate only the // size of the last Index, not all Indexes. - return i->total_size + i->padding_size + return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size + index_size(i->count - i->old.count, i->index_list_size - i->old.index_list_size) - + LZMA_STREAM_HEADER_SIZE * 2; + + LZMA_STREAM_HEADER_SIZE; } @@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i) } -/// Helper function for index_append() +/// Appends a new Record to the Index. If needed, this allocates a new +/// Record group. static lzma_ret index_append_real(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size, + lzma_vli unpadded_size, lzma_vli uncompressed_size, bool is_padding) { // Add the new record. @@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, g->prev = i->tail; g->next = NULL; g->last = 0; - g->total_sums[0] = total_size; + g->unpadded_sums[0] = unpadded_size; g->uncompressed_sums[0] = uncompressed_size; g->paddings[0] = is_padding; @@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } else { // i->tail has space left for at least one record. - i->tail->total_sums[i->tail->last + 1] - = i->tail->total_sums[i->tail->last] - + total_size; + i->tail->unpadded_sums[i->tail->last + 1] + = unpadded_size + vli_ceil4( + i->tail->unpadded_sums[i->tail->last]); i->tail->uncompressed_sums[i->tail->last + 1] = i->tail->uncompressed_sums[i->tail->last] + uncompressed_size; @@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } -static lzma_ret -index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, - lzma_vli uncompressed_size, bool is_padding) +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) { - if (total_size > LZMA_VLI_MAX + if (unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; // This looks a bit ugly. We want to first validate that the Index // and Stream stay in valid limits after adding this Record. After @@ -280,65 +290,38 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, // slightly more correct to validate before allocating, YMMV). lzma_ret ret; - if (is_padding) { - assert(uncompressed_size == 0); + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); - // First update the info so we can validate it. - i->padding_size += total_size; - - if (i->padding_size > LZMA_VLI_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, true); - - // If something went wrong, undo the updated value. - if (ret != LZMA_OK) - i->padding_size -= total_size; + const lzma_vli total_size = vli_ceil4(unpadded_size); - } else { - // First update the overall info so we can validate it. - const lzma_vli index_list_size_add - = lzma_vli_size(total_size / 4 - 1) - + lzma_vli_size(uncompressed_size); - - i->total_size += total_size; - i->uncompressed_size += uncompressed_size; - ++i->count; - i->index_list_size += index_list_size_add; - - if (i->total_size > LZMA_VLI_MAX - || i->uncompressed_size > LZMA_VLI_MAX - || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, false); + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; - if (ret != LZMA_OK) { - // Something went wrong. Undo the updates. - i->total_size -= total_size; - i->uncompressed_size -= uncompressed_size; - --i->count; - i->index_list_size -= index_list_size_add; - } + if (i->total_size > LZMA_VLI_MAX + || i->uncompressed_size > LZMA_VLI_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, unpadded_size, + uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } return ret; } -extern LZMA_API lzma_ret -lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - return index_append(i, allocator, - total_size, uncompressed_size, false); -} - - /// Initialize i->current to point to the first Record. static bool init_current(lzma_index *i) @@ -370,10 +353,10 @@ previous_group(lzma_index *i) i->current.record = i->current.group->last; // Then update the offsets. - i->current.stream_offset -= i->current.group - ->total_sums[i->current.group->last]; - i->current.uncompressed_offset -= i->current.group - ->uncompressed_sums[i->current.group->last]; + i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); + i->current.uncompressed_offset -= i->current.group->uncompressed_sums[ + i->current.group->last]; return; } @@ -386,8 +369,8 @@ next_group(lzma_index *i) assert(i->current.group->next != NULL); // Update the offsets first. - i->current.stream_offset += i->current.group - ->total_sums[i->current.group->last]; + i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); i->current.uncompressed_offset += i->current.group ->uncompressed_sums[i->current.group->last]; @@ -403,30 +386,39 @@ next_group(lzma_index *i) static void set_info(const lzma_index *i, lzma_index_record *info) { - info->total_size = i->current.group->total_sums[i->current.record]; + // First copy the cumulative sizes from the current Record of the + // current group. + info->unpadded_size + = i->current.group->unpadded_sums[i->current.record]; + info->total_size = vli_ceil4(info->unpadded_size); info->uncompressed_size = i->current.group->uncompressed_sums[ i->current.record]; + // Copy the start offsets of this group. info->stream_offset = i->current.stream_offset; info->uncompressed_offset = i->current.uncompressed_offset; // If it's not the first Record in this group, we need to do some // adjustements. if (i->current.record > 0) { - // _sums[] are cumulative, thus we need to substract the - // _previous _sums[] to get the sizes of this Record. - info->total_size -= i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_size -= i->current.group + // Since the _sums[] are cumulative, we substract the sums of + // the previous Record to get the sizes of the current Record, + // and add the sums of the previous Record to the offsets. + // With unpadded_sums[] we need to take into account that it + // uses a bit weird way to do the cumulative summing + const lzma_vli total_sum + = vli_ceil4(i->current.group->unpadded_sums[ + i->current.record - 1]); + + const lzma_vli uncompressed_sum = i->current.group ->uncompressed_sums[i->current.record - 1]; - // i->current.{total,uncompressed}_offsets have the offset - // of the beginning of the group, thus we need to add the - // appropriate amount to get the offsetes of this Record. - info->stream_offset += i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_offset += i->current.group - ->uncompressed_sums[i->current.record - 1]; + info->total_size -= total_sum; + info->unpadded_size -= total_sum; + info->uncompressed_size -= uncompressed_sum; + + info->stream_offset += total_sum; + info->uncompressed_offset += uncompressed_sum; } return; @@ -548,11 +540,22 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Check that the combined size of the Indexes stays within limits. { + const lzma_vli dest_size = index_size_unpadded( + dest->count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Check that the combined size of the "files" (combined total + // encoded sizes) stays within limits. + { const lzma_vli dest_size = lzma_index_file_size(dest); const lzma_vli src_size = lzma_index_file_size(src); - if (dest_size + src_size > LZMA_VLI_UNKNOWN + if (dest_size + src_size > LZMA_VLI_MAX || dest_size + src_size + padding - > LZMA_VLI_UNKNOWN) + > LZMA_VLI_MAX) return LZMA_DATA_ERROR; } @@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // // NOTE: This cannot overflow, because Index Size is always // far smaller than LZMA_VLI_MAX, and adding two VLIs - // (Index Size and padding) doesn't overflow. It may become - // an invalid VLI if padding is huge, but that is caught by - // index_append(). + // (Index Size and padding) doesn't overflow. padding += index_size(dest->count - dest->old.count, dest->index_list_size - dest->old.index_list_size) + LZMA_STREAM_HEADER_SIZE * 2; + // While the above cannot overflow, but it may become an invalid VLI. + if (padding > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + // Add the padding Record. - return_if_error(index_append( - dest, allocator, padding, 0, true)); + { + lzma_ret ret; + + // First update the info so we can validate it. + dest->old.streams_size += padding; + + if (dest->old.streams_size > LZMA_VLI_MAX + || lzma_index_file_size(dest) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(dest, allocator, + padding, 0, true); + + // If something went wrong, undo the updated value and return + // the error. + if (ret != LZMA_OK) { + dest->old.streams_size -= padding; + return ret; + } + } // Avoid wasting lots of memory if src->head has only a few records // that fit into dest->tail. That is, combine two groups if possible. @@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, if (src->head != NULL && src->head->last + 1 <= INDEX_GROUP_SIZE - dest->tail->last - 1) { // Copy the first Record. - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[0]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[0]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[dest->tail->last] @@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Copy the rest. for (size_t i = 1; i < src->head->last; ++i) { - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[i + 1] - - src->head->total_sums[i]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[i + 1] + - src->head->unpadded_sums[i]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[ @@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, dest->old.count = dest->count + src->old.count; dest->old.index_list_size = dest->index_list_size + src->old.index_list_size; + dest->old.streams_size += src->old.streams_size; // Update overall information. dest->total_size += src->total_size; dest->uncompressed_size += src->uncompressed_size; dest->count += src->count; dest->index_list_size += src->index_list_size; - dest->padding_size += src->padding_size; // *src has nothing left but the base structure. lzma_free(src, allocator); @@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) // Copy the arrays so that we don't read uninitialized memory. const size_t count = src_group->last + 1; - memcpy(dest_group->total_sums, src_group->total_sums, + memcpy(dest_group->unpadded_sums, src_group->unpadded_sums, sizeof(lzma_vli) * count); memcpy(dest_group->uncompressed_sums, src_group->uncompressed_sums, @@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b) while (ag != NULL && bg != NULL) { const size_t count = ag->last + 1; if (ag->last != bg->last - || memcmp(ag->total_sums, - bg->total_sums, + || memcmp(ag->unpadded_sums, + bg->unpadded_sums, sizeof(lzma_vli) * count) != 0 || memcmp(ag->uncompressed_sums, bg->uncompressed_sums, |