diff options
Diffstat (limited to 'src/liblzma/common/stream_decoder.c')
-rw-r--r-- | src/liblzma/common/stream_decoder.c | 458 |
1 files changed, 138 insertions, 320 deletions
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 56de3d9f..1bf7f1f8 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -18,281 +18,148 @@ /////////////////////////////////////////////////////////////////////////////// #include "stream_common.h" +#include "stream_decoder.h" #include "check.h" #include "stream_flags_decoder.h" #include "block_decoder.h" -#include "metadata_decoder.h" struct lzma_coder_s { enum { - SEQ_STREAM_HEADER_CODE, - SEQ_BLOCK_HEADER_INIT, - SEQ_BLOCK_HEADER_CODE, - SEQ_METADATA_CODE, - SEQ_DATA_CODE, - SEQ_STREAM_TAIL_INIT, - SEQ_STREAM_TAIL_CODE, + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, } sequence; - /// Position in variable-length integers and in some other things. - size_t pos; - /// Block or Metadata decoder. This takes little memory and the same /// data structure can be used to decode every Block Header, so it's /// a good idea to have a separate lzma_next_coder structure for it. lzma_next_coder block_decoder; - /// Block Header decoder; this is separate - lzma_next_coder block_header_decoder; - + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. lzma_options_block block_options; - /// Information about the sizes of the Blocks - lzma_info *info; - - /// Current Block in *info - lzma_info_iter iter; - - /// Number of bytes not yet processed from Data Blocks in the Stream. - /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is - /// decremented while decoding and verified to match the reality. - lzma_vli total_left; - - /// Like uncompressed_left above but for uncompressed data from - /// Data Blocks. - lzma_vli uncompressed_left; - /// Stream Flags from Stream Header - lzma_stream_flags header_flags; - - /// Stream Flags from Stream tail - lzma_stream_flags tail_flags; + lzma_stream_flags stream_flags; - /// Decoder for Stream Header and Stream tail. This takes very - /// little memory and the same data structure can be used for - /// both Header and tail, so it's a good idea to have a separate - /// lzma_next_coder structure for it. - lzma_next_coder flags_decoder; + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; - /// Temporary destination for the decoded Metadata. - lzma_metadata metadata; + /// Write position in buffer[] + size_t buffer_pos; - /// Pointer to application-supplied pointer where to store the list - /// of Extra Records from the Header Metadata Block. - lzma_extra **header_extra; - - /// Same as above but Footer Metadata Block - lzma_extra **footer_extra; + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; static lzma_ret -metadata_init(lzma_coder *coder, lzma_allocator *allocator) -{ - assert(coder->metadata.index == NULL); - assert(coder->metadata.extra == NULL); - - // Single-Block Streams don't have Metadata Blocks. - if (!coder->header_flags.is_multi) - return LZMA_DATA_ERROR; - - coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the Uncompressed Size of a Metadata Block. This is to - // prevent security issues where input file would have very huge - // Metadata. - // - // FIXME: Hardcoded constant is ugly. Maybe we should provide - // some way to specify this from the application. - coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; - - lzma_info_size size_type; - bool want_extra; - - // If we haven't decoded any Data Blocks yet, this is Header - // Metadata Block. - if (lzma_info_index_count_get(coder->info) == 0) { - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - size_type = LZMA_INFO_HEADER_METADATA; - want_extra = coder->header_extra != NULL; - } else { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - size_type = LZMA_INFO_FOOTER_METADATA; - want_extra = coder->footer_extra != NULL; - } - - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.total_size = lzma_info_size_get( - coder->info, size_type); - - coder->sequence = SEQ_METADATA_CODE; - - return lzma_metadata_decoder_init(&coder->block_decoder, allocator, - &coder->block_options, &coder->metadata, want_extra); -} - - -static lzma_ret -data_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(lzma_info_iter_set( - &coder->iter, LZMA_VLI_VALUE_UNKNOWN, - coder->block_options.uncompressed_size)); - - coder->block_options.total_size = coder->iter.total_size; - coder->block_options.uncompressed_size = coder->iter.uncompressed_size; - coder->block_options.total_limit = coder->total_left; - coder->block_options.uncompressed_limit = coder->uncompressed_left; - - if (coder->header_flags.is_multi) { - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - } else { - coder->block_options.has_uncompressed_size_in_footer - = coder->iter.uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - } - - coder->sequence = SEQ_DATA_CODE; - - return lzma_block_decoder_init(&coder->block_decoder, allocator, - &coder->block_options); -} - - -static lzma_ret stream_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA_CODE)) + || coder->sequence == SEQ_BLOCK)) switch (coder->sequence) { - case SEQ_STREAM_HEADER_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->buffer_pos = 0; + + // Decode the Stream Header. + return_if_error(lzma_stream_header_decode( + &coder->stream_flags, coder->buffer)); - coder->sequence = SEQ_BLOCK_HEADER_INIT; + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_UNSUPPORTED_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; // Detect if the Check type is supported and give appropriate // warning if it isn't. We don't warn every time a new Block // is started. - lzma_check tmp; - if (lzma_check_init(&tmp, coder->header_flags.check)) + if (!lzma_available_checks[coder->block_options.check]) return LZMA_UNSUPPORTED_CHECK; break; } - case SEQ_BLOCK_HEADER_INIT: { - coder->block_options.check = coder->header_flags.check; - coder->block_options.has_crc32 = coder->header_flags.has_crc32; + case SEQ_BLOCK_HEADER: { + if (coder->buffer_pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } - for (size_t i = 0; - i < ARRAY_SIZE(coder->block_options.filters); - ++i) { - lzma_free(coder->block_options.filters[i].options, - allocator); - coder->block_options.filters[i].options = NULL; + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); } - return_if_error(lzma_block_header_decoder_init( - &coder->block_header_decoder, allocator, - &coder->block_options)); - - coder->sequence = SEQ_BLOCK_HEADER_CODE; - } - - // Fall through - - case SEQ_BLOCK_HEADER_CODE: { - lzma_ret ret = coder->block_header_decoder.code( - coder->block_header_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the Block Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + coder->block_options.header_size); - if (coder->block_options.is_metadata) - ret = metadata_init(coder, allocator); - else - ret = data_init(coder, allocator); - - if (ret != LZMA_OK) - return ret; - - break; - } + // Return if we didn't get the whole Block Header yet. + if (coder->buffer_pos < coder->block_options.header_size) + return LZMA_OK; - case SEQ_METADATA_CODE: { - lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + coder->buffer_pos = 0; - const bool is_header_metadata = lzma_info_index_count_get( - coder->info) == 0; + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + coder->block_options.filters = filters; - if (is_header_metadata) { - if (coder->header_extra != NULL) { - *coder->header_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, - coder->block_options.total_size) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_BLOCK_HEADER_INIT; - } else { - if (coder->footer_extra != NULL) { - *coder->footer_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, &coder->block_options); - coder->sequence = SEQ_STREAM_TAIL_INIT; - } + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); - assert(coder->metadata.extra == NULL); + coder->block_options.filters = NULL; - ret = lzma_info_metadata_set(coder->info, allocator, - &coder->metadata, is_header_metadata, true); - if (ret != LZMA_OK) + // Check if Block enocoder initialization succeeded. Don't + // warn about unsupported check anymore since we did it + // earlier if it was needed. + if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK) return ret; - // Intialize coder->total_size and coder->uncompressed_size - // from Header Metadata. - if (is_header_metadata) { - coder->total_left = lzma_info_size_get( - coder->info, LZMA_INFO_TOTAL); - coder->uncompressed_left = lzma_info_size_get( - coder->info, LZMA_INFO_UNCOMPRESSED); - } - + coder->sequence = SEQ_BLOCK; break; } - case SEQ_DATA_CODE: { + case SEQ_BLOCK: { lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, @@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - ret = lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size); - if (ret != LZMA_OK) - return ret; - - // These won't overflow since lzma_info_iter_set() succeeded. - if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN) - coder->total_left -= coder->block_options.total_size; - if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_left -= coder->block_options - .uncompressed_size; + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_total_size_get( + &coder->block_options), + coder->block_options.uncompressed_size)); - if (!coder->header_flags.is_multi) { - ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_STREAM_TAIL_INIT; - break; - } - - coder->sequence = SEQ_BLOCK_HEADER_INIT; + coder->sequence = SEQ_BLOCK_HEADER; break; } - case SEQ_STREAM_TAIL_INIT: { - lzma_ret ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - ret = lzma_stream_tail_decoder_init(&coder->flags_decoder, - allocator, &coder->tail_flags); - if (ret != LZMA_OK) + case SEQ_INDEX: { + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) return ret; - coder->sequence = SEQ_STREAM_TAIL_CODE; + coder->sequence = SEQ_STREAM_FOOTER; + break; } - // Fall through + case SEQ_STREAM_FOOTER: + // Copy the Stream Footer to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); - case SEQ_STREAM_TAIL_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + // Return if we didn't get the whole Stream Footer yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; - if (!lzma_stream_flags_is_equal( - coder->header_flags, coder->tail_flags)) + // Decode the Stream Footer. + lzma_stream_flags footer_flags; + return_if_error(lzma_stream_footer_decode( + &footer_flags, coder->buffer)); + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + if (!lzma_stream_flags_equal(&coder->stream_flags, + &footer_flags)) return LZMA_DATA_ERROR; return LZMA_STREAM_END; - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { - for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i) - lzma_free(coder->block_options.filters[i].options, allocator); - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_next_coder_end(&coder->block_header_decoder, allocator); - lzma_next_coder_end(&coder->flags_decoder, allocator); - lzma_info_free(coder->info, allocator); - lzma_index_free(coder->metadata.index, allocator); - lzma_extra_free(coder->metadata.extra, allocator); + lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_ret -stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->end = &stream_decoder_end; next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; - next->coder->metadata.index = NULL; - next->coder->metadata.extra = NULL; - } else { - for (size_t i = 0; i < ARRAY_SIZE( - next->coder->block_options.filters); ++i) - lzma_free(next->coder->block_options - .filters[i].options, allocator); - - lzma_index_free(next->coder->metadata.index, allocator); - next->coder->metadata.index = NULL; - - lzma_extra_free(next->coder->metadata.extra, allocator); - next->coder->metadata.extra = NULL; + next->coder->index_hash = NULL; } - for (size_t i = 0; i < ARRAY_SIZE(next->coder->block_options.filters); - ++i) - next->coder->block_options.filters[i].options = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) + // Initialize the Index hash used to verify the Index. + next->coder->index_hash = lzma_index_hash_init( + next->coder->index_hash, allocator); + if (next->coder->index_hash == NULL) return LZMA_MEM_ERROR; - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - // Initialize Stream Header decoder. - return_if_error(lzma_stream_header_decoder_init( - &next->coder->flags_decoder, allocator, - &next->coder->header_flags)); - - // Reset the *foo_extra pointers to NULL. This way the caller knows - // if there were no Extra Records. (We don't support appending - // Records to Extra list.) - if (header != NULL) - *header = NULL; - if (footer != NULL) - *footer = NULL; - - // Reset some variables. - next->coder->sequence = SEQ_STREAM_HEADER_CODE; - next->coder->pos = 0; - next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->header_extra = header; - next->coder->footer_extra = footer; + // Reset the rest of the variables. + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.filters = NULL; + next->coder->buffer_pos = 0; return LZMA_OK; } extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - lzma_next_coder_init( - stream_decoder_init, next, allocator, header, footer); + lzma_next_coder_init0(stream_decoder_init, next, allocator); } extern LZMA_API lzma_ret -lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, stream_decoder_init, header, footer); + lzma_next_strm_init0(strm, stream_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; |