aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common/stream_decoder.c
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2008-06-18 18:02:10 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-06-18 18:02:10 +0300
commit: 7d17818cec8597f847b0a2537fde991bbc3d9e96 (patch)
tree: 9c41502e3eb96f103fe98e13456b382fbba7a292 /src/liblzma/common/stream_decoder.c
parent: Update the file format specification draft. The new one is (diff)
download: xz-7d17818cec8597f847b0a2537fde991bbc3d9e96.tar.xz
Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has changed in many places, and some more changes will be made soon. While most of the code has been updated, some things are not fixed yet (the command line tool will choke on an invalid filter chain, if nothing else). The Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided.
Diffstat (limited to '')
-rw-r--r-- src/liblzma/common/stream_decoder.c | 458
1 files changed, 138 insertions, 320 deletions
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c
index 56de3d9f..1bf7f1f8 100644
--- a/src/liblzma/common/stream_decoder.c
+++ b/src/liblzma/common/stream_decoder.c
@@ -18,281 +18,148 @@
///////////////////////////////////////////////////////////////////////////////
#include "stream_common.h"
+#include "stream_decoder.h"
#include "check.h"
#include "stream_flags_decoder.h"
#include "block_decoder.h"
-#include "metadata_decoder.h"
struct lzma_coder_s {
enum {
- SEQ_STREAM_HEADER_CODE,
- SEQ_BLOCK_HEADER_INIT,
- SEQ_BLOCK_HEADER_CODE,
- SEQ_METADATA_CODE,
- SEQ_DATA_CODE,
- SEQ_STREAM_TAIL_INIT,
- SEQ_STREAM_TAIL_CODE,
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK_HEADER,
+ SEQ_BLOCK,
+ SEQ_INDEX,
+ SEQ_STREAM_FOOTER,
} sequence;
- /// Position in variable-length integers and in some other things.
- size_t pos;
-
/// Block or Metadata decoder. This takes little memory and the same
/// data structure can be used to decode every Block Header, so it's
/// a good idea to have a separate lzma_next_coder structure for it.
lzma_next_coder block_decoder;
- /// Block Header decoder; this is separate
- lzma_next_coder block_header_decoder;
-
+ /// Block options decoded by the Block Header decoder and used by
+ /// the Block decoder.
lzma_options_block block_options;
- /// Information about the sizes of the Blocks
- lzma_info *info;
-
- /// Current Block in *info
- lzma_info_iter iter;
-
- /// Number of bytes not yet processed from Data Blocks in the Stream.
- /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is
- /// decremented while decoding and verified to match the reality.
- lzma_vli total_left;
-
- /// Like uncompressed_left above but for uncompressed data from
- /// Data Blocks.
- lzma_vli uncompressed_left;
-
/// Stream Flags from Stream Header
- lzma_stream_flags header_flags;
-
- /// Stream Flags from Stream tail
- lzma_stream_flags tail_flags;
+ lzma_stream_flags stream_flags;
- /// Decoder for Stream Header and Stream tail. This takes very
- /// little memory and the same data structure can be used for
- /// both Header and tail, so it's a good idea to have a separate
- /// lzma_next_coder structure for it.
- lzma_next_coder flags_decoder;
+ /// Index is hashed so that it can be compared to the sizes of Blocks
+ /// with O(1) memory usage.
+ lzma_index_hash *index_hash;
- /// Temporary destination for the decoded Metadata.
- lzma_metadata metadata;
+ /// Write position in buffer[]
+ size_t buffer_pos;
- /// Pointer to application-supplied pointer where to store the list
- /// of Extra Records from the Header Metadata Block.
- lzma_extra **header_extra;
-
- /// Same as above but Footer Metadata Block
- lzma_extra **footer_extra;
+ /// Buffer to hold Stream Header, Block Header, and Stream Footer.
+ /// Block Header has biggest maximum size.
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
};
static lzma_ret
-metadata_init(lzma_coder *coder, lzma_allocator *allocator)
-{
- assert(coder->metadata.index == NULL);
- assert(coder->metadata.extra == NULL);
-
- // Single-Block Streams don't have Metadata Blocks.
- if (!coder->header_flags.is_multi)
- return LZMA_DATA_ERROR;
-
- coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN;
-
- // Limit the Uncompressed Size of a Metadata Block. This is to
- // prevent security issues where input file would have very huge
- // Metadata.
- //
- // FIXME: Hardcoded constant is ugly. Maybe we should provide
- // some way to specify this from the application.
- coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23;
-
- lzma_info_size size_type;
- bool want_extra;
-
- // If we haven't decoded any Data Blocks yet, this is Header
- // Metadata Block.
- if (lzma_info_index_count_get(coder->info) == 0) {
- coder->block_options.has_backward_size = false;
- coder->block_options.handle_padding = true;
- size_type = LZMA_INFO_HEADER_METADATA;
- want_extra = coder->header_extra != NULL;
- } else {
- if (lzma_info_index_finish(coder->info))
- return LZMA_DATA_ERROR;
-
- coder->block_options.has_backward_size = true;
- coder->block_options.handle_padding = false;
- size_type = LZMA_INFO_FOOTER_METADATA;
- want_extra = coder->footer_extra != NULL;
- }
-
- coder->block_options.has_uncompressed_size_in_footer = false;
- coder->block_options.total_size = lzma_info_size_get(
- coder->info, size_type);
-
- coder->sequence = SEQ_METADATA_CODE;
-
- return lzma_metadata_decoder_init(&coder->block_decoder, allocator,
- &coder->block_options, &coder->metadata, want_extra);
-}
-
-
-static lzma_ret
-data_init(lzma_coder *coder, lzma_allocator *allocator)
-{
- return_if_error(lzma_info_iter_next(&coder->iter, allocator));
-
- return_if_error(lzma_info_iter_set(
- &coder->iter, LZMA_VLI_VALUE_UNKNOWN,
- coder->block_options.uncompressed_size));
-
- coder->block_options.total_size = coder->iter.total_size;
- coder->block_options.uncompressed_size = coder->iter.uncompressed_size;
- coder->block_options.total_limit = coder->total_left;
- coder->block_options.uncompressed_limit = coder->uncompressed_left;
-
- if (coder->header_flags.is_multi) {
- coder->block_options.has_uncompressed_size_in_footer = false;
- coder->block_options.has_backward_size = false;
- coder->block_options.handle_padding = true;
- } else {
- coder->block_options.has_uncompressed_size_in_footer
- = coder->iter.uncompressed_size
- == LZMA_VLI_VALUE_UNKNOWN;
- coder->block_options.has_backward_size = true;
- coder->block_options.handle_padding = false;
- }
-
- coder->sequence = SEQ_DATA_CODE;
-
- return lzma_block_decoder_init(&coder->block_decoder, allocator,
- &coder->block_options);
-}
-
-
-static lzma_ret
stream_decode(lzma_coder *coder, lzma_allocator *allocator,
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
+ // When decoding the actual Block, it may be able to produce more
+ // output even if we don't give it any new input.
while (*out_pos < out_size && (*in_pos < in_size
- || coder->sequence == SEQ_DATA_CODE))
+ || coder->sequence == SEQ_BLOCK))
switch (coder->sequence) {
- case SEQ_STREAM_HEADER_CODE: {
- const lzma_ret ret = coder->flags_decoder.code(
- coder->flags_decoder.coder,
- allocator, in, in_pos, in_size,
- NULL, NULL, 0, LZMA_RUN);
- if (ret != LZMA_STREAM_END)
- return ret;
+ case SEQ_STREAM_HEADER: {
+ // Copy the Stream Header to the internal buffer.
+ bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos,
+ LZMA_STREAM_HEADER_SIZE);
+
+ // Return if we didn't get the whole Stream Header yet.
+ if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->buffer_pos = 0;
+
+ // Decode the Stream Header.
+ return_if_error(lzma_stream_header_decode(
+ &coder->stream_flags, coder->buffer));
- coder->sequence = SEQ_BLOCK_HEADER_INIT;
+ // Copy the type of the Check so that Block Header and Block
+ // decoders see it.
+ coder->block_options.check = coder->stream_flags.check;
+
+ // Even if we return LZMA_UNSUPPORTED_CHECK below, we want
+ // to continue from Block Header decoding.
+ coder->sequence = SEQ_BLOCK_HEADER;
// Detect if the Check type is supported and give appropriate
// warning if it isn't. We don't warn every time a new Block
// is started.
- lzma_check tmp;
- if (lzma_check_init(&tmp, coder->header_flags.check))
+ if (!lzma_available_checks[coder->block_options.check])
return LZMA_UNSUPPORTED_CHECK;
break;
}
- case SEQ_BLOCK_HEADER_INIT: {
- coder->block_options.check = coder->header_flags.check;
- coder->block_options.has_crc32 = coder->header_flags.has_crc32;
+ case SEQ_BLOCK_HEADER: {
+ if (coder->buffer_pos == 0) {
+ // Detect if it's Index.
+ if (in[*in_pos] == 0x00) {
+ coder->sequence = SEQ_INDEX;
+ break;
+ }
- for (size_t i = 0;
- i < ARRAY_SIZE(coder->block_options.filters);
- ++i) {
- lzma_free(coder->block_options.filters[i].options,
- allocator);
- coder->block_options.filters[i].options = NULL;
+ // Calculate the size of the Block Header. Note that
+ // Block Header decoder wants to see this byte too
+ // so don't advance *in_pos.
+ coder->block_options.header_size
+ = lzma_block_header_size_decode(
+ in[*in_pos]);
}
- return_if_error(lzma_block_header_decoder_init(
- &coder->block_header_decoder, allocator,
- &coder->block_options));
-
- coder->sequence = SEQ_BLOCK_HEADER_CODE;
- }
-
- // Fall through
-
- case SEQ_BLOCK_HEADER_CODE: {
- lzma_ret ret = coder->block_header_decoder.code(
- coder->block_header_decoder.coder,
- allocator, in, in_pos, in_size,
- NULL, NULL, 0, LZMA_RUN);
-
- if (ret != LZMA_STREAM_END)
- return ret;
+ // Copy the Block Header to the internal buffer.
+ bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos,
+ coder->block_options.header_size);
- if (coder->block_options.is_metadata)
- ret = metadata_init(coder, allocator);
- else
- ret = data_init(coder, allocator);
-
- if (ret != LZMA_OK)
- return ret;
-
- break;
- }
+ // Return if we didn't get the whole Block Header yet.
+ if (coder->buffer_pos < coder->block_options.header_size)
+ return LZMA_OK;
- case SEQ_METADATA_CODE: {
- lzma_ret ret = coder->block_decoder.code(
- coder->block_decoder.coder, allocator,
- in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
- if (ret != LZMA_STREAM_END)
- return ret;
+ coder->buffer_pos = 0;
- const bool is_header_metadata = lzma_info_index_count_get(
- coder->info) == 0;
+ // Set up a buffer to hold the filter chain. Block Header
+ // decoder will initialize all members of this array so
+ // we don't need to do it here.
+ lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1];
+ coder->block_options.filters = filters;
- if (is_header_metadata) {
- if (coder->header_extra != NULL) {
- *coder->header_extra = coder->metadata.extra;
- coder->metadata.extra = NULL;
- }
+ // Decode the Block Header.
+ return_if_error(lzma_block_header_decode(&coder->block_options,
+ allocator, coder->buffer));
- if (lzma_info_size_set(coder->info,
- LZMA_INFO_HEADER_METADATA,
- coder->block_options.total_size)
- != LZMA_OK)
- return LZMA_PROG_ERROR;
-
- coder->sequence = SEQ_BLOCK_HEADER_INIT;
- } else {
- if (coder->footer_extra != NULL) {
- *coder->footer_extra = coder->metadata.extra;
- coder->metadata.extra = NULL;
- }
+ // Initialize the Block decoder.
+ const lzma_ret ret = lzma_block_decoder_init(
+ &coder->block_decoder,
+ allocator, &coder->block_options);
- coder->sequence = SEQ_STREAM_TAIL_INIT;
- }
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i)
+ lzma_free(filters[i].options, allocator);
- assert(coder->metadata.extra == NULL);
+ coder->block_options.filters = NULL;
- ret = lzma_info_metadata_set(coder->info, allocator,
- &coder->metadata, is_header_metadata, true);
- if (ret != LZMA_OK)
+ // Check if Block decoder initialization succeeded. Don't
+ // warn about unsupported check anymore since we did it
+ // earlier if it was needed.
+ if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK)
return ret;
- // Intialize coder->total_size and coder->uncompressed_size
- // from Header Metadata.
- if (is_header_metadata) {
- coder->total_left = lzma_info_size_get(
- coder->info, LZMA_INFO_TOTAL);
- coder->uncompressed_left = lzma_info_size_get(
- coder->info, LZMA_INFO_UNCOMPRESSED);
- }
-
+ coder->sequence = SEQ_BLOCK;
break;
}
- case SEQ_DATA_CODE: {
+ case SEQ_BLOCK: {
lzma_ret ret = coder->block_decoder.code(
coder->block_decoder.coder, allocator,
in, in_pos, in_size, out, out_pos, out_size,
@@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
if (ret != LZMA_STREAM_END)
return ret;
- ret = lzma_info_iter_set(&coder->iter,
- coder->block_options.total_size,
- coder->block_options.uncompressed_size);
- if (ret != LZMA_OK)
- return ret;
-
- // These won't overflow since lzma_info_iter_set() succeeded.
- if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN)
- coder->total_left -= coder->block_options.total_size;
- if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN)
- coder->uncompressed_left -= coder->block_options
- .uncompressed_size;
+ // Block decoded successfully. Add the new size pair to
+ // the Index hash.
+ return_if_error(lzma_index_hash_append(coder->index_hash,
+ lzma_block_total_size_get(
+ &coder->block_options),
+ coder->block_options.uncompressed_size));
- if (!coder->header_flags.is_multi) {
- ret = lzma_info_index_finish(coder->info);
- if (ret != LZMA_OK)
- return ret;
-
- coder->sequence = SEQ_STREAM_TAIL_INIT;
- break;
- }
-
- coder->sequence = SEQ_BLOCK_HEADER_INIT;
+ coder->sequence = SEQ_BLOCK_HEADER;
break;
}
- case SEQ_STREAM_TAIL_INIT: {
- lzma_ret ret = lzma_info_index_finish(coder->info);
- if (ret != LZMA_OK)
- return ret;
-
- ret = lzma_stream_tail_decoder_init(&coder->flags_decoder,
- allocator, &coder->tail_flags);
- if (ret != LZMA_OK)
+ case SEQ_INDEX: {
+ // Decode the Index and compare it to the hash calculated
+ // from the sizes of the Blocks (if any).
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
return ret;
- coder->sequence = SEQ_STREAM_TAIL_CODE;
+ coder->sequence = SEQ_STREAM_FOOTER;
+ break;
}
- // Fall through
+ case SEQ_STREAM_FOOTER:
+ // Copy the Stream Footer to the internal buffer.
+ bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos,
+ LZMA_STREAM_HEADER_SIZE);
- case SEQ_STREAM_TAIL_CODE: {
- const lzma_ret ret = coder->flags_decoder.code(
- coder->flags_decoder.coder, allocator,
- in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
- if (ret != LZMA_STREAM_END)
- return ret;
+ // Return if we didn't get the whole Stream Footer yet.
+ if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
- if (!lzma_stream_flags_is_equal(
- coder->header_flags, coder->tail_flags))
+ // Decode the Stream Footer.
+ lzma_stream_flags footer_flags;
+ return_if_error(lzma_stream_footer_decode(
+ &footer_flags, coder->buffer));
+
+ // Check that Index Size stored in the Stream Footer matches
+ // the real size of the Index field.
+ if (lzma_index_hash_size(coder->index_hash)
+ != footer_flags.backward_size)
+ return LZMA_DATA_ERROR;
+
+ // Verify that the Stream Flags fields are identical in
+ // both Stream Header and Stream Footer.
+ if (!lzma_stream_flags_equal(&coder->stream_flags,
+ &footer_flags))
return LZMA_DATA_ERROR;
return LZMA_STREAM_END;
- }
default:
+ assert(0);
return LZMA_PROG_ERROR;
}
@@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
static void
stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
- for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i)
- lzma_free(coder->block_options.filters[i].options, allocator);
-
lzma_next_coder_end(&coder->block_decoder, allocator);
- lzma_next_coder_end(&coder->block_header_decoder, allocator);
- lzma_next_coder_end(&coder->flags_decoder, allocator);
- lzma_info_free(coder->info, allocator);
- lzma_index_free(coder->metadata.index, allocator);
- lzma_extra_free(coder->metadata.extra, allocator);
+ lzma_index_hash_end(coder->index_hash, allocator);
lzma_free(coder, allocator);
return;
}
static lzma_ret
-stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
- lzma_extra **header, lzma_extra **footer)
+stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator)
{
if (next->coder == NULL) {
next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
@@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->end = &stream_decoder_end;
next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
- next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT;
- next->coder->info = NULL;
- next->coder->flags_decoder = LZMA_NEXT_CODER_INIT;
- next->coder->metadata.index = NULL;
- next->coder->metadata.extra = NULL;
- } else {
- for (size_t i = 0; i < ARRAY_SIZE(
- next->coder->block_options.filters); ++i)
- lzma_free(next->coder->block_options
- .filters[i].options, allocator);
-
- lzma_index_free(next->coder->metadata.index, allocator);
- next->coder->metadata.index = NULL;
-
- lzma_extra_free(next->coder->metadata.extra, allocator);
- next->coder->metadata.extra = NULL;
+ next->coder->index_hash = NULL;
}
- for (size_t i = 0; i < ARRAY_SIZE(next->coder->block_options.filters);
- ++i)
- next->coder->block_options.filters[i].options = NULL;
-
- next->coder->info = lzma_info_init(next->coder->info, allocator);
- if (next->coder->info == NULL)
+ // Initialize the Index hash used to verify the Index.
+ next->coder->index_hash = lzma_index_hash_init(
+ next->coder->index_hash, allocator);
+ if (next->coder->index_hash == NULL)
return LZMA_MEM_ERROR;
- lzma_info_iter_begin(next->coder->info, &next->coder->iter);
-
- // Initialize Stream Header decoder.
- return_if_error(lzma_stream_header_decoder_init(
- &next->coder->flags_decoder, allocator,
- &next->coder->header_flags));
-
- // Reset the *foo_extra pointers to NULL. This way the caller knows
- // if there were no Extra Records. (We don't support appending
- // Records to Extra list.)
- if (header != NULL)
- *header = NULL;
- if (footer != NULL)
- *footer = NULL;
-
- // Reset some variables.
- next->coder->sequence = SEQ_STREAM_HEADER_CODE;
- next->coder->pos = 0;
- next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN;
- next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN;
- next->coder->header_extra = header;
- next->coder->footer_extra = footer;
+ // Reset the rest of the variables.
+ next->coder->sequence = SEQ_STREAM_HEADER;
+ next->coder->block_options.filters = NULL;
+ next->coder->buffer_pos = 0;
return LZMA_OK;
}
extern lzma_ret
-lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
- lzma_extra **header, lzma_extra **footer)
+lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator)
{
- lzma_next_coder_init(
- stream_decoder_init, next, allocator, header, footer);
+ lzma_next_coder_init0(stream_decoder_init, next, allocator);
}
extern LZMA_API lzma_ret
-lzma_stream_decoder(lzma_stream *strm,
- lzma_extra **header, lzma_extra **footer)
+lzma_stream_decoder(lzma_stream *strm)
{
- lzma_next_strm_init(strm, stream_decoder_init, header, footer);
+ lzma_next_strm_init0(strm, stream_decoder_init);
strm->internal->supported_actions[LZMA_RUN] = true;
strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;