aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common/block_header_decoder.c
diff options
context:
space:
mode:
author Lasse Collin <lasse.collin@tukaani.org> 2008-06-18 18:02:10 +0300
committer Lasse Collin <lasse.collin@tukaani.org> 2008-06-18 18:02:10 +0300
commit 7d17818cec8597f847b0a2537fde991bbc3d9e96 (patch)
tree 9c41502e3eb96f103fe98e13456b382fbba7a292 /src/liblzma/common/block_header_decoder.c
parent Update the file format specification draft. The new one is (diff)
download xz-7d17818cec8597f847b0a2537fde991bbc3d9e96.tar.xz
Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided.
Diffstat (limited to 'src/liblzma/common/block_header_decoder.c')
-rw-r--r-- src/liblzma/common/block_header_decoder.c 400
1 files changed, 79 insertions, 321 deletions
diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c
index 7676c795..b9e072e0 100644
--- a/src/liblzma/common/block_header_decoder.c
+++ b/src/liblzma/common/block_header_decoder.c
@@ -21,353 +21,111 @@
#include "check.h"
-struct lzma_coder_s {
- lzma_options_block *options;
-
- enum {
- SEQ_FLAGS_1,
- SEQ_FLAGS_2,
- SEQ_COMPRESSED_SIZE,
- SEQ_UNCOMPRESSED_SIZE,
- SEQ_FILTER_FLAGS_INIT,
- SEQ_FILTER_FLAGS_DECODE,
- SEQ_CRC32,
- SEQ_PADDING
- } sequence;
-
- /// Position in variable-length integers
- size_t pos;
-
- /// CRC32 of the Block Header
- uint32_t crc32;
-
- lzma_next_coder filter_flags_decoder;
-};
-
-
-static bool
-update_sequence(lzma_coder *coder)
+static void
+free_properties(lzma_options_block *options, lzma_allocator *allocator)
{
- switch (coder->sequence) {
- case SEQ_FLAGS_2:
- if (coder->options->compressed_size
- != LZMA_VLI_VALUE_UNKNOWN) {
- coder->pos = 0;
- coder->sequence = SEQ_COMPRESSED_SIZE;
- break;
- }
-
- // Fall through
-
- case SEQ_COMPRESSED_SIZE:
- if (coder->options->uncompressed_size
- != LZMA_VLI_VALUE_UNKNOWN) {
- coder->pos = 0;
- coder->sequence = SEQ_UNCOMPRESSED_SIZE;
- break;
- }
-
- // Fall through
-
- case SEQ_UNCOMPRESSED_SIZE:
- coder->pos = 0;
-
- // Fall through
-
- case SEQ_FILTER_FLAGS_DECODE:
- if (coder->options->filters[coder->pos].id
- != LZMA_VLI_VALUE_UNKNOWN) {
- coder->sequence = SEQ_FILTER_FLAGS_INIT;
- break;
- }
-
- if (coder->options->has_crc32) {
- coder->pos = 0;
- coder->sequence = SEQ_CRC32;
- break;
- }
-
- case SEQ_CRC32:
- if (coder->options->padding != 0) {
- coder->pos = 0;
- coder->sequence = SEQ_PADDING;
- break;
- }
-
- return true;
-
- default:
- assert(0);
- return true;
+ // Free allocated filter options. The last array member is not
+ // touched after the initialization in the beginning of
+ // lzma_block_header_decode(), so we don't need to touch that here.
+ for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) {
+ lzma_free(options->filters[i].options, allocator);
+ options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN;
+ options->filters[i].options = NULL;
}
- return false;
+ return;
}
-static lzma_ret
-block_header_decode(lzma_coder *coder, lzma_allocator *allocator,
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
- size_t *restrict out_pos lzma_attribute((unused)),
- size_t out_size lzma_attribute((unused)),
- lzma_action action lzma_attribute((unused)))
+extern LZMA_API lzma_ret
+lzma_block_header_decode(lzma_options_block *options,
+ lzma_allocator *allocator, const uint8_t *in)
{
- while (*in_pos < in_size)
- switch (coder->sequence) {
- case SEQ_FLAGS_1:
- // Check that the reserved bit is unset. Use HEADER_ERROR
- // because newer version of liblzma may support the reserved
- // bit, although it is likely that this is just a broken file.
- if (in[*in_pos] & 0x40)
- return LZMA_HEADER_ERROR;
-
- // Number of filters: we prepare appropriate amount of
- // variables for variable-length integer parsing. The
- // initialization function has already reset the rest
- // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows
- // us to later know how many filters there are.
- for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i)
- coder->options->filters[i].id = 0;
-
- // End of Payload Marker flag
- coder->options->has_eopm = (in[*in_pos] & 0x08) != 0;
-
- // Compressed Size: Prepare for variable-length integer
- // parsing if it is known.
- if (in[*in_pos] & 0x10)
- coder->options->compressed_size = 0;
-
- // Uncompressed Size: the same.
- if (in[*in_pos] & 0x20)
- coder->options->uncompressed_size = 0;
-
- // Is Metadata Block flag
- coder->options->is_metadata = (in[*in_pos] & 0x80) != 0;
-
- // We need at least one: Uncompressed Size or EOPM.
- if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN
- && !coder->options->has_eopm)
- return LZMA_DATA_ERROR;
-
- // Update header CRC32.
- coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32);
-
- ++*in_pos;
- coder->sequence = SEQ_FLAGS_2;
- break;
-
- case SEQ_FLAGS_2:
- // Check that the reserved bits are unset.
- if (in[*in_pos] & 0xE0)
- return LZMA_DATA_ERROR;
-
- // Get the size of Header Padding.
- coder->options->padding = in[*in_pos] & 0x1F;
-
- coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32);
-
- ++*in_pos;
-
- if (update_sequence(coder))
- return LZMA_STREAM_END;
-
- break;
-
- case SEQ_COMPRESSED_SIZE: {
- // Store the old input position to be used when
- // updating coder->header_crc32.
- const size_t in_start = *in_pos;
-
- const lzma_ret ret = lzma_vli_decode(
- &coder->options->compressed_size,
- &coder->pos, in, in_pos, in_size);
-
- const size_t in_used = *in_pos - in_start;
-
- coder->options->compressed_reserve += in_used;
- assert(coder->options->compressed_reserve
- <= LZMA_VLI_BYTES_MAX);
-
- coder->options->header_size += in_used;
-
- coder->crc32 = lzma_crc32(in + in_start, in_used,
- coder->crc32);
-
- if (ret != LZMA_STREAM_END)
- return ret;
-
- if (update_sequence(coder))
- return LZMA_STREAM_END;
-
- break;
- }
-
- case SEQ_UNCOMPRESSED_SIZE: {
- const size_t in_start = *in_pos;
-
- const lzma_ret ret = lzma_vli_decode(
- &coder->options->uncompressed_size,
- &coder->pos, in, in_pos, in_size);
-
- const size_t in_used = *in_pos - in_start;
-
- coder->options->uncompressed_reserve += in_used;
- assert(coder->options->uncompressed_reserve
- <= LZMA_VLI_BYTES_MAX);
-
- coder->options->header_size += in_used;
-
- coder->crc32 = lzma_crc32(in + in_start, in_used,
- coder->crc32);
-
- if (ret != LZMA_STREAM_END)
- return ret;
-
- if (update_sequence(coder))
- return LZMA_STREAM_END;
-
- break;
- }
-
- case SEQ_FILTER_FLAGS_INIT: {
- assert(coder->options->filters[coder->pos].id
- != LZMA_VLI_VALUE_UNKNOWN);
-
- const lzma_ret ret = lzma_filter_flags_decoder_init(
- &coder->filter_flags_decoder, allocator,
- &coder->options->filters[coder->pos]);
- if (ret != LZMA_OK)
- return ret;
-
- coder->sequence = SEQ_FILTER_FLAGS_DECODE;
+ // NOTE: We consider the header to be corrupt not only when the
+ // CRC32 doesn't match, but also when variable-length integers
+ // are invalid or over 63 bits, or if the header is too small
+ // to contain the claimed information.
+
+ // Initialize the filter options array. This way the caller can
+ // safely free() the options even if an error occurs in this function.
+ for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) {
+ options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN;
+ options->filters[i].options = NULL;
}
- // Fall through
-
- case SEQ_FILTER_FLAGS_DECODE: {
- const size_t in_start = *in_pos;
+ size_t in_size = options->header_size;
- const lzma_ret ret = coder->filter_flags_decoder.code(
- coder->filter_flags_decoder.coder,
- allocator, in, in_pos, in_size,
- NULL, NULL, 0, LZMA_RUN);
-
- const size_t in_used = *in_pos - in_start;
- coder->options->header_size += in_used;
- coder->crc32 = lzma_crc32(in + in_start,
- in_used, coder->crc32);
+ // Validate. The caller must have set options->header_size with the
+ // lzma_block_header_size_decode() macro, so it is a programming error
+ // if these tests fail.
+ if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN
+ || in_size > LZMA_BLOCK_HEADER_SIZE_MAX
+ || (in_size & 3)
+ || lzma_block_header_size_decode(in[0]) != in_size)
+ return LZMA_PROG_ERROR;
- if (ret != LZMA_STREAM_END)
- return ret;
+ // Exclude the CRC32 field.
+ in_size -= 4;
- ++coder->pos;
+ // Verify CRC32
+ if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size))
+ return LZMA_DATA_ERROR;
- if (update_sequence(coder))
- return LZMA_STREAM_END;
+ // Check for unsupported flags.
+ if (in[1] & 0x3C)
+ return LZMA_HEADER_ERROR;
- break;
- }
+ // Start after the Block Header Size and Block Flags fields.
+ size_t in_pos = 2;
- case SEQ_CRC32:
- assert(coder->options->has_crc32);
+ // Compressed Size
+ if (in[1] & 0x40) {
+ return_if_error(lzma_vli_decode(&options->compressed_size,
+ NULL, in, &in_pos, in_size));
- if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF))
+ if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1)
return LZMA_DATA_ERROR;
- ++*in_pos;
- ++coder->pos;
-
- // Check if we reached end of the CRC32 field.
- if (coder->pos == 4) {
- coder->options->header_size += 4;
-
- if (update_sequence(coder))
- return LZMA_STREAM_END;
- }
-
- break;
+ options->compressed_size = (options->compressed_size + 1) * 4;
- case SEQ_PADDING:
- if (in[*in_pos] != 0x00)
+ // Check that Total Size (that is, size of
+ // Block Header + Compressed Data + Check) is
+ // representable as a VLI.
+ if (lzma_block_total_size_get(options) == 0)
return LZMA_DATA_ERROR;
-
- ++*in_pos;
- ++coder->options->header_size;
- ++coder->pos;
-
- if (coder->pos < (size_t)(coder->options->padding))
- break;
-
- return LZMA_STREAM_END;
-
- default:
- return LZMA_PROG_ERROR;
+ } else {
+ options->compressed_size = LZMA_VLI_VALUE_UNKNOWN;
}
- return LZMA_OK;
-}
-
-
-static void
-block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
-{
- lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
- lzma_free(coder, allocator);
- return;
-}
-
-
-extern lzma_ret
-lzma_block_header_decoder_init(lzma_next_coder *next,
- lzma_allocator *allocator, lzma_options_block *options)
-{
- if (next->coder == NULL) {
- next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
- if (next->coder == NULL)
- return LZMA_MEM_ERROR;
-
- next->code = &block_header_decode;
- next->end = &block_header_decoder_end;
- next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
+ // Uncompressed Size
+ if (in[1] & 0x80)
+ return_if_error(lzma_vli_decode(&options->uncompressed_size,
+ NULL, in, &in_pos, in_size));
+ else
+ options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
+
+ // Filter Flags
+ const size_t filter_count = (in[1] & 3) + 1;
+ for (size_t i = 0; i < filter_count; ++i) {
+ const lzma_ret ret = lzma_filter_flags_decode(
+ &options->filters[i], allocator,
+ in, &in_pos, in_size);
+ if (ret != LZMA_OK) {
+ free_properties(options, allocator);
+ return ret;
+ }
}
- // Assume that Compressed Size and Uncompressed Size are unknown.
- options->compressed_size = LZMA_VLI_VALUE_UNKNOWN;
- options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
-
- // We will calculate the sizes of these fields too so that the
- // application may rewrite the header if it wishes so.
- options->compressed_reserve = 0;
- options->uncompressed_reserve = 0;
+ // Padding
+ while (in_pos < in_size) {
+ if (in[in_pos++] != 0x00) {
+ free_properties(options, allocator);
- // The Block Flags field is always present, so include its size here
- // and we don't need to worry about it in block_header_decode().
- options->header_size = 2;
-
- // Reset filters[] to indicate empty list of filters.
- // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this.
- for (size_t i = 0; i < 8; ++i) {
- options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN;
- options->filters[i].options = NULL;
+ // Possibly some new field present so use
+ // LZMA_HEADER_ERROR instead of LZMA_DATA_ERROR.
+ return LZMA_HEADER_ERROR;
+ }
}
- next->coder->options = options;
- next->coder->sequence = SEQ_FLAGS_1;
- next->coder->pos = 0;
- next->coder->crc32 = 0;
-
- return LZMA_OK;
-}
-
-
-extern LZMA_API lzma_ret
-lzma_block_header_decoder(lzma_stream *strm,
- lzma_options_block *options)
-{
- lzma_next_strm_init(strm, lzma_block_header_decoder_init, options);
-
- strm->internal->supported_actions[LZMA_RUN] = true;
-
return LZMA_OK;
}