diff options
Diffstat (limited to '')
54 files changed, 2944 insertions, 5315 deletions
diff --git a/src/liblzma/common/Makefile.am b/src/liblzma/common/Makefile.am index c76ce14f..40b42250 100644 --- a/src/liblzma/common/Makefile.am +++ b/src/liblzma/common/Makefile.am @@ -25,26 +25,20 @@ libcommon_la_SOURCES = \ common.h \ bsr.h \ allocator.c \ + block_util.c \ block_private.h \ - extra.c \ features.c \ index.c \ - info.c \ init.c \ memory_limiter.c \ memory_usage.c \ next_coder.c \ raw_common.c \ raw_common.h \ + stream_flags_equal.c \ code.c \ version.c -if COND_FILTER_COPY -libcommon_la_SOURCES += \ - copy_coder.c \ - copy_coder.h -endif - if COND_FILTER_DELTA libcommon_la_SOURCES += \ delta_common.c \ @@ -69,21 +63,17 @@ libcommon_la_SOURCES += \ block_encoder.c \ block_encoder.h \ block_header_encoder.c \ - easy_common.c \ - easy_common.h \ - easy_single.c \ - easy_multi.c \ + easy.c \ filter_flags_encoder.c \ + index_encoder.c \ + index_encoder.h \ init_encoder.c \ - metadata_encoder.c \ - metadata_encoder.h \ raw_encoder.c \ raw_encoder.h \ stream_common.c \ stream_common.h \ - stream_encoder_single.c \ - stream_encoder_multi.c \ - stream_encoder_multi.h \ + stream_encoder.c \ + stream_encoder.h \ stream_flags_encoder.c \ vli_encoder.c endif @@ -96,14 +86,13 @@ libcommon_la_SOURCES += \ block_decoder.h \ block_header_decoder.c \ filter_flags_decoder.c \ + index_decoder.c \ + index_hash.c \ init_decoder.c \ - metadata_decoder.c \ - metadata_decoder.h \ raw_decoder.c \ raw_decoder.h \ stream_decoder.c \ stream_flags_decoder.c \ stream_flags_decoder.h \ - vli_decoder.c \ - vli_reverse_decoder.c + vli_decoder.c endif diff --git a/src/liblzma/common/alignment.c b/src/liblzma/common/alignment.c index 2d468fe5..c80e5fab 100644 --- a/src/liblzma/common/alignment.c +++ b/src/liblzma/common/alignment.c @@ -25,7 +25,6 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) { for (size_t i = 0; filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // The same as the 
input, check the next filter. continue; @@ -69,9 +68,8 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) extern LZMA_API uint32_t lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) { - // Check if there is only an implicit Copy filter. if (filters[0].id == LZMA_VLI_VALUE_UNKNOWN) - return guess; + return UINT32_MAX; // Find the last filter in the chain. size_t i = 0; @@ -80,7 +78,6 @@ lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) do { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // It's the same as the input alignment, so // check the next filter. diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index 91df5bf2..062f6fab 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -32,9 +32,15 @@ struct lzma_coder_s { SEQ_CODE, } sequence; + /// Position in the header fields size_t pos; - lzma_options_alone options; + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; }; @@ -50,34 +56,39 @@ alone_decode(lzma_coder *coder, && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: - if (lzma_lzma_decode_properties( - &coder->options.lzma, in[*in_pos])) - return LZMA_DATA_ERROR; + if (lzma_lzma_decode_properties(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: - coder->options.lzma.dictionary_size + coder->options.dictionary_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { - // A hack to ditch tons of false positives: We allow - // only dictionary sizes that are a power of two. 
- // LZMA_Alone didn't create other kinds of files, - // although it's not impossible that files with - // other dictionary sizes exist. Well, if someone - // complains, this will be reconsidered. - size_t count = 0; - for (size_t i = 0; i < 32; ++i) - if (coder->options.lzma.dictionary_size - & (UINT32_C(1) << i)) - ++count; - - if (count != 1 || coder->options.lzma.dictionary_size + if (coder->options.dictionary_size + < LZMA_DICTIONARY_SIZE_MIN + || coder->options.dictionary_size > LZMA_DICTIONARY_SIZE_MAX) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; + + // A hack to ditch tons of false positives: We allow + // only dictionary sizes that are 2^n or 2^n + 2^(n-1). + // LZMA_Alone created only files with 2^n, but accepts + // any dictionary size. If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dictionary_size) + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; @@ -87,7 +98,7 @@ alone_decode(lzma_coder *coder, break; case SEQ_UNCOMPRESSED_SIZE: - coder->options.uncompressed_size + coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 8) { @@ -95,11 +106,10 @@ alone_decode(lzma_coder *coder, // if the uncompressed size is known, it must be less // than 256 GiB. Again, if someone complains, this // will be reconsidered. 
- if (coder->options.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->options.uncompressed_size + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; @@ -113,9 +123,7 @@ alone_decode(lzma_coder *coder, lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, - .options = &coder->options.lzma, - .uncompressed_size = coder->options - .uncompressed_size, + .options = &coder->options, }, { .init = NULL, } @@ -126,6 +134,10 @@ alone_decode(lzma_coder *coder, if (ret != LZMA_OK) return ret; + // Use a hack to set the uncompressed size. + lzma_lzma_decoder_uncompressed_size(&coder->next, + coder->uncompressed_size); + coder->sequence = SEQ_CODE; } @@ -169,8 +181,8 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) next->coder->sequence = SEQ_PROPERTIES; next->coder->pos = 0; - next->coder->options.lzma.dictionary_size = 0; - next->coder->options.uncompressed_size = 0; + next->coder->options.dictionary_size = 0; + next->coder->uncompressed_size = 0; return LZMA_OK; } @@ -179,17 +191,14 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - // We need to use _init2 because we don't pass any varadic args. 
- lzma_next_coder_init2(next, allocator, alone_decoder_init, - alone_decoder_init, allocator); + lzma_next_coder_init0(alone_decoder_init, next, allocator); } extern LZMA_API lzma_ret lzma_alone_decoder(lzma_stream *strm) { - lzma_next_strm_init2(strm, alone_decoder_init, - alone_decoder_init, strm->allocator); + lzma_next_strm_init0(strm, alone_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c index 7629aa77..f94a21c1 100644 --- a/src/liblzma/common/alone_encoder.c +++ b/src/liblzma/common/alone_encoder.c @@ -21,19 +21,19 @@ #include "lzma_encoder.h" +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + struct lzma_coder_s { lzma_next_coder next; enum { - SEQ_PROPERTIES, - SEQ_DICTIONARY_SIZE, - SEQ_UNCOMPRESSED_SIZE, + SEQ_HEADER, SEQ_CODE, } sequence; - size_t pos; - - lzma_options_alone options; + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; }; @@ -47,47 +47,23 @@ alone_encode(lzma_coder *coder, { while (*out_pos < out_size) switch (coder->sequence) { - case SEQ_PROPERTIES: - if (lzma_lzma_encode_properties( - &coder->options.lzma, out + *out_pos)) { - return LZMA_PROG_ERROR; - } - - coder->sequence = SEQ_DICTIONARY_SIZE; - ++*out_pos; - break; - - case SEQ_DICTIONARY_SIZE: - out[*out_pos] = coder->options.lzma.dictionary_size - >> (coder->pos * 8); - - if (++coder->pos == 4) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - ++*out_pos; - break; - - case SEQ_UNCOMPRESSED_SIZE: - out[*out_pos] = coder->options.uncompressed_size - >> (coder->pos * 8); - - if (++coder->pos == 8) { - coder->pos = 0; - coder->sequence = SEQ_CODE; - } - - ++*out_pos; + case SEQ_HEADER: + bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; break; - case SEQ_CODE: { + case 
SEQ_CODE: return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -107,7 +83,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_alone *options) + const lzma_options_lzma *options) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -119,23 +95,42 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the LZMA_Alone coder variables. - next->coder->sequence = SEQ_PROPERTIES; - next->coder->pos = 0; - next->coder->options = *options; + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; - // Verify uncompressed_size since the other functions assume that - // it is valid. - if (!lzma_vli_is_valid(next->coder->options.uncompressed_size)) + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_encode_properties(options, next->coder->header)) return LZMA_PROG_ERROR; + // - Dictionary size (4 bytes) + if (options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || options->dictionary_size > LZMA_DICTIONARY_SIZE_MAX) + return LZMA_PROG_ERROR; + + // Round up to to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next. While the header would allow any 32-bit integer, + // we do this to keep the decoder of liblzma accepting the resulting + // files. + uint32_t d = options->dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + integer_write_32(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + // Initialize the LZMA encoder. 
const lzma_filter_info filters[2] = { { .init = &lzma_lzma_encoder_init, - .options = &next->coder->options.lzma, - .uncompressed_size = next->coder->options - .uncompressed_size, + .options = (void *)(options), }, { .init = NULL, } @@ -156,7 +151,7 @@ lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_alone_encoder(lzma_stream *strm, const lzma_options_alone *options) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) { lzma_next_strm_init(strm, alone_encoder_init, options); diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index 7e92df9a..765a27b1 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -17,15 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "stream_decoder.h" #include "alone_decoder.h" struct lzma_coder_s { lzma_next_coder next; - - lzma_extra **header; - lzma_extra **footer; bool initialized; }; @@ -43,8 +40,8 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator, lzma_ret ret; if (in[*in_pos] == 0xFF) - ret = lzma_stream_decoder_init(&coder->next, allocator, - coder->header, coder->footer); + ret = lzma_stream_decoder_init( + &coder->next, allocator); else ret = lzma_alone_decoder_init(&coder->next, allocator); @@ -69,8 +66,7 @@ auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -82,8 +78,6 @@ auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->header = header; - next->coder->footer = footer; next->coder->initialized = false; return LZMA_OK; @@ -102,9 +96,9 
@@ lzma_auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_auto_decoder(lzma_stream *strm, lzma_extra **header, lzma_extra **footer) +lzma_auto_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, auto_decoder_init, header, footer); + lzma_next_strm_init0(strm, auto_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index e1b5dc96..f07c4e06 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -26,129 +26,47 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, - SEQ_END, + SEQ_CHECK, } sequence; /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Decoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the encoding has been finished. lzma_options_block *options; - /// Position in variable-length integers (and in some other places). - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. 
+ lzma_vli compressed_limit; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; + /// Position when reading the Check field + size_t check_pos; - /// Temporary location for the Uncompressed Size and Backward Size - /// fields in Block Footer. - lzma_vli tmp; - - /// Size of the Backward Size field - This is needed so that we - /// can verify the Backward Size and still keep updating total_size. - size_t size_of_backward_size; + /// Check of the uncompressed data + lzma_check check; }; static lzma_ret -update_sequence(lzma_coder *coder) -{ - switch (coder->sequence) { - case SEQ_CODE: - if (coder->options->check != LZMA_CHECK_NONE) { - lzma_check_finish(&coder->check, - coder->options->check); - coder->sequence = SEQ_CHECK; - break; - } - - // Fall through - - case SEQ_CHECK: - if (coder->options->has_uncompressed_size_in_footer) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_backward_size) { - coder->sequence = SEQ_BACKWARD_SIZE; - break; - } - - // Fall through - - case SEQ_BACKWARD_SIZE: - if (coder->options->handle_padding) { - coder->sequence = SEQ_PADDING; - break; - } - - case SEQ_PADDING: - if (!is_size_valid(coder->total_size, - coder->options->total_size) - || !is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. 
- coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret block_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - // Special case when the Block has only Block Header. - if (coder->sequence == SEQ_END) - return LZMA_STREAM_END; - - // FIXME: Termination condition should work but could be cleaner. - while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_CODE)) switch (coder->sequence) { case SEQ_CODE: { + if (*out_pos >= out_size) + return LZMA_OK; + const size_t in_start = *in_pos; const size_t out_start = *out_pos; @@ -159,13 +77,13 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, in_used, - coder->total_limit) - || update_size(&coder->compressed_size, - in_used, - coder->options->compressed_size) + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_VALUE_MAX. 
+ if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) || update_size(&coder->uncompressed_size, - out_used, coder->uncompressed_limit)) + out_used, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; lzma_check_update(&coder->check, coder->options->check, @@ -174,116 +92,61 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - return_if_error(update_sequence(coder)); - - break; + coder->sequence = SEQ_PADDING; } - case SEQ_CHECK: - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - if (((coder->check.crc32 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + // Fall through - case LZMA_CHECK_CRC64: - if (((coder->check.crc64 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + case SEQ_PADDING: + // If Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; - case LZMA_CHECK_SHA256: - if (coder->check.sha256.buffer[coder->pos] - != in[*in_pos]) + if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - break; - - default: - assert(coder->options->check != LZMA_CHECK_NONE); - assert(coder->options->check <= LZMA_CHECK_ID_MAX); - break; - } - - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - ++*in_pos; - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - return_if_error(update_sequence(coder)); - coder->pos = 0; + if (update_size(&coder->compressed_size, 1, + coder->compressed_limit)) + return LZMA_DATA_ERROR; } - break; - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - if (update_size(&coder->total_size, *in_pos - in_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->tmp != coder->uncompressed_size) - 
return LZMA_DATA_ERROR; - - coder->pos = 0; - coder->tmp = 0; - - return_if_error(update_sequence(coder)); - - break; - } - - case SEQ_BACKWARD_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - if (update_size(&coder->total_size, in_used, - coder->total_limit)) + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->size_of_backward_size += in_used; - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->tmp != coder->total_size - - coder->size_of_backward_size) - return LZMA_DATA_ERROR; + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; - return_if_error(update_sequence(coder)); + lzma_check_finish(&coder->check, coder->options->check); + coder->sequence = SEQ_CHECK; - break; - } + // Fall through - case SEQ_PADDING: - if (in[*in_pos] == 0x00) { - if (update_size(&coder->total_size, 1, - coder->total_limit)) + case SEQ_CHECK: + while (*in_pos < in_size) { + if (in[(*in_pos)++] != coder->check.buffer[ + coder->check_pos]) return LZMA_DATA_ERROR; - ++*in_pos; - break; + if (++coder->check_pos == lzma_check_sizes[ + coder->options->check]) + return LZMA_STREAM_END; } - return update_sequence(coder); - - default: - return LZMA_PROG_ERROR; + return LZMA_OK; } - return LZMA_OK; + return LZMA_PROG_ERROR; } @@ -300,9 +163,12 @@ static lzma_ret block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block 
*options) { - // This is pretty similar to lzma_block_encoder_init(). - // See comments there. + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) + return LZMA_PROG_ERROR; + // Allocate and initialize *next->coder if needed. if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) @@ -313,40 +179,28 @@ block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - if (validate_options_1(options)) - return LZMA_PROG_ERROR; - - if (validate_options_2(options)) - return LZMA_DATA_ERROR; - - return_if_error(lzma_check_init(&next->coder->check, options->check)); - + // Basic initializations next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); - next->coder->tmp = 0; - next->coder->size_of_backward_size = 0; - - if (!options->has_eopm && options->uncompressed_size == 0) { - // The Compressed Data field is empty, thus we skip SEQ_CODE - // phase completely. - const lzma_ret ret = update_sequence(next->coder); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that Total Size of the Block still is a valid VLI and + // a multiple of four. + next->coder->compressed_limit + = options->compressed_size == LZMA_VLI_VALUE_UNKNOWN + ? 
(LZMA_VLI_VALUE_MAX & ~LZMA_VLI_C(3)) + - options->header_size + - lzma_check_sizes[options->check] + : options->compressed_size; + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); return lzma_raw_decoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 78185790..3add45a9 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -34,37 +34,21 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK_FINISH, - SEQ_CHECK_COPY, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, + SEQ_CHECK, } sequence; - /// Position in .header and .check. - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Position when writing out the Check field + size_t check_pos; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; - - /// Backward Size - This is a copy of total_size right before - /// the Backward Size field. 
- lzma_vli backward_size; + /// Check of the uncompressed data + lzma_check check; }; @@ -80,16 +64,16 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, if (coder->options->uncompressed_size - coder->uncompressed_size != (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } else { if (coder->options->uncompressed_size - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } } else if (LZMA_VLI_VALUE_MAX - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) { - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } // Main loop @@ -107,11 +91,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, out_used, - coder->total_limit) - || update_size(&coder->compressed_size, - out_used, - coder->options->compressed_size)) + // FIXME We must also check that Total Size doesn't get + // too big. + if (update_size(&coder->compressed_size, out_used, + coder->options->compressed_size)) return LZMA_DATA_ERROR; // No need to check for overflow because we have already @@ -125,141 +108,54 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; assert(*in_pos == in_size); + coder->sequence = SEQ_PADDING; + break; + } + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. + if (coder->compressed_size & 3) { + out[*out_pos] = 0x00; + ++*out_pos; + + if (update_size(&coder->compressed_size, 1, + coder->options->compressed_size)) + return LZMA_DATA_ERROR; + + break; + } // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values give to us. + // values. Verify that they match the values given to us. 
if (!is_size_valid(coder->compressed_size, coder->options->compressed_size) || !is_size_valid(coder->uncompressed_size, coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->sequence = SEQ_CHECK_FINISH; - break; - } + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - case SEQ_CHECK_FINISH: - if (coder->options->check == LZMA_CHECK_NONE) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); - coder->sequence = SEQ_CHECK_COPY; + coder->sequence = SEQ_CHECK; // Fall through - case SEQ_CHECK_COPY: - assert(lzma_check_sizes[coder->options->check] > 0); - - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - out[*out_pos] = coder->check.crc32 >> (coder->pos * 8); - break; - - case LZMA_CHECK_CRC64: - out[*out_pos] = coder->check.crc64 >> (coder->pos * 8); - break; - - case LZMA_CHECK_SHA256: - out[*out_pos] = coder->check.sha256.buffer[coder->pos]; - break; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - + case SEQ_CHECK: + out[*out_pos] = coder->check.buffer[coder->check_pos]; ++*out_pos; - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_uncompressed_size_in_footer) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->uncompressed_size, - &coder->pos, 1, - out, out_pos, out_size); - - // Updating the size this way instead of doing in a - // single chunk using lzma_vli_size(), because this - // way we detect when exactly we are going out of - // our limits. 
- if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - coder->pos = 0; - } + if (++coder->check_pos + == lzma_check_sizes[coder->options->check]) + return LZMA_STREAM_END; - coder->backward_size = coder->total_size; - coder->sequence = SEQ_BACKWARD_SIZE; break; - case SEQ_BACKWARD_SIZE: - if (coder->options->has_backward_size) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->backward_size, &coder->pos, 1, - out, out_pos, out_size); - - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - } - - coder->sequence = SEQ_PADDING; - break; - - case SEQ_PADDING: - if (coder->options->handle_padding) { - assert(coder->options->total_size - != LZMA_VLI_VALUE_UNKNOWN); - - if (coder->total_size < coder->options->total_size) { - out[*out_pos] = 0x00; - ++*out_pos; - - if (update_size(&coder->total_size, 1, - coder->total_limit)) - return LZMA_DATA_ERROR; - - break; - } - } - - // Now also Total Size is known. Verify it. - if (!is_size_valid(coder->total_size, - coder->options->total_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - default: return LZMA_PROG_ERROR; } @@ -281,10 +177,9 @@ static lzma_ret block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // Validate some options. 
- if (validate_options_1(options) || validate_options_2(options) - || (options->handle_padding && options->total_size - == LZMA_VLI_VALUE_UNKNOWN)) + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -298,40 +193,19 @@ block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the check. - return_if_error(lzma_check_init(&next->coder->check, options->check)); - - // If End of Payload Marker is not used and Uncompressed Size is zero, - // Compressed Data is empty. That is, we don't call the encoder at all. - // We initialize it though; it allows detecting invalid options. - if (!options->has_eopm && options->uncompressed_size == 0) { - // Also Compressed Size must be zero if it has been - // given to us. - if (!is_size_valid(0, options->compressed_size)) - return LZMA_PROG_ERROR; - - next->coder->sequence = SEQ_CHECK_FINISH; - } else { - next->coder->sequence = SEQ_CODE; - } - - // Other initializations + // Basic initializations + next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); // Initialize the requested filters. return lzma_raw_encoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? 
LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 7676c795..b9e072e0 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -21,353 +21,111 @@ #include "check.h" -struct lzma_coder_s { - lzma_options_block *options; - - enum { - SEQ_FLAGS_1, - SEQ_FLAGS_2, - SEQ_COMPRESSED_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_FILTER_FLAGS_INIT, - SEQ_FILTER_FLAGS_DECODE, - SEQ_CRC32, - SEQ_PADDING - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// CRC32 of the Block Header - uint32_t crc32; - - lzma_next_coder filter_flags_decoder; -}; - - -static bool -update_sequence(lzma_coder *coder) +static void +free_properties(lzma_options_block *options, lzma_allocator *allocator) { - switch (coder->sequence) { - case SEQ_FLAGS_2: - if (coder->options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_COMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_COMPRESSED_SIZE: - if (coder->options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - coder->pos = 0; - - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: - if (coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_FILTER_FLAGS_INIT; - break; - } - - if (coder->options->has_crc32) { - coder->pos = 0; - coder->sequence = SEQ_CRC32; - break; - } - - case SEQ_CRC32: - if (coder->options->padding != 0) { - coder->pos = 0; - coder->sequence = SEQ_PADDING; - break; - } - - return true; - - default: - assert(0); - return true; + // Free allocated filter options. 
The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + lzma_free(options->filters[i].options, allocator); + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - return false; + return; } -static lzma_ret -block_header_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in) { - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FLAGS_1: - // Check that the reserved bit is unset. Use HEADER_ERROR - // because newer version of liblzma may support the reserved - // bit, although it is likely that this is just a broken file. - if (in[*in_pos] & 0x40) - return LZMA_HEADER_ERROR; - - // Number of filters: we prepare appropriate amount of - // variables for variable-length integer parsing. The - // initialization function has already reset the rest - // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows - // us to later know how many filters there are. - for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i) - coder->options->filters[i].id = 0; - - // End of Payload Marker flag - coder->options->has_eopm = (in[*in_pos] & 0x08) != 0; - - // Compressed Size: Prepare for variable-length integer - // parsing if it is known. - if (in[*in_pos] & 0x10) - coder->options->compressed_size = 0; - - // Uncompressed Size: the same. 
- if (in[*in_pos] & 0x20) - coder->options->uncompressed_size = 0; - - // Is Metadata Block flag - coder->options->is_metadata = (in[*in_pos] & 0x80) != 0; - - // We need at least one: Uncompressed Size or EOPM. - if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - && !coder->options->has_eopm) - return LZMA_DATA_ERROR; - - // Update header CRC32. - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - coder->sequence = SEQ_FLAGS_2; - break; - - case SEQ_FLAGS_2: - // Check that the reserved bits are unset. - if (in[*in_pos] & 0xE0) - return LZMA_DATA_ERROR; - - // Get the size of Header Padding. - coder->options->padding = in[*in_pos] & 0x1F; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - - case SEQ_COMPRESSED_SIZE: { - // Store the old input position to be used when - // updating coder->header_crc32. - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->compressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->compressed_reserve += in_used; - assert(coder->options->compressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->uncompressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->uncompressed_reserve += in_used; - assert(coder->options->uncompressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != 
LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_FILTER_FLAGS_INIT: { - assert(coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN); - - const lzma_ret ret = lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, allocator, - &coder->options->filters[coder->pos]); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_FILTER_FLAGS_DECODE; + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: { - const size_t in_start = *in_pos; + size_t in_size = options->header_size; - const lzma_ret ret = coder->filter_flags_decoder.code( - coder->filter_flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - const size_t in_used = *in_pos - in_start; - coder->options->header_size += in_used; - coder->crc32 = lzma_crc32(in + in_start, - in_used, coder->crc32); + // Validate. The caller must have set options->header_size with + // lzma_block_header_size_decode() macro, so it is a programming error + // if these tests fail. + if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN + || in_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (in_size & 3) + || lzma_block_header_size_decode(in[0]) != in_size) + return LZMA_PROG_ERROR; - if (ret != LZMA_STREAM_END) - return ret; + // Exclude the CRC32 field. 
+ in_size -= 4; - ++coder->pos; + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) + return LZMA_DATA_ERROR; - if (update_sequence(coder)) - return LZMA_STREAM_END; + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_HEADER_ERROR; - break; - } + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; - case SEQ_CRC32: - assert(coder->options->has_crc32); + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&options->compressed_size, + NULL, in, &in_pos, in_size)); - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1) return LZMA_DATA_ERROR; - ++*in_pos; - ++coder->pos; - - // Check if we reached end of the CRC32 field. - if (coder->pos == 4) { - coder->options->header_size += 4; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - } - - break; + options->compressed_size = (options->compressed_size + 1) * 4; - case SEQ_PADDING: - if (in[*in_pos] != 0x00) + // Check that Total Size (that is, size of + // Block Header + Compressed Data + Check) is + // representable as a VLI. 
+ if (lzma_block_total_size_get(options) == 0) return LZMA_DATA_ERROR; - - ++*in_pos; - ++coder->options->header_size; - ++coder->pos; - - if (coder->pos < (size_t)(coder->options->padding)) - break; - - return LZMA_STREAM_END; - - default: - return LZMA_PROG_ERROR; + } else { + options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; } - return LZMA_OK; -} - - -static void -block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->filter_flags_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &block_header_decode; - next->end = &block_header_decoder_end; - next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&options->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &options->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(options, allocator); + return ret; + } } - // Assume that Compressed Size and Uncompressed Size are unknown. - options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; - options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // We will calculate the sizes of these fields too so that the - // application may rewrite the header if it wishes so. 
- options->compressed_reserve = 0; - options->uncompressed_reserve = 0; + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(options, allocator); - // The Block Flags field is always present, so include its size here - // and we don't need to worry about it in block_header_decode(). - options->header_size = 2; - - // Reset filters[] to indicate empty list of filters. - // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this. - for (size_t i = 0; i < 8; ++i) { - options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; - options->filters[i].options = NULL; + // Possibly some new field present so use + // LZMA_HEADER_ERROR instead of LZMA_DATA_ERROR. + return LZMA_HEADER_ERROR; + } } - next->coder->options = options; - next->coder->sequence = SEQ_FLAGS_1; - next->coder->pos = 0; - next->coder->crc32 = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_block_header_decoder(lzma_stream *strm, - lzma_options_block *options) -{ - lzma_next_strm_init(strm, lzma_block_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - return LZMA_OK; } diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 594b4fc0..ed0c88ba 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -24,188 +24,129 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_options_block *options) { - // Block Flags take two bytes. - size_t size = 2; + // Block Header Size + Block Flags + CRC32. + size_t size = 1 + 1 + 4; // Compressed Size - if (!lzma_vli_is_valid(options->compressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->compressed_reserve != 0) { - // Make sure that the known Compressed Size fits into the - // reserved space. Note that lzma_vli_size() will return zero - // if options->compressed_size is LZMA_VLI_VALUE_UNKNOWN, so - // we don't need to handle that special case separately. 
- if (options->compressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->compressed_size) - > (size_t)(options->compressed_reserve)) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1 + || options->compressed_size == 0 + || (options->compressed_size & 3)) return LZMA_PROG_ERROR; - size += options->compressed_reserve; - - } else if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Compressed Size is known. We have already checked - // that is is a valid VLI, and since it isn't - // LZMA_VLI_VALUE_UNKNOWN, we can be sure that - // lzma_vli_size() will succeed. - size += lzma_vli_size(options->compressed_size); + size += lzma_vli_size(options->compressed_size / 4 - 1); } // Uncompressed Size - if (!lzma_vli_is_valid(options->uncompressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->uncompressed_reserve != 0) { - if (options->uncompressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->uncompressed_size) - > (size_t)(options->uncompressed_reserve)) + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const size_t add = lzma_vli_size(options->uncompressed_size); + if (add == 0) return LZMA_PROG_ERROR; - size += options->uncompressed_reserve; - - } else if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - size += lzma_vli_size(options->uncompressed_size); + size += add; } // List of Filter Flags + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_PROG_ERROR; + for (size_t i = 0; options->filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { // Don't allow too many filters. 
- if (i == 7) + if (i == 4) return LZMA_PROG_ERROR; - uint32_t tmp; - const lzma_ret ret = lzma_filter_flags_size(&tmp, - options->filters + i); - if (ret != LZMA_OK) - return ret; + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + options->filters + i)); - size += tmp; + size += add; } - // CRC32 - if (options->has_crc32) - size += 4; - - // Padding - int32_t padding; - if (options->padding == LZMA_BLOCK_HEADER_PADDING_AUTO) { - const uint32_t preferred = lzma_alignment_output( - options->filters, 1); - const uint32_t unaligned = size + options->alignment; - padding = (int32_t)(unaligned % preferred); - if (padding != 0) - padding = preferred - padding; - } else if (options->padding >= LZMA_BLOCK_HEADER_PADDING_MIN - && options->padding <= LZMA_BLOCK_HEADER_PADDING_MAX) { - padding = options->padding; - } else { - return LZMA_PROG_ERROR; - } + // Pad to a multiple of four bytes. + options->header_size = (size + 3) & ~(size_t)(3); - // All success. Copy the calculated values to the options structure. - options->padding = padding; - options->header_size = size + (size_t)(padding); + // NOTE: We don't verify that Total Size of the Block stays within + // limits. This is because it is possible that we are called with + // exaggerated values to reserve space for Block Header, and later + // called again with lower, real values. return LZMA_OK; } extern LZMA_API lzma_ret -lzma_block_header_encode(uint8_t *out, const lzma_options_block *options) +lzma_block_header_encode(const lzma_options_block *options, uint8_t *out) { - // We write the Block Flags later. - if (options->header_size < 2) + if ((options->header_size & 3) + || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) return LZMA_PROG_ERROR; - const size_t out_size = options->header_size; + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = options->header_size - 4; + + // Store the Block Header Size. 
+ out[0] = out_size / 4; + + // We write Block Flags a little later. size_t out_pos = 2; // Compressed Size - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) { - const lzma_vli size = options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->compressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->compressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Compressed Size must be non-zero, fit into a 63-bit + // integer and be a multiple of four. Also the Total Size + // of the Block must fit into 63-bit integer. + if (options->compressed_size == 0 + || (options->compressed_size & 3) + || options->compressed_size + > LZMA_VLI_VALUE_MAX + || lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; + return_if_error(lzma_vli_encode( + options->compressed_size / 4 - 1, NULL, + out, &out_pos, out_size)); } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) { - const lzma_vli size = options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->uncompressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->uncompressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + return_if_error(lzma_vli_encode( + options->uncompressed_size, NULL, + out, &out_pos, out_size)); // Filter Flags - size_t filter_count; - for (filter_count = 0; options->filters[filter_count].id - != LZMA_VLI_VALUE_UNKNOWN; ++filter_count) { - // There can be at maximum of seven filters. - if (filter_count == 7) - return LZMA_PROG_ERROR; - - const lzma_ret ret = lzma_filter_flags_encode(out, &out_pos, - out_size, options->filters + filter_count); - // FIXME: Don't return LZMA_BUF_ERROR. 
- if (ret != LZMA_OK) - return ret; - } - - // Block Flags 1 - out[0] = filter_count; - - if (options->has_eopm) - out[0] |= 0x08; - else if (options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) return LZMA_PROG_ERROR; - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) - out[0] |= 0x10; - - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) - out[0] |= 0x20; + size_t filter_count = 0; + do { + // There can be at maximum of four filters. + if (filter_count == 4) + return LZMA_PROG_ERROR; - if (options->is_metadata) - out[0] |= 0x80; + return_if_error(lzma_filter_flags_encode(out, &out_pos, + out_size, options->filters + filter_count)); - // Block Flags 2 - if (options->padding < LZMA_BLOCK_HEADER_PADDING_MIN - || options->padding > LZMA_BLOCK_HEADER_PADDING_MAX) - return LZMA_PROG_ERROR; + } while (options->filters[++filter_count].id + != LZMA_VLI_VALUE_UNKNOWN); - out[1] = (uint8_t)(options->padding); + // Block Flags + out[1] = filter_count - 1; - // CRC32 - if (options->has_crc32) { - if (out_size - out_pos < 4) - return LZMA_PROG_ERROR; + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x40; - const uint32_t crc = lzma_crc32(out, out_pos, 0); - for (size_t i = 0; i < 4; ++i) - out[out_pos++] = crc >> (i * 8); - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x80; - // Padding - the amount of available space must now match with - // the size of the Padding field. 
- if (out_size - out_pos != (size_t)(options->padding)) - return LZMA_PROG_ERROR; + // Padding + memzero(out + out_pos, out_size - out_pos); - memzero(out + out_pos, (size_t)(options->padding)); + // CRC32 + integer_write_32(out + out_size, lzma_crc32(out, out_size, 0)); return LZMA_OK; } diff --git a/src/liblzma/common/block_private.h b/src/liblzma/common/block_private.h index 16d95b9f..235e96b8 100644 --- a/src/liblzma/common/block_private.h +++ b/src/liblzma/common/block_private.h @@ -22,6 +22,7 @@ #include "common.h" + static inline bool update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) { @@ -43,54 +44,4 @@ is_size_valid(lzma_vli size, lzma_vli reference) return reference == LZMA_VLI_VALUE_UNKNOWN || reference == size; } - -/// If any of these tests fail, the caller has to return LZMA_PROG_ERROR. -static inline bool -validate_options_1(const lzma_options_block *options) -{ - return options == NULL - || !lzma_vli_is_valid(options->compressed_size) - || !lzma_vli_is_valid(options->uncompressed_size) - || !lzma_vli_is_valid(options->total_size) - || !lzma_vli_is_valid(options->total_limit) - || !lzma_vli_is_valid(options->uncompressed_limit); -} - - -/// If any of these tests fail, the encoder has to return LZMA_PROG_ERROR -/// because something is going horribly wrong if such values get passed -/// to the encoder. In contrast, the decoder has to return LZMA_DATA_ERROR, -/// since these tests failing indicate that something is wrong in the Stream. 
-static inline bool -validate_options_2(const lzma_options_block *options) -{ - if ((options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - && options->uncompressed_size - > options->uncompressed_limit) - || (options->total_size != LZMA_VLI_VALUE_UNKNOWN - && options->total_size - > options->total_limit) - || (!options->has_eopm && options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - || options->header_size > options->total_size) - return true; - - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Calculate a rough minimum possible valid Total Size of - // this Block, and check that total_size and total_limit - // are big enough. Note that the real minimum size can be - // bigger due to the Check, Uncompressed Size, Backwards - // Size, pr Padding being present. A rough check here is - // enough for us to catch the most obvious errors as early - // as possible. - const lzma_vli total_min = options->compressed_size - + (lzma_vli)(options->header_size); - if (total_min > options->total_size - || total_min > options->total_limit) - return true; - } - - return false; -} - #endif diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c new file mode 100644 index 00000000..6bffc2f1 --- /dev/null +++ b/src/liblzma/common/block_util.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_util.c +/// \brief Utility functions to handle lzma_options_block +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_block_total_size_set(lzma_options_block *options, lzma_vli total_size) +{ + // Validate. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX + || (total_size & 3)) + return LZMA_PROG_ERROR; + + const uint32_t container_size = options->header_size + + lzma_check_sizes[options->check]; + + // Validate that Compressed Size will be greater than zero. + if (container_size <= total_size) + return LZMA_DATA_ERROR; + + options->compressed_size = total_size - container_size; + + return LZMA_OK; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size_get(const lzma_options_block *options) +{ + // Validate the values that we are interested in. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // Total Size either. + if (options->compressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_VLI_VALUE_UNKNOWN; + + const lzma_vli total_size = options->compressed_size + + options->header_size + + lzma_check_sizes[options->check]; + + // Validate the calculated Total Size. 
+ if (options->compressed_size > LZMA_VLI_VALUE_MAX + || (options->compressed_size & 3) + || total_size > LZMA_VLI_VALUE_MAX) + return 0; + + return total_size; +} diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 5dd7a87f..4f30427d 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -21,6 +21,7 @@ #define LZMA_COMMON_H #include "../../common/sysdefs.h" +#include "../../common/integer.h" // Don't use ifdef... #if HAVE_VISIBILITY @@ -30,6 +31,17 @@ #endif +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 @@ -117,10 +129,6 @@ struct lzma_filter_info_s { /// Pointer to filter's options structure void *options; - - /// Uncompressed size of the filter, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. 
- lzma_vli uncompressed_size; }; @@ -158,20 +166,6 @@ extern void lzma_next_coder_end(lzma_next_coder *next, lzma_allocator *allocator); -extern lzma_ret lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options); - -extern lzma_ret lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options); - -extern lzma_ret lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); - -extern lzma_ret lzma_stream_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer); - - /// \brief Wrapper for memcpy() /// /// This function copies as much data as possible from in[] to out[] and @@ -225,6 +219,13 @@ do { \ lzma_next_coder_init2(next, allocator, \ func, func, allocator, __VA_ARGS__) +/// \brief Initializing lzma_next_coder +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_coder_init0(func, next, allocator) \ + lzma_next_coder_init2(next, allocator, func, func, allocator) + /// \brief Initializing lzma_stream /// @@ -254,6 +255,13 @@ do { \ #define lzma_next_strm_init(strm, func, ...) \ lzma_next_strm_init2(strm, func, func, (strm)->allocator, __VA_ARGS__) +/// \brief Initializing lzma_stream +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. 
+#define lzma_next_strm_init0(strm, func) \ + lzma_next_strm_init2(strm, func, func, (strm)->allocator) + /// \brief Return if expression doesn't evaluate to LZMA_OK /// diff --git a/src/liblzma/common/copy_coder.c b/src/liblzma/common/copy_coder.c deleted file mode 100644 index 0bd674f6..00000000 --- a/src/liblzma/common/copy_coder.c +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.c -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "copy_coder.h" - - -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - -#ifdef HAVE_ENCODER -static lzma_ret -copy_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // If we aren't the last filter in the chain, the Copy filter - // is totally useless. Note that it is job of the next coder to - // take care of Uncompressed Size, so we don't need to update our - // coder->uncompressed_size at all. - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - // We are the last coder in the chain. 
- // Just copy as much data as possible. - bufcpy(in, in_pos, in_size, out, out_pos, out_size); - - // LZMA_SYNC_FLUSH and LZMA_FINISH are the same thing for us. - if (action != LZMA_RUN && *in_pos == in_size) - return LZMA_STREAM_END; - - return LZMA_OK; -} -#endif - - -#ifdef HAVE_DECODER -static lzma_ret -copy_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - - const size_t in_avail = in_size - *in_pos; - - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - // We are the last coder in the chain. - // Just copy as much data as possible. - const size_t in_used = bufcpy( - in, in_pos, in_size, out, out_pos, out_size); - - // Update uncompressed_size if it is known. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= in_used; - - return coder->uncompressed_size == 0 ? LZMA_STREAM_END : LZMA_OK; -} -#endif - - -static void -copy_coder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -copy_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters, lzma_code_function encode) -{ - // Allocate memory for the decoder if needed. 
- if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = encode; - next->end = &copy_coder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - // Copy Uncompressed Size which is used to limit the output size. - next->coder->uncompressed_size = filters[0].uncompressed_size; - - // Initialize the next decoder in the chain, if any. - return lzma_next_filter_init( - &next->coder->next, allocator, filters + 1); -} - - -#ifdef HAVE_ENCODER -extern lzma_ret -lzma_copy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - &copy_encode); -} -#endif - - -#ifdef HAVE_DECODER -extern lzma_ret -lzma_copy_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - &copy_decode); -} -#endif diff --git a/src/liblzma/common/copy_coder.h b/src/liblzma/common/copy_coder.h deleted file mode 100644 index b8d0295d..00000000 --- a/src/liblzma/common/copy_coder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.h -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_COPY_CODER_H -#define LZMA_COPY_CODER_H - -#include "common.h" - -extern lzma_ret lzma_copy_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -extern lzma_ret lzma_copy_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -#endif diff --git a/src/liblzma/common/delta_common.c b/src/liblzma/common/delta_common.c index de27b5a6..acd31e14 100644 --- a/src/liblzma/common/delta_common.c +++ b/src/liblzma/common/delta_common.c @@ -47,10 +47,6 @@ lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Coding function is different for encoder and decoder. next->code = code; - // Copy Uncompressed Size which is used to limit the output size - // in the Delta decoder. - next->coder->uncompressed_size = filters[0].uncompressed_size; - // Set the delta distance. if (filters[0].options == NULL) return LZMA_PROG_ERROR; diff --git a/src/liblzma/common/delta_common.h b/src/liblzma/common/delta_common.h index 3ec955b7..1d58899d 100644 --- a/src/liblzma/common/delta_common.h +++ b/src/liblzma/common/delta_common.h @@ -26,10 +26,6 @@ struct lzma_coder_s { /// Next coder in the chain lzma_next_coder next; - /// Uncompressed size - This is needed when we are the last - /// filter in the chain. - lzma_vli uncompressed_size; - /// Delta distance size_t distance; diff --git a/src/liblzma/common/delta_decoder.c b/src/liblzma/common/delta_decoder.c index af2b840d..8f5a4cbf 100644 --- a/src/liblzma/common/delta_decoder.c +++ b/src/liblzma/common/delta_decoder.c @@ -21,26 +21,8 @@ #include "delta_common.h" -/// Copies and decodes the data at the same time. This is used when Delta -/// is the last filter in the chain. 
static void -copy_and_decode(lzma_coder *coder, - const uint8_t *restrict in, uint8_t *restrict out, size_t size) -{ - const size_t distance = coder->distance; - - for (size_t i = 0; i < size; ++i) { - out[i] = in[i] + coder->history[ - (distance + coder->pos) & 0xFF]; - coder->history[coder->pos-- & 0xFF] = out[i]; - } -} - - -/// Decodes the data in place. This is used when we are not the last filter -/// in the chain. -static void -decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) { const size_t distance = coder->distance; @@ -51,44 +33,21 @@ decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) } - static lzma_ret delta_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - lzma_ret ret; - - if (coder->next.code == NULL) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_size - *in_pos) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - const size_t in_avail = in_size - *in_pos; - const size_t out_avail = out_size - *out_pos; - const size_t size = MIN(in_avail, out_avail); - - copy_and_decode(coder, in + *in_pos, out + *out_pos, size); + assert(coder->next.code != NULL); - *in_pos += size; - *out_pos += size; + const size_t out_start = *out_pos; - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - coder->uncompressed_size -= size; + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); - ret = coder->uncompressed_size == 0 - ? 
LZMA_STREAM_END : LZMA_OK; - - } else { - const size_t out_start = *out_pos; - - ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - decode_in_place(coder, out + out_start, *out_pos - out_start); - } + decode_buffer(coder, out + out_start, *out_pos - out_start); return ret; } diff --git a/src/liblzma/common/delta_encoder.c b/src/liblzma/common/delta_encoder.c index b94f92de..a8bb9341 100644 --- a/src/liblzma/common/delta_encoder.c +++ b/src/liblzma/common/delta_encoder.c @@ -22,7 +22,8 @@ /// Copies and encodes the data at the same time. This is used when Delta -/// is the last filter in the chain. +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). static void copy_and_encode(lzma_coder *coder, const uint8_t *restrict in, uint8_t *restrict out, size_t size) @@ -38,8 +39,8 @@ copy_and_encode(lzma_coder *coder, } -/// Encodes the data in place. This is used when we are not the last filter -/// in the chain. +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). 
static void encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) { diff --git a/src/liblzma/common/easy_multi.c b/src/liblzma/common/easy.c index 15778fab..6c258204 100644 --- a/src/liblzma/common/easy_multi.c +++ b/src/liblzma/common/easy.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_multi.c -/// \brief Easy Multi-Block Stream encoder initialization +/// \file easy.c +/// \brief Easy Stream encoder initialization // // Copyright (C) 2008 Lasse Collin // @@ -17,23 +17,50 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" -#include "stream_encoder_multi.h" +#include "stream_encoder.h" struct lzma_coder_s { - lzma_next_coder encoder; - lzma_options_stream options; + lzma_next_coder stream_encoder; + + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_options_filter filters[5]; }; +static bool +easy_set_filters(lzma_options_filter *filters, uint32_t level) +{ + bool error = false; + + if (level == 0) { + // TODO FIXME Use Subblock or LZMA2 with no compression. 
+ error = true; + +#ifdef HAVE_FILTER_LZMA + } else if (level <= 9) { + filters[0].id = LZMA_FILTER_LZMA; + filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); + filters[1].id = LZMA_VLI_VALUE_UNKNOWN; +#endif + + } else { + error = true; + } + + return error; +} + + static lzma_ret easy_encode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - return coder->encoder.code(coder->encoder.coder, allocator, + return coder->stream_encoder.code( + coder->stream_encoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); } @@ -41,7 +68,7 @@ easy_encode(lzma_coder *coder, lzma_allocator *allocator, static void easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) { - lzma_next_coder_end(&coder->encoder, allocator); + lzma_next_coder_end(&coder->stream_encoder, allocator); lzma_free(coder, allocator); return; } @@ -49,8 +76,7 @@ easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) + lzma_easy_level level) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -60,39 +86,21 @@ easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->code = &easy_encode; next->end = &easy_encoder_end; - next->coder->encoder = LZMA_NEXT_CODER_INIT; + next->coder->stream_encoder = LZMA_NEXT_CODER_INIT; } - next->coder->options = (lzma_options_stream){ - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - .header = header, - .footer = footer, - }; - - if (lzma_easy_set_filters(next->coder->options.filters, level) - || lzma_easy_set_filters( - next->coder->options.metadata_filters, - 
metadata_level)) + if (easy_set_filters(next->coder->filters, level)) return LZMA_HEADER_ERROR; - return lzma_stream_encoder_multi_init(&next->coder->encoder, - allocator, &next->coder->options); + return lzma_stream_encoder_init(&next->coder->stream_encoder, + allocator, next->coder->filters, LZMA_CHECK_CRC32); } extern LZMA_API lzma_ret -lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) +lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level) { - // This is more complicated than lzma_easy_encoder_single(), - // because lzma_stream_encoder_multi() wants that the options - // structure is available until the encoding is finished. - lzma_next_strm_init(strm, easy_encoder_init, - level, metadata_level, header, footer); + lzma_next_strm_init(strm, easy_encoder_init, level); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; @@ -101,3 +109,14 @@ lzma_easy_encoder_multi(lzma_stream *strm, return LZMA_OK; } + + +extern LZMA_API uint32_t +lzma_easy_memory_usage(lzma_easy_level level) +{ + lzma_options_filter filters[5]; + if (easy_set_filters(filters, level)) + return UINT32_MAX; + + return lzma_memory_usage(filters, true); +} diff --git a/src/liblzma/common/easy_common.c b/src/liblzma/common/easy_common.c deleted file mode 100644 index e0c12a52..00000000 --- a/src/liblzma/common/easy_common.c +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. 
-// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" - - -extern bool -lzma_easy_set_filters(lzma_options_filter *filters, uint32_t level) -{ - bool error = false; - - if (level == 0) { - filters[0].id = LZMA_VLI_VALUE_UNKNOWN; - -#ifdef HAVE_FILTER_LZMA - } else if (level <= 9) { - filters[0].id = LZMA_FILTER_LZMA; - filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); - filters[1].id = LZMA_VLI_VALUE_UNKNOWN; -#endif - - } else { - error = true; - } - - return error; -} - - -extern LZMA_API uint32_t -lzma_easy_memory_usage(lzma_easy_level level) -{ - lzma_options_filter filters[8]; - if (lzma_easy_set_filters(filters, level)) - return UINT32_MAX; - - return lzma_memory_usage(filters, true); -} diff --git a/src/liblzma/common/extra.c b/src/liblzma/common/extra.c deleted file mode 100644 index c532abb0..00000000 --- a/src/liblzma/common/extra.c +++ /dev/null @@ -1,34 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file extra.c -/// \brief Handling of Extra in Metadata -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API void -lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator) -{ - while (extra != NULL) { - lzma_extra *tmp = extra->next; - lzma_free(extra->data, allocator); - lzma_free(extra, allocator); - extra = tmp; - } - - return; -} diff --git a/src/liblzma/common/features.c b/src/liblzma/common/features.c index 33b2e0a2..a02949d9 100644 --- a/src/liblzma/common/features.c +++ b/src/liblzma/common/features.c @@ -21,10 +21,6 @@ static const lzma_vli filters[] = { -#ifdef HAVE_FILTER_COPY - LZMA_FILTER_COPY, -#endif - #ifdef HAVE_FILTER_SUBBLOCK LZMA_FILTER_SUBBLOCK, #endif diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c index 515f9346..498b2ad6 100644 --- a/src/liblzma/common/filter_flags_decoder.c +++ b/src/liblzma/common/filter_flags_decoder.c @@ -21,362 +21,188 @@ #include "lzma_decoder.h" -struct lzma_coder_s { - lzma_options_filter *options; - - enum { - SEQ_MISC, - SEQ_ID, - SEQ_SIZE, - SEQ_PROPERTIES, - } sequence; - - /// \brief Position in variable-length integers - size_t pos; - - /// \brief Size of Filter Properties - lzma_vli properties_size; -}; - - #ifdef HAVE_FILTER_SUBBLOCK static lzma_ret -properties_subblock(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in lzma_attribute((unused)), - size_t *in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused))) +properties_subblock(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props lzma_attribute((unused)), + size_t prop_size lzma_attribute((unused))) { - if (coder->properties_size != 0) + if (prop_size != 0) return LZMA_HEADER_ERROR; - coder->options->options = lzma_alloc( + options->options = lzma_alloc( sizeof(lzma_options_subblock), allocator); - if (coder->options->options == NULL) + if (options->options == NULL) return LZMA_MEM_ERROR; - ((lzma_options_subblock 
*)(coder->options->options)) - ->allow_subfilters = true; - return LZMA_STREAM_END; + ((lzma_options_subblock *)(options->options))->allow_subfilters = true; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_SIMPLE static lzma_ret -properties_simple(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_simple(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size == 0) - return LZMA_STREAM_END; + if (prop_size == 0) + return LZMA_OK; - if (coder->properties_size != 4) + if (prop_size != 4) return LZMA_HEADER_ERROR; - lzma_options_simple *options = coder->options->options; - - if (options == NULL) { - options = lzma_alloc(sizeof(lzma_options_simple), allocator); - if (options == NULL) - return LZMA_MEM_ERROR; - - options->start_offset = 0; - coder->options->options = options; - } - - while (coder->pos < 4) { - if (*in_pos == in_size) - return LZMA_OK; + lzma_options_simple *simple = lzma_alloc( + sizeof(lzma_options_simple), allocator); + if (simple == NULL) + return LZMA_MEM_ERROR; - options->start_offset - |= (uint32_t)(in[*in_pos]) << (8 * coder->pos); - ++*in_pos; - ++coder->pos; - } + simple->start_offset = integer_read_32(props); // Don't leave an options structure allocated if start_offset is zero. 
- if (options->start_offset == 0) { - lzma_free(options, allocator); - coder->options->options = NULL; - } + if (simple->start_offset == 0) + lzma_free(simple, allocator); + else + options->options = simple; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_DELTA static lzma_ret -properties_delta(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_delta(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size != 1) + if (prop_size != 1) return LZMA_HEADER_ERROR; - if (*in_pos == in_size) - return LZMA_OK; - - lzma_options_delta *options = lzma_alloc( - sizeof(lzma_options_delta), allocator); - if (options == NULL) + options->options = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (options->options == NULL) return LZMA_MEM_ERROR; - coder->options->options = options; - - options->distance = (uint32_t)(in[*in_pos]) + 1; - ++*in_pos; + ((lzma_options_delta *)(options->options))->distance + = (uint32_t)(props[0]) + 1; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_LZMA static lzma_ret -properties_lzma(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_lzma(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { // LZMA properties are always two bytes (at least for now). - if (coder->properties_size != 2) + if (prop_size != 2) return LZMA_HEADER_ERROR; - assert(coder->pos < 2); - - while (*in_pos < in_size) { - switch (coder->pos) { - case 0: - // Allocate the options structure. - coder->options->options = lzma_alloc( - sizeof(lzma_options_lzma), allocator); - if (coder->options->options == NULL) - return LZMA_MEM_ERROR; - - // Decode lc, lp, and pb. 
- if (lzma_lzma_decode_properties( - coder->options->options, in[*in_pos])) - return LZMA_HEADER_ERROR; - - ++*in_pos; - ++coder->pos; - break; - - case 1: { - lzma_options_lzma *options = coder->options->options; - - // Check that reserved bits are unset. - if (in[*in_pos] & 0xC0) - return LZMA_HEADER_ERROR; - - // Decode the dictionary size. See the file format - // specification section 4.3.4.2 to understand this. - if (in[*in_pos] == 0) { - options->dictionary_size = 1; - - } else if (in[*in_pos] > 59) { - // Dictionary size is over 1 GiB. - // It's not supported at the moment. - return LZMA_HEADER_ERROR; -# if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 -# error Update the if()-condition a few lines -# error above to match LZMA_DICTIONARY_SIZE_MAX. -# endif - - } else { - options->dictionary_size - = 2 | ((in[*in_pos] + 1) & 1); - options->dictionary_size - <<= (in[*in_pos] - 1) / 2; - } - - ++*in_pos; - return LZMA_STREAM_END; - } - } - } + lzma_options_lzma *lzma = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (lzma == NULL) + return LZMA_MEM_ERROR; + + // Decode lc, lp, and pb. + if (lzma_lzma_decode_properties(lzma, props[0])) + goto error; + + // Check that reserved bits are unset. + if (props[1] & 0xC0) + goto error; + + // Decode the dictionary size. + // FIXME The specification says that maximum is 4 GiB. + if (props[1] > 36) + goto error; +#if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 +# error Update the if()-condition a few lines +# error above to match LZMA_DICTIONARY_SIZE_MAX. 
+#endif + + lzma->dictionary_size = 2 | (props[1] & 1); + lzma->dictionary_size <<= props[1] / 2 + 11; - assert(coder->pos < 2); + options->options = lzma; return LZMA_OK; + +error: + lzma_free(lzma, allocator); + return LZMA_HEADER_ERROR; } #endif -static lzma_ret -filter_flags_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) { - while (*in_pos < in_size || coder->sequence == SEQ_PROPERTIES) - switch (coder->sequence) { - case SEQ_MISC: - // Determine the Filter ID and Size of Filter Properties. - if (in[*in_pos] >= 0xE0) { - // Using External ID. Prepare the ID - // for variable-length integer parsing. - coder->options->id = 0; - - if (in[*in_pos] == 0xFF) { - // Mark that Size of Filter Properties is - // unknown, so we know later that there is - // external Size of Filter Properties present. - coder->properties_size - = LZMA_VLI_VALUE_UNKNOWN; - } else { - // Take Size of Filter Properties from Misc. - coder->properties_size = in[*in_pos] - 0xE0; - } - - coder->sequence = SEQ_ID; - - } else { - // The Filter ID is the same as Misc. - coder->options->id = in[*in_pos]; - - // The Size of Filter Properties can be calculated - // from Misc too. - coder->properties_size = in[*in_pos] / 0x20; - - coder->sequence = SEQ_PROPERTIES; - } - - ++*in_pos; - break; + // Set the pointer to NULL so the caller can always safely free it. 
+ options->options = NULL; - case SEQ_ID: { - const lzma_ret ret = lzma_vli_decode(&coder->options->id, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->properties_size == LZMA_VLI_VALUE_UNKNOWN) { - // We have also external Size of Filter - // Properties. Prepare the size for - // variable-length integer parsing. - coder->properties_size = 0; - coder->sequence = SEQ_SIZE; - } else { - coder->sequence = SEQ_PROPERTIES; - } - - // Reset pos for its next job. - coder->pos = 0; - break; - } + // Filter ID + return_if_error(lzma_vli_decode(&options->id, NULL, + in, in_pos, in_size)); - case SEQ_SIZE: { - const lzma_ret ret = lzma_vli_decode(&coder->properties_size, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; + // Size of Properties + lzma_vli prop_size; + return_if_error(lzma_vli_decode(&prop_size, NULL, + in, in_pos, in_size)); - coder->pos = 0; - coder->sequence = SEQ_PROPERTIES; - break; - } + // Check that we have enough input. + if (prop_size > in_size - *in_pos) + return LZMA_DATA_ERROR; - case SEQ_PROPERTIES: { - lzma_ret (*get_properties)(lzma_coder *coder, - lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size); + // Determine the function to decode the properties. + lzma_ret (*get_properties)(lzma_options_filter *options, + lzma_allocator *allocator, const uint8_t *props, + size_t prop_size); - switch (coder->options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return coder->properties_size > 0 - ? 
LZMA_HEADER_ERROR : LZMA_STREAM_END; -#endif + switch (options->id) { #ifdef HAVE_FILTER_SUBBLOCK - case LZMA_FILTER_SUBBLOCK: - get_properties = &properties_subblock; - break; + case LZMA_FILTER_SUBBLOCK: + get_properties = &properties_subblock; + break; #endif #ifdef HAVE_FILTER_SIMPLE # ifdef HAVE_FILTER_X86 - case LZMA_FILTER_X86: + case LZMA_FILTER_X86: # endif # ifdef HAVE_FILTER_POWERPC - case LZMA_FILTER_POWERPC: + case LZMA_FILTER_POWERPC: # endif # ifdef HAVE_FILTER_IA64 - case LZMA_FILTER_IA64: + case LZMA_FILTER_IA64: # endif # ifdef HAVE_FILTER_ARM - case LZMA_FILTER_ARM: + case LZMA_FILTER_ARM: # endif # ifdef HAVE_FILTER_ARMTHUMB - case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_ARMTHUMB: # endif # ifdef HAVE_FILTER_SPARC - case LZMA_FILTER_SPARC: + case LZMA_FILTER_SPARC: # endif - get_properties = &properties_simple; - break; + get_properties = &properties_simple; + break; #endif #ifdef HAVE_FILTER_DELTA - case LZMA_FILTER_DELTA: - get_properties = &properties_delta; - break; + case LZMA_FILTER_DELTA: + get_properties = &properties_delta; + break; #endif #ifdef HAVE_FILTER_LZMA - case LZMA_FILTER_LZMA: - get_properties = &properties_lzma; - break; + case LZMA_FILTER_LZMA: + get_properties = &properties_lzma; + break; #endif - default: - return LZMA_HEADER_ERROR; - } - - return get_properties(coder, allocator, in, in_pos, in_size); - } - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -filter_flags_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &filter_flags_decode; - next->end = &filter_flags_decoder_end; + return LZMA_HEADER_ERROR; } - options->id = 0; - options->options = 
NULL; - - next->coder->options = options; - next->coder->sequence = SEQ_MISC; - next->coder->pos = 0; - next->coder->properties_size = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_filter_flags_decoder(lzma_stream *strm, lzma_options_filter *options) -{ - lzma_next_strm_init(strm, lzma_filter_flags_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; + const uint8_t *props = in + *in_pos; + *in_pos += prop_size; + return get_properties(options, allocator, props, prop_size); } diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c index 2d11dd3a..45fbbb00 100644 --- a/src/liblzma/common/filter_flags_encoder.c +++ b/src/liblzma/common/filter_flags_encoder.c @@ -22,22 +22,13 @@ #include "fastpos.h" -/// \brief Calculates the size of the Filter Properties field -/// -/// This currently can return only LZMA_OK or LZMA_HEADER_ERROR, but -/// with some new filters it may return also LZMA_PROG_ERROR. +/// Calculate the size of the Filter Properties field static lzma_ret get_properties_size(uint32_t *size, const lzma_options_filter *options) { lzma_ret ret = LZMA_OK; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - *size = 0; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: *size = 0; @@ -100,40 +91,14 @@ get_properties_size(uint32_t *size, const lzma_options_filter *options) extern LZMA_API lzma_ret lzma_filter_flags_size(uint32_t *size, const lzma_options_filter *options) { - // Get size of Filter Properties. + // Get size of Filter Properties. This also validates the Filter ID. uint32_t prop_size; - const lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Size of Filter ID field if it exists. - size_t id_size; - size_t prop_size_size; - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. 
- id_size = 0; - prop_size_size = 0; - - } else { - // At least Filter ID is stored using the External ID field. - id_size = lzma_vli_size(options->id); - if (id_size == 0) - return LZMA_PROG_ERROR; - - if (prop_size <= 30) { - // Size of Filter Properties fits into Misc still. - prop_size_size = 0; - } else { - // The Size of Filter Properties field is used too. - prop_size_size = lzma_vli_size(prop_size); - if (prop_size_size == 0) - return LZMA_PROG_ERROR; - } - } + return_if_error(get_properties_size(&prop_size, options)); - // 1 is for the Misc field. - *size = 1 + id_size + prop_size_size + prop_size; + // Calculate the size of the Filter ID and Size of Properties fields. + // These cannot fail since get_properties_size() already succeeded. + *size = lzma_vli_size(options->id) + lzma_vli_size(prop_size) + + prop_size; return LZMA_OK; } @@ -149,10 +114,10 @@ properties_simple(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_OK; if (out_size - *out_pos < 4) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; - for (size_t i = 0; i < 4; ++i) - out[(*out_pos)++] = options->start_offset >> (i * 8); + integer_write_32(out + *out_pos, options->start_offset); + *out_pos += 4; return LZMA_OK; } @@ -175,7 +140,7 @@ properties_delta(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_HEADER_ERROR; if (out_size - *out_pos < 1) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; out[*out_pos] = options->distance - LZMA_DELTA_DISTANCE_MIN; ++*out_pos; @@ -195,7 +160,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_PROG_ERROR; if (out_size - *out_pos < 2) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; // LZMA Properties if (lzma_lzma_encode_properties(options, out + *out_pos)) @@ -230,7 +195,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, ++d; // Get the highest two bits using the proper encoding: - out[*out_pos] = get_pos_slot(d) - 1; + out[*out_pos] = get_pos_slot(d) - 24; ++*out_pos; return 
LZMA_OK; @@ -250,58 +215,19 @@ lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, size_t out_size, // Get size of Filter Properties. uint32_t prop_size; - lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Misc, External ID, and Size of Properties - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - out[*out_pos] = options->id; - ++*out_pos; - - } else if (prop_size <= 30) { - // Size of Filter Properties fits into Misc. - out[*out_pos] = prop_size + 0xE0; - ++*out_pos; - - // External ID is used to encode the Filter ID. If encoding - // the VLI fails, it's because the caller has given as too - // little output space, which it should have checked already. - // So return LZMA_PROG_ERROR, not LZMA_BUF_ERROR. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } else { - // Nothing fits into Misc. - out[*out_pos] = 0xFF; - ++*out_pos; - - // External ID is used to encode the Filter ID. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - // External Size of Filter Properties - dummy = 0; - if (lzma_vli_encode(prop_size, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + return_if_error(get_properties_size(&prop_size, options)); + + // Filter ID + return_if_error(lzma_vli_encode(options->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + return_if_error(lzma_vli_encode(prop_size, NULL, + out, out_pos, out_size)); // Filter Properties + lzma_ret ret; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - assert(prop_size == 0); - ret = options->options == NULL ? 
LZMA_OK : LZMA_HEADER_ERROR; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: assert(prop_size == 0); diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 6816b37a..f01206de 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c -/// \brief Handling of Index in Metadata +/// \brief Handling of Index // // Copyright (C) 2007 Lasse Collin // @@ -17,124 +17,733 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "index.h" -/** - * \brief Duplicates an Index list - * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. - */ -extern LZMA_API lzma_index * -lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator) +/// Number of Records to allocate at once. +#define INDEX_GROUP_SIZE 256 + + +typedef struct lzma_index_group_s lzma_index_group; +struct lzma_index_group_s { + /// Next group + lzma_index_group *prev; + + /// Previous group + lzma_index_group *next; + + /// Index of the last Record in this group + size_t last; + + /// Total Size fields as cumulative sum relative to the beginning + /// of the group. The total size of the group is total_sums[last]. + lzma_vli total_sums[INDEX_GROUP_SIZE]; + + /// Uncompressed Size fields as cumulative sum relative to the + /// beginning of the group. The uncompressed size of the group is + /// uncompressed_sums[last]. + lzma_vli uncompressed_sums[INDEX_GROUP_SIZE]; + + /// True if the Record is padding + bool paddings[INDEX_GROUP_SIZE]; +}; + + +struct lzma_index_s { + /// Total size of the Blocks and padding + lzma_vli total_size; + + /// Uncompressed size of the Stream + lzma_vli uncompressed_size; + + /// Number of non-padding records. This is needed by Index encoder. 
+ lzma_vli count; + + /// Size of the List of Records field; this is updated every time + /// a new non-padding Record is added. + lzma_vli index_list_size; + + /// This is zero if no Indexes have been combined with + /// lzma_index_cat(). With combined Indexes, this contains the sizes + /// of all but latest the Streams, including possible Stream Padding + /// fields. + lzma_vli padding_size; + + /// First group of Records + lzma_index_group *head; + + /// Last group of Records + lzma_index_group *tail; + + /// Tracking the read position + struct { + /// Group where the current read position is. + lzma_index_group *group; + + /// The most recently read record in *group + lzma_vli record; + + /// Uncompressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli uncompressed_offset; + + /// Compressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli stream_offset; + } current; + + /// Information about earlier Indexes when multiple Indexes have + /// been combined. + struct { + /// Sum of the Record counts of the all but the last Stream. + lzma_vli count; + + /// Sum of the List of Records fields of all but the last + /// Stream. This is needed when a new Index is concatenated + /// to this lzma_index structure. 
+ lzma_vli index_list_size; + } old; +}; + + +static void +free_index_list(lzma_index *i, lzma_allocator *allocator) { - lzma_index *new_head = NULL; - lzma_index *new_current = NULL; + lzma_index_group *g = i->head; - while (old_current != NULL) { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) { - lzma_index_free(new_head, allocator); - return NULL; - } + while (g != NULL) { + lzma_index_group *tmp = g->next; + lzma_free(g, allocator); + g = tmp; + } - i->total_size = old_current->total_size; - i->uncompressed_size = old_current->uncompressed_size; - i->next = NULL; + return; +} - if (new_head == NULL) - new_head = i; - else - new_current->next = i; - new_current = i; - old_current = old_current->next; +extern LZMA_API lzma_index * +lzma_index_init(lzma_index *i, lzma_allocator *allocator) +{ + if (i == NULL) { + i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return NULL; + } else { + free_index_list(i, allocator); } - return new_head; + i->total_size = 0; + i->uncompressed_size = 0; + i->count = 0; + i->index_list_size = 0; + i->padding_size = 0; + i->head = NULL; + i->tail = NULL; + i->current.group = NULL; + i->old.count = 0; + i->old.index_list_size = 0; + + return i; } -/** - * \brief Frees an Index list - * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. 
- */ extern LZMA_API void -lzma_index_free(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) { - while (i != NULL) { - lzma_index *tmp = i->next; + if (i != NULL) { + free_index_list(i, allocator); lzma_free(i, allocator); - i = tmp; } return; } -/** - * \brief Calculates properties of an Index list - * - * - */ -extern LZMA_API lzma_ret -lzma_index_count(const lzma_index *i, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size) -{ - *count = 0; - *total_size = 0; - *uncompressed_size = 0; - - while (i != NULL) { - if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) { - *total_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->total_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) { - *total_size += i->total_size; - if (*total_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; +extern LZMA_API lzma_vli +lzma_index_count(const lzma_index *i) +{ + return i->count; +} + + +extern LZMA_API lzma_vli +lzma_index_size(const lzma_index *i) +{ + return index_size(i->count, i->index_list_size); +} + + +extern LZMA_API lzma_vli +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API lzma_vli +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +extern LZMA_API lzma_vli +lzma_index_file_size(const lzma_index *i) +{ + // If multiple Streams are concatenated, the Stream Header, Index, + // and Stream Footer fields of all but the last Stream are already + // included in padding_size. Thus, we need to calculate only the + // size of the last Index, not all Indexes. 
+ return i->total_size + i->padding_size + + index_size(i->count - i->old.count, + i->index_list_size - i->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; +} + + +extern LZMA_API lzma_vli +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) + - index_size_unpadded(i->count, i->index_list_size)) & 3; +} + + +/// Helper function for index_append() +static lzma_ret +index_append_real(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size, + bool is_padding) +{ + // Add the new record. + if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) { + // Allocate a new group. + lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + // Initialize the group and set its first record. + g->prev = i->tail; + g->next = NULL; + g->last = 0; + g->total_sums[0] = total_size; + g->uncompressed_sums[0] = uncompressed_size; + g->paddings[0] = is_padding; + + // If this is the first group, make it the head. + if (i->head == NULL) + i->head = g; + else + i->tail->next = g; + + // Make it the new tail. + i->tail = g; + + } else { + // i->tail has space left for at least one record. + i->tail->total_sums[i->tail->last + 1] + = i->tail->total_sums[i->tail->last] + + total_size; + i->tail->uncompressed_sums[i->tail->last + 1] + = i->tail->uncompressed_sums[i->tail->last] + + uncompressed_size; + i->tail->paddings[i->tail->last + 1] = is_padding; + ++i->tail->last; + } + + return LZMA_OK; +} + + +static lzma_ret +index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, + lzma_vli uncompressed_size, bool is_padding) +{ + if (total_size > LZMA_VLI_VALUE_MAX + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + // This looks a bit ugly. 
We want to first validate that the Index + // and Stream stay in valid limits after adding this Record. After + // validating, we may need to allocate a new lzma_index_group (it's + // slightly more correct to validate before allocating, YMMV). + lzma_ret ret; + + if (is_padding) { + assert(uncompressed_size == 0); + + // First update the info so we can validate it. + i->padding_size += total_size; + + if (i->padding_size > LZMA_VLI_VALUE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, true); + + // If something went wrong, undo the updated value. + if (ret != LZMA_OK) + i->padding_size -= total_size; + + } else { + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add + = lzma_vli_size(total_size / 4 - 1) + + lzma_vli_size(uncompressed_size); + + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; + + if (i->total_size > LZMA_VLI_VALUE_MAX + || i->uncompressed_size > LZMA_VLI_VALUE_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } + } + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size) +{ + return index_append(i, allocator, + total_size, uncompressed_size, false); +} + + +/// Initialize i->current to point to the first Record. 
+static bool +init_current(lzma_index *i) +{ + if (i->head == NULL) { + assert(i->count == 0); + return true; + } + + assert(i->count > 0); + + i->current.group = i->head; + i->current.record = 0; + i->current.stream_offset = LZMA_STREAM_HEADER_SIZE; + i->current.uncompressed_offset = 0; - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size += i->uncompressed_size; - if (*uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; + return false; +} + + +/// Go backward to the previous group. +static void +previous_group(lzma_index *i) +{ + assert(i->current.group->prev != NULL); + + // Go to the previous group first. + i->current.group = i->current.group->prev; + i->current.record = i->current.group->last; + + // Then update the offsets. + i->current.stream_offset -= i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset -= i->current.group + ->uncompressed_sums[i->current.group->last]; + + return; +} + + +/// Go forward to the next group. +static void +next_group(lzma_index *i) +{ + assert(i->current.group->next != NULL); + + // Update the offsets first. + i->current.stream_offset += i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.group->last]; + + // Then go to the next group. + i->current.record = 0; + i->current.group = i->current.group->next; + + return; +} + + +/// Set *info from i->current. 
+static void +set_info(const lzma_index *i, lzma_index_record *info) +{ + info->total_size = i->current.group->total_sums[i->current.record]; + info->uncompressed_size = i->current.group->uncompressed_sums[ + i->current.record]; + + info->stream_offset = i->current.stream_offset; + info->uncompressed_offset = i->current.uncompressed_offset; + + // If it's not the first Record in this group, we need to do some + // adjustements. + if (i->current.record > 0) { + // _sums[] are cumulative, thus we need to substract the + // _previous _sums[] to get the sizes of this Record. + info->total_size -= i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_size -= i->current.group + ->uncompressed_sums[i->current.record - 1]; + + // i->current.{total,uncompressed}_offsets have the offset + // of the beginning of the group, thus we need to add the + // appropriate amount to get the offsetes of this Record. + info->stream_offset += i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.record - 1]; + } + + return; +} + + +extern LZMA_API lzma_bool +lzma_index_read(lzma_index *i, lzma_index_record *info) +{ + if (i->current.group == NULL) { + // We are at the beginning of the Record list. Set up + // i->current point at the first Record. Return if there + // are no Records. + if (init_current(i)) + return true; + } else do { + // Try to go the next Record. + if (i->current.record < i->current.group->last) + ++i->current.record; + else if (i->current.group->next == NULL) + return true; + else + next_group(i); + } while (i->current.group->paddings[i->current.record]); + + // We found a new Record. Set the information to *info. 
+ set_info(i, info); + + return false; +} + + +extern LZMA_API void +lzma_index_rewind(lzma_index *i) +{ + i->current.group = NULL; + return; +} + + +extern LZMA_API lzma_bool +lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target) +{ + // Check if it is possible to fullfill the request. + if (target >= i->uncompressed_size) + return true; + + // Now we know that we will have an answer. Initialize the current + // read position if needed. + if (i->current.group == NULL && init_current(i)) + return true; + + // Locate the group where the wanted Block is. First search forward. + while (i->current.uncompressed_offset <= target) { + // If the first uncompressed byte of the next group is past + // the target offset, it has to be this or an earlier group. + if (i->current.uncompressed_offset + i->current.group + ->uncompressed_sums[i->current.group->last] + > target) + break; + + // Go forward to the next group. + next_group(i); + } + + // Then search backward. + while (i->current.uncompressed_offset > target) + previous_group(i); + + // Now the target Block is somewhere in i->current.group. Offsets + // in groups are relative to the beginning of the group, thus + // we must adjust the target before starting the search loop. + assert(target >= i->current.uncompressed_offset); + target -= i->current.uncompressed_offset; + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sums[] value is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks or + // padding, we don't want to return them. 
+ size_t left = 0; + size_t right = i->current.group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (i->current.group->uncompressed_sums[pos] <= target) + left = pos + 1; + else + right = pos; + } + + i->current.record = left; + +#ifndef NDEBUG + // The found Record must not be padding or have zero uncompressed size. + assert(!i->current.group->paddings[i->current.record]); + + if (i->current.record == 0) + assert(i->current.group->uncompressed_sums[0] > 0); + else + assert(i->current.group->uncompressed_sums[i->current.record] + - i->current.group->uncompressed_sums[ + i->current.record - 1] > 0); +#endif + + set_info(i, info); + + return false; +} + + +extern LZMA_API lzma_ret +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator, lzma_vli padding) +{ + if (dest == NULL || src == NULL || dest == src + || padding > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Check that the combined size of the Indexes stays within limits. + { + const lzma_vli dest_size = lzma_index_file_size(dest); + const lzma_vli src_size = lzma_index_file_size(src); + if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN + || dest_size + src_size + padding + > LZMA_VLI_VALUE_UNKNOWN) + return LZMA_DATA_ERROR; + } + + // Add a padding Record to take into account the size of + // Index + Stream Footer + Stream Padding + Stream Header. + // + // NOTE: This cannot overflow, because Index Size is always + // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs + // (Index Size and padding) doesn't overflow. It may become + // an invalid VLI if padding is huge, but that is caught by + // index_append(). + padding += index_size(dest->count - dest->old.count, + dest->index_list_size + - dest->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; + + // Add the padding Record. 
+ return_if_error(index_append( + dest, allocator, padding, 0, true)); + + // Avoid wasting lots of memory if src->head has only a few records + // that fit into dest->tail. That is, combine two groups if possible. + // + // NOTE: We know that dest->tail != NULL since we just appended + // a padding Record. But we don't know about src->head. + if (src->head != NULL && src->head->last + 1 + <= INDEX_GROUP_SIZE - dest->tail->last - 1) { + // Copy the first Record. + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[0]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[dest->tail->last] + + src->head->uncompressed_sums[0]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[0]; + + ++dest->tail->last; + + // Copy the rest. + for (size_t i = 1; i < src->head->last; ++i) { + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[i + 1] + - src->head->total_sums[i]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[ + dest->tail->last] + + src->head->uncompressed_sums[i + 1] + - src->head->uncompressed_sums[i]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[i + 1]; + + ++dest->tail->last; } - ++*count; - i = i->next; + // Free the head group of *src. Don't bother updating prev + // pointers since those won't be used for anything before + // we deallocate the whole *src structure. + lzma_index_group *tmp = src->head; + src->head = src->head->next; + lzma_free(tmp, allocator); + } + + // If there are groups left in *src, join them as is. Note that if we + // are combining already combined Indexes, src->head can be non-NULL + // even if we just combined the old src->head to dest->tail. + if (src->head != NULL) { + src->head->prev = dest->tail; + dest->tail->next = src->head; + dest->tail = src->tail; } - // FIXME ? 
- if (*total_size == LZMA_VLI_VALUE_UNKNOWN - || *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - return LZMA_HEADER_ERROR; + // Update information about earlier Indexes. Only the last Index + // from *src won't be counted in dest->old. The last Index is left + // open and can be even appended with lzma_index_append(). + dest->old.count = dest->count + src->old.count; + dest->old.index_list_size + = dest->index_list_size + src->old.index_list_size; + + // Update overall information. + dest->total_size += src->total_size; + dest->uncompressed_size += src->uncompressed_size; + dest->count += src->count; + dest->index_list_size += src->index_list_size; + dest->padding_size += src->padding_size; + + // *src has nothing left but the base structure. + lzma_free(src, allocator); return LZMA_OK; } +extern LZMA_API lzma_index * +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator); + if (dest == NULL) + return NULL; + + // Copy the base structure except the pointers. + *dest = *src; + dest->head = NULL; + dest->tail = NULL; + dest->current.group = NULL; + + // Copy the Records. + const lzma_index_group *src_group = src->head; + while (src_group != NULL) { + // Allocate a new group. + lzma_index_group *dest_group = lzma_alloc( + sizeof(lzma_index_group), allocator); + if (dest_group == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + // Set the pointers. + dest_group->prev = dest->tail; + dest_group->next = NULL; + + if (dest->head == NULL) + dest->head = dest_group; + else + dest->tail->next = dest_group; + + dest->tail = dest_group; + + dest_group->last = src_group->last; + + // Copy the arrays so that we don't read uninitialized memory. 
+ const size_t count = src_group->last + 1; + memcpy(dest_group->total_sums, src_group->total_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->uncompressed_sums, + src_group->uncompressed_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->paddings, src_group->paddings, + sizeof(bool) * count); + + // Copy also the read position. + if (src_group == src->current.group) + dest->current.group = dest->tail; + + src_group = src_group->next; + } + + return dest; +} + extern LZMA_API lzma_bool -lzma_index_is_equal(const lzma_index *a, const lzma_index *b) +lzma_index_equal(const lzma_index *a, const lzma_index *b) { - while (a != NULL && b != NULL) { - if (a->total_size != b->total_size || a->uncompressed_size - != b->uncompressed_size) + // No point to compare more if the pointers are the same. + if (a == b) + return true; + + // Compare the basic properties. + if (a->total_size != b->total_size + || a->uncompressed_size != b->uncompressed_size + || a->index_list_size != b->index_list_size + || a->count != b->count) + return false; + + // Compare the Records. 
+ const lzma_index_group *ag = a->head; + const lzma_index_group *bg = b->head; + while (ag != NULL && bg != NULL) { + const size_t count = ag->last + 1; + if (ag->last != bg->last + || memcmp(ag->total_sums, + bg->total_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->uncompressed_sums, + bg->uncompressed_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->paddings, bg->paddings, + sizeof(bool) * count) != 0) return false; - a = a->next; - b = b->next; + ag = ag->next; + bg = bg->next; } - return a == b; + return ag == NULL && bg == NULL; } diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h new file mode 100644 index 00000000..303ad43a --- /dev/null +++ b/src/liblzma/common/index.h @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Maximum encoded value of Total Size. +#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_VALUE_MAX / 4 - 1) + +/// Convert the real Total Size value to a value that is stored to the Index. +#define total_size_encode(size) ((size) / 4 - 1) + +/// Convert the encoded Total Size value from Index to the real Total Size. 
+#define total_size_decode(size) (((size) + 1) * 4) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + // Round up to a mulitiple of four. + return (index_size_unpadded(count, index_list_size) + 3) + & ~LZMA_VLI_C(3); +} + + +static inline lzma_vli +index_stream_size( + lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + total_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000..1635948c --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Target Index + lzma_index *index; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Total Size field + lzma_vli total_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. 
+ if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_TOTAL + ? &coder->total_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_TOTAL) { + // Validate that encoded Total Size isn't too big. + if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + // Convert the encoded Total Size to the real + // Total Size. + coder->total_size = total_size_decode( + coder->total_size); + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->total_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. 
+ coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + // We always allocate a new lzma_index. + *i = lzma_index_init(NULL, allocator); + if (*i == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. 
+ next->coder->sequence = SEQ_INDICATOR; + next->coder->index = *i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + lzma_next_coder_init(index_decoder_init, next, allocator, i); +} +*/ + + +extern LZMA_API lzma_ret +lzma_index_decoder(lzma_stream *strm, lzma_index **i) +{ + lzma_next_strm_init(strm, index_decoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c new file mode 100644 index 00000000..5a7d8c8c --- /dev/null +++ b/src/liblzma/common/index_encoder.c @@ -0,0 +1,222 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index given to us to encode. Note that we modify it in sense that + /// we read it, and read position is tracked in lzma_index structure. 
+ lzma_index *index; + + /// The current Index Record being encoded + lzma_index_record record; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli index_count = lzma_index_count(coder->index); + ret = lzma_vli_encode(index_count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_read(coder->index, &coder->record)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + // Total Size must be a multiple of four. + if (coder->record.total_size & 3) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_TOTAL; + + // Fall through + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_TOTAL + ? 
total_size_encode(coder->record.total_size) + : coder->record.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + lzma_index_rewind(i); + + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + lzma_next_coder_init(index_encoder_init, next, allocator, i); +} + + +extern LZMA_API lzma_ret +lzma_index_encoder(lzma_stream 
*strm, lzma_index *i) +{ + lzma_next_strm_init(strm, index_encoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/stream_encoder_multi.h b/src/liblzma/common/index_encoder.h index e0ff02f3..0087c284 100644 --- a/src/liblzma/common/stream_encoder_multi.h +++ b/src/liblzma/common/index_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file stream_encoder_multi.h -/// \brief Encodes Multi-Block .lzma files +/// \file index_encoder.h +/// \brief Encodes the Index field // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,10 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_STREAM_ENCODER_MULTI_H -#define LZMA_STREAM_ENCODER_MULTI_H +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_index *i); -extern lzma_ret lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); #endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c new file mode 100644 index 00000000..35dea41f --- /dev/null +++ b/src/liblzma/common/index_hash.c @@ -0,0 +1,340 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Total Size fields + lzma_vli total_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Total Sizes and Uncompressed Sizes. + lzma_check check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Total Size currently being read from an Index Record. + lzma_vli total_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. 
+ size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API lzma_index_hash * +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.total_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.total_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->total_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API void +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API lzma_vli +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. 
+static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + info->total_size += total_size; + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { total_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK || total_size == 0 || + total_size > LZMA_VLI_VALUE_MAX || (total_size & 3) + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + total_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.total_size > LZMA_VLI_VALUE_MAX + || index_hash->blocks.uncompressed_size + > LZMA_VLI_VALUE_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.total_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. 
+ if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_TOTAL + ? &index_hash->total_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_TOTAL) { + if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + index_hash->total_size = total_size_decode( + index_hash->total_size); + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->total_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. 
Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.total_size + < index_hash->records.total_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.total_size + != index_hash->records.total_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer, + index_hash->records.check.buffer, + lzma_check_sizes[LZMA_CHECK_BEST]) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. 
+ index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} diff --git a/src/liblzma/common/info.c b/src/liblzma/common/info.c deleted file mode 100644 index ab7fc999..00000000 --- a/src/liblzma/common/info.c +++ /dev/null @@ -1,814 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file info.c -/// \brief Collects and verifies integrity of Stream size information -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -struct lzma_info_s { - struct { - /// Known Size of Header Metadata Block; here's some - /// special things: - /// - LZMA_VLI_VALUE_UNKNOWN indicates that we don't know - /// if Header Metadata Block is present. - /// - 0 indicates that Header Metadata Block is not present. 
- lzma_vli header_metadata_size; - - /// Known Total Size of the Data Blocks in the Stream - lzma_vli total_size; - - /// Known Uncompressed Size of the Data Blocks in the Stream - lzma_vli uncompressed_size; - - /// Known Size of Footer Metadata Block - lzma_vli footer_metadata_size; - } known; - - struct { - /// Sum of Total Size fields stored to the Index so far - lzma_vli total_size; - - /// Sum of Uncompressed Size fields stored to the Index so far - lzma_vli uncompressed_size; - - /// First Index Record in the list, or NULL if Index is empty. - lzma_index *head; - - /// Number of Index Records - size_t record_count; - - /// Number of Index Records - size_t incomplete_count; - - /// True when we know that no more Records will get added - /// to the Index. - bool is_final; - } index; - - /// Start offset of the Stream. This is needed to calculate - /// lzma_info_iter.stream_offset. - lzma_vli stream_start_offset; - - /// True if Index is present in Header Metadata Block - bool has_index_in_header_metadata; -}; - - -////////////////////// -// Create/Reset/End // -////////////////////// - -static void -index_init(lzma_info *info) -{ - info->index.total_size = 0; - info->index.uncompressed_size = 0; - info->index.head = NULL; - info->index.record_count = 0; - info->index.incomplete_count = 0; - info->index.is_final = false; - return; -} - - -static void -info_init(lzma_info *info) -{ - info->known.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.total_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->stream_start_offset = 0; - info->has_index_in_header_metadata = false; - - index_init(info); - - return; -} - - -extern LZMA_API lzma_info * -lzma_info_init(lzma_info *info, lzma_allocator *allocator) -{ - if (info == NULL) - info = lzma_alloc(sizeof(lzma_info), allocator); - else - lzma_index_free(info->index.head, allocator); - - if (info != NULL) 
- info_init(info); - - return info; -} - - -extern LZMA_API void -lzma_info_free(lzma_info *info, lzma_allocator *allocator) -{ - lzma_index_free(info->index.head, allocator); - lzma_free(info, allocator); - return; -} - - -///////// -// Set // -///////// - -static lzma_ret -set_size(lzma_vli new_size, lzma_vli *known_size, lzma_vli index_size, - bool forbid_zero) -{ - assert(new_size <= LZMA_VLI_VALUE_MAX); - - lzma_ret ret = LZMA_OK; - - if (forbid_zero && new_size == 0) - ret = LZMA_PROG_ERROR; - else if (index_size > new_size) - ret = LZMA_DATA_ERROR; - else if (*known_size == LZMA_VLI_VALUE_UNKNOWN) - *known_size = new_size; - else if (*known_size != new_size) - ret = LZMA_DATA_ERROR; - - return ret; -} - - -extern LZMA_API lzma_ret -lzma_info_size_set(lzma_info *info, lzma_info_size type, lzma_vli size) -{ - if (size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - switch (type) { - case LZMA_INFO_STREAM_START: - info->stream_start_offset = size; - return LZMA_OK; - - case LZMA_INFO_HEADER_METADATA: - return set_size(size, &info->known.header_metadata_size, - 0, false); - - case LZMA_INFO_TOTAL: - return set_size(size, &info->known.total_size, - info->index.total_size, true); - - case LZMA_INFO_UNCOMPRESSED: - return set_size(size, &info->known.uncompressed_size, - info->index.uncompressed_size, false); - - case LZMA_INFO_FOOTER_METADATA: - return set_size(size, &info->known.footer_metadata_size, - 0, true); - } - - return LZMA_PROG_ERROR; -} - - -extern LZMA_API lzma_ret -lzma_info_index_set(lzma_info *info, lzma_allocator *allocator, - lzma_index *i_new, lzma_bool eat_index) -{ - if (i_new == NULL) - return LZMA_PROG_ERROR; - - lzma_index *i_old = info->index.head; - - if (i_old != NULL) { - while (true) { - // If the new Index has fewer Records than the old one, - // the new Index cannot be valid. - if (i_new == NULL) - return LZMA_DATA_ERROR; - - // The new Index must be complete i.e. no unknown - // values. 
- if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_PROG_ERROR; - } - - // Compare the values from the new Index with the old - // Index. The old Index may be incomplete; in that - // case we - // - use the value from the new Index as is; - // - update the appropriate info->index.foo_size; and - // - decrease the count of incomplete Index Records. - bool was_incomplete = false; - - if (i_old->total_size == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->total_size = i_new->total_size; - - if (lzma_vli_add(info->index.total_size, - i_new->total_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->total_size != i_new->total_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (i_old->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->uncompressed_size - = i_new->uncompressed_size; - - if (lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->uncompressed_size - != i_new->uncompressed_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (was_incomplete) { - assert(!info->index.is_final); - assert(info->index.incomplete_count > 0); - --info->index.incomplete_count; - } - - // Get rid of *i_new. It's now identical with *i_old. - lzma_index *tmp = i_new->next; - if (eat_index) - lzma_free(i_new, allocator); - - i_new = tmp; - - // We want to leave i_old pointing to the last - // Index Record in the old Index. This way we can - // concatenate the possible new Records from i_new. 
- if (i_old->next == NULL) - break; - - i_old = i_old->next; - } - } - - assert(info->index.incomplete_count == 0); - - // If Index was already known to be final, i_new must be NULL now. - // The new Index cannot contain more Records that we already have. - if (info->index.is_final) { - assert(info->index.head != NULL); - - if (i_new != NULL) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - return LZMA_OK; - } - - // The rest of the new Index is merged to the old Index. Keep the - // current i_new pointer in available. We need it when merging the - // new Index with the old one, and if an error occurs so we can - // get rid of the broken part of the new Index. - lzma_index *i_start = i_new; - while (i_new != NULL) { - // The new Index must be complete i.e. no unknown values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - // Update info->index.foo_sizes. - if (lzma_vli_add(info->index.total_size, i_new->total_size) - || lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - ++info->index.record_count; - i_new = i_new->next; - } - - // All the Records in the new Index are good, and info->index.foo_sizes - // were successfully updated. - if (lzma_info_index_finish(info) != LZMA_OK) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_DATA_ERROR; - } - - // The Index is ready to be merged. If we aren't supposed to eat - // the Index, make a copy of it first. - if (!eat_index && i_start != NULL) { - i_start = lzma_index_dup(i_start, allocator); - if (i_start == NULL) - return LZMA_MEM_ERROR; - } - - // Concatenate the new Index with the old one. Note that it is - // possible that we don't have any old Index. 
- if (info->index.head == NULL) - info->index.head = i_start; - else - i_old->next = i_start; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_metadata_set(lzma_info *info, lzma_allocator *allocator, - lzma_metadata *metadata, lzma_bool is_header_metadata, - lzma_bool eat_index) -{ - // Validate *metadata. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || !lzma_vli_is_valid(metadata->uncompressed_size)) { - if (eat_index) { - lzma_index_free(metadata->index, allocator); - metadata->index = NULL; - } - - return LZMA_PROG_ERROR; - } - - // Index - if (metadata->index != NULL) { - if (is_header_metadata) - info->has_index_in_header_metadata = true; - - const lzma_ret ret = lzma_info_index_set( - info, allocator, metadata->index, eat_index); - - if (eat_index) - metadata->index = NULL; - - if (ret != LZMA_OK) - return ret; - - } else if (!is_header_metadata - && (metadata->total_size == LZMA_VLI_VALUE_UNKNOWN - || !info->has_index_in_header_metadata)) { - // Either Total Size or Index must be present in Footer - // Metadata Block. If Index is not present, it must have - // already been in the Header Metadata Block. Since we - // got here, these conditions weren't met. 
- return LZMA_DATA_ERROR; - } - - // Size of Header Metadata - if (!is_header_metadata) - return_if_error(lzma_info_size_set( - info, LZMA_INFO_HEADER_METADATA, - metadata->header_metadata_size)); - - // Total Size - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_TOTAL, metadata->total_size)); - - // Uncompressed Size - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_UNCOMPRESSED, - metadata->uncompressed_size)); - - return LZMA_OK; -} - - -///////// -// Get // -///////// - -extern LZMA_API lzma_vli -lzma_info_size_get(const lzma_info *info, lzma_info_size type) -{ - switch (type) { - case LZMA_INFO_STREAM_START: - return info->stream_start_offset; - - case LZMA_INFO_HEADER_METADATA: - return info->known.header_metadata_size; - - case LZMA_INFO_TOTAL: - return info->known.total_size; - - case LZMA_INFO_UNCOMPRESSED: - return info->known.uncompressed_size; - - case LZMA_INFO_FOOTER_METADATA: - return info->known.footer_metadata_size; - } - - return LZMA_VLI_VALUE_UNKNOWN; -} - - -extern LZMA_API lzma_index * -lzma_info_index_get(lzma_info *info, lzma_bool detach) -{ - lzma_index *i = info->index.head; - - if (detach) - index_init(info); - - return i; -} - - -extern LZMA_API size_t -lzma_info_index_count_get(const lzma_info *info) -{ - return info->index.record_count; -} - - -///////////////// -// Incremental // -///////////////// - -enum { - ITER_INFO, - ITER_INDEX, - ITER_RESERVED_1, - ITER_RESERVED_2, -}; - - -#define iter_info ((lzma_info *)(iter->internal[ITER_INFO])) - -#define iter_index ((lzma_index *)(iter->internal[ITER_INDEX])) - - -extern LZMA_API void -lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter) -{ - *iter = (lzma_info_iter){ - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .stream_offset = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_offset = LZMA_VLI_VALUE_UNKNOWN, - .internal = 
{ info, NULL, NULL, NULL }, - }; - - return; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_next(lzma_info_iter *iter, lzma_allocator *allocator) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL) { - // The first call after lzma_info_iter_begin(). - if (iter_info->known.header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - - iter->uncompressed_offset = 0; - - if (iter_info->index.head != NULL) { - // The first Index Record has already been allocated. - iter->internal[ITER_INDEX] = iter_info->index.head; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } else { - // Update iter->*_offsets. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->stream_offset, - iter_index->total_size)) - return LZMA_DATA_ERROR; - } - - if (iter->uncompressed_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->uncompressed_offset - = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->uncompressed_offset, - iter_index->uncompressed_size)) - return LZMA_DATA_ERROR; - } - - if (iter_index->next != NULL) { - // The next Record has already been allocated. - iter->internal[ITER_INDEX] = iter_index->next; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } - - // Don't add new Records to a final Index. - if (iter_info->index.is_final) - return LZMA_DATA_ERROR; - - // Allocate and initialize a new Index Record. 
- lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = LZMA_VLI_VALUE_UNKNOWN; - i->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - i->next = NULL; - - iter->total_size = LZMA_VLI_VALUE_UNKNOWN; - iter->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // Decide where to put the new Index Record. - if (iter_info->index.head == NULL) - iter_info->index.head = i; - - if (iter_index != NULL) - iter_index->next = i; - - iter->internal[ITER_INDEX] = i; - - ++iter_info->index.record_count; - ++iter_info->index.incomplete_count; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL || !lzma_vli_is_valid(total_size) - || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - const bool was_incomplete = iter_index->total_size - == LZMA_VLI_VALUE_UNKNOWN - || iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - - if (total_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->total_size = total_size; - - if (lzma_vli_add(iter_info->index.total_size, - total_size) - || iter_info->index.total_size - > iter_info->known.total_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->total_size != total_size) { - return LZMA_DATA_ERROR; - } - } - - if (uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->uncompressed_size = uncompressed_size; - - if (lzma_vli_add(iter_info->index.uncompressed_size, - uncompressed_size) - || iter_info->index.uncompressed_size - > iter_info->known.uncompressed_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->uncompressed_size - != uncompressed_size) { - return LZMA_DATA_ERROR; - } - } - - // Check if the new information we got managed to 
finish this - // Index Record. If so, update the count of incomplete Index Records. - if (was_incomplete && iter_index->total_size - != LZMA_VLI_VALUE_UNKNOWN - && iter_index->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - assert(iter_info->index.incomplete_count > 0); - --iter_info->index.incomplete_count; - } - - // Make sure that the known sizes are now available in *iter. - iter->total_size = iter_index->total_size; - iter->uncompressed_size = iter_index->uncompressed_size; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_index_finish(lzma_info *info) -{ - if (info->index.record_count == 0 || info->index.incomplete_count > 0 - || lzma_info_size_set(info, LZMA_INFO_TOTAL, - info->index.total_size) - || lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, - info->index.uncompressed_size)) - return LZMA_DATA_ERROR; - - info->index.is_final = true; - - return LZMA_OK; -} - - -////////////// -// Locating // -////////////// - -extern LZMA_API lzma_vli -lzma_info_metadata_locate(const lzma_info *info, lzma_bool is_header_metadata) -{ - bool error = false; - lzma_vli size = 0; - - if (info->known.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN) { - // We don't know if Header Metadata Block is present, thus - // we cannot locate it either. - // - // Well, you could say that just assume that it is present. - // I'm not sure if this is useful. But it can be useful to - // be able to use this function and get LZMA_VLI_VALUE_UNKNOWN - // to detect that Header Metadata Block wasn't present. - error = true; - } else if (is_header_metadata) { - error = lzma_vli_sum(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE); - } else if (!info->index.is_final) { - // Since we don't know if we have all the Index Records yet, - // we cannot know where the Footer Metadata Block is. 
- error = true; - } else { - error = lzma_vli_sum4(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - info->known.header_metadata_size, - info->known.total_size); - } - - return error ? LZMA_VLI_VALUE_UNKNOWN : size; -} - - -extern LZMA_API uint32_t -lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata) -{ - uint32_t alignment; - - if (is_header_metadata) { - alignment = info->stream_start_offset - + LZMA_STREAM_HEADER_SIZE; - } else { - alignment = info->stream_start_offset + LZMA_STREAM_HEADER_SIZE - + info->known.header_metadata_size - + info->known.total_size; - } - - return alignment; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_locate(lzma_info_iter *iter, lzma_allocator *allocator, - lzma_vli uncompressed_offset, lzma_bool allow_alloc) -{ - if (iter == NULL || uncompressed_offset > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - // Quick check in case Index is final. - if (iter_info->index.is_final) { - assert(iter_info->known.uncompressed_size - == iter_info->index.uncompressed_size); - if (uncompressed_offset >= iter_info->index.uncompressed_size) - return LZMA_DATA_ERROR; - } - - // TODO: Optimize so that it uses existing info from *iter when - // seeking forward. - - // Initialize *iter - if (iter_info->known.header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) { - if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - } else { - // We don't know the Size of Header Metadata Block, thus - // we cannot calculate the Stream offset either. - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - } - - iter->uncompressed_offset = 0; - - // If we have no Index Records, it's obvious that we need to - // add a new one. 
- if (iter_info->index.head == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - return lzma_info_iter_next(iter, allocator); - } - - // Locate an appropriate Index Record. - lzma_index *i = iter_info->index.head; - while (true) { - // - If Uncompressed Size in the Record is unknown, - // we have no chance to search further. - // - If the next Record would go past the requested offset, - // we have found our target Data Block. - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - || iter->uncompressed_offset - + i->uncompressed_size > uncompressed_offset) { - iter->total_size = i->total_size; - iter->uncompressed_size = i->uncompressed_size; - iter->internal[ITER_INDEX] = i; - return LZMA_OK; - } - - // Update the stream offset. It may be unknown if we didn't - // know the size of Header Metadata Block. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) - if (lzma_vli_add(iter->stream_offset, i->total_size)) - return LZMA_PROG_ERROR; - - // Update the uncompressed offset. This cannot overflow since - // the Index is known to be valid. - iter->uncompressed_offset += i->uncompressed_size; - - // Move to the next Block. 
- if (i->next == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - iter->internal[ITER_INDEX] = i; - return lzma_info_iter_next(iter, allocator); - } - - i = i->next; - } -} diff --git a/src/liblzma/common/memory_usage.c b/src/liblzma/common/memory_usage.c index b6f27957..8244c404 100644 --- a/src/liblzma/common/memory_usage.c +++ b/src/liblzma/common/memory_usage.c @@ -28,7 +28,6 @@ get_usage(const lzma_options_filter *filter, bool is_encoder) uint64_t ret; switch (filter->id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_X86: case LZMA_FILTER_POWERPC: case LZMA_FILTER_IA64: diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c deleted file mode 100644 index 579b0a51..00000000 --- a/src/liblzma/common/metadata_decoder.c +++ /dev/null @@ -1,578 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.c -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_decoder.h" -#include "block_decoder.h" - - -/// Maximum size of a single Extra Record. Again, this is mostly to make -/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should -/// be smaller. 
-#define EXTRA_SIZE_MAX (SIZE_MAX / 4) - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_ALLOC, - SEQ_INDEX_TOTAL_SIZE, - SEQ_INDEX_UNCOMPRESSED_SIZE, - SEQ_EXTRA_PREPARE, - SEQ_EXTRA_ALLOC, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA_ALLOC, - SEQ_EXTRA_DATA_COPY, - SEQ_EXTRA_DUMMY_ALLOC, - SEQ_EXTRA_DUMMY_ID, - SEQ_EXTRA_DUMMY_SIZE, - SEQ_EXTRA_DUMMY_COPY, - } sequence; - - /// Number of "things" left to be parsed. If we hit end of input - /// when this isn't zero, we have corrupt Metadata Block. - size_t todo_count; - - /// Position in variable-length integers - size_t pos; - - /// Temporary variable needed to decode variables whose type - /// is size_t instead of lzma_vli. - lzma_vli tmp; - - /// Pointer to target structure to hold the parsed results. - lzma_metadata *metadata; - - /// The Index Record we currently are parsing - lzma_index *index_current; - - /// Number of Records in Index - size_t index_count; - - /// Sum of Total Size fields in the Index - lzma_vli index_total_size; - - /// Sum of Uncompressed Size fields in the Index - lzma_vli index_uncompressed_size; - - /// True if Extra is present. - bool has_extra; - - /// True if we have been requested to store the Extra to *metadata. - bool want_extra; - - /// Pointer to the end of the Extra Record list. - lzma_extra *extra_tail; - - /// Dummy Extra Record used when only verifying integrity of Extra - /// (not storing it to RAM). - lzma_extra extra_dummy; - - /// Block decoder - lzma_next_coder block_decoder; - - /// buffer[buffer_pos] is the next byte to process. - size_t buffer_pos; - - /// buffer[buffer_size] is the first byte to not process. - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is read before - /// it is parsed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -/// Reads a variable-length integer to coder->num. 
-#define read_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_decode( \ - &num, &coder->pos, \ - coder->buffer, &coder->buffer_pos, \ - coder->buffer_size); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder, lzma_allocator *allocator) -{ - while (coder->buffer_pos < coder->buffer_size) - switch (coder->sequence) { - case SEQ_FLAGS: - // Reserved bits must be unset. - if (coder->buffer[coder->buffer_pos] & 0x70) - return LZMA_HEADER_ERROR; - - coder->todo_count = 0; - - // If Size of Header Metadata is present, prepare the - // variable for variable-length integer decoding. Otherwise - // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the - // field isn't present. - if (coder->buffer[coder->buffer_pos] & 0x01) { - coder->metadata->header_metadata_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x02) { - coder->metadata->total_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x04) { - coder->metadata->uncompressed_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x08) { - // Setting index_count to 1 is just to indicate that - // Index is present. The real size is parsed later. 
- coder->index_count = 1; - ++coder->todo_count; - } - - coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80) - != 0; - - ++coder->buffer_pos; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata->header_metadata_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->header_metadata_size); - - if (coder->metadata->header_metadata_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->total_size); - - if (coder->metadata->total_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->uncompressed_size); - --coder->todo_count; - } - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - coder->sequence = SEQ_EXTRA_PREPARE; - break; - } - - read_vli(coder->tmp); - - // Index must not be empty nor far too big (wouldn't fit - // in RAM). 
- if (coder->tmp == 0 || coder->tmp - >= SIZE_MAX / sizeof(lzma_index)) - return LZMA_DATA_ERROR; - - coder->index_count = (size_t)(coder->tmp); - coder->tmp = 0; - - coder->sequence = SEQ_INDEX_ALLOC; - break; - - case SEQ_INDEX_ALLOC: { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = 0; - i->uncompressed_size = 0; - i->next = NULL; - - if (coder->metadata->index == NULL) - coder->metadata->index = i; - else - coder->index_current->next = i; - - coder->index_current = i; - - coder->sequence = SEQ_INDEX_TOTAL_SIZE; - } - - // Fall through - - case SEQ_INDEX_TOTAL_SIZE: { - read_vli(coder->index_current->total_size); - - coder->index_total_size += coder->index_current->total_size; - if (coder->index_total_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - // No Block can have Total Size of zero bytes. - if (coder->index_current->total_size == 0) - return LZMA_DATA_ERROR; - - if (--coder->index_count == 0) { - // If Total Size is present, it must match the sum - // of Total Sizes in Index. 
- if (coder->metadata->total_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->total_size - != coder->index_total_size) - return LZMA_DATA_ERROR; - - coder->index_current = coder->metadata->index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE; - } else { - coder->sequence = SEQ_INDEX_ALLOC; - } - - break; - } - - case SEQ_INDEX_UNCOMPRESSED_SIZE: { - read_vli(coder->index_current->uncompressed_size); - - coder->index_uncompressed_size - += coder->index_current->uncompressed_size; - if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->uncompressed_size - != coder->index_uncompressed_size) - return LZMA_DATA_ERROR; - - --coder->todo_count; - coder->sequence = SEQ_EXTRA_PREPARE; - } - - break; - } - - case SEQ_EXTRA_PREPARE: - assert(coder->todo_count == 0); - - // If we get here, we have at least one byte of input left. - // If "Extra is present" flag is unset in Metadata Flags, - // it means that there is some garbage and we return an error. 
- if (!coder->has_extra) - return LZMA_DATA_ERROR; - - if (!coder->want_extra) { - coder->extra_tail = &coder->extra_dummy; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - break; - } - - coder->sequence = SEQ_EXTRA_ALLOC; - - // Fall through - - case SEQ_EXTRA_ALLOC: { - lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator); - if (e == NULL) - return LZMA_MEM_ERROR; - - e->next = NULL; - e->id = 0; - e->size = 0; - e->data = NULL; - - if (coder->metadata->extra == NULL) - coder->metadata->extra = e; - else - coder->extra_tail->next = e; - - coder->extra_tail = e; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_ID; - } - - // Fall through - - case SEQ_EXTRA_ID: - case SEQ_EXTRA_DUMMY_ID: - read_vli(coder->extra_tail->id); - - if (coder->extra_tail->id == 0) { - coder->extra_tail->size = 0; - coder->extra_tail->data = NULL; - coder->todo_count = 0; - --coder->sequence; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_SIZE: - case SEQ_EXTRA_DUMMY_SIZE: - read_vli(coder->tmp); - - if (coder->tmp == 0) { - // We have no Data in the Extra Record. Don't - // allocate any memory for it. Go back to - // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC. - coder->tmp = 0; - coder->sequence -= 2; - coder->todo_count = 0; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_DATA_ALLOC: { - if (coder->tmp > EXTRA_SIZE_MAX) - return LZMA_DATA_ERROR; - - coder->extra_tail->size = (size_t)(coder->tmp); - coder->tmp = 0; - - // We reserve space for the trailing '\0' too. 
- uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1, - allocator); - if (d == NULL) - return LZMA_MEM_ERROR; - - coder->extra_tail->data = d; - coder->sequence = SEQ_EXTRA_DATA_COPY; - } - - // Fall through - - case SEQ_EXTRA_DATA_COPY: - bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, - coder->extra_tail->data, &coder->pos, - (size_t)(coder->extra_tail->size)); - - if ((size_t)(coder->extra_tail->size) == coder->pos) { - coder->extra_tail->data[coder->pos] = '\0'; - coder->pos = 0; - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_ALLOC; - } - - break; - - case SEQ_EXTRA_DUMMY_ALLOC: - // Not really alloc, just initialize the dummy entry. - coder->extra_dummy = (lzma_extra){ - .next = NULL, - .id = 0, - .size = 0, - .data = NULL, - }; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_DUMMY_ID; - break; - - case SEQ_EXTRA_DUMMY_COPY: { - // Simply skip as many bytes as indicated by Extra Record Size. - // We don't check lzma_extra_size_max because we don't - // allocate any memory to hold the data. - const size_t in_avail = coder->buffer_size - coder->buffer_pos; - const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp); - coder->buffer_pos += skip; - coder->tmp -= skip; - - if (coder->tmp == 0) { - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - } - - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - bool end_was_reached = false; - - while (true) { - // Fill the buffer if it is empty. 
- if (coder->buffer_pos == coder->buffer_size) { - coder->buffer_pos = 0; - coder->buffer_size = 0; - - const lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, coder->buffer, - &coder->buffer_size, LZMA_BUFFER_SIZE, - LZMA_RUN); - - switch (ret) { - case LZMA_OK: - // Return immediatelly if we got no new data. - if (coder->buffer_size == 0) - return LZMA_OK; - - break; - - case LZMA_STREAM_END: - end_was_reached = true; - break; - - default: - return ret; - } - } - - // Process coder->buffer. - const lzma_ret ret = process(coder, allocator); - if (ret != LZMA_OK) - return ret; - - // On success, process() eats all the input. - assert(coder->buffer_pos == coder->buffer_size); - - if (end_was_reached) { - // Check that the sequence is not in the - // middle of anything. - if (coder->todo_count != 0) - return LZMA_DATA_ERROR; - - // If Size of Header Metadata Block was not - // present, we use zero as its size instead - // of LZMA_VLI_VALUE_UNKNOWN. 
- if (coder->metadata->header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - coder->metadata->header_metadata_size = 0; - - return LZMA_STREAM_END; - } - } -} - - -static void -metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_decode; - next->end = &metadata_decoder_end; - next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - } - - metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->total_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->index = NULL; - metadata->extra = NULL; - - next->coder->sequence = SEQ_FLAGS; - next->coder->todo_count = 1; - next->coder->pos = 0; - next->coder->tmp = 0; - next->coder->metadata = metadata; - next->coder->index_current = NULL; - next->coder->index_count = 0; - next->coder->index_total_size = 0; - next->coder->index_uncompressed_size = 0; - next->coder->want_extra = want_extra; - next->coder->extra_tail = NULL; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - return lzma_block_decoder_init( - &next->coder->block_decoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - lzma_next_coder_init(metadata_decoder_init, next, allocator, - options, metadata, want_extra); -} - - -extern LZMA_API lzma_ret -lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options, - 
lzma_metadata *metadata, lzma_bool want_extra) -{ - lzma_next_strm_init(strm, lzma_metadata_decoder_init, - options, metadata, want_extra); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/metadata_decoder.h b/src/liblzma/common/metadata_decoder.h deleted file mode 100644 index 1fba2179..00000000 --- a/src/liblzma/common/metadata_decoder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.h -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_METADATA_DECODER_H -#define LZMA_METADATA_DECODER_H - -#include "common.h" - - -extern lzma_ret lzma_metadata_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra); - -#endif diff --git a/src/liblzma/common/metadata_encoder.c b/src/liblzma/common/metadata_encoder.c deleted file mode 100644 index 9f4a15b0..00000000 --- a/src/liblzma/common/metadata_encoder.c +++ /dev/null @@ -1,435 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_encoder.c -/// \brief Encodes metadata to be stored into Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_encoder.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_TOTAL, - SEQ_INDEX_UNCOMPRESSED, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA, - SEQ_END, - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// Local copy of the Metadata structure. Note that we keep - /// a copy only of the main structure, not Index or Extra Records. 
- lzma_metadata metadata; - - /// Number of Records in Index - size_t index_count; - - /// Index Record currently being processed - const lzma_index *index_current; - - /// Block encoder for the encoded Metadata - lzma_next_coder block_encoder; - - /// True once everything except compression has been done. - bool end_was_reached; - - /// buffer[buffer_pos] is the first byte that needs to be compressed. - size_t buffer_pos; - - /// buffer[buffer_size] is the next position where a byte will be - /// written by process(). - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is written before - /// it is compressed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -#define write_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_encode(num, &coder->pos, 1, \ - coder->buffer, &coder->buffer_size, \ - LZMA_BUFFER_SIZE); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder) -{ - while (coder->buffer_size < LZMA_BUFFER_SIZE) - switch (coder->sequence) { - case SEQ_FLAGS: - coder->buffer[coder->buffer_size] = 0; - - if (coder->metadata.header_metadata_size != 0) - coder->buffer[coder->buffer_size] |= 0x01; - - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x02; - - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x04; - - if (coder->index_count > 0) - coder->buffer[coder->buffer_size] |= 0x08; - - if (coder->metadata.extra != NULL) - coder->buffer[coder->buffer_size] |= 0x80; - - ++coder->buffer_size; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata.header_metadata_size != 0) - write_vli(coder->metadata.header_metadata_size); - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.total_size); - - 
coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.uncompressed_size); - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - break; - } - - write_vli(coder->index_count); - coder->sequence = SEQ_INDEX_TOTAL; - break; - - case SEQ_INDEX_TOTAL: - write_vli(coder->index_current->total_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - coder->index_current = coder->metadata.index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED; - } - - break; - - case SEQ_INDEX_UNCOMPRESSED: - write_vli(coder->index_current->uncompressed_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current != NULL) - break; - - if (coder->metadata.extra != NULL) { - coder->sequence = SEQ_EXTRA_ID; - break; - } - - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - - case SEQ_EXTRA_ID: { - const lzma_ret ret = lzma_vli_encode( - coder->metadata.extra->id, &coder->pos, 1, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->pos = 0; - - // Handle the special ID 0. 
- if (coder->metadata.extra->id == 0) { - coder->metadata.extra - = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - - } else { - coder->sequence = SEQ_EXTRA_SIZE; - } - - break; - - default: - return ret; - } - - break; - } - - case SEQ_EXTRA_SIZE: - if (coder->metadata.extra->size >= (lzma_vli)(SIZE_MAX)) - return LZMA_HEADER_ERROR; - - write_vli(coder->metadata.extra->size); - coder->sequence = SEQ_EXTRA_DATA; - break; - - case SEQ_EXTRA_DATA: - bufcpy(coder->metadata.extra->data, &coder->pos, - coder->metadata.extra->size, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - - if ((size_t)(coder->metadata.extra->size) == coder->pos) { - coder->metadata.extra = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->pos = 0; - coder->sequence = SEQ_EXTRA_ID; - } - - break; - - case SEQ_END: - // Everything is encoded. Let the compression code finish - // its work now. - return LZMA_STREAM_END; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in lzma_attribute((unused)), - size_t *restrict in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused)), uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action lzma_attribute((unused))) -{ - while (!coder->end_was_reached) { - // Flush coder->buffer if it isn't empty. 
- if (coder->buffer_size > 0) { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, - coder->buffer_size, - out, out_pos, out_size, LZMA_RUN); - if (coder->buffer_pos < coder->buffer_size - || ret != LZMA_OK) - return ret; - - coder->buffer_pos = 0; - coder->buffer_size = 0; - } - - const lzma_ret ret = process(coder); - - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->end_was_reached = true; - break; - - default: - return ret; - } - } - - // Finish - return coder->block_encoder.code(coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, coder->buffer_size, - out, out_pos, out_size, LZMA_FINISH); -} - - -static void -metadata_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_encode; - next->end = &metadata_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } - - next->coder->sequence = SEQ_FLAGS; - next->coder->pos = 0; - next->coder->metadata = *metadata; - next->coder->index_count = 0; - next->coder->index_current = metadata->index; - next->coder->end_was_reached = false; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - // Count and validate the Index Records. 
- { - const lzma_index *i = metadata->index; - while (i != NULL) { - if (i->total_size > LZMA_VLI_VALUE_MAX - || i->uncompressed_size - > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - ++next->coder->index_count; - i = i->next; - } - } - - // Initialize the Block encoder. - return lzma_block_encoder_init( - &next->coder->block_encoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - lzma_next_coder_init(metadata_encoder_init, next, allocator, - options, metadata); -} - - -extern LZMA_API lzma_ret -lzma_metadata_encoder(lzma_stream *strm, lzma_options_block *options, - const lzma_metadata *metadata) -{ - lzma_next_strm_init(strm, metadata_encoder_init, options, metadata); - - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} - - -extern LZMA_API lzma_vli -lzma_metadata_size(const lzma_metadata *metadata) -{ - lzma_vli size = 1; // Metadata Flags - - // Validate header_metadata_size, total_size, and uncompressed_size. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || metadata->total_size == 0 - || !lzma_vli_is_valid(metadata->uncompressed_size)) - return 0; - - // Add the sizes of these three fields. 
- if (metadata->header_metadata_size != 0) - size += lzma_vli_size(metadata->header_metadata_size); - - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->total_size); - - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->uncompressed_size); - - // Index - if (metadata->index != NULL) { - const lzma_index *i = metadata->index; - size_t count = 1; - - do { - const size_t x = lzma_vli_size(i->total_size); - const size_t y = lzma_vli_size(i->uncompressed_size); - if (x == 0 || y == 0) - return 0; - - size += x + y; - ++count; - i = i->next; - - } while (i != NULL); - - const size_t tmp = lzma_vli_size(count); - if (tmp == 0) - return 0; - - size += tmp; - } - - // Extra - { - const lzma_extra *e = metadata->extra; - while (e != NULL) { - // Validate the numbers. - if (e->id > LZMA_VLI_VALUE_MAX - || e->size >= (lzma_vli)(SIZE_MAX)) - return 0; - - // Add the sizes. - size += lzma_vli_size(e->id); - if (e->id != 0) { - size += lzma_vli_size(e->size); - size += e->size; - } - - e = e->next; - } - } - - return size; -} diff --git a/src/liblzma/common/raw_common.c b/src/liblzma/common/raw_common.c index d45bf4de..35252fc2 100644 --- a/src/liblzma/common/raw_common.c +++ b/src/liblzma/common/raw_common.c @@ -20,122 +20,81 @@ #include "raw_common.h" -/// \brief Prepares the filter chain -/// -/// Prepares the filter chain by setting uncompressed sizes for each filter, -/// and adding implicit Subblock filter when needed. -/// -/// \return true if error occurred, false on success. 
-/// -static bool -prepare(lzma_vli *id, lzma_vli *uncompressed_size, bool allow_implicit) +static lzma_ret +validate_options(const lzma_options_filter *options, size_t *count) { - bool needs_end_of_input = false; - - switch (id[0]) { - case LZMA_FILTER_COPY: - case LZMA_FILTER_X86: - case LZMA_FILTER_POWERPC: - case LZMA_FILTER_IA64: - case LZMA_FILTER_ARM: - case LZMA_FILTER_ARMTHUMB: - case LZMA_FILTER_SPARC: - case LZMA_FILTER_DELTA: - uncompressed_size[1] = uncompressed_size[0]; - needs_end_of_input = true; - break; - - case LZMA_FILTER_SUBBLOCK: - case LZMA_FILTER_LZMA: - // These change the size of the data unpredictably. - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - break; - - case LZMA_FILTER_SUBBLOCK_HELPER: - uncompressed_size[1] = uncompressed_size[0]; - break; - - default: - // Unknown filter. - return true; - } + if (options == NULL) + return LZMA_PROG_ERROR; - // Is this the last filter in the chain? - if (id[1] == LZMA_VLI_VALUE_UNKNOWN) { - if (needs_end_of_input && allow_implicit - && uncompressed_size[0] - == LZMA_VLI_VALUE_UNKNOWN) { - // Add implicit Subblock filter. - id[1] = LZMA_FILTER_SUBBLOCK; - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - id[2] = LZMA_VLI_VALUE_UNKNOWN; + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t change = 0; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i; + for (i = 0; options[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + switch (options[i].id) { + // Not #ifdeffing these for simplicity. 
+ case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + // These don't change the size of the data and cannot + // be used as the last filter in the chain. + last_ok = false; + break; + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + last_ok = true; + ++change; + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + last_ok = true; + break; +#endif + + default: + return LZMA_HEADER_ERROR; } - - return false; } - return prepare(id + 1, uncompressed_size + 1, allow_implicit); + // There must be 1-4 filters and the last filter must be usable as + // the last filter in the chain. + if (i == 0 || i > 4 || !last_ok) + return LZMA_HEADER_ERROR; + + // At maximum of two non-last filters are allowed to change the + // size of the data. + if (change > 2) + return LZMA_HEADER_ERROR; + + *count = i; + return LZMA_OK; } extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder) + bool is_encoder) { - if (options == NULL || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - // Count the number of filters in the chain. - size_t count = 0; - while (options[count].id != LZMA_VLI_VALUE_UNKNOWN) - ++count; - - // Allocate enough space from the stack for IDs and uncompressed - // sizes. We need two extra: possible implicit Subblock and end - // of array indicator. 
- lzma_vli ids[count + 2]; - lzma_vli uncompressed_sizes[count + 2]; - bool using_implicit = false; - - uncompressed_sizes[0] = uncompressed_size; - - if (count == 0) { - if (!allow_implicit) - return LZMA_PROG_ERROR; - - count = 1; - using_implicit = true; - - // Special case: no filters were specified, so an implicit - // Copy or Subblock filter is used. - if (uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - ids[0] = LZMA_FILTER_SUBBLOCK; - else - ids[0] = LZMA_FILTER_COPY; + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_options(options, &count)); - ids[1] = LZMA_VLI_VALUE_UNKNOWN; - - } else { - // Prepare the ids[] and uncompressed_sizes[]. - for (size_t i = 0; i < count; ++i) - ids[i] = options[i].id; - - ids[count] = LZMA_VLI_VALUE_UNKNOWN; - - if (prepare(ids, uncompressed_sizes, allow_implicit)) - return LZMA_HEADER_ERROR; - - // Check if implicit Subblock filter was added. - if (ids[count] != LZMA_VLI_VALUE_UNKNOWN) { - assert(ids[count] == LZMA_FILTER_SUBBLOCK); - ++count; - using_implicit = true; - } - } - - // Set the filter functions, and copy uncompressed sizes and options. + // Set the filter functions and copy the options pointer. lzma_filter_info filters[count + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { @@ -144,29 +103,20 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // of the uncompressed data. 
const size_t j = count - i - 1; - filters[j].init = get_function(ids[i]); + filters[j].init = get_function(options[i].id); if (filters[j].init == NULL) return LZMA_HEADER_ERROR; filters[j].options = options[i].options; - filters[j].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[0].options = NULL; - } else { for (size_t i = 0; i < count; ++i) { - filters[i].init = get_function(ids[i]); + filters[i].init = get_function(options[i].id); if (filters[i].init == NULL) return LZMA_HEADER_ERROR; filters[i].options = options[i].options; - filters[i].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[count - 1].options = NULL; } // Terminate the array. diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/common/raw_common.h index 172223cb..0a27f3dc 100644 --- a/src/liblzma/common/raw_common.h +++ b/src/liblzma/common/raw_common.h @@ -23,9 +23,8 @@ #include "common.h" extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, - lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_allocator *allocator, const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder); + bool is_encoder); #endif diff --git a/src/liblzma/common/raw_decoder.c b/src/liblzma/common/raw_decoder.c index 03f1d847..4fb7111c 100644 --- a/src/liblzma/common/raw_decoder.c +++ b/src/liblzma/common/raw_decoder.c @@ -18,24 +18,17 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_decoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_decoder.h" #include "subblock_decoder_helper.h" #include "delta_decoder.h" #include "lzma_decoder.h" -#include "metadata_decoder.h" static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_decoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case 
LZMA_FILTER_SUBBLOCK: return &lzma_subblock_decoder_init; @@ -93,12 +86,10 @@ get_function(lzma_vli id) extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) + const lzma_options_filter *options) { const lzma_ret ret = lzma_raw_coder_init(next, allocator, - options, uncompressed_size, &get_function, - allow_implicit, false); + options, &get_function, false); if (ret != LZMA_OK) lzma_next_coder_end(next, allocator); @@ -108,8 +99,7 @@ lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options) { return_if_error(lzma_strm_init(strm)); @@ -117,8 +107,7 @@ lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, - strm->allocator, options, uncompressed_size, - &get_function, allow_implicit, false); + strm->allocator, options, &get_function, false); if (ret != LZMA_OK) lzma_end(strm); diff --git a/src/liblzma/common/raw_decoder.h b/src/liblzma/common/raw_decoder.h index 9d48074b..c0e626a8 100644 --- a/src/liblzma/common/raw_decoder.h +++ b/src/liblzma/common/raw_decoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/raw_encoder.c b/src/liblzma/common/raw_encoder.c index fb12862b..9b8cbfae 100644 --- a/src/liblzma/common/raw_encoder.c +++ b/src/liblzma/common/raw_encoder.c @@ -18,28 +18,16 @@ 
/////////////////////////////////////////////////////////////////////////////// #include "raw_encoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_encoder.h" #include "delta_encoder.h" #include "lzma_encoder.h" -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_encoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_encoder_init; @@ -90,91 +78,34 @@ get_function(lzma_vli id) } -static lzma_ret -raw_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options) { - // Check that our amount of input stays in proper limits. 
- if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (action == LZMA_FINISH) { - if (coder->uncompressed_size != in_size - *in_pos) - return LZMA_PROG_ERROR; - } else { - if (coder->uncompressed_size < in_size - *in_pos) - return LZMA_PROG_ERROR; - } - } - - const size_t in_start = *in_pos; + const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, &get_function, true); - const lzma_ret ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, action); - - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= *in_pos - in_start; + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); return ret; } -static void -raw_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &raw_encode; - next->end = &raw_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - next->coder->uncompressed_size = uncompressed_size; - - // lzma_raw_coder_init() accesses get_function() via function pointer, - // because this way linker doesn't statically link both encoder and - // decoder functions if user needs only encoder or decoder. 
- return lzma_raw_coder_init(&next->coder->next, allocator, - options, uncompressed_size, - &get_function, allow_implicit, true); -} - - -extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - lzma_next_coder_init(raw_encoder_init, next, allocator, - options, uncompressed_size, allow_implicit); -} - - extern LZMA_API lzma_ret -lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options) { - lzma_next_strm_init(strm, raw_encoder_init, - options, uncompressed_size, allow_implicit); + return_if_error(lzma_strm_init(strm)); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; - return LZMA_OK; + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, &get_function, true); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; } diff --git a/src/liblzma/common/raw_encoder.h b/src/liblzma/common/raw_encoder.h index b0aab61a..4e148489 100644 --- a/src/liblzma/common/raw_encoder.h +++ b/src/liblzma/common/raw_encoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/stream_common.h b/src/liblzma/common/stream_common.h index b2f37f37..4f83fc58 100644 --- a/src/liblzma/common/stream_common.h +++ b/src/liblzma/common/stream_common.h @@ -22,6 +22,9 @@ #include "common.h" +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + extern const uint8_t lzma_header_magic[6]; extern const uint8_t 
lzma_footer_magic[2]; diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 56de3d9f..1bf7f1f8 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -18,281 +18,148 @@ /////////////////////////////////////////////////////////////////////////////// #include "stream_common.h" +#include "stream_decoder.h" #include "check.h" #include "stream_flags_decoder.h" #include "block_decoder.h" -#include "metadata_decoder.h" struct lzma_coder_s { enum { - SEQ_STREAM_HEADER_CODE, - SEQ_BLOCK_HEADER_INIT, - SEQ_BLOCK_HEADER_CODE, - SEQ_METADATA_CODE, - SEQ_DATA_CODE, - SEQ_STREAM_TAIL_INIT, - SEQ_STREAM_TAIL_CODE, + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, } sequence; - /// Position in variable-length integers and in some other things. - size_t pos; - /// Block or Metadata decoder. This takes little memory and the same /// data structure can be used to decode every Block Header, so it's /// a good idea to have a separate lzma_next_coder structure for it. lzma_next_coder block_decoder; - /// Block Header decoder; this is separate - lzma_next_coder block_header_decoder; - + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. lzma_options_block block_options; - /// Information about the sizes of the Blocks - lzma_info *info; - - /// Current Block in *info - lzma_info_iter iter; - - /// Number of bytes not yet processed from Data Blocks in the Stream. - /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is - /// decremented while decoding and verified to match the reality. - lzma_vli total_left; - - /// Like uncompressed_left above but for uncompressed data from - /// Data Blocks. 
- lzma_vli uncompressed_left; - /// Stream Flags from Stream Header - lzma_stream_flags header_flags; - - /// Stream Flags from Stream tail - lzma_stream_flags tail_flags; + lzma_stream_flags stream_flags; - /// Decoder for Stream Header and Stream tail. This takes very - /// little memory and the same data structure can be used for - /// both Header and tail, so it's a good idea to have a separate - /// lzma_next_coder structure for it. - lzma_next_coder flags_decoder; + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; - /// Temporary destination for the decoded Metadata. - lzma_metadata metadata; + /// Write position in buffer[] + size_t buffer_pos; - /// Pointer to application-supplied pointer where to store the list - /// of Extra Records from the Header Metadata Block. - lzma_extra **header_extra; - - /// Same as above but Footer Metadata Block - lzma_extra **footer_extra; + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; static lzma_ret -metadata_init(lzma_coder *coder, lzma_allocator *allocator) -{ - assert(coder->metadata.index == NULL); - assert(coder->metadata.extra == NULL); - - // Single-Block Streams don't have Metadata Blocks. - if (!coder->header_flags.is_multi) - return LZMA_DATA_ERROR; - - coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the Uncompressed Size of a Metadata Block. This is to - // prevent security issues where input file would have very huge - // Metadata. - // - // FIXME: Hardcoded constant is ugly. Maybe we should provide - // some way to specify this from the application. - coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; - - lzma_info_size size_type; - bool want_extra; - - // If we haven't decoded any Data Blocks yet, this is Header - // Metadata Block. 
- if (lzma_info_index_count_get(coder->info) == 0) { - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - size_type = LZMA_INFO_HEADER_METADATA; - want_extra = coder->header_extra != NULL; - } else { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - size_type = LZMA_INFO_FOOTER_METADATA; - want_extra = coder->footer_extra != NULL; - } - - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.total_size = lzma_info_size_get( - coder->info, size_type); - - coder->sequence = SEQ_METADATA_CODE; - - return lzma_metadata_decoder_init(&coder->block_decoder, allocator, - &coder->block_options, &coder->metadata, want_extra); -} - - -static lzma_ret -data_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(lzma_info_iter_set( - &coder->iter, LZMA_VLI_VALUE_UNKNOWN, - coder->block_options.uncompressed_size)); - - coder->block_options.total_size = coder->iter.total_size; - coder->block_options.uncompressed_size = coder->iter.uncompressed_size; - coder->block_options.total_limit = coder->total_left; - coder->block_options.uncompressed_limit = coder->uncompressed_left; - - if (coder->header_flags.is_multi) { - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - } else { - coder->block_options.has_uncompressed_size_in_footer - = coder->iter.uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - } - - coder->sequence = SEQ_DATA_CODE; - - return lzma_block_decoder_init(&coder->block_decoder, allocator, - &coder->block_options); -} - - -static lzma_ret stream_decode(lzma_coder *coder, lzma_allocator *allocator, 
const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA_CODE)) + || coder->sequence == SEQ_BLOCK)) switch (coder->sequence) { - case SEQ_STREAM_HEADER_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->buffer_pos = 0; + + // Decode the Stream Header. + return_if_error(lzma_stream_header_decode( + &coder->stream_flags, coder->buffer)); - coder->sequence = SEQ_BLOCK_HEADER_INIT; + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_UNSUPPORTED_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; // Detect if the Check type is supported and give appropriate // warning if it isn't. We don't warn every time a new Block // is started. - lzma_check tmp; - if (lzma_check_init(&tmp, coder->header_flags.check)) + if (!lzma_available_checks[coder->block_options.check]) return LZMA_UNSUPPORTED_CHECK; break; } - case SEQ_BLOCK_HEADER_INIT: { - coder->block_options.check = coder->header_flags.check; - coder->block_options.has_crc32 = coder->header_flags.has_crc32; + case SEQ_BLOCK_HEADER: { + if (coder->buffer_pos == 0) { + // Detect if it's Index. 
+ if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } - for (size_t i = 0; - i < ARRAY_SIZE(coder->block_options.filters); - ++i) { - lzma_free(coder->block_options.filters[i].options, - allocator); - coder->block_options.filters[i].options = NULL; + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); } - return_if_error(lzma_block_header_decoder_init( - &coder->block_header_decoder, allocator, - &coder->block_options)); - - coder->sequence = SEQ_BLOCK_HEADER_CODE; - } - - // Fall through - - case SEQ_BLOCK_HEADER_CODE: { - lzma_ret ret = coder->block_header_decoder.code( - coder->block_header_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the Block Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + coder->block_options.header_size); - if (coder->block_options.is_metadata) - ret = metadata_init(coder, allocator); - else - ret = data_init(coder, allocator); - - if (ret != LZMA_OK) - return ret; - - break; - } + // Return if we didn't get the whole Block Header yet. + if (coder->buffer_pos < coder->block_options.header_size) + return LZMA_OK; - case SEQ_METADATA_CODE: { - lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + coder->buffer_pos = 0; - const bool is_header_metadata = lzma_info_index_count_get( - coder->info) == 0; + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. 
+ lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + coder->block_options.filters = filters; - if (is_header_metadata) { - if (coder->header_extra != NULL) { - *coder->header_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, - coder->block_options.total_size) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_BLOCK_HEADER_INIT; - } else { - if (coder->footer_extra != NULL) { - *coder->footer_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, &coder->block_options); - coder->sequence = SEQ_STREAM_TAIL_INIT; - } + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); - assert(coder->metadata.extra == NULL); + coder->block_options.filters = NULL; - ret = lzma_info_metadata_set(coder->info, allocator, - &coder->metadata, is_header_metadata, true); - if (ret != LZMA_OK) + // Check if Block decoder initialization succeeded. Don't + // warn about unsupported check anymore since we did it + // earlier if it was needed. + if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK) return ret; - // Intialize coder->total_size and coder->uncompressed_size - // from Header Metadata. 
- if (is_header_metadata) { - coder->total_left = lzma_info_size_get( - coder->info, LZMA_INFO_TOTAL); - coder->uncompressed_left = lzma_info_size_get( - coder->info, LZMA_INFO_UNCOMPRESSED); - } - + coder->sequence = SEQ_BLOCK; break; } - case SEQ_DATA_CODE: { + case SEQ_BLOCK: { lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, @@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - ret = lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size); - if (ret != LZMA_OK) - return ret; - - // These won't overflow since lzma_info_iter_set() succeeded. - if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN) - coder->total_left -= coder->block_options.total_size; - if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_left -= coder->block_options - .uncompressed_size; + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_total_size_get( + &coder->block_options), + coder->block_options.uncompressed_size)); - if (!coder->header_flags.is_multi) { - ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_STREAM_TAIL_INIT; - break; - } - - coder->sequence = SEQ_BLOCK_HEADER_INIT; + coder->sequence = SEQ_BLOCK_HEADER; break; } - case SEQ_STREAM_TAIL_INIT: { - lzma_ret ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - ret = lzma_stream_tail_decoder_init(&coder->flags_decoder, - allocator, &coder->tail_flags); - if (ret != LZMA_OK) + case SEQ_INDEX: { + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). 
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) return ret; - coder->sequence = SEQ_STREAM_TAIL_CODE; + coder->sequence = SEQ_STREAM_FOOTER; + break; } - // Fall through + case SEQ_STREAM_FOOTER: + // Copy the Stream Footer to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); - case SEQ_STREAM_TAIL_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + // Return if we didn't get the whole Stream Footer yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; - if (!lzma_stream_flags_is_equal( - coder->header_flags, coder->tail_flags)) + // Decode the Stream Footer. + lzma_stream_flags footer_flags; + return_if_error(lzma_stream_footer_decode( + &footer_flags, coder->buffer)); + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. 
+ if (!lzma_stream_flags_equal(&coder->stream_flags, + &footer_flags)) return LZMA_DATA_ERROR; return LZMA_STREAM_END; - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { - for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i) - lzma_free(coder->block_options.filters[i].options, allocator); - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_next_coder_end(&coder->block_header_decoder, allocator); - lzma_next_coder_end(&coder->flags_decoder, allocator); - lzma_info_free(coder->info, allocator); - lzma_index_free(coder->metadata.index, allocator); - lzma_extra_free(coder->metadata.extra, allocator); + lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_ret -stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->end = &stream_decoder_end; next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; - next->coder->metadata.index = NULL; - next->coder->metadata.extra = NULL; - } else { - for (size_t i = 0; i < ARRAY_SIZE( - next->coder->block_options.filters); ++i) - lzma_free(next->coder->block_options - .filters[i].options, allocator); - - lzma_index_free(next->coder->metadata.index, allocator); - next->coder->metadata.index = NULL; - - lzma_extra_free(next->coder->metadata.extra, allocator); - next->coder->metadata.extra = NULL; + next->coder->index_hash = NULL; } - for (size_t i = 0; i < 
ARRAY_SIZE(next->coder->block_options.filters); - ++i) - next->coder->block_options.filters[i].options = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) + // Initialize the Index hash used to verify the Index. + next->coder->index_hash = lzma_index_hash_init( + next->coder->index_hash, allocator); + if (next->coder->index_hash == NULL) return LZMA_MEM_ERROR; - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - // Initialize Stream Header decoder. - return_if_error(lzma_stream_header_decoder_init( - &next->coder->flags_decoder, allocator, - &next->coder->header_flags)); - - // Reset the *foo_extra pointers to NULL. This way the caller knows - // if there were no Extra Records. (We don't support appending - // Records to Extra list.) - if (header != NULL) - *header = NULL; - if (footer != NULL) - *footer = NULL; - - // Reset some variables. - next->coder->sequence = SEQ_STREAM_HEADER_CODE; - next->coder->pos = 0; - next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->header_extra = header; - next->coder->footer_extra = footer; + // Reset the rest of the variables. 
+ next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.filters = NULL; + next->coder->buffer_pos = 0; return LZMA_OK; } extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - lzma_next_coder_init( - stream_decoder_init, next, allocator, header, footer); + lzma_next_coder_init0(stream_decoder_init, next, allocator); } extern LZMA_API lzma_ret -lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, stream_decoder_init, header, footer); + lzma_next_strm_init0(strm, stream_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/easy_common.h b/src/liblzma/common/stream_decoder.h index d864cce5..dcda387d 100644 --- a/src/liblzma/common/easy_common.h +++ b/src/liblzma/common/stream_decoder.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions +/// \file stream_decoder.h +/// \brief Decodes .lzma Streams // // Copyright (C) 2008 Lasse Collin // @@ -17,12 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H -#ifndef LZMA_EASY_COMMON_H -#define LZMA_EASY_COMMON_H +#include "common.h" -extern bool lzma_easy_set_filters( - lzma_options_filter *filters, uint32_t level); +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator); #endif diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c new file mode 100644 index 00000000..767b8014 --- /dev/null +++ 
b/src/liblzma/common/stream_encoder.c @@ -0,0 +1,282 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .lzma Streams +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_common.h" +#include "stream_encoder.h" +#include "block_encoder.h" +#include "index_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_options_block block_options; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +{ + // Prepare the Block options. 
+ coder->block_options.compressed_size = LZMA_VLI_VALUE_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, + out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediately since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. + // + // Initialize the Index encoder, and continue to + // actually encode the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder except if this is the first + // Block, because stream_encoder_init() has already + // initialized it. 
+ if (lzma_index_count(coder->index) != 0) + return_if_error(block_encoder_init(coder, allocator)); + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[4] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli total_size = lzma_block_total_size_get( + &coder->block_options); + assert(total_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + total_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. 
+ const lzma_stream_flags stream_flags = { + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->block_encoder, allocator); + lzma_next_coder_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + if (filters == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_encode; + next->end = &stream_encoder_end; + + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.check = check; + next->coder->block_options.filters = (lzma_options_filter *)(filters); + + // Initialize the Index + next->coder->index = lzma_index_init(next->coder->index, allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, next->coder->buffer)); + + next->coder->buffer_pos = 0; + next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. 
This way we detect if the given + // filters are supported by the current liblzma build, and the + // application doesn't need to keep the filters structure available + // unless it is going to use LZMA_FULL_FLUSH. + return block_encoder_init(next->coder, allocator); +} + + +extern lzma_ret +lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_coder_init(stream_encoder_init, next, allocator, + filters, check); +} + + +extern LZMA_API lzma_ret +lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_strm_init(strm, stream_encoder_init, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/metadata_encoder.h b/src/liblzma/common/stream_encoder.h index 20357fe6..3ce29561 100644 --- a/src/liblzma/common/metadata_encoder.h +++ b/src/liblzma/common/stream_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file metadata_encoder.h -/// \brief Encodes metadata to be stored into Metadata Blocks +/// \file stream_encoder.h +/// \brief Encodes .lzma Streams // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,14 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_METADATA_ENCODER_H -#define LZMA_METADATA_ENCODER_H +#ifndef LZMA_STREAM_ENCODER_H +#define LZMA_STREAM_ENCODER_H #include "common.h" -extern lzma_ret lzma_metadata_encoder_init( +extern lzma_ret lzma_stream_encoder_init( lzma_next_coder 
*next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata); + const lzma_options_filter *filters, lzma_check_type check); #endif diff --git a/src/liblzma/common/stream_encoder_multi.c b/src/liblzma/common/stream_encoder_multi.c deleted file mode 100644 index 403980cf..00000000 --- a/src/liblzma/common/stream_encoder_multi.c +++ /dev/null @@ -1,445 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_multi.c -/// \brief Encodes Multi-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "stream_encoder_multi.h" -#include "block_encoder.h" -#include "metadata_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_STREAM_HEADER_COPY, - SEQ_HEADER_METADATA_INIT, - SEQ_HEADER_METADATA_COPY, - SEQ_HEADER_METADATA_CODE, - SEQ_DATA_INIT, - SEQ_DATA_COPY, - SEQ_DATA_CODE, - SEQ_FOOTER_METADATA_INIT, - SEQ_FOOTER_METADATA_COPY, - SEQ_FOOTER_METADATA_CODE, - SEQ_STREAM_FOOTER_INIT, - SEQ_STREAM_FOOTER_COPY, - } sequence; - - /// Block or Metadata encoder - lzma_next_coder next; - - /// Options for the Block encoder - lzma_options_block block_options; - - /// Information about the Stream - lzma_info *info; - - /// Information about the current Data Block - lzma_info_iter iter; - - /// Pointer to user-supplied options structure. 
We don't write to - /// it, only read instructions from the application, thus this is - /// const even though the user-supplied pointer from - /// lzma_options_filter structure isn't. - const lzma_options_stream *stream_options; - - /// Stream Header or Stream Footer in encoded form - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -typedef enum { - BLOCK_HEADER_METADATA, - BLOCK_DATA, - BLOCK_FOOTER_METADATA, -} block_type; - - -static lzma_ret -block_header_encode(lzma_coder *coder, lzma_allocator *allocator, - lzma_vli uncompressed_size, block_type type) -{ - assert(coder->header == NULL); - - coder->block_options = (lzma_options_block){ - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .has_eopm = uncompressed_size == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = type != BLOCK_DATA, - .has_uncompressed_size_in_footer = false, - .has_backward_size = type == BLOCK_FOOTER_METADATA, - .handle_padding = false, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - }; - - if (type == BLOCK_DATA) { - memcpy(coder->block_options.filters, - coder->stream_options->filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment = coder->iter.stream_offset; - } else { - memcpy(coder->block_options.filters, - coder->stream_options->metadata_filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment - = lzma_info_metadata_alignment_get( - coder->info, type == BLOCK_HEADER_METADATA); - } - - return_if_error(lzma_block_header_size(&coder->block_options)); - - coder->header_size = coder->block_options.header_size; - coder->header = lzma_alloc(coder->header_size, allocator); - if (coder->header == NULL) - return 
LZMA_MEM_ERROR; - - return_if_error(lzma_block_header_encode( - coder->header, &coder->block_options)); - - coder->header_pos = 0; - return LZMA_OK; -} - - -static lzma_ret -metadata_encoder_init(lzma_coder *coder, lzma_allocator *allocator, - lzma_metadata *metadata, block_type type) -{ - return_if_error(lzma_info_metadata_set(coder->info, allocator, - metadata, type == BLOCK_HEADER_METADATA, false)); - - const lzma_vli metadata_size = lzma_metadata_size(metadata); - if (metadata_size == 0) - return LZMA_PROG_ERROR; - - return_if_error(block_header_encode( - coder, allocator, metadata_size, type)); - - return lzma_metadata_encoder_init(&coder->next, allocator, - &coder->block_options, metadata); -} - - -static lzma_ret -data_encoder_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(block_header_encode(coder, allocator, - LZMA_VLI_VALUE_UNKNOWN, BLOCK_DATA)); - - return lzma_block_encoder_init(&coder->next, allocator, - &coder->block_options); -} - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // Main loop - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - case SEQ_STREAM_FOOTER_COPY: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - if (coder->header_pos < coder->header_size) - return LZMA_OK; - - lzma_free(coder->header, allocator); - coder->header = NULL; - - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - // Write Header Metadata Block if we have Extra for it - // or known Uncompressed Size. 
- if (coder->stream_options->header != NULL - || coder->stream_options - ->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_HEADER_METADATA_INIT; - } else { - // Mark that Header Metadata Block doesn't - // exist. - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, 0) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_DATA_INIT; - } - break; - - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - ++coder->sequence; - break; - - case SEQ_STREAM_FOOTER_COPY: - return LZMA_STREAM_END; - - default: - assert(0); - } - - break; - - case SEQ_HEADER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size = LZMA_VLI_VALUE_UNKNOWN, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = coder->stream_options - ->uncompressed_size, - .index = NULL, - // Metadata encoder doesn't modify this, but since - // the lzma_extra structure is used also when decoding - // Metadata, the pointer is not const, and we need - // to cast the constness away in the encoder. 
- .extra = (lzma_extra *)(coder->stream_options->header), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_HEADER_METADATA)); - - coder->sequence = SEQ_HEADER_METADATA_COPY; - break; - } - - case SEQ_FOOTER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size - = lzma_info_size_get(coder->info, - LZMA_INFO_HEADER_METADATA), - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .index = lzma_info_index_get(coder->info, false), - .extra = (lzma_extra *)(coder->stream_options->footer), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_FOOTER_METADATA)); - - coder->sequence = SEQ_FOOTER_METADATA_COPY; - break; - } - - case SEQ_HEADER_METADATA_CODE: - case SEQ_FOOTER_METADATA_CODE: { - size_t dummy = 0; - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, NULL, &dummy, 0, - out, out_pos, out_size, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; - - return_if_error(lzma_info_size_set(coder->info, - coder->sequence == SEQ_HEADER_METADATA_CODE - ? LZMA_INFO_HEADER_METADATA - : LZMA_INFO_FOOTER_METADATA, - coder->block_options.total_size)); - - ++coder->sequence; - break; - } - - case SEQ_DATA_INIT: { - // Don't create an empty Block unless it would be - // the only Data Block. - if (*in_pos == in_size) { - // If we are LZMA_SYNC_FLUSHing or LZMA_FULL_FLUSHing, - // return LZMA_STREAM_END since there's nothing to - // flush. - if (action != LZMA_FINISH) - return action == LZMA_RUN - ? 
LZMA_OK : LZMA_STREAM_END; - - if (lzma_info_index_count_get(coder->info) != 0) { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->sequence = SEQ_FOOTER_METADATA_INIT; - break; - } - } - - return_if_error(data_encoder_init(coder, allocator)); - - coder->sequence = SEQ_DATA_COPY; - break; - } - - case SEQ_DATA_CODE: { - static const lzma_action convert[4] = { - LZMA_RUN, - LZMA_SYNC_FLUSH, - LZMA_FINISH, - LZMA_FINISH, - }; - - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, in, in_pos, in_size, - out, out_pos, out_size, convert[action]); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - return_if_error(lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size)); - - coder->sequence = SEQ_DATA_INIT; - break; - } - - case SEQ_STREAM_FOOTER_INIT: { - assert(coder->header == NULL); - - lzma_stream_flags flags = { - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .is_multi = true, - }; - - coder->header = lzma_alloc(LZMA_STREAM_TAIL_SIZE, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_tail_encode( - coder->header, &flags)); - - coder->header_size = LZMA_STREAM_TAIL_SIZE; - coder->header_pos = 0; - - coder->sequence = SEQ_STREAM_FOOTER_COPY; - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_info_free(coder->info, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if 
(next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - } else { - lzma_free(next->coder->header, allocator); - } - - next->coder->header = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) - return LZMA_MEM_ERROR; - - next->coder->sequence = SEQ_STREAM_HEADER_COPY; - next->coder->stream_options = options; - - // Encode Stream Flags - { - lzma_stream_flags flags = { - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = true, - }; - - next->coder->header = lzma_alloc(LZMA_STREAM_HEADER_SIZE, - allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode( - next->coder->header, &flags)); - - next->coder->header_pos = 0; - next->coder->header_size = LZMA_STREAM_HEADER_SIZE; - } - - if (lzma_info_size_set(next->coder->info, LZMA_INFO_STREAM_START, - options->alignment) != LZMA_OK) - return LZMA_PROG_ERROR; - - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, next, allocator, options); -} - - -extern LZMA_API lzma_ret -lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_encoder_single.c b/src/liblzma/common/stream_encoder_single.c deleted file mode 100644 index d93e7169..00000000 --- 
a/src/liblzma/common/stream_encoder_single.c +++ /dev/null @@ -1,219 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_single.c -/// \brief Encodes Single-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - /// Uncompressed Size, Backward Size, and Footer Magic Bytes are - /// part of Block in the file format specification, but it is simpler - /// to implement them as part of Stream. - enum { - SEQ_HEADERS, - SEQ_DATA, - SEQ_FOOTER, - } sequence; - - /// Block encoder - lzma_next_coder block_encoder; - - /// Block encoder options - lzma_options_block block_options; - - /// Stream Flags; we need to have these in this struct so that we - /// can encode Stream Footer. - lzma_stream_flags stream_flags; - - /// Stream Header + Block Header, or Stream Footer - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, size_t *out_pos, - size_t out_size, lzma_action action) -{ - // NOTE: We don't check if the amount of input is in the proper limits, - // because the Block encoder will do it for us. 
- - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_HEADERS: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - if (coder->header_pos == coder->header_size) { - coder->header_pos = 0; - coder->sequence = SEQ_DATA; - } - - break; - - case SEQ_DATA: { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - in, in_pos, in_size, - out, out_pos, out_size, action); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - assert(*in_pos == in_size); - - assert(coder->header_size >= LZMA_STREAM_TAIL_SIZE); - coder->header_size = LZMA_STREAM_TAIL_SIZE; - - return_if_error(lzma_stream_tail_encode( - coder->header, &coder->stream_flags)); - - coder->sequence = SEQ_FOOTER; - break; - } - - case SEQ_FOOTER: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - return coder->header_pos == coder->header_size - ? LZMA_STREAM_END : LZMA_OK; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } else { - // Free the previous buffer, if any. - lzma_free(next->coder->header, allocator); - } - - // At this point, next->coder->header points to nothing useful. 
- next->coder->header = NULL; - - // Basic initializations - next->coder->sequence = SEQ_HEADERS; - next->coder->header_pos = 0; - - // Initialize next->coder->stream_flags. - next->coder->stream_flags = (lzma_stream_flags){ - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = false, - }; - - // Initialize next->coder->block_options. - next->coder->block_options = (lzma_options_block){ - .check = options->check, - .has_crc32 = options->has_crc32, - .has_eopm = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = false, - .has_uncompressed_size_in_footer = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .has_backward_size = true, - .handle_padding = false, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = options->uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - .alignment = options->alignment + LZMA_STREAM_HEADER_SIZE, - }; - memcpy(next->coder->block_options.filters, options->filters, - sizeof(options->filters)); - - return_if_error(lzma_block_header_size(&next->coder->block_options)); - - // Encode Stream Flags and Block Header into next->coder->header. - next->coder->header_size = (size_t)(LZMA_STREAM_HEADER_SIZE) - + next->coder->block_options.header_size; - next->coder->header = lzma_alloc(next->coder->header_size, allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode(next->coder->header, - &next->coder->stream_flags)); - - return_if_error(lzma_block_header_encode( - next->coder->header + LZMA_STREAM_HEADER_SIZE, - &next->coder->block_options)); - - // Initialize the Block encoder. 
- return lzma_block_encoder_init(&next->coder->block_encoder, allocator, - &next->coder->block_options); -} - - -/* -extern lzma_ret -lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, allocator, options); -} -*/ - - -extern LZMA_API lzma_ret -lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c index d9c847ac..0270875a 100644 --- a/src/liblzma/common/stream_flags_decoder.c +++ b/src/liblzma/common/stream_flags_decoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_decoder.c -/// \brief Decodes Stream Header and tail from .lzma files +/// \brief Decodes Stream Header and Stream Footer from .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -17,242 +17,72 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_flags_decoder.h" #include "stream_common.h" -//////////// -// Common // -//////////// - -struct lzma_coder_s { - enum { - SEQ_HEADER_MAGIC, - SEQ_HEADER_FLAGS, - SEQ_HEADER_CRC32, - - SEQ_FOOTER_FLAGS, - SEQ_FOOTER_MAGIC, - } sequence; - - size_t pos; - uint32_t crc32; - - lzma_stream_flags *options; -}; - - -static void -stream_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - static bool -stream_flags_decode(const uint8_t *in, lzma_stream_flags *options) +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) { // Reserved bits must be unset. 
- if (*in & 0xE0) + if (in[0] != 0x00 || (in[1] & 0xF0)) return true; - options->check = *in & 0x07; - options->has_crc32 = (*in & 0x08) != 0; - options->is_multi = (*in & 0x10) != 0; + options->check = in[1] & 0x0F; return false; } -//////////// -// Header // -//////////// - -static lzma_ret -stream_header_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_HEADER_MAGIC: - if (in[*in_pos] != lzma_header_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_header_magic)) { - coder->pos = 0; - coder->sequence = SEQ_HEADER_FLAGS; - } - - break; - - case SEQ_HEADER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, 0); - - ++*in_pos; - coder->sequence = SEQ_HEADER_CRC32; - break; - - case SEQ_HEADER_CRC32: - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == 4) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. 
- next->code = &stream_header_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_HEADER_MAGIC; - next->coder->pos = 0; - next->coder->crc32 = 0; - next->coder->options = options; - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init( - stream_header_decoder_init, next, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_header_decoder(lzma_stream *strm, lzma_stream_flags *options) -{ - lzma_next_strm_init(strm, stream_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} - - -////////// -// Tail // -////////// - -static lzma_ret -stream_tail_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FOOTER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - ++*in_pos; - coder->sequence = SEQ_FOOTER_MAGIC; - break; - - case SEQ_FOOTER_MAGIC: - if (in[*in_pos] != lzma_footer_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_footer_magic)) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) { - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - 
return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_tail_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_FOOTER_FLAGS; - next->coder->pos = 0; - next->coder->options = options; + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_HEADER_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_equal can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. 
+ options->backward_size = LZMA_VLI_VALUE_UNKNOWN; return LZMA_OK; } -extern lzma_ret -lzma_stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init2(next, allocator, stream_header_decoder_init, - stream_tail_decoder_init, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_tail_decoder(lzma_stream *strm, lzma_stream_flags *options) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) { - lzma_next_strm_init2(strm, stream_header_decoder_init, - stream_tail_decoder_init, strm->allocator, options); - - strm->internal->supported_actions[LZMA_RUN] = true; + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_HEADER_ERROR; + + // Backward Size + options->backward_size = integer_read_32(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; return LZMA_OK; } diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c index 55468580..4efbb6f4 100644 --- a/src/liblzma/common/stream_flags_encoder.c +++ b/src/liblzma/common/stream_flags_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_encoder.c -/// \brief Encodes Stream Header and Footer for .lzma files +/// \brief Encodes Stream Header and Stream Footer for .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -21,55 +21,69 @@ static bool -stream_flags_encode(uint8_t *flags_byte, const lzma_stream_flags *options) +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) { - // 
Check type if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return true; - *flags_byte = options->check; - - // Usage of CRC32 in Block Headers - if (options->has_crc32) - *flags_byte |= 0x08; - - // Single- or Multi-Block - if (options->is_multi) - *flags_byte |= 0x10; + out[0] = 0x00; + out[1] = options->check; return false; } extern LZMA_API lzma_ret -lzma_stream_header_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + // Magic memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); // Stream Flags - if (stream_flags_encode(out + sizeof(lzma_header_magic), options)) - return LZMA_PROG_ERROR;; + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; // CRC32 of the Stream Header - const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), 1, 0); + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); - for (size_t i = 0; i < 4; ++i) - out[sizeof(lzma_header_magic) + 1 + i] = crc >> (i * 8); + integer_write_32(out + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE, crc); return LZMA_OK; } extern LZMA_API lzma_ret -lzma_stream_tail_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + // Backward Size + if (options->backward_size < LZMA_BACKWARD_SIZE_MIN + || options->backward_size > LZMA_BACKWARD_SIZE_MAX + || (options->backward_size & 3)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 4, options->backward_size / 4 - 1); + // Stream Flags - if (stream_flags_encode(out, options)) + if (stream_flags_encode(options, out + 2 * 4)) return LZMA_PROG_ERROR; + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 
+ LZMA_STREAM_FLAGS_SIZE, 0); + + integer_write_32(out, crc); + // Magic - memcpy(out + 1, lzma_footer_magic, sizeof(lzma_footer_magic)); + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); return LZMA_OK; } diff --git a/src/liblzma/common/easy_single.c b/src/liblzma/common/stream_flags_equal.c index e2fa4e13..db22567f 100644 --- a/src/liblzma/common/easy_single.c +++ b/src/liblzma/common/stream_flags_equal.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_single.c -/// \brief Easy Single-Block Stream encoder initialization +/// \file stream_flags_equal.c +/// \brief Compare Stream Header and Stream Footer // // Copyright (C) 2008 Lasse Collin // @@ -17,21 +17,20 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" +#include "common.h" -extern LZMA_API lzma_ret -lzma_easy_encoder_single(lzma_stream *strm, lzma_easy_level level) +extern LZMA_API lzma_bool +lzma_stream_flags_equal(const lzma_stream_flags *a, lzma_stream_flags *b) { - lzma_options_stream opt_stream = { - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - }; + if (a->check != b->check) + return false; - if (lzma_easy_set_filters(opt_stream.filters, level)) - return LZMA_HEADER_ERROR; + // Backward Sizes are compared only if they are known in both. 
+ if (a->backward_size != LZMA_VLI_VALUE_UNKNOWN + && b->backward_size != LZMA_VLI_VALUE_UNKNOWN + && a->backward_size != b->backward_size) + return false; - return lzma_stream_encoder_single(strm, &opt_stream); + return true; } diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c index 2b89c1a7..faff6ccb 100644 --- a/src/liblzma/common/vli_decoder.c +++ b/src/liblzma/common/vli_decoder.c @@ -3,7 +3,7 @@ /// \file vli_decoder.c /// \brief Decodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,45 +25,53 @@ lzma_vli_decode(lzma_vli *restrict vli, size_t *restrict vli_pos, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { - if (*vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 - || (*vli >> (7 * *vli_pos)) != 0) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*in_pos >= in_size) - return LZMA_BUF_ERROR; + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; - if (*vli_pos == 0) { - *vli_pos = 1; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *in_pos >= in_size + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR;; - if (in[*in_pos] <= 0x7F) { - // Single-byte integer - *vli = in[*in_pos]; - ++*in_pos; - return LZMA_STREAM_END; - } - - *vli = in[*in_pos] & 0x7F; - ++*in_pos; - } - - while (*in_pos < in_size) { - // Read in the next byte. + do { + // Read the next byte. *vli |= (lzma_vli)(in[*in_pos] & 0x7F) << (*vli_pos * 7); ++*vli_pos; // Check if this is the last byte of a multibyte integer. 
- if (in[*in_pos] & 0x80) { - ++*in_pos; - return LZMA_STREAM_END; + if (!(in[*in_pos] & 0x80)) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most + // compact form. + if (in[(*in_pos)++] == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; } - // Limit variable-length representation to nine bytes. - if (*vli_pos == 9) + ++*in_pos; + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions of liblzma + // will confusingly indicate the file being corrupt instead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) return LZMA_DATA_ERROR; - // Increment input position only when the byte was accepted. - ++*in_pos; - } + } while (*in_pos < in_size); - return LZMA_OK; + return vli_pos == &vli_pos_internal ? 
LZMA_DATA_ERROR : LZMA_OK; } diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c index 1ecdb0d2..c48d6474 100644 --- a/src/liblzma/common/vli_encoder.c +++ b/src/liblzma/common/vli_encoder.c @@ -3,7 +3,7 @@ /// \file vli_encoder.c /// \brief Encodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -21,61 +21,54 @@ extern LZMA_API lzma_ret -lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, size_t vli_size, +lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { - if (vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 || vli_size > 9 - || (vli != 0 && (vli >> (7 * *vli_pos)) == 0)) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*out_pos >= out_size) - return LZMA_BUF_ERROR; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *out_pos >= out_size + || vli > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; - if (*vli_pos == 0) { - *vli_pos = 1; + // Write the non-last bytes in a loop. + while ((vli >> (*vli_pos * 7)) >= 0x80) { + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)) | 0x80; - if (vli <= 0x7F && *vli_pos >= vli_size) { - // Single-byte integer - out[(*out_pos)++] = vli; - return LZMA_STREAM_END; - } + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); - // First byte of a multibyte integer - out[(*out_pos)++] = (vli & 0x7F) | 0x80; + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? 
LZMA_PROG_ERROR : LZMA_OK; } - while (*out_pos < out_size) { - const lzma_vli b = vli >> (7 * *vli_pos); - ++*vli_pos; - - if (b <= 0x7F && *vli_pos >= vli_size) { - // Last byte of a multibyte integer - out[(*out_pos)++] = (b & 0xFF) | 0x80; - return LZMA_STREAM_END; - } + // Write the last byte. + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)); + ++*out_pos; + ++*vli_pos; - // Middle byte of a multibyte integer - out[(*out_pos)++] = b & 0x7F; - } + return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END; - // vli is not yet completely written out. - return LZMA_OK; } -extern LZMA_API size_t +extern LZMA_API uint32_t lzma_vli_size(lzma_vli vli) { if (vli > LZMA_VLI_VALUE_MAX) return 0; - size_t i = 0; + uint32_t i = 0; do { vli >>= 7; ++i; } while (vli != 0); - assert(i <= 9); + assert(i <= LZMA_VLI_BYTES_MAX); return i; } diff --git a/src/liblzma/common/vli_reverse_decoder.c b/src/liblzma/common/vli_reverse_decoder.c deleted file mode 100644 index 68ca6a42..00000000 --- a/src/liblzma/common/vli_reverse_decoder.c +++ /dev/null @@ -1,55 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file vli_reverse_decoder.c -/// \brief Decodes variable-length integers starting at end of the buffer -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API lzma_ret -lzma_vli_reverse_decode(lzma_vli *vli, const uint8_t *in, size_t *in_size) -{ - if (*in_size == 0) - return LZMA_BUF_ERROR; - - size_t i = *in_size - 1; - *vli = in[i] & 0x7F; - - if (!(in[i] & 0x80)) { - *in_size = i; - return LZMA_OK; - } - - const size_t end = *in_size > LZMA_VLI_BYTES_MAX - ? *in_size - LZMA_VLI_BYTES_MAX : 0; - - do { - if (i-- == end) { - if (*in_size < LZMA_VLI_BYTES_MAX) - return LZMA_BUF_ERROR; - - return LZMA_DATA_ERROR; - } - - *vli <<= 7; - *vli = in[i] & 0x7F; - - } while (!(in[i] & 0x80)); - - *in_size = i; - return LZMA_OK; -} |