diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-06-18 18:02:10 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-06-18 18:02:10 +0300 |
commit | 7d17818cec8597f847b0a2537fde991bbc3d9e96 (patch) | |
tree | 9c41502e3eb96f103fe98e13456b382fbba7a292 /src/liblzma/common | |
parent | Update the file format specification draft. The new one is (diff) | |
download | xz-7d17818cec8597f847b0a2537fde991bbc3d9e96.tar.xz |
Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the
support for known uncompressed size in most places.
There are some miscellaneous changes here and there too.
The API of liblzma has got many changes and still some
more will be done soon. While most of the code has been
updated, some things are not fixed (the command line tool
will choke with invalid filter chain, if nothing else).
Subblock filter is somewhat broken for now. It will be
updated once the encoded format of the Subblock filter
has been decided.
Diffstat (limited to '')
54 files changed, 2944 insertions, 5315 deletions
diff --git a/src/liblzma/common/Makefile.am b/src/liblzma/common/Makefile.am index c76ce14f..40b42250 100644 --- a/src/liblzma/common/Makefile.am +++ b/src/liblzma/common/Makefile.am @@ -25,26 +25,20 @@ libcommon_la_SOURCES = \ common.h \ bsr.h \ allocator.c \ + block_util.c \ block_private.h \ - extra.c \ features.c \ index.c \ - info.c \ init.c \ memory_limiter.c \ memory_usage.c \ next_coder.c \ raw_common.c \ raw_common.h \ + stream_flags_equal.c \ code.c \ version.c -if COND_FILTER_COPY -libcommon_la_SOURCES += \ - copy_coder.c \ - copy_coder.h -endif - if COND_FILTER_DELTA libcommon_la_SOURCES += \ delta_common.c \ @@ -69,21 +63,17 @@ libcommon_la_SOURCES += \ block_encoder.c \ block_encoder.h \ block_header_encoder.c \ - easy_common.c \ - easy_common.h \ - easy_single.c \ - easy_multi.c \ + easy.c \ filter_flags_encoder.c \ + index_encoder.c \ + index_encoder.h \ init_encoder.c \ - metadata_encoder.c \ - metadata_encoder.h \ raw_encoder.c \ raw_encoder.h \ stream_common.c \ stream_common.h \ - stream_encoder_single.c \ - stream_encoder_multi.c \ - stream_encoder_multi.h \ + stream_encoder.c \ + stream_encoder.h \ stream_flags_encoder.c \ vli_encoder.c endif @@ -96,14 +86,13 @@ libcommon_la_SOURCES += \ block_decoder.h \ block_header_decoder.c \ filter_flags_decoder.c \ + index_decoder.c \ + index_hash.c \ init_decoder.c \ - metadata_decoder.c \ - metadata_decoder.h \ raw_decoder.c \ raw_decoder.h \ stream_decoder.c \ stream_flags_decoder.c \ stream_flags_decoder.h \ - vli_decoder.c \ - vli_reverse_decoder.c + vli_decoder.c endif diff --git a/src/liblzma/common/alignment.c b/src/liblzma/common/alignment.c index 2d468fe5..c80e5fab 100644 --- a/src/liblzma/common/alignment.c +++ b/src/liblzma/common/alignment.c @@ -25,7 +25,6 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) { for (size_t i = 0; filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // The same as the input, check the next filter. continue; @@ -69,9 +68,8 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) extern LZMA_API uint32_t lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) { - // Check if there is only an implicit Copy filter. if (filters[0].id == LZMA_VLI_VALUE_UNKNOWN) - return guess; + return UINT32_MAX; // Find the last filter in the chain. size_t i = 0; @@ -80,7 +78,6 @@ lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) do { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // It's the same as the input alignment, so // check the next filter. diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index 91df5bf2..062f6fab 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -32,9 +32,15 @@ struct lzma_coder_s { SEQ_CODE, } sequence; + /// Position in the header fields size_t pos; - lzma_options_alone options; + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; }; @@ -50,34 +56,39 @@ alone_decode(lzma_coder *coder, && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: - if (lzma_lzma_decode_properties( - &coder->options.lzma, in[*in_pos])) - return LZMA_DATA_ERROR; + if (lzma_lzma_decode_properties(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: - coder->options.lzma.dictionary_size + coder->options.dictionary_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { - // A hack to ditch tons of false positives: We allow - // only dictionary sizes that are a power of two. - // LZMA_Alone didn't create other kinds of files, - // although it's not impossible that files with - // other dictionary sizes exist. Well, if someone - // complains, this will be reconsidered. - size_t count = 0; - for (size_t i = 0; i < 32; ++i) - if (coder->options.lzma.dictionary_size - & (UINT32_C(1) << i)) - ++count; - - if (count != 1 || coder->options.lzma.dictionary_size + if (coder->options.dictionary_size + < LZMA_DICTIONARY_SIZE_MIN + || coder->options.dictionary_size > LZMA_DICTIONARY_SIZE_MAX) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; + + // A hack to ditch tons of false positives: We allow + // only dictionary sizes that are 2^n or 2^n + 2^(n-1). + // LZMA_Alone created only files with 2^n, but accepts + // any dictionary size. If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dictionary_size) + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; @@ -87,7 +98,7 @@ alone_decode(lzma_coder *coder, break; case SEQ_UNCOMPRESSED_SIZE: - coder->options.uncompressed_size + coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 8) { @@ -95,11 +106,10 @@ alone_decode(lzma_coder *coder, // if the uncompressed size is known, it must be less // than 256 GiB. Again, if someone complains, this // will be reconsidered. - if (coder->options.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->options.uncompressed_size + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; @@ -113,9 +123,7 @@ alone_decode(lzma_coder *coder, lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, - .options = &coder->options.lzma, - .uncompressed_size = coder->options - .uncompressed_size, + .options = &coder->options, }, { .init = NULL, } @@ -126,6 +134,10 @@ alone_decode(lzma_coder *coder, if (ret != LZMA_OK) return ret; + // Use a hack to set the uncompressed size. + lzma_lzma_decoder_uncompressed_size(&coder->next, + coder->uncompressed_size); + coder->sequence = SEQ_CODE; } @@ -169,8 +181,8 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) next->coder->sequence = SEQ_PROPERTIES; next->coder->pos = 0; - next->coder->options.lzma.dictionary_size = 0; - next->coder->options.uncompressed_size = 0; + next->coder->options.dictionary_size = 0; + next->coder->uncompressed_size = 0; return LZMA_OK; } @@ -179,17 +191,14 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - // We need to use _init2 because we don't pass any varadic args. - lzma_next_coder_init2(next, allocator, alone_decoder_init, - alone_decoder_init, allocator); + lzma_next_coder_init0(alone_decoder_init, next, allocator); } extern LZMA_API lzma_ret lzma_alone_decoder(lzma_stream *strm) { - lzma_next_strm_init2(strm, alone_decoder_init, - alone_decoder_init, strm->allocator); + lzma_next_strm_init0(strm, alone_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c index 7629aa77..f94a21c1 100644 --- a/src/liblzma/common/alone_encoder.c +++ b/src/liblzma/common/alone_encoder.c @@ -21,19 +21,19 @@ #include "lzma_encoder.h" +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + struct lzma_coder_s { lzma_next_coder next; enum { - SEQ_PROPERTIES, - SEQ_DICTIONARY_SIZE, - SEQ_UNCOMPRESSED_SIZE, + SEQ_HEADER, SEQ_CODE, } sequence; - size_t pos; - - lzma_options_alone options; + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; }; @@ -47,47 +47,23 @@ alone_encode(lzma_coder *coder, { while (*out_pos < out_size) switch (coder->sequence) { - case SEQ_PROPERTIES: - if (lzma_lzma_encode_properties( - &coder->options.lzma, out + *out_pos)) { - return LZMA_PROG_ERROR; - } - - coder->sequence = SEQ_DICTIONARY_SIZE; - ++*out_pos; - break; - - case SEQ_DICTIONARY_SIZE: - out[*out_pos] = coder->options.lzma.dictionary_size - >> (coder->pos * 8); - - if (++coder->pos == 4) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - ++*out_pos; - break; - - case SEQ_UNCOMPRESSED_SIZE: - out[*out_pos] = coder->options.uncompressed_size - >> (coder->pos * 8); - - if (++coder->pos == 8) { - coder->pos = 0; - coder->sequence = SEQ_CODE; - } - - ++*out_pos; + case SEQ_HEADER: + bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; break; - case SEQ_CODE: { + case SEQ_CODE: return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -107,7 +83,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_alone *options) + const lzma_options_lzma *options) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -119,23 +95,42 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the LZMA_Alone coder variables. - next->coder->sequence = SEQ_PROPERTIES; - next->coder->pos = 0; - next->coder->options = *options; + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; - // Verify uncompressed_size since the other functions assume that - // it is valid. - if (!lzma_vli_is_valid(next->coder->options.uncompressed_size)) + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_encode_properties(options, next->coder->header)) return LZMA_PROG_ERROR; + // - Dictionary size (4 bytes) + if (options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || options->dictionary_size > LZMA_DICTIONARY_SIZE_MAX) + return LZMA_PROG_ERROR; + + // Round up to to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next. While the header would allow any 32-bit integer, + // we do this to keep the decoder of liblzma accepting the resulting + // files. + uint32_t d = options->dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + integer_write_32(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + // Initialize the LZMA encoder. const lzma_filter_info filters[2] = { { .init = &lzma_lzma_encoder_init, - .options = &next->coder->options.lzma, - .uncompressed_size = next->coder->options - .uncompressed_size, + .options = (void *)(options), }, { .init = NULL, } @@ -156,7 +151,7 @@ lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_alone_encoder(lzma_stream *strm, const lzma_options_alone *options) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) { lzma_next_strm_init(strm, alone_encoder_init, options); diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index 7e92df9a..765a27b1 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -17,15 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "stream_decoder.h" #include "alone_decoder.h" struct lzma_coder_s { lzma_next_coder next; - - lzma_extra **header; - lzma_extra **footer; bool initialized; }; @@ -43,8 +40,8 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator, lzma_ret ret; if (in[*in_pos] == 0xFF) - ret = lzma_stream_decoder_init(&coder->next, allocator, - coder->header, coder->footer); + ret = lzma_stream_decoder_init( + &coder->next, allocator); else ret = lzma_alone_decoder_init(&coder->next, allocator); @@ -69,8 +66,7 @@ auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -82,8 +78,6 @@ auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->header = header; - next->coder->footer = footer; next->coder->initialized = false; return LZMA_OK; @@ -102,9 +96,9 @@ lzma_auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_auto_decoder(lzma_stream *strm, lzma_extra **header, lzma_extra **footer) +lzma_auto_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, auto_decoder_init, header, footer); + lzma_next_strm_init0(strm, auto_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index e1b5dc96..f07c4e06 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -26,129 +26,47 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, - SEQ_END, + SEQ_CHECK, } sequence; /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Decoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the encoding has been finished. lzma_options_block *options; - /// Position in variable-length integers (and in some other places). - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; + /// Position when reading the Check field + size_t check_pos; - /// Temporary location for the Uncompressed Size and Backward Size - /// fields in Block Footer. - lzma_vli tmp; - - /// Size of the Backward Size field - This is needed so that we - /// can verify the Backward Size and still keep updating total_size. - size_t size_of_backward_size; + /// Check of the uncompressed data + lzma_check check; }; static lzma_ret -update_sequence(lzma_coder *coder) -{ - switch (coder->sequence) { - case SEQ_CODE: - if (coder->options->check != LZMA_CHECK_NONE) { - lzma_check_finish(&coder->check, - coder->options->check); - coder->sequence = SEQ_CHECK; - break; - } - - // Fall through - - case SEQ_CHECK: - if (coder->options->has_uncompressed_size_in_footer) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_backward_size) { - coder->sequence = SEQ_BACKWARD_SIZE; - break; - } - - // Fall through - - case SEQ_BACKWARD_SIZE: - if (coder->options->handle_padding) { - coder->sequence = SEQ_PADDING; - break; - } - - case SEQ_PADDING: - if (!is_size_valid(coder->total_size, - coder->options->total_size) - || !is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret block_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - // Special case when the Block has only Block Header. - if (coder->sequence == SEQ_END) - return LZMA_STREAM_END; - - // FIXME: Termination condition should work but could be cleaner. - while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_CODE)) switch (coder->sequence) { case SEQ_CODE: { + if (*out_pos >= out_size) + return LZMA_OK; + const size_t in_start = *in_pos; const size_t out_start = *out_pos; @@ -159,13 +77,13 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, in_used, - coder->total_limit) - || update_size(&coder->compressed_size, - in_used, - coder->options->compressed_size) + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_VALUE_MAX. + if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) || update_size(&coder->uncompressed_size, - out_used, coder->uncompressed_limit)) + out_used, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; lzma_check_update(&coder->check, coder->options->check, @@ -174,116 +92,61 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - return_if_error(update_sequence(coder)); - - break; + coder->sequence = SEQ_PADDING; } - case SEQ_CHECK: - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - if (((coder->check.crc32 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + // Fall through - case LZMA_CHECK_CRC64: - if (((coder->check.crc64 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + case SEQ_PADDING: + // If Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; - case LZMA_CHECK_SHA256: - if (coder->check.sha256.buffer[coder->pos] - != in[*in_pos]) + if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - break; - - default: - assert(coder->options->check != LZMA_CHECK_NONE); - assert(coder->options->check <= LZMA_CHECK_ID_MAX); - break; - } - - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - ++*in_pos; - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - return_if_error(update_sequence(coder)); - coder->pos = 0; + if (update_size(&coder->compressed_size, 1, + coder->compressed_limit)) + return LZMA_DATA_ERROR; } - break; - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - if (update_size(&coder->total_size, *in_pos - in_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->tmp != coder->uncompressed_size) - return LZMA_DATA_ERROR; - - coder->pos = 0; - coder->tmp = 0; - - return_if_error(update_sequence(coder)); - - break; - } - - case SEQ_BACKWARD_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - if (update_size(&coder->total_size, in_used, - coder->total_limit)) + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->size_of_backward_size += in_used; - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->tmp != coder->total_size - - coder->size_of_backward_size) - return LZMA_DATA_ERROR; + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; - return_if_error(update_sequence(coder)); + lzma_check_finish(&coder->check, coder->options->check); + coder->sequence = SEQ_CHECK; - break; - } + // Fall through - case SEQ_PADDING: - if (in[*in_pos] == 0x00) { - if (update_size(&coder->total_size, 1, - coder->total_limit)) + case SEQ_CHECK: + while (*in_pos < in_size) { + if (in[(*in_pos)++] != coder->check.buffer[ + coder->check_pos]) return LZMA_DATA_ERROR; - ++*in_pos; - break; + if (++coder->check_pos == lzma_check_sizes[ + coder->options->check]) + return LZMA_STREAM_END; } - return update_sequence(coder); - - default: - return LZMA_PROG_ERROR; + return LZMA_OK; } - return LZMA_OK; + return LZMA_PROG_ERROR; } @@ -300,9 +163,12 @@ static lzma_ret block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // This is pretty similar to lzma_block_encoder_init(). - // See comments there. + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) + return LZMA_PROG_ERROR; + // Allocate and initialize *next->coder if needed. if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) @@ -313,40 +179,28 @@ block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - if (validate_options_1(options)) - return LZMA_PROG_ERROR; - - if (validate_options_2(options)) - return LZMA_DATA_ERROR; - - return_if_error(lzma_check_init(&next->coder->check, options->check)); - + // Basic initializations next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); - next->coder->tmp = 0; - next->coder->size_of_backward_size = 0; - - if (!options->has_eopm && options->uncompressed_size == 0) { - // The Compressed Data field is empty, thus we skip SEQ_CODE - // phase completely. - const lzma_ret ret = update_sequence(next->coder); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that Total Size of the Block still is a valid VLI and + // a multiple of four. + next->coder->compressed_limit + = options->compressed_size == LZMA_VLI_VALUE_UNKNOWN + ? (LZMA_VLI_VALUE_MAX & ~LZMA_VLI_C(3)) + - options->header_size + - lzma_check_sizes[options->check] + : options->compressed_size; + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); return lzma_raw_decoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 78185790..3add45a9 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -34,37 +34,21 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK_FINISH, - SEQ_CHECK_COPY, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, + SEQ_CHECK, } sequence; - /// Position in .header and .check. - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Position when writing out the Check field + size_t check_pos; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; - - /// Backward Size - This is a copy of total_size right before - /// the Backward Size field. - lzma_vli backward_size; + /// Check of the uncompressed data + lzma_check check; }; @@ -80,16 +64,16 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, if (coder->options->uncompressed_size - coder->uncompressed_size != (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } else { if (coder->options->uncompressed_size - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } } else if (LZMA_VLI_VALUE_MAX - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) { - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } // Main loop @@ -107,11 +91,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, out_used, - coder->total_limit) - || update_size(&coder->compressed_size, - out_used, - coder->options->compressed_size)) + // FIXME We must also check that Total Size doesn't get + // too big. + if (update_size(&coder->compressed_size, out_used, + coder->options->compressed_size)) return LZMA_DATA_ERROR; // No need to check for overflow because we have already @@ -125,141 +108,54 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; assert(*in_pos == in_size); + coder->sequence = SEQ_PADDING; + break; + } + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. + if (coder->compressed_size & 3) { + out[*out_pos] = 0x00; + ++*out_pos; + + if (update_size(&coder->compressed_size, 1, + coder->options->compressed_size)) + return LZMA_DATA_ERROR; + + break; + } // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values give to us. + // values. Verify that they match the values given to us. if (!is_size_valid(coder->compressed_size, coder->options->compressed_size) || !is_size_valid(coder->uncompressed_size, coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->sequence = SEQ_CHECK_FINISH; - break; - } + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - case SEQ_CHECK_FINISH: - if (coder->options->check == LZMA_CHECK_NONE) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); - coder->sequence = SEQ_CHECK_COPY; + coder->sequence = SEQ_CHECK; // Fall through - case SEQ_CHECK_COPY: - assert(lzma_check_sizes[coder->options->check] > 0); - - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - out[*out_pos] = coder->check.crc32 >> (coder->pos * 8); - break; - - case LZMA_CHECK_CRC64: - out[*out_pos] = coder->check.crc64 >> (coder->pos * 8); - break; - - case LZMA_CHECK_SHA256: - out[*out_pos] = coder->check.sha256.buffer[coder->pos]; - break; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - + case SEQ_CHECK: + out[*out_pos] = coder->check.buffer[coder->check_pos]; ++*out_pos; - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_uncompressed_size_in_footer) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->uncompressed_size, - &coder->pos, 1, - out, out_pos, out_size); - - // Updating the size this way instead of doing in a - // single chunk using lzma_vli_size(), because this - // way we detect when exactly we are going out of - // our limits. - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - coder->pos = 0; - } + if (++coder->check_pos + == lzma_check_sizes[coder->options->check]) + return LZMA_STREAM_END; - coder->backward_size = coder->total_size; - coder->sequence = SEQ_BACKWARD_SIZE; break; - case SEQ_BACKWARD_SIZE: - if (coder->options->has_backward_size) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->backward_size, &coder->pos, 1, - out, out_pos, out_size); - - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - } - - coder->sequence = SEQ_PADDING; - break; - - case SEQ_PADDING: - if (coder->options->handle_padding) { - assert(coder->options->total_size - != LZMA_VLI_VALUE_UNKNOWN); - - if (coder->total_size < coder->options->total_size) { - out[*out_pos] = 0x00; - ++*out_pos; - - if (update_size(&coder->total_size, 1, - coder->total_limit)) - return LZMA_DATA_ERROR; - - break; - } - } - - // Now also Total Size is known. Verify it. - if (!is_size_valid(coder->total_size, - coder->options->total_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - default: return LZMA_PROG_ERROR; } @@ -281,10 +177,9 @@ static lzma_ret block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // Validate some options. - if (validate_options_1(options) || validate_options_2(options) - || (options->handle_padding && options->total_size - == LZMA_VLI_VALUE_UNKNOWN)) + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -298,40 +193,19 @@ block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the check. - return_if_error(lzma_check_init(&next->coder->check, options->check)); - - // If End of Payload Marker is not used and Uncompressed Size is zero, - // Compressed Data is empty. That is, we don't call the encoder at all. - // We initialize it though; it allows detecting invalid options. - if (!options->has_eopm && options->uncompressed_size == 0) { - // Also Compressed Size must be zero if it has been - // given to us. - if (!is_size_valid(0, options->compressed_size)) - return LZMA_PROG_ERROR; - - next->coder->sequence = SEQ_CHECK_FINISH; - } else { - next->coder->sequence = SEQ_CODE; - } - - // Other initializations + // Basic initializations + next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); // Initialize the requested filters. return lzma_raw_encoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 7676c795..b9e072e0 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -21,353 +21,111 @@ #include "check.h" -struct lzma_coder_s { - lzma_options_block *options; - - enum { - SEQ_FLAGS_1, - SEQ_FLAGS_2, - SEQ_COMPRESSED_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_FILTER_FLAGS_INIT, - SEQ_FILTER_FLAGS_DECODE, - SEQ_CRC32, - SEQ_PADDING - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// CRC32 of the Block Header - uint32_t crc32; - - lzma_next_coder filter_flags_decoder; -}; - - -static bool -update_sequence(lzma_coder *coder) +static void +free_properties(lzma_options_block *options, lzma_allocator *allocator) { - switch (coder->sequence) { - case SEQ_FLAGS_2: - if (coder->options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_COMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_COMPRESSED_SIZE: - if (coder->options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - coder->pos = 0; - - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: - if (coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_FILTER_FLAGS_INIT; - break; - } - - if (coder->options->has_crc32) { - coder->pos = 0; - coder->sequence = SEQ_CRC32; - break; - } - - case SEQ_CRC32: - if (coder->options->padding != 0) { - coder->pos = 0; - coder->sequence = SEQ_PADDING; - break; - } - - return true; - - default: - assert(0); - return true; + // Free allocated filter options. The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + lzma_free(options->filters[i].options, allocator); + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - return false; + return; } -static lzma_ret -block_header_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in) { - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FLAGS_1: - // Check that the reserved bit is unset. Use HEADER_ERROR - // because newer version of liblzma may support the reserved - // bit, although it is likely that this is just a broken file. - if (in[*in_pos] & 0x40) - return LZMA_HEADER_ERROR; - - // Number of filters: we prepare appropriate amount of - // variables for variable-length integer parsing. The - // initialization function has already reset the rest - // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows - // us to later know how many filters there are. - for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i) - coder->options->filters[i].id = 0; - - // End of Payload Marker flag - coder->options->has_eopm = (in[*in_pos] & 0x08) != 0; - - // Compressed Size: Prepare for variable-length integer - // parsing if it is known. - if (in[*in_pos] & 0x10) - coder->options->compressed_size = 0; - - // Uncompressed Size: the same. - if (in[*in_pos] & 0x20) - coder->options->uncompressed_size = 0; - - // Is Metadata Block flag - coder->options->is_metadata = (in[*in_pos] & 0x80) != 0; - - // We need at least one: Uncompressed Size or EOPM. - if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - && !coder->options->has_eopm) - return LZMA_DATA_ERROR; - - // Update header CRC32. - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - coder->sequence = SEQ_FLAGS_2; - break; - - case SEQ_FLAGS_2: - // Check that the reserved bits are unset. - if (in[*in_pos] & 0xE0) - return LZMA_DATA_ERROR; - - // Get the size of Header Padding. - coder->options->padding = in[*in_pos] & 0x1F; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - - case SEQ_COMPRESSED_SIZE: { - // Store the old input position to be used when - // updating coder->header_crc32. - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->compressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->compressed_reserve += in_used; - assert(coder->options->compressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->uncompressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->uncompressed_reserve += in_used; - assert(coder->options->uncompressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_FILTER_FLAGS_INIT: { - assert(coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN); - - const lzma_ret ret = lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, allocator, - &coder->options->filters[coder->pos]); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_FILTER_FLAGS_DECODE; + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or not over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: { - const size_t in_start = *in_pos; + size_t in_size = options->header_size; - const lzma_ret ret = coder->filter_flags_decoder.code( - coder->filter_flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - const size_t in_used = *in_pos - in_start; - coder->options->header_size += in_used; - coder->crc32 = lzma_crc32(in + in_start, - in_used, coder->crc32); + // Validate. The caller must have set options->header_size with + // lzma_block_header_size_decode() macro, so it is a programming error + // if these tests fail. + if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN + || in_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (in_size & 3) + || lzma_block_header_size_decode(in[0]) != in_size) + return LZMA_PROG_ERROR; - if (ret != LZMA_STREAM_END) - return ret; + // Exclude the CRC32 field. + in_size -= 4; - ++coder->pos; + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) + return LZMA_DATA_ERROR; - if (update_sequence(coder)) - return LZMA_STREAM_END; + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_HEADER_ERROR; - break; - } + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; - case SEQ_CRC32: - assert(coder->options->has_crc32); + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&options->compressed_size, + NULL, in, &in_pos, in_size)); - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1) return LZMA_DATA_ERROR; - ++*in_pos; - ++coder->pos; - - // Check if we reached end of the CRC32 field. - if (coder->pos == 4) { - coder->options->header_size += 4; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - } - - break; + options->compressed_size = (options->compressed_size + 1) * 4; - case SEQ_PADDING: - if (in[*in_pos] != 0x00) + // Check that Total Size (that is, size of + // Block Header + Compressed Data + Check) is + // representable as a VLI. + if (lzma_block_total_size_get(options) == 0) return LZMA_DATA_ERROR; - - ++*in_pos; - ++coder->options->header_size; - ++coder->pos; - - if (coder->pos < (size_t)(coder->options->padding)) - break; - - return LZMA_STREAM_END; - - default: - return LZMA_PROG_ERROR; + } else { + options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; } - return LZMA_OK; -} - - -static void -block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->filter_flags_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &block_header_decode; - next->end = &block_header_decoder_end; - next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&options->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &options->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(options, allocator); + return ret; + } } - // Assume that Compressed Size and Uncompressed Size are unknown. - options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; - options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // We will calculate the sizes of these fields too so that the - // application may rewrite the header if it wishes so. - options->compressed_reserve = 0; - options->uncompressed_reserve = 0; + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(options, allocator); - // The Block Flags field is always present, so include its size here - // and we don't need to worry about it in block_header_decode(). - options->header_size = 2; - - // Reset filters[] to indicate empty list of filters. - // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this. - for (size_t i = 0; i < 8; ++i) { - options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; - options->filters[i].options = NULL; + // Possibly some new field present so use + // LZMA_HEADER_ERROR instead of LZMA_DATA_ERROR. + return LZMA_HEADER_ERROR; + } } - next->coder->options = options; - next->coder->sequence = SEQ_FLAGS_1; - next->coder->pos = 0; - next->coder->crc32 = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_block_header_decoder(lzma_stream *strm, - lzma_options_block *options) -{ - lzma_next_strm_init(strm, lzma_block_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - return LZMA_OK; } diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 594b4fc0..ed0c88ba 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -24,188 +24,129 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_options_block *options) { - // Block Flags take two bytes. - size_t size = 2; + // Block Header Size + Block Flags + CRC32. + size_t size = 1 + 1 + 4; // Compressed Size - if (!lzma_vli_is_valid(options->compressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->compressed_reserve != 0) { - // Make sure that the known Compressed Size fits into the - // reserved space. Note that lzma_vli_size() will return zero - // if options->compressed_size is LZMA_VLI_VALUE_UNKNOWN, so - // we don't need to handle that special case separately. - if (options->compressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->compressed_size) - > (size_t)(options->compressed_reserve)) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1 + || options->compressed_size == 0 + || (options->compressed_size & 3)) return LZMA_PROG_ERROR; - size += options->compressed_reserve; - - } else if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Compressed Size is known. We have already checked - // that is is a valid VLI, and since it isn't - // LZMA_VLI_VALUE_UNKNOWN, we can be sure that - // lzma_vli_size() will succeed. - size += lzma_vli_size(options->compressed_size); + size += lzma_vli_size(options->compressed_size / 4 - 1); } // Uncompressed Size - if (!lzma_vli_is_valid(options->uncompressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->uncompressed_reserve != 0) { - if (options->uncompressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->uncompressed_size) - > (size_t)(options->uncompressed_reserve)) + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const size_t add = lzma_vli_size(options->uncompressed_size); + if (add == 0) return LZMA_PROG_ERROR; - size += options->uncompressed_reserve; - - } else if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - size += lzma_vli_size(options->uncompressed_size); + size += add; } // List of Filter Flags + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_PROG_ERROR; + for (size_t i = 0; options->filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { // Don't allow too many filters. - if (i == 7) + if (i == 4) return LZMA_PROG_ERROR; - uint32_t tmp; - const lzma_ret ret = lzma_filter_flags_size(&tmp, - options->filters + i); - if (ret != LZMA_OK) - return ret; + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + options->filters + i)); - size += tmp; + size += add; } - // CRC32 - if (options->has_crc32) - size += 4; - - // Padding - int32_t padding; - if (options->padding == LZMA_BLOCK_HEADER_PADDING_AUTO) { - const uint32_t preferred = lzma_alignment_output( - options->filters, 1); - const uint32_t unaligned = size + options->alignment; - padding = (int32_t)(unaligned % preferred); - if (padding != 0) - padding = preferred - padding; - } else if (options->padding >= LZMA_BLOCK_HEADER_PADDING_MIN - && options->padding <= LZMA_BLOCK_HEADER_PADDING_MAX) { - padding = options->padding; - } else { - return LZMA_PROG_ERROR; - } + // Pad to a multiple of four bytes. + options->header_size = (size + 3) & ~(size_t)(3); - // All success. Copy the calculated values to the options structure. - options->padding = padding; - options->header_size = size + (size_t)(padding); + // NOTE: We don't verify that Total Size of the Block stays within + // limits. This is because it is possible that we are called with + // exaggerated values to reserve space for Block Header, and later + // called again with lower, real values. return LZMA_OK; } extern LZMA_API lzma_ret -lzma_block_header_encode(uint8_t *out, const lzma_options_block *options) +lzma_block_header_encode(const lzma_options_block *options, uint8_t *out) { - // We write the Block Flags later. - if (options->header_size < 2) + if ((options->header_size & 3) + || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) return LZMA_PROG_ERROR; - const size_t out_size = options->header_size; + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = options->header_size - 4; + + // Store the Block Header Size. + out[0] = out_size / 4; + + // We write Block Flags a little later. size_t out_pos = 2; // Compressed Size - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) { - const lzma_vli size = options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->compressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->compressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Compressed Size must be non-zero, fit into a 63-bit + // integer and be a multiple of four. Also the Total Size + // of the Block must fit into 63-bit integer. + if (options->compressed_size == 0 + || (options->compressed_size & 3) + || options->compressed_size + > LZMA_VLI_VALUE_MAX + || lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; + return_if_error(lzma_vli_encode( + options->compressed_size / 4 - 1, NULL, + out, &out_pos, out_size)); } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) { - const lzma_vli size = options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->uncompressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->uncompressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + return_if_error(lzma_vli_encode( + options->uncompressed_size, NULL, + out, &out_pos, out_size)); // Filter Flags - size_t filter_count; - for (filter_count = 0; options->filters[filter_count].id - != LZMA_VLI_VALUE_UNKNOWN; ++filter_count) { - // There can be at maximum of seven filters. - if (filter_count == 7) - return LZMA_PROG_ERROR; - - const lzma_ret ret = lzma_filter_flags_encode(out, &out_pos, - out_size, options->filters + filter_count); - // FIXME: Don't return LZMA_BUF_ERROR. - if (ret != LZMA_OK) - return ret; - } - - // Block Flags 1 - out[0] = filter_count; - - if (options->has_eopm) - out[0] |= 0x08; - else if (options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) return LZMA_PROG_ERROR; - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) - out[0] |= 0x10; - - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) - out[0] |= 0x20; + size_t filter_count = 0; + do { + // There can be at maximum of four filters. + if (filter_count == 4) + return LZMA_PROG_ERROR; - if (options->is_metadata) - out[0] |= 0x80; + return_if_error(lzma_filter_flags_encode(out, &out_pos, + out_size, options->filters + filter_count)); - // Block Flags 2 - if (options->padding < LZMA_BLOCK_HEADER_PADDING_MIN - || options->padding > LZMA_BLOCK_HEADER_PADDING_MAX) - return LZMA_PROG_ERROR; + } while (options->filters[++filter_count].id + != LZMA_VLI_VALUE_UNKNOWN); - out[1] = (uint8_t)(options->padding); + // Block Flags + out[1] = filter_count - 1; - // CRC32 - if (options->has_crc32) { - if (out_size - out_pos < 4) - return LZMA_PROG_ERROR; + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x40; - const uint32_t crc = lzma_crc32(out, out_pos, 0); - for (size_t i = 0; i < 4; ++i) - out[out_pos++] = crc >> (i * 8); - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x80; - // Padding - the amount of available space must now match with - // the size of the Padding field. - if (out_size - out_pos != (size_t)(options->padding)) - return LZMA_PROG_ERROR; + // Padding + memzero(out + out_pos, out_size - out_pos); - memzero(out + out_pos, (size_t)(options->padding)); + // CRC32 + integer_write_32(out + out_size, lzma_crc32(out, out_size, 0)); return LZMA_OK; } diff --git a/src/liblzma/common/block_private.h b/src/liblzma/common/block_private.h index 16d95b9f..235e96b8 100644 --- a/src/liblzma/common/block_private.h +++ b/src/liblzma/common/block_private.h @@ -22,6 +22,7 @@ #include "common.h" + static inline bool update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) { @@ -43,54 +44,4 @@ is_size_valid(lzma_vli size, lzma_vli reference) return reference == LZMA_VLI_VALUE_UNKNOWN || reference == size; } - -/// If any of these tests fail, the caller has to return LZMA_PROG_ERROR. -static inline bool -validate_options_1(const lzma_options_block *options) -{ - return options == NULL - || !lzma_vli_is_valid(options->compressed_size) - || !lzma_vli_is_valid(options->uncompressed_size) - || !lzma_vli_is_valid(options->total_size) - || !lzma_vli_is_valid(options->total_limit) - || !lzma_vli_is_valid(options->uncompressed_limit); -} - - -/// If any of these tests fail, the encoder has to return LZMA_PROG_ERROR -/// because something is going horribly wrong if such values get passed -/// to the encoder. In contrast, the decoder has to return LZMA_DATA_ERROR, -/// since these tests failing indicate that something is wrong in the Stream. -static inline bool -validate_options_2(const lzma_options_block *options) -{ - if ((options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - && options->uncompressed_size - > options->uncompressed_limit) - || (options->total_size != LZMA_VLI_VALUE_UNKNOWN - && options->total_size - > options->total_limit) - || (!options->has_eopm && options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - || options->header_size > options->total_size) - return true; - - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Calculate a rough minimum possible valid Total Size of - // this Block, and check that total_size and total_limit - // are big enough. Note that the real minimum size can be - // bigger due to the Check, Uncompressed Size, Backwards - // Size, pr Padding being present. A rough check here is - // enough for us to catch the most obvious errors as early - // as possible. - const lzma_vli total_min = options->compressed_size - + (lzma_vli)(options->header_size); - if (total_min > options->total_size - || total_min > options->total_limit) - return true; - } - - return false; -} - #endif diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c new file mode 100644 index 00000000..6bffc2f1 --- /dev/null +++ b/src/liblzma/common/block_util.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header.c +/// \brief Utility functions to handle lzma_options_block +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_block_total_size_set(lzma_options_block *options, lzma_vli total_size) +{ + // Validate. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX + || (total_size & 3)) + return LZMA_PROG_ERROR; + + const uint32_t container_size = options->header_size + + lzma_check_sizes[options->check]; + + // Validate that Compressed Size will be greater than zero. + if (container_size <= total_size) + return LZMA_DATA_ERROR; + + options->compressed_size = total_size - container_size; + + return LZMA_OK; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size_get(const lzma_options_block *options) +{ + // Validate the values that we are interested in. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // Total Size either. + if (options->compressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_VLI_VALUE_UNKNOWN; + + const lzma_vli total_size = options->compressed_size + + options->header_size + + lzma_check_sizes[options->check]; + + // Validate the calculated Total Size. + if (options->compressed_size > LZMA_VLI_VALUE_MAX + || (options->compressed_size & 3) + || total_size > LZMA_VLI_VALUE_MAX) + return 0; + + return total_size; +} diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 5dd7a87f..4f30427d 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -21,6 +21,7 @@ #define LZMA_COMMON_H #include "../../common/sysdefs.h" +#include "../../common/integer.h" // Don't use ifdef... #if HAVE_VISIBILITY @@ -30,6 +31,17 @@ #endif +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 @@ -117,10 +129,6 @@ struct lzma_filter_info_s { /// Pointer to filter's options structure void *options; - - /// Uncompressed size of the filter, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. - lzma_vli uncompressed_size; }; @@ -158,20 +166,6 @@ extern void lzma_next_coder_end(lzma_next_coder *next, lzma_allocator *allocator); -extern lzma_ret lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options); - -extern lzma_ret lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options); - -extern lzma_ret lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); - -extern lzma_ret lzma_stream_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer); - - /// \brief Wrapper for memcpy() /// /// This function copies as much data as possible from in[] to out[] and @@ -225,6 +219,13 @@ do { \ lzma_next_coder_init2(next, allocator, \ func, func, allocator, __VA_ARGS__) +/// \brief Initializing lzma_next_coder +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_coder_init0(func, next, allocator) \ + lzma_next_coder_init2(next, allocator, func, func, allocator) + /// \brief Initializing lzma_stream /// @@ -254,6 +255,13 @@ do { \ #define lzma_next_strm_init(strm, func, ...) \ lzma_next_strm_init2(strm, func, func, (strm)->allocator, __VA_ARGS__) +/// \brief Initializing lzma_stream +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_strm_init0(strm, func) \ + lzma_next_strm_init2(strm, func, func, (strm)->allocator) + /// \brief Return if expression doesn't evaluate to LZMA_OK /// diff --git a/src/liblzma/common/copy_coder.c b/src/liblzma/common/copy_coder.c deleted file mode 100644 index 0bd674f6..00000000 --- a/src/liblzma/common/copy_coder.c +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.c -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "copy_coder.h" - - -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - -#ifdef HAVE_ENCODER -static lzma_ret -copy_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // If we aren't the last filter in the chain, the Copy filter - // is totally useless. Note that it is job of the next coder to - // take care of Uncompressed Size, so we don't need to update our - // coder->uncompressed_size at all. - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - // We are the last coder in the chain. - // Just copy as much data as possible. - bufcpy(in, in_pos, in_size, out, out_pos, out_size); - - // LZMA_SYNC_FLUSH and LZMA_FINISH are the same thing for us. - if (action != LZMA_RUN && *in_pos == in_size) - return LZMA_STREAM_END; - - return LZMA_OK; -} -#endif - - -#ifdef HAVE_DECODER -static lzma_ret -copy_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - - const size_t in_avail = in_size - *in_pos; - - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - // We are the last coder in the chain. - // Just copy as much data as possible. - const size_t in_used = bufcpy( - in, in_pos, in_size, out, out_pos, out_size); - - // Update uncompressed_size if it is known. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= in_used; - - return coder->uncompressed_size == 0 ? LZMA_STREAM_END : LZMA_OK; -} -#endif - - -static void -copy_coder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -copy_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters, lzma_code_function encode) -{ - // Allocate memory for the decoder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = encode; - next->end = ©_coder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - // Copy Uncompressed Size which is used to limit the output size. - next->coder->uncompressed_size = filters[0].uncompressed_size; - - // Initialize the next decoder in the chain, if any. - return lzma_next_filter_init( - &next->coder->next, allocator, filters + 1); -} - - -#ifdef HAVE_ENCODER -extern lzma_ret -lzma_copy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - ©_encode); -} -#endif - - -#ifdef HAVE_DECODER -extern lzma_ret -lzma_copy_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - ©_decode); -} -#endif diff --git a/src/liblzma/common/copy_coder.h b/src/liblzma/common/copy_coder.h deleted file mode 100644 index b8d0295d..00000000 --- a/src/liblzma/common/copy_coder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.h -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_COPY_CODER_H -#define LZMA_COPY_CODER_H - -#include "common.h" - -extern lzma_ret lzma_copy_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -extern lzma_ret lzma_copy_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -#endif diff --git a/src/liblzma/common/delta_common.c b/src/liblzma/common/delta_common.c index de27b5a6..acd31e14 100644 --- a/src/liblzma/common/delta_common.c +++ b/src/liblzma/common/delta_common.c @@ -47,10 +47,6 @@ lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Coding function is different for encoder and decoder. next->code = code; - // Copy Uncompressed Size which is used to limit the output size - // in the Delta decoder. - next->coder->uncompressed_size = filters[0].uncompressed_size; - // Set the delta distance. if (filters[0].options == NULL) return LZMA_PROG_ERROR; diff --git a/src/liblzma/common/delta_common.h b/src/liblzma/common/delta_common.h index 3ec955b7..1d58899d 100644 --- a/src/liblzma/common/delta_common.h +++ b/src/liblzma/common/delta_common.h @@ -26,10 +26,6 @@ struct lzma_coder_s { /// Next coder in the chain lzma_next_coder next; - /// Uncompressed size - This is needed when we are the last - /// filter in the chain. - lzma_vli uncompressed_size; - /// Delta distance size_t distance; diff --git a/src/liblzma/common/delta_decoder.c b/src/liblzma/common/delta_decoder.c index af2b840d..8f5a4cbf 100644 --- a/src/liblzma/common/delta_decoder.c +++ b/src/liblzma/common/delta_decoder.c @@ -21,26 +21,8 @@ #include "delta_common.h" -/// Copies and decodes the data at the same time. This is used when Delta -/// is the last filter in the chain. static void -copy_and_decode(lzma_coder *coder, - const uint8_t *restrict in, uint8_t *restrict out, size_t size) -{ - const size_t distance = coder->distance; - - for (size_t i = 0; i < size; ++i) { - out[i] = in[i] + coder->history[ - (distance + coder->pos) & 0xFF]; - coder->history[coder->pos-- & 0xFF] = out[i]; - } -} - - -/// Decodes the data in place. This is used when we are not the last filter -/// in the chain. -static void -decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) { const size_t distance = coder->distance; @@ -51,44 +33,21 @@ decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) } - static lzma_ret delta_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - lzma_ret ret; - - if (coder->next.code == NULL) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_size - *in_pos) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - const size_t in_avail = in_size - *in_pos; - const size_t out_avail = out_size - *out_pos; - const size_t size = MIN(in_avail, out_avail); - - copy_and_decode(coder, in + *in_pos, out + *out_pos, size); + assert(coder->next.code != NULL); - *in_pos += size; - *out_pos += size; + const size_t out_start = *out_pos; - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - coder->uncompressed_size -= size; + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); - ret = coder->uncompressed_size == 0 - ? LZMA_STREAM_END : LZMA_OK; - - } else { - const size_t out_start = *out_pos; - - ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - decode_in_place(coder, out + out_start, *out_pos - out_start); - } + decode_buffer(coder, out + out_start, *out_pos - out_start); return ret; } diff --git a/src/liblzma/common/delta_encoder.c b/src/liblzma/common/delta_encoder.c index b94f92de..a8bb9341 100644 --- a/src/liblzma/common/delta_encoder.c +++ b/src/liblzma/common/delta_encoder.c @@ -22,7 +22,8 @@ /// Copies and encodes the data at the same time. This is used when Delta -/// is the last filter in the chain. +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). static void copy_and_encode(lzma_coder *coder, const uint8_t *restrict in, uint8_t *restrict out, size_t size) @@ -38,8 +39,8 @@ copy_and_encode(lzma_coder *coder, } -/// Encodes the data in place. This is used when we are not the last filter -/// in the chain. +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). static void encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) { diff --git a/src/liblzma/common/easy_multi.c b/src/liblzma/common/easy.c index 15778fab..6c258204 100644 --- a/src/liblzma/common/easy_multi.c +++ b/src/liblzma/common/easy.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_multi.c -/// \brief Easy Multi-Block Stream encoder initialization +/// \file easy.c +/// \brief Easy Stream encoder initialization // // Copyright (C) 2008 Lasse Collin // @@ -17,23 +17,50 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" -#include "stream_encoder_multi.h" +#include "stream_encoder.h" struct lzma_coder_s { - lzma_next_coder encoder; - lzma_options_stream options; + lzma_next_coder stream_encoder; + + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_options_filter filters[5]; }; +static bool +easy_set_filters(lzma_options_filter *filters, uint32_t level) +{ + bool error = false; + + if (level == 0) { + // TODO FIXME Use Subblock or LZMA2 with no compression. + error = true; + +#ifdef HAVE_FILTER_LZMA + } else if (level <= 9) { + filters[0].id = LZMA_FILTER_LZMA; + filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); + filters[1].id = LZMA_VLI_VALUE_UNKNOWN; +#endif + + } else { + error = true; + } + + return error; +} + + static lzma_ret easy_encode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - return coder->encoder.code(coder->encoder.coder, allocator, + return coder->stream_encoder.code( + coder->stream_encoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); } @@ -41,7 +68,7 @@ easy_encode(lzma_coder *coder, lzma_allocator *allocator, static void easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) { - lzma_next_coder_end(&coder->encoder, allocator); + lzma_next_coder_end(&coder->stream_encoder, allocator); lzma_free(coder, allocator); return; } @@ -49,8 +76,7 @@ easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) + lzma_easy_level level) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -60,39 +86,21 @@ easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->code = &easy_encode; next->end = &easy_encoder_end; - next->coder->encoder = LZMA_NEXT_CODER_INIT; + next->coder->stream_encoder = LZMA_NEXT_CODER_INIT; } - next->coder->options = (lzma_options_stream){ - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - .header = header, - .footer = footer, - }; - - if (lzma_easy_set_filters(next->coder->options.filters, level) - || lzma_easy_set_filters( - next->coder->options.metadata_filters, - metadata_level)) + if (easy_set_filters(next->coder->filters, level)) return LZMA_HEADER_ERROR; - return lzma_stream_encoder_multi_init(&next->coder->encoder, - allocator, &next->coder->options); + return lzma_stream_encoder_init(&next->coder->stream_encoder, + allocator, next->coder->filters, LZMA_CHECK_CRC32); } extern LZMA_API lzma_ret -lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) +lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level) { - // This is more complicated than lzma_easy_encoder_single(), - // because lzma_stream_encoder_multi() wants that the options - // structure is available until the encoding is finished. - lzma_next_strm_init(strm, easy_encoder_init, - level, metadata_level, header, footer); + lzma_next_strm_init(strm, easy_encoder_init, level); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; @@ -101,3 +109,14 @@ lzma_easy_encoder_multi(lzma_stream *strm, return LZMA_OK; } + + +extern LZMA_API uint32_t +lzma_easy_memory_usage(lzma_easy_level level) +{ + lzma_options_filter filters[5]; + if (easy_set_filters(filters, level)) + return UINT32_MAX; + + return lzma_memory_usage(filters, true); +} diff --git a/src/liblzma/common/easy_common.c b/src/liblzma/common/easy_common.c deleted file mode 100644 index e0c12a52..00000000 --- a/src/liblzma/common/easy_common.c +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" - - -extern bool -lzma_easy_set_filters(lzma_options_filter *filters, uint32_t level) -{ - bool error = false; - - if (level == 0) { - filters[0].id = LZMA_VLI_VALUE_UNKNOWN; - -#ifdef HAVE_FILTER_LZMA - } else if (level <= 9) { - filters[0].id = LZMA_FILTER_LZMA; - filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); - filters[1].id = LZMA_VLI_VALUE_UNKNOWN; -#endif - - } else { - error = true; - } - - return error; -} - - -extern LZMA_API uint32_t -lzma_easy_memory_usage(lzma_easy_level level) -{ - lzma_options_filter filters[8]; - if (lzma_easy_set_filters(filters, level)) - return UINT32_MAX; - - return lzma_memory_usage(filters, true); -} diff --git a/src/liblzma/common/extra.c b/src/liblzma/common/extra.c deleted file mode 100644 index c532abb0..00000000 --- a/src/liblzma/common/extra.c +++ /dev/null @@ -1,34 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file extra.c -/// \brief Handling of Extra in Metadata -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API void -lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator) -{ - while (extra != NULL) { - lzma_extra *tmp = extra->next; - lzma_free(extra->data, allocator); - lzma_free(extra, allocator); - extra = tmp; - } - - return; -} diff --git a/src/liblzma/common/features.c b/src/liblzma/common/features.c index 33b2e0a2..a02949d9 100644 --- a/src/liblzma/common/features.c +++ b/src/liblzma/common/features.c @@ -21,10 +21,6 @@ static const lzma_vli filters[] = { -#ifdef HAVE_FILTER_COPY - LZMA_FILTER_COPY, -#endif - #ifdef HAVE_FILTER_SUBBLOCK LZMA_FILTER_SUBBLOCK, #endif diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c index 515f9346..498b2ad6 100644 --- a/src/liblzma/common/filter_flags_decoder.c +++ b/src/liblzma/common/filter_flags_decoder.c @@ -21,362 +21,188 @@ #include "lzma_decoder.h" -struct lzma_coder_s { - lzma_options_filter *options; - - enum { - SEQ_MISC, - SEQ_ID, - SEQ_SIZE, - SEQ_PROPERTIES, - } sequence; - - /// \brief Position in variable-length integers - size_t pos; - - /// \brief Size of Filter Properties - lzma_vli properties_size; -}; - - #ifdef HAVE_FILTER_SUBBLOCK static lzma_ret -properties_subblock(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in lzma_attribute((unused)), - size_t *in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused))) +properties_subblock(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props lzma_attribute((unused)), + size_t prop_size lzma_attribute((unused))) { - if (coder->properties_size != 0) + if (prop_size != 0) return LZMA_HEADER_ERROR; - coder->options->options = lzma_alloc( + options->options = lzma_alloc( sizeof(lzma_options_subblock), allocator); - if (coder->options->options == NULL) + if (options->options == NULL) return LZMA_MEM_ERROR; - ((lzma_options_subblock *)(coder->options->options)) - ->allow_subfilters = true; - return LZMA_STREAM_END; + ((lzma_options_subblock *)(options->options))->allow_subfilters = true; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_SIMPLE static lzma_ret -properties_simple(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_simple(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size == 0) - return LZMA_STREAM_END; + if (prop_size == 0) + return LZMA_OK; - if (coder->properties_size != 4) + if (prop_size != 4) return LZMA_HEADER_ERROR; - lzma_options_simple *options = coder->options->options; - - if (options == NULL) { - options = lzma_alloc(sizeof(lzma_options_simple), allocator); - if (options == NULL) - return LZMA_MEM_ERROR; - - options->start_offset = 0; - coder->options->options = options; - } - - while (coder->pos < 4) { - if (*in_pos == in_size) - return LZMA_OK; + lzma_options_simple *simple = lzma_alloc( + sizeof(lzma_options_simple), allocator); + if (simple == NULL) + return LZMA_MEM_ERROR; - options->start_offset - |= (uint32_t)(in[*in_pos]) << (8 * coder->pos); - ++*in_pos; - ++coder->pos; - } + simple->start_offset = integer_read_32(props); // Don't leave an options structure allocated if start_offset is zero. - if (options->start_offset == 0) { - lzma_free(options, allocator); - coder->options->options = NULL; - } + if (simple->start_offset == 0) + lzma_free(simple, allocator); + else + options->options = simple; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_DELTA static lzma_ret -properties_delta(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_delta(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size != 1) + if (prop_size != 1) return LZMA_HEADER_ERROR; - if (*in_pos == in_size) - return LZMA_OK; - - lzma_options_delta *options = lzma_alloc( - sizeof(lzma_options_delta), allocator); - if (options == NULL) + options->options = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (options->options == NULL) return LZMA_MEM_ERROR; - coder->options->options = options; - - options->distance = (uint32_t)(in[*in_pos]) + 1; - ++*in_pos; + ((lzma_options_delta *)(options->options))->distance + = (uint32_t)(props[0]) + 1; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_LZMA static lzma_ret -properties_lzma(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_lzma(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { // LZMA properties are always two bytes (at least for now). - if (coder->properties_size != 2) + if (prop_size != 2) return LZMA_HEADER_ERROR; - assert(coder->pos < 2); - - while (*in_pos < in_size) { - switch (coder->pos) { - case 0: - // Allocate the options structure. - coder->options->options = lzma_alloc( - sizeof(lzma_options_lzma), allocator); - if (coder->options->options == NULL) - return LZMA_MEM_ERROR; - - // Decode lc, lp, and pb. - if (lzma_lzma_decode_properties( - coder->options->options, in[*in_pos])) - return LZMA_HEADER_ERROR; - - ++*in_pos; - ++coder->pos; - break; - - case 1: { - lzma_options_lzma *options = coder->options->options; - - // Check that reserved bits are unset. - if (in[*in_pos] & 0xC0) - return LZMA_HEADER_ERROR; - - // Decode the dictionary size. See the file format - // specification section 4.3.4.2 to understand this. - if (in[*in_pos] == 0) { - options->dictionary_size = 1; - - } else if (in[*in_pos] > 59) { - // Dictionary size is over 1 GiB. - // It's not supported at the moment. - return LZMA_HEADER_ERROR; -# if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 -# error Update the if()-condition a few lines -# error above to match LZMA_DICTIONARY_SIZE_MAX. -# endif - - } else { - options->dictionary_size - = 2 | ((in[*in_pos] + 1) & 1); - options->dictionary_size - <<= (in[*in_pos] - 1) / 2; - } - - ++*in_pos; - return LZMA_STREAM_END; - } - } - } + lzma_options_lzma *lzma = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (lzma == NULL) + return LZMA_MEM_ERROR; + + // Decode lc, lp, and pb. + if (lzma_lzma_decode_properties(lzma, props[0])) + goto error; + + // Check that reserved bits are unset. + if (props[1] & 0xC0) + goto error; + + // Decode the dictionary size. + // FIXME The specification says that maximum is 4 GiB. + if (props[1] > 36) + goto error; +#if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 +# error Update the if()-condition a few lines +# error above to match LZMA_DICTIONARY_SIZE_MAX. +#endif + + lzma->dictionary_size = 2 | (props[1] & 1); + lzma->dictionary_size <<= props[1] / 2 + 11; - assert(coder->pos < 2); + options->options = lzma; return LZMA_OK; + +error: + lzma_free(lzma, allocator); + return LZMA_HEADER_ERROR; } #endif -static lzma_ret -filter_flags_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) { - while (*in_pos < in_size || coder->sequence == SEQ_PROPERTIES) - switch (coder->sequence) { - case SEQ_MISC: - // Determine the Filter ID and Size of Filter Properties. - if (in[*in_pos] >= 0xE0) { - // Using External ID. Prepare the ID - // for variable-length integer parsing. - coder->options->id = 0; - - if (in[*in_pos] == 0xFF) { - // Mark that Size of Filter Properties is - // unknown, so we know later that there is - // external Size of Filter Properties present. - coder->properties_size - = LZMA_VLI_VALUE_UNKNOWN; - } else { - // Take Size of Filter Properties from Misc. - coder->properties_size = in[*in_pos] - 0xE0; - } - - coder->sequence = SEQ_ID; - - } else { - // The Filter ID is the same as Misc. - coder->options->id = in[*in_pos]; - - // The Size of Filter Properties can be calculated - // from Misc too. - coder->properties_size = in[*in_pos] / 0x20; - - coder->sequence = SEQ_PROPERTIES; - } - - ++*in_pos; - break; + // Set the pointer to NULL so the caller can always safely free it. + options->options = NULL; - case SEQ_ID: { - const lzma_ret ret = lzma_vli_decode(&coder->options->id, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->properties_size == LZMA_VLI_VALUE_UNKNOWN) { - // We have also external Size of Filter - // Properties. Prepare the size for - // variable-length integer parsing. - coder->properties_size = 0; - coder->sequence = SEQ_SIZE; - } else { - coder->sequence = SEQ_PROPERTIES; - } - - // Reset pos for its next job. - coder->pos = 0; - break; - } + // Filter ID + return_if_error(lzma_vli_decode(&options->id, NULL, + in, in_pos, in_size)); - case SEQ_SIZE: { - const lzma_ret ret = lzma_vli_decode(&coder->properties_size, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; + // Size of Properties + lzma_vli prop_size; + return_if_error(lzma_vli_decode(&prop_size, NULL, + in, in_pos, in_size)); - coder->pos = 0; - coder->sequence = SEQ_PROPERTIES; - break; - } + // Check that we have enough input. + if (prop_size > in_size - *in_pos) + return LZMA_DATA_ERROR; - case SEQ_PROPERTIES: { - lzma_ret (*get_properties)(lzma_coder *coder, - lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size); + // Determine the function to decode the properties. + lzma_ret (*get_properties)(lzma_options_filter *options, + lzma_allocator *allocator, const uint8_t *props, + size_t prop_size); - switch (coder->options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return coder->properties_size > 0 - ? LZMA_HEADER_ERROR : LZMA_STREAM_END; -#endif + switch (options->id) { #ifdef HAVE_FILTER_SUBBLOCK - case LZMA_FILTER_SUBBLOCK: - get_properties = &properties_subblock; - break; + case LZMA_FILTER_SUBBLOCK: + get_properties = &properties_subblock; + break; #endif #ifdef HAVE_FILTER_SIMPLE # ifdef HAVE_FILTER_X86 - case LZMA_FILTER_X86: + case LZMA_FILTER_X86: # endif # ifdef HAVE_FILTER_POWERPC - case LZMA_FILTER_POWERPC: + case LZMA_FILTER_POWERPC: # endif # ifdef HAVE_FILTER_IA64 - case LZMA_FILTER_IA64: + case LZMA_FILTER_IA64: # endif # ifdef HAVE_FILTER_ARM - case LZMA_FILTER_ARM: + case LZMA_FILTER_ARM: # endif # ifdef HAVE_FILTER_ARMTHUMB - case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_ARMTHUMB: # endif # ifdef HAVE_FILTER_SPARC - case LZMA_FILTER_SPARC: + case LZMA_FILTER_SPARC: # endif - get_properties = &properties_simple; - break; + get_properties = &properties_simple; + break; #endif #ifdef HAVE_FILTER_DELTA - case LZMA_FILTER_DELTA: - get_properties = &properties_delta; - break; + case LZMA_FILTER_DELTA: + get_properties = &properties_delta; + break; #endif #ifdef HAVE_FILTER_LZMA - case LZMA_FILTER_LZMA: - get_properties = &properties_lzma; - break; + case LZMA_FILTER_LZMA: + get_properties = &properties_lzma; + break; #endif - default: - return LZMA_HEADER_ERROR; - } - - return get_properties(coder, allocator, in, in_pos, in_size); - } - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -filter_flags_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &filter_flags_decode; - next->end = &filter_flags_decoder_end; + return LZMA_HEADER_ERROR; } - options->id = 0; - options->options = NULL; - - next->coder->options = options; - next->coder->sequence = SEQ_MISC; - next->coder->pos = 0; - next->coder->properties_size = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_filter_flags_decoder(lzma_stream *strm, lzma_options_filter *options) -{ - lzma_next_strm_init(strm, lzma_filter_flags_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; + const uint8_t *props = in + *in_pos; + *in_pos += prop_size; + return get_properties(options, allocator, props, prop_size); } diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c index 2d11dd3a..45fbbb00 100644 --- a/src/liblzma/common/filter_flags_encoder.c +++ b/src/liblzma/common/filter_flags_encoder.c @@ -22,22 +22,13 @@ #include "fastpos.h" -/// \brief Calculates the size of the Filter Properties field -/// -/// This currently can return only LZMA_OK or LZMA_HEADER_ERROR, but -/// with some new filters it may return also LZMA_PROG_ERROR. +/// Calculate the size of the Filter Properties field static lzma_ret get_properties_size(uint32_t *size, const lzma_options_filter *options) { lzma_ret ret = LZMA_OK; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - *size = 0; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: *size = 0; @@ -100,40 +91,14 @@ get_properties_size(uint32_t *size, const lzma_options_filter *options) extern LZMA_API lzma_ret lzma_filter_flags_size(uint32_t *size, const lzma_options_filter *options) { - // Get size of Filter Properties. + // Get size of Filter Properties. This also validates the Filter ID. uint32_t prop_size; - const lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Size of Filter ID field if it exists. - size_t id_size; - size_t prop_size_size; - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - id_size = 0; - prop_size_size = 0; - - } else { - // At least Filter ID is stored using the External ID field. - id_size = lzma_vli_size(options->id); - if (id_size == 0) - return LZMA_PROG_ERROR; - - if (prop_size <= 30) { - // Size of Filter Properties fits into Misc still. - prop_size_size = 0; - } else { - // The Size of Filter Properties field is used too. - prop_size_size = lzma_vli_size(prop_size); - if (prop_size_size == 0) - return LZMA_PROG_ERROR; - } - } + return_if_error(get_properties_size(&prop_size, options)); - // 1 is for the Misc field. - *size = 1 + id_size + prop_size_size + prop_size; + // Calculate the size of the Filter ID and Size of Properties fields. + // These cannot fail since get_properties_size() already succeeded. + *size = lzma_vli_size(options->id) + lzma_vli_size(prop_size) + + prop_size; return LZMA_OK; } @@ -149,10 +114,10 @@ properties_simple(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_OK; if (out_size - *out_pos < 4) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; - for (size_t i = 0; i < 4; ++i) - out[(*out_pos)++] = options->start_offset >> (i * 8); + integer_write_32(out + *out_pos, options->start_offset); + *out_pos += 4; return LZMA_OK; } @@ -175,7 +140,7 @@ properties_delta(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_HEADER_ERROR; if (out_size - *out_pos < 1) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; out[*out_pos] = options->distance - LZMA_DELTA_DISTANCE_MIN; ++*out_pos; @@ -195,7 +160,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_PROG_ERROR; if (out_size - *out_pos < 2) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; // LZMA Properties if (lzma_lzma_encode_properties(options, out + *out_pos)) @@ -230,7 +195,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, ++d; // Get the highest two bits using the proper encoding: - out[*out_pos] = get_pos_slot(d) - 1; + out[*out_pos] = get_pos_slot(d) - 24; ++*out_pos; return LZMA_OK; @@ -250,58 +215,19 @@ lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, size_t out_size, // Get size of Filter Properties. uint32_t prop_size; - lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Misc, External ID, and Size of Properties - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - out[*out_pos] = options->id; - ++*out_pos; - - } else if (prop_size <= 30) { - // Size of Filter Properties fits into Misc. - out[*out_pos] = prop_size + 0xE0; - ++*out_pos; - - // External ID is used to encode the Filter ID. If encoding - // the VLI fails, it's because the caller has given as too - // little output space, which it should have checked already. - // So return LZMA_PROG_ERROR, not LZMA_BUF_ERROR. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } else { - // Nothing fits into Misc. - out[*out_pos] = 0xFF; - ++*out_pos; - - // External ID is used to encode the Filter ID. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - // External Size of Filter Properties - dummy = 0; - if (lzma_vli_encode(prop_size, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + return_if_error(get_properties_size(&prop_size, options)); + + // Filter ID + return_if_error(lzma_vli_encode(options->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + return_if_error(lzma_vli_encode(prop_size, NULL, + out, out_pos, out_size)); // Filter Properties + lzma_ret ret; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - assert(prop_size == 0); - ret = options->options == NULL ? LZMA_OK : LZMA_HEADER_ERROR; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: assert(prop_size == 0); diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 6816b37a..f01206de 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c -/// \brief Handling of Index in Metadata +/// \brief Handling of Index // // Copyright (C) 2007 Lasse Collin // @@ -17,124 +17,733 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "index.h" -/** - * \brief Duplicates an Index list - * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. - */ -extern LZMA_API lzma_index * -lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator) +/// Number of Records to allocate at once. +#define INDEX_GROUP_SIZE 256 + + +typedef struct lzma_index_group_s lzma_index_group; +struct lzma_index_group_s { + /// Next group + lzma_index_group *prev; + + /// Previous group + lzma_index_group *next; + + /// Index of the last Record in this group + size_t last; + + /// Total Size fields as cumulative sum relative to the beginning + /// of the group. The total size of the group is total_sums[last]. + lzma_vli total_sums[INDEX_GROUP_SIZE]; + + /// Uncompressed Size fields as cumulative sum relative to the + /// beginning of the group. The uncompressed size of the group is + /// uncompressed_sums[last]. + lzma_vli uncompressed_sums[INDEX_GROUP_SIZE]; + + /// True if the Record is padding + bool paddings[INDEX_GROUP_SIZE]; +}; + + +struct lzma_index_s { + /// Total size of the Blocks and padding + lzma_vli total_size; + + /// Uncompressed size of the Stream + lzma_vli uncompressed_size; + + /// Number of non-padding records. This is needed by Index encoder. + lzma_vli count; + + /// Size of the List of Records field; this is updated every time + /// a new non-padding Record is added. + lzma_vli index_list_size; + + /// This is zero if no Indexes have been combined with + /// lzma_index_cat(). With combined Indexes, this contains the sizes + /// of all but latest the Streams, including possible Stream Padding + /// fields. + lzma_vli padding_size; + + /// First group of Records + lzma_index_group *head; + + /// Last group of Records + lzma_index_group *tail; + + /// Tracking the read position + struct { + /// Group where the current read position is. + lzma_index_group *group; + + /// The most recently read record in *group + lzma_vli record; + + /// Uncompressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli uncompressed_offset; + + /// Compressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli stream_offset; + } current; + + /// Information about earlier Indexes when multiple Indexes have + /// been combined. + struct { + /// Sum of the Record counts of the all but the last Stream. + lzma_vli count; + + /// Sum of the List of Records fields of all but the last + /// Stream. This is needed when a new Index is concatenated + /// to this lzma_index structure. + lzma_vli index_list_size; + } old; +}; + + +static void +free_index_list(lzma_index *i, lzma_allocator *allocator) { - lzma_index *new_head = NULL; - lzma_index *new_current = NULL; + lzma_index_group *g = i->head; - while (old_current != NULL) { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) { - lzma_index_free(new_head, allocator); - return NULL; - } + while (g != NULL) { + lzma_index_group *tmp = g->next; + lzma_free(g, allocator); + g = tmp; + } - i->total_size = old_current->total_size; - i->uncompressed_size = old_current->uncompressed_size; - i->next = NULL; + return; +} - if (new_head == NULL) - new_head = i; - else - new_current->next = i; - new_current = i; - old_current = old_current->next; +extern LZMA_API lzma_index * +lzma_index_init(lzma_index *i, lzma_allocator *allocator) +{ + if (i == NULL) { + i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return NULL; + } else { + free_index_list(i, allocator); } - return new_head; + i->total_size = 0; + i->uncompressed_size = 0; + i->count = 0; + i->index_list_size = 0; + i->padding_size = 0; + i->head = NULL; + i->tail = NULL; + i->current.group = NULL; + i->old.count = 0; + i->old.index_list_size = 0; + + return i; } -/** - * \brief Frees an Index list - * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. - */ extern LZMA_API void -lzma_index_free(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) { - while (i != NULL) { - lzma_index *tmp = i->next; + if (i != NULL) { + free_index_list(i, allocator); lzma_free(i, allocator); - i = tmp; } return; } -/** - * \brief Calculates properties of an Index list - * - * - */ -extern LZMA_API lzma_ret -lzma_index_count(const lzma_index *i, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size) -{ - *count = 0; - *total_size = 0; - *uncompressed_size = 0; - - while (i != NULL) { - if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) { - *total_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->total_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) { - *total_size += i->total_size; - if (*total_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; +extern LZMA_API lzma_vli +lzma_index_count(const lzma_index *i) +{ + return i->count; +} + + +extern LZMA_API lzma_vli +lzma_index_size(const lzma_index *i) +{ + return index_size(i->count, i->index_list_size); +} + + +extern LZMA_API lzma_vli +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API lzma_vli +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +extern LZMA_API lzma_vli +lzma_index_file_size(const lzma_index *i) +{ + // If multiple Streams are concatenated, the Stream Header, Index, + // and Stream Footer fields of all but the last Stream are already + // included in padding_size. Thus, we need to calculate only the + // size of the last Index, not all Indexes. + return i->total_size + i->padding_size + + index_size(i->count - i->old.count, + i->index_list_size - i->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; +} + + +extern LZMA_API lzma_vli +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) + - index_size_unpadded(i->count, i->index_list_size)) & 3; +} + + +/// Helper function for index_append() +static lzma_ret +index_append_real(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size, + bool is_padding) +{ + // Add the new record. + if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) { + // Allocate a new group. + lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + // Initialize the group and set its first record. + g->prev = i->tail; + g->next = NULL; + g->last = 0; + g->total_sums[0] = total_size; + g->uncompressed_sums[0] = uncompressed_size; + g->paddings[0] = is_padding; + + // If this is the first group, make it the head. + if (i->head == NULL) + i->head = g; + else + i->tail->next = g; + + // Make it the new tail. + i->tail = g; + + } else { + // i->tail has space left for at least one record. + i->tail->total_sums[i->tail->last + 1] + = i->tail->total_sums[i->tail->last] + + total_size; + i->tail->uncompressed_sums[i->tail->last + 1] + = i->tail->uncompressed_sums[i->tail->last] + + uncompressed_size; + i->tail->paddings[i->tail->last + 1] = is_padding; + ++i->tail->last; + } + + return LZMA_OK; +} + + +static lzma_ret +index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, + lzma_vli uncompressed_size, bool is_padding) +{ + if (total_size > LZMA_VLI_VALUE_MAX + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + // This looks a bit ugly. We want to first validate that the Index + // and Stream stay in valid limits after adding this Record. After + // validating, we may need to allocate a new lzma_index_group (it's + // slightly more correct to validate before allocating, YMMV). + lzma_ret ret; + + if (is_padding) { + assert(uncompressed_size == 0); + + // First update the info so we can validate it. + i->padding_size += total_size; + + if (i->padding_size > LZMA_VLI_VALUE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, true); + + // If something went wrong, undo the updated value. + if (ret != LZMA_OK) + i->padding_size -= total_size; + + } else { + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add + = lzma_vli_size(total_size / 4 - 1) + + lzma_vli_size(uncompressed_size); + + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; + + if (i->total_size > LZMA_VLI_VALUE_MAX + || i->uncompressed_size > LZMA_VLI_VALUE_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } + } + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size) +{ + return index_append(i, allocator, + total_size, uncompressed_size, false); +} + + +/// Initialize i->current to point to the first Record. +static bool +init_current(lzma_index *i) +{ + if (i->head == NULL) { + assert(i->count == 0); + return true; + } + + assert(i->count > 0); + + i->current.group = i->head; + i->current.record = 0; + i->current.stream_offset = LZMA_STREAM_HEADER_SIZE; + i->current.uncompressed_offset = 0; - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size += i->uncompressed_size; - if (*uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; + return false; +} + + +/// Go backward to the previous group. +static void +previous_group(lzma_index *i) +{ + assert(i->current.group->prev != NULL); + + // Go to the previous group first. + i->current.group = i->current.group->prev; + i->current.record = i->current.group->last; + + // Then update the offsets. + i->current.stream_offset -= i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset -= i->current.group + ->uncompressed_sums[i->current.group->last]; + + return; +} + + +/// Go forward to the next group. +static void +next_group(lzma_index *i) +{ + assert(i->current.group->next != NULL); + + // Update the offsets first. + i->current.stream_offset += i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.group->last]; + + // Then go to the next group. + i->current.record = 0; + i->current.group = i->current.group->next; + + return; +} + + +/// Set *info from i->current. +static void +set_info(const lzma_index *i, lzma_index_record *info) +{ + info->total_size = i->current.group->total_sums[i->current.record]; + info->uncompressed_size = i->current.group->uncompressed_sums[ + i->current.record]; + + info->stream_offset = i->current.stream_offset; + info->uncompressed_offset = i->current.uncompressed_offset; + + // If it's not the first Record in this group, we need to do some + // adjustements. + if (i->current.record > 0) { + // _sums[] are cumulative, thus we need to substract the + // _previous _sums[] to get the sizes of this Record. + info->total_size -= i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_size -= i->current.group + ->uncompressed_sums[i->current.record - 1]; + + // i->current.{total,uncompressed}_offsets have the offset + // of the beginning of the group, thus we need to add the + // appropriate amount to get the offsetes of this Record. + info->stream_offset += i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.record - 1]; + } + + return; +} + + +extern LZMA_API lzma_bool +lzma_index_read(lzma_index *i, lzma_index_record *info) +{ + if (i->current.group == NULL) { + // We are at the beginning of the Record list. Set up + // i->current point at the first Record. Return if there + // are no Records. + if (init_current(i)) + return true; + } else do { + // Try to go the next Record. + if (i->current.record < i->current.group->last) + ++i->current.record; + else if (i->current.group->next == NULL) + return true; + else + next_group(i); + } while (i->current.group->paddings[i->current.record]); + + // We found a new Record. Set the information to *info. + set_info(i, info); + + return false; +} + + +extern LZMA_API void +lzma_index_rewind(lzma_index *i) +{ + i->current.group = NULL; + return; +} + + +extern LZMA_API lzma_bool +lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target) +{ + // Check if it is possible to fullfill the request. + if (target >= i->uncompressed_size) + return true; + + // Now we know that we will have an answer. Initialize the current + // read position if needed. + if (i->current.group == NULL && init_current(i)) + return true; + + // Locate the group where the wanted Block is. First search forward. + while (i->current.uncompressed_offset <= target) { + // If the first uncompressed byte of the next group is past + // the target offset, it has to be this or an earlier group. + if (i->current.uncompressed_offset + i->current.group + ->uncompressed_sums[i->current.group->last] + > target) + break; + + // Go forward to the next group. + next_group(i); + } + + // Then search backward. + while (i->current.uncompressed_offset > target) + previous_group(i); + + // Now the target Block is somewhere in i->current.group. Offsets + // in groups are relative to the beginning of the group, thus + // we must adjust the target before starting the search loop. + assert(target >= i->current.uncompressed_offset); + target -= i->current.uncompressed_offset; + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sums[] value is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks or + // padding, we don't want to return them. + size_t left = 0; + size_t right = i->current.group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (i->current.group->uncompressed_sums[pos] <= target) + left = pos + 1; + else + right = pos; + } + + i->current.record = left; + +#ifndef NDEBUG + // The found Record must not be padding or have zero uncompressed size. + assert(!i->current.group->paddings[i->current.record]); + + if (i->current.record == 0) + assert(i->current.group->uncompressed_sums[0] > 0); + else + assert(i->current.group->uncompressed_sums[i->current.record] + - i->current.group->uncompressed_sums[ + i->current.record - 1] > 0); +#endif + + set_info(i, info); + + return false; +} + + +extern LZMA_API lzma_ret +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator, lzma_vli padding) +{ + if (dest == NULL || src == NULL || dest == src + || padding > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Check that the combined size of the Indexes stays within limits. + { + const lzma_vli dest_size = lzma_index_file_size(dest); + const lzma_vli src_size = lzma_index_file_size(src); + if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN + || dest_size + src_size + padding + > LZMA_VLI_VALUE_UNKNOWN) + return LZMA_DATA_ERROR; + } + + // Add a padding Record to take into account the size of + // Index + Stream Footer + Stream Padding + Stream Header. + // + // NOTE: This cannot overflow, because Index Size is always + // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs + // (Index Size and padding) doesn't overflow. It may become + // an invalid VLI if padding is huge, but that is caught by + // index_append(). + padding += index_size(dest->count - dest->old.count, + dest->index_list_size + - dest->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; + + // Add the padding Record. + return_if_error(index_append( + dest, allocator, padding, 0, true)); + + // Avoid wasting lots of memory if src->head has only a few records + // that fit into dest->tail. That is, combine two groups if possible. + // + // NOTE: We know that dest->tail != NULL since we just appended + // a padding Record. But we don't know about src->head. + if (src->head != NULL && src->head->last + 1 + <= INDEX_GROUP_SIZE - dest->tail->last - 1) { + // Copy the first Record. + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[0]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[dest->tail->last] + + src->head->uncompressed_sums[0]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[0]; + + ++dest->tail->last; + + // Copy the rest. + for (size_t i = 1; i < src->head->last; ++i) { + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[i + 1] + - src->head->total_sums[i]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[ + dest->tail->last] + + src->head->uncompressed_sums[i + 1] + - src->head->uncompressed_sums[i]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[i + 1]; + + ++dest->tail->last; } - ++*count; - i = i->next; + // Free the head group of *src. Don't bother updating prev + // pointers since those won't be used for anything before + // we deallocate the whole *src structure. + lzma_index_group *tmp = src->head; + src->head = src->head->next; + lzma_free(tmp, allocator); + } + + // If there are groups left in *src, join them as is. Note that if we + // are combining already combined Indexes, src->head can be non-NULL + // even if we just combined the old src->head to dest->tail. + if (src->head != NULL) { + src->head->prev = dest->tail; + dest->tail->next = src->head; + dest->tail = src->tail; } - // FIXME ? - if (*total_size == LZMA_VLI_VALUE_UNKNOWN - || *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - return LZMA_HEADER_ERROR; + // Update information about earlier Indexes. Only the last Index + // from *src won't be counted in dest->old. The last Index is left + // open and can be even appended with lzma_index_append(). + dest->old.count = dest->count + src->old.count; + dest->old.index_list_size + = dest->index_list_size + src->old.index_list_size; + + // Update overall information. + dest->total_size += src->total_size; + dest->uncompressed_size += src->uncompressed_size; + dest->count += src->count; + dest->index_list_size += src->index_list_size; + dest->padding_size += src->padding_size; + + // *src has nothing left but the base structure. + lzma_free(src, allocator); return LZMA_OK; } +extern LZMA_API lzma_index * +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator); + if (dest == NULL) + return NULL; + + // Copy the base structure except the pointers. + *dest = *src; + dest->head = NULL; + dest->tail = NULL; + dest->current.group = NULL; + + // Copy the Records. + const lzma_index_group *src_group = src->head; + while (src_group != NULL) { + // Allocate a new group. + lzma_index_group *dest_group = lzma_alloc( + sizeof(lzma_index_group), allocator); + if (dest_group == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + // Set the pointers. + dest_group->prev = dest->tail; + dest_group->next = NULL; + + if (dest->head == NULL) + dest->head = dest_group; + else + dest->tail->next = dest_group; + + dest->tail = dest_group; + + dest_group->last = src_group->last; + + // Copy the arrays so that we don't read uninitialized memory. + const size_t count = src_group->last + 1; + memcpy(dest_group->total_sums, src_group->total_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->uncompressed_sums, + src_group->uncompressed_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->paddings, src_group->paddings, + sizeof(bool) * count); + + // Copy also the read position. + if (src_group == src->current.group) + dest->current.group = dest->tail; + + src_group = src_group->next; + } + + return dest; +} + extern LZMA_API lzma_bool -lzma_index_is_equal(const lzma_index *a, const lzma_index *b) +lzma_index_equal(const lzma_index *a, const lzma_index *b) { - while (a != NULL && b != NULL) { - if (a->total_size != b->total_size || a->uncompressed_size - != b->uncompressed_size) + // No point to compare more if the pointers are the same. + if (a == b) + return true; + + // Compare the basic properties. + if (a->total_size != b->total_size + || a->uncompressed_size != b->uncompressed_size + || a->index_list_size != b->index_list_size + || a->count != b->count) + return false; + + // Compare the Records. + const lzma_index_group *ag = a->head; + const lzma_index_group *bg = b->head; + while (ag != NULL && bg != NULL) { + const size_t count = ag->last + 1; + if (ag->last != bg->last + || memcmp(ag->total_sums, + bg->total_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->uncompressed_sums, + bg->uncompressed_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->paddings, bg->paddings, + sizeof(bool) * count) != 0) return false; - a = a->next; - b = b->next; + ag = ag->next; + bg = bg->next; } - return a == b; + return ag == NULL && bg == NULL; } diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h new file mode 100644 index 00000000..303ad43a --- /dev/null +++ b/src/liblzma/common/index.h @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Maximum encoded value of Total Size. +#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_VALUE_MAX / 4 - 1) + +/// Convert the real Total Size value to a value that is stored to the Index. +#define total_size_encode(size) ((size) / 4 - 1) + +/// Convert the encoded Total Size value from Index to the real Total Size. +#define total_size_decode(size) (((size) + 1) * 4) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + // Round up to a mulitiple of four. + return (index_size_unpadded(count, index_list_size) + 3) + & ~LZMA_VLI_C(3); +} + + +static inline lzma_vli +index_stream_size( + lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + total_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000..1635948c --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Target Index + lzma_index *index; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Total Size field + lzma_vli total_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_TOTAL + ? &coder->total_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_TOTAL) { + // Validate that encoded Total Size isn't too big. + if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + // Convert the encoded Total Size to the real + // Total Size. + coder->total_size = total_size_decode( + coder->total_size); + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->total_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + // We always allocate a new lzma_index. + *i = lzma_index_init(NULL, allocator); + if (*i == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = *i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + lzma_next_coder_init(index_decoder_init, next, allocator, i); +} +*/ + + +extern LZMA_API lzma_ret +lzma_index_decoder(lzma_stream *strm, lzma_index **i) +{ + lzma_next_strm_init(strm, index_decoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c new file mode 100644 index 00000000..5a7d8c8c --- /dev/null +++ b/src/liblzma/common/index_encoder.c @@ -0,0 +1,222 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index given to us to encode. Note that we modify it in sense that + /// we read it, and read position is tracked in lzma_index structure. + lzma_index *index; + + /// The current Index Record being encoded + lzma_index_record record; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli index_count = lzma_index_count(coder->index); + ret = lzma_vli_encode(index_count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_read(coder->index, &coder->record)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + // Total Size must be a multiple of four. + if (coder->record.total_size & 3) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_TOTAL; + + // Fall through + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_TOTAL + ? total_size_encode(coder->record.total_size) + : coder->record.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + lzma_index_rewind(i); + + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + lzma_next_coder_init(index_encoder_init, next, allocator, i); +} + + +extern LZMA_API lzma_ret +lzma_index_encoder(lzma_stream *strm, lzma_index *i) +{ + lzma_next_strm_init(strm, index_encoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/stream_encoder_multi.h b/src/liblzma/common/index_encoder.h index e0ff02f3..0087c284 100644 --- a/src/liblzma/common/stream_encoder_multi.h +++ b/src/liblzma/common/index_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file stream_encoder_multi.h -/// \brief Encodes Multi-Block .lzma files +/// \file index_encoder.h +/// \brief Encodes the Index field // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,10 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_STREAM_ENCODER_MULTI_H -#define LZMA_STREAM_ENCODER_MULTI_H +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_index *i); -extern lzma_ret lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); #endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c new file mode 100644 index 00000000..35dea41f --- /dev/null +++ b/src/liblzma/common/index_hash.c @@ -0,0 +1,340 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Total Size fields + lzma_vli total_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Total Sizes and Uncompressed Sizes. + lzma_check check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Total Size currently being read from an Index Record. + lzma_vli total_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. + size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API lzma_index_hash * +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.total_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.total_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->total_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API void +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API lzma_vli +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. +static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + info->total_size += total_size; + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { total_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK || total_size == 0 || + total_size > LZMA_VLI_VALUE_MAX || (total_size & 3) + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + total_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.total_size > LZMA_VLI_VALUE_MAX + || index_hash->blocks.uncompressed_size + > LZMA_VLI_VALUE_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.total_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_TOTAL + ? &index_hash->total_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_TOTAL) { + if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + index_hash->total_size = total_size_decode( + index_hash->total_size); + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->total_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.total_size + < index_hash->records.total_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.total_size + != index_hash->records.total_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer, + index_hash->records.check.buffer, + lzma_check_sizes[LZMA_CHECK_BEST]) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} diff --git a/src/liblzma/common/info.c b/src/liblzma/common/info.c deleted file mode 100644 index ab7fc999..00000000 --- a/src/liblzma/common/info.c +++ /dev/null @@ -1,814 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file info.c -/// \brief Collects and verifies integrity of Stream size information -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -struct lzma_info_s { - struct { - /// Known Size of Header Metadata Block; here's some - /// special things: - /// - LZMA_VLI_VALUE_UNKNOWN indicates that we don't know - /// if Header Metadata Block is present. - /// - 0 indicates that Header Metadata Block is not present. - lzma_vli header_metadata_size; - - /// Known Total Size of the Data Blocks in the Stream - lzma_vli total_size; - - /// Known Uncompressed Size of the Data Blocks in the Stream - lzma_vli uncompressed_size; - - /// Known Size of Footer Metadata Block - lzma_vli footer_metadata_size; - } known; - - struct { - /// Sum of Total Size fields stored to the Index so far - lzma_vli total_size; - - /// Sum of Uncompressed Size fields stored to the Index so far - lzma_vli uncompressed_size; - - /// First Index Record in the list, or NULL if Index is empty. - lzma_index *head; - - /// Number of Index Records - size_t record_count; - - /// Number of Index Records - size_t incomplete_count; - - /// True when we know that no more Records will get added - /// to the Index. - bool is_final; - } index; - - /// Start offset of the Stream. This is needed to calculate - /// lzma_info_iter.stream_offset. - lzma_vli stream_start_offset; - - /// True if Index is present in Header Metadata Block - bool has_index_in_header_metadata; -}; - - -////////////////////// -// Create/Reset/End // -////////////////////// - -static void -index_init(lzma_info *info) -{ - info->index.total_size = 0; - info->index.uncompressed_size = 0; - info->index.head = NULL; - info->index.record_count = 0; - info->index.incomplete_count = 0; - info->index.is_final = false; - return; -} - - -static void -info_init(lzma_info *info) -{ - info->known.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.total_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->stream_start_offset = 0; - info->has_index_in_header_metadata = false; - - index_init(info); - - return; -} - - -extern LZMA_API lzma_info * -lzma_info_init(lzma_info *info, lzma_allocator *allocator) -{ - if (info == NULL) - info = lzma_alloc(sizeof(lzma_info), allocator); - else - lzma_index_free(info->index.head, allocator); - - if (info != NULL) - info_init(info); - - return info; -} - - -extern LZMA_API void -lzma_info_free(lzma_info *info, lzma_allocator *allocator) -{ - lzma_index_free(info->index.head, allocator); - lzma_free(info, allocator); - return; -} - - -///////// -// Set // -///////// - -static lzma_ret -set_size(lzma_vli new_size, lzma_vli *known_size, lzma_vli index_size, - bool forbid_zero) -{ - assert(new_size <= LZMA_VLI_VALUE_MAX); - - lzma_ret ret = LZMA_OK; - - if (forbid_zero && new_size == 0) - ret = LZMA_PROG_ERROR; - else if (index_size > new_size) - ret = LZMA_DATA_ERROR; - else if (*known_size == LZMA_VLI_VALUE_UNKNOWN) - *known_size = new_size; - else if (*known_size != new_size) - ret = LZMA_DATA_ERROR; - - return ret; -} - - -extern LZMA_API lzma_ret -lzma_info_size_set(lzma_info *info, lzma_info_size type, lzma_vli size) -{ - if (size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - switch (type) { - case LZMA_INFO_STREAM_START: - info->stream_start_offset = size; - return LZMA_OK; - - case LZMA_INFO_HEADER_METADATA: - return set_size(size, &info->known.header_metadata_size, - 0, false); - - case LZMA_INFO_TOTAL: - return set_size(size, &info->known.total_size, - info->index.total_size, true); - - case LZMA_INFO_UNCOMPRESSED: - return set_size(size, &info->known.uncompressed_size, - info->index.uncompressed_size, false); - - case LZMA_INFO_FOOTER_METADATA: - return set_size(size, &info->known.footer_metadata_size, - 0, true); - } - - return LZMA_PROG_ERROR; -} - - -extern LZMA_API lzma_ret -lzma_info_index_set(lzma_info *info, lzma_allocator *allocator, - lzma_index *i_new, lzma_bool eat_index) -{ - if (i_new == NULL) - return LZMA_PROG_ERROR; - - lzma_index *i_old = info->index.head; - - if (i_old != NULL) { - while (true) { - // If the new Index has fewer Records than the old one, - // the new Index cannot be valid. - if (i_new == NULL) - return LZMA_DATA_ERROR; - - // The new Index must be complete i.e. no unknown - // values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_PROG_ERROR; - } - - // Compare the values from the new Index with the old - // Index. The old Index may be incomplete; in that - // case we - // - use the value from the new Index as is; - // - update the appropriate info->index.foo_size; and - // - decrease the count of incomplete Index Records. - bool was_incomplete = false; - - if (i_old->total_size == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->total_size = i_new->total_size; - - if (lzma_vli_add(info->index.total_size, - i_new->total_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->total_size != i_new->total_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (i_old->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->uncompressed_size - = i_new->uncompressed_size; - - if (lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->uncompressed_size - != i_new->uncompressed_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (was_incomplete) { - assert(!info->index.is_final); - assert(info->index.incomplete_count > 0); - --info->index.incomplete_count; - } - - // Get rid of *i_new. It's now identical with *i_old. - lzma_index *tmp = i_new->next; - if (eat_index) - lzma_free(i_new, allocator); - - i_new = tmp; - - // We want to leave i_old pointing to the last - // Index Record in the old Index. This way we can - // concatenate the possible new Records from i_new. - if (i_old->next == NULL) - break; - - i_old = i_old->next; - } - } - - assert(info->index.incomplete_count == 0); - - // If Index was already known to be final, i_new must be NULL now. - // The new Index cannot contain more Records that we already have. - if (info->index.is_final) { - assert(info->index.head != NULL); - - if (i_new != NULL) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - return LZMA_OK; - } - - // The rest of the new Index is merged to the old Index. Keep the - // current i_new pointer in available. We need it when merging the - // new Index with the old one, and if an error occurs so we can - // get rid of the broken part of the new Index. - lzma_index *i_start = i_new; - while (i_new != NULL) { - // The new Index must be complete i.e. no unknown values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - // Update info->index.foo_sizes. - if (lzma_vli_add(info->index.total_size, i_new->total_size) - || lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - ++info->index.record_count; - i_new = i_new->next; - } - - // All the Records in the new Index are good, and info->index.foo_sizes - // were successfully updated. - if (lzma_info_index_finish(info) != LZMA_OK) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_DATA_ERROR; - } - - // The Index is ready to be merged. If we aren't supposed to eat - // the Index, make a copy of it first. - if (!eat_index && i_start != NULL) { - i_start = lzma_index_dup(i_start, allocator); - if (i_start == NULL) - return LZMA_MEM_ERROR; - } - - // Concatenate the new Index with the old one. Note that it is - // possible that we don't have any old Index. - if (info->index.head == NULL) - info->index.head = i_start; - else - i_old->next = i_start; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_metadata_set(lzma_info *info, lzma_allocator *allocator, - lzma_metadata *metadata, lzma_bool is_header_metadata, - lzma_bool eat_index) -{ - // Validate *metadata. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || !lzma_vli_is_valid(metadata->uncompressed_size)) { - if (eat_index) { - lzma_index_free(metadata->index, allocator); - metadata->index = NULL; - } - - return LZMA_PROG_ERROR; - } - - // Index - if (metadata->index != NULL) { - if (is_header_metadata) - info->has_index_in_header_metadata = true; - - const lzma_ret ret = lzma_info_index_set( - info, allocator, metadata->index, eat_index); - - if (eat_index) - metadata->index = NULL; - - if (ret != LZMA_OK) - return ret; - - } else if (!is_header_metadata - && (metadata->total_size == LZMA_VLI_VALUE_UNKNOWN - || !info->has_index_in_header_metadata)) { - // Either Total Size or Index must be present in Footer - // Metadata Block. If Index is not present, it must have - // already been in the Header Metadata Block. Since we - // got here, these conditions weren't met. - return LZMA_DATA_ERROR; - } - - // Size of Header Metadata - if (!is_header_metadata) - return_if_error(lzma_info_size_set( - info, LZMA_INFO_HEADER_METADATA, - metadata->header_metadata_size)); - - // Total Size - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_TOTAL, metadata->total_size)); - - // Uncompressed Size - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_UNCOMPRESSED, - metadata->uncompressed_size)); - - return LZMA_OK; -} - - -///////// -// Get // -///////// - -extern LZMA_API lzma_vli -lzma_info_size_get(const lzma_info *info, lzma_info_size type) -{ - switch (type) { - case LZMA_INFO_STREAM_START: - return info->stream_start_offset; - - case LZMA_INFO_HEADER_METADATA: - return info->known.header_metadata_size; - - case LZMA_INFO_TOTAL: - return info->known.total_size; - - case LZMA_INFO_UNCOMPRESSED: - return info->known.uncompressed_size; - - case LZMA_INFO_FOOTER_METADATA: - return info->known.footer_metadata_size; - } - - return LZMA_VLI_VALUE_UNKNOWN; -} - - -extern LZMA_API lzma_index * -lzma_info_index_get(lzma_info *info, lzma_bool detach) -{ - lzma_index *i = info->index.head; - - if (detach) - index_init(info); - - return i; -} - - -extern LZMA_API size_t -lzma_info_index_count_get(const lzma_info *info) -{ - return info->index.record_count; -} - - -///////////////// -// Incremental // -///////////////// - -enum { - ITER_INFO, - ITER_INDEX, - ITER_RESERVED_1, - ITER_RESERVED_2, -}; - - -#define iter_info ((lzma_info *)(iter->internal[ITER_INFO])) - -#define iter_index ((lzma_index *)(iter->internal[ITER_INDEX])) - - -extern LZMA_API void -lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter) -{ - *iter = (lzma_info_iter){ - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .stream_offset = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_offset = LZMA_VLI_VALUE_UNKNOWN, - .internal = { info, NULL, NULL, NULL }, - }; - - return; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_next(lzma_info_iter *iter, lzma_allocator *allocator) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL) { - // The first call after lzma_info_iter_begin(). - if (iter_info->known.header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - - iter->uncompressed_offset = 0; - - if (iter_info->index.head != NULL) { - // The first Index Record has already been allocated. - iter->internal[ITER_INDEX] = iter_info->index.head; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } else { - // Update iter->*_offsets. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->stream_offset, - iter_index->total_size)) - return LZMA_DATA_ERROR; - } - - if (iter->uncompressed_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->uncompressed_offset - = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->uncompressed_offset, - iter_index->uncompressed_size)) - return LZMA_DATA_ERROR; - } - - if (iter_index->next != NULL) { - // The next Record has already been allocated. - iter->internal[ITER_INDEX] = iter_index->next; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } - - // Don't add new Records to a final Index. - if (iter_info->index.is_final) - return LZMA_DATA_ERROR; - - // Allocate and initialize a new Index Record. - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = LZMA_VLI_VALUE_UNKNOWN; - i->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - i->next = NULL; - - iter->total_size = LZMA_VLI_VALUE_UNKNOWN; - iter->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // Decide where to put the new Index Record. - if (iter_info->index.head == NULL) - iter_info->index.head = i; - - if (iter_index != NULL) - iter_index->next = i; - - iter->internal[ITER_INDEX] = i; - - ++iter_info->index.record_count; - ++iter_info->index.incomplete_count; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL || !lzma_vli_is_valid(total_size) - || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - const bool was_incomplete = iter_index->total_size - == LZMA_VLI_VALUE_UNKNOWN - || iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - - if (total_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->total_size = total_size; - - if (lzma_vli_add(iter_info->index.total_size, - total_size) - || iter_info->index.total_size - > iter_info->known.total_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->total_size != total_size) { - return LZMA_DATA_ERROR; - } - } - - if (uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->uncompressed_size = uncompressed_size; - - if (lzma_vli_add(iter_info->index.uncompressed_size, - uncompressed_size) - || iter_info->index.uncompressed_size - > iter_info->known.uncompressed_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->uncompressed_size - != uncompressed_size) { - return LZMA_DATA_ERROR; - } - } - - // Check if the new information we got managed to finish this - // Index Record. If so, update the count of incomplete Index Records. - if (was_incomplete && iter_index->total_size - != LZMA_VLI_VALUE_UNKNOWN - && iter_index->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - assert(iter_info->index.incomplete_count > 0); - --iter_info->index.incomplete_count; - } - - // Make sure that the known sizes are now available in *iter. - iter->total_size = iter_index->total_size; - iter->uncompressed_size = iter_index->uncompressed_size; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_index_finish(lzma_info *info) -{ - if (info->index.record_count == 0 || info->index.incomplete_count > 0 - || lzma_info_size_set(info, LZMA_INFO_TOTAL, - info->index.total_size) - || lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, - info->index.uncompressed_size)) - return LZMA_DATA_ERROR; - - info->index.is_final = true; - - return LZMA_OK; -} - - -////////////// -// Locating // -////////////// - -extern LZMA_API lzma_vli -lzma_info_metadata_locate(const lzma_info *info, lzma_bool is_header_metadata) -{ - bool error = false; - lzma_vli size = 0; - - if (info->known.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN) { - // We don't know if Header Metadata Block is present, thus - // we cannot locate it either. - // - // Well, you could say that just assume that it is present. - // I'm not sure if this is useful. But it can be useful to - // be able to use this function and get LZMA_VLI_VALUE_UNKNOWN - // to detect that Header Metadata Block wasn't present. - error = true; - } else if (is_header_metadata) { - error = lzma_vli_sum(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE); - } else if (!info->index.is_final) { - // Since we don't know if we have all the Index Records yet, - // we cannot know where the Footer Metadata Block is. - error = true; - } else { - error = lzma_vli_sum4(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - info->known.header_metadata_size, - info->known.total_size); - } - - return error ? LZMA_VLI_VALUE_UNKNOWN : size; -} - - -extern LZMA_API uint32_t -lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata) -{ - uint32_t alignment; - - if (is_header_metadata) { - alignment = info->stream_start_offset - + LZMA_STREAM_HEADER_SIZE; - } else { - alignment = info->stream_start_offset + LZMA_STREAM_HEADER_SIZE - + info->known.header_metadata_size - + info->known.total_size; - } - - return alignment; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_locate(lzma_info_iter *iter, lzma_allocator *allocator, - lzma_vli uncompressed_offset, lzma_bool allow_alloc) -{ - if (iter == NULL || uncompressed_offset > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - // Quick check in case Index is final. - if (iter_info->index.is_final) { - assert(iter_info->known.uncompressed_size - == iter_info->index.uncompressed_size); - if (uncompressed_offset >= iter_info->index.uncompressed_size) - return LZMA_DATA_ERROR; - } - - // TODO: Optimize so that it uses existing info from *iter when - // seeking forward. - - // Initialize *iter - if (iter_info->known.header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) { - if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - } else { - // We don't know the Size of Header Metadata Block, thus - // we cannot calculate the Stream offset either. - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - } - - iter->uncompressed_offset = 0; - - // If we have no Index Records, it's obvious that we need to - // add a new one. - if (iter_info->index.head == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - return lzma_info_iter_next(iter, allocator); - } - - // Locate an appropriate Index Record. - lzma_index *i = iter_info->index.head; - while (true) { - // - If Uncompressed Size in the Record is unknown, - // we have no chance to search further. - // - If the next Record would go past the requested offset, - // we have found our target Data Block. - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - || iter->uncompressed_offset - + i->uncompressed_size > uncompressed_offset) { - iter->total_size = i->total_size; - iter->uncompressed_size = i->uncompressed_size; - iter->internal[ITER_INDEX] = i; - return LZMA_OK; - } - - // Update the stream offset. It may be unknown if we didn't - // know the size of Header Metadata Block. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) - if (lzma_vli_add(iter->stream_offset, i->total_size)) - return LZMA_PROG_ERROR; - - // Update the uncompressed offset. This cannot overflow since - // the Index is known to be valid. - iter->uncompressed_offset += i->uncompressed_size; - - // Move to the next Block. - if (i->next == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - iter->internal[ITER_INDEX] = i; - return lzma_info_iter_next(iter, allocator); - } - - i = i->next; - } -} diff --git a/src/liblzma/common/memory_usage.c b/src/liblzma/common/memory_usage.c index b6f27957..8244c404 100644 --- a/src/liblzma/common/memory_usage.c +++ b/src/liblzma/common/memory_usage.c @@ -28,7 +28,6 @@ get_usage(const lzma_options_filter *filter, bool is_encoder) uint64_t ret; switch (filter->id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_X86: case LZMA_FILTER_POWERPC: case LZMA_FILTER_IA64: diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c deleted file mode 100644 index 579b0a51..00000000 --- a/src/liblzma/common/metadata_decoder.c +++ /dev/null @@ -1,578 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.c -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_decoder.h" -#include "block_decoder.h" - - -/// Maximum size of a single Extra Record. Again, this is mostly to make -/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should -/// be smaller. -#define EXTRA_SIZE_MAX (SIZE_MAX / 4) - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_ALLOC, - SEQ_INDEX_TOTAL_SIZE, - SEQ_INDEX_UNCOMPRESSED_SIZE, - SEQ_EXTRA_PREPARE, - SEQ_EXTRA_ALLOC, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA_ALLOC, - SEQ_EXTRA_DATA_COPY, - SEQ_EXTRA_DUMMY_ALLOC, - SEQ_EXTRA_DUMMY_ID, - SEQ_EXTRA_DUMMY_SIZE, - SEQ_EXTRA_DUMMY_COPY, - } sequence; - - /// Number of "things" left to be parsed. If we hit end of input - /// when this isn't zero, we have corrupt Metadata Block. - size_t todo_count; - - /// Position in variable-length integers - size_t pos; - - /// Temporary variable needed to decode variables whose type - /// is size_t instead of lzma_vli. - lzma_vli tmp; - - /// Pointer to target structure to hold the parsed results. - lzma_metadata *metadata; - - /// The Index Record we currently are parsing - lzma_index *index_current; - - /// Number of Records in Index - size_t index_count; - - /// Sum of Total Size fields in the Index - lzma_vli index_total_size; - - /// Sum of Uncompressed Size fields in the Index - lzma_vli index_uncompressed_size; - - /// True if Extra is present. - bool has_extra; - - /// True if we have been requested to store the Extra to *metadata. - bool want_extra; - - /// Pointer to the end of the Extra Record list. - lzma_extra *extra_tail; - - /// Dummy Extra Record used when only verifying integrity of Extra - /// (not storing it to RAM). - lzma_extra extra_dummy; - - /// Block decoder - lzma_next_coder block_decoder; - - /// buffer[buffer_pos] is the next byte to process. - size_t buffer_pos; - - /// buffer[buffer_size] is the first byte to not process. - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is read before - /// it is parsed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -/// Reads a variable-length integer to coder->num. -#define read_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_decode( \ - &num, &coder->pos, \ - coder->buffer, &coder->buffer_pos, \ - coder->buffer_size); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder, lzma_allocator *allocator) -{ - while (coder->buffer_pos < coder->buffer_size) - switch (coder->sequence) { - case SEQ_FLAGS: - // Reserved bits must be unset. - if (coder->buffer[coder->buffer_pos] & 0x70) - return LZMA_HEADER_ERROR; - - coder->todo_count = 0; - - // If Size of Header Metadata is present, prepare the - // variable for variable-length integer decoding. Otherwise - // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the - // field isn't present. - if (coder->buffer[coder->buffer_pos] & 0x01) { - coder->metadata->header_metadata_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x02) { - coder->metadata->total_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x04) { - coder->metadata->uncompressed_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x08) { - // Setting index_count to 1 is just to indicate that - // Index is present. The real size is parsed later. - coder->index_count = 1; - ++coder->todo_count; - } - - coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80) - != 0; - - ++coder->buffer_pos; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata->header_metadata_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->header_metadata_size); - - if (coder->metadata->header_metadata_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->total_size); - - if (coder->metadata->total_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->uncompressed_size); - --coder->todo_count; - } - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - coder->sequence = SEQ_EXTRA_PREPARE; - break; - } - - read_vli(coder->tmp); - - // Index must not be empty nor far too big (wouldn't fit - // in RAM). - if (coder->tmp == 0 || coder->tmp - >= SIZE_MAX / sizeof(lzma_index)) - return LZMA_DATA_ERROR; - - coder->index_count = (size_t)(coder->tmp); - coder->tmp = 0; - - coder->sequence = SEQ_INDEX_ALLOC; - break; - - case SEQ_INDEX_ALLOC: { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = 0; - i->uncompressed_size = 0; - i->next = NULL; - - if (coder->metadata->index == NULL) - coder->metadata->index = i; - else - coder->index_current->next = i; - - coder->index_current = i; - - coder->sequence = SEQ_INDEX_TOTAL_SIZE; - } - - // Fall through - - case SEQ_INDEX_TOTAL_SIZE: { - read_vli(coder->index_current->total_size); - - coder->index_total_size += coder->index_current->total_size; - if (coder->index_total_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - // No Block can have Total Size of zero bytes. - if (coder->index_current->total_size == 0) - return LZMA_DATA_ERROR; - - if (--coder->index_count == 0) { - // If Total Size is present, it must match the sum - // of Total Sizes in Index. - if (coder->metadata->total_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->total_size - != coder->index_total_size) - return LZMA_DATA_ERROR; - - coder->index_current = coder->metadata->index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE; - } else { - coder->sequence = SEQ_INDEX_ALLOC; - } - - break; - } - - case SEQ_INDEX_UNCOMPRESSED_SIZE: { - read_vli(coder->index_current->uncompressed_size); - - coder->index_uncompressed_size - += coder->index_current->uncompressed_size; - if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->uncompressed_size - != coder->index_uncompressed_size) - return LZMA_DATA_ERROR; - - --coder->todo_count; - coder->sequence = SEQ_EXTRA_PREPARE; - } - - break; - } - - case SEQ_EXTRA_PREPARE: - assert(coder->todo_count == 0); - - // If we get here, we have at least one byte of input left. - // If "Extra is present" flag is unset in Metadata Flags, - // it means that there is some garbage and we return an error. - if (!coder->has_extra) - return LZMA_DATA_ERROR; - - if (!coder->want_extra) { - coder->extra_tail = &coder->extra_dummy; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - break; - } - - coder->sequence = SEQ_EXTRA_ALLOC; - - // Fall through - - case SEQ_EXTRA_ALLOC: { - lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator); - if (e == NULL) - return LZMA_MEM_ERROR; - - e->next = NULL; - e->id = 0; - e->size = 0; - e->data = NULL; - - if (coder->metadata->extra == NULL) - coder->metadata->extra = e; - else - coder->extra_tail->next = e; - - coder->extra_tail = e; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_ID; - } - - // Fall through - - case SEQ_EXTRA_ID: - case SEQ_EXTRA_DUMMY_ID: - read_vli(coder->extra_tail->id); - - if (coder->extra_tail->id == 0) { - coder->extra_tail->size = 0; - coder->extra_tail->data = NULL; - coder->todo_count = 0; - --coder->sequence; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_SIZE: - case SEQ_EXTRA_DUMMY_SIZE: - read_vli(coder->tmp); - - if (coder->tmp == 0) { - // We have no Data in the Extra Record. Don't - // allocate any memory for it. Go back to - // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC. - coder->tmp = 0; - coder->sequence -= 2; - coder->todo_count = 0; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_DATA_ALLOC: { - if (coder->tmp > EXTRA_SIZE_MAX) - return LZMA_DATA_ERROR; - - coder->extra_tail->size = (size_t)(coder->tmp); - coder->tmp = 0; - - // We reserve space for the trailing '\0' too. - uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1, - allocator); - if (d == NULL) - return LZMA_MEM_ERROR; - - coder->extra_tail->data = d; - coder->sequence = SEQ_EXTRA_DATA_COPY; - } - - // Fall through - - case SEQ_EXTRA_DATA_COPY: - bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, - coder->extra_tail->data, &coder->pos, - (size_t)(coder->extra_tail->size)); - - if ((size_t)(coder->extra_tail->size) == coder->pos) { - coder->extra_tail->data[coder->pos] = '\0'; - coder->pos = 0; - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_ALLOC; - } - - break; - - case SEQ_EXTRA_DUMMY_ALLOC: - // Not really alloc, just initialize the dummy entry. - coder->extra_dummy = (lzma_extra){ - .next = NULL, - .id = 0, - .size = 0, - .data = NULL, - }; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_DUMMY_ID; - break; - - case SEQ_EXTRA_DUMMY_COPY: { - // Simply skip as many bytes as indicated by Extra Record Size. - // We don't check lzma_extra_size_max because we don't - // allocate any memory to hold the data. - const size_t in_avail = coder->buffer_size - coder->buffer_pos; - const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp); - coder->buffer_pos += skip; - coder->tmp -= skip; - - if (coder->tmp == 0) { - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - } - - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - bool end_was_reached = false; - - while (true) { - // Fill the buffer if it is empty. - if (coder->buffer_pos == coder->buffer_size) { - coder->buffer_pos = 0; - coder->buffer_size = 0; - - const lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, coder->buffer, - &coder->buffer_size, LZMA_BUFFER_SIZE, - LZMA_RUN); - - switch (ret) { - case LZMA_OK: - // Return immediatelly if we got no new data. - if (coder->buffer_size == 0) - return LZMA_OK; - - break; - - case LZMA_STREAM_END: - end_was_reached = true; - break; - - default: - return ret; - } - } - - // Process coder->buffer. - const lzma_ret ret = process(coder, allocator); - if (ret != LZMA_OK) - return ret; - - // On success, process() eats all the input. - assert(coder->buffer_pos == coder->buffer_size); - - if (end_was_reached) { - // Check that the sequence is not in the - // middle of anything. - if (coder->todo_count != 0) - return LZMA_DATA_ERROR; - - // If Size of Header Metadata Block was not - // present, we use zero as its size instead - // of LZMA_VLI_VALUE_UNKNOWN. - if (coder->metadata->header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - coder->metadata->header_metadata_size = 0; - - return LZMA_STREAM_END; - } - } -} - - -static void -metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_decode; - next->end = &metadata_decoder_end; - next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - } - - metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->total_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->index = NULL; - metadata->extra = NULL; - - next->coder->sequence = SEQ_FLAGS; - next->coder->todo_count = 1; - next->coder->pos = 0; - next->coder->tmp = 0; - next->coder->metadata = metadata; - next->coder->index_current = NULL; - next->coder->index_count = 0; - next->coder->index_total_size = 0; - next->coder->index_uncompressed_size = 0; - next->coder->want_extra = want_extra; - next->coder->extra_tail = NULL; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - return lzma_block_decoder_init( - &next->coder->block_decoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - lzma_next_coder_init(metadata_decoder_init, next, allocator, - options, metadata, want_extra); -} - - -extern LZMA_API lzma_ret -lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options, - lzma_metadata *metadata, lzma_bool want_extra) -{ - lzma_next_strm_init(strm, lzma_metadata_decoder_init, - options, metadata, want_extra); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/metadata_decoder.h b/src/liblzma/common/metadata_decoder.h deleted file mode 100644 index 1fba2179..00000000 --- a/src/liblzma/common/metadata_decoder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.h -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_METADATA_DECODER_H -#define LZMA_METADATA_DECODER_H - -#include "common.h" - - -extern lzma_ret lzma_metadata_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra); - -#endif diff --git a/src/liblzma/common/metadata_encoder.c b/src/liblzma/common/metadata_encoder.c deleted file mode 100644 index 9f4a15b0..00000000 --- a/src/liblzma/common/metadata_encoder.c +++ /dev/null @@ -1,435 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_encoder.c -/// \brief Encodes metadata to be stored into Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_encoder.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_TOTAL, - SEQ_INDEX_UNCOMPRESSED, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA, - SEQ_END, - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// Local copy of the Metadata structure. Note that we keep - /// a copy only of the main structure, not Index or Extra Records. - lzma_metadata metadata; - - /// Number of Records in Index - size_t index_count; - - /// Index Record currently being processed - const lzma_index *index_current; - - /// Block encoder for the encoded Metadata - lzma_next_coder block_encoder; - - /// True once everything except compression has been done. - bool end_was_reached; - - /// buffer[buffer_pos] is the first byte that needs to be compressed. - size_t buffer_pos; - - /// buffer[buffer_size] is the next position where a byte will be - /// written by process(). - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is written before - /// it is compressed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -#define write_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_encode(num, &coder->pos, 1, \ - coder->buffer, &coder->buffer_size, \ - LZMA_BUFFER_SIZE); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder) -{ - while (coder->buffer_size < LZMA_BUFFER_SIZE) - switch (coder->sequence) { - case SEQ_FLAGS: - coder->buffer[coder->buffer_size] = 0; - - if (coder->metadata.header_metadata_size != 0) - coder->buffer[coder->buffer_size] |= 0x01; - - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x02; - - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x04; - - if (coder->index_count > 0) - coder->buffer[coder->buffer_size] |= 0x08; - - if (coder->metadata.extra != NULL) - coder->buffer[coder->buffer_size] |= 0x80; - - ++coder->buffer_size; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata.header_metadata_size != 0) - write_vli(coder->metadata.header_metadata_size); - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.total_size); - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.uncompressed_size); - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - break; - } - - write_vli(coder->index_count); - coder->sequence = SEQ_INDEX_TOTAL; - break; - - case SEQ_INDEX_TOTAL: - write_vli(coder->index_current->total_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - coder->index_current = coder->metadata.index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED; - } - - break; - - case SEQ_INDEX_UNCOMPRESSED: - write_vli(coder->index_current->uncompressed_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current != NULL) - break; - - if (coder->metadata.extra != NULL) { - coder->sequence = SEQ_EXTRA_ID; - break; - } - - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - - case SEQ_EXTRA_ID: { - const lzma_ret ret = lzma_vli_encode( - coder->metadata.extra->id, &coder->pos, 1, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->pos = 0; - - // Handle the special ID 0. - if (coder->metadata.extra->id == 0) { - coder->metadata.extra - = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - - } else { - coder->sequence = SEQ_EXTRA_SIZE; - } - - break; - - default: - return ret; - } - - break; - } - - case SEQ_EXTRA_SIZE: - if (coder->metadata.extra->size >= (lzma_vli)(SIZE_MAX)) - return LZMA_HEADER_ERROR; - - write_vli(coder->metadata.extra->size); - coder->sequence = SEQ_EXTRA_DATA; - break; - - case SEQ_EXTRA_DATA: - bufcpy(coder->metadata.extra->data, &coder->pos, - coder->metadata.extra->size, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - - if ((size_t)(coder->metadata.extra->size) == coder->pos) { - coder->metadata.extra = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->pos = 0; - coder->sequence = SEQ_EXTRA_ID; - } - - break; - - case SEQ_END: - // Everything is encoded. Let the compression code finish - // its work now. - return LZMA_STREAM_END; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in lzma_attribute((unused)), - size_t *restrict in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused)), uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action lzma_attribute((unused))) -{ - while (!coder->end_was_reached) { - // Flush coder->buffer if it isn't empty. - if (coder->buffer_size > 0) { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, - coder->buffer_size, - out, out_pos, out_size, LZMA_RUN); - if (coder->buffer_pos < coder->buffer_size - || ret != LZMA_OK) - return ret; - - coder->buffer_pos = 0; - coder->buffer_size = 0; - } - - const lzma_ret ret = process(coder); - - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->end_was_reached = true; - break; - - default: - return ret; - } - } - - // Finish - return coder->block_encoder.code(coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, coder->buffer_size, - out, out_pos, out_size, LZMA_FINISH); -} - - -static void -metadata_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_encode; - next->end = &metadata_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } - - next->coder->sequence = SEQ_FLAGS; - next->coder->pos = 0; - next->coder->metadata = *metadata; - next->coder->index_count = 0; - next->coder->index_current = metadata->index; - next->coder->end_was_reached = false; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - // Count and validate the Index Records. - { - const lzma_index *i = metadata->index; - while (i != NULL) { - if (i->total_size > LZMA_VLI_VALUE_MAX - || i->uncompressed_size - > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - ++next->coder->index_count; - i = i->next; - } - } - - // Initialize the Block encoder. - return lzma_block_encoder_init( - &next->coder->block_encoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - lzma_next_coder_init(metadata_encoder_init, next, allocator, - options, metadata); -} - - -extern LZMA_API lzma_ret -lzma_metadata_encoder(lzma_stream *strm, lzma_options_block *options, - const lzma_metadata *metadata) -{ - lzma_next_strm_init(strm, metadata_encoder_init, options, metadata); - - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} - - -extern LZMA_API lzma_vli -lzma_metadata_size(const lzma_metadata *metadata) -{ - lzma_vli size = 1; // Metadata Flags - - // Validate header_metadata_size, total_size, and uncompressed_size. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || metadata->total_size == 0 - || !lzma_vli_is_valid(metadata->uncompressed_size)) - return 0; - - // Add the sizes of these three fields. - if (metadata->header_metadata_size != 0) - size += lzma_vli_size(metadata->header_metadata_size); - - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->total_size); - - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->uncompressed_size); - - // Index - if (metadata->index != NULL) { - const lzma_index *i = metadata->index; - size_t count = 1; - - do { - const size_t x = lzma_vli_size(i->total_size); - const size_t y = lzma_vli_size(i->uncompressed_size); - if (x == 0 || y == 0) - return 0; - - size += x + y; - ++count; - i = i->next; - - } while (i != NULL); - - const size_t tmp = lzma_vli_size(count); - if (tmp == 0) - return 0; - - size += tmp; - } - - // Extra - { - const lzma_extra *e = metadata->extra; - while (e != NULL) { - // Validate the numbers. - if (e->id > LZMA_VLI_VALUE_MAX - || e->size >= (lzma_vli)(SIZE_MAX)) - return 0; - - // Add the sizes. - size += lzma_vli_size(e->id); - if (e->id != 0) { - size += lzma_vli_size(e->size); - size += e->size; - } - - e = e->next; - } - } - - return size; -} diff --git a/src/liblzma/common/raw_common.c b/src/liblzma/common/raw_common.c index d45bf4de..35252fc2 100644 --- a/src/liblzma/common/raw_common.c +++ b/src/liblzma/common/raw_common.c @@ -20,122 +20,81 @@ #include "raw_common.h" -/// \brief Prepares the filter chain -/// -/// Prepares the filter chain by setting uncompressed sizes for each filter, -/// and adding implicit Subblock filter when needed. -/// -/// \return true if error occurred, false on success. -/// -static bool -prepare(lzma_vli *id, lzma_vli *uncompressed_size, bool allow_implicit) +static lzma_ret +validate_options(const lzma_options_filter *options, size_t *count) { - bool needs_end_of_input = false; - - switch (id[0]) { - case LZMA_FILTER_COPY: - case LZMA_FILTER_X86: - case LZMA_FILTER_POWERPC: - case LZMA_FILTER_IA64: - case LZMA_FILTER_ARM: - case LZMA_FILTER_ARMTHUMB: - case LZMA_FILTER_SPARC: - case LZMA_FILTER_DELTA: - uncompressed_size[1] = uncompressed_size[0]; - needs_end_of_input = true; - break; - - case LZMA_FILTER_SUBBLOCK: - case LZMA_FILTER_LZMA: - // These change the size of the data unpredictably. - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - break; - - case LZMA_FILTER_SUBBLOCK_HELPER: - uncompressed_size[1] = uncompressed_size[0]; - break; - - default: - // Unknown filter. - return true; - } + if (options == NULL) + return LZMA_PROG_ERROR; - // Is this the last filter in the chain? - if (id[1] == LZMA_VLI_VALUE_UNKNOWN) { - if (needs_end_of_input && allow_implicit - && uncompressed_size[0] - == LZMA_VLI_VALUE_UNKNOWN) { - // Add implicit Subblock filter. - id[1] = LZMA_FILTER_SUBBLOCK; - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - id[2] = LZMA_VLI_VALUE_UNKNOWN; + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t change = 0; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i; + for (i = 0; options[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + switch (options[i].id) { + // Not #ifdeffing these for simplicity. + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + // These don't change the size of the data and cannot + // be used as the last filter in the chain. + last_ok = false; + break; + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + last_ok = true; + ++change; + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + last_ok = true; + break; +#endif + + default: + return LZMA_HEADER_ERROR; } - - return false; } - return prepare(id + 1, uncompressed_size + 1, allow_implicit); + // There must be 1-4 filters and the last filter must be usable as + // the last filter in the chain. + if (i == 0 || i > 4 || !last_ok) + return LZMA_HEADER_ERROR; + + // At maximum of two non-last filters are allowed to change the + // size of the data. + if (change > 2) + return LZMA_HEADER_ERROR; + + *count = i; + return LZMA_OK; } extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder) + bool is_encoder) { - if (options == NULL || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - // Count the number of filters in the chain. - size_t count = 0; - while (options[count].id != LZMA_VLI_VALUE_UNKNOWN) - ++count; - - // Allocate enough space from the stack for IDs and uncompressed - // sizes. We need two extra: possible implicit Subblock and end - // of array indicator. - lzma_vli ids[count + 2]; - lzma_vli uncompressed_sizes[count + 2]; - bool using_implicit = false; - - uncompressed_sizes[0] = uncompressed_size; - - if (count == 0) { - if (!allow_implicit) - return LZMA_PROG_ERROR; - - count = 1; - using_implicit = true; - - // Special case: no filters were specified, so an implicit - // Copy or Subblock filter is used. - if (uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - ids[0] = LZMA_FILTER_SUBBLOCK; - else - ids[0] = LZMA_FILTER_COPY; + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_options(options, &count)); - ids[1] = LZMA_VLI_VALUE_UNKNOWN; - - } else { - // Prepare the ids[] and uncompressed_sizes[]. - for (size_t i = 0; i < count; ++i) - ids[i] = options[i].id; - - ids[count] = LZMA_VLI_VALUE_UNKNOWN; - - if (prepare(ids, uncompressed_sizes, allow_implicit)) - return LZMA_HEADER_ERROR; - - // Check if implicit Subblock filter was added. - if (ids[count] != LZMA_VLI_VALUE_UNKNOWN) { - assert(ids[count] == LZMA_FILTER_SUBBLOCK); - ++count; - using_implicit = true; - } - } - - // Set the filter functions, and copy uncompressed sizes and options. + // Set the filter functions and copy the options pointer. lzma_filter_info filters[count + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { @@ -144,29 +103,20 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // of the uncompressed data. const size_t j = count - i - 1; - filters[j].init = get_function(ids[i]); + filters[j].init = get_function(options[i].id); if (filters[j].init == NULL) return LZMA_HEADER_ERROR; filters[j].options = options[i].options; - filters[j].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[0].options = NULL; - } else { for (size_t i = 0; i < count; ++i) { - filters[i].init = get_function(ids[i]); + filters[i].init = get_function(options[i].id); if (filters[i].init == NULL) return LZMA_HEADER_ERROR; filters[i].options = options[i].options; - filters[i].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[count - 1].options = NULL; } // Terminate the array. diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/common/raw_common.h index 172223cb..0a27f3dc 100644 --- a/src/liblzma/common/raw_common.h +++ b/src/liblzma/common/raw_common.h @@ -23,9 +23,8 @@ #include "common.h" extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, - lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_allocator *allocator, const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder); + bool is_encoder); #endif diff --git a/src/liblzma/common/raw_decoder.c b/src/liblzma/common/raw_decoder.c index 03f1d847..4fb7111c 100644 --- a/src/liblzma/common/raw_decoder.c +++ b/src/liblzma/common/raw_decoder.c @@ -18,24 +18,17 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_decoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_decoder.h" #include "subblock_decoder_helper.h" #include "delta_decoder.h" #include "lzma_decoder.h" -#include "metadata_decoder.h" static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_decoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_decoder_init; @@ -93,12 +86,10 @@ get_function(lzma_vli id) extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) + const lzma_options_filter *options) { const lzma_ret ret = lzma_raw_coder_init(next, allocator, - options, uncompressed_size, &get_function, - allow_implicit, false); + options, &get_function, false); if (ret != LZMA_OK) lzma_next_coder_end(next, allocator); @@ -108,8 +99,7 @@ lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options) { return_if_error(lzma_strm_init(strm)); @@ -117,8 +107,7 @@ lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, - strm->allocator, options, uncompressed_size, - &get_function, allow_implicit, false); + strm->allocator, options, &get_function, false); if (ret != LZMA_OK) lzma_end(strm); diff --git a/src/liblzma/common/raw_decoder.h b/src/liblzma/common/raw_decoder.h index 9d48074b..c0e626a8 100644 --- a/src/liblzma/common/raw_decoder.h +++ b/src/liblzma/common/raw_decoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/raw_encoder.c b/src/liblzma/common/raw_encoder.c index fb12862b..9b8cbfae 100644 --- a/src/liblzma/common/raw_encoder.c +++ b/src/liblzma/common/raw_encoder.c @@ -18,28 +18,16 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_encoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_encoder.h" #include "delta_encoder.h" #include "lzma_encoder.h" -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_encoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_encoder_init; @@ -90,91 +78,34 @@ get_function(lzma_vli id) } -static lzma_ret -raw_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options) { - // Check that our amount of input stays in proper limits. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (action == LZMA_FINISH) { - if (coder->uncompressed_size != in_size - *in_pos) - return LZMA_PROG_ERROR; - } else { - if (coder->uncompressed_size < in_size - *in_pos) - return LZMA_PROG_ERROR; - } - } - - const size_t in_start = *in_pos; + const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, &get_function, true); - const lzma_ret ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, action); - - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= *in_pos - in_start; + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); return ret; } -static void -raw_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &raw_encode; - next->end = &raw_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - next->coder->uncompressed_size = uncompressed_size; - - // lzma_raw_coder_init() accesses get_function() via function pointer, - // because this way linker doesn't statically link both encoder and - // decoder functions if user needs only encoder or decoder. - return lzma_raw_coder_init(&next->coder->next, allocator, - options, uncompressed_size, - &get_function, allow_implicit, true); -} - - -extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - lzma_next_coder_init(raw_encoder_init, next, allocator, - options, uncompressed_size, allow_implicit); -} - - extern LZMA_API lzma_ret -lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options) { - lzma_next_strm_init(strm, raw_encoder_init, - options, uncompressed_size, allow_implicit); + return_if_error(lzma_strm_init(strm)); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; - return LZMA_OK; + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, &get_function, true); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; } diff --git a/src/liblzma/common/raw_encoder.h b/src/liblzma/common/raw_encoder.h index b0aab61a..4e148489 100644 --- a/src/liblzma/common/raw_encoder.h +++ b/src/liblzma/common/raw_encoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/stream_common.h b/src/liblzma/common/stream_common.h index b2f37f37..4f83fc58 100644 --- a/src/liblzma/common/stream_common.h +++ b/src/liblzma/common/stream_common.h @@ -22,6 +22,9 @@ #include "common.h" +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + extern const uint8_t lzma_header_magic[6]; extern const uint8_t lzma_footer_magic[2]; diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 56de3d9f..1bf7f1f8 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -18,281 +18,148 @@ /////////////////////////////////////////////////////////////////////////////// #include "stream_common.h" +#include "stream_decoder.h" #include "check.h" #include "stream_flags_decoder.h" #include "block_decoder.h" -#include "metadata_decoder.h" struct lzma_coder_s { enum { - SEQ_STREAM_HEADER_CODE, - SEQ_BLOCK_HEADER_INIT, - SEQ_BLOCK_HEADER_CODE, - SEQ_METADATA_CODE, - SEQ_DATA_CODE, - SEQ_STREAM_TAIL_INIT, - SEQ_STREAM_TAIL_CODE, + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, } sequence; - /// Position in variable-length integers and in some other things. - size_t pos; - /// Block or Metadata decoder. This takes little memory and the same /// data structure can be used to decode every Block Header, so it's /// a good idea to have a separate lzma_next_coder structure for it. lzma_next_coder block_decoder; - /// Block Header decoder; this is separate - lzma_next_coder block_header_decoder; - + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. lzma_options_block block_options; - /// Information about the sizes of the Blocks - lzma_info *info; - - /// Current Block in *info - lzma_info_iter iter; - - /// Number of bytes not yet processed from Data Blocks in the Stream. - /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is - /// decremented while decoding and verified to match the reality. - lzma_vli total_left; - - /// Like uncompressed_left above but for uncompressed data from - /// Data Blocks. - lzma_vli uncompressed_left; - /// Stream Flags from Stream Header - lzma_stream_flags header_flags; - - /// Stream Flags from Stream tail - lzma_stream_flags tail_flags; + lzma_stream_flags stream_flags; - /// Decoder for Stream Header and Stream tail. This takes very - /// little memory and the same data structure can be used for - /// both Header and tail, so it's a good idea to have a separate - /// lzma_next_coder structure for it. - lzma_next_coder flags_decoder; + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; - /// Temporary destination for the decoded Metadata. - lzma_metadata metadata; + /// Write position in buffer[] + size_t buffer_pos; - /// Pointer to application-supplied pointer where to store the list - /// of Extra Records from the Header Metadata Block. - lzma_extra **header_extra; - - /// Same as above but Footer Metadata Block - lzma_extra **footer_extra; + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; static lzma_ret -metadata_init(lzma_coder *coder, lzma_allocator *allocator) -{ - assert(coder->metadata.index == NULL); - assert(coder->metadata.extra == NULL); - - // Single-Block Streams don't have Metadata Blocks. - if (!coder->header_flags.is_multi) - return LZMA_DATA_ERROR; - - coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the Uncompressed Size of a Metadata Block. This is to - // prevent security issues where input file would have very huge - // Metadata. - // - // FIXME: Hardcoded constant is ugly. Maybe we should provide - // some way to specify this from the application. - coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; - - lzma_info_size size_type; - bool want_extra; - - // If we haven't decoded any Data Blocks yet, this is Header - // Metadata Block. - if (lzma_info_index_count_get(coder->info) == 0) { - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - size_type = LZMA_INFO_HEADER_METADATA; - want_extra = coder->header_extra != NULL; - } else { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - size_type = LZMA_INFO_FOOTER_METADATA; - want_extra = coder->footer_extra != NULL; - } - - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.total_size = lzma_info_size_get( - coder->info, size_type); - - coder->sequence = SEQ_METADATA_CODE; - - return lzma_metadata_decoder_init(&coder->block_decoder, allocator, - &coder->block_options, &coder->metadata, want_extra); -} - - -static lzma_ret -data_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(lzma_info_iter_set( - &coder->iter, LZMA_VLI_VALUE_UNKNOWN, - coder->block_options.uncompressed_size)); - - coder->block_options.total_size = coder->iter.total_size; - coder->block_options.uncompressed_size = coder->iter.uncompressed_size; - coder->block_options.total_limit = coder->total_left; - coder->block_options.uncompressed_limit = coder->uncompressed_left; - - if (coder->header_flags.is_multi) { - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - } else { - coder->block_options.has_uncompressed_size_in_footer - = coder->iter.uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - } - - coder->sequence = SEQ_DATA_CODE; - - return lzma_block_decoder_init(&coder->block_decoder, allocator, - &coder->block_options); -} - - -static lzma_ret stream_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA_CODE)) + || coder->sequence == SEQ_BLOCK)) switch (coder->sequence) { - case SEQ_STREAM_HEADER_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->buffer_pos = 0; + + // Decode the Stream Header. + return_if_error(lzma_stream_header_decode( + &coder->stream_flags, coder->buffer)); - coder->sequence = SEQ_BLOCK_HEADER_INIT; + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_UNSUPPORTED_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; // Detect if the Check type is supported and give appropriate // warning if it isn't. We don't warn every time a new Block // is started. - lzma_check tmp; - if (lzma_check_init(&tmp, coder->header_flags.check)) + if (!lzma_available_checks[coder->block_options.check]) return LZMA_UNSUPPORTED_CHECK; break; } - case SEQ_BLOCK_HEADER_INIT: { - coder->block_options.check = coder->header_flags.check; - coder->block_options.has_crc32 = coder->header_flags.has_crc32; + case SEQ_BLOCK_HEADER: { + if (coder->buffer_pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } - for (size_t i = 0; - i < ARRAY_SIZE(coder->block_options.filters); - ++i) { - lzma_free(coder->block_options.filters[i].options, - allocator); - coder->block_options.filters[i].options = NULL; + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); } - return_if_error(lzma_block_header_decoder_init( - &coder->block_header_decoder, allocator, - &coder->block_options)); - - coder->sequence = SEQ_BLOCK_HEADER_CODE; - } - - // Fall through - - case SEQ_BLOCK_HEADER_CODE: { - lzma_ret ret = coder->block_header_decoder.code( - coder->block_header_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the Block Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + coder->block_options.header_size); - if (coder->block_options.is_metadata) - ret = metadata_init(coder, allocator); - else - ret = data_init(coder, allocator); - - if (ret != LZMA_OK) - return ret; - - break; - } + // Return if we didn't get the whole Block Header yet. + if (coder->buffer_pos < coder->block_options.header_size) + return LZMA_OK; - case SEQ_METADATA_CODE: { - lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + coder->buffer_pos = 0; - const bool is_header_metadata = lzma_info_index_count_get( - coder->info) == 0; + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + coder->block_options.filters = filters; - if (is_header_metadata) { - if (coder->header_extra != NULL) { - *coder->header_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, - coder->block_options.total_size) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_BLOCK_HEADER_INIT; - } else { - if (coder->footer_extra != NULL) { - *coder->footer_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, &coder->block_options); - coder->sequence = SEQ_STREAM_TAIL_INIT; - } + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); - assert(coder->metadata.extra == NULL); + coder->block_options.filters = NULL; - ret = lzma_info_metadata_set(coder->info, allocator, - &coder->metadata, is_header_metadata, true); - if (ret != LZMA_OK) + // Check if Block enocoder initialization succeeded. Don't + // warn about unsupported check anymore since we did it + // earlier if it was needed. + if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK) return ret; - // Intialize coder->total_size and coder->uncompressed_size - // from Header Metadata. - if (is_header_metadata) { - coder->total_left = lzma_info_size_get( - coder->info, LZMA_INFO_TOTAL); - coder->uncompressed_left = lzma_info_size_get( - coder->info, LZMA_INFO_UNCOMPRESSED); - } - + coder->sequence = SEQ_BLOCK; break; } - case SEQ_DATA_CODE: { + case SEQ_BLOCK: { lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, @@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - ret = lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size); - if (ret != LZMA_OK) - return ret; - - // These won't overflow since lzma_info_iter_set() succeeded. - if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN) - coder->total_left -= coder->block_options.total_size; - if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_left -= coder->block_options - .uncompressed_size; + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_total_size_get( + &coder->block_options), + coder->block_options.uncompressed_size)); - if (!coder->header_flags.is_multi) { - ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_STREAM_TAIL_INIT; - break; - } - - coder->sequence = SEQ_BLOCK_HEADER_INIT; + coder->sequence = SEQ_BLOCK_HEADER; break; } - case SEQ_STREAM_TAIL_INIT: { - lzma_ret ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - ret = lzma_stream_tail_decoder_init(&coder->flags_decoder, - allocator, &coder->tail_flags); - if (ret != LZMA_OK) + case SEQ_INDEX: { + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) return ret; - coder->sequence = SEQ_STREAM_TAIL_CODE; + coder->sequence = SEQ_STREAM_FOOTER; + break; } - // Fall through + case SEQ_STREAM_FOOTER: + // Copy the Stream Footer to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); - case SEQ_STREAM_TAIL_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + // Return if we didn't get the whole Stream Footer yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; - if (!lzma_stream_flags_is_equal( - coder->header_flags, coder->tail_flags)) + // Decode the Stream Footer. + lzma_stream_flags footer_flags; + return_if_error(lzma_stream_footer_decode( + &footer_flags, coder->buffer)); + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + if (!lzma_stream_flags_equal(&coder->stream_flags, + &footer_flags)) return LZMA_DATA_ERROR; return LZMA_STREAM_END; - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { - for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i) - lzma_free(coder->block_options.filters[i].options, allocator); - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_next_coder_end(&coder->block_header_decoder, allocator); - lzma_next_coder_end(&coder->flags_decoder, allocator); - lzma_info_free(coder->info, allocator); - lzma_index_free(coder->metadata.index, allocator); - lzma_extra_free(coder->metadata.extra, allocator); + lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_ret -stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->end = &stream_decoder_end; next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; - next->coder->metadata.index = NULL; - next->coder->metadata.extra = NULL; - } else { - for (size_t i = 0; i < ARRAY_SIZE( - next->coder->block_options.filters); ++i) - lzma_free(next->coder->block_options - .filters[i].options, allocator); - - lzma_index_free(next->coder->metadata.index, allocator); - next->coder->metadata.index = NULL; - - lzma_extra_free(next->coder->metadata.extra, allocator); - next->coder->metadata.extra = NULL; + next->coder->index_hash = NULL; } - for (size_t i = 0; i < ARRAY_SIZE(next->coder->block_options.filters); - ++i) - next->coder->block_options.filters[i].options = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) + // Initialize the Index hash used to verify the Index. + next->coder->index_hash = lzma_index_hash_init( + next->coder->index_hash, allocator); + if (next->coder->index_hash == NULL) return LZMA_MEM_ERROR; - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - // Initialize Stream Header decoder. - return_if_error(lzma_stream_header_decoder_init( - &next->coder->flags_decoder, allocator, - &next->coder->header_flags)); - - // Reset the *foo_extra pointers to NULL. This way the caller knows - // if there were no Extra Records. (We don't support appending - // Records to Extra list.) - if (header != NULL) - *header = NULL; - if (footer != NULL) - *footer = NULL; - - // Reset some variables. - next->coder->sequence = SEQ_STREAM_HEADER_CODE; - next->coder->pos = 0; - next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->header_extra = header; - next->coder->footer_extra = footer; + // Reset the rest of the variables. + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.filters = NULL; + next->coder->buffer_pos = 0; return LZMA_OK; } extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - lzma_next_coder_init( - stream_decoder_init, next, allocator, header, footer); + lzma_next_coder_init0(stream_decoder_init, next, allocator); } extern LZMA_API lzma_ret -lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, stream_decoder_init, header, footer); + lzma_next_strm_init0(strm, stream_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/easy_common.h b/src/liblzma/common/stream_decoder.h index d864cce5..dcda387d 100644 --- a/src/liblzma/common/easy_common.h +++ b/src/liblzma/common/stream_decoder.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions +/// \file stream_decoder.h +/// \brief Decodes .lzma Streams // // Copyright (C) 2008 Lasse Collin // @@ -17,12 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H -#ifndef LZMA_EASY_COMMON_H -#define LZMA_EASY_COMMON_H +#include "common.h" -extern bool lzma_easy_set_filters( - lzma_options_filter *filters, uint32_t level); +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator); #endif diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c new file mode 100644 index 00000000..767b8014 --- /dev/null +++ b/src/liblzma/common/stream_encoder.c @@ -0,0 +1,282 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .lzma Streams +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_common.h" +#include "stream_encoder.h" +#include "block_encoder.h" +#include "index_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_options_block block_options; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +{ + // Prepare the Block options. + coder->block_options.compressed_size = LZMA_VLI_VALUE_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, + out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediatelly since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. + // + // Initialize the Index encoder, and continue to + // actually encoding the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder except if this is the first + // Block, because stream_encoder_init() has already + // initialized it. + if (lzma_index_count(coder->index) != 0) + return_if_error(block_encoder_init(coder, allocator)); + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[4] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli total_size = lzma_block_total_size_get( + &coder->block_options); + assert(total_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + total_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + const lzma_stream_flags stream_flags = { + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->block_encoder, allocator); + lzma_next_coder_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + if (filters == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_encode; + next->end = &stream_encoder_end; + + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.check = check; + next->coder->block_options.filters = (lzma_options_filter *)(filters); + + // Initialize the Index + next->coder->index = lzma_index_init(next->coder->index, allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, next->coder->buffer)); + + next->coder->buffer_pos = 0; + next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. This way we detect if the given + // filters are supported by the current liblzma build, and the + // application doesn't need to keep the filters structure available + // unless it is going to use LZMA_FULL_FLUSH. + return block_encoder_init(next->coder, allocator); +} + + +extern lzma_ret +lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_coder_init(stream_encoder_init, next, allocator, + filters, check); +} + + +extern LZMA_API lzma_ret +lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_strm_init(strm, stream_encoder_init, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/metadata_encoder.h b/src/liblzma/common/stream_encoder.h index 20357fe6..3ce29561 100644 --- a/src/liblzma/common/metadata_encoder.h +++ b/src/liblzma/common/stream_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file metadata_encoder.h -/// \brief Encodes metadata to be stored into Metadata Blocks +/// \file stream_encoder.h +/// \brief Encodes .lzma Streams // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,14 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_METADATA_ENCODER_H -#define LZMA_METADATA_ENCODER_H +#ifndef LZMA_STREAM_ENCODER_H +#define LZMA_STREAM_ENCODER_H #include "common.h" -extern lzma_ret lzma_metadata_encoder_init( +extern lzma_ret lzma_stream_encoder_init( lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata); + const lzma_options_filter *filters, lzma_check_type check); #endif diff --git a/src/liblzma/common/stream_encoder_multi.c b/src/liblzma/common/stream_encoder_multi.c deleted file mode 100644 index 403980cf..00000000 --- a/src/liblzma/common/stream_encoder_multi.c +++ /dev/null @@ -1,445 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_multi.c -/// \brief Encodes Multi-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "stream_encoder_multi.h" -#include "block_encoder.h" -#include "metadata_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_STREAM_HEADER_COPY, - SEQ_HEADER_METADATA_INIT, - SEQ_HEADER_METADATA_COPY, - SEQ_HEADER_METADATA_CODE, - SEQ_DATA_INIT, - SEQ_DATA_COPY, - SEQ_DATA_CODE, - SEQ_FOOTER_METADATA_INIT, - SEQ_FOOTER_METADATA_COPY, - SEQ_FOOTER_METADATA_CODE, - SEQ_STREAM_FOOTER_INIT, - SEQ_STREAM_FOOTER_COPY, - } sequence; - - /// Block or Metadata encoder - lzma_next_coder next; - - /// Options for the Block encoder - lzma_options_block block_options; - - /// Information about the Stream - lzma_info *info; - - /// Information about the current Data Block - lzma_info_iter iter; - - /// Pointer to user-supplied options structure. We don't write to - /// it, only read instructions from the application, thus this is - /// const even though the user-supplied pointer from - /// lzma_options_filter structure isn't. - const lzma_options_stream *stream_options; - - /// Stream Header or Stream Footer in encoded form - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -typedef enum { - BLOCK_HEADER_METADATA, - BLOCK_DATA, - BLOCK_FOOTER_METADATA, -} block_type; - - -static lzma_ret -block_header_encode(lzma_coder *coder, lzma_allocator *allocator, - lzma_vli uncompressed_size, block_type type) -{ - assert(coder->header == NULL); - - coder->block_options = (lzma_options_block){ - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .has_eopm = uncompressed_size == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = type != BLOCK_DATA, - .has_uncompressed_size_in_footer = false, - .has_backward_size = type == BLOCK_FOOTER_METADATA, - .handle_padding = false, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - }; - - if (type == BLOCK_DATA) { - memcpy(coder->block_options.filters, - coder->stream_options->filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment = coder->iter.stream_offset; - } else { - memcpy(coder->block_options.filters, - coder->stream_options->metadata_filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment - = lzma_info_metadata_alignment_get( - coder->info, type == BLOCK_HEADER_METADATA); - } - - return_if_error(lzma_block_header_size(&coder->block_options)); - - coder->header_size = coder->block_options.header_size; - coder->header = lzma_alloc(coder->header_size, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_block_header_encode( - coder->header, &coder->block_options)); - - coder->header_pos = 0; - return LZMA_OK; -} - - -static lzma_ret -metadata_encoder_init(lzma_coder *coder, lzma_allocator *allocator, - lzma_metadata *metadata, block_type type) -{ - return_if_error(lzma_info_metadata_set(coder->info, allocator, - metadata, type == BLOCK_HEADER_METADATA, false)); - - const lzma_vli metadata_size = lzma_metadata_size(metadata); - if (metadata_size == 0) - return LZMA_PROG_ERROR; - - return_if_error(block_header_encode( - coder, allocator, metadata_size, type)); - - return lzma_metadata_encoder_init(&coder->next, allocator, - &coder->block_options, metadata); -} - - -static lzma_ret -data_encoder_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(block_header_encode(coder, allocator, - LZMA_VLI_VALUE_UNKNOWN, BLOCK_DATA)); - - return lzma_block_encoder_init(&coder->next, allocator, - &coder->block_options); -} - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // Main loop - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - case SEQ_STREAM_FOOTER_COPY: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - if (coder->header_pos < coder->header_size) - return LZMA_OK; - - lzma_free(coder->header, allocator); - coder->header = NULL; - - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - // Write Header Metadata Block if we have Extra for it - // or known Uncompressed Size. - if (coder->stream_options->header != NULL - || coder->stream_options - ->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_HEADER_METADATA_INIT; - } else { - // Mark that Header Metadata Block doesn't - // exist. - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, 0) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_DATA_INIT; - } - break; - - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - ++coder->sequence; - break; - - case SEQ_STREAM_FOOTER_COPY: - return LZMA_STREAM_END; - - default: - assert(0); - } - - break; - - case SEQ_HEADER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size = LZMA_VLI_VALUE_UNKNOWN, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = coder->stream_options - ->uncompressed_size, - .index = NULL, - // Metadata encoder doesn't modify this, but since - // the lzma_extra structure is used also when decoding - // Metadata, the pointer is not const, and we need - // to cast the constness away in the encoder. - .extra = (lzma_extra *)(coder->stream_options->header), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_HEADER_METADATA)); - - coder->sequence = SEQ_HEADER_METADATA_COPY; - break; - } - - case SEQ_FOOTER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size - = lzma_info_size_get(coder->info, - LZMA_INFO_HEADER_METADATA), - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .index = lzma_info_index_get(coder->info, false), - .extra = (lzma_extra *)(coder->stream_options->footer), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_FOOTER_METADATA)); - - coder->sequence = SEQ_FOOTER_METADATA_COPY; - break; - } - - case SEQ_HEADER_METADATA_CODE: - case SEQ_FOOTER_METADATA_CODE: { - size_t dummy = 0; - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, NULL, &dummy, 0, - out, out_pos, out_size, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; - - return_if_error(lzma_info_size_set(coder->info, - coder->sequence == SEQ_HEADER_METADATA_CODE - ? LZMA_INFO_HEADER_METADATA - : LZMA_INFO_FOOTER_METADATA, - coder->block_options.total_size)); - - ++coder->sequence; - break; - } - - case SEQ_DATA_INIT: { - // Don't create an empty Block unless it would be - // the only Data Block. - if (*in_pos == in_size) { - // If we are LZMA_SYNC_FLUSHing or LZMA_FULL_FLUSHing, - // return LZMA_STREAM_END since there's nothing to - // flush. - if (action != LZMA_FINISH) - return action == LZMA_RUN - ? LZMA_OK : LZMA_STREAM_END; - - if (lzma_info_index_count_get(coder->info) != 0) { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->sequence = SEQ_FOOTER_METADATA_INIT; - break; - } - } - - return_if_error(data_encoder_init(coder, allocator)); - - coder->sequence = SEQ_DATA_COPY; - break; - } - - case SEQ_DATA_CODE: { - static const lzma_action convert[4] = { - LZMA_RUN, - LZMA_SYNC_FLUSH, - LZMA_FINISH, - LZMA_FINISH, - }; - - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, in, in_pos, in_size, - out, out_pos, out_size, convert[action]); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - return_if_error(lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size)); - - coder->sequence = SEQ_DATA_INIT; - break; - } - - case SEQ_STREAM_FOOTER_INIT: { - assert(coder->header == NULL); - - lzma_stream_flags flags = { - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .is_multi = true, - }; - - coder->header = lzma_alloc(LZMA_STREAM_TAIL_SIZE, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_tail_encode( - coder->header, &flags)); - - coder->header_size = LZMA_STREAM_TAIL_SIZE; - coder->header_pos = 0; - - coder->sequence = SEQ_STREAM_FOOTER_COPY; - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_info_free(coder->info, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - } else { - lzma_free(next->coder->header, allocator); - } - - next->coder->header = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) - return LZMA_MEM_ERROR; - - next->coder->sequence = SEQ_STREAM_HEADER_COPY; - next->coder->stream_options = options; - - // Encode Stream Flags - { - lzma_stream_flags flags = { - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = true, - }; - - next->coder->header = lzma_alloc(LZMA_STREAM_HEADER_SIZE, - allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode( - next->coder->header, &flags)); - - next->coder->header_pos = 0; - next->coder->header_size = LZMA_STREAM_HEADER_SIZE; - } - - if (lzma_info_size_set(next->coder->info, LZMA_INFO_STREAM_START, - options->alignment) != LZMA_OK) - return LZMA_PROG_ERROR; - - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, next, allocator, options); -} - - -extern LZMA_API lzma_ret -lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_encoder_single.c b/src/liblzma/common/stream_encoder_single.c deleted file mode 100644 index d93e7169..00000000 --- a/src/liblzma/common/stream_encoder_single.c +++ /dev/null @@ -1,219 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_single.c -/// \brief Encodes Single-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - /// Uncompressed Size, Backward Size, and Footer Magic Bytes are - /// part of Block in the file format specification, but it is simpler - /// to implement them as part of Stream. - enum { - SEQ_HEADERS, - SEQ_DATA, - SEQ_FOOTER, - } sequence; - - /// Block encoder - lzma_next_coder block_encoder; - - /// Block encoder options - lzma_options_block block_options; - - /// Stream Flags; we need to have these in this struct so that we - /// can encode Stream Footer. - lzma_stream_flags stream_flags; - - /// Stream Header + Block Header, or Stream Footer - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, size_t *out_pos, - size_t out_size, lzma_action action) -{ - // NOTE: We don't check if the amount of input is in the proper limits, - // because the Block encoder will do it for us. - - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_HEADERS: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - if (coder->header_pos == coder->header_size) { - coder->header_pos = 0; - coder->sequence = SEQ_DATA; - } - - break; - - case SEQ_DATA: { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - in, in_pos, in_size, - out, out_pos, out_size, action); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - assert(*in_pos == in_size); - - assert(coder->header_size >= LZMA_STREAM_TAIL_SIZE); - coder->header_size = LZMA_STREAM_TAIL_SIZE; - - return_if_error(lzma_stream_tail_encode( - coder->header, &coder->stream_flags)); - - coder->sequence = SEQ_FOOTER; - break; - } - - case SEQ_FOOTER: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - return coder->header_pos == coder->header_size - ? LZMA_STREAM_END : LZMA_OK; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } else { - // Free the previous buffer, if any. - lzma_free(next->coder->header, allocator); - } - - // At this point, next->coder->header points to nothing useful. - next->coder->header = NULL; - - // Basic initializations - next->coder->sequence = SEQ_HEADERS; - next->coder->header_pos = 0; - - // Initialize next->coder->stream_flags. - next->coder->stream_flags = (lzma_stream_flags){ - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = false, - }; - - // Initialize next->coder->block_options. - next->coder->block_options = (lzma_options_block){ - .check = options->check, - .has_crc32 = options->has_crc32, - .has_eopm = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = false, - .has_uncompressed_size_in_footer = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .has_backward_size = true, - .handle_padding = false, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = options->uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - .alignment = options->alignment + LZMA_STREAM_HEADER_SIZE, - }; - memcpy(next->coder->block_options.filters, options->filters, - sizeof(options->filters)); - - return_if_error(lzma_block_header_size(&next->coder->block_options)); - - // Encode Stream Flags and Block Header into next->coder->header. - next->coder->header_size = (size_t)(LZMA_STREAM_HEADER_SIZE) - + next->coder->block_options.header_size; - next->coder->header = lzma_alloc(next->coder->header_size, allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode(next->coder->header, - &next->coder->stream_flags)); - - return_if_error(lzma_block_header_encode( - next->coder->header + LZMA_STREAM_HEADER_SIZE, - &next->coder->block_options)); - - // Initialize the Block encoder. - return lzma_block_encoder_init(&next->coder->block_encoder, allocator, - &next->coder->block_options); -} - - -/* -extern lzma_ret -lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, allocator, options); -} -*/ - - -extern LZMA_API lzma_ret -lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c index d9c847ac..0270875a 100644 --- a/src/liblzma/common/stream_flags_decoder.c +++ b/src/liblzma/common/stream_flags_decoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_decoder.c -/// \brief Decodes Stream Header and tail from .lzma files +/// \brief Decodes Stream Header and Stream Footer from .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -17,242 +17,72 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_flags_decoder.h" #include "stream_common.h" -//////////// -// Common // -//////////// - -struct lzma_coder_s { - enum { - SEQ_HEADER_MAGIC, - SEQ_HEADER_FLAGS, - SEQ_HEADER_CRC32, - - SEQ_FOOTER_FLAGS, - SEQ_FOOTER_MAGIC, - } sequence; - - size_t pos; - uint32_t crc32; - - lzma_stream_flags *options; -}; - - -static void -stream_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - static bool -stream_flags_decode(const uint8_t *in, lzma_stream_flags *options) +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) { // Reserved bits must be unset. - if (*in & 0xE0) + if (in[0] != 0x00 || (in[1] & 0xF0)) return true; - options->check = *in & 0x07; - options->has_crc32 = (*in & 0x08) != 0; - options->is_multi = (*in & 0x10) != 0; + options->check = in[1] & 0x0F; return false; } -//////////// -// Header // -//////////// - -static lzma_ret -stream_header_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_HEADER_MAGIC: - if (in[*in_pos] != lzma_header_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_header_magic)) { - coder->pos = 0; - coder->sequence = SEQ_HEADER_FLAGS; - } - - break; - - case SEQ_HEADER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, 0); - - ++*in_pos; - coder->sequence = SEQ_HEADER_CRC32; - break; - - case SEQ_HEADER_CRC32: - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == 4) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_header_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_HEADER_MAGIC; - next->coder->pos = 0; - next->coder->crc32 = 0; - next->coder->options = options; - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init( - stream_header_decoder_init, next, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_header_decoder(lzma_stream *strm, lzma_stream_flags *options) -{ - lzma_next_strm_init(strm, stream_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} - - -////////// -// Tail // -////////// - -static lzma_ret -stream_tail_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FOOTER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - ++*in_pos; - coder->sequence = SEQ_FOOTER_MAGIC; - break; - - case SEQ_FOOTER_MAGIC: - if (in[*in_pos] != lzma_footer_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_footer_magic)) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) { - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_tail_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_FOOTER_FLAGS; - next->coder->pos = 0; - next->coder->options = options; + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_HEADER_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_equal can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. + options->backward_size = LZMA_VLI_VALUE_UNKNOWN; return LZMA_OK; } -extern lzma_ret -lzma_stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init2(next, allocator, stream_header_decoder_init, - stream_tail_decoder_init, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_tail_decoder(lzma_stream *strm, lzma_stream_flags *options) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) { - lzma_next_strm_init2(strm, stream_header_decoder_init, - stream_tail_decoder_init, strm->allocator, options); - - strm->internal->supported_actions[LZMA_RUN] = true; + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_HEADER_ERROR; + + // Backward Size + options->backward_size = integer_read_32(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; return LZMA_OK; } diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c index 55468580..4efbb6f4 100644 --- a/src/liblzma/common/stream_flags_encoder.c +++ b/src/liblzma/common/stream_flags_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_encoder.c -/// \brief Encodes Stream Header and Footer for .lzma files +/// \brief Encodes Stream Header and Stream Footer for .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -21,55 +21,69 @@ static bool -stream_flags_encode(uint8_t *flags_byte, const lzma_stream_flags *options) +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) { - // Check type if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return true; - *flags_byte = options->check; - - // Usage of CRC32 in Block Headers - if (options->has_crc32) - *flags_byte |= 0x08; - - // Single- or Multi-Block - if (options->is_multi) - *flags_byte |= 0x10; + out[0] = 0x00; + out[1] = options->check; return false; } extern LZMA_API lzma_ret -lzma_stream_header_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + // Magic memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); // Stream Flags - if (stream_flags_encode(out + sizeof(lzma_header_magic), options)) - return LZMA_PROG_ERROR;; + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; // CRC32 of the Stream Header - const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), 1, 0); + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); - for (size_t i = 0; i < 4; ++i) - out[sizeof(lzma_header_magic) + 1 + i] = crc >> (i * 8); + integer_write_32(out + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE, crc); return LZMA_OK; } extern LZMA_API lzma_ret -lzma_stream_tail_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + // Backward Size + if (options->backward_size < LZMA_BACKWARD_SIZE_MIN + || options->backward_size > LZMA_BACKWARD_SIZE_MAX + || (options->backward_size & 3)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 4, options->backward_size / 4 - 1); + // Stream Flags - if (stream_flags_encode(out, options)) + if (stream_flags_encode(options, out + 2 * 4)) return LZMA_PROG_ERROR; + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0); + + integer_write_32(out, crc); + // Magic - memcpy(out + 1, lzma_footer_magic, sizeof(lzma_footer_magic)); + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); return LZMA_OK; } diff --git a/src/liblzma/common/easy_single.c b/src/liblzma/common/stream_flags_equal.c index e2fa4e13..db22567f 100644 --- a/src/liblzma/common/easy_single.c +++ b/src/liblzma/common/stream_flags_equal.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_single.c -/// \brief Easy Single-Block Stream encoder initialization +/// \file stream_flags_equal.c +/// \brief Compare Stream Header and Stream Footer // // Copyright (C) 2008 Lasse Collin // @@ -17,21 +17,20 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" +#include "common.h" -extern LZMA_API lzma_ret -lzma_easy_encoder_single(lzma_stream *strm, lzma_easy_level level) +extern LZMA_API lzma_bool +lzma_stream_flags_equal(const lzma_stream_flags *a, lzma_stream_flags *b) { - lzma_options_stream opt_stream = { - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - }; + if (a->check != b->check) + return false; - if (lzma_easy_set_filters(opt_stream.filters, level)) - return LZMA_HEADER_ERROR; + // Backward Sizes are compared only if they are known in both. + if (a->backward_size != LZMA_VLI_VALUE_UNKNOWN + && b->backward_size != LZMA_VLI_VALUE_UNKNOWN + && a->backward_size != b->backward_size) + return false; - return lzma_stream_encoder_single(strm, &opt_stream); + return true; } diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c index 2b89c1a7..faff6ccb 100644 --- a/src/liblzma/common/vli_decoder.c +++ b/src/liblzma/common/vli_decoder.c @@ -3,7 +3,7 @@ /// \file vli_decoder.c /// \brief Decodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,45 +25,53 @@ lzma_vli_decode(lzma_vli *restrict vli, size_t *restrict vli_pos, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { - if (*vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 - || (*vli >> (7 * *vli_pos)) != 0) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*in_pos >= in_size) - return LZMA_BUF_ERROR; + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; - if (*vli_pos == 0) { - *vli_pos = 1; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *in_pos >= in_size + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR;; - if (in[*in_pos] <= 0x7F) { - // Single-byte integer - *vli = in[*in_pos]; - ++*in_pos; - return LZMA_STREAM_END; - } - - *vli = in[*in_pos] & 0x7F; - ++*in_pos; - } - - while (*in_pos < in_size) { - // Read in the next byte. + do { + // Read the next byte. *vli |= (lzma_vli)(in[*in_pos] & 0x7F) << (*vli_pos * 7); ++*vli_pos; // Check if this is the last byte of a multibyte integer. - if (in[*in_pos] & 0x80) { - ++*in_pos; - return LZMA_STREAM_END; + if (!(in[*in_pos] & 0x80)) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most the + // compact form. + if (in[(*in_pos)++] == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; } - // Limit variable-length representation to nine bytes. - if (*vli_pos == 9) + ++*in_pos; + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions liblzma + // will confusingly indicate the file being corrupt istead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) return LZMA_DATA_ERROR; - // Increment input position only when the byte was accepted. - ++*in_pos; - } + } while (*in_pos < in_size); - return LZMA_OK; + return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK; } diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c index 1ecdb0d2..c48d6474 100644 --- a/src/liblzma/common/vli_encoder.c +++ b/src/liblzma/common/vli_encoder.c @@ -3,7 +3,7 @@ /// \file vli_encoder.c /// \brief Encodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -21,61 +21,54 @@ extern LZMA_API lzma_ret -lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, size_t vli_size, +lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { - if (vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 || vli_size > 9 - || (vli != 0 && (vli >> (7 * *vli_pos)) == 0)) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*out_pos >= out_size) - return LZMA_BUF_ERROR; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *out_pos >= out_size + || vli > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; - if (*vli_pos == 0) { - *vli_pos = 1; + // Write the non-last bytes in a loop. + while ((vli >> (*vli_pos * 7)) >= 0x80) { + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)) | 0x80; - if (vli <= 0x7F && *vli_pos >= vli_size) { - // Single-byte integer - out[(*out_pos)++] = vli; - return LZMA_STREAM_END; - } + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); - // First byte of a multibyte integer - out[(*out_pos)++] = (vli & 0x7F) | 0x80; + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? LZMA_PROG_ERROR : LZMA_OK; } - while (*out_pos < out_size) { - const lzma_vli b = vli >> (7 * *vli_pos); - ++*vli_pos; - - if (b <= 0x7F && *vli_pos >= vli_size) { - // Last byte of a multibyte integer - out[(*out_pos)++] = (b & 0xFF) | 0x80; - return LZMA_STREAM_END; - } + // Write the last byte. + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)); + ++*out_pos; + ++*vli_pos; - // Middle byte of a multibyte integer - out[(*out_pos)++] = b & 0x7F; - } + return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END; - // vli is not yet completely written out. - return LZMA_OK; } -extern LZMA_API size_t +extern LZMA_API uint32_t lzma_vli_size(lzma_vli vli) { if (vli > LZMA_VLI_VALUE_MAX) return 0; - size_t i = 0; + uint32_t i = 0; do { vli >>= 7; ++i; } while (vli != 0); - assert(i <= 9); + assert(i <= LZMA_VLI_BYTES_MAX); return i; } diff --git a/src/liblzma/common/vli_reverse_decoder.c b/src/liblzma/common/vli_reverse_decoder.c deleted file mode 100644 index 68ca6a42..00000000 --- a/src/liblzma/common/vli_reverse_decoder.c +++ /dev/null @@ -1,55 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file vli_reverse_decoder.c -/// \brief Decodes variable-length integers starting at end of the buffer -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API lzma_ret -lzma_vli_reverse_decode(lzma_vli *vli, const uint8_t *in, size_t *in_size) -{ - if (*in_size == 0) - return LZMA_BUF_ERROR; - - size_t i = *in_size - 1; - *vli = in[i] & 0x7F; - - if (!(in[i] & 0x80)) { - *in_size = i; - return LZMA_OK; - } - - const size_t end = *in_size > LZMA_VLI_BYTES_MAX - ? *in_size - LZMA_VLI_BYTES_MAX : 0; - - do { - if (i-- == end) { - if (*in_size < LZMA_VLI_BYTES_MAX) - return LZMA_BUF_ERROR; - - return LZMA_DATA_ERROR; - } - - *vli <<= 7; - *vli = in[i] & 0x7F; - - } while (!(in[i] & 0x80)); - - *in_size = i; - return LZMA_OK; -} |