diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
commit | e114502b2bc371e4a45449832cb69be036360722 (patch) | |
tree | 449c41d0408f99926de202611091747f1fbe2f85 /src/liblzma | |
parent | Fixed the test that should have been fixed as part (diff) | |
download | xz-e114502b2bc371e4a45449832cb69be036360722.tar.xz |
Oh well, big messy commit again. Some highlights:
- Updated to the latest, probably final file format version.
- Command line tool reworked to not use threads anymore.
Threading will probably go into liblzma anyway.
- Memory usage limit is now about 30 % for uncompression
and about 90 % for compression.
- Progress indicator with --verbose
- Simplified --help and full --long-help
- Upgraded to the last LGPLv2.1+ getopt_long from gnulib.
- Some bug fixes
Diffstat (limited to 'src/liblzma')
-rw-r--r-- | src/liblzma/api/lzma/block.h | 47 | ||||
-rw-r--r-- | src/liblzma/api/lzma/filter.h | 8 | ||||
-rw-r--r-- | src/liblzma/api/lzma/index.h | 20 | ||||
-rw-r--r-- | src/liblzma/api/lzma/index_hash.h | 4 | ||||
-rw-r--r-- | src/liblzma/common/block_decoder.c | 59 | ||||
-rw-r--r-- | src/liblzma/common/block_encoder.c | 41 | ||||
-rw-r--r-- | src/liblzma/common/block_header_decoder.c | 31 | ||||
-rw-r--r-- | src/liblzma/common/block_header_encoder.c | 69 | ||||
-rw-r--r-- | src/liblzma/common/block_util.c | 45 | ||||
-rw-r--r-- | src/liblzma/common/common.h | 8 | ||||
-rw-r--r-- | src/liblzma/common/filter_common.c | 4 | ||||
-rw-r--r-- | src/liblzma/common/index.c | 259 | ||||
-rw-r--r-- | src/liblzma/common/index.h | 33 | ||||
-rw-r--r-- | src/liblzma/common/index_decoder.c | 31 | ||||
-rw-r--r-- | src/liblzma/common/index_encoder.c | 16 | ||||
-rw-r--r-- | src/liblzma/common/index_hash.c | 68 | ||||
-rw-r--r-- | src/liblzma/common/stream_decoder.c | 9 | ||||
-rw-r--r-- | src/liblzma/common/stream_encoder.c | 6 | ||||
-rw-r--r-- | src/liblzma/lz/lz_decoder.h | 4 | ||||
-rw-r--r-- | src/liblzma/subblock/subblock_decoder.c | 3 |
20 files changed, 407 insertions, 358 deletions
diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h index eb3768e2..06c1633c 100644 --- a/src/liblzma/api/lzma/block.h +++ b/src/liblzma/api/lzma/block.h @@ -1,6 +1,6 @@ /** * \file lzma/block.h - * \brief .lzma Block handling + * \brief .xz Block handling * * \author Copyright (C) 1999-2006 Igor Pavlov * \author Copyright (C) 2007 Lasse Collin @@ -131,11 +131,10 @@ typedef struct { * * \note Because of the array is terminated with * .id = LZMA_VLI_UNKNOWN, the actual array must - * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block + * have LZMA_FILTERS_MAX + 1 members or the Block * Header decoder will overflow the buffer. */ lzma_filter *filters; -# define LZMA_BLOCK_FILTERS_MAX 4 } lzma_block; @@ -148,6 +147,8 @@ typedef struct { * The size can be calculated from the first byte of a Block using this macro. * Note that if the first byte is 0x00, it indicates beginning of Index; use * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. */ #define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) @@ -211,38 +212,50 @@ extern lzma_ret lzma_block_header_decode(lzma_block *options, /** - * \brief Sets Compressed Size according to Total Size + * \brief Sets Compressed Size according to Unpadded Size * - * Block Header stores Compressed Size, but Index has Total Size. If the + * Block Header stores Compressed Size, but Index has Unpadded Size. If the * application has already parsed the Index and is now decoding Blocks, - * it can calculate Compressed Size from Total Size. This function does + * it can calculate Compressed Size from Unpadded Size. This function does * exactly that with error checking, so application doesn't need to check, * for example, if the value in Index is too small to contain even the - * Block Header. Note that you need to call this function after decoding + * Block Header. 
Note that you need to call this function _after_ decoding * the Block Header field. * * \return - LZMA_OK: options->compressed_size was set successfully. - * - LZMA_DATA_ERROR: total_size is too small compared to + * - LZMA_DATA_ERROR: unpadded_size is too small compared to * options->header_size and lzma_check_sizes[options->check]. * - LZMA_PROG_ERROR: Some values are invalid. For example, - * total_size and options->header_size must be multiples - * of four, total_size must be at least 12, and + * options->header_size must be a multiple of four, and * options->header_size between 8 and 1024 inclusive. */ -extern lzma_ret lzma_block_total_size_set( - lzma_block *options, lzma_vli total_size) +extern lzma_ret lzma_block_compressed_size( + lzma_block *options, lzma_vli unpadded_size) lzma_attr_warn_unused_result; /** - * \brief Calculates Total Size + * \brief Calculates Unpadded Size * - * This function can be useful after decoding a Block to get Total Size + * This function can be useful after decoding a Block to get Unpadded Size * that is stored in Index. * - * \return Total Size on success, or zero on error. + * \return Unpadded Size on success, or zero on error. + */ +extern lzma_vli lzma_block_unpadded_size(const lzma_block *options) + lzma_attr_pure; + + +/** + * \brief Calculates the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. */ -extern lzma_vli lzma_block_total_size_get(const lzma_block *options) +extern lzma_vli lzma_block_total_size(const lzma_block *options) lzma_attr_pure; @@ -255,8 +268,6 @@ extern lzma_vli lzma_block_total_size_get(const lzma_block *options) * \return - LZMA_OK: All good, continue with lzma_code(). 
* - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR - * - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit) - * have been reached already. * - LZMA_UNSUPPORTED_CHECK: options->check specifies a Check * that is not supported by this build of liblzma. Initializing * the encoder failed. diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index 53e5737e..b4fb02a7 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -55,6 +55,14 @@ typedef struct { /** + * \brief Maximum number of filters in a chain + * + * FIXME desc + */ +#define LZMA_FILTERS_MAX 4 + + +/** * \brief Test if the given Filter ID is supported for encoding * * Returns true if the given Filter ID is supported for encoding by this diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h index 522969d4..d6072614 100644 --- a/src/liblzma/api/lzma/index.h +++ b/src/liblzma/api/lzma/index.h @@ -32,12 +32,24 @@ typedef struct lzma_index_s lzma_index; */ typedef struct { /** - * Total Size of a Block. + * \brief Total encoded size of a Block including Block Padding + * + * This value is useful if you need to know the actual size of the + * Block that the Block decoder will read. */ lzma_vli total_size; /** - * Uncompressed Size of a Block + * \brief Encoded size of a Block excluding Block Padding + * + * This value is stored in the Index. When doing random-access + * reading, you should give this value to the Block decoder along + * with uncompressed_size. + */ + lzma_vli unpadded_size; + + /** + * \brief Uncompressed Size of a Block */ lzma_vli uncompressed_size; @@ -80,7 +92,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * \brief Add a new Record to an Index * * \param index Pointer to a lzma_index structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block, or * LZMA_VLI_UNKNOWN to indicate padding.
* @@ -92,7 +104,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * - LZMA_PROG_ERROR */ extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h index 58fc8061..001e6b5c 100644 --- a/src/liblzma/api/lzma/index_hash.h +++ b/src/liblzma/api/lzma/index_hash.h @@ -57,7 +57,7 @@ extern void lzma_index_hash_end( * \brief Add a new Record to an Index hash * * \param index Pointer to a lzma_index_hash structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block * * \return - LZMA_OK @@ -67,7 +67,7 @@ extern void lzma_index_hash_end( * used when lzma_index_hash_decode() has already been used. */ extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index f9101c7d..2bfe0b92 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -33,13 +33,13 @@ struct lzma_coder_s { lzma_next_coder next; /// Decoding options; we also write Compressed Size and Uncompressed - /// Size back to this structure when the encoding has been finished. + /// Size back to this structure when the decoding has been finished. 
lzma_block *options; - /// Compressed Size calculated while encoding + /// Compressed Size calculated while decoding lzma_vli compressed_size; - /// Uncompressed Size calculated while encoding + /// Uncompressed Size calculated while decoding lzma_vli uncompressed_size; /// Maximum allowed Compressed Size; this takes into account the @@ -110,6 +110,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -118,30 +131,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Compressed Data is padded to a multiple of four bytes. while (coder->compressed_size & 3) { + // We use compressed_size here just to get the Padding + // right. The actual Compressed Size was stored to + // coder->options already, and won't be modified by + // us anymore. + ++coder->compressed_size; + if (*in_pos >= in_size) return LZMA_OK; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - - if (update_size(&coder->compressed_size, 1, - coder->compressed_limit)) - return LZMA_DATA_ERROR; } - // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values given to us. - if (!is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options.
The caller - // may use this information to construct Index. - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; @@ -193,14 +195,11 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, { lzma_next_coder_init(lzma_block_decoder_init, next, allocator); - // While lzma_block_total_size_get() is meant to calculate the Total - // Size, it also validates the options excluding the filters. - if (lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - - // options->check is used for array indexing so we need to know that - // it is in the valid range. - if ((unsigned)(options->check) > LZMA_CHECK_ID_MAX) + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -221,8 +220,8 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->uncompressed_size = 0; // If Compressed Size is not known, we calculate the maximum allowed - // value so that Total Size of the Block still is a valid VLI and - // a multiple of four. + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. next->coder->compressed_limit = options->compressed_size == LZMA_VLI_UNKNOWN ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 3c678f7d..6468cb44 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -27,8 +27,8 @@ /// take into account the headers etc. 
to determine the exact maximum size /// of the Compressed Data field, but the complexity would give us nothing /// useful. Instead, limit the size of Compressed Data so that even with -/// biggest possible Block Header and Check fields the total size of the -/// Block stays as valid VLI. This way we don't produce incorrect output +/// biggest possible Block Header and Check fields the total encoded size of +/// the Block stays as valid VLI. This way we don't produce incorrect output /// if someone will really try creating a Block of 8 EiB. /// /// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of @@ -41,9 +41,9 @@ struct lzma_coder_s { /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Encoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. lzma_block *options; enum { @@ -58,8 +58,8 @@ struct lzma_coder_s { /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Position when writing out the Check field - size_t check_pos; + /// Position in Block Padding and the Check fields + size_t pos; /// Check of the uncompressed data lzma_check_state check; @@ -106,6 +106,11 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, assert(*in_pos == in_size); assert(action == LZMA_FINISH); + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -113,28 +118,21 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Pad Compressed Data to a multiple of four bytes. 
- while (coder->compressed_size & 3) { + while ((coder->compressed_size + coder->pos) & 3) { if (*out_pos >= out_size) return LZMA_OK; out[*out_pos] = 0x00; ++*out_pos; - - // No need to use check for overflow here since we - // have already checked in SEQ_CODE that Compressed - // Size will stay in proper limits. - ++coder->compressed_size; + ++coder->pos; } - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); + + coder->pos = 0; coder->sequence = SEQ_CHECK; // Fall through @@ -144,11 +142,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, = lzma_check_size(coder->options->check); while (*out_pos < out_size) { - out[*out_pos] = coder->check.buffer.u8[ - coder->check_pos]; + out[*out_pos] = coder->check.buffer.u8[coder->pos]; ++*out_pos; - if (++coder->check_pos == check_size) + if (++coder->pos == check_size) return LZMA_STREAM_END; } @@ -199,9 +196,9 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->options = options; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; + next->coder->pos = 0; // Initialize the check - next->coder->check_pos = 0; lzma_check_init(&next->coder->check, options->check); // Initialize the requested filters. diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 3b8e9f36..8421ac37 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -27,7 +27,7 @@ free_properties(lzma_block *options, lzma_allocator *allocator) // Free allocated filter options. 
The last array member is not // touched after the initialization in the beginning of // lzma_block_header_decode(), so we don't need to touch that here. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { lzma_free(options->filters[i].options, allocator); options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; @@ -48,24 +48,19 @@ lzma_block_header_decode(lzma_block *options, // Initialize the filter options array. This way the caller can // safely free() the options even if an error occurs in this function. - for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; } - size_t in_size = options->header_size; - - // Validate. The caller must have set options->header_size with - // lzma_block_header_size_decode() macro, so it is a programming error - // if these tests fail. - if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN - || in_size > LZMA_BLOCK_HEADER_SIZE_MAX - || (in_size & 3) - || lzma_block_header_size_decode(in[0]) != in_size) + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != options->header_size + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return LZMA_PROG_ERROR; // Exclude the CRC32 field. 
- in_size -= 4; + const size_t in_size = options->header_size - 4; // Verify CRC32 if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) @@ -83,15 +78,9 @@ lzma_block_header_decode(lzma_block *options, return_if_error(lzma_vli_decode(&options->compressed_size, NULL, in, &in_pos, in_size)); - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1) - return LZMA_DATA_ERROR; - - options->compressed_size = (options->compressed_size + 1) * 4; - - // Check that Total Size (that is, size of - // Block Header + Compressed Data + Check) is - // representable as a VLI. - if (lzma_block_total_size_get(options) == 0) + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. + if (lzma_block_unpadded_size(options) == 0) return LZMA_DATA_ERROR; } else { options->compressed_size = LZMA_VLI_UNKNOWN; diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 9326350b..b9980363 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -25,21 +25,20 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_block *options) { // Block Header Size + Block Flags + CRC32. 
- size_t size = 1 + 1 + 4; + uint32_t size = 1 + 1 + 4; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1 - || options->compressed_size == 0 - || (options->compressed_size & 3)) + const uint32_t add = lzma_vli_size(options->compressed_size); + if (add == 0 || options->compressed_size == 0) return LZMA_PROG_ERROR; - size += lzma_vli_size(options->compressed_size / 4 - 1); + size += add; } // Uncompressed Size if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { - const size_t add = lzma_vli_size(options->uncompressed_size); + const uint32_t add = lzma_vli_size(options->uncompressed_size); if (add == 0) return LZMA_PROG_ERROR; @@ -51,10 +50,9 @@ lzma_block_header_size(lzma_block *options) || options->filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_PROG_ERROR; - for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; - ++i) { + for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; ++i) { // Don't allow too many filters. - if (i == 4) + if (i == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; uint32_t add; @@ -65,12 +63,13 @@ lzma_block_header_size(lzma_block *options) } // Pad to a multiple of four bytes. - options->header_size = (size + 3) & ~(size_t)(3); + options->header_size = (size + 3) & ~UINT32_C(3); - // NOTE: We don't verify that Total Size of the Block stays within - // limits. This is because it is possible that we are called with - // exaggerated values to reserve space for Block Header, and later - // called again with lower, real values. + // NOTE: We don't verify that the encoded size of the Block stays + // within limits. This is because it is possible that we are called + // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve + // space for Block Header, and later called again with lower, + // real values. 
return LZMA_OK; } @@ -79,9 +78,9 @@ lzma_block_header_size(lzma_block *options) extern LZMA_API lzma_ret lzma_block_header_encode(const lzma_block *options, uint8_t *out) { - if ((options->header_size & 3) - || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN - || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) + // Validate everything but filters. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Indicate the size of the buffer _excluding_ the CRC32 field. @@ -90,32 +89,28 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) // Store the Block Header Size. out[0] = out_size / 4; - // We write Block Flags a little later. + // We write Block Flags in pieces. + out[1] = 0x00; size_t out_pos = 2; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - // Compressed Size must be non-zero, fit into a 63-bit - // integer and be a multiple of four. Also the Total Size - // of the Block must fit into 63-bit integer. - if (options->compressed_size == 0 - || (options->compressed_size & 3) - || options->compressed_size - > LZMA_VLI_MAX - || lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - return_if_error(lzma_vli_encode( - options->compressed_size / 4 - 1, NULL, + options->compressed_size, NULL, out, &out_pos, out_size)); + + out[1] |= 0x40; } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) + if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { return_if_error(lzma_vli_encode( options->uncompressed_size, NULL, out, &out_pos, out_size)); + out[1] |= 0x80; + } + // Filter Flags if (options->filters == NULL || options->filters[0].id == LZMA_VLI_UNKNOWN) @@ -124,24 +119,16 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) size_t filter_count = 0; do { // There can be a maximum of four filters.
- if (filter_count == 4) + if (filter_count == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; return_if_error(lzma_filter_flags_encode( options->filters + filter_count, out, &out_pos, out_size)); - } while (options->filters[++filter_count].id - != LZMA_VLI_UNKNOWN); - - // Block Flags - out[1] = filter_count - 1; + } while (options->filters[++filter_count].id != LZMA_VLI_UNKNOWN); - if (options->compressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x40; - - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x80; + out[1] |= filter_count - 1; // Padding memzero(out + out_pos, out_size - out_pos); diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c index 7b46ba32..66e1cad9 100644 --- a/src/liblzma/common/block_util.c +++ b/src/liblzma/common/block_util.c @@ -18,10 +18,11 @@ /////////////////////////////////////////////////////////////////////////////// #include "common.h" +#include "index.h" extern LZMA_API lzma_ret -lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) +lzma_block_compressed_size(lzma_block *options, lzma_vli total_size) { // Validate. if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN @@ -45,29 +46,47 @@ lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) extern LZMA_API lzma_vli -lzma_block_total_size_get(const lzma_block *options) +lzma_block_unpadded_size(const lzma_block *options) { - // Validate the values that we are interested in. + // Validate the values that we are interested in i.e. all but + // Uncompressed Size and the filters. + // + // NOTE: This function is used for validation too, so it is + // essential that these checks are always done even if + // Compressed Size is unknown. 
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX || (options->header_size & 3) - || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + || !lzma_vli_is_valid(options->compressed_size) + || options->compressed_size == 0 + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return 0; // If Compressed Size is unknown, return that we cannot know - // Total Size either. + // size of the Block either. if (options->compressed_size == LZMA_VLI_UNKNOWN) return LZMA_VLI_UNKNOWN; - const lzma_vli total_size = options->compressed_size - + options->header_size - + lzma_check_size(options->check); + // Calculate Unpadded Size and validate it. + const lzma_vli unpadded_size = options->compressed_size + + options->header_size + + lzma_check_size(options->check); - // Validate the calculated Total Size. - if (options->compressed_size > LZMA_VLI_MAX - || (options->compressed_size & 3) - || total_size > LZMA_VLI_MAX) + assert(unpadded_size >= UNPADDED_SIZE_MIN); + if (unpadded_size > UNPADDED_SIZE_MAX) return 0; - return total_size; + return unpadded_size; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size(const lzma_block *options) +{ + lzma_vli unpadded_size = lzma_block_unpadded_size(options); + + if (unpadded_size != 0 && unpadded_size != LZMA_VLI_UNKNOWN) + unpadded_size = vli_ceil4(unpadded_size); + + return unpadded_size; } diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 275cf05f..0ee8574c 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -66,10 +66,6 @@ | LZMA_CONCATENATED ) -/////////// -// Types // -/////////// - /// Type of encoder/decoder specific data; the actual structure is defined /// differently in different coders. 
typedef struct lzma_coder_s lzma_coder; @@ -187,10 +183,6 @@ struct lzma_internal_s { }; -/////////////// -// Functions // -/////////////// - /// Allocates memory extern void *lzma_alloc(size_t size, lzma_allocator *allocator) lzma_attribute((malloc)); diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c index 71ceeca0..03b6859a 100644 --- a/src/liblzma/common/filter_common.c +++ b/src/liblzma/common/filter_common.c @@ -164,7 +164,7 @@ validate_chain(const lzma_filter *filters, size_t *count) // There must be 1-4 filters. The last filter must be usable as // the last filter in the chain. At maximum of three filters are // allowed to change the size of the data. - if (i > LZMA_BLOCK_FILTERS_MAX || !last_ok || changes_size_count > 3) + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) return LZMA_OPTIONS_ERROR; *count = i; @@ -182,7 +182,7 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, return_if_error(validate_chain(options, &count)); // Set the filter functions and copy the options pointer. - lzma_filter_info filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { // The order of the filters is reversed in the diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index f965749f..1fe65650 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -20,24 +20,34 @@ #include "index.h" -/// Number of Records to allocate at once. +/// Number of Records to allocate at once in the unrolled list. #define INDEX_GROUP_SIZE 256 typedef struct lzma_index_group_s lzma_index_group; struct lzma_index_group_s { - /// Next group + /// Previous group lzma_index_group *prev; - /// Previous group + /// Next group lzma_index_group *next; /// Index of the last Record in this group size_t last; - /// Total Size fields as cumulative sum relative to the beginning - /// of the group. 
The total size of the group is total_sums[last]. - lzma_vli total_sums[INDEX_GROUP_SIZE]; + /// Unpadded Size fields as special cumulative sum relative to the + /// beginning of the group. It's special in the sense that the previous + /// value is rounded up to the next multiple of four before + /// calculating the new value. The total encoded size of the Blocks + /// in the group is unpadded_sums[last] rounded up to the next + /// multiple of four. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored + /// values are 39, 97 (40 + 57), and 181 (100 + 81). The total + /// encoded size of these Blocks is 184. + /// + /// This encoding is nice from point of view of lzma_index_locate(). + lzma_vli unpadded_sums[INDEX_GROUP_SIZE]; /// Uncompressed Size fields as cumulative sum relative to the /// beginning of the group. The uncompressed size of the group is @@ -56,19 +66,13 @@ struct lzma_index_s { /// Uncompressed size of the Stream lzma_vli uncompressed_size; - /// Number of non-padding records. This is needed by Index encoder. + /// Number of non-padding records. This is needed for Index encoder. lzma_vli count; /// Size of the List of Records field; this is updated every time /// a new non-padding Record is added. lzma_vli index_list_size; - /// This is zero if no Indexes have been combined with - /// lzma_index_cat(). With combined Indexes, this contains the sizes - /// of all but latest the Streams, including possible Stream Padding - /// fields. - lzma_vli padding_size; - /// First group of Records lzma_index_group *head; @@ -80,8 +84,8 @@ struct lzma_index_s { /// Group where the current read position is. lzma_index_group *group; - /// The most recently read record in *group - lzma_vli record; + /// The most recently read Record in *group + size_t record; /// Uncompressed offset of the beginning of *group relative /// to the beginning of the Stream @@ -102,6 +106,10 @@ struct lzma_index_s { /// Stream.
This is needed when a new Index is concatenated /// to this lzma_index structure. lzma_vli index_list_size; + + /// Total size of all but the last Stream and all Stream + /// Padding fields. + lzma_vli streams_size; } old; }; @@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator) i->uncompressed_size = 0; i->count = 0; i->index_list_size = 0; - i->padding_size = 0; i->head = NULL; i->tail = NULL; i->current.group = NULL; i->old.count = 0; i->old.index_list_size = 0; + i->old.streams_size = 0; return i; } @@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i) { // If multiple Streams are concatenated, the Stream Header, Index, // and Stream Footer fields of all but the last Stream are already - // included in padding_size. Thus, we need to calculate only the + // included in old.streams_size. Thus, we need to calculate only the // size of the last Index, not all Indexes. - return i->total_size + i->padding_size + return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size + index_size(i->count - i->old.count, i->index_list_size - i->old.index_list_size) - + LZMA_STREAM_HEADER_SIZE * 2; + + LZMA_STREAM_HEADER_SIZE; } @@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i) } -/// Helper function for index_append() +/// Appends a new Record to the Index. If needed, this allocates a new +/// Record group. static lzma_ret index_append_real(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size, + lzma_vli unpadded_size, lzma_vli uncompressed_size, bool is_padding) { // Add the new record. 
@@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, g->prev = i->tail; g->next = NULL; g->last = 0; - g->total_sums[0] = total_size; + g->unpadded_sums[0] = unpadded_size; g->uncompressed_sums[0] = uncompressed_size; g->paddings[0] = is_padding; @@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } else { // i->tail has space left for at least one record. - i->tail->total_sums[i->tail->last + 1] - = i->tail->total_sums[i->tail->last] - + total_size; + i->tail->unpadded_sums[i->tail->last + 1] + = unpadded_size + vli_ceil4( + i->tail->unpadded_sums[i->tail->last]); i->tail->uncompressed_sums[i->tail->last + 1] = i->tail->uncompressed_sums[i->tail->last] + uncompressed_size; @@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } -static lzma_ret -index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, - lzma_vli uncompressed_size, bool is_padding) +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) { - if (total_size > LZMA_VLI_MAX + if (unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; // This looks a bit ugly. We want to first validate that the Index // and Stream stay in valid limits after adding this Record. After @@ -280,65 +290,38 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, // slightly more correct to validate before allocating, YMMV). lzma_ret ret; - if (is_padding) { - assert(uncompressed_size == 0); + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); - // First update the info so we can validate it. 
- i->padding_size += total_size; - - if (i->padding_size > LZMA_VLI_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, true); - - // If something went wrong, undo the updated value. - if (ret != LZMA_OK) - i->padding_size -= total_size; + const lzma_vli total_size = vli_ceil4(unpadded_size); - } else { - // First update the overall info so we can validate it. - const lzma_vli index_list_size_add - = lzma_vli_size(total_size / 4 - 1) - + lzma_vli_size(uncompressed_size); - - i->total_size += total_size; - i->uncompressed_size += uncompressed_size; - ++i->count; - i->index_list_size += index_list_size_add; - - if (i->total_size > LZMA_VLI_MAX - || i->uncompressed_size > LZMA_VLI_MAX - || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, false); + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; - if (ret != LZMA_OK) { - // Something went wrong. Undo the updates. - i->total_size -= total_size; - i->uncompressed_size -= uncompressed_size; - --i->count; - i->index_list_size -= index_list_size_add; - } + if (i->total_size > LZMA_VLI_MAX + || i->uncompressed_size > LZMA_VLI_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, unpadded_size, + uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. 
+ i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } return ret; } -extern LZMA_API lzma_ret -lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - return index_append(i, allocator, - total_size, uncompressed_size, false); -} - - /// Initialize i->current to point to the first Record. static bool init_current(lzma_index *i) @@ -370,10 +353,10 @@ previous_group(lzma_index *i) i->current.record = i->current.group->last; // Then update the offsets. - i->current.stream_offset -= i->current.group - ->total_sums[i->current.group->last]; - i->current.uncompressed_offset -= i->current.group - ->uncompressed_sums[i->current.group->last]; + i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); + i->current.uncompressed_offset -= i->current.group->uncompressed_sums[ + i->current.group->last]; return; } @@ -386,8 +369,8 @@ next_group(lzma_index *i) assert(i->current.group->next != NULL); // Update the offsets first. - i->current.stream_offset += i->current.group - ->total_sums[i->current.group->last]; + i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); i->current.uncompressed_offset += i->current.group ->uncompressed_sums[i->current.group->last]; @@ -403,30 +386,39 @@ next_group(lzma_index *i) static void set_info(const lzma_index *i, lzma_index_record *info) { - info->total_size = i->current.group->total_sums[i->current.record]; + // First copy the cumulative sizes from the current Record of the + // current group. + info->unpadded_size + = i->current.group->unpadded_sums[i->current.record]; + info->total_size = vli_ceil4(info->unpadded_size); info->uncompressed_size = i->current.group->uncompressed_sums[ i->current.record]; + // Copy the start offsets of this group. 
info->stream_offset = i->current.stream_offset; info->uncompressed_offset = i->current.uncompressed_offset; // If it's not the first Record in this group, we need to do some // adjustements. if (i->current.record > 0) { - // _sums[] are cumulative, thus we need to substract the - // _previous _sums[] to get the sizes of this Record. - info->total_size -= i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_size -= i->current.group + // Since the _sums[] are cumulative, we subtract the sums of + // the previous Record to get the sizes of the current Record, + // and add the sums of the previous Record to the offsets. + // With unpadded_sums[] we need to take into account that it + // uses a slightly unusual way to do the cumulative summing. + const lzma_vli total_sum + = vli_ceil4(i->current.group->unpadded_sums[ + i->current.record - 1]); + + const lzma_vli uncompressed_sum = i->current.group ->uncompressed_sums[i->current.record - 1]; - // i->current.{total,uncompressed}_offsets have the offset - // of the beginning of the group, thus we need to add the - // appropriate amount to get the offsetes of this Record. - info->stream_offset += i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_offset += i->current.group - ->uncompressed_sums[i->current.record - 1]; + info->total_size -= total_sum; + info->unpadded_size -= total_sum; + info->uncompressed_size -= uncompressed_sum; + + info->stream_offset += total_sum; + info->uncompressed_offset += uncompressed_sum; } return; @@ -548,11 +540,22 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Check that the combined size of the Indexes stays within limits. 
{ + const lzma_vli dest_size = index_size_unpadded( + dest->count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Check that the combined size of the "files" (combined total + // encoded sizes) stays within limits. + { const lzma_vli dest_size = lzma_index_file_size(dest); const lzma_vli src_size = lzma_index_file_size(src); - if (dest_size + src_size > LZMA_VLI_UNKNOWN + if (dest_size + src_size > LZMA_VLI_MAX || dest_size + src_size + padding - > LZMA_VLI_UNKNOWN) + > LZMA_VLI_MAX) return LZMA_DATA_ERROR; } @@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // // NOTE: This cannot overflow, because Index Size is always // far smaller than LZMA_VLI_MAX, and adding two VLIs - // (Index Size and padding) doesn't overflow. It may become - // an invalid VLI if padding is huge, but that is caught by - // index_append(). + // (Index Size and padding) doesn't overflow. padding += index_size(dest->count - dest->old.count, dest->index_list_size - dest->old.index_list_size) + LZMA_STREAM_HEADER_SIZE * 2; + // The above cannot overflow, but it may become an invalid VLI. + if (padding > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + // Add the padding Record. - return_if_error(index_append( - dest, allocator, padding, 0, true)); + { + lzma_ret ret; + + // First update the info so we can validate it. + dest->old.streams_size += padding; + + if (dest->old.streams_size > LZMA_VLI_MAX + || lzma_index_file_size(dest) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(dest, allocator, + padding, 0, true); + + // If something went wrong, undo the updated value and return + // the error. 
+ if (ret != LZMA_OK) { + dest->old.streams_size -= padding; + return ret; + } + } // Avoid wasting lots of memory if src->head has only a few records // that fit into dest->tail. That is, combine two groups if possible. @@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, if (src->head != NULL && src->head->last + 1 <= INDEX_GROUP_SIZE - dest->tail->last - 1) { // Copy the first Record. - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[0]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[0]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[dest->tail->last] @@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Copy the rest. for (size_t i = 1; i < src->head->last; ++i) { - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[i + 1] - - src->head->total_sums[i]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[i + 1] + - src->head->unpadded_sums[i]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[ @@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, dest->old.count = dest->count + src->old.count; dest->old.index_list_size = dest->index_list_size + src->old.index_list_size; + dest->old.streams_size += src->old.streams_size; // Update overall information. dest->total_size += src->total_size; dest->uncompressed_size += src->uncompressed_size; dest->count += src->count; dest->index_list_size += src->index_list_size; - dest->padding_size += src->padding_size; // *src has nothing left but the base structure. 
lzma_free(src, allocator); @@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) // Copy the arrays so that we don't read uninitialized memory. const size_t count = src_group->last + 1; - memcpy(dest_group->total_sums, src_group->total_sums, + memcpy(dest_group->unpadded_sums, src_group->unpadded_sums, sizeof(lzma_vli) * count); memcpy(dest_group->uncompressed_sums, src_group->uncompressed_sums, @@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b) while (ag != NULL && bg != NULL) { const size_t count = ag->last + 1; if (ag->last != bg->last - || memcmp(ag->total_sums, - bg->total_sums, + || memcmp(ag->unpadded_sums, + bg->unpadded_sums, sizeof(lzma_vli) * count) != 0 || memcmp(ag->uncompressed_sums, bg->uncompressed_sums, diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h index df897367..79719dd7 100644 --- a/src/liblzma/common/index.h +++ b/src/liblzma/common/index.h @@ -23,14 +23,11 @@ #include "common.h" -/// Maximum encoded value of Total Size. -#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_MAX / 4 - 1) +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) -/// Convert the real Total Size value to a value that is stored to the Index. -#define total_size_encode(size) ((size) / 4 - 1) - -/// Convert the encoded Total Size value from Index to the real Total Size. -#define total_size_decode(size) (((size) + 1) * 4) +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) /// Get the size of the Index Padding field. This is needed by Index encoder @@ -38,6 +35,16 @@ extern uint32_t lzma_index_padding_size(const lzma_index *i); +/// Round the variable-length integer to the next multiple of four. 
+static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding static inline lzma_vli index_size_unpadded(lzma_vli count, lzma_vli index_list_size) { @@ -46,20 +53,20 @@ index_size_unpadded(lzma_vli count, lzma_vli index_list_size) } +/// Calculate the size of the Index field including Index Padding static inline lzma_vli index_size(lzma_vli count, lzma_vli index_list_size) { - // Round up to a mulitiple of four. - return (index_size_unpadded(count, index_list_size) + 3) - & ~LZMA_VLI_C(3); + return vli_ceil4(index_size_unpadded(count, index_list_size)); } +/// Calculate the total size of the Stream static inline lzma_vli -index_stream_size( - lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) { - return LZMA_STREAM_HEADER_SIZE + total_size + return LZMA_STREAM_HEADER_SIZE + blocks_size + index_size(count, index_list_size) + LZMA_STREAM_HEADER_SIZE; } diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c index ae66595a..5faac161 100644 --- a/src/liblzma/common/index_decoder.c +++ b/src/liblzma/common/index_decoder.c @@ -25,7 +25,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -38,8 +38,8 @@ struct lzma_coder_s { /// Number of Records left to decode. lzma_vli count; - /// The most recent Total Size field - lzma_vli total_size; + /// The most recent Unpadded Size field + lzma_vli unpadded_size; /// The most recent Uncompressed Size field lzma_vli uncompressed_size; @@ -91,14 +91,14 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; coder->sequence = coder->count == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? 
SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = coder->sequence == SEQ_TOTAL - ? &coder->total_size + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size : &coder->uncompressed_size; ret = lzma_vli_decode(size, &coder->pos, @@ -109,27 +109,26 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; - if (coder->sequence == SEQ_TOTAL) { - // Validate that encoded Total Size isn't too big. - if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - // Convert the encoded Total Size to the real - // Total Size. - coder->total_size = total_size_decode( - coder->total_size); coder->sequence = SEQ_UNCOMPRESSED; } else { // Add the decoded Record to the Index. return_if_error(lzma_index_append( coder->index, allocator, - coder->total_size, + coder->unpadded_size, coder->uncompressed_size)); // Check if this was the last Record. coder->sequence = --coder->count == 0 ? SEQ_PADDING_INIT - : SEQ_TOTAL; + : SEQ_UNPADDED; } break; diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c index 3005f835..522dbb53 100644 --- a/src/liblzma/common/index_encoder.c +++ b/src/liblzma/common/index_encoder.c @@ -26,7 +26,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_NEXT, SEQ_PADDING, @@ -97,18 +97,20 @@ index_encode(lzma_coder *coder, break; } - // Total Size must be a multiple of four. - if (coder->record.total_size & 3) + // Unpadded Size must be within valid limits. 
+ if (coder->record.unpadded_size < UNPADDED_SIZE_MIN + || coder->record.unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_PROG_ERROR; - coder->sequence = SEQ_TOTAL; + coder->sequence = SEQ_UNPADDED; // Fall through - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - const lzma_vli size = coder->sequence == SEQ_TOTAL - ? total_size_encode(coder->record.total_size) + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->record.unpadded_size : coder->record.uncompressed_size; ret = lzma_vli_encode(size, &coder->pos, diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c index 5e581838..162094d1 100644 --- a/src/liblzma/common/index_hash.c +++ b/src/liblzma/common/index_hash.c @@ -23,8 +23,8 @@ typedef struct { - /// Sum of the Total Size fields - lzma_vli total_size; + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; /// Sum of the Uncompressed Size fields lzma_vli uncompressed_size; @@ -35,7 +35,7 @@ typedef struct { /// Size of the List of Index Records as bytes lzma_vli index_list_size; - /// Check calculated from Total Sizes and Uncompressed Sizes. + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. lzma_check_state check; } lzma_index_hash_info; @@ -45,7 +45,7 @@ struct lzma_index_hash_s { enum { SEQ_BLOCK, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -61,8 +61,8 @@ struct lzma_index_hash_s { /// Number of Records not fully decoded lzma_vli remaining; - /// Total Size currently being read from an Index Record. - lzma_vli total_size; + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; /// Uncompressed Size currently being read from an Index Record. 
lzma_vli uncompressed_size; @@ -86,15 +86,15 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) } index_hash->sequence = SEQ_BLOCK; - index_hash->blocks.total_size = 0; + index_hash->blocks.blocks_size = 0; index_hash->blocks.uncompressed_size = 0; index_hash->blocks.count = 0; index_hash->blocks.index_list_size = 0; - index_hash->records.total_size = 0; + index_hash->records.blocks_size = 0; index_hash->records.uncompressed_size = 0; index_hash->records.count = 0; index_hash->records.index_list_size = 0; - index_hash->total_size = 0; + index_hash->unpadded_size = 0; index_hash->uncompressed_size = 0; index_hash->pos = 0; index_hash->crc32 = 0; @@ -128,16 +128,16 @@ lzma_index_hash_size(const lzma_index_hash *index_hash) /// Updates the sizes and the hash without any validation. static lzma_ret -hash_append(lzma_index_hash_info *info, lzma_vli total_size, +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, lzma_vli uncompressed_size) { - info->total_size += total_size; + info->blocks_size += vli_ceil4(unpadded_size); info->uncompressed_size += uncompressed_size; - info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + info->index_list_size += lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); ++info->count; - const lzma_vli sizes[2] = { total_size, uncompressed_size }; + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; lzma_check_update(&info->check, LZMA_CHECK_BEST, (const uint8_t *)(sizes), sizeof(sizes)); @@ -146,26 +146,27 @@ hash_append(lzma_index_hash_info *info, lzma_vli total_size, extern LZMA_API lzma_ret -lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate the arguments. 
- if (index_hash->sequence != SEQ_BLOCK || total_size == 0 - || total_size > LZMA_VLI_MAX || (total_size & 3) + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; // Update the hash. return_if_error(hash_append(&index_hash->blocks, - total_size, uncompressed_size)); + unpadded_size, uncompressed_size)); // Validate the properties of *info are still in allowed limits. - if (index_hash->blocks.total_size > LZMA_VLI_MAX + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX || index_size(index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_BACKWARD_SIZE_MAX - || index_stream_size(index_hash->blocks.total_size, + || index_stream_size(index_hash->blocks.blocks_size, index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_VLI_MAX) @@ -216,14 +217,14 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Handle the special case when there are no Blocks. index_hash->sequence = index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = index_hash->sequence == SEQ_TOTAL - ? &index_hash->total_size + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? 
&index_hash->unpadded_size : &index_hash->uncompressed_size; ret = lzma_vli_decode(size, &index_hash->pos, @@ -234,18 +235,17 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, ret = LZMA_OK; index_hash->pos = 0; - if (index_hash->sequence == SEQ_TOTAL) { - if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - index_hash->total_size = total_size_decode( - index_hash->total_size); - index_hash->sequence = SEQ_UNCOMPRESSED; } else { // Update the hash. return_if_error(hash_append(&index_hash->records, - index_hash->total_size, + index_hash->unpadded_size, index_hash->uncompressed_size)); // Verify that we don't go over the known sizes. Note @@ -254,8 +254,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // that values in index_hash->blocks are already // validated and we are fine as long as we don't // exceed them in index_hash->records. - if (index_hash->blocks.total_size - < index_hash->records.total_size + if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size || index_hash->blocks.uncompressed_size < index_hash->records.uncompressed_size || index_hash->blocks.index_list_size @@ -264,7 +264,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Check if this was the last Record. index_hash->sequence = --index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; @@ -288,8 +288,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, } // Compare the sizes. 
- if (index_hash->blocks.total_size - != index_hash->records.total_size + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size || index_hash->blocks.uncompressed_size != index_hash->records.uncompressed_size || index_hash->blocks.index_list_size diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index e137685f..9be47893 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -190,7 +190,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so // we don't need to do it here. - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; coder->block_options.filters = filters; // Decode the Block Header. @@ -216,7 +216,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Free the allocated filter options since they are needed // only to initialize the Block decoder. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) lzma_free(filters[i].options, allocator); coder->block_options.filters = NULL; @@ -243,7 +243,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, - lzma_block_total_size_get( + lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); @@ -270,7 +270,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Fall through - case SEQ_STREAM_FOOTER: + case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. 
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); @@ -306,6 +306,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; + } // Fall through diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c index 0376fd3b..e52ad692 100644 --- a/src/liblzma/common/stream_encoder.c +++ b/src/liblzma/common/stream_encoder.c @@ -157,11 +157,11 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; // Add a new Index Record. - const lzma_vli total_size = lzma_block_total_size_get( + const lzma_vli unpadded_size = lzma_block_unpadded_size( &coder->block_options); - assert(total_size != 0); + assert(unpadded_size != 0); return_if_error(lzma_index_append(coder->index, allocator, - total_size, + unpadded_size, coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_INIT; diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index d2a77ba4..53ee1c1e 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -157,14 +157,14 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) uint32_t copy_size = dict->size - copy_pos; if (copy_size < left) { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); dict->pos += copy_size; copy_size = left - copy_size; memcpy(dict->buf + dict->pos, dict->buf, copy_size); dict->pos += copy_size; } else { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, left); dict->pos += left; } diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 7cf06988..3096b442 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -211,7 +211,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } - case FLAG_END_SUBFILTER: + case 
FLAG_END_SUBFILTER: { if (coder->padding != 0 || (in[*in_pos] & 0x0F) || coder->subfilter.code == NULL || !coder->got_output_with_subfilter) @@ -250,6 +250,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++*in_pos; break; + } default: return LZMA_DATA_ERROR; |