diff options
Diffstat (limited to '')
98 files changed, 3836 insertions, 6746 deletions
diff --git a/src/liblzma/check/check_byteswap.h b/src/common/bswap.h index 264def26..8f82a8f4 100644 --- a/src/liblzma/check/check_byteswap.h +++ b/src/common/bswap.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file check_byteswap.h -/// \brief Byteswapping needed by the checks +/// \file bswap.h +/// \brief Byte swapping // // This code has been put into the public domain. // @@ -11,18 +11,19 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_CHECK_BYTESWAP_H -#define LZMA_CHECK_BYTESWAP_H +#ifndef LZMA_BSWAP_H +#define LZMA_BSWAP_H -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif +// NOTE: We assume that config.h is already #included. // byteswap.h is a GNU extension. It contains inline assembly versions // for byteswapping. When byteswap.h is not available, we use generic code. #ifdef HAVE_BYTESWAP_H # include <byteswap.h> #else +# define bswap_16(num) \ + (((num) << 8) | ((num) >> 8)) + # define bswap_32(num) \ ( (((num) << 24) ) \ | (((num) << 8) & UINT32_C(0x00FF0000)) \ diff --git a/src/common/integer.h b/src/common/integer.h new file mode 100644 index 00000000..136a0f8d --- /dev/null +++ b/src/common/integer.h @@ -0,0 +1,167 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file integer.h +/// \brief Reading and writing integers from and to buffers +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INTEGER_H +#define LZMA_INTEGER_H + +// I'm aware of AC_CHECK_ALIGNED_ACCESS_REQUIRED from Autoconf archive, but +// it's not useful for us. 
We don't care if unaligned access is supported, +// we care if it is fast. Some systems can emulate unaligned access in +// software, which is horribly slow; we want to use byte-by-byte access on +// such systems but the Autoconf test would detect such a system as +// supporting unaligned access. +// +// NOTE: HAVE_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and +// 32-bit integer loads and stores. 64-bit integers may or may not work. +// That's why 64-bit functions are commented out. +#ifdef HAVE_FAST_UNALIGNED_ACCESS + +// On big endian, we need byte swapping. +// +// TODO: Big endian PowerPC supports byte swapping load and store instructions +// that also allow unaligned access. Inline assembler could be OK for that. +#ifdef WORDS_BIGENDIAN +# include "bswap.h" +# define integer_convert_16(n) bswap_16(n) +# define integer_convert_32(n) bswap_32(n) +# define integer_convert_64(n) bswap_64(n) +#else +# define integer_convert_16(n) (n) +# define integer_convert_32(n) (n) +# define integer_convert_64(n) (n) +#endif + + +static inline uint16_t +integer_read_16(const uint8_t buf[static 2]) +{ + uint16_t ret = *(const uint16_t *)(buf); + return integer_convert_16(ret); +} + + +static inline uint32_t +integer_read_32(const uint8_t buf[static 4]) +{ + uint32_t ret = *(const uint32_t *)(buf); + return integer_convert_32(ret); +} + + +/* +static inline uint64_t +integer_read_64(const uint8_t buf[static 8]) +{ + uint64_t ret = *(const uint64_t *)(buf); + return integer_convert_64(ret); +} +*/ + + +static inline void +integer_write_16(uint8_t buf[static 2], uint16_t num) +{ + *(uint16_t *)(buf) = integer_convert_16(num); +} + + +static inline void +integer_write_32(uint8_t buf[static 4], uint32_t num) +{ + *(uint32_t *)(buf) = integer_convert_32(num); +} + + +/* +static inline void +integer_write_64(uint8_t buf[static 8], uint64_t num) +{ + *(uint64_t *)(buf) = integer_convert_64(num); +} +*/ + + +#else + +static inline uint16_t +integer_read_16(const uint8_t 
buf[static 2]) +{ + uint16_t ret = buf[0] | (buf[1] << 8); + return ret; +} + + +static inline uint32_t +integer_read_32(const uint8_t buf[static 4]) +{ + uint32_t ret = buf[0]; + ret |= (uint32_t)(buf[1]) << 8; + ret |= (uint32_t)(buf[2]) << 16; + ret |= (uint32_t)(buf[3]) << 24; + return ret; +} + + +/* +static inline uint64_t +integer_read_64(const uint8_t buf[static 8]) +{ + uint64_t ret = buf[0]; + ret |= (uint64_t)(buf[1]) << 8; + ret |= (uint64_t)(buf[2]) << 16; + ret |= (uint64_t)(buf[3]) << 24; + ret |= (uint64_t)(buf[4]) << 32; + ret |= (uint64_t)(buf[5]) << 40; + ret |= (uint64_t)(buf[6]) << 48; + ret |= (uint64_t)(buf[7]) << 56; + return ret; +} +*/ + + +static inline void +integer_write_16(uint8_t buf[static 2], uint16_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); +} + + +static inline void +integer_write_32(uint8_t buf[static 4], uint32_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); +} + + +/* +static inline void +integer_write_64(uint8_t buf[static 8], uint64_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); + buf[4] = (uint8_t)(num >> 32); + buf[5] = (uint8_t)(num >> 40); + buf[6] = (uint8_t)(num >> 48); + buf[7] = (uint8_t)(num >> 56); +} +*/ + +#endif + +#endif diff --git a/src/liblzma/api/Makefile.am b/src/liblzma/api/Makefile.am index 83e47444..194f85db 100644 --- a/src/liblzma/api/Makefile.am +++ b/src/liblzma/api/Makefile.am @@ -20,17 +20,14 @@ nobase_include_HEADERS = \ lzma/base.h \ lzma/block.h \ lzma/check.h \ - lzma/copy.h \ lzma/delta.h \ lzma/easy.h \ - lzma/extra.h \ lzma/filter.h \ lzma/index.h \ - lzma/info.h \ + lzma/index_hash.h \ lzma/init.h \ lzma/lzma.h \ lzma/memlimit.h \ - lzma/metadata.h \ lzma/raw.h \ lzma/simple.h \ lzma/stream.h \ diff --git a/src/liblzma/api/lzma.h b/src/liblzma/api/lzma.h index fedcd25b..9dec904f 100644 --- 
a/src/liblzma/api/lzma.h +++ b/src/liblzma/api/lzma.h @@ -96,17 +96,13 @@ extern "C" { #include "lzma/check.h" /* Filters */ -#include "lzma/copy.h" #include "lzma/subblock.h" #include "lzma/simple.h" #include "lzma/delta.h" #include "lzma/lzma.h" -/* Container formats and Metadata */ +/* Container formats */ #include "lzma/block.h" -#include "lzma/index.h" -#include "lzma/extra.h" -#include "lzma/metadata.h" #include "lzma/stream.h" #include "lzma/alone.h" #include "lzma/raw.h" @@ -114,7 +110,8 @@ extern "C" { #include "lzma/easy.h" /* Advanced features */ -#include "lzma/info.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" #include "lzma/alignment.h" #include "lzma/stream_flags.h" #include "lzma/memlimit.h" diff --git a/src/liblzma/api/lzma/alone.h b/src/liblzma/api/lzma/alone.h index 1a6b8e27..72299773 100644 --- a/src/liblzma/api/lzma/alone.h +++ b/src/liblzma/api/lzma/alone.h @@ -22,36 +22,6 @@ /** - * \brief Options for files in the LZMA_Alone format - */ -typedef struct { - /** - * \brief Uncompressed Size and usage of End of Payload Marker - * - * In contrast to .lzma Blocks, LZMA_Alone format cannot have both - * uncompressed size field in the header and end of payload marker. - * If you don't know the uncompressed size beforehand, set it to - * LZMA_VLI_VALUE_UNKNOWN and liblzma will embed end of payload - * marker. - */ - lzma_vli uncompressed_size; - - /** - * \brief LZMA options - * - * The LZMA_Alone format supports only one filter: the LZMA filter. - * - * \note There exists also an undocumented variant of the - * LZMA_Alone format, which uses the x86 filter in - * addition to LZMA. This format was never supported - * by LZMA Utils and is not supported by liblzma either. - */ - lzma_options_lzma lzma; - -} lzma_options_alone; - - -/** * \brief Initializes LZMA_Alone encoder * * LZMA_Alone files have the suffix .lzma like the .lzma Stream files. 
@@ -68,7 +38,7 @@ typedef struct { * - LZMA_PROG_ERROR */ extern lzma_ret lzma_alone_encoder( - lzma_stream *strm, const lzma_options_alone *options); + lzma_stream *strm, const lzma_options_lzma *options); /** diff --git a/src/liblzma/api/lzma/auto.h b/src/liblzma/api/lzma/auto.h index 327e726f..fd5bf7d2 100644 --- a/src/liblzma/api/lzma/auto.h +++ b/src/liblzma/api/lzma/auto.h @@ -29,13 +29,8 @@ * the type of the file has been detected. * * \param strm Pointer to properly prepared lzma_stream - * \param header Pointer to hold a pointer to Extra Records read - * from the Header Metadata Block. Use NULL if - * you don't care about Extra Records. - * \param footer Same as header, but for Footer Metadata Block. * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. */ -extern lzma_ret lzma_auto_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer); +extern lzma_ret lzma_auto_decoder(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h index d39bfe95..b0dfed95 100644 --- a/src/liblzma/api/lzma/base.h +++ b/src/liblzma/api/lzma/base.h @@ -128,6 +128,21 @@ typedef enum { * In the decoder, this is only a warning and decoding can * still proceed normally (but the Check is ignored). */ + + LZMA_FORMAT_ERROR = -8, + /**< + * \brief Unknown file format + */ + + LZMA_MEMLIMIT_ERROR = -9 + /**< + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased. See functions + * lzma_memlimit_get() and lzma_memlimit_set().
+ */ } lzma_ret; diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h index 210c1d87..a8941165 100644 --- a/src/liblzma/api/lzma/block.h +++ b/src/liblzma/api/lzma/block.h @@ -33,95 +33,31 @@ */ typedef struct { /** - * \brief Type of integrity Check - * - * The type of the integrity Check is not stored into the Block - * Header, thus its value must be provided also when decoding. - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_check_type check; - - /** - * \brief Precense of CRC32 of the Block Header - * - * Set this to true if CRC32 of the Block Header should be - * calculated and stored in the Block Header. - * - * There is no way to autodetect if CRC32 is present in the Block - * Header, thus this information must be provided also when decoding. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - lzma_block_header_decoder() - */ - lzma_bool has_crc32; - - /** - * \brief Usage of End of Payload Marker - * - * If this is true, End of Payload Marker is used even if - * Uncompressed Size is known. + * \brief Size of the Block Header * * Read by: - * - lzma_block_header_encoder() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() - */ - lzma_bool has_eopm; - - /** - * \brief True if the Block is a Metadata Block - * - * If this is true, the Metadata bit will be set in the Block Header. - * It is up to the application to store correctly formatted data - * into Metadata Block. 
- * - * Read by: - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - lzma_bool is_metadata; - - /** - * \brief True if Uncompressed Size is in Block Footer - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_bool has_uncompressed_size_in_footer; - - /** - * \brief True if Backward Size is in Block Footer - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() + * - lzma_block_header_size() + * - lzma_block_header_decode() */ - lzma_bool has_backward_size; + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 /** - * \brief True if Block coder should take care of Padding + * \brief Type of integrity Check * - * In liblzma, Stream decoder sets this to true when decoding - * Header Metadata Block or Data Blocks from Multi-Block Stream, - * and to false when decoding Single-Block Stream or Footer - * Metadata Block from a Multi-Block Stream. + * The type of the integrity Check is not stored into the Block + * Header, thus its value must be provided also when decoding. 
* * Read by: * - lzma_block_encoder() * - lzma_block_decoder() */ - lzma_bool handle_padding; + lzma_check_type check; /** * \brief Size of the Compressed Data in bytes @@ -134,12 +70,12 @@ typedef struct { * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() + * - lzma_block_header_decode() * - lzma_block_encoder() * - lzma_block_decoder() */ @@ -163,167 +99,61 @@ typedef struct { * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() + * - lzma_block_header_decode() * - lzma_block_encoder() * - lzma_block_decoder() */ lzma_vli uncompressed_size; /** - * \brief Number of bytes to reserve for Compressed Size - * - * This is useful if you want to be able to store the Compressed Size - * to the Block Header, but you don't know it when starting to encode. - * Setting this to non-zero value at maximum of LZMA_VLI_BYTES_MAX, - * the Block Header encoder will force the Compressed Size field to - * occupy specified number of bytes. You can later rewrite the Block - * Header to contain correct information by using otherwise identical - * lzma_options_block structure except the correct compressed_size. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - uint32_t compressed_reserve; - - /** - * \brief Number of bytes to reserve for Uncompressed Size - * - * See the description of compressed_size above. 
- * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - uint32_t uncompressed_reserve; - - /** - * \brief Total Size of the Block in bytes - * - * This is useful in the decoder, which can verify the Total Size - * if it is known from Index. - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - * - * Written by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli total_size; - - /** - * \brief Upper limit of Total Size - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli total_limit; - - /** - * \brief Upper limit of Uncompressed Size - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli uncompressed_limit; - - /** * \brief Array of filters * - * There can be at maximum of seven filters. The end of the array - * is marked with .id = LZMA_VLI_VALUE_UNKNOWN. Minimum number of - * filters is zero; in that case, an implicit Copy filter is used. + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_VALUE_UNKNOWN. * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder(): Note that this does NOT free() - * the old filter options structures. If decoding fails, the - * caller must take care of freeing the options structures - * that may have been allocated and decoded before the error - * occurred. - */ - lzma_options_filter filters[8]; - - /** - * \brief Size of the Padding field - * - * The Padding field exist to allow aligning the Compressed Data field - * optimally in the Block. See lzma_options_stream.alignment in - * stream.h for more information. 
- * - * If you want the Block Header encoder to automatically calculate - * optimal size for the Padding field by looking at the information - * in filters[], set this to LZMA_BLOCK_HEADER_PADDING_AUTO. In that - * case, you must also set the aligmnet variable to tell the the - * encoder the aligmnet of the beginning of the Block Header. - * - * The decoder never sets this to LZMA_BLOCK_HEADER_PADDING_AUTO. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder(): Note that this doesn't - * accept LZMA_BLOCK_HEADER_PADDING_AUTO. - * - * Written by (these never set padding to - * LZMA_BLOCK_HEADER_PADDING_AUTO): - * - lzma_block_header_size() - * - lzma_block_header_decoder() + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_VALUE_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_VALUE_UNKNOWN and NULL. + * + * \note Because the array is terminated with + * .id = LZMA_VLI_VALUE_UNKNOWN, the actual array must + * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. */ - int32_t padding; -# define LZMA_BLOCK_HEADER_PADDING_AUTO (-1) -# define LZMA_BLOCK_HEADER_PADDING_MIN 0 -# define LZMA_BLOCK_HEADER_PADDING_MAX 31 + lzma_options_filter *filters; +# define LZMA_BLOCK_FILTERS_MAX 4 - /** - * \brief Alignment of the beginning of the Block Header - * - * This variable is read only if padding has been set to - * LZMA_BLOCK_HEADER_PADDING_AUTO.
- * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - */ - uint32_t alignment; +} lzma_options_block; - /** - * \brief Size of the Block Header - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - * - * Written by: - * - lzma_block_header_size() - * - lzma_block_header_decoder() - */ - uint32_t header_size; -} lzma_options_block; +/** + * \brief Decodes the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_options_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) /** - * \brief Calculates the size of Header Padding and Block Header + * \brief Calculates the size of Block Header * * \return - LZMA_OK: Size calculated successfully and stored to * options->header_size. @@ -353,24 +183,62 @@ extern lzma_ret lzma_block_header_size(lzma_options_block *options); * - LZMA_PROG_ERROR */ extern lzma_ret lzma_block_header_encode( - uint8_t *out, const lzma_options_block *options); + const lzma_options_block *options, uint8_t *out); /** - * \brief Initializes Block Header decoder + * \brief Decodes Block Header * - * Because the results of this decoder are placed into *options, - * strm->next_in, strm->avail_in, and strm->total_in are not used. + * Decoding of the Block options is done with a single call instead of + * first initializing and then doing the actual work with lzma_code(). * - * The only valid `action' with lzma_code() is LZMA_RUN. + * \param options Destination for block options + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc(). + * \param in Beginning of the input buffer. 
This must be + * at least options->header_size bytes. * - * \return - LZMA_OK: Encoding was successful. options->header_size + * \return - LZMA_OK: Decoding was successful. options->header_size * bytes were written to output buffer. * - LZMA_HEADER_ERROR: Invalid or unsupported options. * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_block_header_decoder( - lzma_stream *strm, lzma_options_block *options); +extern lzma_ret lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in); + + +/** + * \brief Sets Compressed Size according to Total Size + * + * Block Header stores Compressed Size, but Index has Total Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Total Size. This function does + * exactly that with error checking, so application doesn't need to check, + * for example, if the value in Index is too small to contain even the + * Block Header. Note that you need to call this function after decoding + * the Block Header field. + * + * \return - LZMA_OK: options->compressed_size was set successfully. + * - LZMA_DATA_ERROR: total_size is too small compared to + * options->header_size and lzma_check_sizes[options->check]. + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * total_size and options->header_size must be multiples + * of four, total_size must be at least 12, and + * options->header_size between 8 and 1024 inclusive. + */ +extern lzma_ret lzma_block_total_size_set( + lzma_options_block *options, lzma_vli total_size); + + +/** + * \brief Calculates Total Size + * + * This function can be useful after decoding a Block to get Total Size + * that is stored in Index. + * + * \return Total Size on success, or zero on error. 
+ */ +extern lzma_vli lzma_block_total_size_get(const lzma_options_block *options); /** diff --git a/src/liblzma/api/lzma/check.h b/src/liblzma/api/lzma/check.h index 4a2a453b..dcba8269 100644 --- a/src/liblzma/api/lzma/check.h +++ b/src/liblzma/api/lzma/check.h @@ -43,14 +43,14 @@ typedef enum { * Size of the Check field: 4 bytes */ - LZMA_CHECK_CRC64 = 3, + LZMA_CHECK_CRC64 = 4, /**< * CRC64 using the polynomial from the ECMA-182 standard * * Size of the Check field: 8 bytes */ - LZMA_CHECK_SHA256 = 5 + LZMA_CHECK_SHA256 = 10 /**< * SHA-256 * @@ -62,7 +62,7 @@ typedef enum { /** * \brief Maximum valid Check ID * - * The .lzma file format specification specifies eight Check IDs (0-7). Some + * The .lzma file format specification specifies sixteen Check IDs (0-15). Some * of them are only reserved i.e. no actual Check algorithm has been assigned. * Still liblzma accepts any of these eight IDs for future compatibility * when decoding files. If a valid but unsupported Check ID is detected, @@ -70,7 +70,7 @@ typedef enum { * * FIXME bad desc */ -#define LZMA_CHECK_ID_MAX 7 +#define LZMA_CHECK_ID_MAX 15 /** @@ -89,13 +89,19 @@ extern const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1]; * Although not all Check IDs have a check algorithm associated, the size of * every Check is already frozen. This array contains the size (in bytes) of * the Check field with specified Check ID. The values are taken from the - * section 2.2.2 of the .lzma file format specification: - * { 0, 4, 4, 8, 16, 32, 32, 64 } + * section 2.1.1.2 of the .lzma file format specification: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } */ extern const uint32_t lzma_check_sizes[LZMA_CHECK_ID_MAX + 1]; /** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** * \brief Calculate CRC32 * * Calculates CRC32 using the polynomial from the IEEE 802.3 standard.
diff --git a/src/liblzma/api/lzma/copy.h b/src/liblzma/api/lzma/copy.h deleted file mode 100644 index f5617462..00000000 --- a/src/liblzma/api/lzma/copy.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * \file lzma/copy.h - * \brief Copy filter - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use <lzma.h> instead. -#endif - - -/** - * \brief Filter ID - * - * Filter ID of the Copy filter. This is used as lzma_options_filter.id. - */ -#define LZMA_FILTER_COPY LZMA_VLI_C(0x00) diff --git a/src/liblzma/api/lzma/easy.h b/src/liblzma/api/lzma/easy.h index b04c7b4f..d83a79a2 100644 --- a/src/liblzma/api/lzma/easy.h +++ b/src/liblzma/api/lzma/easy.h @@ -69,7 +69,7 @@ typedef enum { /** - * \brief Default compression level for Data Blocks + * \brief Default compression level * * Data Blocks contain the actual compressed data. It's not straightforward * to recommend a default level, because in some cases keeping the resource @@ -80,16 +80,6 @@ typedef enum { /** - * \brief Default compression level for Metadata Blocks - * - * Metadata Blocks are present only in Multi-Block Streams. Metadata Blocks - * contain the Extra Records (if any) and some book-keeping data that is - * used by decoders. 
- */ -#define LZMA_EASY_METADATA_DEFAULT LZMA_EASY_LZMA_3 - - -/** * \brief Calculates rough memory requirements of a compression level * * This function is a wrapper for lzma_memory_usage(), which is declared @@ -104,11 +94,11 @@ extern uint32_t lzma_easy_memory_usage(lzma_easy_level level); /** - * \brief Initializes Single-Block .lzma Stream encoder + * \brief Initializes .lzma Stream encoder * * This function is intended for those who just want to use the basic LZMA * features (that is, most developers out there). Lots of assumptions are - * made, which are correct for most situations or at least good enough. + * made, which are correct or at least good enough for most situations. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. @@ -125,50 +115,7 @@ extern uint32_t lzma_easy_memory_usage(lzma_easy_level level); * * If initialization succeeds, use lzma_code() to do the actual encoding. * Valid values for `action' (the second argument of lzma_code()) are - * LZMA_RUN, LZMA_SYNC_FLUSH, and LZMA_FINISH. In future, there may be - * compression levels that don't support LZMA_SYNC_FLUSH. - */ -extern lzma_ret lzma_easy_encoder_single( - lzma_stream *strm, lzma_easy_level level); - - -/** - * \brief Initializes Multi-Block .lzma Stream encoder - * - * If you want to be able to store Extra Records or want to be able to use - * LZMA_FULL_FLUSH, you need to create a Multi-Block Stream. - * - * \param strm Pointer to lzma_stream that is at least initialized - * with LZMA_STREAM_INIT. - * \param level Compression level to use for the data being encoded. - * \param metadata_level - * Compression level to use for Metadata Blocks. - * Metadata Blocks contain the Extra Records (if any) - * and some book-keeping data that is used by decoders. - * \param header Pointer to a list of Extra Records to be stored to - * the Header Metadata Block. Set this to NULL to omit - * Header Metadata Block completely. 
The list must be - * kept available until the encoding has finished. - * \param footer Pointer to a list of Extra Records to be stored to - * the Footer Metadata Block. Set this to NULL if you - * don't want to store any Extra Records (the Footer - * Metadata Block will still be written for other - * reasons.) The list must be kept available until - * the encoding has finished. - * - * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to - * encode your data. - * - LZMA_MEM_ERROR: Memory allocation failed. All memory - * previously allocated for *strm is now freed. - * - LZMA_HEADER_ERROR: The given compression level is not - * supported by this build of liblzma. - * - * If initialization succeeds, use lzma_code() to do the actual encoding. - * Valid values for `action' (the second argument of lzma_code()) are * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, * there may be compression levels that don't support LZMA_SYNC_FLUSH. - * LZMA_FULL_FLUSH will always work with all compression levels. */ -extern lzma_ret lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer); +extern lzma_ret lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level); diff --git a/src/liblzma/api/lzma/extra.h b/src/liblzma/api/lzma/extra.h deleted file mode 100644 index 29426a74..00000000 --- a/src/liblzma/api/lzma/extra.h +++ /dev/null @@ -1,114 +0,0 @@ -/** - * \file lzma/extra.h - * \brief Handling of Extra Records in Metadata - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use <lzma.h> instead. -#endif - - -/* - * Extra Record IDs - * - * See the .lzma file format specification for description what each - * Extra Record type exactly means. - * - * If you ever need to update .lzma files with Extra Records, note that - * the Record IDs are divided in two categories: - * - Safe-to-Copy Records may be preserved as is when the - * Stream is modified in ways that don't change the actual - * uncompressed data. Examples of such operatings include - * recompressing and adding, modifying, or deleting unrelated - * Extra Records. - * - Unsafe-to-Copy Records should be removed (and possibly - * recreated) when any kind of changes are made to the Stream. - */ - -#define LZMA_EXTRA_PADDING 0x00 -#define LZMA_EXTRA_OPENPGP 0x01 -#define LZMA_EXTRA_FILTERS 0x02 -#define LZMA_EXTRA_COMMENT 0x03 -#define LZMA_EXTRA_CHECKS 0x04 -#define LZMA_EXTRA_FILENAME 0x05 -#define LZMA_EXTRA_MTIME 0x07 -#define LZMA_EXTRA_MTIME_HR 0x09 -#define LZMA_EXTRA_MIME_TYPE 0x0B -#define LZMA_EXTRA_HOMEPAGE 0x0D - - -/** - * \brief Extra Records - * - * The .lzma format provides a way to store custom information along - * the actual compressed content. Information about these Records - * are passed to and from liblzma via this linked list. - */ -typedef struct lzma_extra_s lzma_extra; -struct lzma_extra_s { - /** - * \brief Pointer to the next Extra Record - * - * This is NULL on the last Extra Record. - */ - lzma_extra *next; - - /** - * \brief Record ID - * - * Extra Record IDs are divided in three categories: - * - Zero is a special case used for padding. It doesn't have - * Size of Data fields. - * - Odd IDs (1, 3, 5, ...) 
are Safe-to-Copy IDs. - * These can be preserved as is if the Stream is - * modified in a way that doesn't alter the actual - * uncompressed content. - * - Even IDs (2, 4, 6, ...) are Unsafe-to-Copy IDs. - * If the .lzma Stream is modified in any way, - * the Extra Records having a sensitive ID should - * be removed or updated accordingly. - * - * Refer to the .lzma file format specification for - * the up to date list of Extra Record IDs. - */ - lzma_vli id; - - /** - * \brief Size of the Record data - * - * In case of strings, this should not include the - * trailing '\0'. - */ - size_t size; - - /** - * \brief Record data - * - * Record data is often a string in UTF-8 encoding, - * but it can be arbitrary binary data. In case of - * strings, the trailing '\0' is usually not stored - * in the .lzma file. - * - * To ease working with Extra Records containing strings, - * liblzma always adds '\0' to the end of data even when - * it wasn't present in the .lzma file. This '\0' is not - * counted in the size of the data. 
- */ - uint8_t *data; -}; - - -extern void lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator); diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index a8bdd4bd..412c30e5 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -162,5 +162,6 @@ extern lzma_ret lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_filter_flags_decoder( - lzma_stream *strm, lzma_options_filter *options); +extern lzma_ret lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size); diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h index 7e59c4b3..13cddf47 100644 --- a/src/liblzma/api/lzma/index.h +++ b/src/liblzma/api/lzma/index.h @@ -22,63 +22,211 @@ /** - * \brief - * - * FIXME desc + * \brief Opaque data type to hold the Index */ typedef struct lzma_index_s lzma_index; -struct lzma_index_s { + + +/** + * \brief Index Record and its location + */ +typedef struct { /** - * \brief Total Size of the Block - * - * This includes Block Header, Compressed Data, and Block Footer. + * Total Size of a Block. */ lzma_vli total_size; /** - * \brief Uncompressed Size of the Block + * Uncompressed Size of a Block */ lzma_vli uncompressed_size; /** - * \brief Pointer to the next Index Record - * - * This is NULL on the last Index Record. + * Offset of the first byte of a Block relative to the beginning + * of the Stream, or if there are multiple Indexes combined, + * relative to the beginning of the first Stream. */ - lzma_index *next; -}; + lzma_vli stream_offset; + + /** + * Uncompressed offset + */ + lzma_vli uncompressed_offset; + +} lzma_index_record; /** - * \brief Duplicates an Index list + * \brief Allocate and initialize a new lzma_index structure + * + * If i is NULL, a new lzma_index structure is allocated, initialized, + * and a pointer to it returned. 
If allocation fails, NULL is returned. * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. + * If i is non-NULL, it is reinitialized and the same pointer returned. + * In this case, return value cannot be NULL or a different pointer than + * the i given as argument. */ -extern lzma_index *lzma_index_dup( - const lzma_index *index, lzma_allocator *allocator); +extern lzma_index *lzma_index_init(lzma_index *i, lzma_allocator *allocator); + + +/** + * \brief Deallocate the Index + */ +extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); /** - * \brief Frees an Index list + * \brief Add a new Record to an Index * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. + * \param i Pointer to a lzma_index structure + * \param total_size Total Size of a Block + * \param uncompressed_size Uncompressed Size of a Block, or + * LZMA_VLI_VALUE_UNKNOWN to indicate padding. + * + * Appending a new Record does not affect the read position. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR */ -extern void lzma_index_free(lzma_index *index, lzma_allocator *allocator); +extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size); /** - * \brief Calculates information about the Index + * \brief Get the number of Records + */ +extern lzma_vli lzma_index_count(const lzma_index *i); + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Index Size field from the Stream Footer. + */ +extern lzma_vli lzma_index_size(const lzma_index *i); + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields.
+ */ +extern lzma_vli lzma_index_total_size(const lzma_index *i); + + +/** + * \brief Get the total size of the Stream + * + * If multiple Indexes have been combined, this works as if the Blocks + * were in a single Stream. + */ +extern lzma_vli lzma_index_stream_size(const lzma_index *i); + + +/** + * \brief Get the total size of the file * - * \return LZMA_OK on success, LZMA_PROG_ERROR on error. FIXME + * When no Indexes have been combined with lzma_index_cat(), this function is + * identical to lzma_index_stream_size(). If multiple Indexes have been + * combined, this includes also the possible Stream Padding fields. + */ +extern lzma_vli lzma_index_file_size(const lzma_index *i); + + +/** + * \brief Get the uncompressed size of the Stream */ -extern lzma_ret lzma_index_count(const lzma_index *index, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size); +extern lzma_vli lzma_index_uncompressed_size(const lzma_index *i); + + +/** + * \brief Get the next Record from the Index + */ +extern lzma_bool lzma_index_read(lzma_index *i, lzma_index_record *record); + + +/** + * \brief Rewind the Index + * + * Rewind the Index so that next call to lzma_index_read() will return the + * first Record. + */ +extern void lzma_index_rewind(lzma_index *i); + + +/** + * \brief Locate a Record + * + * When the Index is available, it is possible to do random-access reading + * with granularity of Block size. + * + * \param i Pointer to lzma_index structure + * \param record Pointer to a structure to hold the search results + * \param target Uncompressed target offset + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Record containing the requested uncompressed + * offset is stored into *record. + * - Read offset will be adjusted so that calling lzma_index_read() can be + * used to read subsequent Records. 
+ * - This function returns false. + * + * If target is greater than the uncompressed size of the Stream, *record + * and the read position are not modified, and this function returns true. + */ +extern lzma_bool lzma_index_locate( + lzma_index *i, lzma_index_record *record, lzma_vli target); + + +/** + * \brief Concatenate Indexes of two Streams + * + * + * + * \param dest Destination Index after which src is appended + * \param src Source Index. The memory allocated for this is either moved + * to be part of *dest or freed iff the function call + * succeeds, and src will be an invalid pointer. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * \param padding Size of the Stream Padding field between Streams. + * + * \return - LZMA_OK: Indexes concatenated successfully. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_index_cat(lzma_index *lzma_restrict dest, + lzma_index *lzma_restrict src, + lzma_allocator *allocator, lzma_vli padding); + + +/** + * \brief Duplicates an Index list + * + * \return A copy of the Index, or NULL if memory allocation failed.
+ */ +extern lzma_index *lzma_index_dup( + const lzma_index *i, lzma_allocator *allocator); /** * \brief Compares if two Index lists are identical */ -extern lzma_bool lzma_index_is_equal(const lzma_index *a, const lzma_index *b); +extern lzma_bool lzma_index_equal(const lzma_index *a, const lzma_index *b); + + +/** + * \brief Initializes Index encoder + */ +extern lzma_ret lzma_index_encoder(lzma_stream *strm, lzma_index *i); + + +/** + * \brief Initializes Index decoder + */ +extern lzma_ret lzma_index_decoder(lzma_stream *strm, lzma_index **i); diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h new file mode 100644 index 00000000..1edbbeaa --- /dev/null +++ b/src/liblzma/api/lzma/index_hash.h @@ -0,0 +1,94 @@ +/** + * \file lzma/index_hash.h + * \brief Validates Index by using a hash function + * + * Instead of constructing complete Index while decoding Blocks, Index hash + * calculates a hash of the Block sizes and Index, and then compares the + * hashes. This way memory usage is constant even with large number of + * Blocks and huge Index. + * + * \author Copyright (C) 2008 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use <lzma.h> instead. 
+#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash given as argument. + */ +extern lzma_index_hash *lzma_index_hash_init( + lzma_index_hash *index_hash, lzma_allocator *allocator); + + +/** + * \brief Deallocate the Index hash + */ +extern void lzma_index_hash_end( + lzma_index_hash *index_hash, lzma_allocator *allocator); + + +/** + * \brief Add a new Record to an Index hash + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param total_size Total Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli total_size, lzma_vli uncompressed_size); + + +/** + * \brief Decode the Index field + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_PROG_ERROR + * + * \note Once decoding of the Index field has been started, no more + * Records can be added using lzma_index_hash_append().
+ */ +extern lzma_ret lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size); + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Index Size field from the Stream Footer. + */ +extern lzma_vli lzma_index_hash_size(const lzma_index_hash *index_hash); diff --git a/src/liblzma/api/lzma/info.h b/src/liblzma/api/lzma/info.h deleted file mode 100644 index 3a91850f..00000000 --- a/src/liblzma/api/lzma/info.h +++ /dev/null @@ -1,315 +0,0 @@ -/** - * \file lzma/info.h - * \brief Handling of Stream size information - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use <lzma.h> instead. -#endif - - -/********** - * Basics * - **********/ - -/** - * \brief Opaque data type to hold the size information - */ -typedef struct lzma_info_s lzma_info; - - -typedef struct { - /** - * \brief Total Size of this Block - * - * This can be LZMA_VLI_VALUE_UNKNOWN. - */ - lzma_vli total_size; - - /** - * \brief Uncompressed Size of this Block - * - * This can be LZMA_VLI_VALUE_UNKNOWN. - */ - lzma_vli uncompressed_size; - - /** - * \brief Offset of the first byte of the Block - * - * In encoder, this is useful to find out the alignment of the Block. - * - * In decoder, this is useful when doing random-access reading - * with help from lzma_info_data_locate(). 
- */ - lzma_vli stream_offset; - - /** - * \brief Uncompressed offset of the Block - * - * Offset of the first uncompressed byte of the Block relative to - * all uncompressed data in the Block. - * FIXME desc - */ - lzma_vli uncompressed_offset; - - /** - * \brief Pointers to internal data structures - * - * Applications must not touch these. - */ - void *internal[4]; - -} lzma_info_iter; - - -typedef enum { - LZMA_INFO_STREAM_START, - LZMA_INFO_HEADER_METADATA, - LZMA_INFO_TOTAL, - LZMA_INFO_UNCOMPRESSED, - LZMA_INFO_FOOTER_METADATA -} lzma_info_size; - - -/** - * \brief Allocates and initializes a new lzma_info structure - * - * If info is NULL, a new lzma_info structure is allocated, initialized, and - * a pointer to it returned. If allocation fails, NULL is returned. - * - * If info is non-NULL, it is reinitialized and the same pointer returned. - * (In this case, return value cannot be NULL or a different pointer than - * the info given as argument.) - */ -extern lzma_info *lzma_info_init(lzma_info *info, lzma_allocator *allocator); - - -/** - * \brief Resets lzma_info - * - * This is like calling lzma_info_end() and lzma_info_create(), but - * re-uses the existing base structure. - */ -extern void lzma_info_reset( - lzma_info *info, lzma_allocator *allocator); - - -/** - * \brief Frees memory allocated for a lzma_info structure - */ -extern void lzma_info_free(lzma_info *info, lzma_allocator *allocator); - - -/************************ - * Setting known values * - ************************/ - -/** - * \brief Set a known size value - * - * \param info Pointer returned by lzma_info_create() - * \param type Any value from lzma_info_size - * \param size Value to set or verify - * - * \return LZMA_OK on success, LZMA_DATA_ERROR if the size doesn't - * match the existing information, or LZMA_PROG_ERROR - * if type is invalid or size is not a valid VLI. 
- */ -extern lzma_ret lzma_info_size_set( - lzma_info *info, lzma_info_size type, lzma_vli size); - - -/** - * \brief Sets the Index - * - * The given lzma_index list is "absorbed" by this function. The application - * must not access it after this function call, even if this function returns - * an error. - * - * \note The given lzma_index will at some point get freed by the - * lzma_info_* functions. If you use a custom lzma_allocator, - * make sure that it can free the lzma_index. - */ -extern lzma_ret lzma_info_index_set( - lzma_info *info, lzma_allocator *allocator, - lzma_index *index, lzma_bool eat_index); - - -/** - * \brief Sets information from a known Metadata Block - * - * This is a shortcut for calling lzma_info_size_set() with different type - * arguments, lzma_info_index_set() with metadata->index. - */ -extern lzma_ret lzma_info_metadata_set(lzma_info *info, - lzma_allocator *allocator, lzma_metadata *metadata, - lzma_bool is_header_metadata, lzma_bool eat_index); - - -/*************** - * Incremental * - ***************/ - -/** - * \brief Prepares an iterator to be used with given lzma_info structure - * - * - */ -extern void lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter); - - -/** - * \brief Moves to the next Index Record - * - * - */ -extern lzma_ret lzma_info_iter_next( - lzma_info_iter *iter, lzma_allocator *allocator); - - -/** - * \brief Sets or verifies the sizes in the Index Record - * - * \param iter Pointer to iterator to be set or verified - * \param total_size - * Total Size in bytes or LZMA_VLI_VALUE_UNKNOWN - * \param uncompressed_size - * Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN - * - * \return - LZMA_OK: All OK. - * - LZMA_DATA_ERROR: Given sizes don't match with the already - * known sizes. - * - LZMA_PROG_ERROR: Internal error, possibly integer - * overflow (e.g. 
the sum of all the known sizes is too big) - */ -extern lzma_ret lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size); - - -/** - * \brief Locates a Data Block - * - * \param iter Properly initialized iterator - * \param allocator Pointer to lzma_allocator or NULL - * \param uncompressed_offset - * Target offset to locate. The final offset - * will be equal or smaller than this. - * \param allow_alloc True if this function is allowed to call - * lzma_info_iter_next() to allocate a new Record - * if the requested offset reached end of Index - * Record list. Note that if Index has been marked - * final, lzma_info_iter_next() is never called. - * - * \return - LZMA_OK: All OK, *iter updated accordingly. - * - LZMA_DATA_ERROR: Trying to search past the end of the Index - * Record list, and allocating a new Record was not allowed - * either because allow_alloc was false or Index was final. - * - LZMA_PROG_ERROR: Internal error (probably integer - * overflow causing some lzma_vli getting too big). - */ -extern lzma_ret lzma_info_iter_locate(lzma_info_iter *iter, - lzma_allocator *allocator, lzma_vli uncompressed_offset, - lzma_bool allow_alloc); - - -/** - * \brief Finishes incrementally constructed Index - * - * This sets the known Total Size and Uncompressed of the Data Blocks - * based on the information collected from the Index Records, and marks - * the Index as final. 
- */ -extern lzma_ret lzma_info_index_finish(lzma_info *info); - - -/*************************** - * Reading the information * - ***************************/ - -/** - * \brief Gets a known size - * - * - */ -extern lzma_vli lzma_info_size_get( - const lzma_info *info, lzma_info_size type); - -extern lzma_vli lzma_info_metadata_locate( - const lzma_info *info, lzma_bool is_header_metadata); - -/** - * \brief Gets a pointer to the beginning of the Index list - * - * If detach is true, the Index will be detached from the lzma_info - * structure, and thus not be modified or freed by lzma_info_end(). - * - * If detach is false, the application must not modify the Index in any way. - * Also, the Index list is guaranteed to be valid only till the next call - * to any lzma_info_* function. - */ -extern lzma_index *lzma_info_index_get(lzma_info *info, lzma_bool detach); - - -extern size_t lzma_info_index_count_get(const lzma_info *info); - - -extern uint32_t lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata); - - - -/** - * \brief Locate a Block containing the given uncompressed offset - * - * This function is useful when you need to do random-access reading in - * a Multi-Block Stream. - * - * \param info Pointer to lzma_info that has at least one - * Index Record. The Index doesn't need to be finished. - * \param uncompressed_target - * Uncompressed target offset which the caller would - * like to locate from the Stream. - * \param stream_offset - * Starting offset (relative to the beginning the Stream) - * of the Block containing the requested location. - * \param uncompressed_offset - * The actual uncompressed offset of the beginning of - * the Block. uncompressed_offset <= uncompressed_target - * is always true; the application needs to uncompress - * uncompressed_target - uncompressed_offset bytes to - * reach the requested target offset. - * \param total_size - * Total Size of the Block. 
If the Index is incomplete, - * this may be LZMA_VLI_VALUE_UNKNOWN indicating unknown - * size. - * \param uncompressed_size - * Uncompressed Size of the Block. If the Index is - * incomplete, this may be LZMA_VLI_VALUE_UNKNOWN - * indicating unknown size. The application must pass - * this value to the Block decoder to verify FIXME - * - * \return - * - * \note This function is currently implemented as a linear search. - * If there are many Index Records, this can be really slow. - * This can be improved in newer liblzma versions if needed. - */ -extern lzma_bool lzma_info_data_locate(const lzma_info *info, - lzma_vli uncompressed_target, - lzma_vli *lzma_restrict stream_offset, - lzma_vli *lzma_restrict uncompressed_offset, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size); diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma.h index 9ff25d86..da0bb52d 100644 --- a/src/liblzma/api/lzma/lzma.h +++ b/src/liblzma/api/lzma/lzma.h @@ -152,7 +152,7 @@ typedef struct { * because it uses the target buffer as the dictionary. */ uint32_t dictionary_size; -# define LZMA_DICTIONARY_SIZE_MIN 1 +# define LZMA_DICTIONARY_SIZE_MIN (UINT32_C(1) << 12) # define LZMA_DICTIONARY_SIZE_MAX (UINT32_C(1) << 30) # define LZMA_DICTIONARY_SIZE_DEFAULT (UINT32_C(1) << 23) diff --git a/src/liblzma/api/lzma/metadata.h b/src/liblzma/api/lzma/metadata.h deleted file mode 100644 index 69592a3a..00000000 --- a/src/liblzma/api/lzma/metadata.h +++ /dev/null @@ -1,100 +0,0 @@ -/** - * \file lzma/metadata.h - * \brief Metadata handling - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use <lzma.h> instead. -#endif - - -/** - * \brief Information stored into a Metadata Block - * - * This structure holds all the information that can be stored to - * a Metadata Block. - */ -typedef struct { - /** - * \brief Size of Header Metadata Block - */ - lzma_vli header_metadata_size; - - /** - * \brief Total Size of the Stream - */ - lzma_vli total_size; - - /** - * \brief Uncompressed Size of the Stream - */ - lzma_vli uncompressed_size; - - /** - * \brief Index of the Blocks stored in the Stream - */ - lzma_index *index; - - /** - * \brief Extra information - */ - lzma_extra *extra; - -} lzma_metadata; - - -/** - * \brief Calculate the encoded size of Metadata - * - * \return Uncompressed size of the Metadata in encoded form. This value - * may be passed to Block encoder as Uncompressed Size when using - * Metadata filter. On error, zero is returned. - */ -extern lzma_vli lzma_metadata_size(const lzma_metadata *metadata); - - -/** - * \brief Initializes Metadata encoder - * - * \param coder Pointer to a pointer to hold Metadata encoder's - * internal state. Original value is ignored, thus - * you don't need to initialize the pointer. - * \param allocator Custom memory allocator; usually NULL. - * \param metadata Pointer to Metadata to encoded - * - * \return - LZMA_OK: Initialization succeeded. - * - LZMA_MEM_ERROR: Cannot allocate memory for *coder. - * - * The initialization function makes internal copy of the *metadata structure. - * However, the linked lists metadata->index and metadata->extra are NOT - * copied. Thus, the application may destroy *metadata after initialization - * if it likes, but not Index or Extra. 
- */ -extern lzma_ret lzma_metadata_encoder(lzma_stream *strm, - lzma_options_block *options, const lzma_metadata *metadata); - - -/** - * \brief Initializes Metadata decoder - * - * \param want_extra If this is true, Extra Records will be stored - * to metadata->extra. If this is false, Extra - * Records will be parsed but not stored anywhere, - * metadata->extra will be set to NULL. - */ -extern lzma_ret lzma_metadata_decoder( - lzma_stream *strm, lzma_options_block *options, - lzma_metadata *metadata, lzma_bool want_extra); diff --git a/src/liblzma/api/lzma/raw.h b/src/liblzma/api/lzma/raw.h index c1ee41d8..db8cba15 100644 --- a/src/liblzma/api/lzma/raw.h +++ b/src/liblzma/api/lzma/raw.h @@ -30,20 +30,10 @@ * \param options Array of lzma_options_filter structures. * The end of the array must be marked with * .id = LZMA_VLI_VALUE_UNKNOWN. The minimum - * number of filters is zero; the maximum is - * determined by available memory. - * \param uncompressed_size - * Size of the uncompressed data. If it is unknown, - * use LZMA_VLI_VALUE_UNKNOWN. You need to give the - * same value to the raw decoder to decode the data. - * \param allow_implicit - * If true, an implicit Copy or Subblock filter should be - * automatically added when needed. If this is false and - * an implicit filter would be needed, LZMA_PROG_ERROR is - * returned. + * number of filters is one and the maximum is four. * * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the - * filter chain support it), or LZMA_FINISH. + * filter chain supports it), or LZMA_FINISH. 
* * \return - LZMA_OK * - LZMA_MEM_ERROR @@ -51,8 +41,7 @@ * - LZMA_PROG_ERROR */ extern lzma_ret lzma_raw_encoder( - lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit); + lzma_stream *strm, const lzma_options_filter *options); /** @@ -68,5 +57,4 @@ extern lzma_ret lzma_raw_encoder( * - LZMA_PROG_ERROR */ extern lzma_ret lzma_raw_decoder( - lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit); + lzma_stream *strm, const lzma_options_filter *options); diff --git a/src/liblzma/api/lzma/stream.h b/src/liblzma/api/lzma/stream.h index 346bdd17..4bb17e7d 100644 --- a/src/liblzma/api/lzma/stream.h +++ b/src/liblzma/api/lzma/stream.h @@ -22,157 +22,32 @@ /** - * \brief Options for .lzma Stream encoder - */ -typedef struct { - /** - * \brief Type of integrity Check - * - * The type of the integrity Check is stored into Stream Header - * and Stream Footer. The same Check is used for all Blocks in - * the Stream. - */ - lzma_check_type check; - - /** - * \brief Precense of CRC32 of the Block Header - * - * Set this to true if CRC32 of every Block Header should be - * calculated and stored in the Block Header. This is recommended. - * - * This setting is stored into Stream Header and Stream Footer. - */ - lzma_bool has_crc32; - - /** - * \brief Uncompressed Size in bytes - * - * This is somewhat advanced feature. Most users want to set this to - * LZMA_VLI_VALUE_UNKNOWN to indicate unknown Uncompressed Size. - * - * If the Uncompressed Size of the Stream being encoded is known, - * it can be stored to the beginning of the Stream. The details - * differ for Single-Block and Multi-Block Streams: - * - With Single-Block Streams, the Uncompressed Size is stored to - * the Block Header and End of Payload Marker is omitted. - * - With Multi-Block Streams, the Uncompressed Size is stored to - * the Header Metadata Block. 
The Uncompressed Size of the Blocks - * will be unknown, because liblzma cannot predict how the - * application is going to split the data in Blocks. - */ - lzma_vli uncompressed_size; - - /** - * \brief Alignment of the beginning of the Stream - * - * Certain filters handle data in bigger chunks than single bytes. - * This affects two things: - * - Performance: aligned memory access is usually faster. - * - Further compression ratio in custom file formats: if you - * encode multiple Blocks with some non-compression filter - * such as LZMA_FILTER_POWERPC, it is a good idea to keep - * the inter-Block alignment correct to maximize compression - * ratio when all these Blocks are finally compressed as a - * single step. - * - * Usually the Stream is stored into its own file, thus - * the alignment is usually zero. - */ - uint32_t alignment; - - /** - * \brief Array of filters used to encode Data Blocks - * - * There can be at maximum of seven filters. The end of the array is - * marked with .id = LZMA_VLI_VALUE_UNKNOWN. (That's why the array - * has eight members.) Minimum number of filters is zero; in that - * case, an implicit Copy filter is used. - */ - lzma_options_filter filters[8]; - - /** - * \brief Array of filters used to encode Metadata Blocks - * - * This is like filters[] but for Metadata Blocks. If Metadata - * Blocks are compressed, they usually are compressed with - * settings that require only little memory to uncompress e.g. - * LZMA with 64 KiB dictionary. - * - * \todo Recommend a preset. - * - * When liblzma sees that the Metadata Block would be very small - * even in uncompressed form, it is not compressed no matter - * what filter have been set here. This is to avoid possibly - * increasing the size of the Metadata Block with bad compression, - * and to avoid useless overhead of filters in uncompression phase. 
- */ - lzma_options_filter metadata_filters[8]; - - /** - * \brief Extra information in the Header Metadata Block - */ - const lzma_extra *header; - - /** - * \brief Extra information in the Footer Metadata Block - * - * It is enough to set this pointer any time before calling - * lzma_code() with LZMA_FINISH as the second argument. - */ - const lzma_extra *footer; - -} lzma_options_stream; - - -/** - * \brief Initializes Single-Block .lzma Stream encoder - * - * This is the function that most developers are looking for. :-) It - * compresses using the specified options without storing any extra - * information. + * \brief Initializes .lzma Stream encoder * - * \todo Describe that is_metadata is ignored, maybe something else. - */ -extern lzma_ret lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options); - - -/** - * \brief Initializes Multi-Block .lzma Stream encoder + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_VALUE_UNKNOWN. There must + * be 1-4 filters, but there are restrictions on how + * multiple filters can be combined. FIXME Tell where + * to find more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_HEADER_ERROR + * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options); +extern lzma_ret lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check); /** * \brief Initializes decoder for .lzma Stream * * \param strm Pointer to propertily prepared lzma_stream - * \param header Pointer to hold a pointer to Extra Records read - * from the Header Metadata Block. Use NULL if - * you don't care about Extra Records. - * \param footer Same as header, but for Footer Metadata Block. 
* * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. - * - * If header and/or footer are not NULL, *header and/or *footer will be - * initially set to NULL. - * - * The application can detect that Header Metadata Block has been completely - * parsed when the decoder procudes some output the first time. If *header - * is still NULL, there was no Extra field in the Header Metadata Block (or - * the whole Header Metadata Block wasn't present at all). - * - * The application can detect that Footer Metadata Block has been parsed - * completely when lzma_code() returns LZMA_STREAM_END. If *footer is still - * NULL, there was no Extra field in the Footer Metadata Block. - * - * \note If you use lzma_memory_limiter, the Extra Records will be - * allocated with it, and thus remain in the lzma_memory_limiter - * even after they get exported to the application via *header - * and *footer pointers. */ -extern lzma_ret lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer); +extern lzma_ret lzma_stream_decoder(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/stream_flags.h b/src/liblzma/api/lzma/stream_flags.h index 070c91c9..f4c5c335 100644 --- a/src/liblzma/api/lzma/stream_flags.h +++ b/src/liblzma/api/lzma/stream_flags.h @@ -1,6 +1,6 @@ /** * \file lzma/stream_flags.h - * \brief .lzma Stream Header and Stream tail encoder and decoder + * \brief .lzma Stream Header and Stream Footer encoder and decoder * * \author Copyright (C) 1999-2006 Igor Pavlov * \author Copyright (C) 2007 Lasse Collin @@ -22,121 +22,113 @@ /** - * \brief Size of Stream Header + * \brief Size of Stream Header and Stream Footer * - * Magic Bytes (6) + Stream Flags (1) + CRC32 (4) + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .lzma file format is + * developed in future. 
*/ -#define LZMA_STREAM_HEADER_SIZE (6 + 1 + 4) +#define LZMA_STREAM_HEADER_SIZE 12 /** - * \brief Size of Stream tail - * - * Because Stream Footer already has a defined meaning in the file format - * specification, we use Stream tail to denote these two fields: - * Stream Flags (1) + Magic Bytes (2) - */ -#define LZMA_STREAM_TAIL_SIZE (1 + 2) - - -/** - * Options for encoding and decoding Stream Header and Stream tail + * Options for encoding and decoding Stream Header and Stream Footer */ typedef struct { /** - * Type of the Check calculated from uncompressed data + * Backward Size must be a multiple of four bytes. In this Stream + * format version Backward Size is the size of the Index field. */ - lzma_check_type check; + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) /** - * True if Block Headers have the CRC32 field. Note that the CRC32 - * field is always present in the Stream Header. - */ - lzma_bool has_crc32; - - /** - * True if the Stream is a Multi-Block Stream. + * Type of the Check calculated from uncompressed data */ - lzma_check_type check; + lzma_check_type check; } lzma_stream_flags; -#define lzma_stream_flags_is_equal(a, b) \ - ((a).check == (b).check \ - && (a).has_crc32 == (b).has_crc32 \ - && (a).is_multi == (b).is_multi) - - /** - * \brief Encodes Stream Header - * - * Encoding of the Stream Header is done with a single call instead of - * first initializing and then doing the actual work with lzma_code(). + * \brief Encode Stream Header * - * \param out Beginning of the output buffer - * \param out_pos out[*out_pos] is the next write position. This - * is updated by the encoder. - * \param out_size out[out_size] is the first byte to not write. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized.
* * \return - LZMA_OK: Encoding was successful. * - LZMA_PROG_ERROR: Invalid options. - * - LZMA_BUF_ERROR: Not enough output buffer space. */ extern lzma_ret lzma_stream_header_encode( - uint8_t *out, const lzma_stream_flags *options); + const lzma_stream_flags *options, uint8_t *out); /** - * \brief Encodes Stream tail + * \brief Encode Stream Footer * - * \param footer Pointer to a pointer that will hold the - * allocated buffer. Caller must free it once - * it isn't needed anymore. - * \param footer_size Pointer to a variable that will the final size - * of the footer buffer. - * \param allocator lzma_allocator for custom allocator functions. - * Set to NULL to use malloc(). - * \param options Stream Header options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * \param options Stream Footer options to be encoded. * - * \return - LZMA_OK: Success; *header and *header_size set. - * - LZMA_PROG_ERROR: *options is invalid. - * - LZMA_MEM_ERROR: Cannot allocate memory. + * \return - LZMA_OK: Encoding was successful. + * - LZMA_PROG_ERROR: Invalid options. */ -extern lzma_ret lzma_stream_tail_encode( - uint8_t *out, const lzma_stream_flags *options); +extern lzma_ret lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out); /** - * \brief Initializes Stream Header decoder - * - * \param strm Pointer to lzma_stream used to pass input data - * \param options Target structure for parsed results - * - * \return - LZMA_OK: Successfully initialized - * - LZMA_MEM_ERROR: Cannot allocate memory + * \brief Decode Stream Header * - * The actual decoding is done with lzma_code() and freed with lzma_end(). + * \param options Stream Header options to be encoded. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->index_size is always set to LZMA_VLI_VALUE_UNKNOWN. 
This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_equal(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_HEADER_ERROR: Unsupported options are present + * in the header. */ -extern lzma_ret lzma_stream_header_decoder( - lzma_stream *strm, lzma_stream_flags *options); +extern lzma_ret lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in); /** - * \brief Initializes Stream tail decoder + * \brief Decode Stream Footer * - * \param strm Pointer to lzma_stream used to pass input data - * \param options Target structure for parsed results. - * \param decode_uncompressed_size - * Set to true if the first field to decode is - * Uncompressed Size. Set to false if the first - * field to decode is Backward Size. + * \param options Stream Header options to be encoded. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the footer + * is corrupt. + * - LZMA_HEADER_ERROR: Unsupported options are present + * in the footer. + */ +extern lzma_ret lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in); + + +/** + * \brief Compare two lzma_stream_flags structures * - * \return - LZMA_OK: Successfully initialized - * - LZMA_MEM_ERROR: Cannot allocate memory + * index_size values are compared only if both are not LZMA_VLI_VALUE_UNKNOWN. * - * The actual decoding is done with lzma_code() and freed with lzma_end(). + * \return true if both structures are considered equal; false otherwise. 
*/ -extern lzma_ret lzma_stream_tail_decoder( - lzma_stream *strm, lzma_stream_flags *options); +extern lzma_bool lzma_stream_flags_equal( + const lzma_stream_flags *a, lzma_stream_flags *b); diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h index d88aa305..252458a3 100644 --- a/src/liblzma/api/lzma/version.h +++ b/src/liblzma/api/lzma/version.h @@ -35,7 +35,7 @@ * \note The version number of LZMA Utils (and thus liblzma) * has nothing to with the version number of LZMA SDK. */ -#define LZMA_VERSION UINT32_C(49990030) +#define LZMA_VERSION UINT32_C(49990050) /** diff --git a/src/liblzma/api/lzma/vli.h b/src/liblzma/api/lzma/vli.h index bc0770ce..15a9d0bf 100644 --- a/src/liblzma/api/lzma/vli.h +++ b/src/liblzma/api/lzma/vli.h @@ -158,25 +158,34 @@ typedef uint64_t lzma_vli; * may use LZMA_VLI_VALUE_MAX for clarity. * * \param vli Integer to be encoded - * \param vli_pos How many bytes have already been written out. This - * must be less than 9 before calling this function. - * \param vli_size Minimum size that the variable-length representation - * must take. This is useful if you want to use - * variable-length integers as padding. Usually you want - * to set this to zero. The maximum allowed value is 9. + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer, *vli_pos + * must be set to zero. To use single-call encoding, + * set vli_pos to NULL. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * - * \return - LZMA_OK: So far all OK, but the integer is not + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not * completely written out yet. 
* - LZMA_STREAM_END: Integer successfully encoded. - * - LZMA_BUF_ERROR: No output space (*out_pos == out_size) - * - LZMA_PROG_ERROR: Arguments are not sane. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to no *out_pos == out_size; this function doesn't use + * LZMA_BUF_ERROR. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; this function doesn't use + * LZMA_BUF_ERROR. */ extern lzma_ret lzma_vli_encode( - lzma_vli vli, size_t *lzma_restrict vli_pos, size_t vli_size, + lzma_vli vli, size_t *lzma_restrict vli_pos, uint8_t *lzma_restrict out, size_t *lzma_restrict out_pos, size_t out_size); @@ -189,18 +198,30 @@ extern lzma_ret lzma_vli_encode( * application isn't required to initialize *vli. * \param vli_pos How many bytes have already been decoded. When * starting to decode a new integer, *vli_pos must - * be initialized to zero. + * be initialized to zero. To use single-call decoding, + * set this to NULL. * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * - * \return - LZMA_OK: So far all OK, but the integer is not + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not * completely decoded yet. * - LZMA_STREAM_END: Integer successfully decoded. - * - LZMA_BUF_ERROR: No input data (*in_pos == in_size) - * - LZMA_DATA_ERROR: Integer is longer than nine bytes. - * - LZMA_PROG_ERROR: Arguments are not sane. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be + * due to *in_pos == in_size; this function doesn't use + * LZMA_BUF_ERROR. 
+ * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due to + * too little input; this function doesn't use LZMA_BUF_ERROR. */ extern lzma_ret lzma_vli_decode(lzma_vli *lzma_restrict vli, size_t *lzma_restrict vli_pos, const uint8_t *lzma_restrict in, @@ -208,37 +229,9 @@ extern lzma_ret lzma_vli_decode(lzma_vli *lzma_restrict vli, /** - * \brief Decodes variable-length integer reading buffer backwards - * - * The variable-length integer encoding is designed so that it can be read - * either from the beginning to the end, or from the end to the beginning. - * This feature is needed to make the Stream parseable backwards; - * specifically, to read the Backward Size field in Stream Footer. - * - * \param vli Pointer to variable to hold the decoded integer. - * \param in Beginning of the input buffer - * \param in_size Number of bytes available in the in[] buffer. - * On successful decoding, this is updated to match - * the number of bytes used. (in[*in_size - 1] is the - * first byte to process. After successful decoding, - * in[*in_size] will point to the first byte of the - * variable-length integer.) - * - * \return - LZMA_OK: Decoding successful - * - LZMA_DATA_ERROR: No valid variable-length integer was found. - * - LZMA_BUF_ERROR: Not enough input. Note that in practice, - * this tends to be a sign of broken input, because the - * applications usually do give as much input to this function - * as the applications have available. - */ -extern lzma_ret lzma_vli_reverse_decode( - lzma_vli *vli, const uint8_t *in, size_t *in_size); - - -/** - * \brief Gets the minimum number of bytes required to encode vli + * \brief Gets the number of bytes required to encode vli * * \return Number of bytes on success (1-9). If vli isn't valid, * zero is returned. 
*/ -extern size_t lzma_vli_size(lzma_vli vli); +extern uint32_t lzma_vli_size(lzma_vli vli); diff --git a/src/liblzma/check/Makefile.am b/src/liblzma/check/Makefile.am index e436cb59..182e0868 100644 --- a/src/liblzma/check/Makefile.am +++ b/src/liblzma/check/Makefile.am @@ -14,7 +14,6 @@ libcheck_la_SOURCES = \ check.c \ check.h \ check_init.c \ - check_byteswap.h \ crc_macros.h libcheck_la_CPPFLAGS = \ -I@top_srcdir@/src/liblzma/api \ diff --git a/src/liblzma/check/check.c b/src/liblzma/check/check.c index ba59af2e..388b57e8 100644 --- a/src/liblzma/check/check.c +++ b/src/liblzma/check/check.c @@ -13,8 +13,15 @@ #include "check.h" -// See the .lzma header format specification section 2.2.2. -LZMA_API const uint32_t lzma_check_sizes[8] = { 0, 4, 4, 8, 16, 32, 32, 64 }; +// See the .lzma header format specification section 2.1.1.2. +LZMA_API const uint32_t lzma_check_sizes[LZMA_CHECK_ID_MAX + 1] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { @@ -27,6 +34,7 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { #endif false, // Reserved + false, // Reserved #ifdef HAVE_CHECK_CRC64 true, @@ -35,6 +43,10 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { #endif false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved #ifdef HAVE_CHECK_SHA256 true, @@ -44,6 +56,9 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { false, // Reserved false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved }; @@ -58,24 +73,24 @@ lzma_check_init(lzma_check *check, lzma_check_type type) #ifdef HAVE_CHECK_CRC32 case LZMA_CHECK_CRC32: - check->crc32 = 0; + check->state.crc32 = 0; break; #endif #ifdef HAVE_CHECK_CRC64 case LZMA_CHECK_CRC64: - check->crc64 = 0; + check->state.crc64 = 0; break; #endif #ifdef HAVE_CHECK_SHA256 case LZMA_CHECK_SHA256: 
- lzma_sha256_init(&check->sha256); + lzma_sha256_init(check); break; #endif default: - if (type <= LZMA_CHECK_ID_MAX) + if ((unsigned)(type) <= LZMA_CHECK_ID_MAX) ret = LZMA_UNSUPPORTED_CHECK; else ret = LZMA_PROG_ERROR; @@ -93,19 +108,19 @@ lzma_check_update(lzma_check *check, lzma_check_type type, switch (type) { #ifdef HAVE_CHECK_CRC32 case LZMA_CHECK_CRC32: - check->crc32 = lzma_crc32(buf, size, check->crc32); + check->state.crc32 = lzma_crc32(buf, size, check->state.crc32); break; #endif #ifdef HAVE_CHECK_CRC64 case LZMA_CHECK_CRC64: - check->crc64 = lzma_crc64(buf, size, check->crc64); + check->state.crc64 = lzma_crc64(buf, size, check->state.crc64); break; #endif #ifdef HAVE_CHECK_SHA256 case LZMA_CHECK_SHA256: - lzma_sha256_update(buf, size, &check->sha256); + lzma_sha256_update(buf, size, check); break; #endif @@ -120,11 +135,29 @@ lzma_check_update(lzma_check *check, lzma_check_type type, extern void lzma_check_finish(lzma_check *check, lzma_check_type type) { + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + *(uint32_t *)(check->buffer) = check->state.crc32; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + *(uint64_t *)(check->buffer) = check->state.crc64; + break; +#endif + #ifdef HAVE_CHECK_SHA256 - if (type == LZMA_CHECK_SHA256) - lzma_sha256_finish(&check->sha256); + case LZMA_CHECK_SHA256: + lzma_sha256_finish(check); + break; #endif + default: + break; + } + return; } diff --git a/src/liblzma/check/check.h b/src/liblzma/check/check.h index 74279ceb..45ca25e9 100644 --- a/src/liblzma/check/check.h +++ b/src/liblzma/check/check.h @@ -17,15 +17,21 @@ #include "common.h" +// Index hashing used to verify the Index with O(1) memory usage needs +// a good hash function. 
+#if defined(HAVE_CHECK_SHA256) +# define LZMA_CHECK_BEST LZMA_CHECK_SHA256 +#elif defined(HAVE_CHECK_CRC64) +# define LZMA_CHECK_BEST LZMA_CHECK_CRC64 +#else +# define LZMA_CHECK_BEST LZMA_CHECK_CRC32 +#endif + + typedef struct { /// Internal state uint32_t state[8]; - /// Temporary 8-byte aligned buffer to hold incomplete chunk. - /// After lzma_check_finish(), the first 32 bytes will contain - /// the final digest in big endian byte order. - uint8_t buffer[64]; - /// Size of the message excluding padding uint64_t size; @@ -34,10 +40,27 @@ typedef struct { /// \note This is not in the public API because this structure will /// change in future. -typedef union { - uint32_t crc32; - uint64_t crc64; - lzma_sha256 sha256; +typedef struct { + // FIXME Guarantee 8-byte alignment + + /// Buffer to hold the final result; this is also used as a temporary + /// buffer in SHA256. Note that this buffer must be 8-byte aligned. + uint8_t buffer[64]; + + /// Check-specific data + union { + uint32_t crc32; + uint64_t crc64; + + struct { + /// Internal state + uint32_t state[8]; + + /// Size of the message excluding padding + uint64_t size; + } sha256; + } state; + } lzma_check; @@ -91,12 +114,12 @@ extern void lzma_crc64_init(void); // SHA256 -extern void lzma_sha256_init(lzma_sha256 *sha256); +extern void lzma_sha256_init(lzma_check *check); extern void lzma_sha256_update( - const uint8_t *buf, size_t size, lzma_sha256 *sha256); + const uint8_t *buf, size_t size, lzma_check *check); -extern void lzma_sha256_finish(lzma_sha256 *sha256); +extern void lzma_sha256_finish(lzma_check *check); #endif diff --git a/src/liblzma/check/crc32_init.c b/src/liblzma/check/crc32_init.c index 0dd402a4..8b596091 100644 --- a/src/liblzma/check/crc32_init.c +++ b/src/liblzma/check/crc32_init.c @@ -17,7 +17,7 @@ #endif #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif diff --git a/src/liblzma/check/crc64_init.c b/src/liblzma/check/crc64_init.c index 
4c91a771..0029987a 100644 --- a/src/liblzma/check/crc64_init.c +++ b/src/liblzma/check/crc64_init.c @@ -17,7 +17,7 @@ #endif #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif diff --git a/src/liblzma/check/crc_macros.h b/src/liblzma/check/crc_macros.h index 5fbecf07..e827d07d 100644 --- a/src/liblzma/check/crc_macros.h +++ b/src/liblzma/check/crc_macros.h @@ -12,7 +12,7 @@ /////////////////////////////////////////////////////////////////////////////// #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" # define A(x) ((x) >> 24) # define B(x) (((x) >> 16) & 0xFF) diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c index 8e3d375a..ea51896e 100644 --- a/src/liblzma/check/sha256.c +++ b/src/liblzma/check/sha256.c @@ -20,7 +20,7 @@ #include "check.h" #ifndef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif // At least on x86, GCC is able to optimize this to a rotate instruction. 
@@ -104,18 +104,18 @@ transform(uint32_t state[static 8], const uint32_t data[static 16]) static void -process(lzma_sha256 *sha256) +process(lzma_check *check) { #ifdef WORDS_BIGENDIAN - transform(sha256->state, (uint32_t *)(sha256->buffer)); + transform(check->state.sha256.state, (uint32_t *)(check->buffer)); #else uint32_t data[16]; for (size_t i = 0; i < 16; ++i) - data[i] = bswap_32(*((uint32_t*)(sha256->buffer) + i)); + data[i] = bswap_32(*((uint32_t*)(check->buffer) + i)); - transform(sha256->state, data); + transform(check->state.sha256.state, data); #endif return; @@ -123,41 +123,41 @@ process(lzma_sha256 *sha256) extern void -lzma_sha256_init(lzma_sha256 *sha256) +lzma_sha256_init(lzma_check *check) { static const uint32_t s[8] = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, }; - memcpy(sha256->state, s, sizeof(s)); - sha256->size = 0; + memcpy(check->state.sha256.state, s, sizeof(s)); + check->state.sha256.size = 0; return; } extern void -lzma_sha256_update(const uint8_t *buf, size_t size, lzma_sha256 *sha256) +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check *check) { // Copy the input data into a properly aligned temporary buffer. // This way we can be called with arbitrarily sized buffers // (no need to be multiple of 64 bytes), and the code works also // on architectures that don't allow unaligned memory access. 
while (size > 0) { - const size_t copy_start = sha256->size & 0x3F; + const size_t copy_start = check->state.sha256.size & 0x3F; size_t copy_size = 64 - copy_start; if (copy_size > size) copy_size = size; - memcpy(sha256->buffer + copy_start, buf, copy_size); + memcpy(check->buffer + copy_start, buf, copy_size); buf += copy_size; size -= copy_size; - sha256->size += copy_size; + check->state.sha256.size += copy_size; - if ((sha256->size & 0x3F) == 0) - process(sha256); + if ((check->state.sha256.size & 0x3F) == 0) + process(check); } return; @@ -165,38 +165,41 @@ lzma_sha256_update(const uint8_t *buf, size_t size, lzma_sha256 *sha256) extern void -lzma_sha256_finish(lzma_sha256 *sha256) +lzma_sha256_finish(lzma_check *check) { // Add padding as described in RFC 3174 (it describes SHA-1 but // the same padding style is used for SHA-256 too). - size_t pos = sha256->size & 0x3F; - sha256->buffer[pos++] = 0x80; + size_t pos = check->state.sha256.size & 0x3F; + check->buffer[pos++] = 0x80; while (pos != 64 - 8) { if (pos == 64) { - process(sha256); + process(check); pos = 0; } - sha256->buffer[pos++] = 0x00; + check->buffer[pos++] = 0x00; } // Convert the message size from bytes to bits. 
- sha256->size *= 8; + check->state.sha256.size *= 8; #ifdef WORDS_BIGENDIAN - *(uint64_t *)(sha256->buffer + 64 - 8) = sha256->size; + *(uint64_t *)(check->buffer + 64 - 8) = check->state.sha256.size; #else - *(uint64_t *)(sha256->buffer + 64 - 8) = bswap_64(sha256->size); + *(uint64_t *)(check->buffer + 64 - 8) + = bswap_64(check->state.sha256.size); #endif - process(sha256); + process(check); for (size_t i = 0; i < 8; ++i) #ifdef WORDS_BIGENDIAN - ((uint32_t *)(sha256->buffer))[i] = sha256->state[i]; + ((uint32_t *)(check->buffer))[i] + = check->state.sha256.state[i]; #else - ((uint32_t *)(sha256->buffer))[i] = bswap_32(sha256->state[i]); + ((uint32_t *)(check->buffer))[i] + = bswap_32(check->state.sha256.state[i]); #endif return; diff --git a/src/liblzma/common/Makefile.am b/src/liblzma/common/Makefile.am index c76ce14f..40b42250 100644 --- a/src/liblzma/common/Makefile.am +++ b/src/liblzma/common/Makefile.am @@ -25,26 +25,20 @@ libcommon_la_SOURCES = \ common.h \ bsr.h \ allocator.c \ + block_util.c \ block_private.h \ - extra.c \ features.c \ index.c \ - info.c \ init.c \ memory_limiter.c \ memory_usage.c \ next_coder.c \ raw_common.c \ raw_common.h \ + stream_flags_equal.c \ code.c \ version.c -if COND_FILTER_COPY -libcommon_la_SOURCES += \ - copy_coder.c \ - copy_coder.h -endif - if COND_FILTER_DELTA libcommon_la_SOURCES += \ delta_common.c \ @@ -69,21 +63,17 @@ libcommon_la_SOURCES += \ block_encoder.c \ block_encoder.h \ block_header_encoder.c \ - easy_common.c \ - easy_common.h \ - easy_single.c \ - easy_multi.c \ + easy.c \ filter_flags_encoder.c \ + index_encoder.c \ + index_encoder.h \ init_encoder.c \ - metadata_encoder.c \ - metadata_encoder.h \ raw_encoder.c \ raw_encoder.h \ stream_common.c \ stream_common.h \ - stream_encoder_single.c \ - stream_encoder_multi.c \ - stream_encoder_multi.h \ + stream_encoder.c \ + stream_encoder.h \ stream_flags_encoder.c \ vli_encoder.c endif @@ -96,14 +86,13 @@ libcommon_la_SOURCES += \ block_decoder.h \ 
block_header_decoder.c \ filter_flags_decoder.c \ + index_decoder.c \ + index_hash.c \ init_decoder.c \ - metadata_decoder.c \ - metadata_decoder.h \ raw_decoder.c \ raw_decoder.h \ stream_decoder.c \ stream_flags_decoder.c \ stream_flags_decoder.h \ - vli_decoder.c \ - vli_reverse_decoder.c + vli_decoder.c endif diff --git a/src/liblzma/common/alignment.c b/src/liblzma/common/alignment.c index 2d468fe5..c80e5fab 100644 --- a/src/liblzma/common/alignment.c +++ b/src/liblzma/common/alignment.c @@ -25,7 +25,6 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) { for (size_t i = 0; filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // The same as the input, check the next filter. continue; @@ -69,9 +68,8 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) extern LZMA_API uint32_t lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) { - // Check if there is only an implicit Copy filter. if (filters[0].id == LZMA_VLI_VALUE_UNKNOWN) - return guess; + return UINT32_MAX; // Find the last filter in the chain. size_t i = 0; @@ -80,7 +78,6 @@ lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) do { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // It's the same as the input alignment, so // check the next filter. 
diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index 91df5bf2..062f6fab 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -32,9 +32,15 @@ struct lzma_coder_s { SEQ_CODE, } sequence; + /// Position in the header fields size_t pos; - lzma_options_alone options; + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; }; @@ -50,34 +56,39 @@ alone_decode(lzma_coder *coder, && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: - if (lzma_lzma_decode_properties( - &coder->options.lzma, in[*in_pos])) - return LZMA_DATA_ERROR; + if (lzma_lzma_decode_properties(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: - coder->options.lzma.dictionary_size + coder->options.dictionary_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { - // A hack to ditch tons of false positives: We allow - // only dictionary sizes that are a power of two. - // LZMA_Alone didn't create other kinds of files, - // although it's not impossible that files with - // other dictionary sizes exist. Well, if someone - // complains, this will be reconsidered. - size_t count = 0; - for (size_t i = 0; i < 32; ++i) - if (coder->options.lzma.dictionary_size - & (UINT32_C(1) << i)) - ++count; - - if (count != 1 || coder->options.lzma.dictionary_size + if (coder->options.dictionary_size + < LZMA_DICTIONARY_SIZE_MIN + || coder->options.dictionary_size > LZMA_DICTIONARY_SIZE_MAX) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; + + // A hack to ditch tons of false positives: We allow + // only dictionary sizes that are 2^n or 2^n + 2^(n-1). + // LZMA_Alone created only files with 2^n, but accepts + // any dictionary size. 
If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dictionary_size) + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; @@ -87,7 +98,7 @@ alone_decode(lzma_coder *coder, break; case SEQ_UNCOMPRESSED_SIZE: - coder->options.uncompressed_size + coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 8) { @@ -95,11 +106,10 @@ alone_decode(lzma_coder *coder, // if the uncompressed size is known, it must be less // than 256 GiB. Again, if someone complains, this // will be reconsidered. - if (coder->options.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->options.uncompressed_size + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; @@ -113,9 +123,7 @@ alone_decode(lzma_coder *coder, lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, - .options = &coder->options.lzma, - .uncompressed_size = coder->options - .uncompressed_size, + .options = &coder->options, }, { .init = NULL, } @@ -126,6 +134,10 @@ alone_decode(lzma_coder *coder, if (ret != LZMA_OK) return ret; + // Use a hack to set the uncompressed size. 
+ lzma_lzma_decoder_uncompressed_size(&coder->next, + coder->uncompressed_size); + coder->sequence = SEQ_CODE; } @@ -169,8 +181,8 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) next->coder->sequence = SEQ_PROPERTIES; next->coder->pos = 0; - next->coder->options.lzma.dictionary_size = 0; - next->coder->options.uncompressed_size = 0; + next->coder->options.dictionary_size = 0; + next->coder->uncompressed_size = 0; return LZMA_OK; } @@ -179,17 +191,14 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - // We need to use _init2 because we don't pass any varadic args. - lzma_next_coder_init2(next, allocator, alone_decoder_init, - alone_decoder_init, allocator); + lzma_next_coder_init0(alone_decoder_init, next, allocator); } extern LZMA_API lzma_ret lzma_alone_decoder(lzma_stream *strm) { - lzma_next_strm_init2(strm, alone_decoder_init, - alone_decoder_init, strm->allocator); + lzma_next_strm_init0(strm, alone_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c index 7629aa77..f94a21c1 100644 --- a/src/liblzma/common/alone_encoder.c +++ b/src/liblzma/common/alone_encoder.c @@ -21,19 +21,19 @@ #include "lzma_encoder.h" +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + struct lzma_coder_s { lzma_next_coder next; enum { - SEQ_PROPERTIES, - SEQ_DICTIONARY_SIZE, - SEQ_UNCOMPRESSED_SIZE, + SEQ_HEADER, SEQ_CODE, } sequence; - size_t pos; - - lzma_options_alone options; + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; }; @@ -47,47 +47,23 @@ alone_encode(lzma_coder *coder, { while (*out_pos < out_size) switch (coder->sequence) { - case SEQ_PROPERTIES: - if (lzma_lzma_encode_properties( - &coder->options.lzma, out + *out_pos)) { - return LZMA_PROG_ERROR; - } - - coder->sequence = 
SEQ_DICTIONARY_SIZE; - ++*out_pos; - break; - - case SEQ_DICTIONARY_SIZE: - out[*out_pos] = coder->options.lzma.dictionary_size - >> (coder->pos * 8); - - if (++coder->pos == 4) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - ++*out_pos; - break; - - case SEQ_UNCOMPRESSED_SIZE: - out[*out_pos] = coder->options.uncompressed_size - >> (coder->pos * 8); - - if (++coder->pos == 8) { - coder->pos = 0; - coder->sequence = SEQ_CODE; - } - - ++*out_pos; + case SEQ_HEADER: + bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; break; - case SEQ_CODE: { + case SEQ_CODE: return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -107,7 +83,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_alone *options) + const lzma_options_lzma *options) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -119,23 +95,42 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the LZMA_Alone coder variables. - next->coder->sequence = SEQ_PROPERTIES; - next->coder->pos = 0; - next->coder->options = *options; + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; - // Verify uncompressed_size since the other functions assume that - // it is valid. 
- if (!lzma_vli_is_valid(next->coder->options.uncompressed_size)) + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_encode_properties(options, next->coder->header)) return LZMA_PROG_ERROR; + // - Dictionary size (4 bytes) + if (options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || options->dictionary_size > LZMA_DICTIONARY_SIZE_MAX) + return LZMA_PROG_ERROR; + + // Round up to to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next. While the header would allow any 32-bit integer, + // we do this to keep the decoder of liblzma accepting the resulting + // files. + uint32_t d = options->dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + integer_write_32(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + // Initialize the LZMA encoder. const lzma_filter_info filters[2] = { { .init = &lzma_lzma_encoder_init, - .options = &next->coder->options.lzma, - .uncompressed_size = next->coder->options - .uncompressed_size, + .options = (void *)(options), }, { .init = NULL, } @@ -156,7 +151,7 @@ lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_alone_encoder(lzma_stream *strm, const lzma_options_alone *options) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) { lzma_next_strm_init(strm, alone_encoder_init, options); diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index 7e92df9a..765a27b1 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -17,15 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "stream_decoder.h" #include "alone_decoder.h" struct lzma_coder_s { lzma_next_coder next; - - lzma_extra **header; - lzma_extra **footer; bool initialized; }; @@ -43,8 +40,8 @@ 
auto_decode(lzma_coder *coder, lzma_allocator *allocator, lzma_ret ret; if (in[*in_pos] == 0xFF) - ret = lzma_stream_decoder_init(&coder->next, allocator, - coder->header, coder->footer); + ret = lzma_stream_decoder_init( + &coder->next, allocator); else ret = lzma_alone_decoder_init(&coder->next, allocator); @@ -69,8 +66,7 @@ auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -82,8 +78,6 @@ auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->header = header; - next->coder->footer = footer; next->coder->initialized = false; return LZMA_OK; @@ -102,9 +96,9 @@ lzma_auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_auto_decoder(lzma_stream *strm, lzma_extra **header, lzma_extra **footer) +lzma_auto_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, auto_decoder_init, header, footer); + lzma_next_strm_init0(strm, auto_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index e1b5dc96..f07c4e06 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -26,129 +26,47 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, - SEQ_END, + SEQ_CHECK, } sequence; /// The filters in the chain; initialized with lzma_raw_decoder_init(). 
lzma_next_coder next; - /// Decoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the encoding has been finished. lzma_options_block *options; - /// Position in variable-length integers (and in some other places). - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; + /// Position when reading the Check field + size_t check_pos; - /// Temporary location for the Uncompressed Size and Backward Size - /// fields in Block Footer. - lzma_vli tmp; - - /// Size of the Backward Size field - This is needed so that we - /// can verify the Backward Size and still keep updating total_size. 
- size_t size_of_backward_size; + /// Check of the uncompressed data + lzma_check check; }; static lzma_ret -update_sequence(lzma_coder *coder) -{ - switch (coder->sequence) { - case SEQ_CODE: - if (coder->options->check != LZMA_CHECK_NONE) { - lzma_check_finish(&coder->check, - coder->options->check); - coder->sequence = SEQ_CHECK; - break; - } - - // Fall through - - case SEQ_CHECK: - if (coder->options->has_uncompressed_size_in_footer) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_backward_size) { - coder->sequence = SEQ_BACKWARD_SIZE; - break; - } - - // Fall through - - case SEQ_BACKWARD_SIZE: - if (coder->options->handle_padding) { - coder->sequence = SEQ_PADDING; - break; - } - - case SEQ_PADDING: - if (!is_size_valid(coder->total_size, - coder->options->total_size) - || !is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret block_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - // Special case when the Block has only Block Header. - if (coder->sequence == SEQ_END) - return LZMA_STREAM_END; - - // FIXME: Termination condition should work but could be cleaner. 
- while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_CODE)) switch (coder->sequence) { case SEQ_CODE: { + if (*out_pos >= out_size) + return LZMA_OK; + const size_t in_start = *in_pos; const size_t out_start = *out_pos; @@ -159,13 +77,13 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, in_used, - coder->total_limit) - || update_size(&coder->compressed_size, - in_used, - coder->options->compressed_size) + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_VALUE_MAX. + if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) || update_size(&coder->uncompressed_size, - out_used, coder->uncompressed_limit)) + out_used, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; lzma_check_update(&coder->check, coder->options->check, @@ -174,116 +92,61 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - return_if_error(update_sequence(coder)); - - break; + coder->sequence = SEQ_PADDING; } - case SEQ_CHECK: - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - if (((coder->check.crc32 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + // Fall through - case LZMA_CHECK_CRC64: - if (((coder->check.crc64 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + case SEQ_PADDING: + // If Compressed Data is padded to a multiple of four bytes. 
+ while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; - case LZMA_CHECK_SHA256: - if (coder->check.sha256.buffer[coder->pos] - != in[*in_pos]) + if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - break; - - default: - assert(coder->options->check != LZMA_CHECK_NONE); - assert(coder->options->check <= LZMA_CHECK_ID_MAX); - break; - } - - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - ++*in_pos; - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - return_if_error(update_sequence(coder)); - coder->pos = 0; + if (update_size(&coder->compressed_size, 1, + coder->compressed_limit)) + return LZMA_DATA_ERROR; } - break; - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - if (update_size(&coder->total_size, *in_pos - in_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->tmp != coder->uncompressed_size) - return LZMA_DATA_ERROR; - - coder->pos = 0; - coder->tmp = 0; - - return_if_error(update_sequence(coder)); - - break; - } - - case SEQ_BACKWARD_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - if (update_size(&coder->total_size, in_used, - coder->total_limit)) + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->size_of_backward_size += in_used; - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the values into coder->options. The caller + // may use this information to construct Index. 
+ coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->tmp != coder->total_size - - coder->size_of_backward_size) - return LZMA_DATA_ERROR; + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; - return_if_error(update_sequence(coder)); + lzma_check_finish(&coder->check, coder->options->check); + coder->sequence = SEQ_CHECK; - break; - } + // Fall through - case SEQ_PADDING: - if (in[*in_pos] == 0x00) { - if (update_size(&coder->total_size, 1, - coder->total_limit)) + case SEQ_CHECK: + while (*in_pos < in_size) { + if (in[(*in_pos)++] != coder->check.buffer[ + coder->check_pos]) return LZMA_DATA_ERROR; - ++*in_pos; - break; + if (++coder->check_pos == lzma_check_sizes[ + coder->options->check]) + return LZMA_STREAM_END; } - return update_sequence(coder); - - default: - return LZMA_PROG_ERROR; + return LZMA_OK; } - return LZMA_OK; + return LZMA_PROG_ERROR; } @@ -300,9 +163,12 @@ static lzma_ret block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // This is pretty similar to lzma_block_encoder_init(). - // See comments there. + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) + return LZMA_PROG_ERROR; + // Allocate and initialize *next->coder if needed. 
if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) @@ -313,40 +179,28 @@ block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - if (validate_options_1(options)) - return LZMA_PROG_ERROR; - - if (validate_options_2(options)) - return LZMA_DATA_ERROR; - - return_if_error(lzma_check_init(&next->coder->check, options->check)); - + // Basic initializations next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); - next->coder->tmp = 0; - next->coder->size_of_backward_size = 0; - - if (!options->has_eopm && options->uncompressed_size == 0) { - // The Compressed Data field is empty, thus we skip SEQ_CODE - // phase completely. - const lzma_ret ret = update_sequence(next->coder); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that Total Size of the Block still is a valid VLI and + // a multiple of four. + next->coder->compressed_limit + = options->compressed_size == LZMA_VLI_VALUE_UNKNOWN + ? (LZMA_VLI_VALUE_MAX & ~LZMA_VLI_C(3)) + - options->header_size + - lzma_check_sizes[options->check] + : options->compressed_size; + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); return lzma_raw_decoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? 
LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 78185790..3add45a9 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -34,37 +34,21 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK_FINISH, - SEQ_CHECK_COPY, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, + SEQ_CHECK, } sequence; - /// Position in .header and .check. - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Position when writing out the Check field + size_t check_pos; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; - - /// Backward Size - This is a copy of total_size right before - /// the Backward Size field. 
- lzma_vli backward_size; + /// Check of the uncompressed data + lzma_check check; }; @@ -80,16 +64,16 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, if (coder->options->uncompressed_size - coder->uncompressed_size != (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } else { if (coder->options->uncompressed_size - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } } else if (LZMA_VLI_VALUE_MAX - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) { - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } // Main loop @@ -107,11 +91,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, out_used, - coder->total_limit) - || update_size(&coder->compressed_size, - out_used, - coder->options->compressed_size)) + // FIXME We must also check that Total Size doesn't get + // too big. + if (update_size(&coder->compressed_size, out_used, + coder->options->compressed_size)) return LZMA_DATA_ERROR; // No need to check for overflow because we have already @@ -125,141 +108,54 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; assert(*in_pos == in_size); + coder->sequence = SEQ_PADDING; + break; + } + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. + if (coder->compressed_size & 3) { + out[*out_pos] = 0x00; + ++*out_pos; + + if (update_size(&coder->compressed_size, 1, + coder->options->compressed_size)) + return LZMA_DATA_ERROR; + + break; + } // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values give to us. + // values. Verify that they match the values given to us. 
if (!is_size_valid(coder->compressed_size, coder->options->compressed_size) || !is_size_valid(coder->uncompressed_size, coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->sequence = SEQ_CHECK_FINISH; - break; - } + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - case SEQ_CHECK_FINISH: - if (coder->options->check == LZMA_CHECK_NONE) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); - coder->sequence = SEQ_CHECK_COPY; + coder->sequence = SEQ_CHECK; // Fall through - case SEQ_CHECK_COPY: - assert(lzma_check_sizes[coder->options->check] > 0); - - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - out[*out_pos] = coder->check.crc32 >> (coder->pos * 8); - break; - - case LZMA_CHECK_CRC64: - out[*out_pos] = coder->check.crc64 >> (coder->pos * 8); - break; - - case LZMA_CHECK_SHA256: - out[*out_pos] = coder->check.sha256.buffer[coder->pos]; - break; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - + case SEQ_CHECK: + out[*out_pos] = coder->check.buffer[coder->check_pos]; ++*out_pos; - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_uncompressed_size_in_footer) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->uncompressed_size, - &coder->pos, 1, - out, out_pos, out_size); - - // Updating the size this way instead of doing in a - // single chunk using lzma_vli_size(), because this - // way we detect when exactly we are going out of - // our limits. 
- if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - coder->pos = 0; - } + if (++coder->check_pos + == lzma_check_sizes[coder->options->check]) + return LZMA_STREAM_END; - coder->backward_size = coder->total_size; - coder->sequence = SEQ_BACKWARD_SIZE; break; - case SEQ_BACKWARD_SIZE: - if (coder->options->has_backward_size) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->backward_size, &coder->pos, 1, - out, out_pos, out_size); - - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - } - - coder->sequence = SEQ_PADDING; - break; - - case SEQ_PADDING: - if (coder->options->handle_padding) { - assert(coder->options->total_size - != LZMA_VLI_VALUE_UNKNOWN); - - if (coder->total_size < coder->options->total_size) { - out[*out_pos] = 0x00; - ++*out_pos; - - if (update_size(&coder->total_size, 1, - coder->total_limit)) - return LZMA_DATA_ERROR; - - break; - } - } - - // Now also Total Size is known. Verify it. - if (!is_size_valid(coder->total_size, - coder->options->total_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - default: return LZMA_PROG_ERROR; } @@ -281,10 +177,9 @@ static lzma_ret block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // Validate some options. 
- if (validate_options_1(options) || validate_options_2(options) - || (options->handle_padding && options->total_size - == LZMA_VLI_VALUE_UNKNOWN)) + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -298,40 +193,19 @@ block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the check. - return_if_error(lzma_check_init(&next->coder->check, options->check)); - - // If End of Payload Marker is not used and Uncompressed Size is zero, - // Compressed Data is empty. That is, we don't call the encoder at all. - // We initialize it though; it allows detecting invalid options. - if (!options->has_eopm && options->uncompressed_size == 0) { - // Also Compressed Size must be zero if it has been - // given to us. - if (!is_size_valid(0, options->compressed_size)) - return LZMA_PROG_ERROR; - - next->coder->sequence = SEQ_CHECK_FINISH; - } else { - next->coder->sequence = SEQ_CODE; - } - - // Other initializations + // Basic initializations + next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); // Initialize the requested filters. return lzma_raw_encoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? 
LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 7676c795..b9e072e0 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -21,353 +21,111 @@ #include "check.h" -struct lzma_coder_s { - lzma_options_block *options; - - enum { - SEQ_FLAGS_1, - SEQ_FLAGS_2, - SEQ_COMPRESSED_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_FILTER_FLAGS_INIT, - SEQ_FILTER_FLAGS_DECODE, - SEQ_CRC32, - SEQ_PADDING - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// CRC32 of the Block Header - uint32_t crc32; - - lzma_next_coder filter_flags_decoder; -}; - - -static bool -update_sequence(lzma_coder *coder) +static void +free_properties(lzma_options_block *options, lzma_allocator *allocator) { - switch (coder->sequence) { - case SEQ_FLAGS_2: - if (coder->options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_COMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_COMPRESSED_SIZE: - if (coder->options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - coder->pos = 0; - - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: - if (coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_FILTER_FLAGS_INIT; - break; - } - - if (coder->options->has_crc32) { - coder->pos = 0; - coder->sequence = SEQ_CRC32; - break; - } - - case SEQ_CRC32: - if (coder->options->padding != 0) { - coder->pos = 0; - coder->sequence = SEQ_PADDING; - break; - } - - return true; - - default: - assert(0); - return true; + // Free allocated filter options. 
The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + lzma_free(options->filters[i].options, allocator); + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - return false; + return; } -static lzma_ret -block_header_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in) { - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FLAGS_1: - // Check that the reserved bit is unset. Use HEADER_ERROR - // because newer version of liblzma may support the reserved - // bit, although it is likely that this is just a broken file. - if (in[*in_pos] & 0x40) - return LZMA_HEADER_ERROR; - - // Number of filters: we prepare appropriate amount of - // variables for variable-length integer parsing. The - // initialization function has already reset the rest - // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows - // us to later know how many filters there are. - for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i) - coder->options->filters[i].id = 0; - - // End of Payload Marker flag - coder->options->has_eopm = (in[*in_pos] & 0x08) != 0; - - // Compressed Size: Prepare for variable-length integer - // parsing if it is known. - if (in[*in_pos] & 0x10) - coder->options->compressed_size = 0; - - // Uncompressed Size: the same. 
- if (in[*in_pos] & 0x20) - coder->options->uncompressed_size = 0; - - // Is Metadata Block flag - coder->options->is_metadata = (in[*in_pos] & 0x80) != 0; - - // We need at least one: Uncompressed Size or EOPM. - if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - && !coder->options->has_eopm) - return LZMA_DATA_ERROR; - - // Update header CRC32. - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - coder->sequence = SEQ_FLAGS_2; - break; - - case SEQ_FLAGS_2: - // Check that the reserved bits are unset. - if (in[*in_pos] & 0xE0) - return LZMA_DATA_ERROR; - - // Get the size of Header Padding. - coder->options->padding = in[*in_pos] & 0x1F; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - - case SEQ_COMPRESSED_SIZE: { - // Store the old input position to be used when - // updating coder->header_crc32. - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->compressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->compressed_reserve += in_used; - assert(coder->options->compressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->uncompressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->uncompressed_reserve += in_used; - assert(coder->options->uncompressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != 
LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_FILTER_FLAGS_INIT: { - assert(coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN); - - const lzma_ret ret = lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, allocator, - &coder->options->filters[coder->pos]); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_FILTER_FLAGS_DECODE; + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or not over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: { - const size_t in_start = *in_pos; + size_t in_size = options->header_size; - const lzma_ret ret = coder->filter_flags_decoder.code( - coder->filter_flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - const size_t in_used = *in_pos - in_start; - coder->options->header_size += in_used; - coder->crc32 = lzma_crc32(in + in_start, - in_used, coder->crc32); + // Validate. The caller must have set options->header_size with + // lzma_block_header_size_decode() macro, so it is a programming error + // if these tests fail. + if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN + || in_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (in_size & 3) + || lzma_block_header_size_decode(in[0]) != in_size) + return LZMA_PROG_ERROR; - if (ret != LZMA_STREAM_END) - return ret; + // Exclude the CRC32 field. 
+ in_size -= 4; - ++coder->pos; + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) + return LZMA_DATA_ERROR; - if (update_sequence(coder)) - return LZMA_STREAM_END; + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_HEADER_ERROR; - break; - } + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; - case SEQ_CRC32: - assert(coder->options->has_crc32); + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&options->compressed_size, + NULL, in, &in_pos, in_size)); - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1) return LZMA_DATA_ERROR; - ++*in_pos; - ++coder->pos; - - // Check if we reached end of the CRC32 field. - if (coder->pos == 4) { - coder->options->header_size += 4; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - } - - break; + options->compressed_size = (options->compressed_size + 1) * 4; - case SEQ_PADDING: - if (in[*in_pos] != 0x00) + // Check that Total Size (that is, size of + // Block Header + Compressed Data + Check) is + // representable as a VLI. 
+ if (lzma_block_total_size_get(options) == 0) return LZMA_DATA_ERROR; - - ++*in_pos; - ++coder->options->header_size; - ++coder->pos; - - if (coder->pos < (size_t)(coder->options->padding)) - break; - - return LZMA_STREAM_END; - - default: - return LZMA_PROG_ERROR; + } else { + options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; } - return LZMA_OK; -} - - -static void -block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->filter_flags_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &block_header_decode; - next->end = &block_header_decoder_end; - next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&options->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &options->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(options, allocator); + return ret; + } } - // Assume that Compressed Size and Uncompressed Size are unknown. - options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; - options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // We will calculate the sizes of these fields too so that the - // application may rewrite the header if it wishes so. 
- options->compressed_reserve = 0; - options->uncompressed_reserve = 0; + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(options, allocator); - // The Block Flags field is always present, so include its size here - // and we don't need to worry about it in block_header_decode(). - options->header_size = 2; - - // Reset filters[] to indicate empty list of filters. - // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this. - for (size_t i = 0; i < 8; ++i) { - options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; - options->filters[i].options = NULL; + // Possibly some new field present so use + // LZMA_HEADER_ERROR instead of LZMA_DATA_ERROR. + return LZMA_HEADER_ERROR; + } } - next->coder->options = options; - next->coder->sequence = SEQ_FLAGS_1; - next->coder->pos = 0; - next->coder->crc32 = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_block_header_decoder(lzma_stream *strm, - lzma_options_block *options) -{ - lzma_next_strm_init(strm, lzma_block_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - return LZMA_OK; } diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 594b4fc0..ed0c88ba 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -24,188 +24,129 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_options_block *options) { - // Block Flags take two bytes. - size_t size = 2; + // Block Header Size + Block Flags + CRC32. + size_t size = 1 + 1 + 4; // Compressed Size - if (!lzma_vli_is_valid(options->compressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->compressed_reserve != 0) { - // Make sure that the known Compressed Size fits into the - // reserved space. Note that lzma_vli_size() will return zero - // if options->compressed_size is LZMA_VLI_VALUE_UNKNOWN, so - // we don't need to handle that special case separately. 
- if (options->compressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->compressed_size) - > (size_t)(options->compressed_reserve)) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1 + || options->compressed_size == 0 + || (options->compressed_size & 3)) return LZMA_PROG_ERROR; - size += options->compressed_reserve; - - } else if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Compressed Size is known. We have already checked - // that is is a valid VLI, and since it isn't - // LZMA_VLI_VALUE_UNKNOWN, we can be sure that - // lzma_vli_size() will succeed. - size += lzma_vli_size(options->compressed_size); + size += lzma_vli_size(options->compressed_size / 4 - 1); } // Uncompressed Size - if (!lzma_vli_is_valid(options->uncompressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->uncompressed_reserve != 0) { - if (options->uncompressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->uncompressed_size) - > (size_t)(options->uncompressed_reserve)) + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const size_t add = lzma_vli_size(options->uncompressed_size); + if (add == 0) return LZMA_PROG_ERROR; - size += options->uncompressed_reserve; - - } else if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - size += lzma_vli_size(options->uncompressed_size); + size += add; } // List of Filter Flags + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_PROG_ERROR; + for (size_t i = 0; options->filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { // Don't allow too many filters. 
- if (i == 7) + if (i == 4) return LZMA_PROG_ERROR; - uint32_t tmp; - const lzma_ret ret = lzma_filter_flags_size(&tmp, - options->filters + i); - if (ret != LZMA_OK) - return ret; + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + options->filters + i)); - size += tmp; + size += add; } - // CRC32 - if (options->has_crc32) - size += 4; - - // Padding - int32_t padding; - if (options->padding == LZMA_BLOCK_HEADER_PADDING_AUTO) { - const uint32_t preferred = lzma_alignment_output( - options->filters, 1); - const uint32_t unaligned = size + options->alignment; - padding = (int32_t)(unaligned % preferred); - if (padding != 0) - padding = preferred - padding; - } else if (options->padding >= LZMA_BLOCK_HEADER_PADDING_MIN - && options->padding <= LZMA_BLOCK_HEADER_PADDING_MAX) { - padding = options->padding; - } else { - return LZMA_PROG_ERROR; - } + // Pad to a multiple of four bytes. + options->header_size = (size + 3) & ~(size_t)(3); - // All success. Copy the calculated values to the options structure. - options->padding = padding; - options->header_size = size + (size_t)(padding); + // NOTE: We don't verify that Total Size of the Block stays within + // limits. This is because it is possible that we are called with + // exaggerated values to reserve space for Block Header, and later + // called again with lower, real values. return LZMA_OK; } extern LZMA_API lzma_ret -lzma_block_header_encode(uint8_t *out, const lzma_options_block *options) +lzma_block_header_encode(const lzma_options_block *options, uint8_t *out) { - // We write the Block Flags later. - if (options->header_size < 2) + if ((options->header_size & 3) + || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) return LZMA_PROG_ERROR; - const size_t out_size = options->header_size; + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = options->header_size - 4; + + // Store the Block Header Size. 
+ out[0] = out_size / 4; + + // We write Block Flags a little later. size_t out_pos = 2; // Compressed Size - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) { - const lzma_vli size = options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->compressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->compressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Compressed Size must be non-zero, fit into a 63-bit + // integer and be a multiple of four. Also the Total Size + // of the Block must fit into 63-bit integer. + if (options->compressed_size == 0 + || (options->compressed_size & 3) + || options->compressed_size + > LZMA_VLI_VALUE_MAX + || lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; + return_if_error(lzma_vli_encode( + options->compressed_size / 4 - 1, NULL, + out, &out_pos, out_size)); } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) { - const lzma_vli size = options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->uncompressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->uncompressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + return_if_error(lzma_vli_encode( + options->uncompressed_size, NULL, + out, &out_pos, out_size)); // Filter Flags - size_t filter_count; - for (filter_count = 0; options->filters[filter_count].id - != LZMA_VLI_VALUE_UNKNOWN; ++filter_count) { - // There can be at maximum of seven filters. - if (filter_count == 7) - return LZMA_PROG_ERROR; - - const lzma_ret ret = lzma_filter_flags_encode(out, &out_pos, - out_size, options->filters + filter_count); - // FIXME: Don't return LZMA_BUF_ERROR. 
- if (ret != LZMA_OK) - return ret; - } - - // Block Flags 1 - out[0] = filter_count; - - if (options->has_eopm) - out[0] |= 0x08; - else if (options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) return LZMA_PROG_ERROR; - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) - out[0] |= 0x10; - - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) - out[0] |= 0x20; + size_t filter_count = 0; + do { + // There can be at maximum of four filters. + if (filter_count == 4) + return LZMA_PROG_ERROR; - if (options->is_metadata) - out[0] |= 0x80; + return_if_error(lzma_filter_flags_encode(out, &out_pos, + out_size, options->filters + filter_count)); - // Block Flags 2 - if (options->padding < LZMA_BLOCK_HEADER_PADDING_MIN - || options->padding > LZMA_BLOCK_HEADER_PADDING_MAX) - return LZMA_PROG_ERROR; + } while (options->filters[++filter_count].id + != LZMA_VLI_VALUE_UNKNOWN); - out[1] = (uint8_t)(options->padding); + // Block Flags + out[1] = filter_count - 1; - // CRC32 - if (options->has_crc32) { - if (out_size - out_pos < 4) - return LZMA_PROG_ERROR; + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x40; - const uint32_t crc = lzma_crc32(out, out_pos, 0); - for (size_t i = 0; i < 4; ++i) - out[out_pos++] = crc >> (i * 8); - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x80; - // Padding - the amount of available space must now match with - // the size of the Padding field. 
- if (out_size - out_pos != (size_t)(options->padding)) - return LZMA_PROG_ERROR; + // Padding + memzero(out + out_pos, out_size - out_pos); - memzero(out + out_pos, (size_t)(options->padding)); + // CRC32 + integer_write_32(out + out_size, lzma_crc32(out, out_size, 0)); return LZMA_OK; } diff --git a/src/liblzma/common/block_private.h b/src/liblzma/common/block_private.h index 16d95b9f..235e96b8 100644 --- a/src/liblzma/common/block_private.h +++ b/src/liblzma/common/block_private.h @@ -22,6 +22,7 @@ #include "common.h" + static inline bool update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) { @@ -43,54 +44,4 @@ is_size_valid(lzma_vli size, lzma_vli reference) return reference == LZMA_VLI_VALUE_UNKNOWN || reference == size; } - -/// If any of these tests fail, the caller has to return LZMA_PROG_ERROR. -static inline bool -validate_options_1(const lzma_options_block *options) -{ - return options == NULL - || !lzma_vli_is_valid(options->compressed_size) - || !lzma_vli_is_valid(options->uncompressed_size) - || !lzma_vli_is_valid(options->total_size) - || !lzma_vli_is_valid(options->total_limit) - || !lzma_vli_is_valid(options->uncompressed_limit); -} - - -/// If any of these tests fail, the encoder has to return LZMA_PROG_ERROR -/// because something is going horribly wrong if such values get passed -/// to the encoder. In contrast, the decoder has to return LZMA_DATA_ERROR, -/// since these tests failing indicate that something is wrong in the Stream. 
-static inline bool -validate_options_2(const lzma_options_block *options) -{ - if ((options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - && options->uncompressed_size - > options->uncompressed_limit) - || (options->total_size != LZMA_VLI_VALUE_UNKNOWN - && options->total_size - > options->total_limit) - || (!options->has_eopm && options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - || options->header_size > options->total_size) - return true; - - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Calculate a rough minimum possible valid Total Size of - // this Block, and check that total_size and total_limit - // are big enough. Note that the real minimum size can be - // bigger due to the Check, Uncompressed Size, Backwards - // Size, pr Padding being present. A rough check here is - // enough for us to catch the most obvious errors as early - // as possible. - const lzma_vli total_min = options->compressed_size - + (lzma_vli)(options->header_size); - if (total_min > options->total_size - || total_min > options->total_limit) - return true; - } - - return false; -} - #endif diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c new file mode 100644 index 00000000..6bffc2f1 --- /dev/null +++ b/src/liblzma/common/block_util.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header.c +/// \brief Utility functions to handle lzma_options_block +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_block_total_size_set(lzma_options_block *options, lzma_vli total_size) +{ + // Validate. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX + || (total_size & 3)) + return LZMA_PROG_ERROR; + + const uint32_t container_size = options->header_size + + lzma_check_sizes[options->check]; + + // Validate that Compressed Size will be greater than zero. + if (container_size <= total_size) + return LZMA_DATA_ERROR; + + options->compressed_size = total_size - container_size; + + return LZMA_OK; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size_get(const lzma_options_block *options) +{ + // Validate the values that we are interested in. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // Total Size either. + if (options->compressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_VLI_VALUE_UNKNOWN; + + const lzma_vli total_size = options->compressed_size + + options->header_size + + lzma_check_sizes[options->check]; + + // Validate the calculated Total Size. 
+ if (options->compressed_size > LZMA_VLI_VALUE_MAX + || (options->compressed_size & 3) + || total_size > LZMA_VLI_VALUE_MAX) + return 0; + + return total_size; +} diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 5dd7a87f..4f30427d 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -21,6 +21,7 @@ #define LZMA_COMMON_H #include "../../common/sysdefs.h" +#include "../../common/integer.h" // Don't use ifdef... #if HAVE_VISIBILITY @@ -30,6 +31,17 @@ #endif +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 @@ -117,10 +129,6 @@ struct lzma_filter_info_s { /// Pointer to filter's options structure void *options; - - /// Uncompressed size of the filter, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. 
- lzma_vli uncompressed_size; }; @@ -158,20 +166,6 @@ extern void lzma_next_coder_end(lzma_next_coder *next, lzma_allocator *allocator); -extern lzma_ret lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options); - -extern lzma_ret lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options); - -extern lzma_ret lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); - -extern lzma_ret lzma_stream_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer); - - /// \brief Wrapper for memcpy() /// /// This function copies as much data as possible from in[] to out[] and @@ -225,6 +219,13 @@ do { \ lzma_next_coder_init2(next, allocator, \ func, func, allocator, __VA_ARGS__) +/// \brief Initializing lzma_next_coder +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_coder_init0(func, next, allocator) \ + lzma_next_coder_init2(next, allocator, func, func, allocator) + /// \brief Initializing lzma_stream /// @@ -254,6 +255,13 @@ do { \ #define lzma_next_strm_init(strm, func, ...) \ lzma_next_strm_init2(strm, func, func, (strm)->allocator, __VA_ARGS__) +/// \brief Initializing lzma_stream +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. 
+#define lzma_next_strm_init0(strm, func) \ + lzma_next_strm_init2(strm, func, func, (strm)->allocator) + /// \brief Return if expression doesn't evaluate to LZMA_OK /// diff --git a/src/liblzma/common/copy_coder.c b/src/liblzma/common/copy_coder.c deleted file mode 100644 index 0bd674f6..00000000 --- a/src/liblzma/common/copy_coder.c +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.c -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "copy_coder.h" - - -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - -#ifdef HAVE_ENCODER -static lzma_ret -copy_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // If we aren't the last filter in the chain, the Copy filter - // is totally useless. Note that it is job of the next coder to - // take care of Uncompressed Size, so we don't need to update our - // coder->uncompressed_size at all. - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - // We are the last coder in the chain. 
- // Just copy as much data as possible. - bufcpy(in, in_pos, in_size, out, out_pos, out_size); - - // LZMA_SYNC_FLUSH and LZMA_FINISH are the same thing for us. - if (action != LZMA_RUN && *in_pos == in_size) - return LZMA_STREAM_END; - - return LZMA_OK; -} -#endif - - -#ifdef HAVE_DECODER -static lzma_ret -copy_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - - const size_t in_avail = in_size - *in_pos; - - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - // We are the last coder in the chain. - // Just copy as much data as possible. - const size_t in_used = bufcpy( - in, in_pos, in_size, out, out_pos, out_size); - - // Update uncompressed_size if it is known. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= in_used; - - return coder->uncompressed_size == 0 ? LZMA_STREAM_END : LZMA_OK; -} -#endif - - -static void -copy_coder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -copy_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters, lzma_code_function encode) -{ - // Allocate memory for the decoder if needed. 
- if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = encode; - next->end = &copy_coder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - // Copy Uncompressed Size which is used to limit the output size. - next->coder->uncompressed_size = filters[0].uncompressed_size; - - // Initialize the next decoder in the chain, if any. - return lzma_next_filter_init( - &next->coder->next, allocator, filters + 1); -} - - -#ifdef HAVE_ENCODER -extern lzma_ret -lzma_copy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - &copy_encode); -} -#endif - - -#ifdef HAVE_DECODER -extern lzma_ret -lzma_copy_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - &copy_decode); -} -#endif diff --git a/src/liblzma/common/copy_coder.h b/src/liblzma/common/copy_coder.h deleted file mode 100644 index b8d0295d..00000000 --- a/src/liblzma/common/copy_coder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.h -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details.
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_COPY_CODER_H -#define LZMA_COPY_CODER_H - -#include "common.h" - -extern lzma_ret lzma_copy_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -extern lzma_ret lzma_copy_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -#endif diff --git a/src/liblzma/common/delta_common.c b/src/liblzma/common/delta_common.c index de27b5a6..acd31e14 100644 --- a/src/liblzma/common/delta_common.c +++ b/src/liblzma/common/delta_common.c @@ -47,10 +47,6 @@ lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Coding function is different for encoder and decoder. next->code = code; - // Copy Uncompressed Size which is used to limit the output size - // in the Delta decoder. - next->coder->uncompressed_size = filters[0].uncompressed_size; - // Set the delta distance. if (filters[0].options == NULL) return LZMA_PROG_ERROR; diff --git a/src/liblzma/common/delta_common.h b/src/liblzma/common/delta_common.h index 3ec955b7..1d58899d 100644 --- a/src/liblzma/common/delta_common.h +++ b/src/liblzma/common/delta_common.h @@ -26,10 +26,6 @@ struct lzma_coder_s { /// Next coder in the chain lzma_next_coder next; - /// Uncompressed size - This is needed when we are the last - /// filter in the chain. - lzma_vli uncompressed_size; - /// Delta distance size_t distance; diff --git a/src/liblzma/common/delta_decoder.c b/src/liblzma/common/delta_decoder.c index af2b840d..8f5a4cbf 100644 --- a/src/liblzma/common/delta_decoder.c +++ b/src/liblzma/common/delta_decoder.c @@ -21,26 +21,8 @@ #include "delta_common.h" -/// Copies and decodes the data at the same time. This is used when Delta -/// is the last filter in the chain. 
static void -copy_and_decode(lzma_coder *coder, - const uint8_t *restrict in, uint8_t *restrict out, size_t size) -{ - const size_t distance = coder->distance; - - for (size_t i = 0; i < size; ++i) { - out[i] = in[i] + coder->history[ - (distance + coder->pos) & 0xFF]; - coder->history[coder->pos-- & 0xFF] = out[i]; - } -} - - -/// Decodes the data in place. This is used when we are not the last filter -/// in the chain. -static void -decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) { const size_t distance = coder->distance; @@ -51,44 +33,21 @@ decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) } - static lzma_ret delta_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - lzma_ret ret; - - if (coder->next.code == NULL) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_size - *in_pos) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - const size_t in_avail = in_size - *in_pos; - const size_t out_avail = out_size - *out_pos; - const size_t size = MIN(in_avail, out_avail); - - copy_and_decode(coder, in + *in_pos, out + *out_pos, size); + assert(coder->next.code != NULL); - *in_pos += size; - *out_pos += size; + const size_t out_start = *out_pos; - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - coder->uncompressed_size -= size; + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); - ret = coder->uncompressed_size == 0 - ? 
LZMA_STREAM_END : LZMA_OK; - - } else { - const size_t out_start = *out_pos; - - ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - decode_in_place(coder, out + out_start, *out_pos - out_start); - } + decode_buffer(coder, out + out_start, *out_pos - out_start); return ret; } diff --git a/src/liblzma/common/delta_encoder.c b/src/liblzma/common/delta_encoder.c index b94f92de..a8bb9341 100644 --- a/src/liblzma/common/delta_encoder.c +++ b/src/liblzma/common/delta_encoder.c @@ -22,7 +22,8 @@ /// Copies and encodes the data at the same time. This is used when Delta -/// is the last filter in the chain. +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). static void copy_and_encode(lzma_coder *coder, const uint8_t *restrict in, uint8_t *restrict out, size_t size) @@ -38,8 +39,8 @@ copy_and_encode(lzma_coder *coder, } -/// Encodes the data in place. This is used when we are not the last filter -/// in the chain. +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). 
static void encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) { diff --git a/src/liblzma/common/easy_multi.c b/src/liblzma/common/easy.c index 15778fab..6c258204 100644 --- a/src/liblzma/common/easy_multi.c +++ b/src/liblzma/common/easy.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_multi.c -/// \brief Easy Multi-Block Stream encoder initialization +/// \file easy.c +/// \brief Easy Stream encoder initialization // // Copyright (C) 2008 Lasse Collin // @@ -17,23 +17,50 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" -#include "stream_encoder_multi.h" +#include "stream_encoder.h" struct lzma_coder_s { - lzma_next_coder encoder; - lzma_options_stream options; + lzma_next_coder stream_encoder; + + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_options_filter filters[5]; }; +static bool +easy_set_filters(lzma_options_filter *filters, uint32_t level) +{ + bool error = false; + + if (level == 0) { + // TODO FIXME Use Subblock or LZMA2 with no compression. 
+ error = true; + +#ifdef HAVE_FILTER_LZMA + } else if (level <= 9) { + filters[0].id = LZMA_FILTER_LZMA; + filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); + filters[1].id = LZMA_VLI_VALUE_UNKNOWN; +#endif + + } else { + error = true; + } + + return error; +} + + static lzma_ret easy_encode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - return coder->encoder.code(coder->encoder.coder, allocator, + return coder->stream_encoder.code( + coder->stream_encoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); } @@ -41,7 +68,7 @@ easy_encode(lzma_coder *coder, lzma_allocator *allocator, static void easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) { - lzma_next_coder_end(&coder->encoder, allocator); + lzma_next_coder_end(&coder->stream_encoder, allocator); lzma_free(coder, allocator); return; } @@ -49,8 +76,7 @@ easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) + lzma_easy_level level) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -60,39 +86,21 @@ easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->code = &easy_encode; next->end = &easy_encoder_end; - next->coder->encoder = LZMA_NEXT_CODER_INIT; + next->coder->stream_encoder = LZMA_NEXT_CODER_INIT; } - next->coder->options = (lzma_options_stream){ - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - .header = header, - .footer = footer, - }; - - if (lzma_easy_set_filters(next->coder->options.filters, level) - || lzma_easy_set_filters( - next->coder->options.metadata_filters, - 
metadata_level)) + if (easy_set_filters(next->coder->filters, level)) return LZMA_HEADER_ERROR; - return lzma_stream_encoder_multi_init(&next->coder->encoder, - allocator, &next->coder->options); + return lzma_stream_encoder_init(&next->coder->stream_encoder, + allocator, next->coder->filters, LZMA_CHECK_CRC32); } extern LZMA_API lzma_ret -lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) +lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level) { - // This is more complicated than lzma_easy_encoder_single(), - // because lzma_stream_encoder_multi() wants that the options - // structure is available until the encoding is finished. - lzma_next_strm_init(strm, easy_encoder_init, - level, metadata_level, header, footer); + lzma_next_strm_init(strm, easy_encoder_init, level); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; @@ -101,3 +109,14 @@ lzma_easy_encoder_multi(lzma_stream *strm, return LZMA_OK; } + + +extern LZMA_API uint32_t +lzma_easy_memory_usage(lzma_easy_level level) +{ + lzma_options_filter filters[5]; + if (easy_set_filters(filters, level)) + return UINT32_MAX; + + return lzma_memory_usage(filters, true); +} diff --git a/src/liblzma/common/easy_common.c b/src/liblzma/common/easy_common.c deleted file mode 100644 index e0c12a52..00000000 --- a/src/liblzma/common/easy_common.c +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. 
-// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" - - -extern bool -lzma_easy_set_filters(lzma_options_filter *filters, uint32_t level) -{ - bool error = false; - - if (level == 0) { - filters[0].id = LZMA_VLI_VALUE_UNKNOWN; - -#ifdef HAVE_FILTER_LZMA - } else if (level <= 9) { - filters[0].id = LZMA_FILTER_LZMA; - filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); - filters[1].id = LZMA_VLI_VALUE_UNKNOWN; -#endif - - } else { - error = true; - } - - return error; -} - - -extern LZMA_API uint32_t -lzma_easy_memory_usage(lzma_easy_level level) -{ - lzma_options_filter filters[8]; - if (lzma_easy_set_filters(filters, level)) - return UINT32_MAX; - - return lzma_memory_usage(filters, true); -} diff --git a/src/liblzma/common/extra.c b/src/liblzma/common/extra.c deleted file mode 100644 index c532abb0..00000000 --- a/src/liblzma/common/extra.c +++ /dev/null @@ -1,34 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file extra.c -/// \brief Handling of Extra in Metadata -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API void -lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator) -{ - while (extra != NULL) { - lzma_extra *tmp = extra->next; - lzma_free(extra->data, allocator); - lzma_free(extra, allocator); - extra = tmp; - } - - return; -} diff --git a/src/liblzma/common/features.c b/src/liblzma/common/features.c index 33b2e0a2..a02949d9 100644 --- a/src/liblzma/common/features.c +++ b/src/liblzma/common/features.c @@ -21,10 +21,6 @@ static const lzma_vli filters[] = { -#ifdef HAVE_FILTER_COPY - LZMA_FILTER_COPY, -#endif - #ifdef HAVE_FILTER_SUBBLOCK LZMA_FILTER_SUBBLOCK, #endif diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c index 515f9346..498b2ad6 100644 --- a/src/liblzma/common/filter_flags_decoder.c +++ b/src/liblzma/common/filter_flags_decoder.c @@ -21,362 +21,188 @@ #include "lzma_decoder.h" -struct lzma_coder_s { - lzma_options_filter *options; - - enum { - SEQ_MISC, - SEQ_ID, - SEQ_SIZE, - SEQ_PROPERTIES, - } sequence; - - /// \brief Position in variable-length integers - size_t pos; - - /// \brief Size of Filter Properties - lzma_vli properties_size; -}; - - #ifdef HAVE_FILTER_SUBBLOCK static lzma_ret -properties_subblock(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in lzma_attribute((unused)), - size_t *in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused))) +properties_subblock(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props lzma_attribute((unused)), + size_t prop_size lzma_attribute((unused))) { - if (coder->properties_size != 0) + if (prop_size != 0) return LZMA_HEADER_ERROR; - coder->options->options = lzma_alloc( + options->options = lzma_alloc( sizeof(lzma_options_subblock), allocator); - if (coder->options->options == NULL) + if (options->options == NULL) return LZMA_MEM_ERROR; - ((lzma_options_subblock 
*)(coder->options->options)) - ->allow_subfilters = true; - return LZMA_STREAM_END; + ((lzma_options_subblock *)(options->options))->allow_subfilters = true; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_SIMPLE static lzma_ret -properties_simple(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_simple(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size == 0) - return LZMA_STREAM_END; + if (prop_size == 0) + return LZMA_OK; - if (coder->properties_size != 4) + if (prop_size != 4) return LZMA_HEADER_ERROR; - lzma_options_simple *options = coder->options->options; - - if (options == NULL) { - options = lzma_alloc(sizeof(lzma_options_simple), allocator); - if (options == NULL) - return LZMA_MEM_ERROR; - - options->start_offset = 0; - coder->options->options = options; - } - - while (coder->pos < 4) { - if (*in_pos == in_size) - return LZMA_OK; + lzma_options_simple *simple = lzma_alloc( + sizeof(lzma_options_simple), allocator); + if (simple == NULL) + return LZMA_MEM_ERROR; - options->start_offset - |= (uint32_t)(in[*in_pos]) << (8 * coder->pos); - ++*in_pos; - ++coder->pos; - } + simple->start_offset = integer_read_32(props); // Don't leave an options structure allocated if start_offset is zero. 
- if (options->start_offset == 0) { - lzma_free(options, allocator); - coder->options->options = NULL; - } + if (simple->start_offset == 0) + lzma_free(simple, allocator); + else + options->options = simple; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_DELTA static lzma_ret -properties_delta(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_delta(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size != 1) + if (prop_size != 1) return LZMA_HEADER_ERROR; - if (*in_pos == in_size) - return LZMA_OK; - - lzma_options_delta *options = lzma_alloc( - sizeof(lzma_options_delta), allocator); - if (options == NULL) + options->options = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (options->options == NULL) return LZMA_MEM_ERROR; - coder->options->options = options; - - options->distance = (uint32_t)(in[*in_pos]) + 1; - ++*in_pos; + ((lzma_options_delta *)(options->options))->distance + = (uint32_t)(props[0]) + 1; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_LZMA static lzma_ret -properties_lzma(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_lzma(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { // LZMA properties are always two bytes (at least for now). - if (coder->properties_size != 2) + if (prop_size != 2) return LZMA_HEADER_ERROR; - assert(coder->pos < 2); - - while (*in_pos < in_size) { - switch (coder->pos) { - case 0: - // Allocate the options structure. - coder->options->options = lzma_alloc( - sizeof(lzma_options_lzma), allocator); - if (coder->options->options == NULL) - return LZMA_MEM_ERROR; - - // Decode lc, lp, and pb. 
- if (lzma_lzma_decode_properties( - coder->options->options, in[*in_pos])) - return LZMA_HEADER_ERROR; - - ++*in_pos; - ++coder->pos; - break; - - case 1: { - lzma_options_lzma *options = coder->options->options; - - // Check that reserved bits are unset. - if (in[*in_pos] & 0xC0) - return LZMA_HEADER_ERROR; - - // Decode the dictionary size. See the file format - // specification section 4.3.4.2 to understand this. - if (in[*in_pos] == 0) { - options->dictionary_size = 1; - - } else if (in[*in_pos] > 59) { - // Dictionary size is over 1 GiB. - // It's not supported at the moment. - return LZMA_HEADER_ERROR; -# if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 -# error Update the if()-condition a few lines -# error above to match LZMA_DICTIONARY_SIZE_MAX. -# endif - - } else { - options->dictionary_size - = 2 | ((in[*in_pos] + 1) & 1); - options->dictionary_size - <<= (in[*in_pos] - 1) / 2; - } - - ++*in_pos; - return LZMA_STREAM_END; - } - } - } + lzma_options_lzma *lzma = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (lzma == NULL) + return LZMA_MEM_ERROR; + + // Decode lc, lp, and pb. + if (lzma_lzma_decode_properties(lzma, props[0])) + goto error; + + // Check that reserved bits are unset. + if (props[1] & 0xC0) + goto error; + + // Decode the dictionary size. + // FIXME The specification says that maximum is 4 GiB. + if (props[1] > 36) + goto error; +#if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 +# error Update the if()-condition a few lines +# error above to match LZMA_DICTIONARY_SIZE_MAX. 
+#endif + + lzma->dictionary_size = 2 | (props[1] & 1); + lzma->dictionary_size <<= props[1] / 2 + 11; - assert(coder->pos < 2); + options->options = lzma; return LZMA_OK; + +error: + lzma_free(lzma, allocator); + return LZMA_HEADER_ERROR; } #endif -static lzma_ret -filter_flags_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) { - while (*in_pos < in_size || coder->sequence == SEQ_PROPERTIES) - switch (coder->sequence) { - case SEQ_MISC: - // Determine the Filter ID and Size of Filter Properties. - if (in[*in_pos] >= 0xE0) { - // Using External ID. Prepare the ID - // for variable-length integer parsing. - coder->options->id = 0; - - if (in[*in_pos] == 0xFF) { - // Mark that Size of Filter Properties is - // unknown, so we know later that there is - // external Size of Filter Properties present. - coder->properties_size - = LZMA_VLI_VALUE_UNKNOWN; - } else { - // Take Size of Filter Properties from Misc. - coder->properties_size = in[*in_pos] - 0xE0; - } - - coder->sequence = SEQ_ID; - - } else { - // The Filter ID is the same as Misc. - coder->options->id = in[*in_pos]; - - // The Size of Filter Properties can be calculated - // from Misc too. - coder->properties_size = in[*in_pos] / 0x20; - - coder->sequence = SEQ_PROPERTIES; - } - - ++*in_pos; - break; + // Set the pointer to NULL so the caller can always safely free it. 
+ options->options = NULL; - case SEQ_ID: { - const lzma_ret ret = lzma_vli_decode(&coder->options->id, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->properties_size == LZMA_VLI_VALUE_UNKNOWN) { - // We have also external Size of Filter - // Properties. Prepare the size for - // variable-length integer parsing. - coder->properties_size = 0; - coder->sequence = SEQ_SIZE; - } else { - coder->sequence = SEQ_PROPERTIES; - } - - // Reset pos for its next job. - coder->pos = 0; - break; - } + // Filter ID + return_if_error(lzma_vli_decode(&options->id, NULL, + in, in_pos, in_size)); - case SEQ_SIZE: { - const lzma_ret ret = lzma_vli_decode(&coder->properties_size, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; + // Size of Properties + lzma_vli prop_size; + return_if_error(lzma_vli_decode(&prop_size, NULL, + in, in_pos, in_size)); - coder->pos = 0; - coder->sequence = SEQ_PROPERTIES; - break; - } + // Check that we have enough input. + if (prop_size > in_size - *in_pos) + return LZMA_DATA_ERROR; - case SEQ_PROPERTIES: { - lzma_ret (*get_properties)(lzma_coder *coder, - lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size); + // Determine the function to decode the properties. + lzma_ret (*get_properties)(lzma_options_filter *options, + lzma_allocator *allocator, const uint8_t *props, + size_t prop_size); - switch (coder->options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return coder->properties_size > 0 - ? 
LZMA_HEADER_ERROR : LZMA_STREAM_END; -#endif + switch (options->id) { #ifdef HAVE_FILTER_SUBBLOCK - case LZMA_FILTER_SUBBLOCK: - get_properties = &properties_subblock; - break; + case LZMA_FILTER_SUBBLOCK: + get_properties = &properties_subblock; + break; #endif #ifdef HAVE_FILTER_SIMPLE # ifdef HAVE_FILTER_X86 - case LZMA_FILTER_X86: + case LZMA_FILTER_X86: # endif # ifdef HAVE_FILTER_POWERPC - case LZMA_FILTER_POWERPC: + case LZMA_FILTER_POWERPC: # endif # ifdef HAVE_FILTER_IA64 - case LZMA_FILTER_IA64: + case LZMA_FILTER_IA64: # endif # ifdef HAVE_FILTER_ARM - case LZMA_FILTER_ARM: + case LZMA_FILTER_ARM: # endif # ifdef HAVE_FILTER_ARMTHUMB - case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_ARMTHUMB: # endif # ifdef HAVE_FILTER_SPARC - case LZMA_FILTER_SPARC: + case LZMA_FILTER_SPARC: # endif - get_properties = &properties_simple; - break; + get_properties = &properties_simple; + break; #endif #ifdef HAVE_FILTER_DELTA - case LZMA_FILTER_DELTA: - get_properties = &properties_delta; - break; + case LZMA_FILTER_DELTA: + get_properties = &properties_delta; + break; #endif #ifdef HAVE_FILTER_LZMA - case LZMA_FILTER_LZMA: - get_properties = &properties_lzma; - break; + case LZMA_FILTER_LZMA: + get_properties = &properties_lzma; + break; #endif - default: - return LZMA_HEADER_ERROR; - } - - return get_properties(coder, allocator, in, in_pos, in_size); - } - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -filter_flags_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &filter_flags_decode; - next->end = &filter_flags_decoder_end; + return LZMA_HEADER_ERROR; } - options->id = 0; - options->options = 
NULL; - - next->coder->options = options; - next->coder->sequence = SEQ_MISC; - next->coder->pos = 0; - next->coder->properties_size = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_filter_flags_decoder(lzma_stream *strm, lzma_options_filter *options) -{ - lzma_next_strm_init(strm, lzma_filter_flags_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; + const uint8_t *props = in + *in_pos; + *in_pos += prop_size; + return get_properties(options, allocator, props, prop_size); } diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c index 2d11dd3a..45fbbb00 100644 --- a/src/liblzma/common/filter_flags_encoder.c +++ b/src/liblzma/common/filter_flags_encoder.c @@ -22,22 +22,13 @@ #include "fastpos.h" -/// \brief Calculates the size of the Filter Properties field -/// -/// This currently can return only LZMA_OK or LZMA_HEADER_ERROR, but -/// with some new filters it may return also LZMA_PROG_ERROR. +/// Calculate the size of the Filter Properties field static lzma_ret get_properties_size(uint32_t *size, const lzma_options_filter *options) { lzma_ret ret = LZMA_OK; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - *size = 0; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: *size = 0; @@ -100,40 +91,14 @@ get_properties_size(uint32_t *size, const lzma_options_filter *options) extern LZMA_API lzma_ret lzma_filter_flags_size(uint32_t *size, const lzma_options_filter *options) { - // Get size of Filter Properties. + // Get size of Filter Properties. This also validates the Filter ID. uint32_t prop_size; - const lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Size of Filter ID field if it exists. - size_t id_size; - size_t prop_size_size; - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. 
- id_size = 0; - prop_size_size = 0; - - } else { - // At least Filter ID is stored using the External ID field. - id_size = lzma_vli_size(options->id); - if (id_size == 0) - return LZMA_PROG_ERROR; - - if (prop_size <= 30) { - // Size of Filter Properties fits into Misc still. - prop_size_size = 0; - } else { - // The Size of Filter Properties field is used too. - prop_size_size = lzma_vli_size(prop_size); - if (prop_size_size == 0) - return LZMA_PROG_ERROR; - } - } + return_if_error(get_properties_size(&prop_size, options)); - // 1 is for the Misc field. - *size = 1 + id_size + prop_size_size + prop_size; + // Calculate the size of the Filter ID and Size of Properties fields. + // These cannot fail since get_properties_size() already succeeded. + *size = lzma_vli_size(options->id) + lzma_vli_size(prop_size) + + prop_size; return LZMA_OK; } @@ -149,10 +114,10 @@ properties_simple(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_OK; if (out_size - *out_pos < 4) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; - for (size_t i = 0; i < 4; ++i) - out[(*out_pos)++] = options->start_offset >> (i * 8); + integer_write_32(out + *out_pos, options->start_offset); + *out_pos += 4; return LZMA_OK; } @@ -175,7 +140,7 @@ properties_delta(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_HEADER_ERROR; if (out_size - *out_pos < 1) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; out[*out_pos] = options->distance - LZMA_DELTA_DISTANCE_MIN; ++*out_pos; @@ -195,7 +160,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_PROG_ERROR; if (out_size - *out_pos < 2) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; // LZMA Properties if (lzma_lzma_encode_properties(options, out + *out_pos)) @@ -230,7 +195,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, ++d; // Get the highest two bits using the proper encoding: - out[*out_pos] = get_pos_slot(d) - 1; + out[*out_pos] = get_pos_slot(d) - 24; ++*out_pos; return 
LZMA_OK; @@ -250,58 +215,19 @@ lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, size_t out_size, // Get size of Filter Properties. uint32_t prop_size; - lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Misc, External ID, and Size of Properties - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - out[*out_pos] = options->id; - ++*out_pos; - - } else if (prop_size <= 30) { - // Size of Filter Properties fits into Misc. - out[*out_pos] = prop_size + 0xE0; - ++*out_pos; - - // External ID is used to encode the Filter ID. If encoding - // the VLI fails, it's because the caller has given as too - // little output space, which it should have checked already. - // So return LZMA_PROG_ERROR, not LZMA_BUF_ERROR. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } else { - // Nothing fits into Misc. - out[*out_pos] = 0xFF; - ++*out_pos; - - // External ID is used to encode the Filter ID. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - // External Size of Filter Properties - dummy = 0; - if (lzma_vli_encode(prop_size, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + return_if_error(get_properties_size(&prop_size, options)); + + // Filter ID + return_if_error(lzma_vli_encode(options->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + return_if_error(lzma_vli_encode(prop_size, NULL, + out, out_pos, out_size)); // Filter Properties + lzma_ret ret; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - assert(prop_size == 0); - ret = options->options == NULL ? 
LZMA_OK : LZMA_HEADER_ERROR; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: assert(prop_size == 0); diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 6816b37a..f01206de 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c -/// \brief Handling of Index in Metadata +/// \brief Handling of Index // // Copyright (C) 2007 Lasse Collin // @@ -17,124 +17,733 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "index.h" -/** - * \brief Duplicates an Index list - * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. - */ -extern LZMA_API lzma_index * -lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator) +/// Number of Records to allocate at once. +#define INDEX_GROUP_SIZE 256 + + +typedef struct lzma_index_group_s lzma_index_group; +struct lzma_index_group_s { + /// Next group + lzma_index_group *prev; + + /// Previous group + lzma_index_group *next; + + /// Index of the last Record in this group + size_t last; + + /// Total Size fields as cumulative sum relative to the beginning + /// of the group. The total size of the group is total_sums[last]. + lzma_vli total_sums[INDEX_GROUP_SIZE]; + + /// Uncompressed Size fields as cumulative sum relative to the + /// beginning of the group. The uncompressed size of the group is + /// uncompressed_sums[last]. + lzma_vli uncompressed_sums[INDEX_GROUP_SIZE]; + + /// True if the Record is padding + bool paddings[INDEX_GROUP_SIZE]; +}; + + +struct lzma_index_s { + /// Total size of the Blocks and padding + lzma_vli total_size; + + /// Uncompressed size of the Stream + lzma_vli uncompressed_size; + + /// Number of non-padding records. This is needed by Index encoder. 
+ lzma_vli count; + + /// Size of the List of Records field; this is updated every time + /// a new non-padding Record is added. + lzma_vli index_list_size; + + /// This is zero if no Indexes have been combined with + /// lzma_index_cat(). With combined Indexes, this contains the sizes + /// of all but latest the Streams, including possible Stream Padding + /// fields. + lzma_vli padding_size; + + /// First group of Records + lzma_index_group *head; + + /// Last group of Records + lzma_index_group *tail; + + /// Tracking the read position + struct { + /// Group where the current read position is. + lzma_index_group *group; + + /// The most recently read record in *group + lzma_vli record; + + /// Uncompressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli uncompressed_offset; + + /// Compressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli stream_offset; + } current; + + /// Information about earlier Indexes when multiple Indexes have + /// been combined. + struct { + /// Sum of the Record counts of the all but the last Stream. + lzma_vli count; + + /// Sum of the List of Records fields of all but the last + /// Stream. This is needed when a new Index is concatenated + /// to this lzma_index structure. 
+ lzma_vli index_list_size; + } old; +}; + + +static void +free_index_list(lzma_index *i, lzma_allocator *allocator) { - lzma_index *new_head = NULL; - lzma_index *new_current = NULL; + lzma_index_group *g = i->head; - while (old_current != NULL) { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) { - lzma_index_free(new_head, allocator); - return NULL; - } + while (g != NULL) { + lzma_index_group *tmp = g->next; + lzma_free(g, allocator); + g = tmp; + } - i->total_size = old_current->total_size; - i->uncompressed_size = old_current->uncompressed_size; - i->next = NULL; + return; +} - if (new_head == NULL) - new_head = i; - else - new_current->next = i; - new_current = i; - old_current = old_current->next; +extern LZMA_API lzma_index * +lzma_index_init(lzma_index *i, lzma_allocator *allocator) +{ + if (i == NULL) { + i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return NULL; + } else { + free_index_list(i, allocator); } - return new_head; + i->total_size = 0; + i->uncompressed_size = 0; + i->count = 0; + i->index_list_size = 0; + i->padding_size = 0; + i->head = NULL; + i->tail = NULL; + i->current.group = NULL; + i->old.count = 0; + i->old.index_list_size = 0; + + return i; } -/** - * \brief Frees an Index list - * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. 
- */ extern LZMA_API void -lzma_index_free(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) { - while (i != NULL) { - lzma_index *tmp = i->next; + if (i != NULL) { + free_index_list(i, allocator); lzma_free(i, allocator); - i = tmp; } return; } -/** - * \brief Calculates properties of an Index list - * - * - */ -extern LZMA_API lzma_ret -lzma_index_count(const lzma_index *i, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size) -{ - *count = 0; - *total_size = 0; - *uncompressed_size = 0; - - while (i != NULL) { - if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) { - *total_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->total_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) { - *total_size += i->total_size; - if (*total_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; +extern LZMA_API lzma_vli +lzma_index_count(const lzma_index *i) +{ + return i->count; +} + + +extern LZMA_API lzma_vli +lzma_index_size(const lzma_index *i) +{ + return index_size(i->count, i->index_list_size); +} + + +extern LZMA_API lzma_vli +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API lzma_vli +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +extern LZMA_API lzma_vli +lzma_index_file_size(const lzma_index *i) +{ + // If multiple Streams are concatenated, the Stream Header, Index, + // and Stream Footer fields of all but the last Stream are already + // included in padding_size. Thus, we need to calculate only the + // size of the last Index, not all Indexes. 
+ return i->total_size + i->padding_size + + index_size(i->count - i->old.count, + i->index_list_size - i->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; +} + + +extern LZMA_API lzma_vli +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) + - index_size_unpadded(i->count, i->index_list_size)) & 3; +} + + +/// Helper function for index_append() +static lzma_ret +index_append_real(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size, + bool is_padding) +{ + // Add the new record. + if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) { + // Allocate a new group. + lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + // Initialize the group and set its first record. + g->prev = i->tail; + g->next = NULL; + g->last = 0; + g->total_sums[0] = total_size; + g->uncompressed_sums[0] = uncompressed_size; + g->paddings[0] = is_padding; + + // If this is the first group, make it the head. + if (i->head == NULL) + i->head = g; + else + i->tail->next = g; + + // Make it the new tail. + i->tail = g; + + } else { + // i->tail has space left for at least one record. + i->tail->total_sums[i->tail->last + 1] + = i->tail->total_sums[i->tail->last] + + total_size; + i->tail->uncompressed_sums[i->tail->last + 1] + = i->tail->uncompressed_sums[i->tail->last] + + uncompressed_size; + i->tail->paddings[i->tail->last + 1] = is_padding; + ++i->tail->last; + } + + return LZMA_OK; +} + + +static lzma_ret +index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, + lzma_vli uncompressed_size, bool is_padding) +{ + if (total_size > LZMA_VLI_VALUE_MAX + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + // This looks a bit ugly. 
We want to first validate that the Index + // and Stream stay in valid limits after adding this Record. After + // validating, we may need to allocate a new lzma_index_group (it's + // slightly more correct to validate before allocating, YMMV). + lzma_ret ret; + + if (is_padding) { + assert(uncompressed_size == 0); + + // First update the info so we can validate it. + i->padding_size += total_size; + + if (i->padding_size > LZMA_VLI_VALUE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, true); + + // If something went wrong, undo the updated value. + if (ret != LZMA_OK) + i->padding_size -= total_size; + + } else { + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add + = lzma_vli_size(total_size / 4 - 1) + + lzma_vli_size(uncompressed_size); + + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; + + if (i->total_size > LZMA_VLI_VALUE_MAX + || i->uncompressed_size > LZMA_VLI_VALUE_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } + } + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size) +{ + return index_append(i, allocator, + total_size, uncompressed_size, false); +} + + +/// Initialize i->current to point to the first Record. 
+static bool +init_current(lzma_index *i) +{ + if (i->head == NULL) { + assert(i->count == 0); + return true; + } + + assert(i->count > 0); + + i->current.group = i->head; + i->current.record = 0; + i->current.stream_offset = LZMA_STREAM_HEADER_SIZE; + i->current.uncompressed_offset = 0; - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size += i->uncompressed_size; - if (*uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; + return false; +} + + +/// Go backward to the previous group. +static void +previous_group(lzma_index *i) +{ + assert(i->current.group->prev != NULL); + + // Go to the previous group first. + i->current.group = i->current.group->prev; + i->current.record = i->current.group->last; + + // Then update the offsets. + i->current.stream_offset -= i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset -= i->current.group + ->uncompressed_sums[i->current.group->last]; + + return; +} + + +/// Go forward to the next group. +static void +next_group(lzma_index *i) +{ + assert(i->current.group->next != NULL); + + // Update the offsets first. + i->current.stream_offset += i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.group->last]; + + // Then go to the next group. + i->current.record = 0; + i->current.group = i->current.group->next; + + return; +} + + +/// Set *info from i->current. 
+static void +set_info(const lzma_index *i, lzma_index_record *info) +{ + info->total_size = i->current.group->total_sums[i->current.record]; + info->uncompressed_size = i->current.group->uncompressed_sums[ + i->current.record]; + + info->stream_offset = i->current.stream_offset; + info->uncompressed_offset = i->current.uncompressed_offset; + + // If it's not the first Record in this group, we need to do some + // adjustements. + if (i->current.record > 0) { + // _sums[] are cumulative, thus we need to substract the + // _previous _sums[] to get the sizes of this Record. + info->total_size -= i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_size -= i->current.group + ->uncompressed_sums[i->current.record - 1]; + + // i->current.{total,uncompressed}_offsets have the offset + // of the beginning of the group, thus we need to add the + // appropriate amount to get the offsetes of this Record. + info->stream_offset += i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.record - 1]; + } + + return; +} + + +extern LZMA_API lzma_bool +lzma_index_read(lzma_index *i, lzma_index_record *info) +{ + if (i->current.group == NULL) { + // We are at the beginning of the Record list. Set up + // i->current point at the first Record. Return if there + // are no Records. + if (init_current(i)) + return true; + } else do { + // Try to go the next Record. + if (i->current.record < i->current.group->last) + ++i->current.record; + else if (i->current.group->next == NULL) + return true; + else + next_group(i); + } while (i->current.group->paddings[i->current.record]); + + // We found a new Record. Set the information to *info. 
+ set_info(i, info); + + return false; +} + + +extern LZMA_API void +lzma_index_rewind(lzma_index *i) +{ + i->current.group = NULL; + return; +} + + +extern LZMA_API lzma_bool +lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target) +{ + // Check if it is possible to fullfill the request. + if (target >= i->uncompressed_size) + return true; + + // Now we know that we will have an answer. Initialize the current + // read position if needed. + if (i->current.group == NULL && init_current(i)) + return true; + + // Locate the group where the wanted Block is. First search forward. + while (i->current.uncompressed_offset <= target) { + // If the first uncompressed byte of the next group is past + // the target offset, it has to be this or an earlier group. + if (i->current.uncompressed_offset + i->current.group + ->uncompressed_sums[i->current.group->last] + > target) + break; + + // Go forward to the next group. + next_group(i); + } + + // Then search backward. + while (i->current.uncompressed_offset > target) + previous_group(i); + + // Now the target Block is somewhere in i->current.group. Offsets + // in groups are relative to the beginning of the group, thus + // we must adjust the target before starting the search loop. + assert(target >= i->current.uncompressed_offset); + target -= i->current.uncompressed_offset; + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sums[] value is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks or + // padding, we don't want to return them. 
+ size_t left = 0; + size_t right = i->current.group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (i->current.group->uncompressed_sums[pos] <= target) + left = pos + 1; + else + right = pos; + } + + i->current.record = left; + +#ifndef NDEBUG + // The found Record must not be padding or have zero uncompressed size. + assert(!i->current.group->paddings[i->current.record]); + + if (i->current.record == 0) + assert(i->current.group->uncompressed_sums[0] > 0); + else + assert(i->current.group->uncompressed_sums[i->current.record] + - i->current.group->uncompressed_sums[ + i->current.record - 1] > 0); +#endif + + set_info(i, info); + + return false; +} + + +extern LZMA_API lzma_ret +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator, lzma_vli padding) +{ + if (dest == NULL || src == NULL || dest == src + || padding > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Check that the combined size of the Indexes stays within limits. + { + const lzma_vli dest_size = lzma_index_file_size(dest); + const lzma_vli src_size = lzma_index_file_size(src); + if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN + || dest_size + src_size + padding + > LZMA_VLI_VALUE_UNKNOWN) + return LZMA_DATA_ERROR; + } + + // Add a padding Record to take into account the size of + // Index + Stream Footer + Stream Padding + Stream Header. + // + // NOTE: This cannot overflow, because Index Size is always + // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs + // (Index Size and padding) doesn't overflow. It may become + // an invalid VLI if padding is huge, but that is caught by + // index_append(). + padding += index_size(dest->count - dest->old.count, + dest->index_list_size + - dest->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; + + // Add the padding Record. 
+ return_if_error(index_append( + dest, allocator, padding, 0, true)); + + // Avoid wasting lots of memory if src->head has only a few records + // that fit into dest->tail. That is, combine two groups if possible. + // + // NOTE: We know that dest->tail != NULL since we just appended + // a padding Record. But we don't know about src->head. + if (src->head != NULL && src->head->last + 1 + <= INDEX_GROUP_SIZE - dest->tail->last - 1) { + // Copy the first Record. + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[0]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[dest->tail->last] + + src->head->uncompressed_sums[0]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[0]; + + ++dest->tail->last; + + // Copy the rest. + for (size_t i = 1; i < src->head->last; ++i) { + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[i + 1] + - src->head->total_sums[i]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[ + dest->tail->last] + + src->head->uncompressed_sums[i + 1] + - src->head->uncompressed_sums[i]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[i + 1]; + + ++dest->tail->last; } - ++*count; - i = i->next; + // Free the head group of *src. Don't bother updating prev + // pointers since those won't be used for anything before + // we deallocate the whole *src structure. + lzma_index_group *tmp = src->head; + src->head = src->head->next; + lzma_free(tmp, allocator); + } + + // If there are groups left in *src, join them as is. Note that if we + // are combining already combined Indexes, src->head can be non-NULL + // even if we just combined the old src->head to dest->tail. + if (src->head != NULL) { + src->head->prev = dest->tail; + dest->tail->next = src->head; + dest->tail = src->tail; } - // FIXME ? 
- if (*total_size == LZMA_VLI_VALUE_UNKNOWN - || *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - return LZMA_HEADER_ERROR; + // Update information about earlier Indexes. Only the last Index + // from *src won't be counted in dest->old. The last Index is left + // open and can be even appended with lzma_index_append(). + dest->old.count = dest->count + src->old.count; + dest->old.index_list_size + = dest->index_list_size + src->old.index_list_size; + + // Update overall information. + dest->total_size += src->total_size; + dest->uncompressed_size += src->uncompressed_size; + dest->count += src->count; + dest->index_list_size += src->index_list_size; + dest->padding_size += src->padding_size; + + // *src has nothing left but the base structure. + lzma_free(src, allocator); return LZMA_OK; } +extern LZMA_API lzma_index * +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator); + if (dest == NULL) + return NULL; + + // Copy the base structure except the pointers. + *dest = *src; + dest->head = NULL; + dest->tail = NULL; + dest->current.group = NULL; + + // Copy the Records. + const lzma_index_group *src_group = src->head; + while (src_group != NULL) { + // Allocate a new group. + lzma_index_group *dest_group = lzma_alloc( + sizeof(lzma_index_group), allocator); + if (dest_group == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + // Set the pointers. + dest_group->prev = dest->tail; + dest_group->next = NULL; + + if (dest->head == NULL) + dest->head = dest_group; + else + dest->tail->next = dest_group; + + dest->tail = dest_group; + + dest_group->last = src_group->last; + + // Copy the arrays so that we don't read uninitialized memory. 
+ const size_t count = src_group->last + 1; + memcpy(dest_group->total_sums, src_group->total_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->uncompressed_sums, + src_group->uncompressed_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->paddings, src_group->paddings, + sizeof(bool) * count); + + // Copy also the read position. + if (src_group == src->current.group) + dest->current.group = dest->tail; + + src_group = src_group->next; + } + + return dest; +} + extern LZMA_API lzma_bool -lzma_index_is_equal(const lzma_index *a, const lzma_index *b) +lzma_index_equal(const lzma_index *a, const lzma_index *b) { - while (a != NULL && b != NULL) { - if (a->total_size != b->total_size || a->uncompressed_size - != b->uncompressed_size) + // No point to compare more if the pointers are the same. + if (a == b) + return true; + + // Compare the basic properties. + if (a->total_size != b->total_size + || a->uncompressed_size != b->uncompressed_size + || a->index_list_size != b->index_list_size + || a->count != b->count) + return false; + + // Compare the Records. 
+ const lzma_index_group *ag = a->head; + const lzma_index_group *bg = b->head; + while (ag != NULL && bg != NULL) { + const size_t count = ag->last + 1; + if (ag->last != bg->last + || memcmp(ag->total_sums, + bg->total_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->uncompressed_sums, + bg->uncompressed_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->paddings, bg->paddings, + sizeof(bool) * count) != 0) return false; - a = a->next; - b = b->next; + ag = ag->next; + bg = bg->next; } - return a == b; + return ag == NULL && bg == NULL; } diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h new file mode 100644 index 00000000..303ad43a --- /dev/null +++ b/src/liblzma/common/index.h @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Maximum encoded value of Total Size. +#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_VALUE_MAX / 4 - 1) + +/// Convert the real Total Size value to a value that is stored to the Index. +#define total_size_encode(size) ((size) / 4 - 1) + +/// Convert the encoded Total Size value from Index to the real Total Size. 
+#define total_size_decode(size) (((size) + 1) * 4) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + // Round up to a mulitiple of four. + return (index_size_unpadded(count, index_list_size) + 3) + & ~LZMA_VLI_C(3); +} + + +static inline lzma_vli +index_stream_size( + lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + total_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000..1635948c --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Target Index + lzma_index *index; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Total Size field + lzma_vli total_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. 
+ if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_TOTAL + ? &coder->total_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_TOTAL) { + // Validate that encoded Total Size isn't too big. + if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + // Convert the encoded Total Size to the real + // Total Size. + coder->total_size = total_size_decode( + coder->total_size); + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->total_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. 
+ coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + // We always allocate a new lzma_index. + *i = lzma_index_init(NULL, allocator); + if (*i == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. 
+ next->coder->sequence = SEQ_INDICATOR; + next->coder->index = *i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + lzma_next_coder_init(index_decoder_init, next, allocator, i); +} +*/ + + +extern LZMA_API lzma_ret +lzma_index_decoder(lzma_stream *strm, lzma_index **i) +{ + lzma_next_strm_init(strm, index_decoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c new file mode 100644 index 00000000..5a7d8c8c --- /dev/null +++ b/src/liblzma/common/index_encoder.c @@ -0,0 +1,222 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index given to us to encode. Note that we modify it in sense that + /// we read it, and read position is tracked in lzma_index structure. 
+ lzma_index *index; + + /// The current Index Record being encoded + lzma_index_record record; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli index_count = lzma_index_count(coder->index); + ret = lzma_vli_encode(index_count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_read(coder->index, &coder->record)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + // Total Size must be a multiple of four. + if (coder->record.total_size & 3) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_TOTAL; + + // Fall through + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_TOTAL + ? 
total_size_encode(coder->record.total_size) + : coder->record.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + lzma_index_rewind(i); + + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + lzma_next_coder_init(index_encoder_init, next, allocator, i); +} + + +extern LZMA_API lzma_ret +lzma_index_encoder(lzma_stream 
*strm, lzma_index *i) +{ + lzma_next_strm_init(strm, index_encoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/stream_encoder_multi.h b/src/liblzma/common/index_encoder.h index e0ff02f3..0087c284 100644 --- a/src/liblzma/common/stream_encoder_multi.h +++ b/src/liblzma/common/index_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file stream_encoder_multi.h -/// \brief Encodes Multi-Block .lzma files +/// \file index_encoder.h +/// \brief Encodes the Index field // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,10 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_STREAM_ENCODER_MULTI_H -#define LZMA_STREAM_ENCODER_MULTI_H +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_index *i); -extern lzma_ret lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); #endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c new file mode 100644 index 00000000..35dea41f --- /dev/null +++ b/src/liblzma/common/index_hash.c @@ -0,0 +1,340 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Total Size fields + lzma_vli total_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Total Sizes and Uncompressed Sizes. + lzma_check check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Total Size currently being read from an Index Record. + lzma_vli total_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. 
+ size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API lzma_index_hash * +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.total_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.total_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->total_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API void +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API lzma_vli +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. 
+static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + info->total_size += total_size; + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { total_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK || total_size == 0 || + total_size > LZMA_VLI_VALUE_MAX || (total_size & 3) + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + total_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.total_size > LZMA_VLI_VALUE_MAX + || index_hash->blocks.uncompressed_size + > LZMA_VLI_VALUE_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.total_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. 
+ if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_TOTAL + ? &index_hash->total_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_TOTAL) { + if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + index_hash->total_size = total_size_decode( + index_hash->total_size); + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->total_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. 
Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.total_size + < index_hash->records.total_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.total_size + != index_hash->records.total_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer, + index_hash->records.check.buffer, + lzma_check_sizes[LZMA_CHECK_BEST]) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. 
+ index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} diff --git a/src/liblzma/common/info.c b/src/liblzma/common/info.c deleted file mode 100644 index ab7fc999..00000000 --- a/src/liblzma/common/info.c +++ /dev/null @@ -1,814 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file info.c -/// \brief Collects and verifies integrity of Stream size information -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -struct lzma_info_s { - struct { - /// Known Size of Header Metadata Block; here's some - /// special things: - /// - LZMA_VLI_VALUE_UNKNOWN indicates that we don't know - /// if Header Metadata Block is present. - /// - 0 indicates that Header Metadata Block is not present. 
- lzma_vli header_metadata_size; - - /// Known Total Size of the Data Blocks in the Stream - lzma_vli total_size; - - /// Known Uncompressed Size of the Data Blocks in the Stream - lzma_vli uncompressed_size; - - /// Known Size of Footer Metadata Block - lzma_vli footer_metadata_size; - } known; - - struct { - /// Sum of Total Size fields stored to the Index so far - lzma_vli total_size; - - /// Sum of Uncompressed Size fields stored to the Index so far - lzma_vli uncompressed_size; - - /// First Index Record in the list, or NULL if Index is empty. - lzma_index *head; - - /// Number of Index Records - size_t record_count; - - /// Number of Index Records - size_t incomplete_count; - - /// True when we know that no more Records will get added - /// to the Index. - bool is_final; - } index; - - /// Start offset of the Stream. This is needed to calculate - /// lzma_info_iter.stream_offset. - lzma_vli stream_start_offset; - - /// True if Index is present in Header Metadata Block - bool has_index_in_header_metadata; -}; - - -////////////////////// -// Create/Reset/End // -////////////////////// - -static void -index_init(lzma_info *info) -{ - info->index.total_size = 0; - info->index.uncompressed_size = 0; - info->index.head = NULL; - info->index.record_count = 0; - info->index.incomplete_count = 0; - info->index.is_final = false; - return; -} - - -static void -info_init(lzma_info *info) -{ - info->known.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.total_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->stream_start_offset = 0; - info->has_index_in_header_metadata = false; - - index_init(info); - - return; -} - - -extern LZMA_API lzma_info * -lzma_info_init(lzma_info *info, lzma_allocator *allocator) -{ - if (info == NULL) - info = lzma_alloc(sizeof(lzma_info), allocator); - else - lzma_index_free(info->index.head, allocator); - - if (info != NULL) 
- info_init(info); - - return info; -} - - -extern LZMA_API void -lzma_info_free(lzma_info *info, lzma_allocator *allocator) -{ - lzma_index_free(info->index.head, allocator); - lzma_free(info, allocator); - return; -} - - -///////// -// Set // -///////// - -static lzma_ret -set_size(lzma_vli new_size, lzma_vli *known_size, lzma_vli index_size, - bool forbid_zero) -{ - assert(new_size <= LZMA_VLI_VALUE_MAX); - - lzma_ret ret = LZMA_OK; - - if (forbid_zero && new_size == 0) - ret = LZMA_PROG_ERROR; - else if (index_size > new_size) - ret = LZMA_DATA_ERROR; - else if (*known_size == LZMA_VLI_VALUE_UNKNOWN) - *known_size = new_size; - else if (*known_size != new_size) - ret = LZMA_DATA_ERROR; - - return ret; -} - - -extern LZMA_API lzma_ret -lzma_info_size_set(lzma_info *info, lzma_info_size type, lzma_vli size) -{ - if (size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - switch (type) { - case LZMA_INFO_STREAM_START: - info->stream_start_offset = size; - return LZMA_OK; - - case LZMA_INFO_HEADER_METADATA: - return set_size(size, &info->known.header_metadata_size, - 0, false); - - case LZMA_INFO_TOTAL: - return set_size(size, &info->known.total_size, - info->index.total_size, true); - - case LZMA_INFO_UNCOMPRESSED: - return set_size(size, &info->known.uncompressed_size, - info->index.uncompressed_size, false); - - case LZMA_INFO_FOOTER_METADATA: - return set_size(size, &info->known.footer_metadata_size, - 0, true); - } - - return LZMA_PROG_ERROR; -} - - -extern LZMA_API lzma_ret -lzma_info_index_set(lzma_info *info, lzma_allocator *allocator, - lzma_index *i_new, lzma_bool eat_index) -{ - if (i_new == NULL) - return LZMA_PROG_ERROR; - - lzma_index *i_old = info->index.head; - - if (i_old != NULL) { - while (true) { - // If the new Index has fewer Records than the old one, - // the new Index cannot be valid. - if (i_new == NULL) - return LZMA_DATA_ERROR; - - // The new Index must be complete i.e. no unknown - // values. 
- if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_PROG_ERROR; - } - - // Compare the values from the new Index with the old - // Index. The old Index may be incomplete; in that - // case we - // - use the value from the new Index as is; - // - update the appropriate info->index.foo_size; and - // - decrease the count of incomplete Index Records. - bool was_incomplete = false; - - if (i_old->total_size == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->total_size = i_new->total_size; - - if (lzma_vli_add(info->index.total_size, - i_new->total_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->total_size != i_new->total_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (i_old->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->uncompressed_size - = i_new->uncompressed_size; - - if (lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->uncompressed_size - != i_new->uncompressed_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (was_incomplete) { - assert(!info->index.is_final); - assert(info->index.incomplete_count > 0); - --info->index.incomplete_count; - } - - // Get rid of *i_new. It's now identical with *i_old. - lzma_index *tmp = i_new->next; - if (eat_index) - lzma_free(i_new, allocator); - - i_new = tmp; - - // We want to leave i_old pointing to the last - // Index Record in the old Index. This way we can - // concatenate the possible new Records from i_new. 
- if (i_old->next == NULL) - break; - - i_old = i_old->next; - } - } - - assert(info->index.incomplete_count == 0); - - // If Index was already known to be final, i_new must be NULL now. - // The new Index cannot contain more Records that we already have. - if (info->index.is_final) { - assert(info->index.head != NULL); - - if (i_new != NULL) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - return LZMA_OK; - } - - // The rest of the new Index is merged to the old Index. Keep the - // current i_new pointer in available. We need it when merging the - // new Index with the old one, and if an error occurs so we can - // get rid of the broken part of the new Index. - lzma_index *i_start = i_new; - while (i_new != NULL) { - // The new Index must be complete i.e. no unknown values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - // Update info->index.foo_sizes. - if (lzma_vli_add(info->index.total_size, i_new->total_size) - || lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - ++info->index.record_count; - i_new = i_new->next; - } - - // All the Records in the new Index are good, and info->index.foo_sizes - // were successfully updated. - if (lzma_info_index_finish(info) != LZMA_OK) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_DATA_ERROR; - } - - // The Index is ready to be merged. If we aren't supposed to eat - // the Index, make a copy of it first. - if (!eat_index && i_start != NULL) { - i_start = lzma_index_dup(i_start, allocator); - if (i_start == NULL) - return LZMA_MEM_ERROR; - } - - // Concatenate the new Index with the old one. Note that it is - // possible that we don't have any old Index. 
- if (info->index.head == NULL) - info->index.head = i_start; - else - i_old->next = i_start; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_metadata_set(lzma_info *info, lzma_allocator *allocator, - lzma_metadata *metadata, lzma_bool is_header_metadata, - lzma_bool eat_index) -{ - // Validate *metadata. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || !lzma_vli_is_valid(metadata->uncompressed_size)) { - if (eat_index) { - lzma_index_free(metadata->index, allocator); - metadata->index = NULL; - } - - return LZMA_PROG_ERROR; - } - - // Index - if (metadata->index != NULL) { - if (is_header_metadata) - info->has_index_in_header_metadata = true; - - const lzma_ret ret = lzma_info_index_set( - info, allocator, metadata->index, eat_index); - - if (eat_index) - metadata->index = NULL; - - if (ret != LZMA_OK) - return ret; - - } else if (!is_header_metadata - && (metadata->total_size == LZMA_VLI_VALUE_UNKNOWN - || !info->has_index_in_header_metadata)) { - // Either Total Size or Index must be present in Footer - // Metadata Block. If Index is not present, it must have - // already been in the Header Metadata Block. Since we - // got here, these conditions weren't met. 
- return LZMA_DATA_ERROR; - } - - // Size of Header Metadata - if (!is_header_metadata) - return_if_error(lzma_info_size_set( - info, LZMA_INFO_HEADER_METADATA, - metadata->header_metadata_size)); - - // Total Size - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_TOTAL, metadata->total_size)); - - // Uncompressed Size - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_UNCOMPRESSED, - metadata->uncompressed_size)); - - return LZMA_OK; -} - - -///////// -// Get // -///////// - -extern LZMA_API lzma_vli -lzma_info_size_get(const lzma_info *info, lzma_info_size type) -{ - switch (type) { - case LZMA_INFO_STREAM_START: - return info->stream_start_offset; - - case LZMA_INFO_HEADER_METADATA: - return info->known.header_metadata_size; - - case LZMA_INFO_TOTAL: - return info->known.total_size; - - case LZMA_INFO_UNCOMPRESSED: - return info->known.uncompressed_size; - - case LZMA_INFO_FOOTER_METADATA: - return info->known.footer_metadata_size; - } - - return LZMA_VLI_VALUE_UNKNOWN; -} - - -extern LZMA_API lzma_index * -lzma_info_index_get(lzma_info *info, lzma_bool detach) -{ - lzma_index *i = info->index.head; - - if (detach) - index_init(info); - - return i; -} - - -extern LZMA_API size_t -lzma_info_index_count_get(const lzma_info *info) -{ - return info->index.record_count; -} - - -///////////////// -// Incremental // -///////////////// - -enum { - ITER_INFO, - ITER_INDEX, - ITER_RESERVED_1, - ITER_RESERVED_2, -}; - - -#define iter_info ((lzma_info *)(iter->internal[ITER_INFO])) - -#define iter_index ((lzma_index *)(iter->internal[ITER_INDEX])) - - -extern LZMA_API void -lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter) -{ - *iter = (lzma_info_iter){ - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .stream_offset = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_offset = LZMA_VLI_VALUE_UNKNOWN, - .internal = 
{ info, NULL, NULL, NULL }, - }; - - return; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_next(lzma_info_iter *iter, lzma_allocator *allocator) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL) { - // The first call after lzma_info_iter_begin(). - if (iter_info->known.header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - - iter->uncompressed_offset = 0; - - if (iter_info->index.head != NULL) { - // The first Index Record has already been allocated. - iter->internal[ITER_INDEX] = iter_info->index.head; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } else { - // Update iter->*_offsets. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->stream_offset, - iter_index->total_size)) - return LZMA_DATA_ERROR; - } - - if (iter->uncompressed_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->uncompressed_offset - = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->uncompressed_offset, - iter_index->uncompressed_size)) - return LZMA_DATA_ERROR; - } - - if (iter_index->next != NULL) { - // The next Record has already been allocated. - iter->internal[ITER_INDEX] = iter_index->next; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } - - // Don't add new Records to a final Index. - if (iter_info->index.is_final) - return LZMA_DATA_ERROR; - - // Allocate and initialize a new Index Record. 
- lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = LZMA_VLI_VALUE_UNKNOWN; - i->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - i->next = NULL; - - iter->total_size = LZMA_VLI_VALUE_UNKNOWN; - iter->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // Decide where to put the new Index Record. - if (iter_info->index.head == NULL) - iter_info->index.head = i; - - if (iter_index != NULL) - iter_index->next = i; - - iter->internal[ITER_INDEX] = i; - - ++iter_info->index.record_count; - ++iter_info->index.incomplete_count; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL || !lzma_vli_is_valid(total_size) - || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - const bool was_incomplete = iter_index->total_size - == LZMA_VLI_VALUE_UNKNOWN - || iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - - if (total_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->total_size = total_size; - - if (lzma_vli_add(iter_info->index.total_size, - total_size) - || iter_info->index.total_size - > iter_info->known.total_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->total_size != total_size) { - return LZMA_DATA_ERROR; - } - } - - if (uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->uncompressed_size = uncompressed_size; - - if (lzma_vli_add(iter_info->index.uncompressed_size, - uncompressed_size) - || iter_info->index.uncompressed_size - > iter_info->known.uncompressed_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->uncompressed_size - != uncompressed_size) { - return LZMA_DATA_ERROR; - } - } - - // Check if the new information we got managed to 
finish this - // Index Record. If so, update the count of incomplete Index Records. - if (was_incomplete && iter_index->total_size - != LZMA_VLI_VALUE_UNKNOWN - && iter_index->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - assert(iter_info->index.incomplete_count > 0); - --iter_info->index.incomplete_count; - } - - // Make sure that the known sizes are now available in *iter. - iter->total_size = iter_index->total_size; - iter->uncompressed_size = iter_index->uncompressed_size; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_index_finish(lzma_info *info) -{ - if (info->index.record_count == 0 || info->index.incomplete_count > 0 - || lzma_info_size_set(info, LZMA_INFO_TOTAL, - info->index.total_size) - || lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, - info->index.uncompressed_size)) - return LZMA_DATA_ERROR; - - info->index.is_final = true; - - return LZMA_OK; -} - - -////////////// -// Locating // -////////////// - -extern LZMA_API lzma_vli -lzma_info_metadata_locate(const lzma_info *info, lzma_bool is_header_metadata) -{ - bool error = false; - lzma_vli size = 0; - - if (info->known.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN) { - // We don't know if Header Metadata Block is present, thus - // we cannot locate it either. - // - // Well, you could say that just assume that it is present. - // I'm not sure if this is useful. But it can be useful to - // be able to use this function and get LZMA_VLI_VALUE_UNKNOWN - // to detect that Header Metadata Block wasn't present. - error = true; - } else if (is_header_metadata) { - error = lzma_vli_sum(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE); - } else if (!info->index.is_final) { - // Since we don't know if we have all the Index Records yet, - // we cannot know where the Footer Metadata Block is. 
- error = true; - } else { - error = lzma_vli_sum4(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - info->known.header_metadata_size, - info->known.total_size); - } - - return error ? LZMA_VLI_VALUE_UNKNOWN : size; -} - - -extern LZMA_API uint32_t -lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata) -{ - uint32_t alignment; - - if (is_header_metadata) { - alignment = info->stream_start_offset - + LZMA_STREAM_HEADER_SIZE; - } else { - alignment = info->stream_start_offset + LZMA_STREAM_HEADER_SIZE - + info->known.header_metadata_size - + info->known.total_size; - } - - return alignment; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_locate(lzma_info_iter *iter, lzma_allocator *allocator, - lzma_vli uncompressed_offset, lzma_bool allow_alloc) -{ - if (iter == NULL || uncompressed_offset > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - // Quick check in case Index is final. - if (iter_info->index.is_final) { - assert(iter_info->known.uncompressed_size - == iter_info->index.uncompressed_size); - if (uncompressed_offset >= iter_info->index.uncompressed_size) - return LZMA_DATA_ERROR; - } - - // TODO: Optimize so that it uses existing info from *iter when - // seeking forward. - - // Initialize *iter - if (iter_info->known.header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) { - if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - } else { - // We don't know the Size of Header Metadata Block, thus - // we cannot calculate the Stream offset either. - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - } - - iter->uncompressed_offset = 0; - - // If we have no Index Records, it's obvious that we need to - // add a new one. 
- if (iter_info->index.head == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - return lzma_info_iter_next(iter, allocator); - } - - // Locate an appropriate Index Record. - lzma_index *i = iter_info->index.head; - while (true) { - // - If Uncompressed Size in the Record is unknown, - // we have no chance to search further. - // - If the next Record would go past the requested offset, - // we have found our target Data Block. - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - || iter->uncompressed_offset - + i->uncompressed_size > uncompressed_offset) { - iter->total_size = i->total_size; - iter->uncompressed_size = i->uncompressed_size; - iter->internal[ITER_INDEX] = i; - return LZMA_OK; - } - - // Update the stream offset. It may be unknown if we didn't - // know the size of Header Metadata Block. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) - if (lzma_vli_add(iter->stream_offset, i->total_size)) - return LZMA_PROG_ERROR; - - // Update the uncompressed offset. This cannot overflow since - // the Index is known to be valid. - iter->uncompressed_offset += i->uncompressed_size; - - // Move to the next Block. 
- if (i->next == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - iter->internal[ITER_INDEX] = i; - return lzma_info_iter_next(iter, allocator); - } - - i = i->next; - } -} diff --git a/src/liblzma/common/memory_usage.c b/src/liblzma/common/memory_usage.c index b6f27957..8244c404 100644 --- a/src/liblzma/common/memory_usage.c +++ b/src/liblzma/common/memory_usage.c @@ -28,7 +28,6 @@ get_usage(const lzma_options_filter *filter, bool is_encoder) uint64_t ret; switch (filter->id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_X86: case LZMA_FILTER_POWERPC: case LZMA_FILTER_IA64: diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c deleted file mode 100644 index 579b0a51..00000000 --- a/src/liblzma/common/metadata_decoder.c +++ /dev/null @@ -1,578 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.c -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_decoder.h" -#include "block_decoder.h" - - -/// Maximum size of a single Extra Record. Again, this is mostly to make -/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should -/// be smaller. 
-#define EXTRA_SIZE_MAX (SIZE_MAX / 4) - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_ALLOC, - SEQ_INDEX_TOTAL_SIZE, - SEQ_INDEX_UNCOMPRESSED_SIZE, - SEQ_EXTRA_PREPARE, - SEQ_EXTRA_ALLOC, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA_ALLOC, - SEQ_EXTRA_DATA_COPY, - SEQ_EXTRA_DUMMY_ALLOC, - SEQ_EXTRA_DUMMY_ID, - SEQ_EXTRA_DUMMY_SIZE, - SEQ_EXTRA_DUMMY_COPY, - } sequence; - - /// Number of "things" left to be parsed. If we hit end of input - /// when this isn't zero, we have corrupt Metadata Block. - size_t todo_count; - - /// Position in variable-length integers - size_t pos; - - /// Temporary variable needed to decode variables whose type - /// is size_t instead of lzma_vli. - lzma_vli tmp; - - /// Pointer to target structure to hold the parsed results. - lzma_metadata *metadata; - - /// The Index Record we currently are parsing - lzma_index *index_current; - - /// Number of Records in Index - size_t index_count; - - /// Sum of Total Size fields in the Index - lzma_vli index_total_size; - - /// Sum of Uncompressed Size fields in the Index - lzma_vli index_uncompressed_size; - - /// True if Extra is present. - bool has_extra; - - /// True if we have been requested to store the Extra to *metadata. - bool want_extra; - - /// Pointer to the end of the Extra Record list. - lzma_extra *extra_tail; - - /// Dummy Extra Record used when only verifying integrity of Extra - /// (not storing it to RAM). - lzma_extra extra_dummy; - - /// Block decoder - lzma_next_coder block_decoder; - - /// buffer[buffer_pos] is the next byte to process. - size_t buffer_pos; - - /// buffer[buffer_size] is the first byte to not process. - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is read before - /// it is parsed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -/// Reads a variable-length integer to coder->num. 
-#define read_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_decode( \ - &num, &coder->pos, \ - coder->buffer, &coder->buffer_pos, \ - coder->buffer_size); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder, lzma_allocator *allocator) -{ - while (coder->buffer_pos < coder->buffer_size) - switch (coder->sequence) { - case SEQ_FLAGS: - // Reserved bits must be unset. - if (coder->buffer[coder->buffer_pos] & 0x70) - return LZMA_HEADER_ERROR; - - coder->todo_count = 0; - - // If Size of Header Metadata is present, prepare the - // variable for variable-length integer decoding. Otherwise - // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the - // field isn't present. - if (coder->buffer[coder->buffer_pos] & 0x01) { - coder->metadata->header_metadata_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x02) { - coder->metadata->total_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x04) { - coder->metadata->uncompressed_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x08) { - // Setting index_count to 1 is just to indicate that - // Index is present. The real size is parsed later. 
- coder->index_count = 1; - ++coder->todo_count; - } - - coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80) - != 0; - - ++coder->buffer_pos; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata->header_metadata_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->header_metadata_size); - - if (coder->metadata->header_metadata_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->total_size); - - if (coder->metadata->total_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->uncompressed_size); - --coder->todo_count; - } - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - coder->sequence = SEQ_EXTRA_PREPARE; - break; - } - - read_vli(coder->tmp); - - // Index must not be empty nor far too big (wouldn't fit - // in RAM). 
- if (coder->tmp == 0 || coder->tmp - >= SIZE_MAX / sizeof(lzma_index)) - return LZMA_DATA_ERROR; - - coder->index_count = (size_t)(coder->tmp); - coder->tmp = 0; - - coder->sequence = SEQ_INDEX_ALLOC; - break; - - case SEQ_INDEX_ALLOC: { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = 0; - i->uncompressed_size = 0; - i->next = NULL; - - if (coder->metadata->index == NULL) - coder->metadata->index = i; - else - coder->index_current->next = i; - - coder->index_current = i; - - coder->sequence = SEQ_INDEX_TOTAL_SIZE; - } - - // Fall through - - case SEQ_INDEX_TOTAL_SIZE: { - read_vli(coder->index_current->total_size); - - coder->index_total_size += coder->index_current->total_size; - if (coder->index_total_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - // No Block can have Total Size of zero bytes. - if (coder->index_current->total_size == 0) - return LZMA_DATA_ERROR; - - if (--coder->index_count == 0) { - // If Total Size is present, it must match the sum - // of Total Sizes in Index. 
- if (coder->metadata->total_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->total_size - != coder->index_total_size) - return LZMA_DATA_ERROR; - - coder->index_current = coder->metadata->index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE; - } else { - coder->sequence = SEQ_INDEX_ALLOC; - } - - break; - } - - case SEQ_INDEX_UNCOMPRESSED_SIZE: { - read_vli(coder->index_current->uncompressed_size); - - coder->index_uncompressed_size - += coder->index_current->uncompressed_size; - if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->uncompressed_size - != coder->index_uncompressed_size) - return LZMA_DATA_ERROR; - - --coder->todo_count; - coder->sequence = SEQ_EXTRA_PREPARE; - } - - break; - } - - case SEQ_EXTRA_PREPARE: - assert(coder->todo_count == 0); - - // If we get here, we have at least one byte of input left. - // If "Extra is present" flag is unset in Metadata Flags, - // it means that there is some garbage and we return an error. 
- if (!coder->has_extra) - return LZMA_DATA_ERROR; - - if (!coder->want_extra) { - coder->extra_tail = &coder->extra_dummy; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - break; - } - - coder->sequence = SEQ_EXTRA_ALLOC; - - // Fall through - - case SEQ_EXTRA_ALLOC: { - lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator); - if (e == NULL) - return LZMA_MEM_ERROR; - - e->next = NULL; - e->id = 0; - e->size = 0; - e->data = NULL; - - if (coder->metadata->extra == NULL) - coder->metadata->extra = e; - else - coder->extra_tail->next = e; - - coder->extra_tail = e; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_ID; - } - - // Fall through - - case SEQ_EXTRA_ID: - case SEQ_EXTRA_DUMMY_ID: - read_vli(coder->extra_tail->id); - - if (coder->extra_tail->id == 0) { - coder->extra_tail->size = 0; - coder->extra_tail->data = NULL; - coder->todo_count = 0; - --coder->sequence; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_SIZE: - case SEQ_EXTRA_DUMMY_SIZE: - read_vli(coder->tmp); - - if (coder->tmp == 0) { - // We have no Data in the Extra Record. Don't - // allocate any memory for it. Go back to - // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC. - coder->tmp = 0; - coder->sequence -= 2; - coder->todo_count = 0; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_DATA_ALLOC: { - if (coder->tmp > EXTRA_SIZE_MAX) - return LZMA_DATA_ERROR; - - coder->extra_tail->size = (size_t)(coder->tmp); - coder->tmp = 0; - - // We reserve space for the trailing '\0' too. 
- uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1, - allocator); - if (d == NULL) - return LZMA_MEM_ERROR; - - coder->extra_tail->data = d; - coder->sequence = SEQ_EXTRA_DATA_COPY; - } - - // Fall through - - case SEQ_EXTRA_DATA_COPY: - bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, - coder->extra_tail->data, &coder->pos, - (size_t)(coder->extra_tail->size)); - - if ((size_t)(coder->extra_tail->size) == coder->pos) { - coder->extra_tail->data[coder->pos] = '\0'; - coder->pos = 0; - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_ALLOC; - } - - break; - - case SEQ_EXTRA_DUMMY_ALLOC: - // Not really alloc, just initialize the dummy entry. - coder->extra_dummy = (lzma_extra){ - .next = NULL, - .id = 0, - .size = 0, - .data = NULL, - }; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_DUMMY_ID; - break; - - case SEQ_EXTRA_DUMMY_COPY: { - // Simply skip as many bytes as indicated by Extra Record Size. - // We don't check lzma_extra_size_max because we don't - // allocate any memory to hold the data. - const size_t in_avail = coder->buffer_size - coder->buffer_pos; - const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp); - coder->buffer_pos += skip; - coder->tmp -= skip; - - if (coder->tmp == 0) { - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - } - - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - bool end_was_reached = false; - - while (true) { - // Fill the buffer if it is empty. 
- if (coder->buffer_pos == coder->buffer_size) { - coder->buffer_pos = 0; - coder->buffer_size = 0; - - const lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, coder->buffer, - &coder->buffer_size, LZMA_BUFFER_SIZE, - LZMA_RUN); - - switch (ret) { - case LZMA_OK: - // Return immediatelly if we got no new data. - if (coder->buffer_size == 0) - return LZMA_OK; - - break; - - case LZMA_STREAM_END: - end_was_reached = true; - break; - - default: - return ret; - } - } - - // Process coder->buffer. - const lzma_ret ret = process(coder, allocator); - if (ret != LZMA_OK) - return ret; - - // On success, process() eats all the input. - assert(coder->buffer_pos == coder->buffer_size); - - if (end_was_reached) { - // Check that the sequence is not in the - // middle of anything. - if (coder->todo_count != 0) - return LZMA_DATA_ERROR; - - // If Size of Header Metadata Block was not - // present, we use zero as its size instead - // of LZMA_VLI_VALUE_UNKNOWN. 
- if (coder->metadata->header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - coder->metadata->header_metadata_size = 0; - - return LZMA_STREAM_END; - } - } -} - - -static void -metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_decode; - next->end = &metadata_decoder_end; - next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - } - - metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->total_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->index = NULL; - metadata->extra = NULL; - - next->coder->sequence = SEQ_FLAGS; - next->coder->todo_count = 1; - next->coder->pos = 0; - next->coder->tmp = 0; - next->coder->metadata = metadata; - next->coder->index_current = NULL; - next->coder->index_count = 0; - next->coder->index_total_size = 0; - next->coder->index_uncompressed_size = 0; - next->coder->want_extra = want_extra; - next->coder->extra_tail = NULL; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - return lzma_block_decoder_init( - &next->coder->block_decoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - lzma_next_coder_init(metadata_decoder_init, next, allocator, - options, metadata, want_extra); -} - - -extern LZMA_API lzma_ret -lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options, - 
lzma_metadata *metadata, lzma_bool want_extra) -{ - lzma_next_strm_init(strm, lzma_metadata_decoder_init, - options, metadata, want_extra); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/metadata_decoder.h b/src/liblzma/common/metadata_decoder.h deleted file mode 100644 index 1fba2179..00000000 --- a/src/liblzma/common/metadata_decoder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.h -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_METADATA_DECODER_H -#define LZMA_METADATA_DECODER_H - -#include "common.h" - - -extern lzma_ret lzma_metadata_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra); - -#endif diff --git a/src/liblzma/common/metadata_encoder.c b/src/liblzma/common/metadata_encoder.c deleted file mode 100644 index 9f4a15b0..00000000 --- a/src/liblzma/common/metadata_encoder.c +++ /dev/null @@ -1,435 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_encoder.c -/// \brief Encodes metadata to be stored into Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_encoder.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_TOTAL, - SEQ_INDEX_UNCOMPRESSED, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA, - SEQ_END, - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// Local copy of the Metadata structure. Note that we keep - /// a copy only of the main structure, not Index or Extra Records. 
- lzma_metadata metadata; - - /// Number of Records in Index - size_t index_count; - - /// Index Record currently being processed - const lzma_index *index_current; - - /// Block encoder for the encoded Metadata - lzma_next_coder block_encoder; - - /// True once everything except compression has been done. - bool end_was_reached; - - /// buffer[buffer_pos] is the first byte that needs to be compressed. - size_t buffer_pos; - - /// buffer[buffer_size] is the next position where a byte will be - /// written by process(). - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is written before - /// it is compressed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -#define write_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_encode(num, &coder->pos, 1, \ - coder->buffer, &coder->buffer_size, \ - LZMA_BUFFER_SIZE); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder) -{ - while (coder->buffer_size < LZMA_BUFFER_SIZE) - switch (coder->sequence) { - case SEQ_FLAGS: - coder->buffer[coder->buffer_size] = 0; - - if (coder->metadata.header_metadata_size != 0) - coder->buffer[coder->buffer_size] |= 0x01; - - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x02; - - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x04; - - if (coder->index_count > 0) - coder->buffer[coder->buffer_size] |= 0x08; - - if (coder->metadata.extra != NULL) - coder->buffer[coder->buffer_size] |= 0x80; - - ++coder->buffer_size; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata.header_metadata_size != 0) - write_vli(coder->metadata.header_metadata_size); - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.total_size); - - 
coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.uncompressed_size); - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - break; - } - - write_vli(coder->index_count); - coder->sequence = SEQ_INDEX_TOTAL; - break; - - case SEQ_INDEX_TOTAL: - write_vli(coder->index_current->total_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - coder->index_current = coder->metadata.index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED; - } - - break; - - case SEQ_INDEX_UNCOMPRESSED: - write_vli(coder->index_current->uncompressed_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current != NULL) - break; - - if (coder->metadata.extra != NULL) { - coder->sequence = SEQ_EXTRA_ID; - break; - } - - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - - case SEQ_EXTRA_ID: { - const lzma_ret ret = lzma_vli_encode( - coder->metadata.extra->id, &coder->pos, 1, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->pos = 0; - - // Handle the special ID 0. 
- if (coder->metadata.extra->id == 0) { - coder->metadata.extra - = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - - } else { - coder->sequence = SEQ_EXTRA_SIZE; - } - - break; - - default: - return ret; - } - - break; - } - - case SEQ_EXTRA_SIZE: - if (coder->metadata.extra->size >= (lzma_vli)(SIZE_MAX)) - return LZMA_HEADER_ERROR; - - write_vli(coder->metadata.extra->size); - coder->sequence = SEQ_EXTRA_DATA; - break; - - case SEQ_EXTRA_DATA: - bufcpy(coder->metadata.extra->data, &coder->pos, - coder->metadata.extra->size, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - - if ((size_t)(coder->metadata.extra->size) == coder->pos) { - coder->metadata.extra = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->pos = 0; - coder->sequence = SEQ_EXTRA_ID; - } - - break; - - case SEQ_END: - // Everything is encoded. Let the compression code finish - // its work now. - return LZMA_STREAM_END; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in lzma_attribute((unused)), - size_t *restrict in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused)), uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action lzma_attribute((unused))) -{ - while (!coder->end_was_reached) { - // Flush coder->buffer if it isn't empty. 
- if (coder->buffer_size > 0) { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, - coder->buffer_size, - out, out_pos, out_size, LZMA_RUN); - if (coder->buffer_pos < coder->buffer_size - || ret != LZMA_OK) - return ret; - - coder->buffer_pos = 0; - coder->buffer_size = 0; - } - - const lzma_ret ret = process(coder); - - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->end_was_reached = true; - break; - - default: - return ret; - } - } - - // Finish - return coder->block_encoder.code(coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, coder->buffer_size, - out, out_pos, out_size, LZMA_FINISH); -} - - -static void -metadata_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_encode; - next->end = &metadata_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } - - next->coder->sequence = SEQ_FLAGS; - next->coder->pos = 0; - next->coder->metadata = *metadata; - next->coder->index_count = 0; - next->coder->index_current = metadata->index; - next->coder->end_was_reached = false; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - // Count and validate the Index Records. 
- { - const lzma_index *i = metadata->index; - while (i != NULL) { - if (i->total_size > LZMA_VLI_VALUE_MAX - || i->uncompressed_size - > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - ++next->coder->index_count; - i = i->next; - } - } - - // Initialize the Block encoder. - return lzma_block_encoder_init( - &next->coder->block_encoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - lzma_next_coder_init(metadata_encoder_init, next, allocator, - options, metadata); -} - - -extern LZMA_API lzma_ret -lzma_metadata_encoder(lzma_stream *strm, lzma_options_block *options, - const lzma_metadata *metadata) -{ - lzma_next_strm_init(strm, metadata_encoder_init, options, metadata); - - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} - - -extern LZMA_API lzma_vli -lzma_metadata_size(const lzma_metadata *metadata) -{ - lzma_vli size = 1; // Metadata Flags - - // Validate header_metadata_size, total_size, and uncompressed_size. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || metadata->total_size == 0 - || !lzma_vli_is_valid(metadata->uncompressed_size)) - return 0; - - // Add the sizes of these three fields. 
- if (metadata->header_metadata_size != 0) - size += lzma_vli_size(metadata->header_metadata_size); - - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->total_size); - - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->uncompressed_size); - - // Index - if (metadata->index != NULL) { - const lzma_index *i = metadata->index; - size_t count = 1; - - do { - const size_t x = lzma_vli_size(i->total_size); - const size_t y = lzma_vli_size(i->uncompressed_size); - if (x == 0 || y == 0) - return 0; - - size += x + y; - ++count; - i = i->next; - - } while (i != NULL); - - const size_t tmp = lzma_vli_size(count); - if (tmp == 0) - return 0; - - size += tmp; - } - - // Extra - { - const lzma_extra *e = metadata->extra; - while (e != NULL) { - // Validate the numbers. - if (e->id > LZMA_VLI_VALUE_MAX - || e->size >= (lzma_vli)(SIZE_MAX)) - return 0; - - // Add the sizes. - size += lzma_vli_size(e->id); - if (e->id != 0) { - size += lzma_vli_size(e->size); - size += e->size; - } - - e = e->next; - } - } - - return size; -} diff --git a/src/liblzma/common/raw_common.c b/src/liblzma/common/raw_common.c index d45bf4de..35252fc2 100644 --- a/src/liblzma/common/raw_common.c +++ b/src/liblzma/common/raw_common.c @@ -20,122 +20,81 @@ #include "raw_common.h" -/// \brief Prepares the filter chain -/// -/// Prepares the filter chain by setting uncompressed sizes for each filter, -/// and adding implicit Subblock filter when needed. -/// -/// \return true if error occurred, false on success. 
-/// -static bool -prepare(lzma_vli *id, lzma_vli *uncompressed_size, bool allow_implicit) +static lzma_ret +validate_options(const lzma_options_filter *options, size_t *count) { - bool needs_end_of_input = false; - - switch (id[0]) { - case LZMA_FILTER_COPY: - case LZMA_FILTER_X86: - case LZMA_FILTER_POWERPC: - case LZMA_FILTER_IA64: - case LZMA_FILTER_ARM: - case LZMA_FILTER_ARMTHUMB: - case LZMA_FILTER_SPARC: - case LZMA_FILTER_DELTA: - uncompressed_size[1] = uncompressed_size[0]; - needs_end_of_input = true; - break; - - case LZMA_FILTER_SUBBLOCK: - case LZMA_FILTER_LZMA: - // These change the size of the data unpredictably. - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - break; - - case LZMA_FILTER_SUBBLOCK_HELPER: - uncompressed_size[1] = uncompressed_size[0]; - break; - - default: - // Unknown filter. - return true; - } + if (options == NULL) + return LZMA_PROG_ERROR; - // Is this the last filter in the chain? - if (id[1] == LZMA_VLI_VALUE_UNKNOWN) { - if (needs_end_of_input && allow_implicit - && uncompressed_size[0] - == LZMA_VLI_VALUE_UNKNOWN) { - // Add implicit Subblock filter. - id[1] = LZMA_FILTER_SUBBLOCK; - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - id[2] = LZMA_VLI_VALUE_UNKNOWN; + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t change = 0; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i; + for (i = 0; options[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + switch (options[i].id) { + // Not #ifdeffing these for simplicity. 
+ case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + // These don't change the size of the data and cannot + // be used as the last filter in the chain. + last_ok = false; + break; + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + last_ok = true; + ++change; + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + last_ok = true; + break; +#endif + + default: + return LZMA_HEADER_ERROR; } - - return false; } - return prepare(id + 1, uncompressed_size + 1, allow_implicit); + // There must be 1-4 filters and the last filter must be usable as + // the last filter in the chain. + if (i == 0 || i > 4 || !last_ok) + return LZMA_HEADER_ERROR; + + // At maximum of two non-last filters are allowed to change the + // size of the data. + if (change > 2) + return LZMA_HEADER_ERROR; + + *count = i; + return LZMA_OK; } extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder) + bool is_encoder) { - if (options == NULL || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - // Count the number of filters in the chain. - size_t count = 0; - while (options[count].id != LZMA_VLI_VALUE_UNKNOWN) - ++count; - - // Allocate enough space from the stack for IDs and uncompressed - // sizes. We need two extra: possible implicit Subblock and end - // of array indicator. 
- lzma_vli ids[count + 2]; - lzma_vli uncompressed_sizes[count + 2]; - bool using_implicit = false; - - uncompressed_sizes[0] = uncompressed_size; - - if (count == 0) { - if (!allow_implicit) - return LZMA_PROG_ERROR; - - count = 1; - using_implicit = true; - - // Special case: no filters were specified, so an implicit - // Copy or Subblock filter is used. - if (uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - ids[0] = LZMA_FILTER_SUBBLOCK; - else - ids[0] = LZMA_FILTER_COPY; + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_options(options, &count)); - ids[1] = LZMA_VLI_VALUE_UNKNOWN; - - } else { - // Prepare the ids[] and uncompressed_sizes[]. - for (size_t i = 0; i < count; ++i) - ids[i] = options[i].id; - - ids[count] = LZMA_VLI_VALUE_UNKNOWN; - - if (prepare(ids, uncompressed_sizes, allow_implicit)) - return LZMA_HEADER_ERROR; - - // Check if implicit Subblock filter was added. - if (ids[count] != LZMA_VLI_VALUE_UNKNOWN) { - assert(ids[count] == LZMA_FILTER_SUBBLOCK); - ++count; - using_implicit = true; - } - } - - // Set the filter functions, and copy uncompressed sizes and options. + // Set the filter functions and copy the options pointer. lzma_filter_info filters[count + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { @@ -144,29 +103,20 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // of the uncompressed data. 
const size_t j = count - i - 1; - filters[j].init = get_function(ids[i]); + filters[j].init = get_function(options[i].id); if (filters[j].init == NULL) return LZMA_HEADER_ERROR; filters[j].options = options[i].options; - filters[j].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[0].options = NULL; - } else { for (size_t i = 0; i < count; ++i) { - filters[i].init = get_function(ids[i]); + filters[i].init = get_function(options[i].id); if (filters[i].init == NULL) return LZMA_HEADER_ERROR; filters[i].options = options[i].options; - filters[i].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[count - 1].options = NULL; } // Terminate the array. diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/common/raw_common.h index 172223cb..0a27f3dc 100644 --- a/src/liblzma/common/raw_common.h +++ b/src/liblzma/common/raw_common.h @@ -23,9 +23,8 @@ #include "common.h" extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, - lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_allocator *allocator, const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder); + bool is_encoder); #endif diff --git a/src/liblzma/common/raw_decoder.c b/src/liblzma/common/raw_decoder.c index 03f1d847..4fb7111c 100644 --- a/src/liblzma/common/raw_decoder.c +++ b/src/liblzma/common/raw_decoder.c @@ -18,24 +18,17 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_decoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_decoder.h" #include "subblock_decoder_helper.h" #include "delta_decoder.h" #include "lzma_decoder.h" -#include "metadata_decoder.h" static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_decoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case 
LZMA_FILTER_SUBBLOCK: return &lzma_subblock_decoder_init; @@ -93,12 +86,10 @@ get_function(lzma_vli id) extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) + const lzma_options_filter *options) { const lzma_ret ret = lzma_raw_coder_init(next, allocator, - options, uncompressed_size, &get_function, - allow_implicit, false); + options, &get_function, false); if (ret != LZMA_OK) lzma_next_coder_end(next, allocator); @@ -108,8 +99,7 @@ lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options) { return_if_error(lzma_strm_init(strm)); @@ -117,8 +107,7 @@ lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, - strm->allocator, options, uncompressed_size, - &get_function, allow_implicit, false); + strm->allocator, options, &get_function, false); if (ret != LZMA_OK) lzma_end(strm); diff --git a/src/liblzma/common/raw_decoder.h b/src/liblzma/common/raw_decoder.h index 9d48074b..c0e626a8 100644 --- a/src/liblzma/common/raw_decoder.h +++ b/src/liblzma/common/raw_decoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/raw_encoder.c b/src/liblzma/common/raw_encoder.c index fb12862b..9b8cbfae 100644 --- a/src/liblzma/common/raw_encoder.c +++ b/src/liblzma/common/raw_encoder.c @@ -18,28 +18,16 @@ 
/////////////////////////////////////////////////////////////////////////////// #include "raw_encoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_encoder.h" #include "delta_encoder.h" #include "lzma_encoder.h" -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_encoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_encoder_init; @@ -90,91 +78,34 @@ get_function(lzma_vli id) } -static lzma_ret -raw_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options) { - // Check that our amount of input stays in proper limits. 
- if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (action == LZMA_FINISH) { - if (coder->uncompressed_size != in_size - *in_pos) - return LZMA_PROG_ERROR; - } else { - if (coder->uncompressed_size < in_size - *in_pos) - return LZMA_PROG_ERROR; - } - } - - const size_t in_start = *in_pos; + const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, &get_function, true); - const lzma_ret ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, action); - - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= *in_pos - in_start; + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); return ret; } -static void -raw_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &raw_encode; - next->end = &raw_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - next->coder->uncompressed_size = uncompressed_size; - - // lzma_raw_coder_init() accesses get_function() via function pointer, - // because this way linker doesn't statically link both encoder and - // decoder functions if user needs only encoder or decoder. 
- return lzma_raw_coder_init(&next->coder->next, allocator, - options, uncompressed_size, - &get_function, allow_implicit, true); -} - - -extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - lzma_next_coder_init(raw_encoder_init, next, allocator, - options, uncompressed_size, allow_implicit); -} - - extern LZMA_API lzma_ret -lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options) { - lzma_next_strm_init(strm, raw_encoder_init, - options, uncompressed_size, allow_implicit); + return_if_error(lzma_strm_init(strm)); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; - return LZMA_OK; + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, &get_function, true); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; } diff --git a/src/liblzma/common/raw_encoder.h b/src/liblzma/common/raw_encoder.h index b0aab61a..4e148489 100644 --- a/src/liblzma/common/raw_encoder.h +++ b/src/liblzma/common/raw_encoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/stream_common.h b/src/liblzma/common/stream_common.h index b2f37f37..4f83fc58 100644 --- a/src/liblzma/common/stream_common.h +++ b/src/liblzma/common/stream_common.h @@ -22,6 +22,9 @@ #include "common.h" +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + extern const uint8_t lzma_header_magic[6]; extern const uint8_t 
lzma_footer_magic[2]; diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 56de3d9f..1bf7f1f8 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -18,281 +18,148 @@ /////////////////////////////////////////////////////////////////////////////// #include "stream_common.h" +#include "stream_decoder.h" #include "check.h" #include "stream_flags_decoder.h" #include "block_decoder.h" -#include "metadata_decoder.h" struct lzma_coder_s { enum { - SEQ_STREAM_HEADER_CODE, - SEQ_BLOCK_HEADER_INIT, - SEQ_BLOCK_HEADER_CODE, - SEQ_METADATA_CODE, - SEQ_DATA_CODE, - SEQ_STREAM_TAIL_INIT, - SEQ_STREAM_TAIL_CODE, + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, } sequence; - /// Position in variable-length integers and in some other things. - size_t pos; - /// Block or Metadata decoder. This takes little memory and the same /// data structure can be used to decode every Block Header, so it's /// a good idea to have a separate lzma_next_coder structure for it. lzma_next_coder block_decoder; - /// Block Header decoder; this is separate - lzma_next_coder block_header_decoder; - + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. lzma_options_block block_options; - /// Information about the sizes of the Blocks - lzma_info *info; - - /// Current Block in *info - lzma_info_iter iter; - - /// Number of bytes not yet processed from Data Blocks in the Stream. - /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is - /// decremented while decoding and verified to match the reality. - lzma_vli total_left; - - /// Like uncompressed_left above but for uncompressed data from - /// Data Blocks. 
- lzma_vli uncompressed_left; - /// Stream Flags from Stream Header - lzma_stream_flags header_flags; - - /// Stream Flags from Stream tail - lzma_stream_flags tail_flags; + lzma_stream_flags stream_flags; - /// Decoder for Stream Header and Stream tail. This takes very - /// little memory and the same data structure can be used for - /// both Header and tail, so it's a good idea to have a separate - /// lzma_next_coder structure for it. - lzma_next_coder flags_decoder; + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; - /// Temporary destination for the decoded Metadata. - lzma_metadata metadata; + /// Write position in buffer[] + size_t buffer_pos; - /// Pointer to application-supplied pointer where to store the list - /// of Extra Records from the Header Metadata Block. - lzma_extra **header_extra; - - /// Same as above but Footer Metadata Block - lzma_extra **footer_extra; + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; static lzma_ret -metadata_init(lzma_coder *coder, lzma_allocator *allocator) -{ - assert(coder->metadata.index == NULL); - assert(coder->metadata.extra == NULL); - - // Single-Block Streams don't have Metadata Blocks. - if (!coder->header_flags.is_multi) - return LZMA_DATA_ERROR; - - coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the Uncompressed Size of a Metadata Block. This is to - // prevent security issues where input file would have very huge - // Metadata. - // - // FIXME: Hardcoded constant is ugly. Maybe we should provide - // some way to specify this from the application. - coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; - - lzma_info_size size_type; - bool want_extra; - - // If we haven't decoded any Data Blocks yet, this is Header - // Metadata Block. 
- if (lzma_info_index_count_get(coder->info) == 0) { - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - size_type = LZMA_INFO_HEADER_METADATA; - want_extra = coder->header_extra != NULL; - } else { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - size_type = LZMA_INFO_FOOTER_METADATA; - want_extra = coder->footer_extra != NULL; - } - - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.total_size = lzma_info_size_get( - coder->info, size_type); - - coder->sequence = SEQ_METADATA_CODE; - - return lzma_metadata_decoder_init(&coder->block_decoder, allocator, - &coder->block_options, &coder->metadata, want_extra); -} - - -static lzma_ret -data_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(lzma_info_iter_set( - &coder->iter, LZMA_VLI_VALUE_UNKNOWN, - coder->block_options.uncompressed_size)); - - coder->block_options.total_size = coder->iter.total_size; - coder->block_options.uncompressed_size = coder->iter.uncompressed_size; - coder->block_options.total_limit = coder->total_left; - coder->block_options.uncompressed_limit = coder->uncompressed_left; - - if (coder->header_flags.is_multi) { - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - } else { - coder->block_options.has_uncompressed_size_in_footer - = coder->iter.uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - } - - coder->sequence = SEQ_DATA_CODE; - - return lzma_block_decoder_init(&coder->block_decoder, allocator, - &coder->block_options); -} - - -static lzma_ret stream_decode(lzma_coder *coder, lzma_allocator *allocator, 
const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA_CODE)) + || coder->sequence == SEQ_BLOCK)) switch (coder->sequence) { - case SEQ_STREAM_HEADER_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->buffer_pos = 0; + + // Decode the Stream Header. + return_if_error(lzma_stream_header_decode( + &coder->stream_flags, coder->buffer)); - coder->sequence = SEQ_BLOCK_HEADER_INIT; + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_UNSUPPORTED_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; // Detect if the Check type is supported and give appropriate // warning if it isn't. We don't warn every time a new Block // is started. - lzma_check tmp; - if (lzma_check_init(&tmp, coder->header_flags.check)) + if (!lzma_available_checks[coder->block_options.check]) return LZMA_UNSUPPORTED_CHECK; break; } - case SEQ_BLOCK_HEADER_INIT: { - coder->block_options.check = coder->header_flags.check; - coder->block_options.has_crc32 = coder->header_flags.has_crc32; + case SEQ_BLOCK_HEADER: { + if (coder->buffer_pos == 0) { + // Detect if it's Index. 
+ if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } - for (size_t i = 0; - i < ARRAY_SIZE(coder->block_options.filters); - ++i) { - lzma_free(coder->block_options.filters[i].options, - allocator); - coder->block_options.filters[i].options = NULL; + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); } - return_if_error(lzma_block_header_decoder_init( - &coder->block_header_decoder, allocator, - &coder->block_options)); - - coder->sequence = SEQ_BLOCK_HEADER_CODE; - } - - // Fall through - - case SEQ_BLOCK_HEADER_CODE: { - lzma_ret ret = coder->block_header_decoder.code( - coder->block_header_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the Block Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + coder->block_options.header_size); - if (coder->block_options.is_metadata) - ret = metadata_init(coder, allocator); - else - ret = data_init(coder, allocator); - - if (ret != LZMA_OK) - return ret; - - break; - } + // Return if we didn't get the whole Block Header yet. + if (coder->buffer_pos < coder->block_options.header_size) + return LZMA_OK; - case SEQ_METADATA_CODE: { - lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + coder->buffer_pos = 0; - const bool is_header_metadata = lzma_info_index_count_get( - coder->info) == 0; + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. 
+ lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + coder->block_options.filters = filters; - if (is_header_metadata) { - if (coder->header_extra != NULL) { - *coder->header_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, - coder->block_options.total_size) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_BLOCK_HEADER_INIT; - } else { - if (coder->footer_extra != NULL) { - *coder->footer_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, &coder->block_options); - coder->sequence = SEQ_STREAM_TAIL_INIT; - } + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); - assert(coder->metadata.extra == NULL); + coder->block_options.filters = NULL; - ret = lzma_info_metadata_set(coder->info, allocator, - &coder->metadata, is_header_metadata, true); - if (ret != LZMA_OK) + // Check if Block decoder initialization succeeded. Don't + // warn about unsupported check anymore since we did it + // earlier if it was needed. + if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK) return ret; - // Intialize coder->total_size and coder->uncompressed_size - // from Header Metadata.
- if (is_header_metadata) { - coder->total_left = lzma_info_size_get( - coder->info, LZMA_INFO_TOTAL); - coder->uncompressed_left = lzma_info_size_get( - coder->info, LZMA_INFO_UNCOMPRESSED); - } - + coder->sequence = SEQ_BLOCK; break; } - case SEQ_DATA_CODE: { + case SEQ_BLOCK: { lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, @@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - ret = lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size); - if (ret != LZMA_OK) - return ret; - - // These won't overflow since lzma_info_iter_set() succeeded. - if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN) - coder->total_left -= coder->block_options.total_size; - if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_left -= coder->block_options - .uncompressed_size; + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_total_size_get( + &coder->block_options), + coder->block_options.uncompressed_size)); - if (!coder->header_flags.is_multi) { - ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_STREAM_TAIL_INIT; - break; - } - - coder->sequence = SEQ_BLOCK_HEADER_INIT; + coder->sequence = SEQ_BLOCK_HEADER; break; } - case SEQ_STREAM_TAIL_INIT: { - lzma_ret ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - ret = lzma_stream_tail_decoder_init(&coder->flags_decoder, - allocator, &coder->tail_flags); - if (ret != LZMA_OK) + case SEQ_INDEX: { + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). 
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) return ret; - coder->sequence = SEQ_STREAM_TAIL_CODE; + coder->sequence = SEQ_STREAM_FOOTER; + break; } - // Fall through + case SEQ_STREAM_FOOTER: + // Copy the Stream Footer to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); - case SEQ_STREAM_TAIL_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + // Return if we didn't get the whole Stream Footer yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; - if (!lzma_stream_flags_is_equal( - coder->header_flags, coder->tail_flags)) + // Decode the Stream Footer. + lzma_stream_flags footer_flags; + return_if_error(lzma_stream_footer_decode( + &footer_flags, coder->buffer)); + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. 
+ if (!lzma_stream_flags_equal(&coder->stream_flags, + &footer_flags)) return LZMA_DATA_ERROR; return LZMA_STREAM_END; - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { - for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i) - lzma_free(coder->block_options.filters[i].options, allocator); - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_next_coder_end(&coder->block_header_decoder, allocator); - lzma_next_coder_end(&coder->flags_decoder, allocator); - lzma_info_free(coder->info, allocator); - lzma_index_free(coder->metadata.index, allocator); - lzma_extra_free(coder->metadata.extra, allocator); + lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_ret -stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->end = &stream_decoder_end; next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; - next->coder->metadata.index = NULL; - next->coder->metadata.extra = NULL; - } else { - for (size_t i = 0; i < ARRAY_SIZE( - next->coder->block_options.filters); ++i) - lzma_free(next->coder->block_options - .filters[i].options, allocator); - - lzma_index_free(next->coder->metadata.index, allocator); - next->coder->metadata.index = NULL; - - lzma_extra_free(next->coder->metadata.extra, allocator); - next->coder->metadata.extra = NULL; + next->coder->index_hash = NULL; } - for (size_t i = 0; i < 
ARRAY_SIZE(next->coder->block_options.filters); - ++i) - next->coder->block_options.filters[i].options = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) + // Initialize the Index hash used to verify the Index. + next->coder->index_hash = lzma_index_hash_init( + next->coder->index_hash, allocator); + if (next->coder->index_hash == NULL) return LZMA_MEM_ERROR; - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - // Initialize Stream Header decoder. - return_if_error(lzma_stream_header_decoder_init( - &next->coder->flags_decoder, allocator, - &next->coder->header_flags)); - - // Reset the *foo_extra pointers to NULL. This way the caller knows - // if there were no Extra Records. (We don't support appending - // Records to Extra list.) - if (header != NULL) - *header = NULL; - if (footer != NULL) - *footer = NULL; - - // Reset some variables. - next->coder->sequence = SEQ_STREAM_HEADER_CODE; - next->coder->pos = 0; - next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->header_extra = header; - next->coder->footer_extra = footer; + // Reset the rest of the variables. 
+ next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.filters = NULL; + next->coder->buffer_pos = 0; return LZMA_OK; } extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - lzma_next_coder_init( - stream_decoder_init, next, allocator, header, footer); + lzma_next_coder_init0(stream_decoder_init, next, allocator); } extern LZMA_API lzma_ret -lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, stream_decoder_init, header, footer); + lzma_next_strm_init0(strm, stream_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/easy_common.h b/src/liblzma/common/stream_decoder.h index d864cce5..dcda387d 100644 --- a/src/liblzma/common/easy_common.h +++ b/src/liblzma/common/stream_decoder.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions +/// \file stream_decoder.h +/// \brief Decodes .lzma Streams // // Copyright (C) 2008 Lasse Collin // @@ -17,12 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H -#ifndef LZMA_EASY_COMMON_H -#define LZMA_EASY_COMMON_H +#include "common.h" -extern bool lzma_easy_set_filters( - lzma_options_filter *filters, uint32_t level); +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator); #endif diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c new file mode 100644 index 00000000..767b8014 --- /dev/null +++ 
b/src/liblzma/common/stream_encoder.c @@ -0,0 +1,282 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .lzma Streams +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_common.h" +#include "stream_encoder.h" +#include "block_encoder.h" +#include "index_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_options_block block_options; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +{ + // Prepare the Block options. 
+ coder->block_options.compressed_size = LZMA_VLI_VALUE_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, + out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediately since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. + // + // Initialize the Index encoder, and continue to + // actually encoding the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder except if this is the first + // Block, because stream_encoder_init() has already + // initialized it.
+ if (lzma_index_count(coder->index) != 0) + return_if_error(block_encoder_init(coder, allocator)); + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[4] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli total_size = lzma_block_total_size_get( + &coder->block_options); + assert(total_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + total_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. 
+ const lzma_stream_flags stream_flags = { + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->block_encoder, allocator); + lzma_next_coder_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + if (filters == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_encode; + next->end = &stream_encoder_end; + + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.check = check; + next->coder->block_options.filters = (lzma_options_filter *)(filters); + + // Initialize the Index + next->coder->index = lzma_index_init(next->coder->index, allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, next->coder->buffer)); + + next->coder->buffer_pos = 0; + next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. 
This way we detect if the given + // filters are supported by the current liblzma build, and the + // application doesn't need to keep the filters structure available + // unless it is going to use LZMA_FULL_FLUSH. + return block_encoder_init(next->coder, allocator); +} + + +extern lzma_ret +lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_coder_init(stream_encoder_init, next, allocator, + filters, check); +} + + +extern LZMA_API lzma_ret +lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_strm_init(strm, stream_encoder_init, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/metadata_encoder.h b/src/liblzma/common/stream_encoder.h index 20357fe6..3ce29561 100644 --- a/src/liblzma/common/metadata_encoder.h +++ b/src/liblzma/common/stream_encoder.h @@ -1,9 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file metadata_encoder.h -/// \brief Encodes metadata to be stored into Metadata Blocks +/// \file stream_encoder.h +/// \brief Encodes .lzma Streams // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,14 +17,14 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_METADATA_ENCODER_H -#define LZMA_METADATA_ENCODER_H +#ifndef LZMA_STREAM_ENCODER_H +#define LZMA_STREAM_ENCODER_H #include "common.h" -extern lzma_ret lzma_metadata_encoder_init( +extern lzma_ret lzma_stream_encoder_init( lzma_next_coder 
*next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata); + const lzma_options_filter *filters, lzma_check_type check); #endif diff --git a/src/liblzma/common/stream_encoder_multi.c b/src/liblzma/common/stream_encoder_multi.c deleted file mode 100644 index 403980cf..00000000 --- a/src/liblzma/common/stream_encoder_multi.c +++ /dev/null @@ -1,445 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_multi.c -/// \brief Encodes Multi-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "stream_encoder_multi.h" -#include "block_encoder.h" -#include "metadata_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_STREAM_HEADER_COPY, - SEQ_HEADER_METADATA_INIT, - SEQ_HEADER_METADATA_COPY, - SEQ_HEADER_METADATA_CODE, - SEQ_DATA_INIT, - SEQ_DATA_COPY, - SEQ_DATA_CODE, - SEQ_FOOTER_METADATA_INIT, - SEQ_FOOTER_METADATA_COPY, - SEQ_FOOTER_METADATA_CODE, - SEQ_STREAM_FOOTER_INIT, - SEQ_STREAM_FOOTER_COPY, - } sequence; - - /// Block or Metadata encoder - lzma_next_coder next; - - /// Options for the Block encoder - lzma_options_block block_options; - - /// Information about the Stream - lzma_info *info; - - /// Information about the current Data Block - lzma_info_iter iter; - - /// Pointer to user-supplied options structure. 
We don't write to - /// it, only read instructions from the application, thus this is - /// const even though the user-supplied pointer from - /// lzma_options_filter structure isn't. - const lzma_options_stream *stream_options; - - /// Stream Header or Stream Footer in encoded form - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -typedef enum { - BLOCK_HEADER_METADATA, - BLOCK_DATA, - BLOCK_FOOTER_METADATA, -} block_type; - - -static lzma_ret -block_header_encode(lzma_coder *coder, lzma_allocator *allocator, - lzma_vli uncompressed_size, block_type type) -{ - assert(coder->header == NULL); - - coder->block_options = (lzma_options_block){ - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .has_eopm = uncompressed_size == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = type != BLOCK_DATA, - .has_uncompressed_size_in_footer = false, - .has_backward_size = type == BLOCK_FOOTER_METADATA, - .handle_padding = false, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - }; - - if (type == BLOCK_DATA) { - memcpy(coder->block_options.filters, - coder->stream_options->filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment = coder->iter.stream_offset; - } else { - memcpy(coder->block_options.filters, - coder->stream_options->metadata_filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment - = lzma_info_metadata_alignment_get( - coder->info, type == BLOCK_HEADER_METADATA); - } - - return_if_error(lzma_block_header_size(&coder->block_options)); - - coder->header_size = coder->block_options.header_size; - coder->header = lzma_alloc(coder->header_size, allocator); - if (coder->header == NULL) - return 
LZMA_MEM_ERROR; - - return_if_error(lzma_block_header_encode( - coder->header, &coder->block_options)); - - coder->header_pos = 0; - return LZMA_OK; -} - - -static lzma_ret -metadata_encoder_init(lzma_coder *coder, lzma_allocator *allocator, - lzma_metadata *metadata, block_type type) -{ - return_if_error(lzma_info_metadata_set(coder->info, allocator, - metadata, type == BLOCK_HEADER_METADATA, false)); - - const lzma_vli metadata_size = lzma_metadata_size(metadata); - if (metadata_size == 0) - return LZMA_PROG_ERROR; - - return_if_error(block_header_encode( - coder, allocator, metadata_size, type)); - - return lzma_metadata_encoder_init(&coder->next, allocator, - &coder->block_options, metadata); -} - - -static lzma_ret -data_encoder_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(block_header_encode(coder, allocator, - LZMA_VLI_VALUE_UNKNOWN, BLOCK_DATA)); - - return lzma_block_encoder_init(&coder->next, allocator, - &coder->block_options); -} - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // Main loop - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - case SEQ_STREAM_FOOTER_COPY: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - if (coder->header_pos < coder->header_size) - return LZMA_OK; - - lzma_free(coder->header, allocator); - coder->header = NULL; - - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - // Write Header Metadata Block if we have Extra for it - // or known Uncompressed Size. 
- if (coder->stream_options->header != NULL - || coder->stream_options - ->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_HEADER_METADATA_INIT; - } else { - // Mark that Header Metadata Block doesn't - // exist. - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, 0) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_DATA_INIT; - } - break; - - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - ++coder->sequence; - break; - - case SEQ_STREAM_FOOTER_COPY: - return LZMA_STREAM_END; - - default: - assert(0); - } - - break; - - case SEQ_HEADER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size = LZMA_VLI_VALUE_UNKNOWN, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = coder->stream_options - ->uncompressed_size, - .index = NULL, - // Metadata encoder doesn't modify this, but since - // the lzma_extra structure is used also when decoding - // Metadata, the pointer is not const, and we need - // to cast the constness away in the encoder. 
- .extra = (lzma_extra *)(coder->stream_options->header), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_HEADER_METADATA)); - - coder->sequence = SEQ_HEADER_METADATA_COPY; - break; - } - - case SEQ_FOOTER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size - = lzma_info_size_get(coder->info, - LZMA_INFO_HEADER_METADATA), - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .index = lzma_info_index_get(coder->info, false), - .extra = (lzma_extra *)(coder->stream_options->footer), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_FOOTER_METADATA)); - - coder->sequence = SEQ_FOOTER_METADATA_COPY; - break; - } - - case SEQ_HEADER_METADATA_CODE: - case SEQ_FOOTER_METADATA_CODE: { - size_t dummy = 0; - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, NULL, &dummy, 0, - out, out_pos, out_size, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; - - return_if_error(lzma_info_size_set(coder->info, - coder->sequence == SEQ_HEADER_METADATA_CODE - ? LZMA_INFO_HEADER_METADATA - : LZMA_INFO_FOOTER_METADATA, - coder->block_options.total_size)); - - ++coder->sequence; - break; - } - - case SEQ_DATA_INIT: { - // Don't create an empty Block unless it would be - // the only Data Block. - if (*in_pos == in_size) { - // If we are LZMA_SYNC_FLUSHing or LZMA_FULL_FLUSHing, - // return LZMA_STREAM_END since there's nothing to - // flush. - if (action != LZMA_FINISH) - return action == LZMA_RUN - ? 
LZMA_OK : LZMA_STREAM_END; - - if (lzma_info_index_count_get(coder->info) != 0) { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->sequence = SEQ_FOOTER_METADATA_INIT; - break; - } - } - - return_if_error(data_encoder_init(coder, allocator)); - - coder->sequence = SEQ_DATA_COPY; - break; - } - - case SEQ_DATA_CODE: { - static const lzma_action convert[4] = { - LZMA_RUN, - LZMA_SYNC_FLUSH, - LZMA_FINISH, - LZMA_FINISH, - }; - - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, in, in_pos, in_size, - out, out_pos, out_size, convert[action]); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - return_if_error(lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size)); - - coder->sequence = SEQ_DATA_INIT; - break; - } - - case SEQ_STREAM_FOOTER_INIT: { - assert(coder->header == NULL); - - lzma_stream_flags flags = { - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .is_multi = true, - }; - - coder->header = lzma_alloc(LZMA_STREAM_TAIL_SIZE, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_tail_encode( - coder->header, &flags)); - - coder->header_size = LZMA_STREAM_TAIL_SIZE; - coder->header_pos = 0; - - coder->sequence = SEQ_STREAM_FOOTER_COPY; - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_info_free(coder->info, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if 
(next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - } else { - lzma_free(next->coder->header, allocator); - } - - next->coder->header = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) - return LZMA_MEM_ERROR; - - next->coder->sequence = SEQ_STREAM_HEADER_COPY; - next->coder->stream_options = options; - - // Encode Stream Flags - { - lzma_stream_flags flags = { - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = true, - }; - - next->coder->header = lzma_alloc(LZMA_STREAM_HEADER_SIZE, - allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode( - next->coder->header, &flags)); - - next->coder->header_pos = 0; - next->coder->header_size = LZMA_STREAM_HEADER_SIZE; - } - - if (lzma_info_size_set(next->coder->info, LZMA_INFO_STREAM_START, - options->alignment) != LZMA_OK) - return LZMA_PROG_ERROR; - - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, next, allocator, options); -} - - -extern LZMA_API lzma_ret -lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_encoder_single.c b/src/liblzma/common/stream_encoder_single.c deleted file mode 100644 index d93e7169..00000000 --- 
a/src/liblzma/common/stream_encoder_single.c +++ /dev/null @@ -1,219 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_single.c -/// \brief Encodes Single-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - /// Uncompressed Size, Backward Size, and Footer Magic Bytes are - /// part of Block in the file format specification, but it is simpler - /// to implement them as part of Stream. - enum { - SEQ_HEADERS, - SEQ_DATA, - SEQ_FOOTER, - } sequence; - - /// Block encoder - lzma_next_coder block_encoder; - - /// Block encoder options - lzma_options_block block_options; - - /// Stream Flags; we need to have these in this struct so that we - /// can encode Stream Footer. - lzma_stream_flags stream_flags; - - /// Stream Header + Block Header, or Stream Footer - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, size_t *out_pos, - size_t out_size, lzma_action action) -{ - // NOTE: We don't check if the amount of input is in the proper limits, - // because the Block encoder will do it for us. 
- - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_HEADERS: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - if (coder->header_pos == coder->header_size) { - coder->header_pos = 0; - coder->sequence = SEQ_DATA; - } - - break; - - case SEQ_DATA: { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - in, in_pos, in_size, - out, out_pos, out_size, action); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - assert(*in_pos == in_size); - - assert(coder->header_size >= LZMA_STREAM_TAIL_SIZE); - coder->header_size = LZMA_STREAM_TAIL_SIZE; - - return_if_error(lzma_stream_tail_encode( - coder->header, &coder->stream_flags)); - - coder->sequence = SEQ_FOOTER; - break; - } - - case SEQ_FOOTER: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - return coder->header_pos == coder->header_size - ? LZMA_STREAM_END : LZMA_OK; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } else { - // Free the previous buffer, if any. - lzma_free(next->coder->header, allocator); - } - - // At this point, next->coder->header points to nothing useful. 
- next->coder->header = NULL; - - // Basic initializations - next->coder->sequence = SEQ_HEADERS; - next->coder->header_pos = 0; - - // Initialize next->coder->stream_flags. - next->coder->stream_flags = (lzma_stream_flags){ - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = false, - }; - - // Initialize next->coder->block_options. - next->coder->block_options = (lzma_options_block){ - .check = options->check, - .has_crc32 = options->has_crc32, - .has_eopm = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = false, - .has_uncompressed_size_in_footer = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .has_backward_size = true, - .handle_padding = false, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = options->uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - .alignment = options->alignment + LZMA_STREAM_HEADER_SIZE, - }; - memcpy(next->coder->block_options.filters, options->filters, - sizeof(options->filters)); - - return_if_error(lzma_block_header_size(&next->coder->block_options)); - - // Encode Stream Flags and Block Header into next->coder->header. - next->coder->header_size = (size_t)(LZMA_STREAM_HEADER_SIZE) - + next->coder->block_options.header_size; - next->coder->header = lzma_alloc(next->coder->header_size, allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode(next->coder->header, - &next->coder->stream_flags)); - - return_if_error(lzma_block_header_encode( - next->coder->header + LZMA_STREAM_HEADER_SIZE, - &next->coder->block_options)); - - // Initialize the Block encoder. 
- return lzma_block_encoder_init(&next->coder->block_encoder, allocator, - &next->coder->block_options); -} - - -/* -extern lzma_ret -lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, allocator, options); -} -*/ - - -extern LZMA_API lzma_ret -lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c index d9c847ac..0270875a 100644 --- a/src/liblzma/common/stream_flags_decoder.c +++ b/src/liblzma/common/stream_flags_decoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_decoder.c -/// \brief Decodes Stream Header and tail from .lzma files +/// \brief Decodes Stream Header and Stream Footer from .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -17,242 +17,72 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_flags_decoder.h" #include "stream_common.h" -//////////// -// Common // -//////////// - -struct lzma_coder_s { - enum { - SEQ_HEADER_MAGIC, - SEQ_HEADER_FLAGS, - SEQ_HEADER_CRC32, - - SEQ_FOOTER_FLAGS, - SEQ_FOOTER_MAGIC, - } sequence; - - size_t pos; - uint32_t crc32; - - lzma_stream_flags *options; -}; - - -static void -stream_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - static bool -stream_flags_decode(const uint8_t *in, lzma_stream_flags *options) +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) { // Reserved bits must be unset. 
- if (*in & 0xE0) + if (in[0] != 0x00 || (in[1] & 0xF0)) return true; - options->check = *in & 0x07; - options->has_crc32 = (*in & 0x08) != 0; - options->is_multi = (*in & 0x10) != 0; + options->check = in[1] & 0x0F; return false; } -//////////// -// Header // -//////////// - -static lzma_ret -stream_header_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_HEADER_MAGIC: - if (in[*in_pos] != lzma_header_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_header_magic)) { - coder->pos = 0; - coder->sequence = SEQ_HEADER_FLAGS; - } - - break; - - case SEQ_HEADER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, 0); - - ++*in_pos; - coder->sequence = SEQ_HEADER_CRC32; - break; - - case SEQ_HEADER_CRC32: - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == 4) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. 
- next->code = &stream_header_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_HEADER_MAGIC; - next->coder->pos = 0; - next->coder->crc32 = 0; - next->coder->options = options; - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init( - stream_header_decoder_init, next, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_header_decoder(lzma_stream *strm, lzma_stream_flags *options) -{ - lzma_next_strm_init(strm, stream_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} - - -////////// -// Tail // -////////// - -static lzma_ret -stream_tail_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FOOTER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - ++*in_pos; - coder->sequence = SEQ_FOOTER_MAGIC; - break; - - case SEQ_FOOTER_MAGIC: - if (in[*in_pos] != lzma_footer_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_footer_magic)) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) { - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - 
return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_tail_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_FOOTER_FLAGS; - next->coder->pos = 0; - next->coder->options = options; + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_HEADER_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_equal can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. 
+ options->backward_size = LZMA_VLI_VALUE_UNKNOWN; return LZMA_OK; } -extern lzma_ret -lzma_stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init2(next, allocator, stream_header_decoder_init, - stream_tail_decoder_init, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_tail_decoder(lzma_stream *strm, lzma_stream_flags *options) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) { - lzma_next_strm_init2(strm, stream_header_decoder_init, - stream_tail_decoder_init, strm->allocator, options); - - strm->internal->supported_actions[LZMA_RUN] = true; + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_HEADER_ERROR; + + // Backward Size + options->backward_size = integer_read_32(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; return LZMA_OK; } diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c index 55468580..4efbb6f4 100644 --- a/src/liblzma/common/stream_flags_encoder.c +++ b/src/liblzma/common/stream_flags_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_encoder.c -/// \brief Encodes Stream Header and Footer for .lzma files +/// \brief Encodes Stream Header and Stream Footer for .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -21,55 +21,69 @@ static bool -stream_flags_encode(uint8_t *flags_byte, const lzma_stream_flags *options) +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) { - // 
Check type if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return true; - *flags_byte = options->check; - - // Usage of CRC32 in Block Headers - if (options->has_crc32) - *flags_byte |= 0x08; - - // Single- or Multi-Block - if (options->is_multi) - *flags_byte |= 0x10; + out[0] = 0x00; + out[1] = options->check; return false; } extern LZMA_API lzma_ret -lzma_stream_header_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + // Magic memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); // Stream Flags - if (stream_flags_encode(out + sizeof(lzma_header_magic), options)) - return LZMA_PROG_ERROR;; + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; // CRC32 of the Stream Header - const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), 1, 0); + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); - for (size_t i = 0; i < 4; ++i) - out[sizeof(lzma_header_magic) + 1 + i] = crc >> (i * 8); + integer_write_32(out + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE, crc); return LZMA_OK; } extern LZMA_API lzma_ret -lzma_stream_tail_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + // Backward Size + if (options->backward_size < LZMA_BACKWARD_SIZE_MIN + || options->backward_size > LZMA_BACKWARD_SIZE_MAX + || (options->backward_size & 3)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 4, options->backward_size / 4 - 1); + // Stream Flags - if (stream_flags_encode(out, options)) + if (stream_flags_encode(options, out + 2 * 4)) return LZMA_PROG_ERROR; + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 
+ LZMA_STREAM_FLAGS_SIZE, 0); + + integer_write_32(out, crc); + // Magic - memcpy(out + 1, lzma_footer_magic, sizeof(lzma_footer_magic)); + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); return LZMA_OK; } diff --git a/src/liblzma/common/easy_single.c b/src/liblzma/common/stream_flags_equal.c index e2fa4e13..db22567f 100644 --- a/src/liblzma/common/easy_single.c +++ b/src/liblzma/common/stream_flags_equal.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file easy_single.c -/// \brief Easy Single-Block Stream encoder initialization +/// \file stream_flags_equal.c +/// \brief Compare Stream Header and Stream Footer // // Copyright (C) 2008 Lasse Collin // @@ -17,21 +17,20 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "easy_common.h" +#include "common.h" -extern LZMA_API lzma_ret -lzma_easy_encoder_single(lzma_stream *strm, lzma_easy_level level) +extern LZMA_API lzma_bool +lzma_stream_flags_equal(const lzma_stream_flags *a, lzma_stream_flags *b) { - lzma_options_stream opt_stream = { - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - }; + if (a->check != b->check) + return false; - if (lzma_easy_set_filters(opt_stream.filters, level)) - return LZMA_HEADER_ERROR; + // Backward Sizes are compared only if they are known in both. 
+ if (a->backward_size != LZMA_VLI_VALUE_UNKNOWN + && b->backward_size != LZMA_VLI_VALUE_UNKNOWN + && a->backward_size != b->backward_size) + return false; - return lzma_stream_encoder_single(strm, &opt_stream); + return true; } diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c index 2b89c1a7..faff6ccb 100644 --- a/src/liblzma/common/vli_decoder.c +++ b/src/liblzma/common/vli_decoder.c @@ -3,7 +3,7 @@ /// \file vli_decoder.c /// \brief Decodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,45 +25,53 @@ lzma_vli_decode(lzma_vli *restrict vli, size_t *restrict vli_pos, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { - if (*vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 - || (*vli >> (7 * *vli_pos)) != 0) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*in_pos >= in_size) - return LZMA_BUF_ERROR; + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; - if (*vli_pos == 0) { - *vli_pos = 1; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *in_pos >= in_size + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR;; - if (in[*in_pos] <= 0x7F) { - // Single-byte integer - *vli = in[*in_pos]; - ++*in_pos; - return LZMA_STREAM_END; - } - - *vli = in[*in_pos] & 0x7F; - ++*in_pos; - } - - while (*in_pos < in_size) { - // Read in the next byte. + do { + // Read the next byte. *vli |= (lzma_vli)(in[*in_pos] & 0x7F) << (*vli_pos * 7); ++*vli_pos; // Check if this is the last byte of a multibyte integer. 
- if (in[*in_pos] & 0x80) { - ++*in_pos; - return LZMA_STREAM_END; + if (!(in[*in_pos] & 0x80)) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most + // compact form. + if (in[(*in_pos)++] == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; } - // Limit variable-length representation to nine bytes. - if (*vli_pos == 9) + ++*in_pos; + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions of liblzma + // will confusingly indicate the file being corrupt instead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) return LZMA_DATA_ERROR; - // Increment input position only when the byte was accepted. - ++*in_pos; - } + } while (*in_pos < in_size); - return LZMA_OK; + return vli_pos == &vli_pos_internal ? 
LZMA_DATA_ERROR : LZMA_OK; } diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c index 1ecdb0d2..c48d6474 100644 --- a/src/liblzma/common/vli_encoder.c +++ b/src/liblzma/common/vli_encoder.c @@ -3,7 +3,7 @@ /// \file vli_encoder.c /// \brief Encodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -21,61 +21,54 @@ extern LZMA_API lzma_ret -lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, size_t vli_size, +lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { - if (vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 || vli_size > 9 - || (vli != 0 && (vli >> (7 * *vli_pos)) == 0)) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*out_pos >= out_size) - return LZMA_BUF_ERROR; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *out_pos >= out_size + || vli > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; - if (*vli_pos == 0) { - *vli_pos = 1; + // Write the non-last bytes in a loop. + while ((vli >> (*vli_pos * 7)) >= 0x80) { + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)) | 0x80; - if (vli <= 0x7F && *vli_pos >= vli_size) { - // Single-byte integer - out[(*out_pos)++] = vli; - return LZMA_STREAM_END; - } + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); - // First byte of a multibyte integer - out[(*out_pos)++] = (vli & 0x7F) | 0x80; + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? 
LZMA_PROG_ERROR : LZMA_OK; } - while (*out_pos < out_size) { - const lzma_vli b = vli >> (7 * *vli_pos); - ++*vli_pos; - - if (b <= 0x7F && *vli_pos >= vli_size) { - // Last byte of a multibyte integer - out[(*out_pos)++] = (b & 0xFF) | 0x80; - return LZMA_STREAM_END; - } + // Write the last byte. + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)); + ++*out_pos; + ++*vli_pos; - // Middle byte of a multibyte integer - out[(*out_pos)++] = b & 0x7F; - } + return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END; - // vli is not yet completely written out. - return LZMA_OK; } -extern LZMA_API size_t +extern LZMA_API uint32_t lzma_vli_size(lzma_vli vli) { if (vli > LZMA_VLI_VALUE_MAX) return 0; - size_t i = 0; + uint32_t i = 0; do { vli >>= 7; ++i; } while (vli != 0); - assert(i <= 9); + assert(i <= LZMA_VLI_BYTES_MAX); return i; } diff --git a/src/liblzma/common/vli_reverse_decoder.c b/src/liblzma/common/vli_reverse_decoder.c deleted file mode 100644 index 68ca6a42..00000000 --- a/src/liblzma/common/vli_reverse_decoder.c +++ /dev/null @@ -1,55 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file vli_reverse_decoder.c -/// \brief Decodes variable-length integers starting at end of the buffer -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API lzma_ret -lzma_vli_reverse_decode(lzma_vli *vli, const uint8_t *in, size_t *in_size) -{ - if (*in_size == 0) - return LZMA_BUF_ERROR; - - size_t i = *in_size - 1; - *vli = in[i] & 0x7F; - - if (!(in[i] & 0x80)) { - *in_size = i; - return LZMA_OK; - } - - const size_t end = *in_size > LZMA_VLI_BYTES_MAX - ? *in_size - LZMA_VLI_BYTES_MAX : 0; - - do { - if (i-- == end) { - if (*in_size < LZMA_VLI_BYTES_MAX) - return LZMA_BUF_ERROR; - - return LZMA_DATA_ERROR; - } - - *vli <<= 7; - *vli = in[i] & 0x7F; - - } while (!(in[i] & 0x80)); - - *in_size = i; - return LZMA_OK; -} diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index a400bde1..ae969d62 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -387,11 +387,11 @@ lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator, bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, bool has_safe_buffer), - lzma_vli uncompressed_size, size_t history_size, size_t match_max_len) { - // Set uncompressed size. - lz->uncompressed_size = uncompressed_size; + // Known uncompressed size is used only with LZMA_Alone files so we + // set it always to unknown by default. + lz->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; // Limit the history size to roughly sane values. This is primarily // to prevent integer overflows. diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index a8a585cd..1acf9831 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -31,6 +31,11 @@ : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) +/// Test if dictionary is empty. 
+#define lz_is_empty(lz) \ + ((lz).pos == 0 && !(lz).is_full) + + #define LZMA_LZ_DECODER_INIT \ (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } @@ -109,7 +114,6 @@ extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_coder *restrict coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, bool has_safe_buffer), - lzma_vli uncompressed_size, size_t history_size, size_t match_max_len); extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, @@ -155,12 +159,12 @@ lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) // in which e.g. the data of the previously decoded file(s) // would be leaked (or whatever happens to be in unused // part of the dictionary buffer). - if (distance >= lz->pos && !lz->is_full) + if (unlikely(distance >= lz->pos && !lz->is_full)) return false; // It also doesn't make sense to copy data farer than // the dictionary size. - if (distance >= lz->requested_size) + if (unlikely(distance >= lz->requested_size)) return false; // The caller must have checked these! diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index dfe83589..d4cefe0b 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -547,6 +547,9 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // Note that rep0 is known to have a safe value, thus we // don't need to check if we are wrapping the dictionary // when it isn't full yet. 
+ if (unlikely(lz_is_empty(coder->lz))) + return true; + coder->lz.dict[coder->lz.pos] = lz_get_byte(coder->lz, rep0); ++coder->lz.pos; @@ -698,7 +701,6 @@ lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, { const lzma_ret ret = lzma_lz_decoder_reset( &next->coder->lz, allocator, &decode_real, - filters[0].uncompressed_size, options->dictionary_size, MATCH_MAX_LEN); if (ret != LZMA_OK) { lzma_literal_end(&next->coder->literal_coder, @@ -785,6 +787,15 @@ lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, } +extern void +lzma_lzma_decoder_uncompressed_size( + lzma_next_coder *next, lzma_vli uncompressed_size) +{ + next->coder->lz.uncompressed_size = uncompressed_size; + return; +} + + extern bool lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) { diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h index 929c2bff..9d57c7e5 100644 --- a/src/liblzma/lzma/lzma_decoder.h +++ b/src/liblzma/lzma/lzma_decoder.h @@ -24,10 +24,15 @@ #include "common.h" -/// \brief Allocates and initializes LZMA decoder +/// Allocates and initializes LZMA decoder extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); +/// Set known uncompressed size. This is a hack needed to support +/// LZMA_Alone files that don't have EOPM. +extern void lzma_lzma_decoder_uncompressed_size( + lzma_next_coder *next, lzma_vli uncompressed_size); + /// \brief Decodes the LZMA Properties byte (lc/lp/pb) /// /// \return true if error occorred, false on success @@ -35,7 +40,4 @@ extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, extern bool lzma_lzma_decode_properties( lzma_options_lzma *options, uint8_t byte); -// There is no public lzma_lzma_encode() because lzma_lz_encode() works -// as a wrapper for it. 
- #endif diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c index e9674308..078f1b95 100644 --- a/src/liblzma/simple/simple_coder.c +++ b/src/liblzma/simple/simple_coder.c @@ -23,11 +23,7 @@ #include "simple_private.h" -/// Copied or encodes/decodes more data to out[]. Checks and updates -/// uncompressed_size when we are the last coder in the chain. -/// If we aren't the last filter in the chain, we don't need to care about -/// uncompressed size, since we don't change it; the next filter in the -/// chain will check it anyway. +/// Copies or encodes/decodes more data to out[]. static lzma_ret copy_or_code(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, @@ -37,28 +33,12 @@ copy_or_code(lzma_coder *coder, lzma_allocator *allocator, assert(!coder->end_was_reached); if (coder->next.code == NULL) { - const size_t in_avail = in_size - *in_pos; - - if (!coder->is_encoder) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)( - coder->uncompressed_size); - } - - const size_t out_start = *out_pos; bufcpy(in, in_pos, in_size, out, out_pos, out_size); // Check if end of stream was reached. - if (coder->is_encoder) { - if (action == LZMA_FINISH && *in_pos == in_size) - coder->end_was_reached = true; - } else if (coder->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->uncompressed_size -= *out_pos - out_start; - if (coder->uncompressed_size == 0) - coder->end_was_reached = true; - } + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; } else { // Call the next coder in the chain to provide us some data. @@ -283,7 +263,6 @@ lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Reset variables. 
next->coder->is_encoder = is_encoder; next->coder->end_was_reached = false; - next->coder->uncompressed_size = filters[0].uncompressed_size; next->coder->pos = 0; next->coder->filtered = 0; next->coder->size = 0; diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h index a512396c..4e7a9db3 100644 --- a/src/liblzma/simple/simple_private.h +++ b/src/liblzma/simple/simple_private.h @@ -39,10 +39,6 @@ struct lzma_coder_s { /// is very small. bool is_encoder; - /// Size of the data *left* to be processed, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. - lzma_vli uncompressed_size; - /// Pointer to filter-specific function, which does /// the actual filtering. size_t (*filter)(lzma_simple *simple, uint32_t now_pos, diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 6f38caff..39ec35c1 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -53,9 +53,6 @@ struct lzma_coder_s { /// Number of bytes left in the current Subblock Data field. size_t size; - /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown. - lzma_vli uncompressed_size; - /// Number of consecutive Subblocks with Subblock Type Padding uint32_t padding; @@ -124,22 +121,6 @@ enum { }; -/// Substracts size from coder->uncompressed_size uncompressed size is known -/// and size isn't bigger than coder->uncompressed_size. -static inline bool -update_uncompressed_size(lzma_coder *coder, size_t size) -{ - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if ((lzma_vli)(size) > coder->uncompressed_size) - return true; - - coder->uncompressed_size -= size; - } - - return false; -} - - /// Calls the subfilter and updates coder->uncompressed_size. 
static lzma_ret subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, @@ -149,17 +130,11 @@ subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, { assert(coder->subfilter.code != NULL); - const size_t out_start = *out_pos; - // Call the subfilter. const lzma_ret ret = coder->subfilter.code( coder->subfilter.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - // Update uncompressed_size. - if (update_uncompressed_size(coder, *out_pos - out_start)) - return LZMA_DATA_ERROR; - return ret; } @@ -174,9 +149,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, || coder->sequence >= SEQ_DATA)) switch (coder->sequence) { case SEQ_FLAGS: { - if ((in[*in_pos] >> 4) != FLAG_PADDING) - coder->padding = 0; - // Do the correct action depending on the Subblock Type. switch (in[*in_pos] >> 4) { case FLAG_PADDING: @@ -188,6 +160,10 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case FLAG_EOPM: + // There must be no Padding before EOPM. + if (coder->padding != 0) + return LZMA_DATA_ERROR; + // Check that reserved bits are zero. if (in[*in_pos] & 0x0F) return LZMA_DATA_ERROR; @@ -196,11 +172,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->subfilter.code != NULL) return LZMA_DATA_ERROR; - // End of Payload Marker must not be used if - // uncompressed size is known. 
- if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return LZMA_DATA_ERROR; - ++*in_pos; return LZMA_STREAM_END; @@ -222,15 +193,16 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case FLAG_SET_SUBFILTER: { - if ((in[*in_pos] & 0x0F) + if (coder->padding != 0 || (in[*in_pos] & 0x0F) || coder->subfilter.code != NULL || !coder->allow_subfilters) return LZMA_DATA_ERROR; assert(coder->filter_flags.options == NULL); - return_if_error(lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, - allocator, &coder->filter_flags)); + abort(); +// return_if_error(lzma_filter_flags_decoder_init( +// &coder->filter_flags_decoder, +// allocator, &coder->filter_flags)); coder->got_output_with_subfilter = false; @@ -240,7 +212,8 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } case FLAG_END_SUBFILTER: - if (coder->subfilter.code == NULL + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code == NULL || !coder->got_output_with_subfilter) return LZMA_DATA_ERROR; @@ -276,9 +249,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++*in_pos; - if (coder->uncompressed_size == 0) - return LZMA_STREAM_END; - break; default: @@ -301,9 +271,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, // Initialize the Subfilter. Subblock and Copy filters are // not allowed. 
- if (coder->filter_flags.id == LZMA_FILTER_COPY - || coder->filter_flags.id - == LZMA_FILTER_SUBBLOCK) + if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK) return LZMA_DATA_ERROR; coder->helper.end_was_reached = false; @@ -327,8 +295,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, filters[1].id = LZMA_VLI_VALUE_UNKNOWN; return_if_error(lzma_raw_decoder_init( - &coder->subfilter, allocator, - filters, LZMA_VLI_VALUE_UNKNOWN, false)); + &coder->subfilter, allocator, filters)); coder->sequence = SEQ_FLAGS; break; @@ -385,7 +352,14 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->repeat.count = coder->size; coder->repeat.size = (size_t)(in[*in_pos]) + 1; coder->repeat.pos = 0; + + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + if (coder->repeat.size <= coder->padding) + return LZMA_DATA_ERROR; + ++*in_pos; + coder->padding = 0; coder->sequence = SEQ_REPEAT_READ_DATA; break; @@ -415,6 +389,14 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } case SEQ_DATA: { + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + assert(coder->size > 0); + if (coder->size <= coder->padding) + return LZMA_DATA_ERROR; + + coder->padding = 0; + // Limit the amount of input to match the available // Subblock Data size. size_t in_limit; @@ -429,10 +411,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, out, out_pos, out_size); coder->size -= copy_size; - - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - } else { const size_t in_start = *in_pos; const lzma_ret ret = subfilter_decode( @@ -467,11 +445,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->size > 0) return LZMA_OK; - // Check if we have decoded all the data. 
- if (coder->uncompressed_size == 0 - && coder->subfilter.code == NULL) - return LZMA_STREAM_END; - coder->sequence = SEQ_FLAGS; break; } @@ -487,16 +460,8 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, *out_pos += copy_size; coder->repeat.count -= copy_size; - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - - if (coder->repeat.count == 0) { - assert(coder->subfilter.code == NULL); - if (coder->uncompressed_size == 0) - return LZMA_STREAM_END; - } else { + if (coder->repeat.count != 0) return LZMA_OK; - } coder->sequence = SEQ_FLAGS; break; @@ -515,15 +480,10 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } if (coder->subfilter.code == NULL) { - const size_t copy_size = bufcpy( - coder->repeat.buffer, + bufcpy(coder->repeat.buffer, &coder->repeat.pos, coder->repeat.size, out, out_pos, out_size); - - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - } else { const lzma_ret ret = subfilter_decode( coder, allocator, @@ -553,11 +513,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } } while (*out_pos < out_size); - // Check if we have decoded all the data. - if (coder->uncompressed_size == 0 - && coder->subfilter.code == NULL) - return LZMA_STREAM_END; - break; default: @@ -664,7 +619,6 @@ lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->filter_flags.options = NULL; next->coder->sequence = SEQ_FLAGS; - next->coder->uncompressed_size = filters[0].uncompressed_size; next->coder->padding = 0; next->coder->next_finished = false; next->coder->this_finished = false; diff --git a/src/liblzma/subblock/subblock_decoder_helper.c b/src/liblzma/subblock/subblock_decoder_helper.c index 77d1f4bd..e8063e1e 100644 --- a/src/liblzma/subblock/subblock_decoder_helper.c +++ b/src/liblzma/subblock/subblock_decoder_helper.c @@ -62,14 +62,11 @@ lzma_subblock_decoder_helper_init(lzma_next_coder *next, // This is always the last filter in the chain. 
assert(filters[1].init == NULL); - // We never know uncompressed size. - assert(filters[0].uncompressed_size == LZMA_VLI_VALUE_UNKNOWN); - if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; - + next->code = &helper_decode; next->end = &helper_end; } diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c index a8aedbd7..01e8007a 100644 --- a/src/liblzma/subblock/subblock_encoder.c +++ b/src/liblzma/subblock/subblock_encoder.c @@ -51,7 +51,6 @@ do { \ struct lzma_coder_s { lzma_next_coder next; bool next_finished; - bool use_eopm; enum { SEQ_FILL, @@ -636,9 +635,10 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->subfilter.mode_locked = false; coder->sequence = SEQ_FILL; - } else if (coder->use_eopm) { + } else { assert(action == LZMA_FINISH); + // Write EOPM. // NOTE: No need to use write_byte() here // since we are finishing. out[*out_pos] = 0x10; @@ -797,7 +797,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, return_if_error(lzma_raw_encoder_init( &coder->subfilter.subcoder, allocator, - options, LZMA_VLI_VALUE_UNKNOWN, false)); + options)); // Encode the Filter Flags field into a buffer. 
This should // never fail since we have already successfully initialized @@ -948,8 +948,6 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next_finished = false; next->coder->sequence = SEQ_FILL; next->coder->options = filters[0].options; - next->coder->use_eopm = filters[0].uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; next->coder->pos = 0; next->coder->alignment.in_pos = 0; diff --git a/src/lzma/args.c b/src/lzma/args.c index 4393a6bd..a4764032 100644 --- a/src/lzma/args.c +++ b/src/lzma/args.c @@ -52,8 +52,7 @@ static size_t filter_count = 0; enum { - OPT_COPY = INT_MIN, - OPT_SUBBLOCK, + OPT_SUBBLOCK = INT_MIN, OPT_X86, OPT_POWERPC, OPT_IA64, @@ -97,7 +96,6 @@ static const struct option long_opts[] = { { "compress", no_argument, NULL, 'z' }, // Filters - { "copy", no_argument, NULL, OPT_COPY }, { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, { "x86", no_argument, NULL, OPT_X86 }, { "bcj", no_argument, NULL, OPT_X86 }, @@ -267,10 +265,6 @@ parse_real(int argc, char **argv) // Filter setup - case OPT_COPY: - add_filter(LZMA_FILTER_COPY, NULL); - break; - case OPT_SUBBLOCK: add_filter(LZMA_FILTER_SUBBLOCK, optarg); break; @@ -314,8 +308,6 @@ parse_real(int argc, char **argv) static const char *types[] = { "auto", "native", - "single", - "multi", "alone", // "gzip", NULL @@ -471,18 +463,6 @@ set_compression_settings(void) my_exit(ERROR); } - // Optimize the filter chain a little by removing all - // Copy filters. 
- for (size_t i = 0; opt_filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { - while (opt_filters[i].id == LZMA_FILTER_COPY) { - size_t j = i; - do { - opt_filters[j] = opt_filters[j + 1]; - } while (opt_filters[++j].id - != LZMA_VLI_VALUE_UNKNOWN); - } - } - const uint32_t memory_limit = opt_memory / (1024 * 1024) + 1; uint32_t memory_usage = lzma_memory_usage(opt_filters, true); diff --git a/src/lzma/args.h b/src/lzma/args.h index 4f19a01e..c6098558 100644 --- a/src/lzma/args.h +++ b/src/lzma/args.h @@ -33,8 +33,6 @@ enum tool_mode { enum header_type { HEADER_AUTO, HEADER_NATIVE, - HEADER_SINGLE, - HEADER_MULTI, HEADER_ALONE, // HEADER_GZIP, }; diff --git a/src/lzma/error.c b/src/lzma/error.c index a83de27a..e5391068 100644 --- a/src/lzma/error.c +++ b/src/lzma/error.c @@ -55,6 +55,12 @@ str_strm_error(lzma_ret code) case LZMA_UNSUPPORTED_CHECK: return _("Unsupported integrity check type"); + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + default: return NULL; } diff --git a/src/lzma/process.c b/src/lzma/process.c index 56bcda9a..c180caf7 100644 --- a/src/lzma/process.c +++ b/src/lzma/process.c @@ -160,32 +160,16 @@ single_init(thread_data *t) lzma_ret ret; if (opt_mode == MODE_COMPRESS) { - const lzma_vli uncompressed_size - = t->pair->src_fd != STDIN_FILENO - ? (lzma_vli)(t->pair->src_st.st_size) - : LZMA_VLI_VALUE_UNKNOWN; - - // TODO Support Multi-Block Streams to store Extra. 
if (opt_header == HEADER_ALONE) { - lzma_options_alone alone; - alone.uncompressed_size = uncompressed_size; - memcpy(&alone.lzma, opt_filters[0].options, - sizeof(alone.lzma)); - ret = lzma_alone_encoder(&t->strm, &alone); + ret = lzma_alone_encoder(&t->strm, + opt_filters[0].options); } else { - lzma_options_stream stream = { - .check = opt_check, - .has_crc32 = opt_check != LZMA_CHECK_NONE, - .uncompressed_size = uncompressed_size, - .alignment = 0, - }; - memcpy(stream.filters, opt_filters, - sizeof(stream.filters)); - ret = lzma_stream_encoder_single(&t->strm, &stream); + ret = lzma_stream_encoder(&t->strm, + opt_filters, opt_check); } } else { // TODO Restrict file format if requested on the command line. - ret = lzma_auto_decoder(&t->strm, NULL, NULL); + ret = lzma_auto_decoder(&t->strm); } if (ret != LZMA_OK) { diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c index a1383842..1fc561b7 100644 --- a/src/lzmadec/lzmadec.c +++ b/src/lzmadec/lzmadec.c @@ -284,9 +284,7 @@ parse_options(int argc, char **argv) case OPTION_FORMAT: { if (strcmp("auto", optarg) == 0) { format_type = FORMAT_AUTO; - } else if (strcmp("native", optarg) == 0 - || strcmp("single", optarg) == 0 - || strcmp("multi", optarg) == 0) { + } else if (strcmp("native", optarg) == 0) { format_type = FORMAT_NATIVE; } else if (strcmp("alone", optarg) == 0) { format_type = FORMAT_ALONE; @@ -315,11 +313,11 @@ init(void) switch (format_type) { case FORMAT_AUTO: - ret = lzma_auto_decoder(&strm, NULL, NULL); + ret = lzma_auto_decoder(&strm); break; case FORMAT_NATIVE: - ret = lzma_stream_decoder(&strm, NULL, NULL); + ret = lzma_stream_decoder(&strm); break; case FORMAT_ALONE: |