From 7d17818cec8597f847b0a2537fde991bbc3d9e96 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Wed, 18 Jun 2008 18:02:10 +0300 Subject: Update the code to mostly match the new simpler file format specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided. --- src/common/bswap.h | 44 ++ src/common/integer.h | 167 +++++ src/liblzma/api/Makefile.am | 5 +- src/liblzma/api/lzma.h | 9 +- src/liblzma/api/lzma/alone.h | 32 +- src/liblzma/api/lzma/auto.h | 7 +- src/liblzma/api/lzma/base.h | 15 + src/liblzma/api/lzma/block.h | 306 +++------- src/liblzma/api/lzma/check.h | 18 +- src/liblzma/api/lzma/copy.h | 29 - src/liblzma/api/lzma/easy.h | 61 +- src/liblzma/api/lzma/extra.h | 114 ---- src/liblzma/api/lzma/filter.h | 5 +- src/liblzma/api/lzma/index.h | 204 ++++++- src/liblzma/api/lzma/index_hash.h | 94 +++ src/liblzma/api/lzma/info.h | 315 ---------- src/liblzma/api/lzma/lzma.h | 2 +- src/liblzma/api/lzma/metadata.h | 100 --- src/liblzma/api/lzma/raw.h | 20 +- src/liblzma/api/lzma/stream.h | 157 +---- src/liblzma/api/lzma/stream_flags.h | 146 +++-- src/liblzma/api/lzma/version.h | 2 +- src/liblzma/api/lzma/vli.h | 83 ++- src/liblzma/check/Makefile.am | 1 - src/liblzma/check/check.c | 55 +- src/liblzma/check/check.h | 47 +- src/liblzma/check/check_byteswap.h | 43 -- src/liblzma/check/crc32_init.c | 2 +- src/liblzma/check/crc64_init.c | 2 +- src/liblzma/check/crc_macros.h | 2 +- src/liblzma/check/sha256.c | 53 +- src/liblzma/common/Makefile.am | 31 +- src/liblzma/common/alignment.c | 5 +- src/liblzma/common/alone_decoder.c | 77 +-- src/liblzma/common/alone_encoder.c | 99 ++- src/liblzma/common/auto_decoder.c | 18 +- src/liblzma/common/block_decoder.c | 298 +++------ src/liblzma/common/block_encoder.c | 228 ++----- src/liblzma/common/block_header_decoder.c | 400 +++--------- src/liblzma/common/block_header_encoder.c | 207 +++---- src/liblzma/common/block_private.h | 51 +- src/liblzma/common/block_util.c | 73 +++ src/liblzma/common/common.h | 44 +- src/liblzma/common/copy_coder.c | 144 ----- src/liblzma/common/copy_coder.h | 31 - src/liblzma/common/delta_common.c | 4 - src/liblzma/common/delta_common.h | 4 - src/liblzma/common/delta_decoder.c | 55 +- src/liblzma/common/delta_encoder.c | 7 +- src/liblzma/common/easy.c | 122 ++++ src/liblzma/common/easy_common.c | 54 -- src/liblzma/common/easy_common.h | 28 - src/liblzma/common/easy_multi.c | 103 ---- src/liblzma/common/easy_single.c | 37 -- src/liblzma/common/extra.c | 34 -- src/liblzma/common/features.c | 4 - src/liblzma/common/filter_flags_decoder.c | 384 ++++-------- src/liblzma/common/filter_flags_encoder.c | 120 +--- src/liblzma/common/index.c | 773 ++++++++++++++++++++--- src/liblzma/common/index.h | 67 ++ src/liblzma/common/index_decoder.c | 252 ++++++++ src/liblzma/common/index_encoder.c | 222 +++++++ src/liblzma/common/index_encoder.h | 30 + src/liblzma/common/index_hash.c | 340 +++++++++++ src/liblzma/common/info.c | 814 ------------------------- src/liblzma/common/memory_usage.c | 1 - src/liblzma/common/metadata_decoder.c | 578 ------------------ src/liblzma/common/metadata_decoder.h | 31 - src/liblzma/common/metadata_encoder.c | 435 ------------- src/liblzma/common/metadata_encoder.h | 30 - src/liblzma/common/raw_common.c | 178 ++---- src/liblzma/common/raw_common.h | 5 +- src/liblzma/common/raw_decoder.c | 19 +- src/liblzma/common/raw_decoder.h | 3 +- src/liblzma/common/raw_encoder.c | 101 +-- src/liblzma/common/raw_encoder.h | 3 +- src/liblzma/common/stream_common.h | 3 + src/liblzma/common/stream_decoder.c | 458 +++++--------- src/liblzma/common/stream_decoder.h | 28 + src/liblzma/common/stream_encoder.c | 282 +++++++++ src/liblzma/common/stream_encoder.h | 30 + src/liblzma/common/stream_encoder_multi.c | 445 -------------- src/liblzma/common/stream_encoder_multi.h | 26 - src/liblzma/common/stream_encoder_single.c | 219 ------- src/liblzma/common/stream_flags_decoder.c | 260 ++------ src/liblzma/common/stream_flags_encoder.c | 56 +- src/liblzma/common/stream_flags_equal.c | 36 ++ src/liblzma/common/vli_decoder.c | 68 ++- src/liblzma/common/vli_encoder.c | 59 +- src/liblzma/common/vli_reverse_decoder.c | 55 -- src/liblzma/lz/lz_decoder.c | 6 +- src/liblzma/lz/lz_decoder.h | 10 +- src/liblzma/lzma/lzma_decoder.c | 13 +- src/liblzma/lzma/lzma_decoder.h | 10 +- src/liblzma/simple/simple_coder.c | 29 +- src/liblzma/simple/simple_private.h | 4 - src/liblzma/subblock/subblock_decoder.c | 106 +--- src/liblzma/subblock/subblock_decoder_helper.c | 5 +- src/liblzma/subblock/subblock_encoder.c | 8 +- src/lzma/args.c | 22 +- src/lzma/args.h | 2 - src/lzma/error.c | 6 + src/lzma/process.c | 26 +- src/lzmadec/lzmadec.c | 8 +- 104 files changed, 4027 insertions(+), 6937 deletions(-) create mode 100644 src/common/bswap.h create mode 100644 src/common/integer.h delete mode 100644 src/liblzma/api/lzma/copy.h delete mode 100644 src/liblzma/api/lzma/extra.h create mode 100644 src/liblzma/api/lzma/index_hash.h delete mode 100644 src/liblzma/api/lzma/info.h delete mode 100644 src/liblzma/api/lzma/metadata.h delete mode 100644 src/liblzma/check/check_byteswap.h create mode 100644 src/liblzma/common/block_util.c delete mode 100644 src/liblzma/common/copy_coder.c delete mode 100644 src/liblzma/common/copy_coder.h create mode 100644 src/liblzma/common/easy.c delete mode 100644 src/liblzma/common/easy_common.c delete mode 100644 src/liblzma/common/easy_common.h delete mode 100644 src/liblzma/common/easy_multi.c delete mode 100644 src/liblzma/common/easy_single.c delete mode 100644 src/liblzma/common/extra.c create mode 100644 src/liblzma/common/index.h create mode 100644 src/liblzma/common/index_decoder.c create mode 100644 src/liblzma/common/index_encoder.c create mode 100644 src/liblzma/common/index_encoder.h create mode 100644 src/liblzma/common/index_hash.c delete mode 100644 src/liblzma/common/info.c delete mode 100644 src/liblzma/common/metadata_decoder.c delete mode 100644 src/liblzma/common/metadata_decoder.h delete mode 100644 src/liblzma/common/metadata_encoder.c delete mode 100644 src/liblzma/common/metadata_encoder.h create mode 100644 src/liblzma/common/stream_decoder.h create mode 100644 src/liblzma/common/stream_encoder.c create mode 100644 src/liblzma/common/stream_encoder.h delete mode 100644 src/liblzma/common/stream_encoder_multi.c delete mode 100644 src/liblzma/common/stream_encoder_multi.h delete mode 100644 src/liblzma/common/stream_encoder_single.c create mode 100644 src/liblzma/common/stream_flags_equal.c delete mode 100644 src/liblzma/common/vli_reverse_decoder.c (limited to 'src') diff --git a/src/common/bswap.h b/src/common/bswap.h new file mode 100644 index 00000000..8f82a8f4 --- /dev/null +++ b/src/common/bswap.h @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bswap.h +/// \brief Byte swapping +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BSWAP_H +#define LZMA_BSWAP_H + +// NOTE: We assume that config.h is already #included. + +// byteswap.h is a GNU extension. It contains inline assembly versions +// for byteswapping. When byteswap.h is not available, we use generic code. +#ifdef HAVE_BYTESWAP_H +# include +#else +# define bswap_16(num) \ + (((num) << 8) | ((num) >> 8)) + +# define bswap_32(num) \ + ( (((num) << 24) ) \ + | (((num) << 8) & UINT32_C(0x00FF0000)) \ + | (((num) >> 8) & UINT32_C(0x0000FF00)) \ + | (((num) >> 24) ) ) + +# define bswap_64(num) \ + ( (((num) << 56) ) \ + | (((num) << 40) & UINT64_C(0x00FF000000000000)) \ + | (((num) << 24) & UINT64_C(0x0000FF0000000000)) \ + | (((num) << 8) & UINT64_C(0x000000FF00000000)) \ + | (((num) >> 8) & UINT64_C(0x00000000FF000000)) \ + | (((num) >> 24) & UINT64_C(0x0000000000FF0000)) \ + | (((num) >> 40) & UINT64_C(0x000000000000FF00)) \ + | (((num) >> 56) ) ) +#endif + +#endif diff --git a/src/common/integer.h b/src/common/integer.h new file mode 100644 index 00000000..136a0f8d --- /dev/null +++ b/src/common/integer.h @@ -0,0 +1,167 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file integer.h +/// \brief Reading and writing integers from and to buffers +// +// This code has been put into the public domain. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INTEGER_H +#define LZMA_INTEGER_H + +// I'm aware of AC_CHECK_ALIGNED_ACCESS_REQUIRED from Autoconf archive, but +// it's not useful for us. We don't care if unaligned access is supported, +// we care if it is fast. Some systems can emulate unaligned access in +// software, which is horribly slow; we want to use byte-by-byte access on +// such systems but the Autoconf test would detect such a system as +// supporting unaligned access. +// +// NOTE: HAVE_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and +// 32-bit integer loads and stores. 64-bit integers may or may not work. +// That's why 64-bit functions are commented out. +#ifdef HAVE_FAST_UNALIGNED_ACCESS + +// On big endian, we need byte swapping. +// +// TODO: Big endian PowerPC supports byte swapping load and store instructions +// that also allow unaligned access. Inline assembler could be OK for that. +#ifdef WORDS_BIGENDIAN +# include "bswap.h" +# define integer_convert_16(n) bswap_16(n) +# define integer_convert_32(n) bswap_32(n) +# define integer_convert_64(n) bswap_64(n) +#else +# define integer_convert_16(n) (n) +# define integer_convert_32(n) (n) +# define integer_convert_64(n) (n) +#endif + + +static inline uint16_t +integer_read_16(const uint8_t buf[static 2]) +{ + uint16_t ret = *(const uint16_t *)(buf); + return integer_convert_16(ret); +} + + +static inline uint32_t +integer_read_32(const uint8_t buf[static 4]) +{ + uint32_t ret = *(const uint32_t *)(buf); + return integer_convert_32(ret); +} + + +/* +static inline uint64_t +integer_read_64(const uint8_t buf[static 8]) +{ + uint64_t ret = *(const uint64_t *)(buf); + return integer_convert_64(ret); +} +*/ + + +static inline void +integer_write_16(uint8_t buf[static 2], uint16_t num) +{ + *(uint16_t *)(buf) = integer_convert_16(num); +} + + +static inline void +integer_write_32(uint8_t buf[static 4], uint32_t num) +{ + *(uint32_t *)(buf) = integer_convert_32(num); +} + + +/* +static inline void +integer_write_64(uint8_t buf[static 8], uint64_t num) +{ + *(uint64_t *)(buf) = integer_convert_64(num); +} +*/ + + +#else + +static inline uint16_t +integer_read_16(const uint8_t buf[static 2]) +{ + uint16_t ret = buf[0] | (buf[1] << 8); + return ret; +} + + +static inline uint32_t +integer_read_32(const uint8_t buf[static 4]) +{ + uint32_t ret = buf[0]; + ret |= (uint32_t)(buf[1]) << 8; + ret |= (uint32_t)(buf[2]) << 16; + ret |= (uint32_t)(buf[3]) << 24; + return ret; +} + + +/* +static inline uint64_t +integer_read_64(const uint8_t buf[static 8]) +{ + uint64_t ret = buf[0]; + ret |= (uint64_t)(buf[1]) << 8; + ret |= (uint64_t)(buf[2]) << 16; + ret |= (uint64_t)(buf[3]) << 24; + ret |= (uint64_t)(buf[4]) << 32; + ret |= (uint64_t)(buf[5]) << 40; + ret |= (uint64_t)(buf[6]) << 48; + ret |= (uint64_t)(buf[7]) << 56; + return ret; +} +*/ + + +static inline void +integer_write_16(uint8_t buf[static 2], uint16_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); +} + + +static inline void +integer_write_32(uint8_t buf[static 4], uint32_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); +} + + +/* +static inline void +integer_write_64(uint8_t buf[static 8], uint64_t num) +{ + buf[0] = (uint8_t)(num); + buf[1] = (uint8_t)(num >> 8); + buf[2] = (uint8_t)(num >> 16); + buf[3] = (uint8_t)(num >> 24); + buf[4] = (uint8_t)(num >> 32); + buf[5] = (uint8_t)(num >> 40); + buf[6] = (uint8_t)(num >> 48); + buf[7] = (uint8_t)(num >> 56); +} +*/ + +#endif + +#endif diff --git a/src/liblzma/api/Makefile.am b/src/liblzma/api/Makefile.am index 83e47444..194f85db 100644 --- a/src/liblzma/api/Makefile.am +++ b/src/liblzma/api/Makefile.am @@ -20,17 +20,14 @@ nobase_include_HEADERS = \ lzma/base.h \ lzma/block.h \ lzma/check.h \ - lzma/copy.h \ lzma/delta.h \ lzma/easy.h \ - lzma/extra.h \ lzma/filter.h \ lzma/index.h \ - lzma/info.h \ + lzma/index_hash.h \ lzma/init.h \ lzma/lzma.h \ lzma/memlimit.h \ - lzma/metadata.h \ lzma/raw.h \ lzma/simple.h \ lzma/stream.h \ diff --git a/src/liblzma/api/lzma.h b/src/liblzma/api/lzma.h index fedcd25b..9dec904f 100644 --- a/src/liblzma/api/lzma.h +++ b/src/liblzma/api/lzma.h @@ -96,17 +96,13 @@ extern "C" { #include "lzma/check.h" /* Filters */ -#include "lzma/copy.h" #include "lzma/subblock.h" #include "lzma/simple.h" #include "lzma/delta.h" #include "lzma/lzma.h" -/* Container formats and Metadata */ +/* Container formats */ #include "lzma/block.h" -#include "lzma/index.h" -#include "lzma/extra.h" -#include "lzma/metadata.h" #include "lzma/stream.h" #include "lzma/alone.h" #include "lzma/raw.h" @@ -114,7 +110,8 @@ extern "C" { #include "lzma/easy.h" /* Advanced features */ -#include "lzma/info.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" #include "lzma/alignment.h" #include "lzma/stream_flags.h" #include "lzma/memlimit.h" diff --git a/src/liblzma/api/lzma/alone.h b/src/liblzma/api/lzma/alone.h index 1a6b8e27..72299773 100644 --- a/src/liblzma/api/lzma/alone.h +++ b/src/liblzma/api/lzma/alone.h @@ -21,36 +21,6 @@ #endif -/** - * \brief Options for files in the LZMA_Alone format - */ -typedef struct { - /** - * \brief Uncompressed Size and usage of End of Payload Marker - * - * In contrast to .lzma Blocks, LZMA_Alone format cannot have both - * uncompressed size field in the header and end of payload marker. - * If you don't know the uncompressed size beforehand, set it to - * LZMA_VLI_VALUE_UNKNOWN and liblzma will embed end of payload - * marker. - */ - lzma_vli uncompressed_size; - - /** - * \brief LZMA options - * - * The LZMA_Alone format supports only one filter: the LZMA filter. - * - * \note There exists also an undocumented variant of the - * LZMA_Alone format, which uses the x86 filter in - * addition to LZMA. This format was never supported - * by LZMA Utils and is not supported by liblzma either. - */ - lzma_options_lzma lzma; - -} lzma_options_alone; - - /** * \brief Initializes LZMA_Alone encoder * @@ -68,7 +38,7 @@ typedef struct { * - LZMA_PROG_ERROR */ extern lzma_ret lzma_alone_encoder( - lzma_stream *strm, const lzma_options_alone *options); + lzma_stream *strm, const lzma_options_lzma *options); /** diff --git a/src/liblzma/api/lzma/auto.h b/src/liblzma/api/lzma/auto.h index 327e726f..fd5bf7d2 100644 --- a/src/liblzma/api/lzma/auto.h +++ b/src/liblzma/api/lzma/auto.h @@ -29,13 +29,8 @@ * the type of the file has been detected. * * \param strm Pointer to propertily prepared lzma_stream - * \param header Pointer to hold a pointer to Extra Records read - * from the Header Metadata Block. Use NULL if - * you don't care about Extra Records. - * \param footer Same as header, but for Footer Metadata Block. * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. */ -extern lzma_ret lzma_auto_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer); +extern lzma_ret lzma_auto_decoder(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h index d39bfe95..b0dfed95 100644 --- a/src/liblzma/api/lzma/base.h +++ b/src/liblzma/api/lzma/base.h @@ -128,6 +128,21 @@ typedef enum { * In the decoder, this is only a warning and decoding can * still proceed normally (but the Check is ignored). */ + + LZMA_FORMAT_ERROR = -8, + /**< + * \brief Unknown file format + */ + + LZMA_MEMLIMIT_ERROR = -9 + /** + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased. See functions + * lzma_memlimit_get() and lzma_memlimit_set(). + */ } lzma_ret; diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h index 210c1d87..a8941165 100644 --- a/src/liblzma/api/lzma/block.h +++ b/src/liblzma/api/lzma/block.h @@ -33,95 +33,31 @@ */ typedef struct { /** - * \brief Type of integrity Check - * - * The type of the integrity Check is not stored into the Block - * Header, thus its value must be provided also when decoding. - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_check_type check; - - /** - * \brief Precense of CRC32 of the Block Header - * - * Set this to true if CRC32 of the Block Header should be - * calculated and stored in the Block Header. - * - * There is no way to autodetect if CRC32 is present in the Block - * Header, thus this information must be provided also when decoding. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - lzma_block_header_decoder() - */ - lzma_bool has_crc32; - - /** - * \brief Usage of End of Payload Marker - * - * If this is true, End of Payload Marker is used even if - * Uncompressed Size is known. + * \brief Size of the Block Header * * Read by: - * - lzma_block_header_encoder() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() - */ - lzma_bool has_eopm; - - /** - * \brief True if the Block is a Metadata Block - * - * If this is true, the Metadata bit will be set in the Block Header. - * It is up to the application to store correctly formatted data - * into Metadata Block. - * - * Read by: - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - lzma_bool is_metadata; - - /** - * \brief True if Uncompressed Size is in Block Footer - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_bool has_uncompressed_size_in_footer; - - /** - * \brief True if Backward Size is in Block Footer - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() + * - lzma_block_header_size() + * - lzma_block_header_decode() */ - lzma_bool has_backward_size; + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 /** - * \brief True if Block coder should take care of Padding + * \brief Type of integrity Check * - * In liblzma, Stream decoder sets this to true when decoding - * Header Metadata Block or Data Blocks from Multi-Block Stream, - * and to false when decoding Single-Block Stream or Footer - * Metadata Block from a Multi-Block Stream. + * The type of the integrity Check is not stored into the Block + * Header, thus its value must be provided also when decoding. * * Read by: * - lzma_block_encoder() * - lzma_block_decoder() */ - lzma_bool handle_padding; + lzma_check_type check; /** * \brief Size of the Compressed Data in bytes @@ -134,12 +70,12 @@ typedef struct { * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() + * - lzma_block_header_decode() * - lzma_block_encoder() * - lzma_block_decoder() */ @@ -163,167 +99,61 @@ typedef struct { * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder() + * - lzma_block_header_decode() * - lzma_block_encoder() * - lzma_block_decoder() */ lzma_vli uncompressed_size; - /** - * \brief Number of bytes to reserve for Compressed Size - * - * This is useful if you want to be able to store the Compressed Size - * to the Block Header, but you don't know it when starting to encode. - * Setting this to non-zero value at maximum of LZMA_VLI_BYTES_MAX, - * the Block Header encoder will force the Compressed Size field to - * occupy specified number of bytes. You can later rewrite the Block - * Header to contain correct information by using otherwise identical - * lzma_options_block structure except the correct compressed_size. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - uint32_t compressed_reserve; - - /** - * \brief Number of bytes to reserve for Uncompressed Size - * - * See the description of compressed_size above. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - * - * Written by: - * - lzma_block_header_decoder() - */ - uint32_t uncompressed_reserve; - - /** - * \brief Total Size of the Block in bytes - * - * This is useful in the decoder, which can verify the Total Size - * if it is known from Index. - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - * - * Written by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli total_size; - - /** - * \brief Upper limit of Total Size - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli total_limit; - - /** - * \brief Upper limit of Uncompressed Size - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - */ - lzma_vli uncompressed_limit; - /** * \brief Array of filters * - * There can be at maximum of seven filters. The end of the array - * is marked with .id = LZMA_VLI_VALUE_UNKNOWN. Minimum number of - * filters is zero; in that case, an implicit Copy filter is used. + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_VALUE_UNKNOWN. * * Read by: * - lzma_block_header_size() - * - lzma_block_header_encoder() + * - lzma_block_header_encode() * - lzma_block_encoder() * - lzma_block_decoder() * * Written by: - * - lzma_block_header_decoder(): Note that this does NOT free() - * the old filter options structures. If decoding fails, the - * caller must take care of freeing the options structures - * that may have been allocated and decoded before the error - * occurred. - */ - lzma_options_filter filters[8]; - - /** - * \brief Size of the Padding field - * - * The Padding field exist to allow aligning the Compressed Data field - * optimally in the Block. See lzma_options_stream.alignment in - * stream.h for more information. - * - * If you want the Block Header encoder to automatically calculate - * optimal size for the Padding field by looking at the information - * in filters[], set this to LZMA_BLOCK_HEADER_PADDING_AUTO. In that - * case, you must also set the aligmnet variable to tell the the - * encoder the aligmnet of the beginning of the Block Header. - * - * The decoder never sets this to LZMA_BLOCK_HEADER_PADDING_AUTO. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder(): Note that this doesn't - * accept LZMA_BLOCK_HEADER_PADDING_AUTO. - * - * Written by (these never set padding to - * LZMA_BLOCK_HEADER_PADDING_AUTO): - * - lzma_block_header_size() - * - lzma_block_header_decoder() + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_VALUE_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_VALUE_UNKNOWN and NULL. + * + * \note Because of the array is terminated with + * .id = LZMA_VLI_VALUE_UNKNOWN, the actual array must + * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. */ - int32_t padding; -# define LZMA_BLOCK_HEADER_PADDING_AUTO (-1) -# define LZMA_BLOCK_HEADER_PADDING_MIN 0 -# define LZMA_BLOCK_HEADER_PADDING_MAX 31 + lzma_options_filter *filters; +# define LZMA_BLOCK_FILTERS_MAX 4 - /** - * \brief Alignment of the beginning of the Block Header - * - * This variable is read only if padding has been set to - * LZMA_BLOCK_HEADER_PADDING_AUTO. - * - * Read by: - * - lzma_block_header_size() - * - lzma_block_header_encoder() - */ - uint32_t alignment; +} lzma_options_block; - /** - * \brief Size of the Block Header - * - * Read by: - * - lzma_block_encoder() - * - lzma_block_decoder() - * - * Written by: - * - lzma_block_header_size() - * - lzma_block_header_decoder() - */ - uint32_t header_size; -} lzma_options_block; +/** + * \brief Decodes the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_options_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) /** - * \brief Calculates the size of Header Padding and Block Header + * \brief Calculates the size of Block Header * * \return - LZMA_OK: Size calculated successfully and stored to * options->header_size. @@ -353,24 +183,62 @@ extern lzma_ret lzma_block_header_size(lzma_options_block *options); * - LZMA_PROG_ERROR */ extern lzma_ret lzma_block_header_encode( - uint8_t *out, const lzma_options_block *options); + const lzma_options_block *options, uint8_t *out); /** - * \brief Initializes Block Header decoder + * \brief Decodes Block Header * - * Because the results of this decoder are placed into *options, - * strm->next_in, strm->avail_in, and strm->total_in are not used. + * Decoding of the Block options is done with a single call instead of + * first initializing and then doing the actual work with lzma_code(). * - * The only valid `action' with lzma_code() is LZMA_RUN. + * \param options Destination for block options + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc(). + * \param in Beginning of the input buffer. This must be + * at least options->header_size bytes. * - * \return - LZMA_OK: Encoding was successful. options->header_size + * \return - LZMA_OK: Decoding was successful. options->header_size * bytes were written to output buffer. * - LZMA_HEADER_ERROR: Invalid or unsupported options. * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_block_header_decoder( - lzma_stream *strm, lzma_options_block *options); +extern lzma_ret lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in); + + +/** + * \brief Sets Compressed Size according to Total Size + * + * Block Header stores Compressed Size, but Index has Total Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Total Size. This function does + * exactly that with error checking, so application doesn't need to check, + * for example, if the value in Index is too small to contain even the + * Block Header. Note that you need to call this function after decoding + * the Block Header field. + * + * \return - LZMA_OK: options->compressed_size was set successfully. + * - LZMA_DATA_ERROR: total_size is too small compared to + * options->header_size and lzma_check_sizes[options->check]. + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * total_size and options->header_size must be multiples + * of four, total_size must be at least 12, and + * options->header_size between 8 and 1024 inclusive. + */ +extern lzma_ret lzma_block_total_size_set( + lzma_options_block *options, lzma_vli total_size); + + +/** + * \brief Calculates Total Size + * + * This function can be useful after decoding a Block to get Total Size + * that is stored in Index. + * + * \return Total Size on success, or zero on error. + */ +extern lzma_vli lzma_block_total_size_get(const lzma_options_block *options); /** diff --git a/src/liblzma/api/lzma/check.h b/src/liblzma/api/lzma/check.h index 4a2a453b..dcba8269 100644 --- a/src/liblzma/api/lzma/check.h +++ b/src/liblzma/api/lzma/check.h @@ -43,14 +43,14 @@ typedef enum { * Size of the Check field: 4 bytes */ - LZMA_CHECK_CRC64 = 3, + LZMA_CHECK_CRC64 = 4, /**< * CRC64 using the polynomial from the ECMA-182 standard * * Size of the Check field: 8 bytes */ - LZMA_CHECK_SHA256 = 5 + LZMA_CHECK_SHA256 = 10 /**< * SHA-256 * @@ -62,7 +62,7 @@ typedef enum { /** * \brief Maximum valid Check ID * - * The .lzma file format specification specifies eight Check IDs (0-7). Some + * The .lzma file format specification specifies eight Check IDs (0-15). Some * of them are only reserved i.e. no actual Check algorithm has been assigned. * Still liblzma accepts any of these eight IDs for future compatibility * when decoding files. If a valid but unsupported Check ID is detected, @@ -70,7 +70,7 @@ typedef enum { * * FIXME bad desc */ -#define LZMA_CHECK_ID_MAX 7 +#define LZMA_CHECK_ID_MAX 15 /** @@ -89,12 +89,18 @@ extern const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1]; * Although not all Check IDs have a check algorithm associated, the size of * every Check is already frozen. This array contains the size (in bytes) of * the Check field with specified Check ID. The values are taken from the - * section 2.2.2 of the .lzma file format specification: - * { 0, 4, 4, 8, 16, 32, 32, 64 } + * section 2.1.1.2 of the .lzma file format specification: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } */ extern const uint32_t lzma_check_sizes[LZMA_CHECK_ID_MAX + 1]; +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + /** * \brief Calculate CRC32 * diff --git a/src/liblzma/api/lzma/copy.h b/src/liblzma/api/lzma/copy.h deleted file mode 100644 index f5617462..00000000 --- a/src/liblzma/api/lzma/copy.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * \file lzma/copy.h - * \brief Copy filter - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use instead. -#endif - - -/** - * \brief Filter ID - * - * Filter ID of the Copy filter. This is used as lzma_options_filter.id. - */ -#define LZMA_FILTER_COPY LZMA_VLI_C(0x00) diff --git a/src/liblzma/api/lzma/easy.h b/src/liblzma/api/lzma/easy.h index b04c7b4f..d83a79a2 100644 --- a/src/liblzma/api/lzma/easy.h +++ b/src/liblzma/api/lzma/easy.h @@ -69,7 +69,7 @@ typedef enum { /** - * \brief Default compression level for Data Blocks + * \brief Default compression level * * Data Blocks contain the actual compressed data. It's not straightforward * to recommend a default level, because in some cases keeping the resource @@ -79,16 +79,6 @@ typedef enum { #define LZMA_EASY_DEFAULT LZMA_EASY_LZMA_7 -/** - * \brief Default compression level for Metadata Blocks - * - * Metadata Blocks are present only in Multi-Block Streams. Metadata Blocks - * contain the Extra Records (if any) and some book-keeping data that is - * used by decoders. - */ -#define LZMA_EASY_METADATA_DEFAULT LZMA_EASY_LZMA_3 - - /** * \brief Calculates rough memory requirements of a compression level * @@ -104,11 +94,11 @@ extern uint32_t lzma_easy_memory_usage(lzma_easy_level level); /** - * \brief Initializes Single-Block .lzma Stream encoder + * \brief Initializes .lzma Stream encoder * * This function is intended for those who just want to use the basic LZMA * features (that is, most developers out there). Lots of assumptions are - * made, which are correct for most situations or at least good enough. + * made, which are correct or at least good enough for most situations. * * \param strm Pointer to lzma_stream that is at least initialized * with LZMA_STREAM_INIT. @@ -125,50 +115,7 @@ extern uint32_t lzma_easy_memory_usage(lzma_easy_level level); * * If initialization succeeds, use lzma_code() to do the actual encoding. * Valid values for `action' (the second argument of lzma_code()) are - * LZMA_RUN, LZMA_SYNC_FLUSH, and LZMA_FINISH. In future, there may be - * compression levels that don't support LZMA_SYNC_FLUSH. - */ -extern lzma_ret lzma_easy_encoder_single( - lzma_stream *strm, lzma_easy_level level); - - -/** - * \brief Initializes Multi-Block .lzma Stream encoder - * - * If you want to be able to store Extra Records or want to be able to use - * LZMA_FULL_FLUSH, you need to create a Multi-Block Stream. - * - * \param strm Pointer to lzma_stream that is at least initialized - * with LZMA_STREAM_INIT. - * \param level Compression level to use for the data being encoded. - * \param metadata_level - * Compression level to use for Metadata Blocks. - * Metadata Blocks contain the Extra Records (if any) - * and some book-keeping data that is used by decoders. - * \param header Pointer to a list of Extra Records to be stored to - * the Header Metadata Block. Set this to NULL to omit - * Header Metadata Block completely. The list must be - * kept available until the encoding has finished. - * \param footer Pointer to a list of Extra Records to be stored to - * the Footer Metadata Block. Set this to NULL if you - * don't want to store any Extra Records (the Footer - * Metadata Block will still be written for other - * reasons.) The list must be kept available until - * the encoding has finished. - * - * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to - * encode your data. - * - LZMA_MEM_ERROR: Memory allocation failed. All memory - * previously allocated for *strm is now freed. - * - LZMA_HEADER_ERROR: The given compression level is not - * supported by this build of liblzma. - * - * If initialization succeeds, use lzma_code() to do the actual encoding. - * Valid values for `action' (the second argument of lzma_code()) are * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, * there may be compression levels that don't support LZMA_SYNC_FLUSH. - * LZMA_FULL_FLUSH will always work with all compression levels. */ -extern lzma_ret lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer); +extern lzma_ret lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level); diff --git a/src/liblzma/api/lzma/extra.h b/src/liblzma/api/lzma/extra.h deleted file mode 100644 index 29426a74..00000000 --- a/src/liblzma/api/lzma/extra.h +++ /dev/null @@ -1,114 +0,0 @@ -/** - * \file lzma/extra.h - * \brief Handling of Extra Records in Metadata - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use instead. -#endif - - -/* - * Extra Record IDs - * - * See the .lzma file format specification for description what each - * Extra Record type exactly means. - * - * If you ever need to update .lzma files with Extra Records, note that - * the Record IDs are divided in two categories: - * - Safe-to-Copy Records may be preserved as is when the - * Stream is modified in ways that don't change the actual - * uncompressed data. Examples of such operatings include - * recompressing and adding, modifying, or deleting unrelated - * Extra Records. - * - Unsafe-to-Copy Records should be removed (and possibly - * recreated) when any kind of changes are made to the Stream. - */ - -#define LZMA_EXTRA_PADDING 0x00 -#define LZMA_EXTRA_OPENPGP 0x01 -#define LZMA_EXTRA_FILTERS 0x02 -#define LZMA_EXTRA_COMMENT 0x03 -#define LZMA_EXTRA_CHECKS 0x04 -#define LZMA_EXTRA_FILENAME 0x05 -#define LZMA_EXTRA_MTIME 0x07 -#define LZMA_EXTRA_MTIME_HR 0x09 -#define LZMA_EXTRA_MIME_TYPE 0x0B -#define LZMA_EXTRA_HOMEPAGE 0x0D - - -/** - * \brief Extra Records - * - * The .lzma format provides a way to store custom information along - * the actual compressed content. Information about these Records - * are passed to and from liblzma via this linked list. - */ -typedef struct lzma_extra_s lzma_extra; -struct lzma_extra_s { - /** - * \brief Pointer to the next Extra Record - * - * This is NULL on the last Extra Record. - */ - lzma_extra *next; - - /** - * \brief Record ID - * - * Extra Record IDs are divided in three categories: - * - Zero is a special case used for padding. It doesn't have - * Size of Data fields. - * - Odd IDs (1, 3, 5, ...) are Safe-to-Copy IDs. - * These can be preserved as is if the Stream is - * modified in a way that doesn't alter the actual - * uncompressed content. - * - Even IDs (2, 4, 6, ...) are Unsafe-to-Copy IDs. - * If the .lzma Stream is modified in any way, - * the Extra Records having a sensitive ID should - * be removed or updated accordingly. - * - * Refer to the .lzma file format specification for - * the up to date list of Extra Record IDs. - */ - lzma_vli id; - - /** - * \brief Size of the Record data - * - * In case of strings, this should not include the - * trailing '\0'. - */ - size_t size; - - /** - * \brief Record data - * - * Record data is often a string in UTF-8 encoding, - * but it can be arbitrary binary data. In case of - * strings, the trailing '\0' is usually not stored - * in the .lzma file. - * - * To ease working with Extra Records containing strings, - * liblzma always adds '\0' to the end of data even when - * it wasn't present in the .lzma file. This '\0' is not - * counted in the size of the data. - */ - uint8_t *data; -}; - - -extern void lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator); diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index a8bdd4bd..412c30e5 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -162,5 +162,6 @@ extern lzma_ret lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, * - LZMA_MEM_ERROR * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_filter_flags_decoder( - lzma_stream *strm, lzma_options_filter *options); +extern lzma_ret lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size); diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h index 7e59c4b3..13cddf47 100644 --- a/src/liblzma/api/lzma/index.h +++ b/src/liblzma/api/lzma/index.h @@ -22,63 +22,211 @@ /** - * \brief - * - * FIXME desc + * \brief Opaque data type to hold the Index */ typedef struct lzma_index_s lzma_index; -struct lzma_index_s { + + +/** + * \brief Index Record and its location + */ +typedef struct { /** - * \brief Total Size of the Block - * - * This includes Block Header, Compressed Data, and Block Footer. + * Total Size of a Block. */ lzma_vli total_size; /** - * \brief Uncompressed Size of the Block + * Uncompressed Size of a Block */ lzma_vli uncompressed_size; /** - * \brief Pointer to the next Index Record - * - * This is NULL on the last Index Record. + * Offset of the first byte of a Block relative to the beginning + * of the Stream, or if there are multiple Indexes combined, + * relative to the beginning of the first Stream. */ - lzma_index *next; -}; + lzma_vli stream_offset; + + /** + * Uncompressed offset + */ + lzma_vli uncompressed_offset; + +} lzma_index_record; /** - * \brief Duplicates an Index list + * \brief Allocate and initialize a new lzma_index structure + * + * If i is NULL, a new lzma_index structure is allocated, initialized, + * and a pointer to it returned. If allocation fails, NULL is returned. * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. + * If i is non-NULL, it is reinitialized and the same pointer returned. + * In this case, return value cannot be NULL or a different pointer than + * the i given as argument. */ -extern lzma_index *lzma_index_dup( - const lzma_index *index, lzma_allocator *allocator); +extern lzma_index *lzma_index_init(lzma_index *i, lzma_allocator *allocator); + + +/** + * \brief Deallocate the Index + */ +extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); /** - * \brief Frees an Index list + * \brief Add a new Record to an Index * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. + * \param index Pointer to a lzma_index structure + * \param total_size Total Size of a Block + * \param uncompressed_size Uncompressed Size of a Block, or + * LZMA_VLI_VALUE_UNKNOWN to indicate padding. + * + * Appending a new Record does not affect the read position. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR */ -extern void lzma_index_free(lzma_index *index, lzma_allocator *allocator); +extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size); /** - * \brief Calculates information about the Index + * \brief Get the number of Records + */ +extern lzma_vli lzma_index_count(const lzma_index *i); + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Index Size field from the Stream Footer. + */ +extern lzma_vli lzma_index_size(const lzma_index *i); + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern lzma_vli lzma_index_total_size(const lzma_index *i); + + +/** + * \brief Get the total size of the Stream + * + * If multiple Indexes have been combined, this works as if the Blocks + * were in a single Stream. + */ +extern lzma_vli lzma_index_stream_size(const lzma_index *i); + + +/** + * \brief Get the total size of the file * - * \return LZMA_OK on success, LZMA_PROG_ERROR on error. FIXME + * When no Indexes have been combined with lzma_index_cat(), this function is + * identical to lzma_index_stream_size(). If multiple Indexes have been + * combined, this includes also the possible Stream Padding fields. + */ +extern lzma_vli lzma_index_file_size(const lzma_index *i); + + +/** + * \brief Get the uncompressed size of the Stream */ -extern lzma_ret lzma_index_count(const lzma_index *index, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size); +extern lzma_vli lzma_index_uncompressed_size(const lzma_index *i); + + +/** + * \brief Get the next Record from the Index + */ +extern lzma_bool lzma_index_read(lzma_index *i, lzma_index_record *record); + + +/** + * \brief Rewind the Index + * + * Rewind the Index so that next call to lzma_index_read() will return the + * first Record. + */ +extern void lzma_index_rewind(lzma_index *i); + + +/** + * \brief Locate a Record + * + * When the Index is available, it is possible to do random-access reading + * with granularity of Block size. + * + * \param i Pointer to lzma_index structure + * \param record Pointer to a structure to hold the search results + * \param target Uncompressed target offset + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Record containing the requested uncompressed + * offset is stored into *record. + * - Read offset will be adjusted so that calling lzma_index_read() can be + * used to read subsequent Records. + * - This function returns false. + * + * If target is greater than the uncompressed size of the Stream, *record + * and the read position are not modified, and this function returns true. + */ +extern lzma_bool lzma_index_locate( + lzma_index *i, lzma_index_record *record, lzma_vli target); + + +/** + * \brief Concatenate Indexes of two Streams + * + * + * + * \param dest Destination Index after which src is appended Source + * \param src Index. The memory allocated for this is either moved + * to be part of *dest or freed iff the function call + * succeeds, and src will be an invalid pointer. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * \param padding Size of the Stream Padding field between Streams. + * + * \return - LZMA_OK: Indexes concatenated successfully. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern lzma_ret lzma_index_cat(lzma_index *lzma_restrict dest, + lzma_index *lzma_restrict src, + lzma_allocator *allocator, lzma_vli padding); + + +/** + * \brief Duplicates an Index list + * + * \return A copy of the Index, or NULL if memory allocation failed. + */ +extern lzma_index *lzma_index_dup( + const lzma_index *i, lzma_allocator *allocator); /** * \brief Compares if two Index lists are identical */ -extern lzma_bool lzma_index_is_equal(const lzma_index *a, const lzma_index *b); +extern lzma_bool lzma_index_equal(const lzma_index *a, const lzma_index *b); + + +/** + * \brief Initializes Index encoder + */ +extern lzma_ret lzma_index_encoder(lzma_stream *strm, lzma_index *i); + + +/** + * \brief Initializes Index decoder + */ +extern lzma_ret lzma_index_decoder(lzma_stream *strm, lzma_index **i); diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h new file mode 100644 index 00000000..1edbbeaa --- /dev/null +++ b/src/liblzma/api/lzma/index_hash.h @@ -0,0 +1,94 @@ +/** + * \file lzma/index_hash.h + * \brief Validates Index by using a hash function + * + * Instead of constructing complete Index while decoding Blocks, Index hash + * calculates a hash of the Block sizes and Index, and then compares the + * hashes. This way memory usage is constant even with large number of + * Blocks and huge Index. + * + * \author Copyright (C) 2008 Lasse Collin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash given as argument. + */ +extern lzma_index_hash *lzma_index_hash_init( + lzma_index_hash *index_hash, lzma_allocator *allocator); + + +/** + * \brief Deallocate the Index hash + */ +extern void lzma_index_hash_end( + lzma_index_hash *index_hash, lzma_allocator *allocator); + + +/** + * \brief Add a new Record to an Index hash + * + * \param index Pointer to a lzma_index_hash structure + * \param total_size Total Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli total_size, lzma_vli uncompressed_size); + + +/** + * \brief Decode the Index field + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_PROG_ERROR + * + * \note Once decoding of the Index field has been started, no more + * Records can be added using lzma_index_hash_append(). + */ +extern lzma_ret lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size); + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Index Size field from the Stream Footer. + */ +extern lzma_vli lzma_index_hash_size(const lzma_index_hash *index_hash); diff --git a/src/liblzma/api/lzma/info.h b/src/liblzma/api/lzma/info.h deleted file mode 100644 index 3a91850f..00000000 --- a/src/liblzma/api/lzma/info.h +++ /dev/null @@ -1,315 +0,0 @@ -/** - * \file lzma/info.h - * \brief Handling of Stream size information - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use instead. -#endif - - -/********** - * Basics * - **********/ - -/** - * \brief Opaque data type to hold the size information - */ -typedef struct lzma_info_s lzma_info; - - -typedef struct { - /** - * \brief Total Size of this Block - * - * This can be LZMA_VLI_VALUE_UNKNOWN. - */ - lzma_vli total_size; - - /** - * \brief Uncompressed Size of this Block - * - * This can be LZMA_VLI_VALUE_UNKNOWN. - */ - lzma_vli uncompressed_size; - - /** - * \brief Offset of the first byte of the Block - * - * In encoder, this is useful to find out the alignment of the Block. - * - * In decoder, this is useful when doing random-access reading - * with help from lzma_info_data_locate(). - */ - lzma_vli stream_offset; - - /** - * \brief Uncompressed offset of the Block - * - * Offset of the first uncompressed byte of the Block relative to - * all uncompressed data in the Block. - * FIXME desc - */ - lzma_vli uncompressed_offset; - - /** - * \brief Pointers to internal data structures - * - * Applications must not touch these. - */ - void *internal[4]; - -} lzma_info_iter; - - -typedef enum { - LZMA_INFO_STREAM_START, - LZMA_INFO_HEADER_METADATA, - LZMA_INFO_TOTAL, - LZMA_INFO_UNCOMPRESSED, - LZMA_INFO_FOOTER_METADATA -} lzma_info_size; - - -/** - * \brief Allocates and initializes a new lzma_info structure - * - * If info is NULL, a new lzma_info structure is allocated, initialized, and - * a pointer to it returned. If allocation fails, NULL is returned. - * - * If info is non-NULL, it is reinitialized and the same pointer returned. - * (In this case, return value cannot be NULL or a different pointer than - * the info given as argument.) - */ -extern lzma_info *lzma_info_init(lzma_info *info, lzma_allocator *allocator); - - -/** - * \brief Resets lzma_info - * - * This is like calling lzma_info_end() and lzma_info_create(), but - * re-uses the existing base structure. - */ -extern void lzma_info_reset( - lzma_info *info, lzma_allocator *allocator); - - -/** - * \brief Frees memory allocated for a lzma_info structure - */ -extern void lzma_info_free(lzma_info *info, lzma_allocator *allocator); - - -/************************ - * Setting known values * - ************************/ - -/** - * \brief Set a known size value - * - * \param info Pointer returned by lzma_info_create() - * \param type Any value from lzma_info_size - * \param size Value to set or verify - * - * \return LZMA_OK on success, LZMA_DATA_ERROR if the size doesn't - * match the existing information, or LZMA_PROG_ERROR - * if type is invalid or size is not a valid VLI. - */ -extern lzma_ret lzma_info_size_set( - lzma_info *info, lzma_info_size type, lzma_vli size); - - -/** - * \brief Sets the Index - * - * The given lzma_index list is "absorbed" by this function. The application - * must not access it after this function call, even if this function returns - * an error. - * - * \note The given lzma_index will at some point get freed by the - * lzma_info_* functions. If you use a custom lzma_allocator, - * make sure that it can free the lzma_index. - */ -extern lzma_ret lzma_info_index_set( - lzma_info *info, lzma_allocator *allocator, - lzma_index *index, lzma_bool eat_index); - - -/** - * \brief Sets information from a known Metadata Block - * - * This is a shortcut for calling lzma_info_size_set() with different type - * arguments, lzma_info_index_set() with metadata->index. - */ -extern lzma_ret lzma_info_metadata_set(lzma_info *info, - lzma_allocator *allocator, lzma_metadata *metadata, - lzma_bool is_header_metadata, lzma_bool eat_index); - - -/*************** - * Incremental * - ***************/ - -/** - * \brief Prepares an iterator to be used with given lzma_info structure - * - * - */ -extern void lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter); - - -/** - * \brief Moves to the next Index Record - * - * - */ -extern lzma_ret lzma_info_iter_next( - lzma_info_iter *iter, lzma_allocator *allocator); - - -/** - * \brief Sets or verifies the sizes in the Index Record - * - * \param iter Pointer to iterator to be set or verified - * \param total_size - * Total Size in bytes or LZMA_VLI_VALUE_UNKNOWN - * \param uncompressed_size - * Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN - * - * \return - LZMA_OK: All OK. - * - LZMA_DATA_ERROR: Given sizes don't match with the already - * known sizes. - * - LZMA_PROG_ERROR: Internal error, possibly integer - * overflow (e.g. the sum of all the known sizes is too big) - */ -extern lzma_ret lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size); - - -/** - * \brief Locates a Data Block - * - * \param iter Properly initialized iterator - * \param allocator Pointer to lzma_allocator or NULL - * \param uncompressed_offset - * Target offset to locate. The final offset - * will be equal or smaller than this. - * \param allow_alloc True if this function is allowed to call - * lzma_info_iter_next() to allocate a new Record - * if the requested offset reached end of Index - * Record list. Note that if Index has been marked - * final, lzma_info_iter_next() is never called. - * - * \return - LZMA_OK: All OK, *iter updated accordingly. - * - LZMA_DATA_ERROR: Trying to search past the end of the Index - * Record list, and allocating a new Record was not allowed - * either because allow_alloc was false or Index was final. - * - LZMA_PROG_ERROR: Internal error (probably integer - * overflow causing some lzma_vli getting too big). - */ -extern lzma_ret lzma_info_iter_locate(lzma_info_iter *iter, - lzma_allocator *allocator, lzma_vli uncompressed_offset, - lzma_bool allow_alloc); - - -/** - * \brief Finishes incrementally constructed Index - * - * This sets the known Total Size and Uncompressed of the Data Blocks - * based on the information collected from the Index Records, and marks - * the Index as final. - */ -extern lzma_ret lzma_info_index_finish(lzma_info *info); - - -/*************************** - * Reading the information * - ***************************/ - -/** - * \brief Gets a known size - * - * - */ -extern lzma_vli lzma_info_size_get( - const lzma_info *info, lzma_info_size type); - -extern lzma_vli lzma_info_metadata_locate( - const lzma_info *info, lzma_bool is_header_metadata); - -/** - * \brief Gets a pointer to the beginning of the Index list - * - * If detach is true, the Index will be detached from the lzma_info - * structure, and thus not be modified or freed by lzma_info_end(). - * - * If detach is false, the application must not modify the Index in any way. - * Also, the Index list is guaranteed to be valid only till the next call - * to any lzma_info_* function. - */ -extern lzma_index *lzma_info_index_get(lzma_info *info, lzma_bool detach); - - -extern size_t lzma_info_index_count_get(const lzma_info *info); - - -extern uint32_t lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata); - - - -/** - * \brief Locate a Block containing the given uncompressed offset - * - * This function is useful when you need to do random-access reading in - * a Multi-Block Stream. - * - * \param info Pointer to lzma_info that has at least one - * Index Record. The Index doesn't need to be finished. - * \param uncompressed_target - * Uncompressed target offset which the caller would - * like to locate from the Stream. - * \param stream_offset - * Starting offset (relative to the beginning the Stream) - * of the Block containing the requested location. - * \param uncompressed_offset - * The actual uncompressed offset of the beginning of - * the Block. uncompressed_offset <= uncompressed_target - * is always true; the application needs to uncompress - * uncompressed_target - uncompressed_offset bytes to - * reach the requested target offset. - * \param total_size - * Total Size of the Block. If the Index is incomplete, - * this may be LZMA_VLI_VALUE_UNKNOWN indicating unknown - * size. - * \param uncompressed_size - * Uncompressed Size of the Block. If the Index is - * incomplete, this may be LZMA_VLI_VALUE_UNKNOWN - * indicating unknown size. The application must pass - * this value to the Block decoder to verify FIXME - * - * \return - * - * \note This function is currently implemented as a linear search. - * If there are many Index Records, this can be really slow. - * This can be improved in newer liblzma versions if needed. - */ -extern lzma_bool lzma_info_data_locate(const lzma_info *info, - lzma_vli uncompressed_target, - lzma_vli *lzma_restrict stream_offset, - lzma_vli *lzma_restrict uncompressed_offset, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size); diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma.h index 9ff25d86..da0bb52d 100644 --- a/src/liblzma/api/lzma/lzma.h +++ b/src/liblzma/api/lzma/lzma.h @@ -152,7 +152,7 @@ typedef struct { * because it uses the target buffer as the dictionary. */ uint32_t dictionary_size; -# define LZMA_DICTIONARY_SIZE_MIN 1 +# define LZMA_DICTIONARY_SIZE_MIN (UINT32_C(1) << 12) # define LZMA_DICTIONARY_SIZE_MAX (UINT32_C(1) << 30) # define LZMA_DICTIONARY_SIZE_DEFAULT (UINT32_C(1) << 23) diff --git a/src/liblzma/api/lzma/metadata.h b/src/liblzma/api/lzma/metadata.h deleted file mode 100644 index 69592a3a..00000000 --- a/src/liblzma/api/lzma/metadata.h +++ /dev/null @@ -1,100 +0,0 @@ -/** - * \file lzma/metadata.h - * \brief Metadata handling - * - * \author Copyright (C) 1999-2006 Igor Pavlov - * \author Copyright (C) 2007 Lasse Collin - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ - -#ifndef LZMA_H_INTERNAL -# error Never include this file directly. Use instead. -#endif - - -/** - * \brief Information stored into a Metadata Block - * - * This structure holds all the information that can be stored to - * a Metadata Block. - */ -typedef struct { - /** - * \brief Size of Header Metadata Block - */ - lzma_vli header_metadata_size; - - /** - * \brief Total Size of the Stream - */ - lzma_vli total_size; - - /** - * \brief Uncompressed Size of the Stream - */ - lzma_vli uncompressed_size; - - /** - * \brief Index of the Blocks stored in the Stream - */ - lzma_index *index; - - /** - * \brief Extra information - */ - lzma_extra *extra; - -} lzma_metadata; - - -/** - * \brief Calculate the encoded size of Metadata - * - * \return Uncompressed size of the Metadata in encoded form. This value - * may be passed to Block encoder as Uncompressed Size when using - * Metadata filter. On error, zero is returned. - */ -extern lzma_vli lzma_metadata_size(const lzma_metadata *metadata); - - -/** - * \brief Initializes Metadata encoder - * - * \param coder Pointer to a pointer to hold Metadata encoder's - * internal state. Original value is ignored, thus - * you don't need to initialize the pointer. - * \param allocator Custom memory allocator; usually NULL. - * \param metadata Pointer to Metadata to encoded - * - * \return - LZMA_OK: Initialization succeeded. - * - LZMA_MEM_ERROR: Cannot allocate memory for *coder. - * - * The initialization function makes internal copy of the *metadata structure. - * However, the linked lists metadata->index and metadata->extra are NOT - * copied. Thus, the application may destroy *metadata after initialization - * if it likes, but not Index or Extra. - */ -extern lzma_ret lzma_metadata_encoder(lzma_stream *strm, - lzma_options_block *options, const lzma_metadata *metadata); - - -/** - * \brief Initializes Metadata decoder - * - * \param want_extra If this is true, Extra Records will be stored - * to metadata->extra. If this is false, Extra - * Records will be parsed but not stored anywhere, - * metadata->extra will be set to NULL. - */ -extern lzma_ret lzma_metadata_decoder( - lzma_stream *strm, lzma_options_block *options, - lzma_metadata *metadata, lzma_bool want_extra); diff --git a/src/liblzma/api/lzma/raw.h b/src/liblzma/api/lzma/raw.h index c1ee41d8..db8cba15 100644 --- a/src/liblzma/api/lzma/raw.h +++ b/src/liblzma/api/lzma/raw.h @@ -30,20 +30,10 @@ * \param options Array of lzma_options_filter structures. * The end of the array must be marked with * .id = LZMA_VLI_VALUE_UNKNOWN. The minimum - * number of filters is zero; the maximum is - * determined by available memory. - * \param uncompressed_size - * Size of the uncompressed data. If it is unknown, - * use LZMA_VLI_VALUE_UNKNOWN. You need to give the - * same value to the raw decoder to decode the data. - * \param allow_implicit - * If true, an implicit Copy or Subblock filter should be - * automatically added when needed. If this is false and - * an implicit filter would be needed, LZMA_PROG_ERROR is - * returned. + * number of filters is one and the maximum is four. * * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the - * filter chain support it), or LZMA_FINISH. + * filter chain supports it), or LZMA_FINISH. * * \return - LZMA_OK * - LZMA_MEM_ERROR @@ -51,8 +41,7 @@ * - LZMA_PROG_ERROR */ extern lzma_ret lzma_raw_encoder( - lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit); + lzma_stream *strm, const lzma_options_filter *options); /** @@ -68,5 +57,4 @@ extern lzma_ret lzma_raw_encoder( * - LZMA_PROG_ERROR */ extern lzma_ret lzma_raw_decoder( - lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit); + lzma_stream *strm, const lzma_options_filter *options); diff --git a/src/liblzma/api/lzma/stream.h b/src/liblzma/api/lzma/stream.h index 346bdd17..4bb17e7d 100644 --- a/src/liblzma/api/lzma/stream.h +++ b/src/liblzma/api/lzma/stream.h @@ -22,157 +22,32 @@ /** - * \brief Options for .lzma Stream encoder - */ -typedef struct { - /** - * \brief Type of integrity Check - * - * The type of the integrity Check is stored into Stream Header - * and Stream Footer. The same Check is used for all Blocks in - * the Stream. - */ - lzma_check_type check; - - /** - * \brief Precense of CRC32 of the Block Header - * - * Set this to true if CRC32 of every Block Header should be - * calculated and stored in the Block Header. This is recommended. - * - * This setting is stored into Stream Header and Stream Footer. - */ - lzma_bool has_crc32; - - /** - * \brief Uncompressed Size in bytes - * - * This is somewhat advanced feature. Most users want to set this to - * LZMA_VLI_VALUE_UNKNOWN to indicate unknown Uncompressed Size. - * - * If the Uncompressed Size of the Stream being encoded is known, - * it can be stored to the beginning of the Stream. The details - * differ for Single-Block and Multi-Block Streams: - * - With Single-Block Streams, the Uncompressed Size is stored to - * the Block Header and End of Payload Marker is omitted. - * - With Multi-Block Streams, the Uncompressed Size is stored to - * the Header Metadata Block. The Uncompressed Size of the Blocks - * will be unknown, because liblzma cannot predict how the - * application is going to split the data in Blocks. - */ - lzma_vli uncompressed_size; - - /** - * \brief Alignment of the beginning of the Stream - * - * Certain filters handle data in bigger chunks than single bytes. - * This affects two things: - * - Performance: aligned memory access is usually faster. - * - Further compression ratio in custom file formats: if you - * encode multiple Blocks with some non-compression filter - * such as LZMA_FILTER_POWERPC, it is a good idea to keep - * the inter-Block alignment correct to maximize compression - * ratio when all these Blocks are finally compressed as a - * single step. - * - * Usually the Stream is stored into its own file, thus - * the alignment is usually zero. - */ - uint32_t alignment; - - /** - * \brief Array of filters used to encode Data Blocks - * - * There can be at maximum of seven filters. The end of the array is - * marked with .id = LZMA_VLI_VALUE_UNKNOWN. (That's why the array - * has eight members.) Minimum number of filters is zero; in that - * case, an implicit Copy filter is used. - */ - lzma_options_filter filters[8]; - - /** - * \brief Array of filters used to encode Metadata Blocks - * - * This is like filters[] but for Metadata Blocks. If Metadata - * Blocks are compressed, they usually are compressed with - * settings that require only little memory to uncompress e.g. - * LZMA with 64 KiB dictionary. - * - * \todo Recommend a preset. - * - * When liblzma sees that the Metadata Block would be very small - * even in uncompressed form, it is not compressed no matter - * what filter have been set here. This is to avoid possibly - * increasing the size of the Metadata Block with bad compression, - * and to avoid useless overhead of filters in uncompression phase. - */ - lzma_options_filter metadata_filters[8]; - - /** - * \brief Extra information in the Header Metadata Block - */ - const lzma_extra *header; - - /** - * \brief Extra information in the Footer Metadata Block - * - * It is enough to set this pointer any time before calling - * lzma_code() with LZMA_FINISH as the second argument. - */ - const lzma_extra *footer; - -} lzma_options_stream; - - -/** - * \brief Initializes Single-Block .lzma Stream encoder - * - * This is the function that most developers are looking for. :-) It - * compresses using the specified options without storing any extra - * information. + * \brief Initializes .lzma Stream encoder * - * \todo Describe that is_metadata is ignored, maybe something else. - */ -extern lzma_ret lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options); - - -/** - * \brief Initializes Multi-Block .lzma Stream encoder + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_VALUE_UNKNOWN. There must + * be 1-4 filters, but there are restrictions on how + * multiple filters can be combined. FIXME Tell where + * to find more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_HEADER_ERROR + * - LZMA_PROG_ERROR */ -extern lzma_ret lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options); +extern lzma_ret lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check); /** * \brief Initializes decoder for .lzma Stream * * \param strm Pointer to propertily prepared lzma_stream - * \param header Pointer to hold a pointer to Extra Records read - * from the Header Metadata Block. Use NULL if - * you don't care about Extra Records. - * \param footer Same as header, but for Footer Metadata Block. * * \return - LZMA_OK: Initialization was successful. * - LZMA_MEM_ERROR: Cannot allocate memory. - * - * If header and/or footer are not NULL, *header and/or *footer will be - * initially set to NULL. - * - * The application can detect that Header Metadata Block has been completely - * parsed when the decoder procudes some output the first time. If *header - * is still NULL, there was no Extra field in the Header Metadata Block (or - * the whole Header Metadata Block wasn't present at all). - * - * The application can detect that Footer Metadata Block has been parsed - * completely when lzma_code() returns LZMA_STREAM_END. If *footer is still - * NULL, there was no Extra field in the Footer Metadata Block. - * - * \note If you use lzma_memory_limiter, the Extra Records will be - * allocated with it, and thus remain in the lzma_memory_limiter - * even after they get exported to the application via *header - * and *footer pointers. */ -extern lzma_ret lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer); +extern lzma_ret lzma_stream_decoder(lzma_stream *strm); diff --git a/src/liblzma/api/lzma/stream_flags.h b/src/liblzma/api/lzma/stream_flags.h index 070c91c9..f4c5c335 100644 --- a/src/liblzma/api/lzma/stream_flags.h +++ b/src/liblzma/api/lzma/stream_flags.h @@ -1,6 +1,6 @@ /** * \file lzma/stream_flags.h - * \brief .lzma Stream Header and Stream tail encoder and decoder + * \brief .lzma Stream Header and Stream Footer encoder and decoder * * \author Copyright (C) 1999-2006 Igor Pavlov * \author Copyright (C) 2007 Lasse Collin @@ -22,121 +22,113 @@ /** - * \brief Size of Stream Header + * \brief Size of Stream Header and Stream Footer * - * Magic Bytes (6) + Stream Flags (1) + CRC32 (4) + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .lzma file format is + * developed in future. */ -#define LZMA_STREAM_HEADER_SIZE (6 + 1 + 4) +#define LZMA_STREAM_HEADER_SIZE 12 /** - * \brief Size of Stream tail - * - * Because Stream Footer already has a defined meaning in the file format - * specification, we use Stream tail to denote these two fields: - * Stream Flags (1) + Magic Bytes (2) - */ -#define LZMA_STREAM_TAIL_SIZE (1 + 2) - - -/** - * Options for encoding and decoding Stream Header and Stream tail + * Options for encoding and decoding Stream Header and Stream Footer */ typedef struct { /** - * Type of the Check calculated from uncompressed data + * Backward Size must be a multiple of four bytes. In this Stream + * format version Backward Size is the size of the Index field. */ - lzma_check_type check; + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) /** - * True if Block Headers have the CRC32 field. Note that the CRC32 - * field is always present in the Stream Header. - */ - lzma_bool has_crc32; - - /** - * True if the Stream is a Multi-Block Stream. + * Type of the Check calculated from uncompressed data */ - lzma_bool is_multi; + lzma_check_type check; } lzma_stream_flags; -#define lzma_stream_flags_is_equal(a, b) \ - ((a).check == (b).check \ - && (a).has_crc32 == (b).has_crc32 \ - && (a).is_multi == (b).is_multi) - - /** - * \brief Encodes Stream Header - * - * Encoding of the Stream Header is done with a single call instead of - * first initializing and then doing the actual work with lzma_code(). + * \brief Encode Stream Header * - * \param out Beginning of the output buffer - * \param out_pos out[*out_pos] is the next write position. This - * is updated by the encoder. - * \param out_size out[out_size] is the first byte to not write. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. * \param options Stream Header options to be encoded. + * options->index_size is ignored and doesn't + * need to be initialized. * * \return - LZMA_OK: Encoding was successful. * - LZMA_PROG_ERROR: Invalid options. - * - LZMA_BUF_ERROR: Not enough output buffer space. */ extern lzma_ret lzma_stream_header_encode( - uint8_t *out, const lzma_stream_flags *options); + const lzma_stream_flags *options, uint8_t *out); /** - * \brief Encodes Stream tail + * \brief Encode Stream Footer * - * \param footer Pointer to a pointer that will hold the - * allocated buffer. Caller must free it once - * it isn't needed anymore. - * \param footer_size Pointer to a variable that will the final size - * of the footer buffer. - * \param allocator lzma_allocator for custom allocator functions. - * Set to NULL to use malloc(). - * \param options Stream Header options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * \param options Stream Footer options to be encoded. * - * \return - LZMA_OK: Success; *header and *header_size set. - * - LZMA_PROG_ERROR: *options is invalid. - * - LZMA_MEM_ERROR: Cannot allocate memory. + * \return - LZMA_OK: Encoding was successful. + * - LZMA_PROG_ERROR: Invalid options. */ -extern lzma_ret lzma_stream_tail_encode( - uint8_t *out, const lzma_stream_flags *options); +extern lzma_ret lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out); /** - * \brief Initializes Stream Header decoder - * - * \param strm Pointer to lzma_stream used to pass input data - * \param options Target structure for parsed results - * - * \return - LZMA_OK: Successfully initialized - * - LZMA_MEM_ERROR: Cannot allocate memory + * \brief Decode Stream Header * - * The actual decoding is done with lzma_code() and freed with lzma_end(). + * \param options Stream Header options to be encoded. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->index_size is always set to LZMA_VLI_VALUE_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_equal(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_HEADER_ERROR: Unsupported options are present + * in the header. */ -extern lzma_ret lzma_stream_header_decoder( - lzma_stream *strm, lzma_stream_flags *options); +extern lzma_ret lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in); /** - * \brief Initializes Stream tail decoder + * \brief Decode Stream Footer * - * \param strm Pointer to lzma_stream used to pass input data - * \param options Target structure for parsed results. - * \param decode_uncompressed_size - * Set to true if the first field to decode is - * Uncompressed Size. Set to false if the first - * field to decode is Backward Size. + * \param options Stream Header options to be encoded. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the footer + * is corrupt. + * - LZMA_HEADER_ERROR: Unsupported options are present + * in the footer. + */ +extern lzma_ret lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in); + + +/** + * \brief Compare two lzma_stream_flags structures * - * \return - LZMA_OK: Successfully initialized - * - LZMA_MEM_ERROR: Cannot allocate memory + * index_size values are compared only if both are not LZMA_VLI_VALUE_UNKNOWN. * - * The actual decoding is done with lzma_code() and freed with lzma_end(). + * \return true if both structures are considered equal; false otherwise. */ -extern lzma_ret lzma_stream_tail_decoder( - lzma_stream *strm, lzma_stream_flags *options); +extern lzma_bool lzma_stream_flags_equal( + const lzma_stream_flags *a, lzma_stream_flags *b); diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h index d88aa305..252458a3 100644 --- a/src/liblzma/api/lzma/version.h +++ b/src/liblzma/api/lzma/version.h @@ -35,7 +35,7 @@ * \note The version number of LZMA Utils (and thus liblzma) * has nothing to with the version number of LZMA SDK. */ -#define LZMA_VERSION UINT32_C(49990030) +#define LZMA_VERSION UINT32_C(49990050) /** diff --git a/src/liblzma/api/lzma/vli.h b/src/liblzma/api/lzma/vli.h index bc0770ce..15a9d0bf 100644 --- a/src/liblzma/api/lzma/vli.h +++ b/src/liblzma/api/lzma/vli.h @@ -158,25 +158,34 @@ typedef uint64_t lzma_vli; * may use LZMA_VLI_VALUE_MAX for clarity. * * \param vli Integer to be encoded - * \param vli_pos How many bytes have already been written out. This - * must be less than 9 before calling this function. - * \param vli_size Minimum size that the variable-length representation - * must take. This is useful if you want to use - * variable-length integers as padding. Usually you want - * to set this to zero. The maximum allowed value is 9. + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer, *vli_pos + * must be set to zero. To use single-call encoding, + * set vli_pos to NULL. * \param out Beginning of the output buffer * \param out_pos The next byte will be written to out[*out_pos]. * \param out_size Size of the out buffer; the first byte into * which no data is written to is out[out_size]. * - * \return - LZMA_OK: So far all OK, but the integer is not + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not * completely written out yet. * - LZMA_STREAM_END: Integer successfully encoded. - * - LZMA_BUF_ERROR: No output space (*out_pos == out_size) - * - LZMA_PROG_ERROR: Arguments are not sane. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to no *out_pos == out_size; this function doesn't use + * LZMA_BUF_ERROR. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; this function doesn't use + * LZMA_BUF_ERROR. */ extern lzma_ret lzma_vli_encode( - lzma_vli vli, size_t *lzma_restrict vli_pos, size_t vli_size, + lzma_vli vli, size_t *lzma_restrict vli_pos, uint8_t *lzma_restrict out, size_t *lzma_restrict out_pos, size_t out_size); @@ -189,18 +198,30 @@ extern lzma_ret lzma_vli_encode( * application isn't required to initialize *vli. * \param vli_pos How many bytes have already been decoded. When * starting to decode a new integer, *vli_pos must - * be initialized to zero. + * be initialized to zero. To use single-call decoding, + * set this to NULL. * \param in Beginning of the input buffer * \param in_pos The next byte will be read from in[*in_pos]. * \param in_size Size of the input buffer; the first byte that * won't be read is in[in_size]. * - * \return - LZMA_OK: So far all OK, but the integer is not + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not * completely decoded yet. * - LZMA_STREAM_END: Integer successfully decoded. - * - LZMA_BUF_ERROR: No input data (*in_pos == in_size) - * - LZMA_DATA_ERROR: Integer is longer than nine bytes. - * - LZMA_PROG_ERROR: Arguments are not sane. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be + * due to *in_pos == in_size; this function doesn't use + * LZMA_BUF_ERROR. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due to + * too little input; this function doesn't use LZMA_BUF_ERROR. */ extern lzma_ret lzma_vli_decode(lzma_vli *lzma_restrict vli, size_t *lzma_restrict vli_pos, const uint8_t *lzma_restrict in, @@ -208,37 +229,9 @@ extern lzma_ret lzma_vli_decode(lzma_vli *lzma_restrict vli, /** - * \brief Decodes variable-length integer reading buffer backwards - * - * The variable-length integer encoding is designed so that it can be read - * either from the beginning to the end, or from the end to the beginning. - * This feature is needed to make the Stream parseable backwards; - * specifically, to read the Backward Size field in Stream Footer. - * - * \param vli Pointer to variable to hold the decoded integer. - * \param in Beginning of the input buffer - * \param in_size Number of bytes available in the in[] buffer. - * On successful decoding, this is updated to match - * the number of bytes used. (in[*in_size - 1] is the - * first byte to process. After successful decoding, - * in[*in_size] will point to the first byte of the - * variable-length integer.) - * - * \return - LZMA_OK: Decoding successful - * - LZMA_DATA_ERROR: No valid variable-length integer was found. - * - LZMA_BUF_ERROR: Not enough input. Note that in practice, - * this tends to be a sign of broken input, because the - * applications usually do give as much input to this function - * as the applications have available. - */ -extern lzma_ret lzma_vli_reverse_decode( - lzma_vli *vli, const uint8_t *in, size_t *in_size); - - -/** - * \brief Gets the minimum number of bytes required to encode vli + * \brief Gets the number of bytes required to encode vli * * \return Number of bytes on success (1-9). If vli isn't valid, * zero is returned. */ -extern size_t lzma_vli_size(lzma_vli vli); +extern uint32_t lzma_vli_size(lzma_vli vli); diff --git a/src/liblzma/check/Makefile.am b/src/liblzma/check/Makefile.am index e436cb59..182e0868 100644 --- a/src/liblzma/check/Makefile.am +++ b/src/liblzma/check/Makefile.am @@ -14,7 +14,6 @@ libcheck_la_SOURCES = \ check.c \ check.h \ check_init.c \ - check_byteswap.h \ crc_macros.h libcheck_la_CPPFLAGS = \ -I@top_srcdir@/src/liblzma/api \ diff --git a/src/liblzma/check/check.c b/src/liblzma/check/check.c index ba59af2e..388b57e8 100644 --- a/src/liblzma/check/check.c +++ b/src/liblzma/check/check.c @@ -13,8 +13,15 @@ #include "check.h" -// See the .lzma header format specification section 2.2.2. -LZMA_API const uint32_t lzma_check_sizes[8] = { 0, 4, 4, 8, 16, 32, 32, 64 }; +// See the .lzma header format specification section 2.1.1.2. +LZMA_API const uint32_t lzma_check_sizes[LZMA_CHECK_ID_MAX + 1] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { @@ -27,6 +34,7 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { #endif false, // Reserved + false, // Reserved #ifdef HAVE_CHECK_CRC64 true, @@ -35,6 +43,10 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { #endif false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved #ifdef HAVE_CHECK_SHA256 true, @@ -44,6 +56,9 @@ LZMA_API const lzma_bool lzma_available_checks[LZMA_CHECK_ID_MAX + 1] = { false, // Reserved false, // Reserved + false, // Reserved + false, // Reserved + false, // Reserved }; @@ -58,24 +73,24 @@ lzma_check_init(lzma_check *check, lzma_check_type type) #ifdef HAVE_CHECK_CRC32 case LZMA_CHECK_CRC32: - check->crc32 = 0; + check->state.crc32 = 0; break; #endif #ifdef HAVE_CHECK_CRC64 case LZMA_CHECK_CRC64: - check->crc64 = 0; + check->state.crc64 = 0; break; #endif #ifdef HAVE_CHECK_SHA256 case LZMA_CHECK_SHA256: - lzma_sha256_init(&check->sha256); + lzma_sha256_init(check); break; #endif default: - if (type <= LZMA_CHECK_ID_MAX) + if ((unsigned)(type) <= LZMA_CHECK_ID_MAX) ret = LZMA_UNSUPPORTED_CHECK; else ret = LZMA_PROG_ERROR; @@ -93,19 +108,19 @@ lzma_check_update(lzma_check *check, lzma_check_type type, switch (type) { #ifdef HAVE_CHECK_CRC32 case LZMA_CHECK_CRC32: - check->crc32 = lzma_crc32(buf, size, check->crc32); + check->state.crc32 = lzma_crc32(buf, size, check->state.crc32); break; #endif #ifdef HAVE_CHECK_CRC64 case LZMA_CHECK_CRC64: - check->crc64 = lzma_crc64(buf, size, check->crc64); + check->state.crc64 = lzma_crc64(buf, size, check->state.crc64); break; #endif #ifdef HAVE_CHECK_SHA256 case LZMA_CHECK_SHA256: - lzma_sha256_update(buf, size, &check->sha256); + lzma_sha256_update(buf, size, check); break; #endif @@ -120,11 +135,29 @@ lzma_check_update(lzma_check *check, lzma_check_type type, extern void lzma_check_finish(lzma_check *check, lzma_check_type type) { + switch (type) { +#ifdef HAVE_CHECK_CRC32 + case LZMA_CHECK_CRC32: + *(uint32_t *)(check->buffer) = check->state.crc32; + break; +#endif + +#ifdef HAVE_CHECK_CRC64 + case LZMA_CHECK_CRC64: + *(uint64_t *)(check->buffer) = check->state.crc64; + break; +#endif + #ifdef HAVE_CHECK_SHA256 - if (type == LZMA_CHECK_SHA256) - lzma_sha256_finish(&check->sha256); + case LZMA_CHECK_SHA256: + lzma_sha256_finish(check); + break; #endif + default: + break; + } + return; } diff --git a/src/liblzma/check/check.h b/src/liblzma/check/check.h index 74279ceb..45ca25e9 100644 --- a/src/liblzma/check/check.h +++ b/src/liblzma/check/check.h @@ -17,15 +17,21 @@ #include "common.h" +// Index hashing used to verify the Index with O(1) memory usage needs +// a good hash function. +#if defined(HAVE_CHECK_SHA256) +# define LZMA_CHECK_BEST LZMA_CHECK_SHA256 +#elif defined(HAVE_CHECK_CRC64) +# define LZMA_CHECK_BEST LZMA_CHECK_CRC64 +#else +# define LZMA_CHECK_BEST LZMA_CHECK_CRC32 +#endif + + typedef struct { /// Internal state uint32_t state[8]; - /// Temporary 8-byte aligned buffer to hold incomplete chunk. - /// After lzma_check_finish(), the first 32 bytes will contain - /// the final digest in big endian byte order. - uint8_t buffer[64]; - /// Size of the message excluding padding uint64_t size; @@ -34,10 +40,27 @@ typedef struct { /// \note This is not in the public API because this structure will /// change in future. -typedef union { - uint32_t crc32; - uint64_t crc64; - lzma_sha256 sha256; +typedef struct { + // FIXME Guarantee 8-byte alignment + + /// Buffer to hold the final result; this is also used as a temporary + /// buffer in SHA256. Note that this buffer must be 8-byte aligned. + uint8_t buffer[64]; + + /// Check-specific data + union { + uint32_t crc32; + uint64_t crc64; + + struct { + /// Internal state + uint32_t state[8]; + + /// Size of the message excluding padding + uint64_t size; + } sha256; + } state; + } lzma_check; @@ -91,12 +114,12 @@ extern void lzma_crc64_init(void); // SHA256 -extern void lzma_sha256_init(lzma_sha256 *sha256); +extern void lzma_sha256_init(lzma_check *check); extern void lzma_sha256_update( - const uint8_t *buf, size_t size, lzma_sha256 *sha256); + const uint8_t *buf, size_t size, lzma_check *check); -extern void lzma_sha256_finish(lzma_sha256 *sha256); +extern void lzma_sha256_finish(lzma_check *check); #endif diff --git a/src/liblzma/check/check_byteswap.h b/src/liblzma/check/check_byteswap.h deleted file mode 100644 index 264def26..00000000 --- a/src/liblzma/check/check_byteswap.h +++ /dev/null @@ -1,43 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file check_byteswap.h -/// \brief Byteswapping needed by the checks -// -// This code has been put into the public domain. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_CHECK_BYTESWAP_H -#define LZMA_CHECK_BYTESWAP_H - -#ifdef HAVE_CONFIG_H -# include -#endif - -// byteswap.h is a GNU extension. It contains inline assembly versions -// for byteswapping. When byteswap.h is not available, we use generic code. -#ifdef HAVE_BYTESWAP_H -# include -#else -# define bswap_32(num) \ - ( (((num) << 24) ) \ - | (((num) << 8) & UINT32_C(0x00FF0000)) \ - | (((num) >> 8) & UINT32_C(0x0000FF00)) \ - | (((num) >> 24) ) ) - -# define bswap_64(num) \ - ( (((num) << 56) ) \ - | (((num) << 40) & UINT64_C(0x00FF000000000000)) \ - | (((num) << 24) & UINT64_C(0x0000FF0000000000)) \ - | (((num) << 8) & UINT64_C(0x000000FF00000000)) \ - | (((num) >> 8) & UINT64_C(0x00000000FF000000)) \ - | (((num) >> 24) & UINT64_C(0x0000000000FF0000)) \ - | (((num) >> 40) & UINT64_C(0x000000000000FF00)) \ - | (((num) >> 56) ) ) -#endif - -#endif diff --git a/src/liblzma/check/crc32_init.c b/src/liblzma/check/crc32_init.c index 0dd402a4..8b596091 100644 --- a/src/liblzma/check/crc32_init.c +++ b/src/liblzma/check/crc32_init.c @@ -17,7 +17,7 @@ #endif #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif diff --git a/src/liblzma/check/crc64_init.c b/src/liblzma/check/crc64_init.c index 4c91a771..0029987a 100644 --- a/src/liblzma/check/crc64_init.c +++ b/src/liblzma/check/crc64_init.c @@ -17,7 +17,7 @@ #endif #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif diff --git a/src/liblzma/check/crc_macros.h b/src/liblzma/check/crc_macros.h index 5fbecf07..e827d07d 100644 --- a/src/liblzma/check/crc_macros.h +++ b/src/liblzma/check/crc_macros.h @@ -12,7 +12,7 @@ /////////////////////////////////////////////////////////////////////////////// #ifdef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" # define A(x) ((x) >> 24) # define B(x) (((x) >> 16) & 0xFF) diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c index 8e3d375a..ea51896e 100644 --- a/src/liblzma/check/sha256.c +++ b/src/liblzma/check/sha256.c @@ -20,7 +20,7 @@ #include "check.h" #ifndef WORDS_BIGENDIAN -# include "check_byteswap.h" +# include "../../common/bswap.h" #endif // At least on x86, GCC is able to optimize this to a rotate instruction. @@ -104,18 +104,18 @@ transform(uint32_t state[static 8], const uint32_t data[static 16]) static void -process(lzma_sha256 *sha256) +process(lzma_check *check) { #ifdef WORDS_BIGENDIAN - transform(sha256->state, (uint32_t *)(sha256->buffer)); + transform(check->state.sha256.state, (uint32_t *)(check->buffer)); #else uint32_t data[16]; for (size_t i = 0; i < 16; ++i) - data[i] = bswap_32(*((uint32_t*)(sha256->buffer) + i)); + data[i] = bswap_32(*((uint32_t*)(check->buffer) + i)); - transform(sha256->state, data); + transform(check->state.sha256.state, data); #endif return; @@ -123,41 +123,41 @@ process(lzma_sha256 *sha256) extern void -lzma_sha256_init(lzma_sha256 *sha256) +lzma_sha256_init(lzma_check *check) { static const uint32_t s[8] = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, }; - memcpy(sha256->state, s, sizeof(s)); - sha256->size = 0; + memcpy(check->state.sha256.state, s, sizeof(s)); + check->state.sha256.size = 0; return; } extern void -lzma_sha256_update(const uint8_t *buf, size_t size, lzma_sha256 *sha256) +lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check *check) { // Copy the input data into a properly aligned temporary buffer. // This way we can be called with arbitrarily sized buffers // (no need to be multiple of 64 bytes), and the code works also // on architectures that don't allow unaligned memory access. while (size > 0) { - const size_t copy_start = sha256->size & 0x3F; + const size_t copy_start = check->state.sha256.size & 0x3F; size_t copy_size = 64 - copy_start; if (copy_size > size) copy_size = size; - memcpy(sha256->buffer + copy_start, buf, copy_size); + memcpy(check->buffer + copy_start, buf, copy_size); buf += copy_size; size -= copy_size; - sha256->size += copy_size; + check->state.sha256.size += copy_size; - if ((sha256->size & 0x3F) == 0) - process(sha256); + if ((check->state.sha256.size & 0x3F) == 0) + process(check); } return; @@ -165,38 +165,41 @@ lzma_sha256_update(const uint8_t *buf, size_t size, lzma_sha256 *sha256) extern void -lzma_sha256_finish(lzma_sha256 *sha256) +lzma_sha256_finish(lzma_check *check) { // Add padding as described in RFC 3174 (it describes SHA-1 but // the same padding style is used for SHA-256 too). - size_t pos = sha256->size & 0x3F; - sha256->buffer[pos++] = 0x80; + size_t pos = check->state.sha256.size & 0x3F; + check->buffer[pos++] = 0x80; while (pos != 64 - 8) { if (pos == 64) { - process(sha256); + process(check); pos = 0; } - sha256->buffer[pos++] = 0x00; + check->buffer[pos++] = 0x00; } // Convert the message size from bytes to bits. - sha256->size *= 8; + check->state.sha256.size *= 8; #ifdef WORDS_BIGENDIAN - *(uint64_t *)(sha256->buffer + 64 - 8) = sha256->size; + *(uint64_t *)(check->buffer + 64 - 8) = check->state.sha256.size; #else - *(uint64_t *)(sha256->buffer + 64 - 8) = bswap_64(sha256->size); + *(uint64_t *)(check->buffer + 64 - 8) + = bswap_64(check->state.sha256.size); #endif - process(sha256); + process(check); for (size_t i = 0; i < 8; ++i) #ifdef WORDS_BIGENDIAN - ((uint32_t *)(sha256->buffer))[i] = sha256->state[i]; + ((uint32_t *)(check->buffer))[i] + = check->state.sha256.state[i]; #else - ((uint32_t *)(sha256->buffer))[i] = bswap_32(sha256->state[i]); + ((uint32_t *)(check->buffer))[i] + = bswap_32(check->state.sha256.state[i]); #endif return; diff --git a/src/liblzma/common/Makefile.am b/src/liblzma/common/Makefile.am index c76ce14f..40b42250 100644 --- a/src/liblzma/common/Makefile.am +++ b/src/liblzma/common/Makefile.am @@ -25,26 +25,20 @@ libcommon_la_SOURCES = \ common.h \ bsr.h \ allocator.c \ + block_util.c \ block_private.h \ - extra.c \ features.c \ index.c \ - info.c \ init.c \ memory_limiter.c \ memory_usage.c \ next_coder.c \ raw_common.c \ raw_common.h \ + stream_flags_equal.c \ code.c \ version.c -if COND_FILTER_COPY -libcommon_la_SOURCES += \ - copy_coder.c \ - copy_coder.h -endif - if COND_FILTER_DELTA libcommon_la_SOURCES += \ delta_common.c \ @@ -69,21 +63,17 @@ libcommon_la_SOURCES += \ block_encoder.c \ block_encoder.h \ block_header_encoder.c \ - easy_common.c \ - easy_common.h \ - easy_single.c \ - easy_multi.c \ + easy.c \ filter_flags_encoder.c \ + index_encoder.c \ + index_encoder.h \ init_encoder.c \ - metadata_encoder.c \ - metadata_encoder.h \ raw_encoder.c \ raw_encoder.h \ stream_common.c \ stream_common.h \ - stream_encoder_single.c \ - stream_encoder_multi.c \ - stream_encoder_multi.h \ + stream_encoder.c \ + stream_encoder.h \ stream_flags_encoder.c \ vli_encoder.c endif @@ -96,14 +86,13 @@ libcommon_la_SOURCES += \ block_decoder.h \ block_header_decoder.c \ filter_flags_decoder.c \ + index_decoder.c \ + index_hash.c \ init_decoder.c \ - metadata_decoder.c \ - metadata_decoder.h \ raw_decoder.c \ raw_decoder.h \ stream_decoder.c \ stream_flags_decoder.c \ stream_flags_decoder.h \ - vli_decoder.c \ - vli_reverse_decoder.c + vli_decoder.c endif diff --git a/src/liblzma/common/alignment.c b/src/liblzma/common/alignment.c index 2d468fe5..c80e5fab 100644 --- a/src/liblzma/common/alignment.c +++ b/src/liblzma/common/alignment.c @@ -25,7 +25,6 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) { for (size_t i = 0; filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // The same as the input, check the next filter. continue; @@ -69,9 +68,8 @@ lzma_alignment_input(const lzma_options_filter *filters, uint32_t guess) extern LZMA_API uint32_t lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) { - // Check if there is only an implicit Copy filter. if (filters[0].id == LZMA_VLI_VALUE_UNKNOWN) - return guess; + return UINT32_MAX; // Find the last filter in the chain. size_t i = 0; @@ -80,7 +78,6 @@ lzma_alignment_output(const lzma_options_filter *filters, uint32_t guess) do { switch (filters[i].id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_DELTA: // It's the same as the input alignment, so // check the next filter. diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c index 91df5bf2..062f6fab 100644 --- a/src/liblzma/common/alone_decoder.c +++ b/src/liblzma/common/alone_decoder.c @@ -32,9 +32,15 @@ struct lzma_coder_s { SEQ_CODE, } sequence; + /// Position in the header fields size_t pos; - lzma_options_alone options; + /// Uncompressed size decoded from the header + lzma_vli uncompressed_size; + + /// Options decoded from the header needed to initialize + /// the LZMA decoder + lzma_options_lzma options; }; @@ -50,34 +56,39 @@ alone_decode(lzma_coder *coder, && (coder->sequence == SEQ_CODE || *in_pos < in_size)) switch (coder->sequence) { case SEQ_PROPERTIES: - if (lzma_lzma_decode_properties( - &coder->options.lzma, in[*in_pos])) - return LZMA_DATA_ERROR; + if (lzma_lzma_decode_properties(&coder->options, in[*in_pos])) + return LZMA_FORMAT_ERROR; coder->sequence = SEQ_DICTIONARY_SIZE; ++*in_pos; break; case SEQ_DICTIONARY_SIZE: - coder->options.lzma.dictionary_size + coder->options.dictionary_size |= (size_t)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 4) { - // A hack to ditch tons of false positives: We allow - // only dictionary sizes that are a power of two. - // LZMA_Alone didn't create other kinds of files, - // although it's not impossible that files with - // other dictionary sizes exist. Well, if someone - // complains, this will be reconsidered. - size_t count = 0; - for (size_t i = 0; i < 32; ++i) - if (coder->options.lzma.dictionary_size - & (UINT32_C(1) << i)) - ++count; - - if (count != 1 || coder->options.lzma.dictionary_size + if (coder->options.dictionary_size + < LZMA_DICTIONARY_SIZE_MIN + || coder->options.dictionary_size > LZMA_DICTIONARY_SIZE_MAX) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; + + // A hack to ditch tons of false positives: We allow + // only dictionary sizes that are 2^n or 2^n + 2^(n-1). + // LZMA_Alone created only files with 2^n, but accepts + // any dictionary size. If someone complains, this + // will be reconsidered. + uint32_t d = coder->options.dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + if (d != coder->options.dictionary_size) + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_UNCOMPRESSED_SIZE; @@ -87,7 +98,7 @@ alone_decode(lzma_coder *coder, break; case SEQ_UNCOMPRESSED_SIZE: - coder->options.uncompressed_size + coder->uncompressed_size |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8); if (++coder->pos == 8) { @@ -95,11 +106,10 @@ alone_decode(lzma_coder *coder, // if the uncompressed size is known, it must be less // than 256 GiB. Again, if someone complains, this // will be reconsidered. - if (coder->options.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->options.uncompressed_size + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN + && coder->uncompressed_size >= (LZMA_VLI_C(1) << 38)) - return LZMA_DATA_ERROR; + return LZMA_FORMAT_ERROR; coder->pos = 0; coder->sequence = SEQ_CODER_INIT; @@ -113,9 +123,7 @@ alone_decode(lzma_coder *coder, lzma_filter_info filters[2] = { { .init = &lzma_lzma_decoder_init, - .options = &coder->options.lzma, - .uncompressed_size = coder->options - .uncompressed_size, + .options = &coder->options, }, { .init = NULL, } @@ -126,6 +134,10 @@ alone_decode(lzma_coder *coder, if (ret != LZMA_OK) return ret; + // Use a hack to set the uncompressed size. + lzma_lzma_decoder_uncompressed_size(&coder->next, + coder->uncompressed_size); + coder->sequence = SEQ_CODE; } @@ -169,8 +181,8 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) next->coder->sequence = SEQ_PROPERTIES; next->coder->pos = 0; - next->coder->options.lzma.dictionary_size = 0; - next->coder->options.uncompressed_size = 0; + next->coder->options.dictionary_size = 0; + next->coder->uncompressed_size = 0; return LZMA_OK; } @@ -179,17 +191,14 @@ alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) extern lzma_ret lzma_alone_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - // We need to use _init2 because we don't pass any varadic args. - lzma_next_coder_init2(next, allocator, alone_decoder_init, - alone_decoder_init, allocator); + lzma_next_coder_init0(alone_decoder_init, next, allocator); } extern LZMA_API lzma_ret lzma_alone_decoder(lzma_stream *strm) { - lzma_next_strm_init2(strm, alone_decoder_init, - alone_decoder_init, strm->allocator); + lzma_next_strm_init0(strm, alone_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c index 7629aa77..f94a21c1 100644 --- a/src/liblzma/common/alone_encoder.c +++ b/src/liblzma/common/alone_encoder.c @@ -21,19 +21,19 @@ #include "lzma_encoder.h" +#define ALONE_HEADER_SIZE (1 + 4 + 8) + + struct lzma_coder_s { lzma_next_coder next; enum { - SEQ_PROPERTIES, - SEQ_DICTIONARY_SIZE, - SEQ_UNCOMPRESSED_SIZE, + SEQ_HEADER, SEQ_CODE, } sequence; - size_t pos; - - lzma_options_alone options; + size_t header_pos; + uint8_t header[ALONE_HEADER_SIZE]; }; @@ -47,47 +47,23 @@ alone_encode(lzma_coder *coder, { while (*out_pos < out_size) switch (coder->sequence) { - case SEQ_PROPERTIES: - if (lzma_lzma_encode_properties( - &coder->options.lzma, out + *out_pos)) { - return LZMA_PROG_ERROR; - } - - coder->sequence = SEQ_DICTIONARY_SIZE; - ++*out_pos; - break; - - case SEQ_DICTIONARY_SIZE: - out[*out_pos] = coder->options.lzma.dictionary_size - >> (coder->pos * 8); - - if (++coder->pos == 4) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - ++*out_pos; - break; - - case SEQ_UNCOMPRESSED_SIZE: - out[*out_pos] = coder->options.uncompressed_size - >> (coder->pos * 8); - - if (++coder->pos == 8) { - coder->pos = 0; - coder->sequence = SEQ_CODE; - } - - ++*out_pos; + case SEQ_HEADER: + bufcpy(coder->header, &coder->header_pos, + ALONE_HEADER_SIZE, + out, out_pos, out_size); + if (coder->header_pos < ALONE_HEADER_SIZE) + return LZMA_OK; + + coder->sequence = SEQ_CODE; break; - case SEQ_CODE: { + case SEQ_CODE: return coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -107,7 +83,7 @@ alone_encoder_end(lzma_coder *coder, lzma_allocator *allocator) // At least for now, this is not used by any internal function. static lzma_ret alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_alone *options) + const lzma_options_lzma *options) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -119,23 +95,42 @@ alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the LZMA_Alone coder variables. - next->coder->sequence = SEQ_PROPERTIES; - next->coder->pos = 0; - next->coder->options = *options; + // Basic initializations + next->coder->sequence = SEQ_HEADER; + next->coder->header_pos = 0; - // Verify uncompressed_size since the other functions assume that - // it is valid. - if (!lzma_vli_is_valid(next->coder->options.uncompressed_size)) + // Encode the header: + // - Properties (1 byte) + if (lzma_lzma_encode_properties(options, next->coder->header)) return LZMA_PROG_ERROR; + // - Dictionary size (4 bytes) + if (options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || options->dictionary_size > LZMA_DICTIONARY_SIZE_MAX) + return LZMA_PROG_ERROR; + + // Round up to to the next 2^n or 2^n + 2^(n - 1) depending on which + // one is the next. While the header would allow any 32-bit integer, + // we do this to keep the decoder of liblzma accepting the resulting + // files. + uint32_t d = options->dictionary_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + + integer_write_32(next->coder->header + 1, d); + + // - Uncompressed size (always unknown and using EOPM) + memset(next->coder->header + 1 + 4, 0xFF, 8); + // Initialize the LZMA encoder. const lzma_filter_info filters[2] = { { .init = &lzma_lzma_encoder_init, - .options = &next->coder->options.lzma, - .uncompressed_size = next->coder->options - .uncompressed_size, + .options = (void *)(options), }, { .init = NULL, } @@ -156,7 +151,7 @@ lzma_alone_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_alone_encoder(lzma_stream *strm, const lzma_options_alone *options) +lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options) { lzma_next_strm_init(strm, alone_encoder_init, options); diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c index 7e92df9a..765a27b1 100644 --- a/src/liblzma/common/auto_decoder.c +++ b/src/liblzma/common/auto_decoder.c @@ -17,15 +17,12 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "stream_decoder.h" #include "alone_decoder.h" struct lzma_coder_s { lzma_next_coder next; - - lzma_extra **header; - lzma_extra **footer; bool initialized; }; @@ -43,8 +40,8 @@ auto_decode(lzma_coder *coder, lzma_allocator *allocator, lzma_ret ret; if (in[*in_pos] == 0xFF) - ret = lzma_stream_decoder_init(&coder->next, allocator, - coder->header, coder->footer); + ret = lzma_stream_decoder_init( + &coder->next, allocator); else ret = lzma_alone_decoder_init(&coder->next, allocator); @@ -69,8 +66,7 @@ auto_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret -auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -82,8 +78,6 @@ auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - next->coder->header = header; - next->coder->footer = footer; next->coder->initialized = false; return LZMA_OK; @@ -102,9 +96,9 @@ lzma_auto_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_auto_decoder(lzma_stream *strm, lzma_extra **header, lzma_extra **footer) +lzma_auto_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, auto_decoder_init, header, footer); + lzma_next_strm_init0(strm, auto_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index e1b5dc96..f07c4e06 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -26,129 +26,47 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, - SEQ_END, + SEQ_CHECK, } sequence; /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Decoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Decoding options; we also write Compressed Size and Uncompressed + /// Size back to this structure when the encoding has been finished. lzma_options_block *options; - /// Position in variable-length integers (and in some other places). - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Maximum allowed Compressed Size; this takes into account the + /// size of the Block Header and Check fields when Compressed Size + /// is unknown. + lzma_vli compressed_limit; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; + /// Position when reading the Check field + size_t check_pos; - /// Temporary location for the Uncompressed Size and Backward Size - /// fields in Block Footer. - lzma_vli tmp; - - /// Size of the Backward Size field - This is needed so that we - /// can verify the Backward Size and still keep updating total_size. - size_t size_of_backward_size; + /// Check of the uncompressed data + lzma_check check; }; -static lzma_ret -update_sequence(lzma_coder *coder) -{ - switch (coder->sequence) { - case SEQ_CODE: - if (coder->options->check != LZMA_CHECK_NONE) { - lzma_check_finish(&coder->check, - coder->options->check); - coder->sequence = SEQ_CHECK; - break; - } - - // Fall through - - case SEQ_CHECK: - if (coder->options->has_uncompressed_size_in_footer) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_backward_size) { - coder->sequence = SEQ_BACKWARD_SIZE; - break; - } - - // Fall through - - case SEQ_BACKWARD_SIZE: - if (coder->options->handle_padding) { - coder->sequence = SEQ_PADDING; - break; - } - - case SEQ_PADDING: - if (!is_size_valid(coder->total_size, - coder->options->total_size) - || !is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - static lzma_ret block_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - // Special case when the Block has only Block Header. - if (coder->sequence == SEQ_END) - return LZMA_STREAM_END; - - // FIXME: Termination condition should work but could be cleaner. - while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_CODE)) switch (coder->sequence) { case SEQ_CODE: { + if (*out_pos >= out_size) + return LZMA_OK; + const size_t in_start = *in_pos; const size_t out_start = *out_pos; @@ -159,13 +77,13 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, in_used, - coder->total_limit) - || update_size(&coder->compressed_size, - in_used, - coder->options->compressed_size) + // NOTE: We compare to compressed_limit here, which prevents + // the total size of the Block growing past LZMA_VLI_VALUE_MAX. + if (update_size(&coder->compressed_size, in_used, + coder->compressed_limit) || update_size(&coder->uncompressed_size, - out_used, coder->uncompressed_limit)) + out_used, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; lzma_check_update(&coder->check, coder->options->check, @@ -174,116 +92,61 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - return_if_error(update_sequence(coder)); - - break; + coder->sequence = SEQ_PADDING; } - case SEQ_CHECK: - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - if (((coder->check.crc32 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + // Fall through - case LZMA_CHECK_CRC64: - if (((coder->check.crc64 >> (coder->pos * 8)) - & 0xFF) != in[*in_pos]) - return LZMA_DATA_ERROR; - break; + case SEQ_PADDING: + // If Compressed Data is padded to a multiple of four bytes. + while (coder->compressed_size & 3) { + if (*in_pos >= in_size) + return LZMA_OK; - case LZMA_CHECK_SHA256: - if (coder->check.sha256.buffer[coder->pos] - != in[*in_pos]) + if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - break; - - default: - assert(coder->options->check != LZMA_CHECK_NONE); - assert(coder->options->check <= LZMA_CHECK_ID_MAX); - break; - } - - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - ++*in_pos; - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - return_if_error(update_sequence(coder)); - coder->pos = 0; + if (update_size(&coder->compressed_size, 1, + coder->compressed_limit)) + return LZMA_DATA_ERROR; } - break; - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - if (update_size(&coder->total_size, *in_pos - in_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->tmp != coder->uncompressed_size) - return LZMA_DATA_ERROR; - - coder->pos = 0; - coder->tmp = 0; - - return_if_error(update_sequence(coder)); - - break; - } - - case SEQ_BACKWARD_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode(&coder->tmp, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - if (update_size(&coder->total_size, in_used, - coder->total_limit)) + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->size_of_backward_size += in_used; - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->tmp != coder->total_size - - coder->size_of_backward_size) - return LZMA_DATA_ERROR; + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; - return_if_error(update_sequence(coder)); + lzma_check_finish(&coder->check, coder->options->check); + coder->sequence = SEQ_CHECK; - break; - } + // Fall through - case SEQ_PADDING: - if (in[*in_pos] == 0x00) { - if (update_size(&coder->total_size, 1, - coder->total_limit)) + case SEQ_CHECK: + while (*in_pos < in_size) { + if (in[(*in_pos)++] != coder->check.buffer[ + coder->check_pos]) return LZMA_DATA_ERROR; - ++*in_pos; - break; + if (++coder->check_pos == lzma_check_sizes[ + coder->options->check]) + return LZMA_STREAM_END; } - return update_sequence(coder); - - default: - return LZMA_PROG_ERROR; + return LZMA_OK; } - return LZMA_OK; + return LZMA_PROG_ERROR; } @@ -300,9 +163,12 @@ static lzma_ret block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // This is pretty similar to lzma_block_encoder_init(). - // See comments there. + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) + return LZMA_PROG_ERROR; + // Allocate and initialize *next->coder if needed. if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) @@ -313,40 +179,28 @@ block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - if (validate_options_1(options)) - return LZMA_PROG_ERROR; - - if (validate_options_2(options)) - return LZMA_DATA_ERROR; - - return_if_error(lzma_check_init(&next->coder->check, options->check)); - + // Basic initializations next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); - next->coder->tmp = 0; - next->coder->size_of_backward_size = 0; - - if (!options->has_eopm && options->uncompressed_size == 0) { - // The Compressed Data field is empty, thus we skip SEQ_CODE - // phase completely. - const lzma_ret ret = update_sequence(next->coder); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + + // If Compressed Size is not known, we calculate the maximum allowed + // value so that Total Size of the Block still is a valid VLI and + // a multiple of four. + next->coder->compressed_limit + = options->compressed_size == LZMA_VLI_VALUE_UNKNOWN + ? (LZMA_VLI_VALUE_MAX & ~LZMA_VLI_C(3)) + - options->header_size + - lzma_check_sizes[options->check] + : options->compressed_size; + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); return lzma_raw_decoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 78185790..3add45a9 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -34,37 +34,21 @@ struct lzma_coder_s { enum { SEQ_CODE, - SEQ_CHECK_FINISH, - SEQ_CHECK_COPY, - SEQ_UNCOMPRESSED_SIZE, - SEQ_BACKWARD_SIZE, SEQ_PADDING, + SEQ_CHECK, } sequence; - /// Position in .header and .check. - size_t pos; - - /// Check of the uncompressed data - lzma_check check; - - /// Total Size calculated while encoding - lzma_vli total_size; - /// Compressed Size calculated while encoding lzma_vli compressed_size; /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Maximum allowed total_size - lzma_vli total_limit; + /// Position when writing out the Check field + size_t check_pos; - /// Maximum allowed uncompressed_size - lzma_vli uncompressed_limit; - - /// Backward Size - This is a copy of total_size right before - /// the Backward Size field. - lzma_vli backward_size; + /// Check of the uncompressed data + lzma_check check; }; @@ -80,16 +64,16 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, if (coder->options->uncompressed_size - coder->uncompressed_size != (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } else { if (coder->options->uncompressed_size - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } } else if (LZMA_VLI_VALUE_MAX - coder->uncompressed_size < (lzma_vli)(in_size - *in_pos)) { - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; } // Main loop @@ -107,11 +91,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, const size_t in_used = *in_pos - in_start; const size_t out_used = *out_pos - out_start; - if (update_size(&coder->total_size, out_used, - coder->total_limit) - || update_size(&coder->compressed_size, - out_used, - coder->options->compressed_size)) + // FIXME We must also check that Total Size doesn't get + // too big. + if (update_size(&coder->compressed_size, out_used, + coder->options->compressed_size)) return LZMA_DATA_ERROR; // No need to check for overflow because we have already @@ -125,141 +108,54 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; assert(*in_pos == in_size); + coder->sequence = SEQ_PADDING; + break; + } + + case SEQ_PADDING: + // Pad Compressed Data to a multiple of four bytes. + if (coder->compressed_size & 3) { + out[*out_pos] = 0x00; + ++*out_pos; + + if (update_size(&coder->compressed_size, 1, + coder->options->compressed_size)) + return LZMA_DATA_ERROR; + + break; + } // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values give to us. + // values. Verify that they match the values given to us. if (!is_size_valid(coder->compressed_size, coder->options->compressed_size) || !is_size_valid(coder->uncompressed_size, coder->options->uncompressed_size)) return LZMA_DATA_ERROR; - coder->sequence = SEQ_CHECK_FINISH; - break; - } + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; - case SEQ_CHECK_FINISH: - if (coder->options->check == LZMA_CHECK_NONE) { - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } + if (coder->options->check == LZMA_CHECK_NONE) + return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); - coder->sequence = SEQ_CHECK_COPY; + coder->sequence = SEQ_CHECK; // Fall through - case SEQ_CHECK_COPY: - assert(lzma_check_sizes[coder->options->check] > 0); - - switch (coder->options->check) { - case LZMA_CHECK_CRC32: - out[*out_pos] = coder->check.crc32 >> (coder->pos * 8); - break; - - case LZMA_CHECK_CRC64: - out[*out_pos] = coder->check.crc64 >> (coder->pos * 8); - break; - - case LZMA_CHECK_SHA256: - out[*out_pos] = coder->check.sha256.buffer[coder->pos]; - break; - - default: - assert(0); - return LZMA_PROG_ERROR; - } - + case SEQ_CHECK: + out[*out_pos] = coder->check.buffer[coder->check_pos]; ++*out_pos; - if (update_size(&coder->total_size, 1, coder->total_limit)) - return LZMA_DATA_ERROR; - - if (++coder->pos == lzma_check_sizes[coder->options->check]) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - } - - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->options->has_uncompressed_size_in_footer) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->uncompressed_size, - &coder->pos, 1, - out, out_pos, out_size); - - // Updating the size this way instead of doing in a - // single chunk using lzma_vli_size(), because this - // way we detect when exactly we are going out of - // our limits. - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - - coder->pos = 0; - } + if (++coder->check_pos + == lzma_check_sizes[coder->options->check]) + return LZMA_STREAM_END; - coder->backward_size = coder->total_size; - coder->sequence = SEQ_BACKWARD_SIZE; break; - case SEQ_BACKWARD_SIZE: - if (coder->options->has_backward_size) { - const size_t out_start = *out_pos; - - const lzma_ret ret = lzma_vli_encode( - coder->backward_size, &coder->pos, 1, - out, out_pos, out_size); - - if (update_size(&coder->total_size, - *out_pos - out_start, - coder->total_limit)) - return LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) - return ret; - } - - coder->sequence = SEQ_PADDING; - break; - - case SEQ_PADDING: - if (coder->options->handle_padding) { - assert(coder->options->total_size - != LZMA_VLI_VALUE_UNKNOWN); - - if (coder->total_size < coder->options->total_size) { - out[*out_pos] = 0x00; - ++*out_pos; - - if (update_size(&coder->total_size, 1, - coder->total_limit)) - return LZMA_DATA_ERROR; - - break; - } - } - - // Now also Total Size is known. Verify it. - if (!is_size_valid(coder->total_size, - coder->options->total_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->total_size = coder->total_size; - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - - return LZMA_STREAM_END; - default: return LZMA_PROG_ERROR; } @@ -281,10 +177,9 @@ static lzma_ret block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, lzma_options_block *options) { - // Validate some options. - if (validate_options_1(options) || validate_options_2(options) - || (options->handle_padding && options->total_size - == LZMA_VLI_VALUE_UNKNOWN)) + // While lzma_block_total_size_get() is meant to calculate the Total + // Size, it also validates the options excluding the filters. + if (lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -298,40 +193,19 @@ block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next = LZMA_NEXT_CODER_INIT; } - // Initialize the check. - return_if_error(lzma_check_init(&next->coder->check, options->check)); - - // If End of Payload Marker is not used and Uncompressed Size is zero, - // Compressed Data is empty. That is, we don't call the encoder at all. - // We initialize it though; it allows detecting invalid options. - if (!options->has_eopm && options->uncompressed_size == 0) { - // Also Compressed Size must be zero if it has been - // given to us. - if (!is_size_valid(0, options->compressed_size)) - return LZMA_PROG_ERROR; - - next->coder->sequence = SEQ_CHECK_FINISH; - } else { - next->coder->sequence = SEQ_CODE; - } - - // Other initializations + // Basic initializations + next->coder->sequence = SEQ_CODE; next->coder->options = options; - next->coder->pos = 0; - next->coder->total_size = options->header_size; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; - next->coder->total_limit - = MIN(options->total_size, options->total_limit); - next->coder->uncompressed_limit = MIN(options->uncompressed_size, - options->uncompressed_limit); + + // Initialize the check + next->coder->check_pos = 0; + return_if_error(lzma_check_init(&next->coder->check, options->check)); // Initialize the requested filters. return lzma_raw_encoder_init(&next->coder->next, allocator, - options->filters, options->has_eopm - ? LZMA_VLI_VALUE_UNKNOWN - : options->uncompressed_size, - true); + options->filters); } diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 7676c795..b9e072e0 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -21,353 +21,111 @@ #include "check.h" -struct lzma_coder_s { - lzma_options_block *options; - - enum { - SEQ_FLAGS_1, - SEQ_FLAGS_2, - SEQ_COMPRESSED_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_FILTER_FLAGS_INIT, - SEQ_FILTER_FLAGS_DECODE, - SEQ_CRC32, - SEQ_PADDING - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// CRC32 of the Block Header - uint32_t crc32; - - lzma_next_coder filter_flags_decoder; -}; - - -static bool -update_sequence(lzma_coder *coder) +static void +free_properties(lzma_options_block *options, lzma_allocator *allocator) { - switch (coder->sequence) { - case SEQ_FLAGS_2: - if (coder->options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_COMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_COMPRESSED_SIZE: - if (coder->options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->pos = 0; - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - } - - // Fall through - - case SEQ_UNCOMPRESSED_SIZE: - coder->pos = 0; - - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: - if (coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_FILTER_FLAGS_INIT; - break; - } - - if (coder->options->has_crc32) { - coder->pos = 0; - coder->sequence = SEQ_CRC32; - break; - } - - case SEQ_CRC32: - if (coder->options->padding != 0) { - coder->pos = 0; - coder->sequence = SEQ_PADDING; - break; - } - - return true; - - default: - assert(0); - return true; + // Free allocated filter options. The last array member is not + // touched after the initialization in the beginning of + // lzma_block_header_decode(), so we don't need to touch that here. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + lzma_free(options->filters[i].options, allocator); + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - return false; + return; } -static lzma_ret -block_header_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_block_header_decode(lzma_options_block *options, + lzma_allocator *allocator, const uint8_t *in) { - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FLAGS_1: - // Check that the reserved bit is unset. Use HEADER_ERROR - // because newer version of liblzma may support the reserved - // bit, although it is likely that this is just a broken file. - if (in[*in_pos] & 0x40) - return LZMA_HEADER_ERROR; - - // Number of filters: we prepare appropriate amount of - // variables for variable-length integer parsing. The - // initialization function has already reset the rest - // of the values to LZMA_VLI_VALUE_UNKNOWN, which allows - // us to later know how many filters there are. - for (int i = (int)(in[*in_pos] & 0x07) - 1; i >= 0; --i) - coder->options->filters[i].id = 0; - - // End of Payload Marker flag - coder->options->has_eopm = (in[*in_pos] & 0x08) != 0; - - // Compressed Size: Prepare for variable-length integer - // parsing if it is known. - if (in[*in_pos] & 0x10) - coder->options->compressed_size = 0; - - // Uncompressed Size: the same. - if (in[*in_pos] & 0x20) - coder->options->uncompressed_size = 0; - - // Is Metadata Block flag - coder->options->is_metadata = (in[*in_pos] & 0x80) != 0; - - // We need at least one: Uncompressed Size or EOPM. - if (coder->options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - && !coder->options->has_eopm) - return LZMA_DATA_ERROR; - - // Update header CRC32. - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - coder->sequence = SEQ_FLAGS_2; - break; - - case SEQ_FLAGS_2: - // Check that the reserved bits are unset. - if (in[*in_pos] & 0xE0) - return LZMA_DATA_ERROR; - - // Get the size of Header Padding. - coder->options->padding = in[*in_pos] & 0x1F; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, coder->crc32); - - ++*in_pos; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - - case SEQ_COMPRESSED_SIZE: { - // Store the old input position to be used when - // updating coder->header_crc32. - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->compressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->compressed_reserve += in_used; - assert(coder->options->compressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_UNCOMPRESSED_SIZE: { - const size_t in_start = *in_pos; - - const lzma_ret ret = lzma_vli_decode( - &coder->options->uncompressed_size, - &coder->pos, in, in_pos, in_size); - - const size_t in_used = *in_pos - in_start; - - coder->options->uncompressed_reserve += in_used; - assert(coder->options->uncompressed_reserve - <= LZMA_VLI_BYTES_MAX); - - coder->options->header_size += in_used; - - coder->crc32 = lzma_crc32(in + in_start, in_used, - coder->crc32); - - if (ret != LZMA_STREAM_END) - return ret; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - - break; - } - - case SEQ_FILTER_FLAGS_INIT: { - assert(coder->options->filters[coder->pos].id - != LZMA_VLI_VALUE_UNKNOWN); - - const lzma_ret ret = lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, allocator, - &coder->options->filters[coder->pos]); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_FILTER_FLAGS_DECODE; + // NOTE: We consider the header to be corrupt not only when the + // CRC32 doesn't match, but also when variable-length integers + // are invalid or not over 63 bits, or if the header is too small + // to contain the claimed information. + + // Initialize the filter options array. This way the caller can + // safely free() the options even if an error occurs in this function. + for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; + options->filters[i].options = NULL; } - // Fall through - - case SEQ_FILTER_FLAGS_DECODE: { - const size_t in_start = *in_pos; + size_t in_size = options->header_size; - const lzma_ret ret = coder->filter_flags_decoder.code( - coder->filter_flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - const size_t in_used = *in_pos - in_start; - coder->options->header_size += in_used; - coder->crc32 = lzma_crc32(in + in_start, - in_used, coder->crc32); + // Validate. The caller must have set options->header_size with + // lzma_block_header_size_decode() macro, so it is a programming error + // if these tests fail. + if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN + || in_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (in_size & 3) + || lzma_block_header_size_decode(in[0]) != in_size) + return LZMA_PROG_ERROR; - if (ret != LZMA_STREAM_END) - return ret; + // Exclude the CRC32 field. + in_size -= 4; - ++coder->pos; + // Verify CRC32 + if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) + return LZMA_DATA_ERROR; - if (update_sequence(coder)) - return LZMA_STREAM_END; + // Check for unsupported flags. + if (in[1] & 0x3C) + return LZMA_HEADER_ERROR; - break; - } + // Start after the Block Header Size and Block Flags fields. + size_t in_pos = 2; - case SEQ_CRC32: - assert(coder->options->has_crc32); + // Compressed Size + if (in[1] & 0x40) { + return_if_error(lzma_vli_decode(&options->compressed_size, + NULL, in, &in_pos, in_size)); - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1) return LZMA_DATA_ERROR; - ++*in_pos; - ++coder->pos; - - // Check if we reached end of the CRC32 field. - if (coder->pos == 4) { - coder->options->header_size += 4; - - if (update_sequence(coder)) - return LZMA_STREAM_END; - } - - break; + options->compressed_size = (options->compressed_size + 1) * 4; - case SEQ_PADDING: - if (in[*in_pos] != 0x00) + // Check that Total Size (that is, size of + // Block Header + Compressed Data + Check) is + // representable as a VLI. + if (lzma_block_total_size_get(options) == 0) return LZMA_DATA_ERROR; - - ++*in_pos; - ++coder->options->header_size; - ++coder->pos; - - if (coder->pos < (size_t)(coder->options->padding)) - break; - - return LZMA_STREAM_END; - - default: - return LZMA_PROG_ERROR; + } else { + options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; } - return LZMA_OK; -} - - -static void -block_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->filter_flags_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &block_header_decode; - next->end = &block_header_decoder_end; - next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT; + // Uncompressed Size + if (in[1] & 0x80) + return_if_error(lzma_vli_decode(&options->uncompressed_size, + NULL, in, &in_pos, in_size)); + else + options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + // Filter Flags + const size_t filter_count = (in[1] & 3) + 1; + for (size_t i = 0; i < filter_count; ++i) { + const lzma_ret ret = lzma_filter_flags_decode( + &options->filters[i], allocator, + in, &in_pos, in_size); + if (ret != LZMA_OK) { + free_properties(options, allocator); + return ret; + } } - // Assume that Compressed Size and Uncompressed Size are unknown. - options->compressed_size = LZMA_VLI_VALUE_UNKNOWN; - options->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // We will calculate the sizes of these fields too so that the - // application may rewrite the header if it wishes so. - options->compressed_reserve = 0; - options->uncompressed_reserve = 0; + // Padding + while (in_pos < in_size) { + if (in[in_pos++] != 0x00) { + free_properties(options, allocator); - // The Block Flags field is always present, so include its size here - // and we don't need to worry about it in block_header_decode(). - options->header_size = 2; - - // Reset filters[] to indicate empty list of filters. - // See SEQ_FLAGS_1 in block_header_decode() for reasoning of this. - for (size_t i = 0; i < 8; ++i) { - options->filters[i].id = LZMA_VLI_VALUE_UNKNOWN; - options->filters[i].options = NULL; + // Possibly some new field present so use + // LZMA_HEADER_ERROR instead of LZMA_DATA_ERROR. + return LZMA_HEADER_ERROR; + } } - next->coder->options = options; - next->coder->sequence = SEQ_FLAGS_1; - next->coder->pos = 0; - next->coder->crc32 = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_block_header_decoder(lzma_stream *strm, - lzma_options_block *options) -{ - lzma_next_strm_init(strm, lzma_block_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - return LZMA_OK; } diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 594b4fc0..ed0c88ba 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -24,188 +24,129 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_options_block *options) { - // Block Flags take two bytes. - size_t size = 2; + // Block Header Size + Block Flags + CRC32. + size_t size = 1 + 1 + 4; // Compressed Size - if (!lzma_vli_is_valid(options->compressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->compressed_reserve != 0) { - // Make sure that the known Compressed Size fits into the - // reserved space. Note that lzma_vli_size() will return zero - // if options->compressed_size is LZMA_VLI_VALUE_UNKNOWN, so - // we don't need to handle that special case separately. - if (options->compressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->compressed_size) - > (size_t)(options->compressed_reserve)) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + if (options->compressed_size > LZMA_VLI_VALUE_MAX / 4 - 1 + || options->compressed_size == 0 + || (options->compressed_size & 3)) return LZMA_PROG_ERROR; - size += options->compressed_reserve; - - } else if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Compressed Size is known. We have already checked - // that is is a valid VLI, and since it isn't - // LZMA_VLI_VALUE_UNKNOWN, we can be sure that - // lzma_vli_size() will succeed. - size += lzma_vli_size(options->compressed_size); + size += lzma_vli_size(options->compressed_size / 4 - 1); } // Uncompressed Size - if (!lzma_vli_is_valid(options->uncompressed_size)) { - return LZMA_PROG_ERROR; - - } else if (options->uncompressed_reserve != 0) { - if (options->uncompressed_reserve > LZMA_VLI_BYTES_MAX - || lzma_vli_size(options->uncompressed_size) - > (size_t)(options->uncompressed_reserve)) + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + const size_t add = lzma_vli_size(options->uncompressed_size); + if (add == 0) return LZMA_PROG_ERROR; - size += options->uncompressed_reserve; - - } else if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - size += lzma_vli_size(options->uncompressed_size); + size += add; } // List of Filter Flags + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_PROG_ERROR; + for (size_t i = 0; options->filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { // Don't allow too many filters. - if (i == 7) + if (i == 4) return LZMA_PROG_ERROR; - uint32_t tmp; - const lzma_ret ret = lzma_filter_flags_size(&tmp, - options->filters + i); - if (ret != LZMA_OK) - return ret; + uint32_t add; + return_if_error(lzma_filter_flags_size(&add, + options->filters + i)); - size += tmp; + size += add; } - // CRC32 - if (options->has_crc32) - size += 4; - - // Padding - int32_t padding; - if (options->padding == LZMA_BLOCK_HEADER_PADDING_AUTO) { - const uint32_t preferred = lzma_alignment_output( - options->filters, 1); - const uint32_t unaligned = size + options->alignment; - padding = (int32_t)(unaligned % preferred); - if (padding != 0) - padding = preferred - padding; - } else if (options->padding >= LZMA_BLOCK_HEADER_PADDING_MIN - && options->padding <= LZMA_BLOCK_HEADER_PADDING_MAX) { - padding = options->padding; - } else { - return LZMA_PROG_ERROR; - } + // Pad to a multiple of four bytes. + options->header_size = (size + 3) & ~(size_t)(3); - // All success. Copy the calculated values to the options structure. - options->padding = padding; - options->header_size = size + (size_t)(padding); + // NOTE: We don't verify that Total Size of the Block stays within + // limits. This is because it is possible that we are called with + // exaggerated values to reserve space for Block Header, and later + // called again with lower, real values. return LZMA_OK; } extern LZMA_API lzma_ret -lzma_block_header_encode(uint8_t *out, const lzma_options_block *options) +lzma_block_header_encode(const lzma_options_block *options, uint8_t *out) { - // We write the Block Flags later. - if (options->header_size < 2) + if ((options->header_size & 3) + || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) return LZMA_PROG_ERROR; - const size_t out_size = options->header_size; + // Indicate the size of the buffer _excluding_ the CRC32 field. + const size_t out_size = options->header_size - 4; + + // Store the Block Header Size. + out[0] = out_size / 4; + + // We write Block Flags a little later. size_t out_pos = 2; // Compressed Size - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) { - const lzma_vli size = options->compressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->compressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->compressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Compressed Size must be non-zero, fit into a 63-bit + // integer and be a multiple of four. Also the Total Size + // of the Block must fit into 63-bit integer. + if (options->compressed_size == 0 + || (options->compressed_size & 3) + || options->compressed_size + > LZMA_VLI_VALUE_MAX + || lzma_block_total_size_get(options) == 0) return LZMA_PROG_ERROR; + return_if_error(lzma_vli_encode( + options->compressed_size / 4 - 1, NULL, + out, &out_pos, out_size)); } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) { - const lzma_vli size = options->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - ? options->uncompressed_size : 0; - size_t vli_pos = 0; - if (lzma_vli_encode( - size, &vli_pos, options->uncompressed_reserve, - out, &out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + return_if_error(lzma_vli_encode( + options->uncompressed_size, NULL, + out, &out_pos, out_size)); // Filter Flags - size_t filter_count; - for (filter_count = 0; options->filters[filter_count].id - != LZMA_VLI_VALUE_UNKNOWN; ++filter_count) { - // There can be at maximum of seven filters. - if (filter_count == 7) - return LZMA_PROG_ERROR; - - const lzma_ret ret = lzma_filter_flags_encode(out, &out_pos, - out_size, options->filters + filter_count); - // FIXME: Don't return LZMA_BUF_ERROR. - if (ret != LZMA_OK) - return ret; - } - - // Block Flags 1 - out[0] = filter_count; - - if (options->has_eopm) - out[0] |= 0x08; - else if (options->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) + if (options->filters == NULL + || options->filters[0].id == LZMA_VLI_VALUE_UNKNOWN) return LZMA_PROG_ERROR; - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->compressed_reserve != 0) - out[0] |= 0x10; - - if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - || options->uncompressed_reserve != 0) - out[0] |= 0x20; + size_t filter_count = 0; + do { + // There can be at maximum of four filters. + if (filter_count == 4) + return LZMA_PROG_ERROR; - if (options->is_metadata) - out[0] |= 0x80; + return_if_error(lzma_filter_flags_encode(out, &out_pos, + out_size, options->filters + filter_count)); - // Block Flags 2 - if (options->padding < LZMA_BLOCK_HEADER_PADDING_MIN - || options->padding > LZMA_BLOCK_HEADER_PADDING_MAX) - return LZMA_PROG_ERROR; + } while (options->filters[++filter_count].id + != LZMA_VLI_VALUE_UNKNOWN); - out[1] = (uint8_t)(options->padding); + // Block Flags + out[1] = filter_count - 1; - // CRC32 - if (options->has_crc32) { - if (out_size - out_pos < 4) - return LZMA_PROG_ERROR; + if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x40; - const uint32_t crc = lzma_crc32(out, out_pos, 0); - for (size_t i = 0; i < 4; ++i) - out[out_pos++] = crc >> (i * 8); - } + if (options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + out[1] |= 0x80; - // Padding - the amount of available space must now match with - // the size of the Padding field. - if (out_size - out_pos != (size_t)(options->padding)) - return LZMA_PROG_ERROR; + // Padding + memzero(out + out_pos, out_size - out_pos); - memzero(out + out_pos, (size_t)(options->padding)); + // CRC32 + integer_write_32(out + out_size, lzma_crc32(out, out_size, 0)); return LZMA_OK; } diff --git a/src/liblzma/common/block_private.h b/src/liblzma/common/block_private.h index 16d95b9f..235e96b8 100644 --- a/src/liblzma/common/block_private.h +++ b/src/liblzma/common/block_private.h @@ -22,6 +22,7 @@ #include "common.h" + static inline bool update_size(lzma_vli *size, lzma_vli add, lzma_vli limit) { @@ -43,54 +44,4 @@ is_size_valid(lzma_vli size, lzma_vli reference) return reference == LZMA_VLI_VALUE_UNKNOWN || reference == size; } - -/// If any of these tests fail, the caller has to return LZMA_PROG_ERROR. -static inline bool -validate_options_1(const lzma_options_block *options) -{ - return options == NULL - || !lzma_vli_is_valid(options->compressed_size) - || !lzma_vli_is_valid(options->uncompressed_size) - || !lzma_vli_is_valid(options->total_size) - || !lzma_vli_is_valid(options->total_limit) - || !lzma_vli_is_valid(options->uncompressed_limit); -} - - -/// If any of these tests fail, the encoder has to return LZMA_PROG_ERROR -/// because something is going horribly wrong if such values get passed -/// to the encoder. In contrast, the decoder has to return LZMA_DATA_ERROR, -/// since these tests failing indicate that something is wrong in the Stream. -static inline bool -validate_options_2(const lzma_options_block *options) -{ - if ((options->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN - && options->uncompressed_size - > options->uncompressed_limit) - || (options->total_size != LZMA_VLI_VALUE_UNKNOWN - && options->total_size - > options->total_limit) - || (!options->has_eopm && options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - || options->header_size > options->total_size) - return true; - - if (options->compressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Calculate a rough minimum possible valid Total Size of - // this Block, and check that total_size and total_limit - // are big enough. Note that the real minimum size can be - // bigger due to the Check, Uncompressed Size, Backwards - // Size, pr Padding being present. A rough check here is - // enough for us to catch the most obvious errors as early - // as possible. - const lzma_vli total_min = options->compressed_size - + (lzma_vli)(options->header_size); - if (total_min > options->total_size - || total_min > options->total_limit) - return true; - } - - return false; -} - #endif diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c new file mode 100644 index 00000000..6bffc2f1 --- /dev/null +++ b/src/liblzma/common/block_util.c @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file block_header.c +/// \brief Utility functions to handle lzma_options_block +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_ret +lzma_block_total_size_set(lzma_options_block *options, lzma_vli total_size) +{ + // Validate. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX + || (total_size & 3)) + return LZMA_PROG_ERROR; + + const uint32_t container_size = options->header_size + + lzma_check_sizes[options->check]; + + // Validate that Compressed Size will be greater than zero. + if (container_size <= total_size) + return LZMA_DATA_ERROR; + + options->compressed_size = total_size - container_size; + + return LZMA_OK; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size_get(const lzma_options_block *options) +{ + // Validate the values that we are interested in. + if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN + || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX + || (options->header_size & 3) + || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + return 0; + + // If Compressed Size is unknown, return that we cannot know + // Total Size either. + if (options->compressed_size == LZMA_VLI_VALUE_UNKNOWN) + return LZMA_VLI_VALUE_UNKNOWN; + + const lzma_vli total_size = options->compressed_size + + options->header_size + + lzma_check_sizes[options->check]; + + // Validate the calculated Total Size. + if (options->compressed_size > LZMA_VLI_VALUE_MAX + || (options->compressed_size & 3) + || total_size > LZMA_VLI_VALUE_MAX) + return 0; + + return total_size; +} diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 5dd7a87f..4f30427d 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -21,6 +21,7 @@ #define LZMA_COMMON_H #include "../../common/sysdefs.h" +#include "../../common/integer.h" // Don't use ifdef... #if HAVE_VISIBILITY @@ -30,6 +31,17 @@ #endif +// These allow helping the compiler in some often-executed branches, whose +// result is almost always the same. +#ifdef __GNUC__ +# define likely(expr) __builtin_expect(expr, true) +# define unlikely(expr) __builtin_expect(expr, false) +#else +# define likely(expr) (expr) +# define unlikely(expr) (expr) +#endif + + /// Size of temporary buffers needed in some filters #define LZMA_BUFFER_SIZE 4096 @@ -117,10 +129,6 @@ struct lzma_filter_info_s { /// Pointer to filter's options structure void *options; - - /// Uncompressed size of the filter, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. - lzma_vli uncompressed_size; }; @@ -158,20 +166,6 @@ extern void lzma_next_coder_end(lzma_next_coder *next, lzma_allocator *allocator); -extern lzma_ret lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options); - -extern lzma_ret lzma_block_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_block *options); - -extern lzma_ret lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); - -extern lzma_ret lzma_stream_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer); - - /// \brief Wrapper for memcpy() /// /// This function copies as much data as possible from in[] to out[] and @@ -225,6 +219,13 @@ do { \ lzma_next_coder_init2(next, allocator, \ func, func, allocator, __VA_ARGS__) +/// \brief Initializing lzma_next_coder +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_coder_init0(func, next, allocator) \ + lzma_next_coder_init2(next, allocator, func, func, allocator) + /// \brief Initializing lzma_stream /// @@ -254,6 +255,13 @@ do { \ #define lzma_next_strm_init(strm, func, ...) \ lzma_next_strm_init2(strm, func, func, (strm)->allocator, __VA_ARGS__) +/// \brief Initializing lzma_stream +/// +/// Call the initialization function, which takes no other arguments than +/// lzma_next_coder and lzma_allocator. +#define lzma_next_strm_init0(strm, func) \ + lzma_next_strm_init2(strm, func, func, (strm)->allocator) + /// \brief Return if expression doesn't evaluate to LZMA_OK /// diff --git a/src/liblzma/common/copy_coder.c b/src/liblzma/common/copy_coder.c deleted file mode 100644 index 0bd674f6..00000000 --- a/src/liblzma/common/copy_coder.c +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.c -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "copy_coder.h" - - -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - -#ifdef HAVE_ENCODER -static lzma_ret -copy_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // If we aren't the last filter in the chain, the Copy filter - // is totally useless. Note that it is job of the next coder to - // take care of Uncompressed Size, so we don't need to update our - // coder->uncompressed_size at all. - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - // We are the last coder in the chain. - // Just copy as much data as possible. - bufcpy(in, in_pos, in_size, out, out_pos, out_size); - - // LZMA_SYNC_FLUSH and LZMA_FINISH are the same thing for us. - if (action != LZMA_RUN && *in_pos == in_size) - return LZMA_STREAM_END; - - return LZMA_OK; -} -#endif - - -#ifdef HAVE_DECODER -static lzma_ret -copy_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - if (coder->next.code != NULL) - return coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - - const size_t in_avail = in_size - *in_pos; - - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - // We are the last coder in the chain. - // Just copy as much data as possible. - const size_t in_used = bufcpy( - in, in_pos, in_size, out, out_pos, out_size); - - // Update uncompressed_size if it is known. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= in_used; - - return coder->uncompressed_size == 0 ? LZMA_STREAM_END : LZMA_OK; -} -#endif - - -static void -copy_coder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -copy_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters, lzma_code_function encode) -{ - // Allocate memory for the decoder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = encode; - next->end = ©_coder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - // Copy Uncompressed Size which is used to limit the output size. - next->coder->uncompressed_size = filters[0].uncompressed_size; - - // Initialize the next decoder in the chain, if any. - return lzma_next_filter_init( - &next->coder->next, allocator, filters + 1); -} - - -#ifdef HAVE_ENCODER -extern lzma_ret -lzma_copy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - ©_encode); -} -#endif - - -#ifdef HAVE_DECODER -extern lzma_ret -lzma_copy_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - lzma_next_coder_init(copy_coder_init, next, allocator, filters, - ©_decode); -} -#endif diff --git a/src/liblzma/common/copy_coder.h b/src/liblzma/common/copy_coder.h deleted file mode 100644 index b8d0295d..00000000 --- a/src/liblzma/common/copy_coder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file copy_coder.h -/// \brief The Copy filter encoder and decoder -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_COPY_CODER_H -#define LZMA_COPY_CODER_H - -#include "common.h" - -extern lzma_ret lzma_copy_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -extern lzma_ret lzma_copy_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_filter_info *filters); - -#endif diff --git a/src/liblzma/common/delta_common.c b/src/liblzma/common/delta_common.c index de27b5a6..acd31e14 100644 --- a/src/liblzma/common/delta_common.c +++ b/src/liblzma/common/delta_common.c @@ -47,10 +47,6 @@ lzma_delta_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Coding function is different for encoder and decoder. next->code = code; - // Copy Uncompressed Size which is used to limit the output size - // in the Delta decoder. - next->coder->uncompressed_size = filters[0].uncompressed_size; - // Set the delta distance. if (filters[0].options == NULL) return LZMA_PROG_ERROR; diff --git a/src/liblzma/common/delta_common.h b/src/liblzma/common/delta_common.h index 3ec955b7..1d58899d 100644 --- a/src/liblzma/common/delta_common.h +++ b/src/liblzma/common/delta_common.h @@ -26,10 +26,6 @@ struct lzma_coder_s { /// Next coder in the chain lzma_next_coder next; - /// Uncompressed size - This is needed when we are the last - /// filter in the chain. - lzma_vli uncompressed_size; - /// Delta distance size_t distance; diff --git a/src/liblzma/common/delta_decoder.c b/src/liblzma/common/delta_decoder.c index af2b840d..8f5a4cbf 100644 --- a/src/liblzma/common/delta_decoder.c +++ b/src/liblzma/common/delta_decoder.c @@ -21,26 +21,8 @@ #include "delta_common.h" -/// Copies and decodes the data at the same time. This is used when Delta -/// is the last filter in the chain. static void -copy_and_decode(lzma_coder *coder, - const uint8_t *restrict in, uint8_t *restrict out, size_t size) -{ - const size_t distance = coder->distance; - - for (size_t i = 0; i < size; ++i) { - out[i] = in[i] + coder->history[ - (distance + coder->pos) & 0xFF]; - coder->history[coder->pos-- & 0xFF] = out[i]; - } -} - - -/// Decodes the data in place. This is used when we are not the last filter -/// in the chain. -static void -decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) +decode_buffer(lzma_coder *coder, uint8_t *buffer, size_t size) { const size_t distance = coder->distance; @@ -51,44 +33,21 @@ decode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) } - static lzma_ret delta_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - lzma_ret ret; - - if (coder->next.code == NULL) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_size - *in_pos) > coder->uncompressed_size) - in_size = *in_pos + (size_t)(coder->uncompressed_size); - - const size_t in_avail = in_size - *in_pos; - const size_t out_avail = out_size - *out_pos; - const size_t size = MIN(in_avail, out_avail); - - copy_and_decode(coder, in + *in_pos, out + *out_pos, size); + assert(coder->next.code != NULL); - *in_pos += size; - *out_pos += size; + const size_t out_start = *out_pos; - assert(coder->uncompressed_size <= LZMA_VLI_VALUE_MAX); - coder->uncompressed_size -= size; + const lzma_ret ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, + action); - ret = coder->uncompressed_size == 0 - ? LZMA_STREAM_END : LZMA_OK; - - } else { - const size_t out_start = *out_pos; - - ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, - action); - - decode_in_place(coder, out + out_start, *out_pos - out_start); - } + decode_buffer(coder, out + out_start, *out_pos - out_start); return ret; } diff --git a/src/liblzma/common/delta_encoder.c b/src/liblzma/common/delta_encoder.c index b94f92de..a8bb9341 100644 --- a/src/liblzma/common/delta_encoder.c +++ b/src/liblzma/common/delta_encoder.c @@ -22,7 +22,8 @@ /// Copies and encodes the data at the same time. This is used when Delta -/// is the last filter in the chain. +/// is the first filter in the chain (and thus the last filter in the +/// encoder's filter stack). static void copy_and_encode(lzma_coder *coder, const uint8_t *restrict in, uint8_t *restrict out, size_t size) @@ -38,8 +39,8 @@ copy_and_encode(lzma_coder *coder, } -/// Encodes the data in place. This is used when we are not the last filter -/// in the chain. +/// Encodes the data in place. This is used when we are the last filter +/// in the chain (and thus non-last filter in the encoder's filter stack). static void encode_in_place(lzma_coder *coder, uint8_t *buffer, size_t size) { diff --git a/src/liblzma/common/easy.c b/src/liblzma/common/easy.c new file mode 100644 index 00000000..6c258204 --- /dev/null +++ b/src/liblzma/common/easy.c @@ -0,0 +1,122 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file easy.c +/// \brief Easy Stream encoder initialization +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_encoder.h" + + +struct lzma_coder_s { + lzma_next_coder stream_encoder; + + /// We need to keep the filters array available in case + /// LZMA_FULL_FLUSH is used. + lzma_options_filter filters[5]; +}; + + +static bool +easy_set_filters(lzma_options_filter *filters, uint32_t level) +{ + bool error = false; + + if (level == 0) { + // TODO FIXME Use Subblock or LZMA2 with no compression. + error = true; + +#ifdef HAVE_FILTER_LZMA + } else if (level <= 9) { + filters[0].id = LZMA_FILTER_LZMA; + filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); + filters[1].id = LZMA_VLI_VALUE_UNKNOWN; +#endif + + } else { + error = true; + } + + return error; +} + + +static lzma_ret +easy_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + return coder->stream_encoder.code( + coder->stream_encoder.coder, allocator, + in, in_pos, in_size, out, out_pos, out_size, action); +} + + +static void +easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->stream_encoder, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_easy_level level) +{ + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &easy_encode; + next->end = &easy_encoder_end; + + next->coder->stream_encoder = LZMA_NEXT_CODER_INIT; + } + + if (easy_set_filters(next->coder->filters, level)) + return LZMA_HEADER_ERROR; + + return lzma_stream_encoder_init(&next->coder->stream_encoder, + allocator, next->coder->filters, LZMA_CHECK_CRC32); +} + + +extern LZMA_API lzma_ret +lzma_easy_encoder(lzma_stream *strm, lzma_easy_level level) +{ + lzma_next_strm_init(strm, easy_encoder_init, level); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} + + +extern LZMA_API uint32_t +lzma_easy_memory_usage(lzma_easy_level level) +{ + lzma_options_filter filters[5]; + if (easy_set_filters(filters, level)) + return UINT32_MAX; + + return lzma_memory_usage(filters, true); +} diff --git a/src/liblzma/common/easy_common.c b/src/liblzma/common/easy_common.c deleted file mode 100644 index e0c12a52..00000000 --- a/src/liblzma/common/easy_common.c +++ /dev/null @@ -1,54 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" - - -extern bool -lzma_easy_set_filters(lzma_options_filter *filters, uint32_t level) -{ - bool error = false; - - if (level == 0) { - filters[0].id = LZMA_VLI_VALUE_UNKNOWN; - -#ifdef HAVE_FILTER_LZMA - } else if (level <= 9) { - filters[0].id = LZMA_FILTER_LZMA; - filters[0].options = (void *)(&lzma_preset_lzma[level - 1]); - filters[1].id = LZMA_VLI_VALUE_UNKNOWN; -#endif - - } else { - error = true; - } - - return error; -} - - -extern LZMA_API uint32_t -lzma_easy_memory_usage(lzma_easy_level level) -{ - lzma_options_filter filters[8]; - if (lzma_easy_set_filters(filters, level)) - return UINT32_MAX; - - return lzma_memory_usage(filters, true); -} diff --git a/src/liblzma/common/easy_common.h b/src/liblzma/common/easy_common.h deleted file mode 100644 index d864cce5..00000000 --- a/src/liblzma/common/easy_common.h +++ /dev/null @@ -1,28 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_common.c -/// \brief Shared stuff for easy encoder initialization functions -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - -#ifndef LZMA_EASY_COMMON_H -#define LZMA_EASY_COMMON_H - -extern bool lzma_easy_set_filters( - lzma_options_filter *filters, uint32_t level); - -#endif diff --git a/src/liblzma/common/easy_multi.c b/src/liblzma/common/easy_multi.c deleted file mode 100644 index 15778fab..00000000 --- a/src/liblzma/common/easy_multi.c +++ /dev/null @@ -1,103 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_multi.c -/// \brief Easy Multi-Block Stream encoder initialization -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" -#include "stream_encoder_multi.h" - - -struct lzma_coder_s { - lzma_next_coder encoder; - lzma_options_stream options; -}; - - -static lzma_ret -easy_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - return coder->encoder.code(coder->encoder.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, action); -} - - -static void -easy_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->encoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -easy_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &easy_encode; - next->end = &easy_encoder_end; - - next->coder->encoder = LZMA_NEXT_CODER_INIT; - } - - next->coder->options = (lzma_options_stream){ - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - .header = header, - .footer = footer, - }; - - if (lzma_easy_set_filters(next->coder->options.filters, level) - || lzma_easy_set_filters( - next->coder->options.metadata_filters, - metadata_level)) - return LZMA_HEADER_ERROR; - - return lzma_stream_encoder_multi_init(&next->coder->encoder, - allocator, &next->coder->options); -} - - -extern LZMA_API lzma_ret -lzma_easy_encoder_multi(lzma_stream *strm, - lzma_easy_level level, lzma_easy_level metadata_level, - const lzma_extra *header, const lzma_extra *footer) -{ - // This is more complicated than lzma_easy_encoder_single(), - // because lzma_stream_encoder_multi() wants that the options - // structure is available until the encoding is finished. - lzma_next_strm_init(strm, easy_encoder_init, - level, metadata_level, header, footer); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/easy_single.c b/src/liblzma/common/easy_single.c deleted file mode 100644 index e2fa4e13..00000000 --- a/src/liblzma/common/easy_single.c +++ /dev/null @@ -1,37 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file easy_single.c -/// \brief Easy Single-Block Stream encoder initialization -// -// Copyright (C) 2008 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "easy_common.h" - - -extern LZMA_API lzma_ret -lzma_easy_encoder_single(lzma_stream *strm, lzma_easy_level level) -{ - lzma_options_stream opt_stream = { - .check = LZMA_CHECK_CRC32, - .has_crc32 = true, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .alignment = 0, - }; - - if (lzma_easy_set_filters(opt_stream.filters, level)) - return LZMA_HEADER_ERROR; - - return lzma_stream_encoder_single(strm, &opt_stream); -} diff --git a/src/liblzma/common/extra.c b/src/liblzma/common/extra.c deleted file mode 100644 index c532abb0..00000000 --- a/src/liblzma/common/extra.c +++ /dev/null @@ -1,34 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file extra.c -/// \brief Handling of Extra in Metadata -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API void -lzma_extra_free(lzma_extra *extra, lzma_allocator *allocator) -{ - while (extra != NULL) { - lzma_extra *tmp = extra->next; - lzma_free(extra->data, allocator); - lzma_free(extra, allocator); - extra = tmp; - } - - return; -} diff --git a/src/liblzma/common/features.c b/src/liblzma/common/features.c index 33b2e0a2..a02949d9 100644 --- a/src/liblzma/common/features.c +++ b/src/liblzma/common/features.c @@ -21,10 +21,6 @@ static const lzma_vli filters[] = { -#ifdef HAVE_FILTER_COPY - LZMA_FILTER_COPY, -#endif - #ifdef HAVE_FILTER_SUBBLOCK LZMA_FILTER_SUBBLOCK, #endif diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c index 515f9346..498b2ad6 100644 --- a/src/liblzma/common/filter_flags_decoder.c +++ b/src/liblzma/common/filter_flags_decoder.c @@ -21,362 +21,188 @@ #include "lzma_decoder.h" -struct lzma_coder_s { - lzma_options_filter *options; - - enum { - SEQ_MISC, - SEQ_ID, - SEQ_SIZE, - SEQ_PROPERTIES, - } sequence; - - /// \brief Position in variable-length integers - size_t pos; - - /// \brief Size of Filter Properties - lzma_vli properties_size; -}; - - #ifdef HAVE_FILTER_SUBBLOCK static lzma_ret -properties_subblock(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in lzma_attribute((unused)), - size_t *in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused))) +properties_subblock(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props lzma_attribute((unused)), + size_t prop_size lzma_attribute((unused))) { - if (coder->properties_size != 0) + if (prop_size != 0) return LZMA_HEADER_ERROR; - coder->options->options = lzma_alloc( + options->options = lzma_alloc( sizeof(lzma_options_subblock), allocator); - if (coder->options->options == NULL) + if (options->options == NULL) return LZMA_MEM_ERROR; - ((lzma_options_subblock *)(coder->options->options)) - ->allow_subfilters = true; - return LZMA_STREAM_END; + ((lzma_options_subblock *)(options->options))->allow_subfilters = true; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_SIMPLE static lzma_ret -properties_simple(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_simple(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size == 0) - return LZMA_STREAM_END; + if (prop_size == 0) + return LZMA_OK; - if (coder->properties_size != 4) + if (prop_size != 4) return LZMA_HEADER_ERROR; - lzma_options_simple *options = coder->options->options; - - if (options == NULL) { - options = lzma_alloc(sizeof(lzma_options_simple), allocator); - if (options == NULL) - return LZMA_MEM_ERROR; - - options->start_offset = 0; - coder->options->options = options; - } - - while (coder->pos < 4) { - if (*in_pos == in_size) - return LZMA_OK; + lzma_options_simple *simple = lzma_alloc( + sizeof(lzma_options_simple), allocator); + if (simple == NULL) + return LZMA_MEM_ERROR; - options->start_offset - |= (uint32_t)(in[*in_pos]) << (8 * coder->pos); - ++*in_pos; - ++coder->pos; - } + simple->start_offset = integer_read_32(props); // Don't leave an options structure allocated if start_offset is zero. - if (options->start_offset == 0) { - lzma_free(options, allocator); - coder->options->options = NULL; - } + if (simple->start_offset == 0) + lzma_free(simple, allocator); + else + options->options = simple; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_DELTA static lzma_ret -properties_delta(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_delta(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { - if (coder->properties_size != 1) + if (prop_size != 1) return LZMA_HEADER_ERROR; - if (*in_pos == in_size) - return LZMA_OK; - - lzma_options_delta *options = lzma_alloc( - sizeof(lzma_options_delta), allocator); - if (options == NULL) + options->options = lzma_alloc(sizeof(lzma_options_delta), allocator); + if (options->options == NULL) return LZMA_MEM_ERROR; - coder->options->options = options; - - options->distance = (uint32_t)(in[*in_pos]) + 1; - ++*in_pos; + ((lzma_options_delta *)(options->options))->distance + = (uint32_t)(props[0]) + 1; - return LZMA_STREAM_END; + return LZMA_OK; } #endif #ifdef HAVE_FILTER_LZMA static lzma_ret -properties_lzma(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *in, size_t *in_pos, size_t in_size) +properties_lzma(lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *props, size_t prop_size) { // LZMA properties are always two bytes (at least for now). - if (coder->properties_size != 2) + if (prop_size != 2) return LZMA_HEADER_ERROR; - assert(coder->pos < 2); - - while (*in_pos < in_size) { - switch (coder->pos) { - case 0: - // Allocate the options structure. - coder->options->options = lzma_alloc( - sizeof(lzma_options_lzma), allocator); - if (coder->options->options == NULL) - return LZMA_MEM_ERROR; - - // Decode lc, lp, and pb. - if (lzma_lzma_decode_properties( - coder->options->options, in[*in_pos])) - return LZMA_HEADER_ERROR; - - ++*in_pos; - ++coder->pos; - break; - - case 1: { - lzma_options_lzma *options = coder->options->options; - - // Check that reserved bits are unset. - if (in[*in_pos] & 0xC0) - return LZMA_HEADER_ERROR; - - // Decode the dictionary size. See the file format - // specification section 4.3.4.2 to understand this. - if (in[*in_pos] == 0) { - options->dictionary_size = 1; - - } else if (in[*in_pos] > 59) { - // Dictionary size is over 1 GiB. - // It's not supported at the moment. - return LZMA_HEADER_ERROR; -# if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 -# error Update the if()-condition a few lines -# error above to match LZMA_DICTIONARY_SIZE_MAX. -# endif - - } else { - options->dictionary_size - = 2 | ((in[*in_pos] + 1) & 1); - options->dictionary_size - <<= (in[*in_pos] - 1) / 2; - } - - ++*in_pos; - return LZMA_STREAM_END; - } - } - } + lzma_options_lzma *lzma = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (lzma == NULL) + return LZMA_MEM_ERROR; + + // Decode lc, lp, and pb. + if (lzma_lzma_decode_properties(lzma, props[0])) + goto error; + + // Check that reserved bits are unset. + if (props[1] & 0xC0) + goto error; + + // Decode the dictionary size. + // FIXME The specification says that maximum is 4 GiB. + if (props[1] > 36) + goto error; +#if LZMA_DICTIONARY_SIZE_MAX != UINT32_C(1) << 30 +# error Update the if()-condition a few lines +# error above to match LZMA_DICTIONARY_SIZE_MAX. +#endif + + lzma->dictionary_size = 2 | (props[1] & 1); + lzma->dictionary_size <<= props[1] / 2 + 11; - assert(coder->pos < 2); + options->options = lzma; return LZMA_OK; + +error: + lzma_free(lzma, allocator); + return LZMA_HEADER_ERROR; } #endif -static lzma_ret -filter_flags_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) +extern LZMA_API lzma_ret +lzma_filter_flags_decode( + lzma_options_filter *options, lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) { - while (*in_pos < in_size || coder->sequence == SEQ_PROPERTIES) - switch (coder->sequence) { - case SEQ_MISC: - // Determine the Filter ID and Size of Filter Properties. - if (in[*in_pos] >= 0xE0) { - // Using External ID. Prepare the ID - // for variable-length integer parsing. - coder->options->id = 0; - - if (in[*in_pos] == 0xFF) { - // Mark that Size of Filter Properties is - // unknown, so we know later that there is - // external Size of Filter Properties present. - coder->properties_size - = LZMA_VLI_VALUE_UNKNOWN; - } else { - // Take Size of Filter Properties from Misc. - coder->properties_size = in[*in_pos] - 0xE0; - } - - coder->sequence = SEQ_ID; - - } else { - // The Filter ID is the same as Misc. - coder->options->id = in[*in_pos]; - - // The Size of Filter Properties can be calculated - // from Misc too. - coder->properties_size = in[*in_pos] / 0x20; - - coder->sequence = SEQ_PROPERTIES; - } - - ++*in_pos; - break; + // Set the pointer to NULL so the caller can always safely free it. + options->options = NULL; - case SEQ_ID: { - const lzma_ret ret = lzma_vli_decode(&coder->options->id, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; - - if (coder->properties_size == LZMA_VLI_VALUE_UNKNOWN) { - // We have also external Size of Filter - // Properties. Prepare the size for - // variable-length integer parsing. - coder->properties_size = 0; - coder->sequence = SEQ_SIZE; - } else { - coder->sequence = SEQ_PROPERTIES; - } - - // Reset pos for its next job. - coder->pos = 0; - break; - } + // Filter ID + return_if_error(lzma_vli_decode(&options->id, NULL, + in, in_pos, in_size)); - case SEQ_SIZE: { - const lzma_ret ret = lzma_vli_decode(&coder->properties_size, - &coder->pos, in, in_pos, in_size); - if (ret != LZMA_STREAM_END) - return ret; + // Size of Properties + lzma_vli prop_size; + return_if_error(lzma_vli_decode(&prop_size, NULL, + in, in_pos, in_size)); - coder->pos = 0; - coder->sequence = SEQ_PROPERTIES; - break; - } + // Check that we have enough input. + if (prop_size > in_size - *in_pos) + return LZMA_DATA_ERROR; - case SEQ_PROPERTIES: { - lzma_ret (*get_properties)(lzma_coder *coder, - lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size); + // Determine the function to decode the properties. + lzma_ret (*get_properties)(lzma_options_filter *options, + lzma_allocator *allocator, const uint8_t *props, + size_t prop_size); - switch (coder->options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return coder->properties_size > 0 - ? LZMA_HEADER_ERROR : LZMA_STREAM_END; -#endif + switch (options->id) { #ifdef HAVE_FILTER_SUBBLOCK - case LZMA_FILTER_SUBBLOCK: - get_properties = &properties_subblock; - break; + case LZMA_FILTER_SUBBLOCK: + get_properties = &properties_subblock; + break; #endif #ifdef HAVE_FILTER_SIMPLE # ifdef HAVE_FILTER_X86 - case LZMA_FILTER_X86: + case LZMA_FILTER_X86: # endif # ifdef HAVE_FILTER_POWERPC - case LZMA_FILTER_POWERPC: + case LZMA_FILTER_POWERPC: # endif # ifdef HAVE_FILTER_IA64 - case LZMA_FILTER_IA64: + case LZMA_FILTER_IA64: # endif # ifdef HAVE_FILTER_ARM - case LZMA_FILTER_ARM: + case LZMA_FILTER_ARM: # endif # ifdef HAVE_FILTER_ARMTHUMB - case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_ARMTHUMB: # endif # ifdef HAVE_FILTER_SPARC - case LZMA_FILTER_SPARC: + case LZMA_FILTER_SPARC: # endif - get_properties = &properties_simple; - break; + get_properties = &properties_simple; + break; #endif #ifdef HAVE_FILTER_DELTA - case LZMA_FILTER_DELTA: - get_properties = &properties_delta; - break; + case LZMA_FILTER_DELTA: + get_properties = &properties_delta; + break; #endif #ifdef HAVE_FILTER_LZMA - case LZMA_FILTER_LZMA: - get_properties = &properties_lzma; - break; + case LZMA_FILTER_LZMA: + get_properties = &properties_lzma; + break; #endif - default: - return LZMA_HEADER_ERROR; - } - - return get_properties(coder, allocator, in, in_pos, in_size); - } - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -filter_flags_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_filter_flags_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_options_filter *options) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &filter_flags_decode; - next->end = &filter_flags_decoder_end; + return LZMA_HEADER_ERROR; } - options->id = 0; - options->options = NULL; - - next->coder->options = options; - next->coder->sequence = SEQ_MISC; - next->coder->pos = 0; - next->coder->properties_size = 0; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_filter_flags_decoder(lzma_stream *strm, lzma_options_filter *options) -{ - lzma_next_strm_init(strm, lzma_filter_flags_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; + const uint8_t *props = in + *in_pos; + *in_pos += prop_size; + return get_properties(options, allocator, props, prop_size); } diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c index 2d11dd3a..45fbbb00 100644 --- a/src/liblzma/common/filter_flags_encoder.c +++ b/src/liblzma/common/filter_flags_encoder.c @@ -22,22 +22,13 @@ #include "fastpos.h" -/// \brief Calculates the size of the Filter Properties field -/// -/// This currently can return only LZMA_OK or LZMA_HEADER_ERROR, but -/// with some new filters it may return also LZMA_PROG_ERROR. +/// Calculate the size of the Filter Properties field static lzma_ret get_properties_size(uint32_t *size, const lzma_options_filter *options) { lzma_ret ret = LZMA_OK; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - *size = 0; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: *size = 0; @@ -100,40 +91,14 @@ get_properties_size(uint32_t *size, const lzma_options_filter *options) extern LZMA_API lzma_ret lzma_filter_flags_size(uint32_t *size, const lzma_options_filter *options) { - // Get size of Filter Properties. + // Get size of Filter Properties. This also validates the Filter ID. uint32_t prop_size; - const lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Size of Filter ID field if it exists. - size_t id_size; - size_t prop_size_size; - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - id_size = 0; - prop_size_size = 0; - - } else { - // At least Filter ID is stored using the External ID field. - id_size = lzma_vli_size(options->id); - if (id_size == 0) - return LZMA_PROG_ERROR; - - if (prop_size <= 30) { - // Size of Filter Properties fits into Misc still. - prop_size_size = 0; - } else { - // The Size of Filter Properties field is used too. - prop_size_size = lzma_vli_size(prop_size); - if (prop_size_size == 0) - return LZMA_PROG_ERROR; - } - } + return_if_error(get_properties_size(&prop_size, options)); - // 1 is for the Misc field. - *size = 1 + id_size + prop_size_size + prop_size; + // Calculate the size of the Filter ID and Size of Properties fields. + // These cannot fail since get_properties_size() already succeeded. + *size = lzma_vli_size(options->id) + lzma_vli_size(prop_size) + + prop_size; return LZMA_OK; } @@ -149,10 +114,10 @@ properties_simple(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_OK; if (out_size - *out_pos < 4) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; - for (size_t i = 0; i < 4; ++i) - out[(*out_pos)++] = options->start_offset >> (i * 8); + integer_write_32(out + *out_pos, options->start_offset); + *out_pos += 4; return LZMA_OK; } @@ -175,7 +140,7 @@ properties_delta(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_HEADER_ERROR; if (out_size - *out_pos < 1) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; out[*out_pos] = options->distance - LZMA_DELTA_DISTANCE_MIN; ++*out_pos; @@ -195,7 +160,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, return LZMA_PROG_ERROR; if (out_size - *out_pos < 2) - return LZMA_BUF_ERROR; + return LZMA_PROG_ERROR; // LZMA Properties if (lzma_lzma_encode_properties(options, out + *out_pos)) @@ -230,7 +195,7 @@ properties_lzma(uint8_t *out, size_t *out_pos, size_t out_size, ++d; // Get the highest two bits using the proper encoding: - out[*out_pos] = get_pos_slot(d) - 1; + out[*out_pos] = get_pos_slot(d) - 24; ++*out_pos; return LZMA_OK; @@ -250,58 +215,19 @@ lzma_filter_flags_encode(uint8_t *out, size_t *out_pos, size_t out_size, // Get size of Filter Properties. uint32_t prop_size; - lzma_ret ret = get_properties_size(&prop_size, options); - if (ret != LZMA_OK) - return ret; - - // Misc, External ID, and Size of Properties - if (options->id < 0xE0 - && (lzma_vli)(prop_size) == options->id / 0x20) { - // ID and Size of Filter Properties fit into Misc. - out[*out_pos] = options->id; - ++*out_pos; - - } else if (prop_size <= 30) { - // Size of Filter Properties fits into Misc. - out[*out_pos] = prop_size + 0xE0; - ++*out_pos; - - // External ID is used to encode the Filter ID. If encoding - // the VLI fails, it's because the caller has given as too - // little output space, which it should have checked already. - // So return LZMA_PROG_ERROR, not LZMA_BUF_ERROR. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - } else { - // Nothing fits into Misc. - out[*out_pos] = 0xFF; - ++*out_pos; - - // External ID is used to encode the Filter ID. - size_t dummy = 0; - if (lzma_vli_encode(options->id, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - - // External Size of Filter Properties - dummy = 0; - if (lzma_vli_encode(prop_size, &dummy, 1, - out, out_pos, out_size) != LZMA_STREAM_END) - return LZMA_PROG_ERROR; - } + return_if_error(get_properties_size(&prop_size, options)); + + // Filter ID + return_if_error(lzma_vli_encode(options->id, NULL, + out, out_pos, out_size)); + + // Size of Properties + return_if_error(lzma_vli_encode(prop_size, NULL, + out, out_pos, out_size)); // Filter Properties + lzma_ret ret; switch (options->id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - assert(prop_size == 0); - ret = options->options == NULL ? LZMA_OK : LZMA_HEADER_ERROR; - break; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: assert(prop_size == 0); diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index 6816b37a..f01206de 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file index.c -/// \brief Handling of Index in Metadata +/// \brief Handling of Index // // Copyright (C) 2007 Lasse Collin // @@ -17,124 +17,733 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "common.h" +#include "index.h" -/** - * \brief Duplicates an Index list - * - * \return A copy of the Index list, or NULL if memory allocation - * failed or the original Index was empty. - */ -extern LZMA_API lzma_index * -lzma_index_dup(const lzma_index *old_current, lzma_allocator *allocator) +/// Number of Records to allocate at once. +#define INDEX_GROUP_SIZE 256 + + +typedef struct lzma_index_group_s lzma_index_group; +struct lzma_index_group_s { + /// Next group + lzma_index_group *prev; + + /// Previous group + lzma_index_group *next; + + /// Index of the last Record in this group + size_t last; + + /// Total Size fields as cumulative sum relative to the beginning + /// of the group. The total size of the group is total_sums[last]. + lzma_vli total_sums[INDEX_GROUP_SIZE]; + + /// Uncompressed Size fields as cumulative sum relative to the + /// beginning of the group. The uncompressed size of the group is + /// uncompressed_sums[last]. + lzma_vli uncompressed_sums[INDEX_GROUP_SIZE]; + + /// True if the Record is padding + bool paddings[INDEX_GROUP_SIZE]; +}; + + +struct lzma_index_s { + /// Total size of the Blocks and padding + lzma_vli total_size; + + /// Uncompressed size of the Stream + lzma_vli uncompressed_size; + + /// Number of non-padding records. This is needed by Index encoder. + lzma_vli count; + + /// Size of the List of Records field; this is updated every time + /// a new non-padding Record is added. + lzma_vli index_list_size; + + /// This is zero if no Indexes have been combined with + /// lzma_index_cat(). With combined Indexes, this contains the sizes + /// of all but latest the Streams, including possible Stream Padding + /// fields. + lzma_vli padding_size; + + /// First group of Records + lzma_index_group *head; + + /// Last group of Records + lzma_index_group *tail; + + /// Tracking the read position + struct { + /// Group where the current read position is. + lzma_index_group *group; + + /// The most recently read record in *group + lzma_vli record; + + /// Uncompressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli uncompressed_offset; + + /// Compressed offset of the beginning of *group relative + /// to the beginning of the Stream + lzma_vli stream_offset; + } current; + + /// Information about earlier Indexes when multiple Indexes have + /// been combined. + struct { + /// Sum of the Record counts of the all but the last Stream. + lzma_vli count; + + /// Sum of the List of Records fields of all but the last + /// Stream. This is needed when a new Index is concatenated + /// to this lzma_index structure. + lzma_vli index_list_size; + } old; +}; + + +static void +free_index_list(lzma_index *i, lzma_allocator *allocator) { - lzma_index *new_head = NULL; - lzma_index *new_current = NULL; + lzma_index_group *g = i->head; - while (old_current != NULL) { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) { - lzma_index_free(new_head, allocator); - return NULL; - } + while (g != NULL) { + lzma_index_group *tmp = g->next; + lzma_free(g, allocator); + g = tmp; + } - i->total_size = old_current->total_size; - i->uncompressed_size = old_current->uncompressed_size; - i->next = NULL; + return; +} - if (new_head == NULL) - new_head = i; - else - new_current->next = i; - new_current = i; - old_current = old_current->next; +extern LZMA_API lzma_index * +lzma_index_init(lzma_index *i, lzma_allocator *allocator) +{ + if (i == NULL) { + i = lzma_alloc(sizeof(lzma_index), allocator); + if (i == NULL) + return NULL; + } else { + free_index_list(i, allocator); } - return new_head; + i->total_size = 0; + i->uncompressed_size = 0; + i->count = 0; + i->index_list_size = 0; + i->padding_size = 0; + i->head = NULL; + i->tail = NULL; + i->current.group = NULL; + i->old.count = 0; + i->old.index_list_size = 0; + + return i; } -/** - * \brief Frees an Index list - * - * All Index Recors in the list are freed. This function is convenient when - * getting rid of lzma_metadata structures containing an Index. - */ extern LZMA_API void -lzma_index_free(lzma_index *i, lzma_allocator *allocator) +lzma_index_end(lzma_index *i, lzma_allocator *allocator) { - while (i != NULL) { - lzma_index *tmp = i->next; + if (i != NULL) { + free_index_list(i, allocator); lzma_free(i, allocator); - i = tmp; } return; } -/** - * \brief Calculates properties of an Index list - * - * - */ -extern LZMA_API lzma_ret -lzma_index_count(const lzma_index *i, size_t *count, - lzma_vli *lzma_restrict total_size, - lzma_vli *lzma_restrict uncompressed_size) -{ - *count = 0; - *total_size = 0; - *uncompressed_size = 0; - - while (i != NULL) { - if (i->total_size == LZMA_VLI_VALUE_UNKNOWN) { - *total_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->total_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*total_size != LZMA_VLI_VALUE_UNKNOWN) { - *total_size += i->total_size; - if (*total_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; +extern LZMA_API lzma_vli +lzma_index_count(const lzma_index *i) +{ + return i->count; +} + + +extern LZMA_API lzma_vli +lzma_index_size(const lzma_index *i) +{ + return index_size(i->count, i->index_list_size); +} + + +extern LZMA_API lzma_vli +lzma_index_total_size(const lzma_index *i) +{ + return i->total_size; +} + + +extern LZMA_API lzma_vli +lzma_index_stream_size(const lzma_index *i) +{ + // Stream Header + Blocks + Index + Stream Footer + return LZMA_STREAM_HEADER_SIZE + i->total_size + + index_size(i->count, i->index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + + +extern LZMA_API lzma_vli +lzma_index_file_size(const lzma_index *i) +{ + // If multiple Streams are concatenated, the Stream Header, Index, + // and Stream Footer fields of all but the last Stream are already + // included in padding_size. Thus, we need to calculate only the + // size of the last Index, not all Indexes. + return i->total_size + i->padding_size + + index_size(i->count - i->old.count, + i->index_list_size - i->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; +} + + +extern LZMA_API lzma_vli +lzma_index_uncompressed_size(const lzma_index *i) +{ + return i->uncompressed_size; +} + + +extern uint32_t +lzma_index_padding_size(const lzma_index *i) +{ + return (LZMA_VLI_C(4) + - index_size_unpadded(i->count, i->index_list_size)) & 3; +} + + +/// Helper function for index_append() +static lzma_ret +index_append_real(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size, + bool is_padding) +{ + // Add the new record. + if (i->tail == NULL || i->tail->last == INDEX_GROUP_SIZE - 1) { + // Allocate a new group. + lzma_index_group *g = lzma_alloc(sizeof(lzma_index_group), + allocator); + if (g == NULL) + return LZMA_MEM_ERROR; + + // Initialize the group and set its first record. + g->prev = i->tail; + g->next = NULL; + g->last = 0; + g->total_sums[0] = total_size; + g->uncompressed_sums[0] = uncompressed_size; + g->paddings[0] = is_padding; + + // If this is the first group, make it the head. + if (i->head == NULL) + i->head = g; + else + i->tail->next = g; + + // Make it the new tail. + i->tail = g; + + } else { + // i->tail has space left for at least one record. + i->tail->total_sums[i->tail->last + 1] + = i->tail->total_sums[i->tail->last] + + total_size; + i->tail->uncompressed_sums[i->tail->last + 1] + = i->tail->uncompressed_sums[i->tail->last] + + uncompressed_size; + i->tail->paddings[i->tail->last + 1] = is_padding; + ++i->tail->last; + } + + return LZMA_OK; +} + + +static lzma_ret +index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, + lzma_vli uncompressed_size, bool is_padding) +{ + if (total_size > LZMA_VLI_VALUE_MAX + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + // This looks a bit ugly. We want to first validate that the Index + // and Stream stay in valid limits after adding this Record. After + // validating, we may need to allocate a new lzma_index_group (it's + // slightly more correct to validate before allocating, YMMV). + lzma_ret ret; + + if (is_padding) { + assert(uncompressed_size == 0); + + // First update the info so we can validate it. + i->padding_size += total_size; + + if (i->padding_size > LZMA_VLI_VALUE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, true); + + // If something went wrong, undo the updated value. + if (ret != LZMA_OK) + i->padding_size -= total_size; + + } else { + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add + = lzma_vli_size(total_size / 4 - 1) + + lzma_vli_size(uncompressed_size); + + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; + + if (i->total_size > LZMA_VLI_VALUE_MAX + || i->uncompressed_size > LZMA_VLI_VALUE_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) + > LZMA_VLI_VALUE_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, + total_size, uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } + } + + return ret; +} + + +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli total_size, lzma_vli uncompressed_size) +{ + return index_append(i, allocator, + total_size, uncompressed_size, false); +} + + +/// Initialize i->current to point to the first Record. +static bool +init_current(lzma_index *i) +{ + if (i->head == NULL) { + assert(i->count == 0); + return true; + } + + assert(i->count > 0); + + i->current.group = i->head; + i->current.record = 0; + i->current.stream_offset = LZMA_STREAM_HEADER_SIZE; + i->current.uncompressed_offset = 0; - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - } else if (i->uncompressed_size > LZMA_VLI_VALUE_MAX) { - return LZMA_PROG_ERROR; - } else if (*uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - *uncompressed_size += i->uncompressed_size; - if (*uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; + return false; +} + + +/// Go backward to the previous group. +static void +previous_group(lzma_index *i) +{ + assert(i->current.group->prev != NULL); + + // Go to the previous group first. + i->current.group = i->current.group->prev; + i->current.record = i->current.group->last; + + // Then update the offsets. + i->current.stream_offset -= i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset -= i->current.group + ->uncompressed_sums[i->current.group->last]; + + return; +} + + +/// Go forward to the next group. +static void +next_group(lzma_index *i) +{ + assert(i->current.group->next != NULL); + + // Update the offsets first. + i->current.stream_offset += i->current.group + ->total_sums[i->current.group->last]; + i->current.uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.group->last]; + + // Then go to the next group. + i->current.record = 0; + i->current.group = i->current.group->next; + + return; +} + + +/// Set *info from i->current. +static void +set_info(const lzma_index *i, lzma_index_record *info) +{ + info->total_size = i->current.group->total_sums[i->current.record]; + info->uncompressed_size = i->current.group->uncompressed_sums[ + i->current.record]; + + info->stream_offset = i->current.stream_offset; + info->uncompressed_offset = i->current.uncompressed_offset; + + // If it's not the first Record in this group, we need to do some + // adjustements. + if (i->current.record > 0) { + // _sums[] are cumulative, thus we need to substract the + // _previous _sums[] to get the sizes of this Record. + info->total_size -= i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_size -= i->current.group + ->uncompressed_sums[i->current.record - 1]; + + // i->current.{total,uncompressed}_offsets have the offset + // of the beginning of the group, thus we need to add the + // appropriate amount to get the offsetes of this Record. + info->stream_offset += i->current.group + ->total_sums[i->current.record - 1]; + info->uncompressed_offset += i->current.group + ->uncompressed_sums[i->current.record - 1]; + } + + return; +} + + +extern LZMA_API lzma_bool +lzma_index_read(lzma_index *i, lzma_index_record *info) +{ + if (i->current.group == NULL) { + // We are at the beginning of the Record list. Set up + // i->current point at the first Record. Return if there + // are no Records. + if (init_current(i)) + return true; + } else do { + // Try to go the next Record. + if (i->current.record < i->current.group->last) + ++i->current.record; + else if (i->current.group->next == NULL) + return true; + else + next_group(i); + } while (i->current.group->paddings[i->current.record]); + + // We found a new Record. Set the information to *info. + set_info(i, info); + + return false; +} + + +extern LZMA_API void +lzma_index_rewind(lzma_index *i) +{ + i->current.group = NULL; + return; +} + + +extern LZMA_API lzma_bool +lzma_index_locate(lzma_index *i, lzma_index_record *info, lzma_vli target) +{ + // Check if it is possible to fullfill the request. + if (target >= i->uncompressed_size) + return true; + + // Now we know that we will have an answer. Initialize the current + // read position if needed. + if (i->current.group == NULL && init_current(i)) + return true; + + // Locate the group where the wanted Block is. First search forward. + while (i->current.uncompressed_offset <= target) { + // If the first uncompressed byte of the next group is past + // the target offset, it has to be this or an earlier group. + if (i->current.uncompressed_offset + i->current.group + ->uncompressed_sums[i->current.group->last] + > target) + break; + + // Go forward to the next group. + next_group(i); + } + + // Then search backward. + while (i->current.uncompressed_offset > target) + previous_group(i); + + // Now the target Block is somewhere in i->current.group. Offsets + // in groups are relative to the beginning of the group, thus + // we must adjust the target before starting the search loop. + assert(target >= i->current.uncompressed_offset); + target -= i->current.uncompressed_offset; + + // Use binary search to locate the exact Record. It is the first + // Record whose uncompressed_sums[] value is greater than target. + // This is because we want the rightmost Record that fullfills the + // search criterion. It is possible that there are empty Blocks or + // padding, we don't want to return them. + size_t left = 0; + size_t right = i->current.group->last; + + while (left < right) { + const size_t pos = left + (right - left) / 2; + if (i->current.group->uncompressed_sums[pos] <= target) + left = pos + 1; + else + right = pos; + } + + i->current.record = left; + +#ifndef NDEBUG + // The found Record must not be padding or have zero uncompressed size. + assert(!i->current.group->paddings[i->current.record]); + + if (i->current.record == 0) + assert(i->current.group->uncompressed_sums[0] > 0); + else + assert(i->current.group->uncompressed_sums[i->current.record] + - i->current.group->uncompressed_sums[ + i->current.record - 1] > 0); +#endif + + set_info(i, info); + + return false; +} + + +extern LZMA_API lzma_ret +lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, + lzma_allocator *allocator, lzma_vli padding) +{ + if (dest == NULL || src == NULL || dest == src + || padding > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Check that the combined size of the Indexes stays within limits. + { + const lzma_vli dest_size = lzma_index_file_size(dest); + const lzma_vli src_size = lzma_index_file_size(src); + if (dest_size + src_size > LZMA_VLI_VALUE_UNKNOWN + || dest_size + src_size + padding + > LZMA_VLI_VALUE_UNKNOWN) + return LZMA_DATA_ERROR; + } + + // Add a padding Record to take into account the size of + // Index + Stream Footer + Stream Padding + Stream Header. + // + // NOTE: This cannot overflow, because Index Size is always + // far smaller than LZMA_VLI_VALUE_MAX, and adding two VLIs + // (Index Size and padding) doesn't overflow. It may become + // an invalid VLI if padding is huge, but that is caught by + // index_append(). + padding += index_size(dest->count - dest->old.count, + dest->index_list_size + - dest->old.index_list_size) + + LZMA_STREAM_HEADER_SIZE * 2; + + // Add the padding Record. + return_if_error(index_append( + dest, allocator, padding, 0, true)); + + // Avoid wasting lots of memory if src->head has only a few records + // that fit into dest->tail. That is, combine two groups if possible. + // + // NOTE: We know that dest->tail != NULL since we just appended + // a padding Record. But we don't know about src->head. + if (src->head != NULL && src->head->last + 1 + <= INDEX_GROUP_SIZE - dest->tail->last - 1) { + // Copy the first Record. + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[0]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[dest->tail->last] + + src->head->uncompressed_sums[0]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[0]; + + ++dest->tail->last; + + // Copy the rest. + for (size_t i = 1; i < src->head->last; ++i) { + dest->tail->total_sums[dest->tail->last + 1] + = dest->tail->total_sums[dest->tail->last] + + src->head->total_sums[i + 1] + - src->head->total_sums[i]; + + dest->tail->uncompressed_sums[dest->tail->last + 1] + = dest->tail->uncompressed_sums[ + dest->tail->last] + + src->head->uncompressed_sums[i + 1] + - src->head->uncompressed_sums[i]; + + dest->tail->paddings[dest->tail->last + 1] + = src->head->paddings[i + 1]; + + ++dest->tail->last; } - ++*count; - i = i->next; + // Free the head group of *src. Don't bother updating prev + // pointers since those won't be used for anything before + // we deallocate the whole *src structure. + lzma_index_group *tmp = src->head; + src->head = src->head->next; + lzma_free(tmp, allocator); + } + + // If there are groups left in *src, join them as is. Note that if we + // are combining already combined Indexes, src->head can be non-NULL + // even if we just combined the old src->head to dest->tail. + if (src->head != NULL) { + src->head->prev = dest->tail; + dest->tail->next = src->head; + dest->tail = src->tail; } - // FIXME ? - if (*total_size == LZMA_VLI_VALUE_UNKNOWN - || *uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - return LZMA_HEADER_ERROR; + // Update information about earlier Indexes. Only the last Index + // from *src won't be counted in dest->old. The last Index is left + // open and can be even appended with lzma_index_append(). + dest->old.count = dest->count + src->old.count; + dest->old.index_list_size + = dest->index_list_size + src->old.index_list_size; + + // Update overall information. + dest->total_size += src->total_size; + dest->uncompressed_size += src->uncompressed_size; + dest->count += src->count; + dest->index_list_size += src->index_list_size; + dest->padding_size += src->padding_size; + + // *src has nothing left but the base structure. + lzma_free(src, allocator); return LZMA_OK; } +extern LZMA_API lzma_index * +lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) +{ + lzma_index *dest = lzma_alloc(sizeof(lzma_index), allocator); + if (dest == NULL) + return NULL; + + // Copy the base structure except the pointers. + *dest = *src; + dest->head = NULL; + dest->tail = NULL; + dest->current.group = NULL; + + // Copy the Records. + const lzma_index_group *src_group = src->head; + while (src_group != NULL) { + // Allocate a new group. + lzma_index_group *dest_group = lzma_alloc( + sizeof(lzma_index_group), allocator); + if (dest_group == NULL) { + lzma_index_end(dest, allocator); + return NULL; + } + + // Set the pointers. + dest_group->prev = dest->tail; + dest_group->next = NULL; + + if (dest->head == NULL) + dest->head = dest_group; + else + dest->tail->next = dest_group; + + dest->tail = dest_group; + + dest_group->last = src_group->last; + + // Copy the arrays so that we don't read uninitialized memory. + const size_t count = src_group->last + 1; + memcpy(dest_group->total_sums, src_group->total_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->uncompressed_sums, + src_group->uncompressed_sums, + sizeof(lzma_vli) * count); + memcpy(dest_group->paddings, src_group->paddings, + sizeof(bool) * count); + + // Copy also the read position. + if (src_group == src->current.group) + dest->current.group = dest->tail; + + src_group = src_group->next; + } + + return dest; +} + extern LZMA_API lzma_bool -lzma_index_is_equal(const lzma_index *a, const lzma_index *b) +lzma_index_equal(const lzma_index *a, const lzma_index *b) { - while (a != NULL && b != NULL) { - if (a->total_size != b->total_size || a->uncompressed_size - != b->uncompressed_size) + // No point to compare more if the pointers are the same. + if (a == b) + return true; + + // Compare the basic properties. + if (a->total_size != b->total_size + || a->uncompressed_size != b->uncompressed_size + || a->index_list_size != b->index_list_size + || a->count != b->count) + return false; + + // Compare the Records. + const lzma_index_group *ag = a->head; + const lzma_index_group *bg = b->head; + while (ag != NULL && bg != NULL) { + const size_t count = ag->last + 1; + if (ag->last != bg->last + || memcmp(ag->total_sums, + bg->total_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->uncompressed_sums, + bg->uncompressed_sums, + sizeof(lzma_vli) * count) != 0 + || memcmp(ag->paddings, bg->paddings, + sizeof(bool) * count) != 0) return false; - a = a->next; - b = b->next; + ag = ag->next; + bg = bg->next; } - return a == b; + return ag == NULL && bg == NULL; } diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h new file mode 100644 index 00000000..303ad43a --- /dev/null +++ b/src/liblzma/common/index.h @@ -0,0 +1,67 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index.h +/// \brief Handling of Index +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_H +#define LZMA_INDEX_H + +#include "common.h" + + +/// Maximum encoded value of Total Size. +#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_VALUE_MAX / 4 - 1) + +/// Convert the real Total Size value to a value that is stored to the Index. +#define total_size_encode(size) ((size) / 4 - 1) + +/// Convert the encoded Total Size value from Index to the real Total Size. +#define total_size_decode(size) (((size) + 1) * 4) + + +/// Get the size of the Index Padding field. This is needed by Index encoder +/// and decoder, but applications should have no use for this. +extern uint32_t lzma_index_padding_size(const lzma_index *i); + + +static inline lzma_vli +index_size_unpadded(lzma_vli count, lzma_vli index_list_size) +{ + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + lzma_vli_size(count) + index_list_size + 4; +} + + +static inline lzma_vli +index_size(lzma_vli count, lzma_vli index_list_size) +{ + // Round up to a mulitiple of four. + return (index_size_unpadded(count, index_list_size) + 3) + & ~LZMA_VLI_C(3); +} + + +static inline lzma_vli +index_stream_size( + lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +{ + return LZMA_STREAM_HEADER_SIZE + total_size + + index_size(count, index_list_size) + + LZMA_STREAM_HEADER_SIZE; +} + +#endif diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000..1635948c --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Target Index + lzma_index *index; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Total Size field + lzma_vli total_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_TOTAL + ? &coder->total_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_TOTAL) { + // Validate that encoded Total Size isn't too big. + if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + // Convert the encoded Total Size to the real + // Total Size. + coder->total_size = total_size_decode( + coder->total_size); + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->total_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + // We always allocate a new lzma_index. + *i = lzma_index_init(NULL, allocator); + if (*i == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = *i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + lzma_next_coder_init(index_decoder_init, next, allocator, i); +} +*/ + + +extern LZMA_API lzma_ret +lzma_index_decoder(lzma_stream *strm, lzma_index **i) +{ + lzma_next_strm_init(strm, index_decoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c new file mode 100644 index 00000000..5a7d8c8c --- /dev/null +++ b/src/liblzma/common/index_encoder.c @@ -0,0 +1,222 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.c +/// \brief Encodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index_encoder.h" +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_NEXT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Index given to us to encode. Note that we modify it in sense that + /// we read it, and read position is tracked in lzma_index structure. + lzma_index *index; + + /// The current Index Record being encoded + lzma_index_record record; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in lzma_attribute((unused)), + size_t *restrict in_pos lzma_attribute((unused)), + size_t in_size lzma_attribute((unused)), + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action lzma_attribute((unused))) +{ + // Position where to start calculating CRC32. The idea is that we + // need to call lzma_crc32() only once per call to index_encode(). + const size_t out_start = *out_pos; + + // Return value to use if we return at the end of this function. + // We use "goto out" to jump out of the while-switch construct + // instead of returning directly, because that way we don't need + // to copypaste the lzma_crc32() call to many places. + lzma_ret ret = LZMA_OK; + + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + out[*out_pos] = 0x00; + ++*out_pos; + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + const lzma_vli index_count = lzma_index_count(coder->index); + ret = lzma_vli_encode(index_count, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = SEQ_NEXT; + break; + } + + case SEQ_NEXT: + if (lzma_index_read(coder->index, &coder->record)) { + // Get the size of the Index Padding field. + coder->pos = lzma_index_padding_size(coder->index); + assert(coder->pos <= 3); + coder->sequence = SEQ_PADDING; + break; + } + + // Total Size must be a multiple of four. + if (coder->record.total_size & 3) + return LZMA_PROG_ERROR; + + coder->sequence = SEQ_TOTAL; + + // Fall through + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + const lzma_vli size = coder->sequence == SEQ_TOTAL + ? total_size_encode(coder->record.total_size) + : coder->record.uncompressed_size; + + ret = lzma_vli_encode(size, &coder->pos, + out, out_pos, out_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. + ++coder->sequence; + break; + } + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + out[(*out_pos)++] = 0x00; + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + // We don't use the main loop, because we don't want + // coder->crc32 to be touched anymore. + do { + if (*out_pos == out_size) + return LZMA_OK; + + out[*out_pos] = (coder->crc32 >> (coder->pos * 8)) + & 0xFF; + ++*out_pos; + + } while (++coder->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32. + coder->crc32 = lzma_crc32(out + out_start, + *out_pos - out_start, coder->crc32); + + return ret; +} + + +static void +index_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_encode; + next->end = &index_encoder_end; + } + + lzma_index_rewind(i); + + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_index_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index *i) +{ + lzma_next_coder_init(index_encoder_init, next, allocator, i); +} + + +extern LZMA_API lzma_ret +lzma_index_encoder(lzma_stream *strm, lzma_index *i) +{ + lzma_next_strm_init(strm, index_encoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/index_encoder.h b/src/liblzma/common/index_encoder.h new file mode 100644 index 00000000..0087c284 --- /dev/null +++ b/src/liblzma/common/index_encoder.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_encoder.h +/// \brief Encodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_INDEX_ENCODER_H +#define LZMA_INDEX_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next, + lzma_allocator *allocator, lzma_index *i); + + +#endif diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c new file mode 100644 index 00000000..35dea41f --- /dev/null +++ b/src/liblzma/common/index_hash.c @@ -0,0 +1,340 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_hash.c +/// \brief Validates Index by using a hash function +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" +#include "index.h" +#include "check.h" + + +typedef struct { + /// Sum of the Total Size fields + lzma_vli total_size; + + /// Sum of the Uncompressed Size fields + lzma_vli uncompressed_size; + + /// Number of Records + lzma_vli count; + + /// Size of the List of Index Records as bytes + lzma_vli index_list_size; + + /// Check calculated from Total Sizes and Uncompressed Sizes. + lzma_check check; + +} lzma_index_hash_info; + + +struct lzma_index_hash_s { + enum { + SEQ_BLOCK, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Information collected while decoding the actual Blocks. + lzma_index_hash_info blocks; + + /// Information collected from the Index field. + lzma_index_hash_info records; + + /// Number of Records not fully decoded + lzma_vli remaining; + + /// Total Size currently being read from an Index Record. + lzma_vli total_size; + + /// Uncompressed Size currently being read from an Index Record. + lzma_vli uncompressed_size; + + /// Position in variable-length integers when decoding them from + /// the List of Records. + size_t pos; + + /// CRC32 of the Index + uint32_t crc32; +}; + + +extern LZMA_API lzma_index_hash * +lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + if (index_hash == NULL) { + index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); + if (index_hash == NULL) + return NULL; + } + + index_hash->sequence = SEQ_BLOCK; + index_hash->blocks.total_size = 0; + index_hash->blocks.uncompressed_size = 0; + index_hash->blocks.count = 0; + index_hash->blocks.index_list_size = 0; + index_hash->records.total_size = 0; + index_hash->records.uncompressed_size = 0; + index_hash->records.count = 0; + index_hash->records.index_list_size = 0; + index_hash->total_size = 0; + index_hash->uncompressed_size = 0; + index_hash->pos = 0; + index_hash->crc32 = 0; + + // These cannot fail because LZMA_CHECK_BEST is known to be supported. + (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); + (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); + + return index_hash; +} + + +extern LZMA_API void +lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator) +{ + lzma_free(index_hash, allocator); + return; +} + + +extern LZMA_API lzma_vli +lzma_index_hash_size(const lzma_index_hash *index_hash) +{ + // Get the size of the Index from ->blocks instead of ->records for + // cases where application wants to know the Index Size before + // decoding the Index. + return index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size); +} + + +/// Updates the sizes and the hash without any validation. +static lzma_ret +hash_append(lzma_index_hash_info *info, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + info->total_size += total_size; + info->uncompressed_size += uncompressed_size; + info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + + lzma_vli_size(uncompressed_size); + ++info->count; + + const lzma_vli sizes[2] = { total_size, uncompressed_size }; + lzma_check_update(&info->check, LZMA_CHECK_BEST, + (const uint8_t *)(sizes), sizeof(sizes)); + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, + lzma_vli uncompressed_size) +{ + // Validate the arguments. + if (index_hash->sequence != SEQ_BLOCK || total_size == 0 || + total_size > LZMA_VLI_VALUE_MAX || (total_size & 3) + || uncompressed_size > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; + + // Update the hash. + return_if_error(hash_append(&index_hash->blocks, + total_size, uncompressed_size)); + + // Validate the properties of *info are still in allowed limits. + if (index_hash->blocks.total_size > LZMA_VLI_VALUE_MAX + || index_hash->blocks.uncompressed_size + > LZMA_VLI_VALUE_MAX + || index_size(index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_BACKWARD_SIZE_MAX + || index_stream_size(index_hash->blocks.total_size, + index_hash->blocks.count, + index_hash->blocks.index_list_size) + > LZMA_VLI_VALUE_MAX) + return LZMA_DATA_ERROR; + + return LZMA_OK; +} + + +extern LZMA_API lzma_ret +lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, + size_t *in_pos, size_t in_size) +{ + // Catch zero input buffer here, because in contrast to Index encoder + // and decoder functions, applications call this function directly + // instead of via lzma_code(), which does the buffer checking. + if (*in_pos >= in_size) + return LZMA_BUF_ERROR; + + // NOTE: This function has many similarities to index_encode() and + // index_decode() functions found from index_encoder.c and + // index_decoder.c. See the comments especially in index_encoder.c. + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (index_hash->sequence) { + case SEQ_BLOCK: + // Check the Index Indicator is present. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + index_hash->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&index_hash->remaining, + &index_hash->pos, in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + // The count must match the count of the Blocks decoded. + if (index_hash->remaining != index_hash->blocks.count) + return LZMA_DATA_ERROR; + + ret = LZMA_OK; + index_hash->pos = 0; + + // Handle the special case when there are no Blocks. + index_hash->sequence = index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = index_hash->sequence == SEQ_TOTAL + ? &index_hash->total_size + : &index_hash->uncompressed_size; + + ret = lzma_vli_decode(size, &index_hash->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + index_hash->pos = 0; + + if (index_hash->sequence == SEQ_TOTAL) { + if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + index_hash->total_size = total_size_decode( + index_hash->total_size); + + index_hash->sequence = SEQ_UNCOMPRESSED; + } else { + // Update the hash. + return_if_error(hash_append(&index_hash->records, + index_hash->total_size, + index_hash->uncompressed_size)); + + // Verify that we don't go over the known sizes. Note + // that this validation is simpler than the one used + // in lzma_index_hash_append(), because here we know + // that values in index_hash->blocks are already + // validated and we are fine as long as we don't + // exceed them in index_hash->records. + if (index_hash->blocks.total_size + < index_hash->records.total_size + || index_hash->blocks.uncompressed_size + < index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + < index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Check if this was the last Record. + index_hash->sequence = --index_hash->remaining == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( + index_hash->records.count, + index_hash->records.index_list_size)) & 3; + index_hash->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (index_hash->pos > 0) { + --index_hash->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Compare the sizes. + if (index_hash->blocks.total_size + != index_hash->records.total_size + || index_hash->blocks.uncompressed_size + != index_hash->records.uncompressed_size + || index_hash->blocks.index_list_size + != index_hash->records.index_list_size) + return LZMA_DATA_ERROR; + + // Finish the hashes and compare them. + lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); + lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); + if (memcmp(index_hash->blocks.check.buffer, + index_hash->records.check.buffer, + lzma_check_sizes[LZMA_CHECK_BEST]) != 0) + return LZMA_DATA_ERROR; + + // Finish the CRC32 calculation. + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + index_hash->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((index_hash->crc32 >> (index_hash->pos * 8)) + & 0xFF) != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++index_hash->pos < 4); + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + index_hash->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, index_hash->crc32); + + return ret; +} diff --git a/src/liblzma/common/info.c b/src/liblzma/common/info.c deleted file mode 100644 index ab7fc999..00000000 --- a/src/liblzma/common/info.c +++ /dev/null @@ -1,814 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file info.c -/// \brief Collects and verifies integrity of Stream size information -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -struct lzma_info_s { - struct { - /// Known Size of Header Metadata Block; here's some - /// special things: - /// - LZMA_VLI_VALUE_UNKNOWN indicates that we don't know - /// if Header Metadata Block is present. - /// - 0 indicates that Header Metadata Block is not present. - lzma_vli header_metadata_size; - - /// Known Total Size of the Data Blocks in the Stream - lzma_vli total_size; - - /// Known Uncompressed Size of the Data Blocks in the Stream - lzma_vli uncompressed_size; - - /// Known Size of Footer Metadata Block - lzma_vli footer_metadata_size; - } known; - - struct { - /// Sum of Total Size fields stored to the Index so far - lzma_vli total_size; - - /// Sum of Uncompressed Size fields stored to the Index so far - lzma_vli uncompressed_size; - - /// First Index Record in the list, or NULL if Index is empty. - lzma_index *head; - - /// Number of Index Records - size_t record_count; - - /// Number of Index Records - size_t incomplete_count; - - /// True when we know that no more Records will get added - /// to the Index. - bool is_final; - } index; - - /// Start offset of the Stream. This is needed to calculate - /// lzma_info_iter.stream_offset. - lzma_vli stream_start_offset; - - /// True if Index is present in Header Metadata Block - bool has_index_in_header_metadata; -}; - - -////////////////////// -// Create/Reset/End // -////////////////////// - -static void -index_init(lzma_info *info) -{ - info->index.total_size = 0; - info->index.uncompressed_size = 0; - info->index.head = NULL; - info->index.record_count = 0; - info->index.incomplete_count = 0; - info->index.is_final = false; - return; -} - - -static void -info_init(lzma_info *info) -{ - info->known.header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.total_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - info->known.footer_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - info->stream_start_offset = 0; - info->has_index_in_header_metadata = false; - - index_init(info); - - return; -} - - -extern LZMA_API lzma_info * -lzma_info_init(lzma_info *info, lzma_allocator *allocator) -{ - if (info == NULL) - info = lzma_alloc(sizeof(lzma_info), allocator); - else - lzma_index_free(info->index.head, allocator); - - if (info != NULL) - info_init(info); - - return info; -} - - -extern LZMA_API void -lzma_info_free(lzma_info *info, lzma_allocator *allocator) -{ - lzma_index_free(info->index.head, allocator); - lzma_free(info, allocator); - return; -} - - -///////// -// Set // -///////// - -static lzma_ret -set_size(lzma_vli new_size, lzma_vli *known_size, lzma_vli index_size, - bool forbid_zero) -{ - assert(new_size <= LZMA_VLI_VALUE_MAX); - - lzma_ret ret = LZMA_OK; - - if (forbid_zero && new_size == 0) - ret = LZMA_PROG_ERROR; - else if (index_size > new_size) - ret = LZMA_DATA_ERROR; - else if (*known_size == LZMA_VLI_VALUE_UNKNOWN) - *known_size = new_size; - else if (*known_size != new_size) - ret = LZMA_DATA_ERROR; - - return ret; -} - - -extern LZMA_API lzma_ret -lzma_info_size_set(lzma_info *info, lzma_info_size type, lzma_vli size) -{ - if (size > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - switch (type) { - case LZMA_INFO_STREAM_START: - info->stream_start_offset = size; - return LZMA_OK; - - case LZMA_INFO_HEADER_METADATA: - return set_size(size, &info->known.header_metadata_size, - 0, false); - - case LZMA_INFO_TOTAL: - return set_size(size, &info->known.total_size, - info->index.total_size, true); - - case LZMA_INFO_UNCOMPRESSED: - return set_size(size, &info->known.uncompressed_size, - info->index.uncompressed_size, false); - - case LZMA_INFO_FOOTER_METADATA: - return set_size(size, &info->known.footer_metadata_size, - 0, true); - } - - return LZMA_PROG_ERROR; -} - - -extern LZMA_API lzma_ret -lzma_info_index_set(lzma_info *info, lzma_allocator *allocator, - lzma_index *i_new, lzma_bool eat_index) -{ - if (i_new == NULL) - return LZMA_PROG_ERROR; - - lzma_index *i_old = info->index.head; - - if (i_old != NULL) { - while (true) { - // If the new Index has fewer Records than the old one, - // the new Index cannot be valid. - if (i_new == NULL) - return LZMA_DATA_ERROR; - - // The new Index must be complete i.e. no unknown - // values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_PROG_ERROR; - } - - // Compare the values from the new Index with the old - // Index. The old Index may be incomplete; in that - // case we - // - use the value from the new Index as is; - // - update the appropriate info->index.foo_size; and - // - decrease the count of incomplete Index Records. - bool was_incomplete = false; - - if (i_old->total_size == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->total_size = i_new->total_size; - - if (lzma_vli_add(info->index.total_size, - i_new->total_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->total_size != i_new->total_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (i_old->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) { - assert(!info->index.is_final); - was_incomplete = true; - - i_old->uncompressed_size - = i_new->uncompressed_size; - - if (lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_new, - allocator); - - return LZMA_PROG_ERROR; - } - } else if (i_old->uncompressed_size - != i_new->uncompressed_size) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - if (was_incomplete) { - assert(!info->index.is_final); - assert(info->index.incomplete_count > 0); - --info->index.incomplete_count; - } - - // Get rid of *i_new. It's now identical with *i_old. - lzma_index *tmp = i_new->next; - if (eat_index) - lzma_free(i_new, allocator); - - i_new = tmp; - - // We want to leave i_old pointing to the last - // Index Record in the old Index. This way we can - // concatenate the possible new Records from i_new. - if (i_old->next == NULL) - break; - - i_old = i_old->next; - } - } - - assert(info->index.incomplete_count == 0); - - // If Index was already known to be final, i_new must be NULL now. - // The new Index cannot contain more Records that we already have. - if (info->index.is_final) { - assert(info->index.head != NULL); - - if (i_new != NULL) { - if (eat_index) - lzma_index_free(i_new, allocator); - - return LZMA_DATA_ERROR; - } - - return LZMA_OK; - } - - // The rest of the new Index is merged to the old Index. Keep the - // current i_new pointer in available. We need it when merging the - // new Index with the old one, and if an error occurs so we can - // get rid of the broken part of the new Index. - lzma_index *i_start = i_new; - while (i_new != NULL) { - // The new Index must be complete i.e. no unknown values. - if (i_new->total_size > LZMA_VLI_VALUE_MAX - || i_new->uncompressed_size - > LZMA_VLI_VALUE_MAX) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - // Update info->index.foo_sizes. - if (lzma_vli_add(info->index.total_size, i_new->total_size) - || lzma_vli_add(info->index.uncompressed_size, - i_new->uncompressed_size)) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_PROG_ERROR; - } - - ++info->index.record_count; - i_new = i_new->next; - } - - // All the Records in the new Index are good, and info->index.foo_sizes - // were successfully updated. - if (lzma_info_index_finish(info) != LZMA_OK) { - if (eat_index) - lzma_index_free(i_start, allocator); - - return LZMA_DATA_ERROR; - } - - // The Index is ready to be merged. If we aren't supposed to eat - // the Index, make a copy of it first. - if (!eat_index && i_start != NULL) { - i_start = lzma_index_dup(i_start, allocator); - if (i_start == NULL) - return LZMA_MEM_ERROR; - } - - // Concatenate the new Index with the old one. Note that it is - // possible that we don't have any old Index. - if (info->index.head == NULL) - info->index.head = i_start; - else - i_old->next = i_start; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_metadata_set(lzma_info *info, lzma_allocator *allocator, - lzma_metadata *metadata, lzma_bool is_header_metadata, - lzma_bool eat_index) -{ - // Validate *metadata. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || !lzma_vli_is_valid(metadata->uncompressed_size)) { - if (eat_index) { - lzma_index_free(metadata->index, allocator); - metadata->index = NULL; - } - - return LZMA_PROG_ERROR; - } - - // Index - if (metadata->index != NULL) { - if (is_header_metadata) - info->has_index_in_header_metadata = true; - - const lzma_ret ret = lzma_info_index_set( - info, allocator, metadata->index, eat_index); - - if (eat_index) - metadata->index = NULL; - - if (ret != LZMA_OK) - return ret; - - } else if (!is_header_metadata - && (metadata->total_size == LZMA_VLI_VALUE_UNKNOWN - || !info->has_index_in_header_metadata)) { - // Either Total Size or Index must be present in Footer - // Metadata Block. If Index is not present, it must have - // already been in the Header Metadata Block. Since we - // got here, these conditions weren't met. - return LZMA_DATA_ERROR; - } - - // Size of Header Metadata - if (!is_header_metadata) - return_if_error(lzma_info_size_set( - info, LZMA_INFO_HEADER_METADATA, - metadata->header_metadata_size)); - - // Total Size - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_TOTAL, metadata->total_size)); - - // Uncompressed Size - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return_if_error(lzma_info_size_set(info, - LZMA_INFO_UNCOMPRESSED, - metadata->uncompressed_size)); - - return LZMA_OK; -} - - -///////// -// Get // -///////// - -extern LZMA_API lzma_vli -lzma_info_size_get(const lzma_info *info, lzma_info_size type) -{ - switch (type) { - case LZMA_INFO_STREAM_START: - return info->stream_start_offset; - - case LZMA_INFO_HEADER_METADATA: - return info->known.header_metadata_size; - - case LZMA_INFO_TOTAL: - return info->known.total_size; - - case LZMA_INFO_UNCOMPRESSED: - return info->known.uncompressed_size; - - case LZMA_INFO_FOOTER_METADATA: - return info->known.footer_metadata_size; - } - - return LZMA_VLI_VALUE_UNKNOWN; -} - - -extern LZMA_API lzma_index * -lzma_info_index_get(lzma_info *info, lzma_bool detach) -{ - lzma_index *i = info->index.head; - - if (detach) - index_init(info); - - return i; -} - - -extern LZMA_API size_t -lzma_info_index_count_get(const lzma_info *info) -{ - return info->index.record_count; -} - - -///////////////// -// Incremental // -///////////////// - -enum { - ITER_INFO, - ITER_INDEX, - ITER_RESERVED_1, - ITER_RESERVED_2, -}; - - -#define iter_info ((lzma_info *)(iter->internal[ITER_INFO])) - -#define iter_index ((lzma_index *)(iter->internal[ITER_INDEX])) - - -extern LZMA_API void -lzma_info_iter_begin(lzma_info *info, lzma_info_iter *iter) -{ - *iter = (lzma_info_iter){ - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .stream_offset = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_offset = LZMA_VLI_VALUE_UNKNOWN, - .internal = { info, NULL, NULL, NULL }, - }; - - return; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_next(lzma_info_iter *iter, lzma_allocator *allocator) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL) { - // The first call after lzma_info_iter_begin(). - if (iter_info->known.header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - - iter->uncompressed_offset = 0; - - if (iter_info->index.head != NULL) { - // The first Index Record has already been allocated. - iter->internal[ITER_INDEX] = iter_info->index.head; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } else { - // Update iter->*_offsets. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->stream_offset, - iter_index->total_size)) - return LZMA_DATA_ERROR; - } - - if (iter->uncompressed_offset != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN) - iter->uncompressed_offset - = LZMA_VLI_VALUE_UNKNOWN; - else if (lzma_vli_add(iter->uncompressed_offset, - iter_index->uncompressed_size)) - return LZMA_DATA_ERROR; - } - - if (iter_index->next != NULL) { - // The next Record has already been allocated. - iter->internal[ITER_INDEX] = iter_index->next; - iter->total_size = iter_index->total_size; - iter->uncompressed_size - = iter_index->uncompressed_size; - return LZMA_OK; - } - } - - // Don't add new Records to a final Index. - if (iter_info->index.is_final) - return LZMA_DATA_ERROR; - - // Allocate and initialize a new Index Record. - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = LZMA_VLI_VALUE_UNKNOWN; - i->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - i->next = NULL; - - iter->total_size = LZMA_VLI_VALUE_UNKNOWN; - iter->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // Decide where to put the new Index Record. - if (iter_info->index.head == NULL) - iter_info->index.head = i; - - if (iter_index != NULL) - iter_index->next = i; - - iter->internal[ITER_INDEX] = i; - - ++iter_info->index.record_count; - ++iter_info->index.incomplete_count; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_set(lzma_info_iter *iter, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - // FIXME debug remove - lzma_info *info = iter_info; - (void)info; - - if (iter_index == NULL || !lzma_vli_is_valid(total_size) - || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - const bool was_incomplete = iter_index->total_size - == LZMA_VLI_VALUE_UNKNOWN - || iter_index->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - - if (total_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->total_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->total_size = total_size; - - if (lzma_vli_add(iter_info->index.total_size, - total_size) - || iter_info->index.total_size - > iter_info->known.total_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->total_size != total_size) { - return LZMA_DATA_ERROR; - } - } - - if (uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (iter_index->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { - iter_index->uncompressed_size = uncompressed_size; - - if (lzma_vli_add(iter_info->index.uncompressed_size, - uncompressed_size) - || iter_info->index.uncompressed_size - > iter_info->known.uncompressed_size) - return LZMA_DATA_ERROR; - - } else if (iter_index->uncompressed_size - != uncompressed_size) { - return LZMA_DATA_ERROR; - } - } - - // Check if the new information we got managed to finish this - // Index Record. If so, update the count of incomplete Index Records. - if (was_incomplete && iter_index->total_size - != LZMA_VLI_VALUE_UNKNOWN - && iter_index->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - assert(iter_info->index.incomplete_count > 0); - --iter_info->index.incomplete_count; - } - - // Make sure that the known sizes are now available in *iter. - iter->total_size = iter_index->total_size; - iter->uncompressed_size = iter_index->uncompressed_size; - - return LZMA_OK; -} - - -extern LZMA_API lzma_ret -lzma_info_index_finish(lzma_info *info) -{ - if (info->index.record_count == 0 || info->index.incomplete_count > 0 - || lzma_info_size_set(info, LZMA_INFO_TOTAL, - info->index.total_size) - || lzma_info_size_set(info, LZMA_INFO_UNCOMPRESSED, - info->index.uncompressed_size)) - return LZMA_DATA_ERROR; - - info->index.is_final = true; - - return LZMA_OK; -} - - -////////////// -// Locating // -////////////// - -extern LZMA_API lzma_vli -lzma_info_metadata_locate(const lzma_info *info, lzma_bool is_header_metadata) -{ - bool error = false; - lzma_vli size = 0; - - if (info->known.header_metadata_size == LZMA_VLI_VALUE_UNKNOWN) { - // We don't know if Header Metadata Block is present, thus - // we cannot locate it either. - // - // Well, you could say that just assume that it is present. - // I'm not sure if this is useful. But it can be useful to - // be able to use this function and get LZMA_VLI_VALUE_UNKNOWN - // to detect that Header Metadata Block wasn't present. - error = true; - } else if (is_header_metadata) { - error = lzma_vli_sum(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE); - } else if (!info->index.is_final) { - // Since we don't know if we have all the Index Records yet, - // we cannot know where the Footer Metadata Block is. - error = true; - } else { - error = lzma_vli_sum4(size, info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - info->known.header_metadata_size, - info->known.total_size); - } - - return error ? LZMA_VLI_VALUE_UNKNOWN : size; -} - - -extern LZMA_API uint32_t -lzma_info_metadata_alignment_get( - const lzma_info *info, lzma_bool is_header_metadata) -{ - uint32_t alignment; - - if (is_header_metadata) { - alignment = info->stream_start_offset - + LZMA_STREAM_HEADER_SIZE; - } else { - alignment = info->stream_start_offset + LZMA_STREAM_HEADER_SIZE - + info->known.header_metadata_size - + info->known.total_size; - } - - return alignment; -} - - -extern LZMA_API lzma_ret -lzma_info_iter_locate(lzma_info_iter *iter, lzma_allocator *allocator, - lzma_vli uncompressed_offset, lzma_bool allow_alloc) -{ - if (iter == NULL || uncompressed_offset > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - // Quick check in case Index is final. - if (iter_info->index.is_final) { - assert(iter_info->known.uncompressed_size - == iter_info->index.uncompressed_size); - if (uncompressed_offset >= iter_info->index.uncompressed_size) - return LZMA_DATA_ERROR; - } - - // TODO: Optimize so that it uses existing info from *iter when - // seeking forward. - - // Initialize *iter - if (iter_info->known.header_metadata_size != LZMA_VLI_VALUE_UNKNOWN) { - if (lzma_vli_sum3(iter->stream_offset, - iter_info->stream_start_offset, - LZMA_STREAM_HEADER_SIZE, - iter_info->known.header_metadata_size)) - return LZMA_PROG_ERROR; - } else { - // We don't know the Size of Header Metadata Block, thus - // we cannot calculate the Stream offset either. - iter->stream_offset = LZMA_VLI_VALUE_UNKNOWN; - } - - iter->uncompressed_offset = 0; - - // If we have no Index Records, it's obvious that we need to - // add a new one. - if (iter_info->index.head == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - return lzma_info_iter_next(iter, allocator); - } - - // Locate an appropriate Index Record. - lzma_index *i = iter_info->index.head; - while (true) { - // - If Uncompressed Size in the Record is unknown, - // we have no chance to search further. - // - If the next Record would go past the requested offset, - // we have found our target Data Block. - if (i->uncompressed_size == LZMA_VLI_VALUE_UNKNOWN - || iter->uncompressed_offset - + i->uncompressed_size > uncompressed_offset) { - iter->total_size = i->total_size; - iter->uncompressed_size = i->uncompressed_size; - iter->internal[ITER_INDEX] = i; - return LZMA_OK; - } - - // Update the stream offset. It may be unknown if we didn't - // know the size of Header Metadata Block. - if (iter->stream_offset != LZMA_VLI_VALUE_UNKNOWN) - if (lzma_vli_add(iter->stream_offset, i->total_size)) - return LZMA_PROG_ERROR; - - // Update the uncompressed offset. This cannot overflow since - // the Index is known to be valid. - iter->uncompressed_offset += i->uncompressed_size; - - // Move to the next Block. - if (i->next == NULL) { - assert(!iter_info->index.is_final); - if (!allow_alloc) - return LZMA_DATA_ERROR; - - iter->internal[ITER_INDEX] = i; - return lzma_info_iter_next(iter, allocator); - } - - i = i->next; - } -} diff --git a/src/liblzma/common/memory_usage.c b/src/liblzma/common/memory_usage.c index b6f27957..8244c404 100644 --- a/src/liblzma/common/memory_usage.c +++ b/src/liblzma/common/memory_usage.c @@ -28,7 +28,6 @@ get_usage(const lzma_options_filter *filter, bool is_encoder) uint64_t ret; switch (filter->id) { - case LZMA_FILTER_COPY: case LZMA_FILTER_X86: case LZMA_FILTER_POWERPC: case LZMA_FILTER_IA64: diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c deleted file mode 100644 index 579b0a51..00000000 --- a/src/liblzma/common/metadata_decoder.c +++ /dev/null @@ -1,578 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.c -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_decoder.h" -#include "block_decoder.h" - - -/// Maximum size of a single Extra Record. Again, this is mostly to make -/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should -/// be smaller. -#define EXTRA_SIZE_MAX (SIZE_MAX / 4) - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_ALLOC, - SEQ_INDEX_TOTAL_SIZE, - SEQ_INDEX_UNCOMPRESSED_SIZE, - SEQ_EXTRA_PREPARE, - SEQ_EXTRA_ALLOC, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA_ALLOC, - SEQ_EXTRA_DATA_COPY, - SEQ_EXTRA_DUMMY_ALLOC, - SEQ_EXTRA_DUMMY_ID, - SEQ_EXTRA_DUMMY_SIZE, - SEQ_EXTRA_DUMMY_COPY, - } sequence; - - /// Number of "things" left to be parsed. If we hit end of input - /// when this isn't zero, we have corrupt Metadata Block. - size_t todo_count; - - /// Position in variable-length integers - size_t pos; - - /// Temporary variable needed to decode variables whose type - /// is size_t instead of lzma_vli. - lzma_vli tmp; - - /// Pointer to target structure to hold the parsed results. - lzma_metadata *metadata; - - /// The Index Record we currently are parsing - lzma_index *index_current; - - /// Number of Records in Index - size_t index_count; - - /// Sum of Total Size fields in the Index - lzma_vli index_total_size; - - /// Sum of Uncompressed Size fields in the Index - lzma_vli index_uncompressed_size; - - /// True if Extra is present. - bool has_extra; - - /// True if we have been requested to store the Extra to *metadata. - bool want_extra; - - /// Pointer to the end of the Extra Record list. - lzma_extra *extra_tail; - - /// Dummy Extra Record used when only verifying integrity of Extra - /// (not storing it to RAM). - lzma_extra extra_dummy; - - /// Block decoder - lzma_next_coder block_decoder; - - /// buffer[buffer_pos] is the next byte to process. - size_t buffer_pos; - - /// buffer[buffer_size] is the first byte to not process. - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is read before - /// it is parsed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -/// Reads a variable-length integer to coder->num. -#define read_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_decode( \ - &num, &coder->pos, \ - coder->buffer, &coder->buffer_pos, \ - coder->buffer_size); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder, lzma_allocator *allocator) -{ - while (coder->buffer_pos < coder->buffer_size) - switch (coder->sequence) { - case SEQ_FLAGS: - // Reserved bits must be unset. - if (coder->buffer[coder->buffer_pos] & 0x70) - return LZMA_HEADER_ERROR; - - coder->todo_count = 0; - - // If Size of Header Metadata is present, prepare the - // variable for variable-length integer decoding. Otherwise - // set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the - // field isn't present. - if (coder->buffer[coder->buffer_pos] & 0x01) { - coder->metadata->header_metadata_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x02) { - coder->metadata->total_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x04) { - coder->metadata->uncompressed_size = 0; - ++coder->todo_count; - } - - if (coder->buffer[coder->buffer_pos] & 0x08) { - // Setting index_count to 1 is just to indicate that - // Index is present. The real size is parsed later. - coder->index_count = 1; - ++coder->todo_count; - } - - coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80) - != 0; - - ++coder->buffer_pos; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata->header_metadata_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->header_metadata_size); - - if (coder->metadata->header_metadata_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->total_size); - - if (coder->metadata->total_size == 0) - return LZMA_DATA_ERROR; - - --coder->todo_count; - } - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - read_vli(coder->metadata->uncompressed_size); - --coder->todo_count; - } - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - coder->sequence = SEQ_EXTRA_PREPARE; - break; - } - - read_vli(coder->tmp); - - // Index must not be empty nor far too big (wouldn't fit - // in RAM). - if (coder->tmp == 0 || coder->tmp - >= SIZE_MAX / sizeof(lzma_index)) - return LZMA_DATA_ERROR; - - coder->index_count = (size_t)(coder->tmp); - coder->tmp = 0; - - coder->sequence = SEQ_INDEX_ALLOC; - break; - - case SEQ_INDEX_ALLOC: { - lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator); - if (i == NULL) - return LZMA_MEM_ERROR; - - i->total_size = 0; - i->uncompressed_size = 0; - i->next = NULL; - - if (coder->metadata->index == NULL) - coder->metadata->index = i; - else - coder->index_current->next = i; - - coder->index_current = i; - - coder->sequence = SEQ_INDEX_TOTAL_SIZE; - } - - // Fall through - - case SEQ_INDEX_TOTAL_SIZE: { - read_vli(coder->index_current->total_size); - - coder->index_total_size += coder->index_current->total_size; - if (coder->index_total_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - // No Block can have Total Size of zero bytes. - if (coder->index_current->total_size == 0) - return LZMA_DATA_ERROR; - - if (--coder->index_count == 0) { - // If Total Size is present, it must match the sum - // of Total Sizes in Index. - if (coder->metadata->total_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->total_size - != coder->index_total_size) - return LZMA_DATA_ERROR; - - coder->index_current = coder->metadata->index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE; - } else { - coder->sequence = SEQ_INDEX_ALLOC; - } - - break; - } - - case SEQ_INDEX_UNCOMPRESSED_SIZE: { - read_vli(coder->index_current->uncompressed_size); - - coder->index_uncompressed_size - += coder->index_current->uncompressed_size; - if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX) - return LZMA_DATA_ERROR; - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - if (coder->metadata->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN - && coder->metadata->uncompressed_size - != coder->index_uncompressed_size) - return LZMA_DATA_ERROR; - - --coder->todo_count; - coder->sequence = SEQ_EXTRA_PREPARE; - } - - break; - } - - case SEQ_EXTRA_PREPARE: - assert(coder->todo_count == 0); - - // If we get here, we have at least one byte of input left. - // If "Extra is present" flag is unset in Metadata Flags, - // it means that there is some garbage and we return an error. - if (!coder->has_extra) - return LZMA_DATA_ERROR; - - if (!coder->want_extra) { - coder->extra_tail = &coder->extra_dummy; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - break; - } - - coder->sequence = SEQ_EXTRA_ALLOC; - - // Fall through - - case SEQ_EXTRA_ALLOC: { - lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator); - if (e == NULL) - return LZMA_MEM_ERROR; - - e->next = NULL; - e->id = 0; - e->size = 0; - e->data = NULL; - - if (coder->metadata->extra == NULL) - coder->metadata->extra = e; - else - coder->extra_tail->next = e; - - coder->extra_tail = e; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_ID; - } - - // Fall through - - case SEQ_EXTRA_ID: - case SEQ_EXTRA_DUMMY_ID: - read_vli(coder->extra_tail->id); - - if (coder->extra_tail->id == 0) { - coder->extra_tail->size = 0; - coder->extra_tail->data = NULL; - coder->todo_count = 0; - --coder->sequence; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_SIZE: - case SEQ_EXTRA_DUMMY_SIZE: - read_vli(coder->tmp); - - if (coder->tmp == 0) { - // We have no Data in the Extra Record. Don't - // allocate any memory for it. Go back to - // SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC. - coder->tmp = 0; - coder->sequence -= 2; - coder->todo_count = 0; - } else { - ++coder->sequence; - } - - break; - - case SEQ_EXTRA_DATA_ALLOC: { - if (coder->tmp > EXTRA_SIZE_MAX) - return LZMA_DATA_ERROR; - - coder->extra_tail->size = (size_t)(coder->tmp); - coder->tmp = 0; - - // We reserve space for the trailing '\0' too. - uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1, - allocator); - if (d == NULL) - return LZMA_MEM_ERROR; - - coder->extra_tail->data = d; - coder->sequence = SEQ_EXTRA_DATA_COPY; - } - - // Fall through - - case SEQ_EXTRA_DATA_COPY: - bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, - coder->extra_tail->data, &coder->pos, - (size_t)(coder->extra_tail->size)); - - if ((size_t)(coder->extra_tail->size) == coder->pos) { - coder->extra_tail->data[coder->pos] = '\0'; - coder->pos = 0; - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_ALLOC; - } - - break; - - case SEQ_EXTRA_DUMMY_ALLOC: - // Not really alloc, just initialize the dummy entry. - coder->extra_dummy = (lzma_extra){ - .next = NULL, - .id = 0, - .size = 0, - .data = NULL, - }; - - coder->todo_count = 1; - coder->sequence = SEQ_EXTRA_DUMMY_ID; - break; - - case SEQ_EXTRA_DUMMY_COPY: { - // Simply skip as many bytes as indicated by Extra Record Size. - // We don't check lzma_extra_size_max because we don't - // allocate any memory to hold the data. - const size_t in_avail = coder->buffer_size - coder->buffer_pos; - const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp); - coder->buffer_pos += skip; - coder->tmp -= skip; - - if (coder->tmp == 0) { - coder->todo_count = 0; - coder->sequence = SEQ_EXTRA_DUMMY_ALLOC; - } - - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - bool end_was_reached = false; - - while (true) { - // Fill the buffer if it is empty. - if (coder->buffer_pos == coder->buffer_size) { - coder->buffer_pos = 0; - coder->buffer_size = 0; - - const lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, coder->buffer, - &coder->buffer_size, LZMA_BUFFER_SIZE, - LZMA_RUN); - - switch (ret) { - case LZMA_OK: - // Return immediatelly if we got no new data. - if (coder->buffer_size == 0) - return LZMA_OK; - - break; - - case LZMA_STREAM_END: - end_was_reached = true; - break; - - default: - return ret; - } - } - - // Process coder->buffer. - const lzma_ret ret = process(coder, allocator); - if (ret != LZMA_OK) - return ret; - - // On success, process() eats all the input. - assert(coder->buffer_pos == coder->buffer_size); - - if (end_was_reached) { - // Check that the sequence is not in the - // middle of anything. - if (coder->todo_count != 0) - return LZMA_DATA_ERROR; - - // If Size of Header Metadata Block was not - // present, we use zero as its size instead - // of LZMA_VLI_VALUE_UNKNOWN. - if (coder->metadata->header_metadata_size - == LZMA_VLI_VALUE_UNKNOWN) - coder->metadata->header_metadata_size = 0; - - return LZMA_STREAM_END; - } - } -} - - -static void -metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_decode; - next->end = &metadata_decoder_end; - next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - } - - metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->total_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - metadata->index = NULL; - metadata->extra = NULL; - - next->coder->sequence = SEQ_FLAGS; - next->coder->todo_count = 1; - next->coder->pos = 0; - next->coder->tmp = 0; - next->coder->metadata = metadata; - next->coder->index_current = NULL; - next->coder->index_count = 0; - next->coder->index_total_size = 0; - next->coder->index_uncompressed_size = 0; - next->coder->want_extra = want_extra; - next->coder->extra_tail = NULL; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - return lzma_block_decoder_init( - &next->coder->block_decoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra) -{ - lzma_next_coder_init(metadata_decoder_init, next, allocator, - options, metadata, want_extra); -} - - -extern LZMA_API lzma_ret -lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options, - lzma_metadata *metadata, lzma_bool want_extra) -{ - lzma_next_strm_init(strm, lzma_metadata_decoder_init, - options, metadata, want_extra); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/metadata_decoder.h b/src/liblzma/common/metadata_decoder.h deleted file mode 100644 index 1fba2179..00000000 --- a/src/liblzma/common/metadata_decoder.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_decoder.h -/// \brief Decodes metadata stored in Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_METADATA_DECODER_H -#define LZMA_METADATA_DECODER_H - -#include "common.h" - - -extern lzma_ret lzma_metadata_decoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, lzma_metadata *metadata, - bool want_extra); - -#endif diff --git a/src/liblzma/common/metadata_encoder.c b/src/liblzma/common/metadata_encoder.c deleted file mode 100644 index 9f4a15b0..00000000 --- a/src/liblzma/common/metadata_encoder.c +++ /dev/null @@ -1,435 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_encoder.c -/// \brief Encodes metadata to be stored into Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "metadata_encoder.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_FLAGS, - SEQ_HEADER_METADATA_SIZE, - SEQ_TOTAL_SIZE, - SEQ_UNCOMPRESSED_SIZE, - SEQ_INDEX_COUNT, - SEQ_INDEX_TOTAL, - SEQ_INDEX_UNCOMPRESSED, - SEQ_EXTRA_ID, - SEQ_EXTRA_SIZE, - SEQ_EXTRA_DATA, - SEQ_END, - } sequence; - - /// Position in variable-length integers - size_t pos; - - /// Local copy of the Metadata structure. Note that we keep - /// a copy only of the main structure, not Index or Extra Records. - lzma_metadata metadata; - - /// Number of Records in Index - size_t index_count; - - /// Index Record currently being processed - const lzma_index *index_current; - - /// Block encoder for the encoded Metadata - lzma_next_coder block_encoder; - - /// True once everything except compression has been done. - bool end_was_reached; - - /// buffer[buffer_pos] is the first byte that needs to be compressed. - size_t buffer_pos; - - /// buffer[buffer_size] is the next position where a byte will be - /// written by process(). - size_t buffer_size; - - /// Temporary buffer to which encoded Metadata is written before - /// it is compressed. - uint8_t buffer[LZMA_BUFFER_SIZE]; -}; - - -#define write_vli(num) \ -do { \ - const lzma_ret ret = lzma_vli_encode(num, &coder->pos, 1, \ - coder->buffer, &coder->buffer_size, \ - LZMA_BUFFER_SIZE); \ - if (ret != LZMA_STREAM_END) \ - return ret; \ - coder->pos = 0; \ -} while (0) - - -static lzma_ret -process(lzma_coder *coder) -{ - while (coder->buffer_size < LZMA_BUFFER_SIZE) - switch (coder->sequence) { - case SEQ_FLAGS: - coder->buffer[coder->buffer_size] = 0; - - if (coder->metadata.header_metadata_size != 0) - coder->buffer[coder->buffer_size] |= 0x01; - - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x02; - - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - coder->buffer[coder->buffer_size] |= 0x04; - - if (coder->index_count > 0) - coder->buffer[coder->buffer_size] |= 0x08; - - if (coder->metadata.extra != NULL) - coder->buffer[coder->buffer_size] |= 0x80; - - ++coder->buffer_size; - coder->sequence = SEQ_HEADER_METADATA_SIZE; - break; - - case SEQ_HEADER_METADATA_SIZE: - if (coder->metadata.header_metadata_size != 0) - write_vli(coder->metadata.header_metadata_size); - - coder->sequence = SEQ_TOTAL_SIZE; - break; - - case SEQ_TOTAL_SIZE: - if (coder->metadata.total_size != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.total_size); - - coder->sequence = SEQ_UNCOMPRESSED_SIZE; - break; - - case SEQ_UNCOMPRESSED_SIZE: - if (coder->metadata.uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) - write_vli(coder->metadata.uncompressed_size); - - coder->sequence = SEQ_INDEX_COUNT; - break; - - case SEQ_INDEX_COUNT: - if (coder->index_count == 0) { - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - break; - } - - write_vli(coder->index_count); - coder->sequence = SEQ_INDEX_TOTAL; - break; - - case SEQ_INDEX_TOTAL: - write_vli(coder->index_current->total_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current == NULL) { - coder->index_current = coder->metadata.index; - coder->sequence = SEQ_INDEX_UNCOMPRESSED; - } - - break; - - case SEQ_INDEX_UNCOMPRESSED: - write_vli(coder->index_current->uncompressed_size); - - coder->index_current = coder->index_current->next; - if (coder->index_current != NULL) - break; - - if (coder->metadata.extra != NULL) { - coder->sequence = SEQ_EXTRA_ID; - break; - } - - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - - case SEQ_EXTRA_ID: { - const lzma_ret ret = lzma_vli_encode( - coder->metadata.extra->id, &coder->pos, 1, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->pos = 0; - - // Handle the special ID 0. - if (coder->metadata.extra->id == 0) { - coder->metadata.extra - = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->sequence = SEQ_EXTRA_ID; - - } else { - coder->sequence = SEQ_EXTRA_SIZE; - } - - break; - - default: - return ret; - } - - break; - } - - case SEQ_EXTRA_SIZE: - if (coder->metadata.extra->size >= (lzma_vli)(SIZE_MAX)) - return LZMA_HEADER_ERROR; - - write_vli(coder->metadata.extra->size); - coder->sequence = SEQ_EXTRA_DATA; - break; - - case SEQ_EXTRA_DATA: - bufcpy(coder->metadata.extra->data, &coder->pos, - coder->metadata.extra->size, - coder->buffer, &coder->buffer_size, - LZMA_BUFFER_SIZE); - - if ((size_t)(coder->metadata.extra->size) == coder->pos) { - coder->metadata.extra = coder->metadata.extra->next; - if (coder->metadata.extra == NULL) { - coder->sequence = SEQ_END; - return LZMA_STREAM_END; - } - - coder->pos = 0; - coder->sequence = SEQ_EXTRA_ID; - } - - break; - - case SEQ_END: - // Everything is encoded. Let the compression code finish - // its work now. - return LZMA_STREAM_END; - } - - return LZMA_OK; -} - - -static lzma_ret -metadata_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in lzma_attribute((unused)), - size_t *restrict in_pos lzma_attribute((unused)), - size_t in_size lzma_attribute((unused)), uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action lzma_attribute((unused))) -{ - while (!coder->end_was_reached) { - // Flush coder->buffer if it isn't empty. - if (coder->buffer_size > 0) { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, - coder->buffer_size, - out, out_pos, out_size, LZMA_RUN); - if (coder->buffer_pos < coder->buffer_size - || ret != LZMA_OK) - return ret; - - coder->buffer_pos = 0; - coder->buffer_size = 0; - } - - const lzma_ret ret = process(coder); - - switch (ret) { - case LZMA_OK: - break; - - case LZMA_STREAM_END: - coder->end_was_reached = true; - break; - - default: - return ret; - } - } - - // Finish - return coder->block_encoder.code(coder->block_encoder.coder, allocator, - coder->buffer, &coder->buffer_pos, coder->buffer_size, - out, out_pos, out_size, LZMA_FINISH); -} - - -static void -metadata_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - if (options == NULL || metadata == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &metadata_encode; - next->end = &metadata_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } - - next->coder->sequence = SEQ_FLAGS; - next->coder->pos = 0; - next->coder->metadata = *metadata; - next->coder->index_count = 0; - next->coder->index_current = metadata->index; - next->coder->end_was_reached = false; - next->coder->buffer_pos = 0; - next->coder->buffer_size = 0; - - // Count and validate the Index Records. - { - const lzma_index *i = metadata->index; - while (i != NULL) { - if (i->total_size > LZMA_VLI_VALUE_MAX - || i->uncompressed_size - > LZMA_VLI_VALUE_MAX) - return LZMA_PROG_ERROR; - - ++next->coder->index_count; - i = i->next; - } - } - - // Initialize the Block encoder. - return lzma_block_encoder_init( - &next->coder->block_encoder, allocator, options); -} - - -extern lzma_ret -lzma_metadata_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata) -{ - lzma_next_coder_init(metadata_encoder_init, next, allocator, - options, metadata); -} - - -extern LZMA_API lzma_ret -lzma_metadata_encoder(lzma_stream *strm, lzma_options_block *options, - const lzma_metadata *metadata) -{ - lzma_next_strm_init(strm, metadata_encoder_init, options, metadata); - - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} - - -extern LZMA_API lzma_vli -lzma_metadata_size(const lzma_metadata *metadata) -{ - lzma_vli size = 1; // Metadata Flags - - // Validate header_metadata_size, total_size, and uncompressed_size. - if (metadata->header_metadata_size > LZMA_VLI_VALUE_MAX - || !lzma_vli_is_valid(metadata->total_size) - || metadata->total_size == 0 - || !lzma_vli_is_valid(metadata->uncompressed_size)) - return 0; - - // Add the sizes of these three fields. - if (metadata->header_metadata_size != 0) - size += lzma_vli_size(metadata->header_metadata_size); - - if (metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->total_size); - - if (metadata->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - size += lzma_vli_size(metadata->uncompressed_size); - - // Index - if (metadata->index != NULL) { - const lzma_index *i = metadata->index; - size_t count = 1; - - do { - const size_t x = lzma_vli_size(i->total_size); - const size_t y = lzma_vli_size(i->uncompressed_size); - if (x == 0 || y == 0) - return 0; - - size += x + y; - ++count; - i = i->next; - - } while (i != NULL); - - const size_t tmp = lzma_vli_size(count); - if (tmp == 0) - return 0; - - size += tmp; - } - - // Extra - { - const lzma_extra *e = metadata->extra; - while (e != NULL) { - // Validate the numbers. - if (e->id > LZMA_VLI_VALUE_MAX - || e->size >= (lzma_vli)(SIZE_MAX)) - return 0; - - // Add the sizes. - size += lzma_vli_size(e->id); - if (e->id != 0) { - size += lzma_vli_size(e->size); - size += e->size; - } - - e = e->next; - } - } - - return size; -} diff --git a/src/liblzma/common/metadata_encoder.h b/src/liblzma/common/metadata_encoder.h deleted file mode 100644 index 20357fe6..00000000 --- a/src/liblzma/common/metadata_encoder.h +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file metadata_encoder.h -/// \brief Encodes metadata to be stored into Metadata Blocks -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_METADATA_ENCODER_H -#define LZMA_METADATA_ENCODER_H - -#include "common.h" - - -extern lzma_ret lzma_metadata_encoder_init( - lzma_next_coder *next, lzma_allocator *allocator, - lzma_options_block *options, const lzma_metadata *metadata); - -#endif diff --git a/src/liblzma/common/raw_common.c b/src/liblzma/common/raw_common.c index d45bf4de..35252fc2 100644 --- a/src/liblzma/common/raw_common.c +++ b/src/liblzma/common/raw_common.c @@ -20,122 +20,81 @@ #include "raw_common.h" -/// \brief Prepares the filter chain -/// -/// Prepares the filter chain by setting uncompressed sizes for each filter, -/// and adding implicit Subblock filter when needed. -/// -/// \return true if error occurred, false on success. -/// -static bool -prepare(lzma_vli *id, lzma_vli *uncompressed_size, bool allow_implicit) +static lzma_ret +validate_options(const lzma_options_filter *options, size_t *count) { - bool needs_end_of_input = false; - - switch (id[0]) { - case LZMA_FILTER_COPY: - case LZMA_FILTER_X86: - case LZMA_FILTER_POWERPC: - case LZMA_FILTER_IA64: - case LZMA_FILTER_ARM: - case LZMA_FILTER_ARMTHUMB: - case LZMA_FILTER_SPARC: - case LZMA_FILTER_DELTA: - uncompressed_size[1] = uncompressed_size[0]; - needs_end_of_input = true; - break; - - case LZMA_FILTER_SUBBLOCK: - case LZMA_FILTER_LZMA: - // These change the size of the data unpredictably. - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - break; - - case LZMA_FILTER_SUBBLOCK_HELPER: - uncompressed_size[1] = uncompressed_size[0]; - break; - - default: - // Unknown filter. - return true; - } + if (options == NULL) + return LZMA_PROG_ERROR; - // Is this the last filter in the chain? - if (id[1] == LZMA_VLI_VALUE_UNKNOWN) { - if (needs_end_of_input && allow_implicit - && uncompressed_size[0] - == LZMA_VLI_VALUE_UNKNOWN) { - // Add implicit Subblock filter. - id[1] = LZMA_FILTER_SUBBLOCK; - uncompressed_size[1] = LZMA_VLI_VALUE_UNKNOWN; - id[2] = LZMA_VLI_VALUE_UNKNOWN; + // Number of non-last filters that may change the size of the data + // significantly (that is, more than 1-2 % or so). + size_t change = 0; + + // True if the last filter in the given chain is actually usable as + // the last filter. Only filters that support embedding End of Payload + // Marker can be used as the last filter in the chain. + bool last_ok = false; + + size_t i; + for (i = 0; options[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { + switch (options[i].id) { + // Not #ifdeffing these for simplicity. + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + case LZMA_FILTER_DELTA: + // These don't change the size of the data and cannot + // be used as the last filter in the chain. + last_ok = false; + break; + +#ifdef HAVE_FILTER_SUBBLOCK + case LZMA_FILTER_SUBBLOCK: + last_ok = true; + ++change; + break; +#endif + +#ifdef HAVE_FILTER_LZMA + case LZMA_FILTER_LZMA: + last_ok = true; + break; +#endif + + default: + return LZMA_HEADER_ERROR; } - - return false; } - return prepare(id + 1, uncompressed_size + 1, allow_implicit); + // There must be 1-4 filters and the last filter must be usable as + // the last filter in the chain. + if (i == 0 || i > 4 || !last_ok) + return LZMA_HEADER_ERROR; + + // At maximum of two non-last filters are allowed to change the + // size of the data. + if (change > 2) + return LZMA_HEADER_ERROR; + + *count = i; + return LZMA_OK; } extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder) + bool is_encoder) { - if (options == NULL || !lzma_vli_is_valid(uncompressed_size)) - return LZMA_PROG_ERROR; - - // Count the number of filters in the chain. - size_t count = 0; - while (options[count].id != LZMA_VLI_VALUE_UNKNOWN) - ++count; - - // Allocate enough space from the stack for IDs and uncompressed - // sizes. We need two extra: possible implicit Subblock and end - // of array indicator. - lzma_vli ids[count + 2]; - lzma_vli uncompressed_sizes[count + 2]; - bool using_implicit = false; - - uncompressed_sizes[0] = uncompressed_size; - - if (count == 0) { - if (!allow_implicit) - return LZMA_PROG_ERROR; - - count = 1; - using_implicit = true; - - // Special case: no filters were specified, so an implicit - // Copy or Subblock filter is used. - if (uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) - ids[0] = LZMA_FILTER_SUBBLOCK; - else - ids[0] = LZMA_FILTER_COPY; + // Do some basic validation and get the number of filters. + size_t count; + return_if_error(validate_options(options, &count)); - ids[1] = LZMA_VLI_VALUE_UNKNOWN; - - } else { - // Prepare the ids[] and uncompressed_sizes[]. - for (size_t i = 0; i < count; ++i) - ids[i] = options[i].id; - - ids[count] = LZMA_VLI_VALUE_UNKNOWN; - - if (prepare(ids, uncompressed_sizes, allow_implicit)) - return LZMA_HEADER_ERROR; - - // Check if implicit Subblock filter was added. - if (ids[count] != LZMA_VLI_VALUE_UNKNOWN) { - assert(ids[count] == LZMA_FILTER_SUBBLOCK); - ++count; - using_implicit = true; - } - } - - // Set the filter functions, and copy uncompressed sizes and options. + // Set the filter functions and copy the options pointer. lzma_filter_info filters[count + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { @@ -144,29 +103,20 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // of the uncompressed data. const size_t j = count - i - 1; - filters[j].init = get_function(ids[i]); + filters[j].init = get_function(options[i].id); if (filters[j].init == NULL) return LZMA_HEADER_ERROR; filters[j].options = options[i].options; - filters[j].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[0].options = NULL; - } else { for (size_t i = 0; i < count; ++i) { - filters[i].init = get_function(ids[i]); + filters[i].init = get_function(options[i].id); if (filters[i].init == NULL) return LZMA_HEADER_ERROR; filters[i].options = options[i].options; - filters[i].uncompressed_size = uncompressed_sizes[i]; } - - if (using_implicit) - filters[count - 1].options = NULL; } // Terminate the array. diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/common/raw_common.h index 172223cb..0a27f3dc 100644 --- a/src/liblzma/common/raw_common.h +++ b/src/liblzma/common/raw_common.h @@ -23,9 +23,8 @@ #include "common.h" extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, - lzma_allocator *allocator, - const lzma_options_filter *options, lzma_vli uncompressed_size, + lzma_allocator *allocator, const lzma_options_filter *options, lzma_init_function (*get_function)(lzma_vli id), - bool allow_implicit, bool is_encoder); + bool is_encoder); #endif diff --git a/src/liblzma/common/raw_decoder.c b/src/liblzma/common/raw_decoder.c index 03f1d847..4fb7111c 100644 --- a/src/liblzma/common/raw_decoder.c +++ b/src/liblzma/common/raw_decoder.c @@ -18,24 +18,17 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_decoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_decoder.h" #include "subblock_decoder_helper.h" #include "delta_decoder.h" #include "lzma_decoder.h" -#include "metadata_decoder.h" static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_decoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_decoder_init; @@ -93,12 +86,10 @@ get_function(lzma_vli id) extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) + const lzma_options_filter *options) { const lzma_ret ret = lzma_raw_coder_init(next, allocator, - options, uncompressed_size, &get_function, - allow_implicit, false); + options, &get_function, false); if (ret != LZMA_OK) lzma_next_coder_end(next, allocator); @@ -108,8 +99,7 @@ lzma_raw_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, extern LZMA_API lzma_ret -lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options) { return_if_error(lzma_strm_init(strm)); @@ -117,8 +107,7 @@ lzma_raw_decoder(lzma_stream *strm, const lzma_options_filter *options, strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, - strm->allocator, options, uncompressed_size, - &get_function, allow_implicit, false); + strm->allocator, options, &get_function, false); if (ret != LZMA_OK) lzma_end(strm); diff --git a/src/liblzma/common/raw_decoder.h b/src/liblzma/common/raw_decoder.h index 9d48074b..c0e626a8 100644 --- a/src/liblzma/common/raw_decoder.h +++ b/src/liblzma/common/raw_decoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/raw_encoder.c b/src/liblzma/common/raw_encoder.c index fb12862b..9b8cbfae 100644 --- a/src/liblzma/common/raw_encoder.c +++ b/src/liblzma/common/raw_encoder.c @@ -18,28 +18,16 @@ /////////////////////////////////////////////////////////////////////////////// #include "raw_encoder.h" -#include "copy_coder.h" #include "simple_coder.h" #include "subblock_encoder.h" #include "delta_encoder.h" #include "lzma_encoder.h" -struct lzma_coder_s { - lzma_next_coder next; - lzma_vli uncompressed_size; -}; - - static lzma_init_function get_function(lzma_vli id) { switch (id) { -#ifdef HAVE_FILTER_COPY - case LZMA_FILTER_COPY: - return &lzma_copy_encoder_init; -#endif - #ifdef HAVE_FILTER_SUBBLOCK case LZMA_FILTER_SUBBLOCK: return &lzma_subblock_encoder_init; @@ -90,91 +78,34 @@ get_function(lzma_vli id) } -static lzma_ret -raw_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) +extern lzma_ret +lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *options) { - // Check that our amount of input stays in proper limits. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if (action == LZMA_FINISH) { - if (coder->uncompressed_size != in_size - *in_pos) - return LZMA_PROG_ERROR; - } else { - if (coder->uncompressed_size < in_size - *in_pos) - return LZMA_PROG_ERROR; - } - } - - const size_t in_start = *in_pos; + const lzma_ret ret = lzma_raw_coder_init(next, allocator, + options, &get_function, true); - const lzma_ret ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, out, out_pos, out_size, action); - - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_size -= *in_pos - in_start; + if (ret != LZMA_OK) + lzma_next_coder_end(next, allocator); return ret; } -static void -raw_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &raw_encode; - next->end = &raw_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - } - - next->coder->uncompressed_size = uncompressed_size; - - // lzma_raw_coder_init() accesses get_function() via function pointer, - // because this way linker doesn't statically link both encoder and - // decoder functions if user needs only encoder or decoder. - return lzma_raw_coder_init(&next->coder->next, allocator, - options, uncompressed_size, - &get_function, allow_implicit, true); -} - - -extern lzma_ret -lzma_raw_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit) -{ - lzma_next_coder_init(raw_encoder_init, next, allocator, - options, uncompressed_size, allow_implicit); -} - - extern LZMA_API lzma_ret -lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options, - lzma_vli uncompressed_size, lzma_bool allow_implicit) +lzma_raw_encoder(lzma_stream *strm, const lzma_options_filter *options) { - lzma_next_strm_init(strm, raw_encoder_init, - options, uncompressed_size, allow_implicit); + return_if_error(lzma_strm_init(strm)); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; - return LZMA_OK; + const lzma_ret ret = lzma_raw_coder_init(&strm->internal->next, + strm->allocator, options, &get_function, true); + + if (ret != LZMA_OK) + lzma_end(strm); + + return ret; } diff --git a/src/liblzma/common/raw_encoder.h b/src/liblzma/common/raw_encoder.h index b0aab61a..4e148489 100644 --- a/src/liblzma/common/raw_encoder.h +++ b/src/liblzma/common/raw_encoder.h @@ -24,7 +24,6 @@ extern lzma_ret lzma_raw_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_vli uncompressed_size, bool allow_implicit); + lzma_allocator *allocator, const lzma_options_filter *options); #endif diff --git a/src/liblzma/common/stream_common.h b/src/liblzma/common/stream_common.h index b2f37f37..4f83fc58 100644 --- a/src/liblzma/common/stream_common.h +++ b/src/liblzma/common/stream_common.h @@ -22,6 +22,9 @@ #include "common.h" +/// Size of the Stream Flags field +#define LZMA_STREAM_FLAGS_SIZE 2 + extern const uint8_t lzma_header_magic[6]; extern const uint8_t lzma_footer_magic[2]; diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 56de3d9f..1bf7f1f8 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -18,281 +18,148 @@ /////////////////////////////////////////////////////////////////////////////// #include "stream_common.h" +#include "stream_decoder.h" #include "check.h" #include "stream_flags_decoder.h" #include "block_decoder.h" -#include "metadata_decoder.h" struct lzma_coder_s { enum { - SEQ_STREAM_HEADER_CODE, - SEQ_BLOCK_HEADER_INIT, - SEQ_BLOCK_HEADER_CODE, - SEQ_METADATA_CODE, - SEQ_DATA_CODE, - SEQ_STREAM_TAIL_INIT, - SEQ_STREAM_TAIL_CODE, + SEQ_STREAM_HEADER, + SEQ_BLOCK_HEADER, + SEQ_BLOCK, + SEQ_INDEX, + SEQ_STREAM_FOOTER, } sequence; - /// Position in variable-length integers and in some other things. - size_t pos; - /// Block or Metadata decoder. This takes little memory and the same /// data structure can be used to decode every Block Header, so it's /// a good idea to have a separate lzma_next_coder structure for it. lzma_next_coder block_decoder; - /// Block Header decoder; this is separate - lzma_next_coder block_header_decoder; - + /// Block options decoded by the Block Header decoder and used by + /// the Block decoder. lzma_options_block block_options; - /// Information about the sizes of the Blocks - lzma_info *info; - - /// Current Block in *info - lzma_info_iter iter; - - /// Number of bytes not yet processed from Data Blocks in the Stream. - /// This can be LZMA_VLI_VALUE_UNKNOWN. If it is known, it is - /// decremented while decoding and verified to match the reality. - lzma_vli total_left; - - /// Like uncompressed_left above but for uncompressed data from - /// Data Blocks. - lzma_vli uncompressed_left; - /// Stream Flags from Stream Header - lzma_stream_flags header_flags; - - /// Stream Flags from Stream tail - lzma_stream_flags tail_flags; + lzma_stream_flags stream_flags; - /// Decoder for Stream Header and Stream tail. This takes very - /// little memory and the same data structure can be used for - /// both Header and tail, so it's a good idea to have a separate - /// lzma_next_coder structure for it. - lzma_next_coder flags_decoder; + /// Index is hashed so that it can be compared to the sizes of Blocks + /// with O(1) memory usage. + lzma_index_hash *index_hash; - /// Temporary destination for the decoded Metadata. - lzma_metadata metadata; + /// Write position in buffer[] + size_t buffer_pos; - /// Pointer to application-supplied pointer where to store the list - /// of Extra Records from the Header Metadata Block. - lzma_extra **header_extra; - - /// Same as above but Footer Metadata Block - lzma_extra **footer_extra; + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; }; -static lzma_ret -metadata_init(lzma_coder *coder, lzma_allocator *allocator) -{ - assert(coder->metadata.index == NULL); - assert(coder->metadata.extra == NULL); - - // Single-Block Streams don't have Metadata Blocks. - if (!coder->header_flags.is_multi) - return LZMA_DATA_ERROR; - - coder->block_options.total_limit = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the Uncompressed Size of a Metadata Block. This is to - // prevent security issues where input file would have very huge - // Metadata. - // - // FIXME: Hardcoded constant is ugly. Maybe we should provide - // some way to specify this from the application. - coder->block_options.uncompressed_limit = LZMA_VLI_C(1) << 23; - - lzma_info_size size_type; - bool want_extra; - - // If we haven't decoded any Data Blocks yet, this is Header - // Metadata Block. - if (lzma_info_index_count_get(coder->info) == 0) { - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - size_type = LZMA_INFO_HEADER_METADATA; - want_extra = coder->header_extra != NULL; - } else { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - size_type = LZMA_INFO_FOOTER_METADATA; - want_extra = coder->footer_extra != NULL; - } - - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.total_size = lzma_info_size_get( - coder->info, size_type); - - coder->sequence = SEQ_METADATA_CODE; - - return lzma_metadata_decoder_init(&coder->block_decoder, allocator, - &coder->block_options, &coder->metadata, want_extra); -} - - -static lzma_ret -data_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(lzma_info_iter_set( - &coder->iter, LZMA_VLI_VALUE_UNKNOWN, - coder->block_options.uncompressed_size)); - - coder->block_options.total_size = coder->iter.total_size; - coder->block_options.uncompressed_size = coder->iter.uncompressed_size; - coder->block_options.total_limit = coder->total_left; - coder->block_options.uncompressed_limit = coder->uncompressed_left; - - if (coder->header_flags.is_multi) { - coder->block_options.has_uncompressed_size_in_footer = false; - coder->block_options.has_backward_size = false; - coder->block_options.handle_padding = true; - } else { - coder->block_options.has_uncompressed_size_in_footer - = coder->iter.uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; - coder->block_options.has_backward_size = true; - coder->block_options.handle_padding = false; - } - - coder->sequence = SEQ_DATA_CODE; - - return lzma_block_decoder_init(&coder->block_decoder, allocator, - &coder->block_options); -} - - static lzma_ret stream_decode(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { + // When decoding the actual Block, it may be able to produce more + // output even if we don't give it any new input. while (*out_pos < out_size && (*in_pos < in_size - || coder->sequence == SEQ_DATA_CODE)) + || coder->sequence == SEQ_BLOCK)) switch (coder->sequence) { - case SEQ_STREAM_HEADER_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + case SEQ_STREAM_HEADER: { + // Copy the Stream Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); + + // Return if we didn't get the whole Stream Header yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; + + coder->buffer_pos = 0; + + // Decode the Stream Header. + return_if_error(lzma_stream_header_decode( + &coder->stream_flags, coder->buffer)); - coder->sequence = SEQ_BLOCK_HEADER_INIT; + // Copy the type of the Check so that Block Header and Block + // decoders see it. + coder->block_options.check = coder->stream_flags.check; + + // Even if we return LZMA_UNSUPPORTED_CHECK below, we want + // to continue from Block Header decoding. + coder->sequence = SEQ_BLOCK_HEADER; // Detect if the Check type is supported and give appropriate // warning if it isn't. We don't warn every time a new Block // is started. - lzma_check tmp; - if (lzma_check_init(&tmp, coder->header_flags.check)) + if (!lzma_available_checks[coder->block_options.check]) return LZMA_UNSUPPORTED_CHECK; break; } - case SEQ_BLOCK_HEADER_INIT: { - coder->block_options.check = coder->header_flags.check; - coder->block_options.has_crc32 = coder->header_flags.has_crc32; + case SEQ_BLOCK_HEADER: { + if (coder->buffer_pos == 0) { + // Detect if it's Index. + if (in[*in_pos] == 0x00) { + coder->sequence = SEQ_INDEX; + break; + } - for (size_t i = 0; - i < ARRAY_SIZE(coder->block_options.filters); - ++i) { - lzma_free(coder->block_options.filters[i].options, - allocator); - coder->block_options.filters[i].options = NULL; + // Calculate the size of the Block Header. Note that + // Block Header decoder wants to see this byte too + // so don't advance *in_pos. + coder->block_options.header_size + = lzma_block_header_size_decode( + in[*in_pos]); } - return_if_error(lzma_block_header_decoder_init( - &coder->block_header_decoder, allocator, - &coder->block_options)); - - coder->sequence = SEQ_BLOCK_HEADER_CODE; - } - - // Fall through - - case SEQ_BLOCK_HEADER_CODE: { - lzma_ret ret = coder->block_header_decoder.code( - coder->block_header_decoder.coder, - allocator, in, in_pos, in_size, - NULL, NULL, 0, LZMA_RUN); - - if (ret != LZMA_STREAM_END) - return ret; + // Copy the Block Header to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + coder->block_options.header_size); - if (coder->block_options.is_metadata) - ret = metadata_init(coder, allocator); - else - ret = data_init(coder, allocator); - - if (ret != LZMA_OK) - return ret; - - break; - } + // Return if we didn't get the whole Block Header yet. + if (coder->buffer_pos < coder->block_options.header_size) + return LZMA_OK; - case SEQ_METADATA_CODE: { - lzma_ret ret = coder->block_decoder.code( - coder->block_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + coder->buffer_pos = 0; - const bool is_header_metadata = lzma_info_index_count_get( - coder->info) == 0; + // Set up a buffer to hold the filter chain. Block Header + // decoder will initialize all members of this array so + // we don't need to do it here. + lzma_options_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + coder->block_options.filters = filters; - if (is_header_metadata) { - if (coder->header_extra != NULL) { - *coder->header_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Decode the Block Header. + return_if_error(lzma_block_header_decode(&coder->block_options, + allocator, coder->buffer)); - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, - coder->block_options.total_size) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_BLOCK_HEADER_INIT; - } else { - if (coder->footer_extra != NULL) { - *coder->footer_extra = coder->metadata.extra; - coder->metadata.extra = NULL; - } + // Initialize the Block decoder. + const lzma_ret ret = lzma_block_decoder_init( + &coder->block_decoder, + allocator, &coder->block_options); - coder->sequence = SEQ_STREAM_TAIL_INIT; - } + // Free the allocated filter options since they are needed + // only to initialize the Block decoder. + for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + lzma_free(filters[i].options, allocator); - assert(coder->metadata.extra == NULL); + coder->block_options.filters = NULL; - ret = lzma_info_metadata_set(coder->info, allocator, - &coder->metadata, is_header_metadata, true); - if (ret != LZMA_OK) + // Check if Block enocoder initialization succeeded. Don't + // warn about unsupported check anymore since we did it + // earlier if it was needed. + if (ret != LZMA_OK && ret != LZMA_UNSUPPORTED_CHECK) return ret; - // Intialize coder->total_size and coder->uncompressed_size - // from Header Metadata. - if (is_header_metadata) { - coder->total_left = lzma_info_size_get( - coder->info, LZMA_INFO_TOTAL); - coder->uncompressed_left = lzma_info_size_get( - coder->info, LZMA_INFO_UNCOMPRESSED); - } - + coder->sequence = SEQ_BLOCK; break; } - case SEQ_DATA_CODE: { + case SEQ_BLOCK: { lzma_ret ret = coder->block_decoder.code( coder->block_decoder.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, @@ -301,62 +168,59 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; - ret = lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size); - if (ret != LZMA_OK) - return ret; - - // These won't overflow since lzma_info_iter_set() succeeded. - if (coder->total_left != LZMA_VLI_VALUE_UNKNOWN) - coder->total_left -= coder->block_options.total_size; - if (coder->uncompressed_left != LZMA_VLI_VALUE_UNKNOWN) - coder->uncompressed_left -= coder->block_options - .uncompressed_size; + // Block decoded successfully. Add the new size pair to + // the Index hash. + return_if_error(lzma_index_hash_append(coder->index_hash, + lzma_block_total_size_get( + &coder->block_options), + coder->block_options.uncompressed_size)); - if (!coder->header_flags.is_multi) { - ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - coder->sequence = SEQ_STREAM_TAIL_INIT; - break; - } - - coder->sequence = SEQ_BLOCK_HEADER_INIT; + coder->sequence = SEQ_BLOCK_HEADER; break; } - case SEQ_STREAM_TAIL_INIT: { - lzma_ret ret = lzma_info_index_finish(coder->info); - if (ret != LZMA_OK) - return ret; - - ret = lzma_stream_tail_decoder_init(&coder->flags_decoder, - allocator, &coder->tail_flags); - if (ret != LZMA_OK) + case SEQ_INDEX: { + // Decode the Index and compare it to the hash calculated + // from the sizes of the Blocks (if any). + const lzma_ret ret = lzma_index_hash_decode(coder->index_hash, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) return ret; - coder->sequence = SEQ_STREAM_TAIL_CODE; + coder->sequence = SEQ_STREAM_FOOTER; + break; } - // Fall through + case SEQ_STREAM_FOOTER: + // Copy the Stream Footer to the internal buffer. + bufcpy(in, in_pos, in_size, coder->buffer, &coder->buffer_pos, + LZMA_STREAM_HEADER_SIZE); - case SEQ_STREAM_TAIL_CODE: { - const lzma_ret ret = coder->flags_decoder.code( - coder->flags_decoder.coder, allocator, - in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; + // Return if we didn't get the whole Stream Footer yet. + if (coder->buffer_pos < LZMA_STREAM_HEADER_SIZE) + return LZMA_OK; - if (!lzma_stream_flags_is_equal( - coder->header_flags, coder->tail_flags)) + // Decode the Stream Footer. + lzma_stream_flags footer_flags; + return_if_error(lzma_stream_footer_decode( + &footer_flags, coder->buffer)); + + // Check that Index Size stored in the Stream Footer matches + // the real size of the Index field. + if (lzma_index_hash_size(coder->index_hash) + != footer_flags.backward_size) + return LZMA_DATA_ERROR; + + // Compare that the Stream Flags fields are identical in + // both Stream Header and Stream Footer. + if (!lzma_stream_flags_equal(&coder->stream_flags, + &footer_flags)) return LZMA_DATA_ERROR; return LZMA_STREAM_END; - } default: + assert(0); return LZMA_PROG_ERROR; } @@ -367,23 +231,15 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, static void stream_decoder_end(lzma_coder *coder, lzma_allocator *allocator) { - for (size_t i = 0; i < ARRAY_SIZE(coder->block_options.filters); ++i) - lzma_free(coder->block_options.filters[i].options, allocator); - lzma_next_coder_end(&coder->block_decoder, allocator); - lzma_next_coder_end(&coder->block_header_decoder, allocator); - lzma_next_coder_end(&coder->flags_decoder, allocator); - lzma_info_free(coder->info, allocator); - lzma_index_free(coder->metadata.index, allocator); - lzma_extra_free(coder->metadata.extra, allocator); + lzma_index_hash_end(coder->index_hash, allocator); lzma_free(coder, allocator); return; } static lzma_ret -stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -394,73 +250,35 @@ stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->end = &stream_decoder_end; next->coder->block_decoder = LZMA_NEXT_CODER_INIT; - next->coder->block_header_decoder = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - next->coder->flags_decoder = LZMA_NEXT_CODER_INIT; - next->coder->metadata.index = NULL; - next->coder->metadata.extra = NULL; - } else { - for (size_t i = 0; i < ARRAY_SIZE( - next->coder->block_options.filters); ++i) - lzma_free(next->coder->block_options - .filters[i].options, allocator); - - lzma_index_free(next->coder->metadata.index, allocator); - next->coder->metadata.index = NULL; - - lzma_extra_free(next->coder->metadata.extra, allocator); - next->coder->metadata.extra = NULL; + next->coder->index_hash = NULL; } - for (size_t i = 0; i < ARRAY_SIZE(next->coder->block_options.filters); - ++i) - next->coder->block_options.filters[i].options = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) + // Initialize the Index hash used to verify the Index. + next->coder->index_hash = lzma_index_hash_init( + next->coder->index_hash, allocator); + if (next->coder->index_hash == NULL) return LZMA_MEM_ERROR; - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - // Initialize Stream Header decoder. - return_if_error(lzma_stream_header_decoder_init( - &next->coder->flags_decoder, allocator, - &next->coder->header_flags)); - - // Reset the *foo_extra pointers to NULL. This way the caller knows - // if there were no Extra Records. (We don't support appending - // Records to Extra list.) - if (header != NULL) - *header = NULL; - if (footer != NULL) - *footer = NULL; - - // Reset some variables. - next->coder->sequence = SEQ_STREAM_HEADER_CODE; - next->coder->pos = 0; - next->coder->uncompressed_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->total_left = LZMA_VLI_VALUE_UNKNOWN; - next->coder->header_extra = header; - next->coder->footer_extra = footer; + // Reset the rest of the variables. + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.filters = NULL; + next->coder->buffer_pos = 0; return LZMA_OK; } extern lzma_ret -lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder_init(lzma_next_coder *next, lzma_allocator *allocator) { - lzma_next_coder_init( - stream_decoder_init, next, allocator, header, footer); + lzma_next_coder_init0(stream_decoder_init, next, allocator); } extern LZMA_API lzma_ret -lzma_stream_decoder(lzma_stream *strm, - lzma_extra **header, lzma_extra **footer) +lzma_stream_decoder(lzma_stream *strm) { - lzma_next_strm_init(strm, stream_decoder_init, header, footer); + lzma_next_strm_init0(strm, stream_decoder_init); strm->internal->supported_actions[LZMA_RUN] = true; strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; diff --git a/src/liblzma/common/stream_decoder.h b/src/liblzma/common/stream_decoder.h new file mode 100644 index 00000000..dcda387d --- /dev/null +++ b/src/liblzma/common/stream_decoder.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_decoder.h +/// \brief Decodes .lzma Streams +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_DECODER_H +#define LZMA_STREAM_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_stream_decoder_init( + lzma_next_coder *next, lzma_allocator *allocator); + +#endif diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c new file mode 100644 index 00000000..767b8014 --- /dev/null +++ b/src/liblzma/common/stream_encoder.c @@ -0,0 +1,282 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.c +/// \brief Encodes .lzma Streams +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "stream_common.h" +#include "stream_encoder.h" +#include "block_encoder.h" +#include "index_encoder.h" + + +struct lzma_coder_s { + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_INIT, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_ENCODE, + SEQ_INDEX_ENCODE, + SEQ_STREAM_FOOTER, + } sequence; + + /// Block + lzma_next_coder block_encoder; + + /// Options for the Block encoder + lzma_options_block block_options; + + /// Index encoder. This is separate from Block encoder, because this + /// doesn't take much memory, and when encoding multiple Streams + /// with the same encoding options we avoid reallocating memory. + lzma_next_coder index_encoder; + + /// Index to hold sizes of the Blocks + lzma_index *index; + + /// Read position in buffer[] + size_t buffer_pos; + + /// Total number of bytes in buffer[] + size_t buffer_size; + + /// Buffer to hold Stream Header, Block Header, and Stream Footer. + /// Block Header has biggest maximum size. + uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX]; +}; + + +static lzma_ret +block_encoder_init(lzma_coder *coder, lzma_allocator *allocator) +{ + // Prepare the Block options. + coder->block_options.compressed_size = LZMA_VLI_VALUE_UNKNOWN; + coder->block_options.uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; + + return_if_error(lzma_block_header_size(&coder->block_options)); + + // Initialize the actual Block encoder. + return lzma_block_encoder_init(&coder->block_encoder, allocator, + &coder->block_options); +} + + +static lzma_ret +stream_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + // Main loop + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_STREAM_HEADER: + case SEQ_BLOCK_HEADER: + case SEQ_STREAM_FOOTER: + bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size, + out, out_pos, out_size); + if (coder->buffer_pos < coder->buffer_size) + return LZMA_OK; + + if (coder->sequence == SEQ_STREAM_FOOTER) + return LZMA_STREAM_END; + + coder->buffer_pos = 0; + ++coder->sequence; + break; + + case SEQ_BLOCK_INIT: { + if (*in_pos == in_size) { + // If we are requested to flush or finish the current + // Block, return LZMA_STREAM_END immediatelly since + // there's nothing to do. + if (action != LZMA_FINISH) + return action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + + // The application had used LZMA_FULL_FLUSH to finish + // the previous Block, but now wants to finish without + // encoding new data, or it is simply creating an + // empty Stream with no Blocks. + // + // Initialize the Index encoder, and continue to + // actually encoding the Index. + return_if_error(lzma_index_encoder_init( + &coder->index_encoder, allocator, + coder->index)); + coder->sequence = SEQ_INDEX_ENCODE; + break; + } + + // Initialize the Block encoder except if this is the first + // Block, because stream_encoder_init() has already + // initialized it. + if (lzma_index_count(coder->index) != 0) + return_if_error(block_encoder_init(coder, allocator)); + + // Encode the Block Header. This shouldn't fail since we have + // already initialized the Block encoder. + if (lzma_block_header_encode(&coder->block_options, + coder->buffer) != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = coder->block_options.header_size; + coder->sequence = SEQ_BLOCK_HEADER; + break; + } + + case SEQ_BLOCK_ENCODE: { + static const lzma_action convert[4] = { + LZMA_RUN, + LZMA_SYNC_FLUSH, + LZMA_FINISH, + LZMA_FINISH, + }; + + const lzma_ret ret = coder->block_encoder.code( + coder->block_encoder.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, convert[action]); + if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) + return ret; + + // Add a new Index Record. + const lzma_vli total_size = lzma_block_total_size_get( + &coder->block_options); + assert(total_size != 0); + return_if_error(lzma_index_append(coder->index, allocator, + total_size, + coder->block_options.uncompressed_size)); + + coder->sequence = SEQ_BLOCK_INIT; + break; + } + + case SEQ_INDEX_ENCODE: { + // Call the Index encoder. It doesn't take any input, so + // those pointers can be NULL. + const lzma_ret ret = coder->index_encoder.code( + coder->index_encoder.coder, allocator, + NULL, NULL, 0, + out, out_pos, out_size, LZMA_RUN); + if (ret != LZMA_STREAM_END) + return ret; + + // Encode the Stream Footer into coder->buffer. + const lzma_stream_flags stream_flags = { + .backward_size = lzma_index_size(coder->index), + .check = coder->block_options.check, + }; + + if (lzma_stream_footer_encode(&stream_flags, coder->buffer) + != LZMA_OK) + return LZMA_PROG_ERROR; + + coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + coder->sequence = SEQ_STREAM_FOOTER; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_coder_end(&coder->block_encoder, allocator); + lzma_next_coder_end(&coder->index_encoder, allocator); + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + if (filters == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &stream_encode; + next->end = &stream_encoder_end; + + next->coder->block_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index_encoder = LZMA_NEXT_CODER_INIT; + next->coder->index = NULL; + } + + // Basic initializations + next->coder->sequence = SEQ_STREAM_HEADER; + next->coder->block_options.check = check; + next->coder->block_options.filters = (lzma_options_filter *)(filters); + + // Initialize the Index + next->coder->index = lzma_index_init(next->coder->index, allocator); + if (next->coder->index == NULL) + return LZMA_MEM_ERROR; + + // Encode the Stream Header + lzma_stream_flags stream_flags = { + .check = check, + }; + return_if_error(lzma_stream_header_encode( + &stream_flags, next->coder->buffer)); + + next->coder->buffer_pos = 0; + next->coder->buffer_size = LZMA_STREAM_HEADER_SIZE; + + // Initialize the Block encoder. This way we detect if the given + // filters are supported by the current liblzma build, and the + // application doesn't need to keep the filters structure available + // unless it is going to use LZMA_FULL_FLUSH. + return block_encoder_init(next->coder, allocator); +} + + +extern lzma_ret +lzma_stream_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_coder_init(stream_encoder_init, next, allocator, + filters, check); +} + + +extern LZMA_API lzma_ret +lzma_stream_encoder(lzma_stream *strm, + const lzma_options_filter *filters, lzma_check_type check) +{ + lzma_next_strm_init(strm, stream_encoder_init, filters, check); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; + strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} diff --git a/src/liblzma/common/stream_encoder.h b/src/liblzma/common/stream_encoder.h new file mode 100644 index 00000000..3ce29561 --- /dev/null +++ b/src/liblzma/common/stream_encoder.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_encoder.h +/// \brief Encodes .lzma Streams +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_STREAM_ENCODER_H +#define LZMA_STREAM_ENCODER_H + +#include "common.h" + + +extern lzma_ret lzma_stream_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_options_filter *filters, lzma_check_type check); + +#endif diff --git a/src/liblzma/common/stream_encoder_multi.c b/src/liblzma/common/stream_encoder_multi.c deleted file mode 100644 index 403980cf..00000000 --- a/src/liblzma/common/stream_encoder_multi.c +++ /dev/null @@ -1,445 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_multi.c -/// \brief Encodes Multi-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "stream_encoder_multi.h" -#include "block_encoder.h" -#include "metadata_encoder.h" - - -struct lzma_coder_s { - enum { - SEQ_STREAM_HEADER_COPY, - SEQ_HEADER_METADATA_INIT, - SEQ_HEADER_METADATA_COPY, - SEQ_HEADER_METADATA_CODE, - SEQ_DATA_INIT, - SEQ_DATA_COPY, - SEQ_DATA_CODE, - SEQ_FOOTER_METADATA_INIT, - SEQ_FOOTER_METADATA_COPY, - SEQ_FOOTER_METADATA_CODE, - SEQ_STREAM_FOOTER_INIT, - SEQ_STREAM_FOOTER_COPY, - } sequence; - - /// Block or Metadata encoder - lzma_next_coder next; - - /// Options for the Block encoder - lzma_options_block block_options; - - /// Information about the Stream - lzma_info *info; - - /// Information about the current Data Block - lzma_info_iter iter; - - /// Pointer to user-supplied options structure. We don't write to - /// it, only read instructions from the application, thus this is - /// const even though the user-supplied pointer from - /// lzma_options_filter structure isn't. - const lzma_options_stream *stream_options; - - /// Stream Header or Stream Footer in encoded form - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -typedef enum { - BLOCK_HEADER_METADATA, - BLOCK_DATA, - BLOCK_FOOTER_METADATA, -} block_type; - - -static lzma_ret -block_header_encode(lzma_coder *coder, lzma_allocator *allocator, - lzma_vli uncompressed_size, block_type type) -{ - assert(coder->header == NULL); - - coder->block_options = (lzma_options_block){ - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .has_eopm = uncompressed_size == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = type != BLOCK_DATA, - .has_uncompressed_size_in_footer = false, - .has_backward_size = type == BLOCK_FOOTER_METADATA, - .handle_padding = false, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - }; - - if (type == BLOCK_DATA) { - memcpy(coder->block_options.filters, - coder->stream_options->filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment = coder->iter.stream_offset; - } else { - memcpy(coder->block_options.filters, - coder->stream_options->metadata_filters, - sizeof(coder->stream_options->filters)); - coder->block_options.alignment - = lzma_info_metadata_alignment_get( - coder->info, type == BLOCK_HEADER_METADATA); - } - - return_if_error(lzma_block_header_size(&coder->block_options)); - - coder->header_size = coder->block_options.header_size; - coder->header = lzma_alloc(coder->header_size, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_block_header_encode( - coder->header, &coder->block_options)); - - coder->header_pos = 0; - return LZMA_OK; -} - - -static lzma_ret -metadata_encoder_init(lzma_coder *coder, lzma_allocator *allocator, - lzma_metadata *metadata, block_type type) -{ - return_if_error(lzma_info_metadata_set(coder->info, allocator, - metadata, type == BLOCK_HEADER_METADATA, false)); - - const lzma_vli metadata_size = lzma_metadata_size(metadata); - if (metadata_size == 0) - return LZMA_PROG_ERROR; - - return_if_error(block_header_encode( - coder, allocator, metadata_size, type)); - - return lzma_metadata_encoder_init(&coder->next, allocator, - &coder->block_options, metadata); -} - - -static lzma_ret -data_encoder_init(lzma_coder *coder, lzma_allocator *allocator) -{ - return_if_error(lzma_info_iter_next(&coder->iter, allocator)); - - return_if_error(block_header_encode(coder, allocator, - LZMA_VLI_VALUE_UNKNOWN, BLOCK_DATA)); - - return lzma_block_encoder_init(&coder->next, allocator, - &coder->block_options); -} - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, lzma_action action) -{ - // Main loop - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - case SEQ_STREAM_FOOTER_COPY: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - if (coder->header_pos < coder->header_size) - return LZMA_OK; - - lzma_free(coder->header, allocator); - coder->header = NULL; - - switch (coder->sequence) { - case SEQ_STREAM_HEADER_COPY: - // Write Header Metadata Block if we have Extra for it - // or known Uncompressed Size. - if (coder->stream_options->header != NULL - || coder->stream_options - ->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->sequence = SEQ_HEADER_METADATA_INIT; - } else { - // Mark that Header Metadata Block doesn't - // exist. - if (lzma_info_size_set(coder->info, - LZMA_INFO_HEADER_METADATA, 0) - != LZMA_OK) - return LZMA_PROG_ERROR; - - coder->sequence = SEQ_DATA_INIT; - } - break; - - case SEQ_HEADER_METADATA_COPY: - case SEQ_DATA_COPY: - case SEQ_FOOTER_METADATA_COPY: - ++coder->sequence; - break; - - case SEQ_STREAM_FOOTER_COPY: - return LZMA_STREAM_END; - - default: - assert(0); - } - - break; - - case SEQ_HEADER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size = LZMA_VLI_VALUE_UNKNOWN, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = coder->stream_options - ->uncompressed_size, - .index = NULL, - // Metadata encoder doesn't modify this, but since - // the lzma_extra structure is used also when decoding - // Metadata, the pointer is not const, and we need - // to cast the constness away in the encoder. - .extra = (lzma_extra *)(coder->stream_options->header), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_HEADER_METADATA)); - - coder->sequence = SEQ_HEADER_METADATA_COPY; - break; - } - - case SEQ_FOOTER_METADATA_INIT: { - lzma_metadata metadata = { - .header_metadata_size - = lzma_info_size_get(coder->info, - LZMA_INFO_HEADER_METADATA), - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = LZMA_VLI_VALUE_UNKNOWN, - .index = lzma_info_index_get(coder->info, false), - .extra = (lzma_extra *)(coder->stream_options->footer), - }; - - return_if_error(metadata_encoder_init(coder, allocator, - &metadata, BLOCK_FOOTER_METADATA)); - - coder->sequence = SEQ_FOOTER_METADATA_COPY; - break; - } - - case SEQ_HEADER_METADATA_CODE: - case SEQ_FOOTER_METADATA_CODE: { - size_t dummy = 0; - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, NULL, &dummy, 0, - out, out_pos, out_size, LZMA_RUN); - if (ret != LZMA_STREAM_END) - return ret; - - return_if_error(lzma_info_size_set(coder->info, - coder->sequence == SEQ_HEADER_METADATA_CODE - ? LZMA_INFO_HEADER_METADATA - : LZMA_INFO_FOOTER_METADATA, - coder->block_options.total_size)); - - ++coder->sequence; - break; - } - - case SEQ_DATA_INIT: { - // Don't create an empty Block unless it would be - // the only Data Block. - if (*in_pos == in_size) { - // If we are LZMA_SYNC_FLUSHing or LZMA_FULL_FLUSHing, - // return LZMA_STREAM_END since there's nothing to - // flush. - if (action != LZMA_FINISH) - return action == LZMA_RUN - ? LZMA_OK : LZMA_STREAM_END; - - if (lzma_info_index_count_get(coder->info) != 0) { - if (lzma_info_index_finish(coder->info)) - return LZMA_DATA_ERROR; - - coder->sequence = SEQ_FOOTER_METADATA_INIT; - break; - } - } - - return_if_error(data_encoder_init(coder, allocator)); - - coder->sequence = SEQ_DATA_COPY; - break; - } - - case SEQ_DATA_CODE: { - static const lzma_action convert[4] = { - LZMA_RUN, - LZMA_SYNC_FLUSH, - LZMA_FINISH, - LZMA_FINISH, - }; - - const lzma_ret ret = coder->next.code(coder->next.coder, - allocator, in, in_pos, in_size, - out, out_pos, out_size, convert[action]); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - return_if_error(lzma_info_iter_set(&coder->iter, - coder->block_options.total_size, - coder->block_options.uncompressed_size)); - - coder->sequence = SEQ_DATA_INIT; - break; - } - - case SEQ_STREAM_FOOTER_INIT: { - assert(coder->header == NULL); - - lzma_stream_flags flags = { - .check = coder->stream_options->check, - .has_crc32 = coder->stream_options->has_crc32, - .is_multi = true, - }; - - coder->header = lzma_alloc(LZMA_STREAM_TAIL_SIZE, allocator); - if (coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_tail_encode( - coder->header, &flags)); - - coder->header_size = LZMA_STREAM_TAIL_SIZE; - coder->header_pos = 0; - - coder->sequence = SEQ_STREAM_FOOTER_COPY; - break; - } - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->next, allocator); - lzma_info_free(coder->info, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->info = NULL; - } else { - lzma_free(next->coder->header, allocator); - } - - next->coder->header = NULL; - - next->coder->info = lzma_info_init(next->coder->info, allocator); - if (next->coder->info == NULL) - return LZMA_MEM_ERROR; - - next->coder->sequence = SEQ_STREAM_HEADER_COPY; - next->coder->stream_options = options; - - // Encode Stream Flags - { - lzma_stream_flags flags = { - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = true, - }; - - next->coder->header = lzma_alloc(LZMA_STREAM_HEADER_SIZE, - allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode( - next->coder->header, &flags)); - - next->coder->header_pos = 0; - next->coder->header_size = LZMA_STREAM_HEADER_SIZE; - } - - if (lzma_info_size_set(next->coder->info, LZMA_INFO_STREAM_START, - options->alignment) != LZMA_OK) - return LZMA_PROG_ERROR; - - lzma_info_iter_begin(next->coder->info, &next->coder->iter); - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, next, allocator, options); -} - - -extern LZMA_API lzma_ret -lzma_stream_encoder_multi( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FULL_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_encoder_multi.h b/src/liblzma/common/stream_encoder_multi.h deleted file mode 100644 index e0ff02f3..00000000 --- a/src/liblzma/common/stream_encoder_multi.h +++ /dev/null @@ -1,26 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_multi.h -/// \brief Encodes Multi-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_STREAM_ENCODER_MULTI_H -#define LZMA_STREAM_ENCODER_MULTI_H - -extern lzma_ret lzma_stream_encoder_multi_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options); - -#endif diff --git a/src/liblzma/common/stream_encoder_single.c b/src/liblzma/common/stream_encoder_single.c deleted file mode 100644 index d93e7169..00000000 --- a/src/liblzma/common/stream_encoder_single.c +++ /dev/null @@ -1,219 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file stream_encoder_single.c -/// \brief Encodes Single-Block .lzma files -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "stream_common.h" -#include "block_encoder.h" - - -struct lzma_coder_s { - /// Uncompressed Size, Backward Size, and Footer Magic Bytes are - /// part of Block in the file format specification, but it is simpler - /// to implement them as part of Stream. - enum { - SEQ_HEADERS, - SEQ_DATA, - SEQ_FOOTER, - } sequence; - - /// Block encoder - lzma_next_coder block_encoder; - - /// Block encoder options - lzma_options_block block_options; - - /// Stream Flags; we need to have these in this struct so that we - /// can encode Stream Footer. - lzma_stream_flags stream_flags; - - /// Stream Header + Block Header, or Stream Footer - uint8_t *header; - size_t header_pos; - size_t header_size; -}; - - -static lzma_ret -stream_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, size_t *out_pos, - size_t out_size, lzma_action action) -{ - // NOTE: We don't check if the amount of input is in the proper limits, - // because the Block encoder will do it for us. - - while (*out_pos < out_size) - switch (coder->sequence) { - case SEQ_HEADERS: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - if (coder->header_pos == coder->header_size) { - coder->header_pos = 0; - coder->sequence = SEQ_DATA; - } - - break; - - case SEQ_DATA: { - const lzma_ret ret = coder->block_encoder.code( - coder->block_encoder.coder, allocator, - in, in_pos, in_size, - out, out_pos, out_size, action); - if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) - return ret; - - assert(*in_pos == in_size); - - assert(coder->header_size >= LZMA_STREAM_TAIL_SIZE); - coder->header_size = LZMA_STREAM_TAIL_SIZE; - - return_if_error(lzma_stream_tail_encode( - coder->header, &coder->stream_flags)); - - coder->sequence = SEQ_FOOTER; - break; - } - - case SEQ_FOOTER: - bufcpy(coder->header, &coder->header_pos, coder->header_size, - out, out_pos, out_size); - - return coder->header_pos == coder->header_size - ? LZMA_STREAM_END : LZMA_OK; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static void -stream_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_next_coder_end(&coder->block_encoder, allocator); - lzma_free(coder->header, allocator); - lzma_free(coder, allocator); - return; -} - - -static lzma_ret -stream_encoder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->code = &stream_encode; - next->end = &stream_encoder_end; - next->coder->block_encoder = LZMA_NEXT_CODER_INIT; - } else { - // Free the previous buffer, if any. - lzma_free(next->coder->header, allocator); - } - - // At this point, next->coder->header points to nothing useful. - next->coder->header = NULL; - - // Basic initializations - next->coder->sequence = SEQ_HEADERS; - next->coder->header_pos = 0; - - // Initialize next->coder->stream_flags. - next->coder->stream_flags = (lzma_stream_flags){ - .check = options->check, - .has_crc32 = options->has_crc32, - .is_multi = false, - }; - - // Initialize next->coder->block_options. - next->coder->block_options = (lzma_options_block){ - .check = options->check, - .has_crc32 = options->has_crc32, - .has_eopm = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .is_metadata = false, - .has_uncompressed_size_in_footer = options->uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN, - .has_backward_size = true, - .handle_padding = false, - .compressed_size = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_size = options->uncompressed_size, - .compressed_reserve = 0, - .uncompressed_reserve = 0, - .total_size = LZMA_VLI_VALUE_UNKNOWN, - .total_limit = LZMA_VLI_VALUE_UNKNOWN, - .uncompressed_limit = LZMA_VLI_VALUE_UNKNOWN, - .padding = LZMA_BLOCK_HEADER_PADDING_AUTO, - .alignment = options->alignment + LZMA_STREAM_HEADER_SIZE, - }; - memcpy(next->coder->block_options.filters, options->filters, - sizeof(options->filters)); - - return_if_error(lzma_block_header_size(&next->coder->block_options)); - - // Encode Stream Flags and Block Header into next->coder->header. - next->coder->header_size = (size_t)(LZMA_STREAM_HEADER_SIZE) - + next->coder->block_options.header_size; - next->coder->header = lzma_alloc(next->coder->header_size, allocator); - if (next->coder->header == NULL) - return LZMA_MEM_ERROR; - - return_if_error(lzma_stream_header_encode(next->coder->header, - &next->coder->stream_flags)); - - return_if_error(lzma_block_header_encode( - next->coder->header + LZMA_STREAM_HEADER_SIZE, - &next->coder->block_options)); - - // Initialize the Block encoder. - return lzma_block_encoder_init(&next->coder->block_encoder, allocator, - &next->coder->block_options); -} - - -/* -extern lzma_ret -lzma_stream_encoder_single_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_stream *options) -{ - lzma_next_coder_init(stream_encoder_init, allocator, options); -} -*/ - - -extern LZMA_API lzma_ret -lzma_stream_encoder_single( - lzma_stream *strm, const lzma_options_stream *options) -{ - lzma_next_strm_init(strm, stream_encoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; - strm->internal->supported_actions[LZMA_FINISH] = true; - - return LZMA_OK; -} diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c index d9c847ac..0270875a 100644 --- a/src/liblzma/common/stream_flags_decoder.c +++ b/src/liblzma/common/stream_flags_decoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_decoder.c -/// \brief Decodes Stream Header and tail from .lzma files +/// \brief Decodes Stream Header and Stream Footer from .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -17,242 +17,72 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "stream_flags_decoder.h" #include "stream_common.h" -//////////// -// Common // -//////////// - -struct lzma_coder_s { - enum { - SEQ_HEADER_MAGIC, - SEQ_HEADER_FLAGS, - SEQ_HEADER_CRC32, - - SEQ_FOOTER_FLAGS, - SEQ_FOOTER_MAGIC, - } sequence; - - size_t pos; - uint32_t crc32; - - lzma_stream_flags *options; -}; - - -static void -stream_header_decoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_free(coder, allocator); - return; -} - - static bool -stream_flags_decode(const uint8_t *in, lzma_stream_flags *options) +stream_flags_decode(lzma_stream_flags *options, const uint8_t *in) { // Reserved bits must be unset. - if (*in & 0xE0) + if (in[0] != 0x00 || (in[1] & 0xF0)) return true; - options->check = *in & 0x07; - options->has_crc32 = (*in & 0x08) != 0; - options->is_multi = (*in & 0x10) != 0; + options->check = in[1] & 0x0F; return false; } -//////////// -// Header // -//////////// - -static lzma_ret -stream_header_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_HEADER_MAGIC: - if (in[*in_pos] != lzma_header_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_header_magic)) { - coder->pos = 0; - coder->sequence = SEQ_HEADER_FLAGS; - } - - break; - - case SEQ_HEADER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - coder->crc32 = lzma_crc32(in + *in_pos, 1, 0); - - ++*in_pos; - coder->sequence = SEQ_HEADER_CRC32; - break; - - case SEQ_HEADER_CRC32: - if (in[*in_pos] != ((coder->crc32 >> (coder->pos * 8)) & 0xFF)) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == 4) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_header_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_HEADER_MAGIC; - next->coder->pos = 0; - next->coder->crc32 = 0; - next->coder->options = options; - - return LZMA_OK; -} - - -extern lzma_ret -lzma_stream_header_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init( - stream_header_decoder_init, next, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_header_decoder(lzma_stream *strm, lzma_stream_flags *options) -{ - lzma_next_strm_init(strm, stream_header_decoder_init, options); - - strm->internal->supported_actions[LZMA_RUN] = true; - - return LZMA_OK; -} - - -////////// -// Tail // -////////// - -static lzma_ret -stream_tail_decode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out lzma_attribute((unused)), - size_t *restrict out_pos lzma_attribute((unused)), - size_t out_size lzma_attribute((unused)), - lzma_action action lzma_attribute((unused))) -{ - while (*in_pos < in_size) - switch (coder->sequence) { - case SEQ_FOOTER_FLAGS: - if (stream_flags_decode(in + *in_pos, coder->options)) - return LZMA_HEADER_ERROR; - - ++*in_pos; - coder->sequence = SEQ_FOOTER_MAGIC; - break; - - case SEQ_FOOTER_MAGIC: - if (in[*in_pos] != lzma_footer_magic[coder->pos]) - return LZMA_DATA_ERROR; - - ++*in_pos; - - if (++coder->pos == sizeof(lzma_footer_magic)) - return LZMA_STREAM_END; - - break; - - default: - return LZMA_PROG_ERROR; - } - - return LZMA_OK; -} - - -static lzma_ret -stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) +lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in) { - if (options == NULL) - return LZMA_PROG_ERROR; - - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - } - - // Set the function pointers unconditionally, because they may - // have been pointing to footer decoder too. - next->code = &stream_tail_decode; - next->end = &stream_header_decoder_end; - - next->coder->sequence = SEQ_FOOTER_FLAGS; - next->coder->pos = 0; - next->coder->options = options; + // Magic + if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // Verify the CRC32 so we can distinguish between corrupt + // and unsupported files. + const uint32_t crc = lzma_crc32(in + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(lzma_header_magic))) + return LZMA_HEADER_ERROR; + + // Set Backward Size to indicate unknown value. That way + // lzma_stream_flags_equal can be used to compare Stream Header + // and Stream Footer while keeping it useful also for comparing + // two Stream Footers. + options->backward_size = LZMA_VLI_VALUE_UNKNOWN; return LZMA_OK; } -extern lzma_ret -lzma_stream_tail_decoder_init(lzma_next_coder *next, - lzma_allocator *allocator, lzma_stream_flags *options) -{ - lzma_next_coder_init2(next, allocator, stream_header_decoder_init, - stream_tail_decoder_init, allocator, options); -} - - extern LZMA_API lzma_ret -lzma_stream_tail_decoder(lzma_stream *strm, lzma_stream_flags *options) +lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in) { - lzma_next_strm_init2(strm, stream_header_decoder_init, - stream_tail_decoder_init, strm->allocator, options); - - strm->internal->supported_actions[LZMA_RUN] = true; + // Magic + if (memcmp(in + sizeof(uint32_t) * 2 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)) != 0) + return LZMA_FORMAT_ERROR; + + // CRC32 + const uint32_t crc = lzma_crc32(in + sizeof(uint32_t), + sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0); + if (crc != integer_read_32(in)) + return LZMA_DATA_ERROR; + + // Stream Flags + if (stream_flags_decode(options, in + sizeof(uint32_t) * 2)) + return LZMA_HEADER_ERROR; + + // Backward Size + options->backward_size = integer_read_32(in + sizeof(uint32_t)); + options->backward_size = (options->backward_size + 1) * 4; return LZMA_OK; } diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c index 55468580..4efbb6f4 100644 --- a/src/liblzma/common/stream_flags_encoder.c +++ b/src/liblzma/common/stream_flags_encoder.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file stream_flags_encoder.c -/// \brief Encodes Stream Header and Footer for .lzma files +/// \brief Encodes Stream Header and Stream Footer for .lzma files // // Copyright (C) 2007 Lasse Collin // @@ -21,55 +21,69 @@ static bool -stream_flags_encode(uint8_t *flags_byte, const lzma_stream_flags *options) +stream_flags_encode(const lzma_stream_flags *options, uint8_t *out) { - // Check type if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return true; - *flags_byte = options->check; - - // Usage of CRC32 in Block Headers - if (options->has_crc32) - *flags_byte |= 0x08; - - // Single- or Multi-Block - if (options->is_multi) - *flags_byte |= 0x10; + out[0] = 0x00; + out[1] = options->check; return false; } extern LZMA_API lzma_ret -lzma_stream_header_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE + + 4 == LZMA_STREAM_HEADER_SIZE); + // Magic memcpy(out, lzma_header_magic, sizeof(lzma_header_magic)); // Stream Flags - if (stream_flags_encode(out + sizeof(lzma_header_magic), options)) - return LZMA_PROG_ERROR;; + if (stream_flags_encode(options, out + sizeof(lzma_header_magic))) + return LZMA_PROG_ERROR; // CRC32 of the Stream Header - const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), 1, 0); + const uint32_t crc = lzma_crc32(out + sizeof(lzma_header_magic), + LZMA_STREAM_FLAGS_SIZE, 0); - for (size_t i = 0; i < 4; ++i) - out[sizeof(lzma_header_magic) + 1 + i] = crc >> (i * 8); + integer_write_32(out + sizeof(lzma_header_magic) + + LZMA_STREAM_FLAGS_SIZE, crc); return LZMA_OK; } extern LZMA_API lzma_ret -lzma_stream_tail_encode(uint8_t *out, const lzma_stream_flags *options) +lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out) { + assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic) + == LZMA_STREAM_HEADER_SIZE); + + // Backward Size + if (options->backward_size < LZMA_BACKWARD_SIZE_MIN + || options->backward_size > LZMA_BACKWARD_SIZE_MAX + || (options->backward_size & 3)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 4, options->backward_size / 4 - 1); + // Stream Flags - if (stream_flags_encode(out, options)) + if (stream_flags_encode(options, out + 2 * 4)) return LZMA_PROG_ERROR; + // CRC32 + const uint32_t crc = lzma_crc32( + out + 4, 4 + LZMA_STREAM_FLAGS_SIZE, 0); + + integer_write_32(out, crc); + // Magic - memcpy(out + 1, lzma_footer_magic, sizeof(lzma_footer_magic)); + memcpy(out + 2 * 4 + LZMA_STREAM_FLAGS_SIZE, + lzma_footer_magic, sizeof(lzma_footer_magic)); return LZMA_OK; } diff --git a/src/liblzma/common/stream_flags_equal.c b/src/liblzma/common/stream_flags_equal.c new file mode 100644 index 00000000..db22567f --- /dev/null +++ b/src/liblzma/common/stream_flags_equal.c @@ -0,0 +1,36 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file stream_flags_equal.c +/// \brief Compare Stream Header and Stream Footer +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "common.h" + + +extern LZMA_API lzma_bool +lzma_stream_flags_equal(const lzma_stream_flags *a, lzma_stream_flags *b) +{ + if (a->check != b->check) + return false; + + // Backward Sizes are compared only if they are known in both. + if (a->backward_size != LZMA_VLI_VALUE_UNKNOWN + && b->backward_size != LZMA_VLI_VALUE_UNKNOWN + && a->backward_size != b->backward_size) + return false; + + return true; +} diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c index 2b89c1a7..faff6ccb 100644 --- a/src/liblzma/common/vli_decoder.c +++ b/src/liblzma/common/vli_decoder.c @@ -3,7 +3,7 @@ /// \file vli_decoder.c /// \brief Decodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -25,45 +25,53 @@ lzma_vli_decode(lzma_vli *restrict vli, size_t *restrict vli_pos, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size) { - if (*vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 - || (*vli >> (7 * *vli_pos)) != 0) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*in_pos >= in_size) - return LZMA_BUF_ERROR; + // Initialize *vli when starting to decode a new integer. + if (*vli_pos == 0) + *vli = 0; - if (*vli_pos == 0) { - *vli_pos = 1; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *in_pos >= in_size + || (*vli >> (*vli_pos * 7)) != 0) + return LZMA_PROG_ERROR;; - if (in[*in_pos] <= 0x7F) { - // Single-byte integer - *vli = in[*in_pos]; - ++*in_pos; - return LZMA_STREAM_END; - } - - *vli = in[*in_pos] & 0x7F; - ++*in_pos; - } - - while (*in_pos < in_size) { - // Read in the next byte. + do { + // Read the next byte. *vli |= (lzma_vli)(in[*in_pos] & 0x7F) << (*vli_pos * 7); ++*vli_pos; // Check if this is the last byte of a multibyte integer. - if (in[*in_pos] & 0x80) { - ++*in_pos; - return LZMA_STREAM_END; + if (!(in[*in_pos] & 0x80)) { + // We don't allow using variable-length integers as + // padding i.e. the encoding must use the most the + // compact form. + if (in[(*in_pos)++] == 0x00 && *vli_pos > 1) + return LZMA_DATA_ERROR; + + return vli_pos == &vli_pos_internal + ? LZMA_OK : LZMA_STREAM_END; } - // Limit variable-length representation to nine bytes. - if (*vli_pos == 9) + ++*in_pos; + + // There is at least one more byte coming. If we have already + // read maximum number of bytes, the integer is considered + // corrupt. + // + // If we need bigger integers in future, old versions liblzma + // will confusingly indicate the file being corrupt istead of + // unsupported. I suppose it's still better this way, because + // in the foreseeable future (writing this in 2008) the only + // reason why files would appear having over 63-bit integers + // is that the files are simply corrupt. + if (*vli_pos == LZMA_VLI_BYTES_MAX) return LZMA_DATA_ERROR; - // Increment input position only when the byte was accepted. - ++*in_pos; - } + } while (*in_pos < in_size); - return LZMA_OK; + return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK; } diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c index 1ecdb0d2..c48d6474 100644 --- a/src/liblzma/common/vli_encoder.c +++ b/src/liblzma/common/vli_encoder.c @@ -3,7 +3,7 @@ /// \file vli_encoder.c /// \brief Encodes variable-length integers // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -21,61 +21,54 @@ extern LZMA_API lzma_ret -lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, size_t vli_size, +lzma_vli_encode(lzma_vli vli, size_t *restrict vli_pos, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { - if (vli > LZMA_VLI_VALUE_MAX || *vli_pos >= 9 || vli_size > 9 - || (vli != 0 && (vli >> (7 * *vli_pos)) == 0)) - return LZMA_PROG_ERROR; + // If we haven't been given vli_pos, work in single-call mode. + size_t vli_pos_internal = 0; + if (vli_pos == NULL) + vli_pos = &vli_pos_internal; - if (*out_pos >= out_size) - return LZMA_BUF_ERROR; + // Validate the arguments. + if (*vli_pos >= LZMA_VLI_BYTES_MAX || *out_pos >= out_size + || vli > LZMA_VLI_VALUE_MAX) + return LZMA_PROG_ERROR; - if (*vli_pos == 0) { - *vli_pos = 1; + // Write the non-last bytes in a loop. + while ((vli >> (*vli_pos * 7)) >= 0x80) { + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)) | 0x80; - if (vli <= 0x7F && *vli_pos >= vli_size) { - // Single-byte integer - out[(*out_pos)++] = vli; - return LZMA_STREAM_END; - } + ++*vli_pos; + assert(*vli_pos < LZMA_VLI_BYTES_MAX); - // First byte of a multibyte integer - out[(*out_pos)++] = (vli & 0x7F) | 0x80; + if (++*out_pos == out_size) + return vli_pos == &vli_pos_internal + ? LZMA_PROG_ERROR : LZMA_OK; } - while (*out_pos < out_size) { - const lzma_vli b = vli >> (7 * *vli_pos); - ++*vli_pos; - - if (b <= 0x7F && *vli_pos >= vli_size) { - // Last byte of a multibyte integer - out[(*out_pos)++] = (b & 0xFF) | 0x80; - return LZMA_STREAM_END; - } + // Write the last byte. + out[*out_pos] = (uint8_t)(vli >> (*vli_pos * 7)); + ++*out_pos; + ++*vli_pos; - // Middle byte of a multibyte integer - out[(*out_pos)++] = b & 0x7F; - } + return vli_pos == &vli_pos_internal ? LZMA_OK : LZMA_STREAM_END; - // vli is not yet completely written out. - return LZMA_OK; } -extern LZMA_API size_t +extern LZMA_API uint32_t lzma_vli_size(lzma_vli vli) { if (vli > LZMA_VLI_VALUE_MAX) return 0; - size_t i = 0; + uint32_t i = 0; do { vli >>= 7; ++i; } while (vli != 0); - assert(i <= 9); + assert(i <= LZMA_VLI_BYTES_MAX); return i; } diff --git a/src/liblzma/common/vli_reverse_decoder.c b/src/liblzma/common/vli_reverse_decoder.c deleted file mode 100644 index 68ca6a42..00000000 --- a/src/liblzma/common/vli_reverse_decoder.c +++ /dev/null @@ -1,55 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file vli_reverse_decoder.c -/// \brief Decodes variable-length integers starting at end of the buffer -// -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "common.h" - - -extern LZMA_API lzma_ret -lzma_vli_reverse_decode(lzma_vli *vli, const uint8_t *in, size_t *in_size) -{ - if (*in_size == 0) - return LZMA_BUF_ERROR; - - size_t i = *in_size - 1; - *vli = in[i] & 0x7F; - - if (!(in[i] & 0x80)) { - *in_size = i; - return LZMA_OK; - } - - const size_t end = *in_size > LZMA_VLI_BYTES_MAX - ? *in_size - LZMA_VLI_BYTES_MAX : 0; - - do { - if (i-- == end) { - if (*in_size < LZMA_VLI_BYTES_MAX) - return LZMA_BUF_ERROR; - - return LZMA_DATA_ERROR; - } - - *vli <<= 7; - *vli = in[i] & 0x7F; - - } while (!(in[i] & 0x80)); - - *in_size = i; - return LZMA_OK; -} diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index a400bde1..ae969d62 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -387,11 +387,11 @@ lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator, bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, bool has_safe_buffer), - lzma_vli uncompressed_size, size_t history_size, size_t match_max_len) { - // Set uncompressed size. - lz->uncompressed_size = uncompressed_size; + // Known uncompressed size is used only with LZMA_Alone files so we + // set it always to unknown by default. + lz->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; // Limit the history size to roughly sane values. This is primarily // to prevent integer overflows. diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index a8a585cd..1acf9831 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -31,6 +31,11 @@ : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) +/// Test if dictionary is empty. +#define lz_is_empty(lz) \ + ((lz).pos == 0 && !(lz).is_full) + + #define LZMA_LZ_DECODER_INIT \ (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } @@ -109,7 +114,6 @@ extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_coder *restrict coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, bool has_safe_buffer), - lzma_vli uncompressed_size, size_t history_size, size_t match_max_len); extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, @@ -155,12 +159,12 @@ lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) // in which e.g. the data of the previously decoded file(s) // would be leaked (or whatever happens to be in unused // part of the dictionary buffer). - if (distance >= lz->pos && !lz->is_full) + if (unlikely(distance >= lz->pos && !lz->is_full)) return false; // It also doesn't make sense to copy data farer than // the dictionary size. - if (distance >= lz->requested_size) + if (unlikely(distance >= lz->requested_size)) return false; // The caller must have checked these! diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index dfe83589..d4cefe0b 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -547,6 +547,9 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // Note that rep0 is known to have a safe value, thus we // don't need to check if we are wrapping the dictionary // when it isn't full yet. + if (unlikely(lz_is_empty(coder->lz))) + return true; + coder->lz.dict[coder->lz.pos] = lz_get_byte(coder->lz, rep0); ++coder->lz.pos; @@ -698,7 +701,6 @@ lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, { const lzma_ret ret = lzma_lz_decoder_reset( &next->coder->lz, allocator, &decode_real, - filters[0].uncompressed_size, options->dictionary_size, MATCH_MAX_LEN); if (ret != LZMA_OK) { lzma_literal_end(&next->coder->literal_coder, @@ -785,6 +787,15 @@ lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, } +extern void +lzma_lzma_decoder_uncompressed_size( + lzma_next_coder *next, lzma_vli uncompressed_size) +{ + next->coder->lz.uncompressed_size = uncompressed_size; + return; +} + + extern bool lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) { diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h index 929c2bff..9d57c7e5 100644 --- a/src/liblzma/lzma/lzma_decoder.h +++ b/src/liblzma/lzma/lzma_decoder.h @@ -24,10 +24,15 @@ #include "common.h" -/// \brief Allocates and initializes LZMA decoder +/// Allocates and initializes LZMA decoder extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); +/// Set known uncompressed size. This is a hack needed to support +/// LZMA_Alone files that don't have EOPM. +extern void lzma_lzma_decoder_uncompressed_size( + lzma_next_coder *next, lzma_vli uncompressed_size); + /// \brief Decodes the LZMA Properties byte (lc/lp/pb) /// /// \return true if error occorred, false on success @@ -35,7 +40,4 @@ extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, extern bool lzma_lzma_decode_properties( lzma_options_lzma *options, uint8_t byte); -// There is no public lzma_lzma_encode() because lzma_lz_encode() works -// as a wrapper for it. - #endif diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c index e9674308..078f1b95 100644 --- a/src/liblzma/simple/simple_coder.c +++ b/src/liblzma/simple/simple_coder.c @@ -23,11 +23,7 @@ #include "simple_private.h" -/// Copied or encodes/decodes more data to out[]. Checks and updates -/// uncompressed_size when we are the last coder in the chain. -/// If we aren't the last filter in the chain, we don't need to care about -/// uncompressed size, since we don't change it; the next filter in the -/// chain will check it anyway. +/// Copied or encodes/decodes more data to out[]. static lzma_ret copy_or_code(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *restrict in, size_t *restrict in_pos, @@ -37,28 +33,12 @@ copy_or_code(lzma_coder *coder, lzma_allocator *allocator, assert(!coder->end_was_reached); if (coder->next.code == NULL) { - const size_t in_avail = in_size - *in_pos; - - if (!coder->is_encoder) { - // Limit in_size so that we don't copy too much. - if ((lzma_vli)(in_avail) > coder->uncompressed_size) - in_size = *in_pos + (size_t)( - coder->uncompressed_size); - } - - const size_t out_start = *out_pos; bufcpy(in, in_pos, in_size, out, out_pos, out_size); // Check if end of stream was reached. - if (coder->is_encoder) { - if (action == LZMA_FINISH && *in_pos == in_size) - coder->end_was_reached = true; - } else if (coder->uncompressed_size - != LZMA_VLI_VALUE_UNKNOWN) { - coder->uncompressed_size -= *out_pos - out_start; - if (coder->uncompressed_size == 0) - coder->end_was_reached = true; - } + if (coder->is_encoder && action == LZMA_FINISH + && *in_pos == in_size) + coder->end_was_reached = true; } else { // Call the next coder in the chain to provide us some data. @@ -283,7 +263,6 @@ lzma_simple_coder_init(lzma_next_coder *next, lzma_allocator *allocator, // Reset variables. next->coder->is_encoder = is_encoder; next->coder->end_was_reached = false; - next->coder->uncompressed_size = filters[0].uncompressed_size; next->coder->pos = 0; next->coder->filtered = 0; next->coder->size = 0; diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h index a512396c..4e7a9db3 100644 --- a/src/liblzma/simple/simple_private.h +++ b/src/liblzma/simple/simple_private.h @@ -39,10 +39,6 @@ struct lzma_coder_s { /// is very small. bool is_encoder; - /// Size of the data *left* to be processed, or LZMA_VLI_VALUE_UNKNOWN - /// if unknown. - lzma_vli uncompressed_size; - /// Pointer to filter-specific function, which does /// the actual filtering. size_t (*filter)(lzma_simple *simple, uint32_t now_pos, diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 6f38caff..39ec35c1 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -53,9 +53,6 @@ struct lzma_coder_s { /// Number of bytes left in the current Subblock Data field. size_t size; - /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown. - lzma_vli uncompressed_size; - /// Number of consecutive Subblocks with Subblock Type Padding uint32_t padding; @@ -124,22 +121,6 @@ enum { }; -/// Substracts size from coder->uncompressed_size uncompressed size is known -/// and size isn't bigger than coder->uncompressed_size. -static inline bool -update_uncompressed_size(lzma_coder *coder, size_t size) -{ - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - if ((lzma_vli)(size) > coder->uncompressed_size) - return true; - - coder->uncompressed_size -= size; - } - - return false; -} - - /// Calls the subfilter and updates coder->uncompressed_size. static lzma_ret subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, @@ -149,17 +130,11 @@ subfilter_decode(lzma_coder *coder, lzma_allocator *allocator, { assert(coder->subfilter.code != NULL); - const size_t out_start = *out_pos; - // Call the subfilter. const lzma_ret ret = coder->subfilter.code( coder->subfilter.coder, allocator, in, in_pos, in_size, out, out_pos, out_size, action); - // Update uncompressed_size. - if (update_uncompressed_size(coder, *out_pos - out_start)) - return LZMA_DATA_ERROR; - return ret; } @@ -174,9 +149,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, || coder->sequence >= SEQ_DATA)) switch (coder->sequence) { case SEQ_FLAGS: { - if ((in[*in_pos] >> 4) != FLAG_PADDING) - coder->padding = 0; - // Do the correct action depending on the Subblock Type. switch (in[*in_pos] >> 4) { case FLAG_PADDING: @@ -188,6 +160,10 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case FLAG_EOPM: + // There must be no Padding before EOPM. + if (coder->padding != 0) + return LZMA_DATA_ERROR; + // Check that reserved bits are zero. if (in[*in_pos] & 0x0F) return LZMA_DATA_ERROR; @@ -196,11 +172,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->subfilter.code != NULL) return LZMA_DATA_ERROR; - // End of Payload Marker must not be used if - // uncompressed size is known. - if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) - return LZMA_DATA_ERROR; - ++*in_pos; return LZMA_STREAM_END; @@ -222,15 +193,16 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; case FLAG_SET_SUBFILTER: { - if ((in[*in_pos] & 0x0F) + if (coder->padding != 0 || (in[*in_pos] & 0x0F) || coder->subfilter.code != NULL || !coder->allow_subfilters) return LZMA_DATA_ERROR; assert(coder->filter_flags.options == NULL); - return_if_error(lzma_filter_flags_decoder_init( - &coder->filter_flags_decoder, - allocator, &coder->filter_flags)); + abort(); +// return_if_error(lzma_filter_flags_decoder_init( +// &coder->filter_flags_decoder, +// allocator, &coder->filter_flags)); coder->got_output_with_subfilter = false; @@ -240,7 +212,8 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } case FLAG_END_SUBFILTER: - if (coder->subfilter.code == NULL + if (coder->padding != 0 || (in[*in_pos] & 0x0F) + || coder->subfilter.code == NULL || !coder->got_output_with_subfilter) return LZMA_DATA_ERROR; @@ -276,9 +249,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++*in_pos; - if (coder->uncompressed_size == 0) - return LZMA_STREAM_END; - break; default: @@ -301,9 +271,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, // Initialize the Subfilter. Subblock and Copy filters are // not allowed. - if (coder->filter_flags.id == LZMA_FILTER_COPY - || coder->filter_flags.id - == LZMA_FILTER_SUBBLOCK) + if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK) return LZMA_DATA_ERROR; coder->helper.end_was_reached = false; @@ -327,8 +295,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, filters[1].id = LZMA_VLI_VALUE_UNKNOWN; return_if_error(lzma_raw_decoder_init( - &coder->subfilter, allocator, - filters, LZMA_VLI_VALUE_UNKNOWN, false)); + &coder->subfilter, allocator, filters)); coder->sequence = SEQ_FLAGS; break; @@ -385,7 +352,14 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->repeat.count = coder->size; coder->repeat.size = (size_t)(in[*in_pos]) + 1; coder->repeat.pos = 0; + + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + if (coder->repeat.size <= coder->padding) + return LZMA_DATA_ERROR; + ++*in_pos; + coder->padding = 0; coder->sequence = SEQ_REPEAT_READ_DATA; break; @@ -415,6 +389,14 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } case SEQ_DATA: { + // The size of the Data field must be bigger than the number + // of Padding bytes before this Subblock. + assert(coder->size > 0); + if (coder->size <= coder->padding) + return LZMA_DATA_ERROR; + + coder->padding = 0; + // Limit the amount of input to match the available // Subblock Data size. size_t in_limit; @@ -429,10 +411,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, out, out_pos, out_size); coder->size -= copy_size; - - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - } else { const size_t in_start = *in_pos; const lzma_ret ret = subfilter_decode( @@ -467,11 +445,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, if (coder->size > 0) return LZMA_OK; - // Check if we have decoded all the data. - if (coder->uncompressed_size == 0 - && coder->subfilter.code == NULL) - return LZMA_STREAM_END; - coder->sequence = SEQ_FLAGS; break; } @@ -487,16 +460,8 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, *out_pos += copy_size; coder->repeat.count -= copy_size; - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - - if (coder->repeat.count == 0) { - assert(coder->subfilter.code == NULL); - if (coder->uncompressed_size == 0) - return LZMA_STREAM_END; - } else { + if (coder->repeat.count != 0) return LZMA_OK; - } coder->sequence = SEQ_FLAGS; break; @@ -515,15 +480,10 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } if (coder->subfilter.code == NULL) { - const size_t copy_size = bufcpy( - coder->repeat.buffer, + bufcpy(coder->repeat.buffer, &coder->repeat.pos, coder->repeat.size, out, out_pos, out_size); - - if (update_uncompressed_size(coder, copy_size)) - return LZMA_DATA_ERROR; - } else { const lzma_ret ret = subfilter_decode( coder, allocator, @@ -553,11 +513,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, } } while (*out_pos < out_size); - // Check if we have decoded all the data. - if (coder->uncompressed_size == 0 - && coder->subfilter.code == NULL) - return LZMA_STREAM_END; - break; default: @@ -664,7 +619,6 @@ lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->filter_flags.options = NULL; next->coder->sequence = SEQ_FLAGS; - next->coder->uncompressed_size = filters[0].uncompressed_size; next->coder->padding = 0; next->coder->next_finished = false; next->coder->this_finished = false; diff --git a/src/liblzma/subblock/subblock_decoder_helper.c b/src/liblzma/subblock/subblock_decoder_helper.c index 77d1f4bd..e8063e1e 100644 --- a/src/liblzma/subblock/subblock_decoder_helper.c +++ b/src/liblzma/subblock/subblock_decoder_helper.c @@ -62,14 +62,11 @@ lzma_subblock_decoder_helper_init(lzma_next_coder *next, // This is always the last filter in the chain. assert(filters[1].init == NULL); - // We never know uncompressed size. - assert(filters[0].uncompressed_size == LZMA_VLI_VALUE_UNKNOWN); - if (next->coder == NULL) { next->coder = lzma_alloc(sizeof(lzma_coder), allocator); if (next->coder == NULL) return LZMA_MEM_ERROR; - + next->code = &helper_decode; next->end = &helper_end; } diff --git a/src/liblzma/subblock/subblock_encoder.c b/src/liblzma/subblock/subblock_encoder.c index a8aedbd7..01e8007a 100644 --- a/src/liblzma/subblock/subblock_encoder.c +++ b/src/liblzma/subblock/subblock_encoder.c @@ -51,7 +51,6 @@ do { \ struct lzma_coder_s { lzma_next_coder next; bool next_finished; - bool use_eopm; enum { SEQ_FILL, @@ -636,9 +635,10 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, coder->subfilter.mode_locked = false; coder->sequence = SEQ_FILL; - } else if (coder->use_eopm) { + } else { assert(action == LZMA_FINISH); + // Write EOPM. // NOTE: No need to use write_byte() here // since we are finishing. out[*out_pos] = 0x10; @@ -797,7 +797,7 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator, return_if_error(lzma_raw_encoder_init( &coder->subfilter.subcoder, allocator, - options, LZMA_VLI_VALUE_UNKNOWN, false)); + options)); // Encode the Filter Flags field into a buffer. This should // never fail since we have already successfully initialized @@ -948,8 +948,6 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->next_finished = false; next->coder->sequence = SEQ_FILL; next->coder->options = filters[0].options; - next->coder->use_eopm = filters[0].uncompressed_size - == LZMA_VLI_VALUE_UNKNOWN; next->coder->pos = 0; next->coder->alignment.in_pos = 0; diff --git a/src/lzma/args.c b/src/lzma/args.c index 4393a6bd..a4764032 100644 --- a/src/lzma/args.c +++ b/src/lzma/args.c @@ -52,8 +52,7 @@ static size_t filter_count = 0; enum { - OPT_COPY = INT_MIN, - OPT_SUBBLOCK, + OPT_SUBBLOCK = INT_MIN, OPT_X86, OPT_POWERPC, OPT_IA64, @@ -97,7 +96,6 @@ static const struct option long_opts[] = { { "compress", no_argument, NULL, 'z' }, // Filters - { "copy", no_argument, NULL, OPT_COPY }, { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, { "x86", no_argument, NULL, OPT_X86 }, { "bcj", no_argument, NULL, OPT_X86 }, @@ -267,10 +265,6 @@ parse_real(int argc, char **argv) // Filter setup - case OPT_COPY: - add_filter(LZMA_FILTER_COPY, NULL); - break; - case OPT_SUBBLOCK: add_filter(LZMA_FILTER_SUBBLOCK, optarg); break; @@ -314,8 +308,6 @@ parse_real(int argc, char **argv) static const char *types[] = { "auto", "native", - "single", - "multi", "alone", // "gzip", NULL @@ -471,18 +463,6 @@ set_compression_settings(void) my_exit(ERROR); } - // Optimize the filter chain a little by removing all - // Copy filters. - for (size_t i = 0; opt_filters[i].id != LZMA_VLI_VALUE_UNKNOWN; ++i) { - while (opt_filters[i].id == LZMA_FILTER_COPY) { - size_t j = i; - do { - opt_filters[j] = opt_filters[j + 1]; - } while (opt_filters[++j].id - != LZMA_VLI_VALUE_UNKNOWN); - } - } - const uint32_t memory_limit = opt_memory / (1024 * 1024) + 1; uint32_t memory_usage = lzma_memory_usage(opt_filters, true); diff --git a/src/lzma/args.h b/src/lzma/args.h index 4f19a01e..c6098558 100644 --- a/src/lzma/args.h +++ b/src/lzma/args.h @@ -33,8 +33,6 @@ enum tool_mode { enum header_type { HEADER_AUTO, HEADER_NATIVE, - HEADER_SINGLE, - HEADER_MULTI, HEADER_ALONE, // HEADER_GZIP, }; diff --git a/src/lzma/error.c b/src/lzma/error.c index a83de27a..e5391068 100644 --- a/src/lzma/error.c +++ b/src/lzma/error.c @@ -55,6 +55,12 @@ str_strm_error(lzma_ret code) case LZMA_UNSUPPORTED_CHECK: return _("Unsupported integrity check type"); + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + default: return NULL; } diff --git a/src/lzma/process.c b/src/lzma/process.c index 56bcda9a..c180caf7 100644 --- a/src/lzma/process.c +++ b/src/lzma/process.c @@ -160,32 +160,16 @@ single_init(thread_data *t) lzma_ret ret; if (opt_mode == MODE_COMPRESS) { - const lzma_vli uncompressed_size - = t->pair->src_fd != STDIN_FILENO - ? (lzma_vli)(t->pair->src_st.st_size) - : LZMA_VLI_VALUE_UNKNOWN; - - // TODO Support Multi-Block Streams to store Extra. if (opt_header == HEADER_ALONE) { - lzma_options_alone alone; - alone.uncompressed_size = uncompressed_size; - memcpy(&alone.lzma, opt_filters[0].options, - sizeof(alone.lzma)); - ret = lzma_alone_encoder(&t->strm, &alone); + ret = lzma_alone_encoder(&t->strm, + opt_filters[0].options); } else { - lzma_options_stream stream = { - .check = opt_check, - .has_crc32 = opt_check != LZMA_CHECK_NONE, - .uncompressed_size = uncompressed_size, - .alignment = 0, - }; - memcpy(stream.filters, opt_filters, - sizeof(stream.filters)); - ret = lzma_stream_encoder_single(&t->strm, &stream); + ret = lzma_stream_encoder(&t->strm, + opt_filters, opt_check); } } else { // TODO Restrict file format if requested on the command line. - ret = lzma_auto_decoder(&t->strm, NULL, NULL); + ret = lzma_auto_decoder(&t->strm); } if (ret != LZMA_OK) { diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c index a1383842..1fc561b7 100644 --- a/src/lzmadec/lzmadec.c +++ b/src/lzmadec/lzmadec.c @@ -284,9 +284,7 @@ parse_options(int argc, char **argv) case OPTION_FORMAT: { if (strcmp("auto", optarg) == 0) { format_type = FORMAT_AUTO; - } else if (strcmp("native", optarg) == 0 - || strcmp("single", optarg) == 0 - || strcmp("multi", optarg) == 0) { + } else if (strcmp("native", optarg) == 0) { format_type = FORMAT_NATIVE; } else if (strcmp("alone", optarg) == 0) { format_type = FORMAT_ALONE; @@ -315,11 +313,11 @@ init(void) switch (format_type) { case FORMAT_AUTO: - ret = lzma_auto_decoder(&strm, NULL, NULL); + ret = lzma_auto_decoder(&strm); break; case FORMAT_NATIVE: - ret = lzma_stream_decoder(&strm, NULL, NULL); + ret = lzma_stream_decoder(&strm); break; case FORMAT_ALONE: -- cgit v1.2.3