diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 20:46:52 +0200 |
commit | e114502b2bc371e4a45449832cb69be036360722 (patch) | |
tree | 449c41d0408f99926de202611091747f1fbe2f85 /src | |
parent | Fixed the test that should have been fixed as part (diff) | |
download | xz-e114502b2bc371e4a45449832cb69be036360722.tar.xz |
Oh well, big messy commit again. Some highlights:
- Updated to the latest, probably final file format version.
- Command line tool reworked to not use threads anymore.
Threading will probably go into liblzma anyway.
- Memory usage limit is now about 30 % for uncompression
and about 90 % for compression.
- Progress indicator with --verbose
- Simplified --help and full --long-help
- Upgraded to the last LGPLv2.1+ getopt_long from gnulib.
- Some bug fixes
Diffstat (limited to '')
50 files changed, 2963 insertions, 2258 deletions
diff --git a/src/common/bswap.h b/src/common/bswap.h index 8f82a8f4..f5cb8345 100644 --- a/src/common/bswap.h +++ b/src/common/bswap.h @@ -16,20 +16,29 @@ // NOTE: We assume that config.h is already #included. -// byteswap.h is a GNU extension. It contains inline assembly versions -// for byteswapping. When byteswap.h is not available, we use generic code. +// At least glibc has byteswap.h which contains inline assembly code for +// byteswapping. Some systems have byteswap.h but lack one or more of the +// bswap_xx macros/functions, which is why we check them separately even +// if byteswap.h is available. + #ifdef HAVE_BYTESWAP_H # include <byteswap.h> -#else +#endif + +#ifndef HAVE_BSWAP_16 # define bswap_16(num) \ (((num) << 8) | ((num) >> 8)) +#endif +#ifndef HAVE_BSWAP_32 # define bswap_32(num) \ ( (((num) << 24) ) \ | (((num) << 8) & UINT32_C(0x00FF0000)) \ | (((num) >> 8) & UINT32_C(0x0000FF00)) \ | (((num) >> 24) ) ) +#endif +#ifndef HAVE_BSWAP_64 # define bswap_64(num) \ ( (((num) << 56) ) \ | (((num) << 40) & UINT64_C(0x00FF000000000000)) \ diff --git a/src/common/physmem.h b/src/common/physmem.h index 597227ac..04a7ab4b 100644 --- a/src/common/physmem.h +++ b/src/common/physmem.h @@ -23,6 +23,10 @@ # endif #endif +#if defined(HAVE_PHYSMEM_SYSCONF) || defined(HAVE_NCPU_SYSCONF) +# include <unistd.h> +#endif + /// \brief Get the amount of physical memory in bytes /// diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h index 7f935f67..47a49fde 100644 --- a/src/common/sysdefs.h +++ b/src/common/sysdefs.h @@ -111,6 +111,7 @@ #endif #include <stdlib.h> +#include <assert.h> // Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written // so that it works with fake bool type, for example: @@ -134,17 +135,6 @@ typedef unsigned char _Bool; # define __bool_true_false_are_defined 1 #endif -#ifdef HAVE_ASSERT_H -# include <assert.h> -#else -# ifdef NDEBUG -# define assert(x) -# else - // TODO: Pretty bad assert macro. 
-# define assert(x) (!(x) && abort()) -# endif -#endif - // string.h should be enough but let's include strings.h and memory.h too if // they exists, since that shouldn't do any harm, but may improve portability. #ifdef HAVE_STRING_H diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h index eb3768e2..06c1633c 100644 --- a/src/liblzma/api/lzma/block.h +++ b/src/liblzma/api/lzma/block.h @@ -1,6 +1,6 @@ /** * \file lzma/block.h - * \brief .lzma Block handling + * \brief .xz Block handling * * \author Copyright (C) 1999-2006 Igor Pavlov * \author Copyright (C) 2007 Lasse Collin @@ -131,11 +131,10 @@ typedef struct { * * \note Because of the array is terminated with * .id = LZMA_VLI_UNKNOWN, the actual array must - * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block + * have LZMA_FILTERS_MAX + 1 members or the Block * Header decoder will overflow the buffer. */ lzma_filter *filters; -# define LZMA_BLOCK_FILTERS_MAX 4 } lzma_block; @@ -148,6 +147,8 @@ typedef struct { * The size can be calculated from the first byte of a Block using this macro. * Note that if the first byte is 0x00, it indicates beginning of Index; use * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. */ #define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) @@ -211,38 +212,50 @@ extern lzma_ret lzma_block_header_decode(lzma_block *options, /** - * \brief Sets Compressed Size according to Total Size + * \brief Sets Compressed Size according to Unpadded Size * - * Block Header stores Compressed Size, but Index has Total Size. If the + * Block Header stores Compressed Size, but Index has Unpadded Size. If the * application has already parsed the Index and is now decoding Blocks, - * it can calculate Compressed Size from Total Size. This function does + * it can calculate Compressed Size from Unpadded Size. 
This function does * exactly that with error checking, so application doesn't need to check, * for example, if the value in Index is too small to contain even the - * Block Header. Note that you need to call this function after decoding + * Block Header. Note that you need to call this function _after_ decoding * the Block Header field. * * \return - LZMA_OK: options->compressed_size was set successfully. - * - LZMA_DATA_ERROR: total_size is too small compared to + * - LZMA_DATA_ERROR: unpadded_size is too small compared to * options->header_size and lzma_check_sizes[options->check]. * - LZMA_PROG_ERROR: Some values are invalid. For example, - * total_size and options->header_size must be multiples - * of four, total_size must be at least 12, and + * options->header_size must be a multiple of four, and * options->header_size between 8 and 1024 inclusive. */ -extern lzma_ret lzma_block_total_size_set( - lzma_block *options, lzma_vli total_size) +extern lzma_ret lzma_block_compressed_size( + lzma_block *options, lzma_vli unpadded_size) lzma_attr_warn_unused_result; /** - * \brief Calculates Total Size + * \brief Calculates Unpadded Size * - * This function can be useful after decoding a Block to get Total Size + * This function can be useful after decoding a Block to get Unpadded Size * that is stored in Index. * - * \return Total Size on success, or zero on error. + * \return Unpadded Size on success, or zero on error. + */ +extern lzma_vli lzma_block_unpadded_size(const lzma_block *options) + lzma_attr_pure; + + +/** + * \brief Calculates the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. 
*/ -extern lzma_vli lzma_block_total_size_get(const lzma_block *options) +extern lzma_vli lzma_block_total_size(const lzma_block *options) lzma_attr_pure; @@ -255,8 +268,6 @@ extern lzma_vli lzma_block_total_size_get(const lzma_block *options) * \return - LZMA_OK: All good, continue with lzma_code(). * - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR - * - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit) - * have been reached already. * - LZMA_UNSUPPORTED_CHECK: options->check specfies a Check * that is not supported by this buid of liblzma. Initializing * the encoder failed. diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index 53e5737e..b4fb02a7 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -55,6 +55,14 @@ typedef struct { /** + * \brief Maximum number of filters in a chain + * + * FIXME desc + */ +#define LZMA_FILTERS_MAX 4 + + +/** * \brief Test if the given Filter ID is supported for encoding * * Returns true if the give Filter ID is supported for encoding by this diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h index 522969d4..d6072614 100644 --- a/src/liblzma/api/lzma/index.h +++ b/src/liblzma/api/lzma/index.h @@ -32,12 +32,24 @@ typedef struct lzma_index_s lzma_index; */ typedef struct { /** - * Total Size of a Block. + * \brief Total encoded size of a Block including Block Padding + * + * This value is useful if you need to know the actual size of the + * Block that the Block decoder will read. */ lzma_vli total_size; /** - * Uncompressed Size of a Block + * \brief Encoded size of a Block excluding Block Padding + * + * This value is stored in the Index. When doing random-access + * reading, you should give this value to the Block decoder along + * with uncompressed_size. 
+ */ + lzma_vli unpadded_size; + + /** + * \brief Uncompressed Size of a Block */ lzma_vli uncompressed_size; @@ -80,7 +92,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * \brief Add a new Record to an Index * * \param index Pointer to a lzma_index structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block, or * LZMA_VLI_UNKNOWN to indicate padding. * @@ -92,7 +104,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * - LZMA_PROG_ERROR */ extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h index 58fc8061..001e6b5c 100644 --- a/src/liblzma/api/lzma/index_hash.h +++ b/src/liblzma/api/lzma/index_hash.h @@ -57,7 +57,7 @@ extern void lzma_index_hash_end( * \brief Add a new Record to an Index hash * * \param index Pointer to a lzma_index_hash structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block * * \return - LZMA_OK @@ -67,7 +67,7 @@ extern void lzma_index_hash_end( * used when lzma_index_hash_decode() has already been used. 
*/ extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index f9101c7d..2bfe0b92 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -33,13 +33,13 @@ struct lzma_coder_s { lzma_next_coder next; /// Decoding options; we also write Compressed Size and Uncompressed - /// Size back to this structure when the encoding has been finished. + /// Size back to this structure when the decoding has been finished. lzma_block *options; - /// Compressed Size calculated while encoding + /// Compressed Size calculated while decoding lzma_vli compressed_size; - /// Uncompressed Size calculated while encoding + /// Uncompressed Size calculated while decoding lzma_vli uncompressed_size; /// Maximum allowed Compressed Size; this takes into account the @@ -110,6 +110,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -118,30 +131,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Compressed Data is padded to a multiple of four bytes. while (coder->compressed_size & 3) { + // We use compressed_size here just get the Padding + // right. 
The actual Compressed Size was stored to + // coder->options already, and won't be modified by + // us anymore. + ++coder->compressed_size; + if (*in_pos >= in_size) return LZMA_OK; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - - if (update_size(&coder->compressed_size, 1, - coder->compressed_limit)) - return LZMA_DATA_ERROR; } - // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values given to us. - if (!is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; @@ -193,14 +195,11 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, { lzma_next_coder_init(lzma_block_decoder_init, next, allocator); - // While lzma_block_total_size_get() is meant to calculate the Total - // Size, it also validates the options excluding the filters. - if (lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - - // options->check is used for array indexing so we need to know that - // it is in the valid range. - if ((unsigned)(options->check) > LZMA_CHECK_ID_MAX) + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. 
@@ -221,8 +220,8 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->uncompressed_size = 0; // If Compressed Size is not known, we calculate the maximum allowed - // value so that Total Size of the Block still is a valid VLI and - // a multiple of four. + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. next->coder->compressed_limit = options->compressed_size == LZMA_VLI_UNKNOWN ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 3c678f7d..6468cb44 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -27,8 +27,8 @@ /// take into account the headers etc. to determine the exact maximum size /// of the Compressed Data field, but the complexity would give us nothing /// useful. Instead, limit the size of Compressed Data so that even with -/// biggest possible Block Header and Check fields the total size of the -/// Block stays as valid VLI. This way we don't produce incorrect output +/// biggest possible Block Header and Check fields the total encoded size of +/// the Block stays as valid VLI. This way we don't produce incorrect output /// if someone will really try creating a Block of 8 EiB. /// /// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of @@ -41,9 +41,9 @@ struct lzma_coder_s { /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Encoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. 
lzma_block *options; enum { @@ -58,8 +58,8 @@ struct lzma_coder_s { /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Position when writing out the Check field - size_t check_pos; + /// Position in Block Padding and the Check fields + size_t pos; /// Check of the uncompressed data lzma_check_state check; @@ -106,6 +106,11 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, assert(*in_pos == in_size); assert(action == LZMA_FINISH); + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -113,28 +118,21 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Pad Compressed Data to a multiple of four bytes. - while (coder->compressed_size & 3) { + while ((coder->compressed_size + coder->pos) & 3) { if (*out_pos >= out_size) return LZMA_OK; out[*out_pos] = 0x00; ++*out_pos; - - // No need to use check for overflow here since we - // have already checked in SEQ_CODE that Compressed - // Size will stay in proper limits. - ++coder->compressed_size; + ++coder->pos; } - // Copy the values into coder->options. The caller - // may use this information to construct Index. 
- coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); + + coder->pos = 0; coder->sequence = SEQ_CHECK; // Fall through @@ -144,11 +142,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, = lzma_check_size(coder->options->check); while (*out_pos < out_size) { - out[*out_pos] = coder->check.buffer.u8[ - coder->check_pos]; + out[*out_pos] = coder->check.buffer.u8[coder->pos]; ++*out_pos; - if (++coder->check_pos == check_size) + if (++coder->pos == check_size) return LZMA_STREAM_END; } @@ -199,9 +196,9 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->options = options; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; + next->coder->pos = 0; // Initialize the check - next->coder->check_pos = 0; lzma_check_init(&next->coder->check, options->check); // Initialize the requested filters. diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 3b8e9f36..8421ac37 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -27,7 +27,7 @@ free_properties(lzma_block *options, lzma_allocator *allocator) // Free allocated filter options. The last array member is not // touched after the initialization in the beginning of // lzma_block_header_decode(), so we don't need to touch that here. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { lzma_free(options->filters[i].options, allocator); options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; @@ -48,24 +48,19 @@ lzma_block_header_decode(lzma_block *options, // Initialize the filter options array. This way the caller can // safely free() the options even if an error occurs in this function. 
- for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; } - size_t in_size = options->header_size; - - // Validate. The caller must have set options->header_size with - // lzma_block_header_size_decode() macro, so it is a programming error - // if these tests fail. - if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN - || in_size > LZMA_BLOCK_HEADER_SIZE_MAX - || (in_size & 3) - || lzma_block_header_size_decode(in[0]) != in_size) + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != options->header_size + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return LZMA_PROG_ERROR; // Exclude the CRC32 field. - in_size -= 4; + const size_t in_size = options->header_size - 4; // Verify CRC32 if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) @@ -83,15 +78,9 @@ lzma_block_header_decode(lzma_block *options, return_if_error(lzma_vli_decode(&options->compressed_size, NULL, in, &in_pos, in_size)); - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1) - return LZMA_DATA_ERROR; - - options->compressed_size = (options->compressed_size + 1) * 4; - - // Check that Total Size (that is, size of - // Block Header + Compressed Data + Check) is - // representable as a VLI. - if (lzma_block_total_size_get(options) == 0) + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. 
+ if (lzma_block_unpadded_size(options) == 0) return LZMA_DATA_ERROR; } else { options->compressed_size = LZMA_VLI_UNKNOWN; diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 9326350b..b9980363 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -25,21 +25,20 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_block *options) { // Block Header Size + Block Flags + CRC32. - size_t size = 1 + 1 + 4; + uint32_t size = 1 + 1 + 4; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1 - || options->compressed_size == 0 - || (options->compressed_size & 3)) + const uint32_t add = lzma_vli_size(options->compressed_size); + if (add == 0 || options->compressed_size == 0) return LZMA_PROG_ERROR; - size += lzma_vli_size(options->compressed_size / 4 - 1); + size += add; } // Uncompressed Size if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { - const size_t add = lzma_vli_size(options->uncompressed_size); + const uint32_t add = lzma_vli_size(options->uncompressed_size); if (add == 0) return LZMA_PROG_ERROR; @@ -51,10 +50,9 @@ lzma_block_header_size(lzma_block *options) || options->filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_PROG_ERROR; - for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; - ++i) { + for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; ++i) { // Don't allow too many filters. - if (i == 4) + if (i == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; uint32_t add; @@ -65,12 +63,13 @@ lzma_block_header_size(lzma_block *options) } // Pad to a multiple of four bytes. - options->header_size = (size + 3) & ~(size_t)(3); + options->header_size = (size + 3) & ~UINT32_C(3); - // NOTE: We don't verify that Total Size of the Block stays within - // limits. 
This is because it is possible that we are called with - // exaggerated values to reserve space for Block Header, and later - // called again with lower, real values. + // NOTE: We don't verify that the encoded size of the Block stays + // within limits. This is because it is possible that we are called + // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve + // space for Block Header, and later called again with lower, + // real values. return LZMA_OK; } @@ -79,9 +78,9 @@ lzma_block_header_size(lzma_block *options) extern LZMA_API lzma_ret lzma_block_header_encode(const lzma_block *options, uint8_t *out) { - if ((options->header_size & 3) - || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN - || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) + // Validate everything but filters. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Indicate the size of the buffer _excluding_ the CRC32 field. @@ -90,32 +89,28 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) // Store the Block Header Size. out[0] = out_size / 4; - // We write Block Flags a little later. + // We write Block Flags in pieces. + out[1] = 0x00; size_t out_pos = 2; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - // Compressed Size must be non-zero, fit into a 63-bit - // integer and be a multiple of four. Also the Total Size - // of the Block must fit into 63-bit integer.
- if (options->compressed_size == 0 - || (options->compressed_size & 3) - || options->compressed_size - > LZMA_VLI_MAX - || lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - return_if_error(lzma_vli_encode( - options->compressed_size / 4 - 1, NULL, + options->compressed_size, NULL, out, &out_pos, out_size)); + + out[1] |= 0x40; } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) + if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { return_if_error(lzma_vli_encode( options->uncompressed_size, NULL, out, &out_pos, out_size)); + out[1] |= 0x80; + } + // Filter Flags if (options->filters == NULL || options->filters[0].id == LZMA_VLI_UNKNOWN) @@ -124,24 +119,16 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) size_t filter_count = 0; do { // There can be at maximum of four filters. - if (filter_count == 4) + if (filter_count == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; return_if_error(lzma_filter_flags_encode( options->filters + filter_count, out, &out_pos, out_size)); - } while (options->filters[++filter_count].id - != LZMA_VLI_UNKNOWN); - - // Block Flags - out[1] = filter_count - 1; + } while (options->filters[++filter_count].id != LZMA_VLI_UNKNOWN); - if (options->compressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x40; - - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x80; + out[1] |= filter_count - 1; // Padding memzero(out + out_pos, out_size - out_pos); diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c index 7b46ba32..66e1cad9 100644 --- a/src/liblzma/common/block_util.c +++ b/src/liblzma/common/block_util.c @@ -18,10 +18,11 @@ /////////////////////////////////////////////////////////////////////////////// #include "common.h" +#include "index.h" extern LZMA_API lzma_ret -lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) +lzma_block_compressed_size(lzma_block *options, lzma_vli total_size) { // Validate. 
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN @@ -45,29 +46,47 @@ lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) extern LZMA_API lzma_vli -lzma_block_total_size_get(const lzma_block *options) +lzma_block_unpadded_size(const lzma_block *options) { - // Validate the values that we are interested in. + // Validate the values that we are interested in i.e. all but + // Uncompressed Size and the filters. + // + // NOTE: This function is used for validation too, so it is + // essential that these checks are always done even if + // Compressed Size is unknown. if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX || (options->header_size & 3) - || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + || !lzma_vli_is_valid(options->compressed_size) + || options->compressed_size == 0 + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return 0; // If Compressed Size is unknown, return that we cannot know - // Total Size either. + // size of the Block either. if (options->compressed_size == LZMA_VLI_UNKNOWN) return LZMA_VLI_UNKNOWN; - const lzma_vli total_size = options->compressed_size - + options->header_size - + lzma_check_size(options->check); + // Calculate Unpadded Size and validate it. + const lzma_vli unpadded_size = options->compressed_size + + options->header_size + + lzma_check_size(options->check); - // Validate the calculated Total Size. 
- if (options->compressed_size > LZMA_VLI_MAX - || (options->compressed_size & 3) - || total_size > LZMA_VLI_MAX) + assert(unpadded_size >= UNPADDED_SIZE_MIN); + if (unpadded_size > UNPADDED_SIZE_MAX) return 0; - return total_size; + return unpadded_size; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size(const lzma_block *options) +{ + lzma_vli unpadded_size = lzma_block_unpadded_size(options); + + if (unpadded_size != 0 && unpadded_size != LZMA_VLI_UNKNOWN) + unpadded_size = vli_ceil4(unpadded_size); + + return unpadded_size; } diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 275cf05f..0ee8574c 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -66,10 +66,6 @@ | LZMA_CONCATENATED ) -/////////// -// Types // -/////////// - /// Type of encoder/decoder specific data; the actual structure is defined /// differently in different coders. typedef struct lzma_coder_s lzma_coder; @@ -187,10 +183,6 @@ struct lzma_internal_s { }; -/////////////// -// Functions // -/////////////// - /// Allocates memory extern void *lzma_alloc(size_t size, lzma_allocator *allocator) lzma_attribute((malloc)); diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c index 71ceeca0..03b6859a 100644 --- a/src/liblzma/common/filter_common.c +++ b/src/liblzma/common/filter_common.c @@ -164,7 +164,7 @@ validate_chain(const lzma_filter *filters, size_t *count) // There must be 1-4 filters. The last filter must be usable as // the last filter in the chain. At maximum of three filters are // allowed to change the size of the data. - if (i > LZMA_BLOCK_FILTERS_MAX || !last_ok || changes_size_count > 3) + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) return LZMA_OPTIONS_ERROR; *count = i; @@ -182,7 +182,7 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, return_if_error(validate_chain(options, &count)); // Set the filter functions and copy the options pointer. 
- lzma_filter_info filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { // The order of the filters is reversed in the diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index f965749f..1fe65650 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -20,24 +20,34 @@ #include "index.h" -/// Number of Records to allocate at once. +/// Number of Records to allocate at once in the unrolled list. #define INDEX_GROUP_SIZE 256 typedef struct lzma_index_group_s lzma_index_group; struct lzma_index_group_s { - /// Next group + /// Previous group lzma_index_group *prev; - /// Previous group + /// Next group lzma_index_group *next; /// Index of the last Record in this group size_t last; - /// Total Size fields as cumulative sum relative to the beginning - /// of the group. The total size of the group is total_sums[last]. - lzma_vli total_sums[INDEX_GROUP_SIZE]; + /// Unpadded Size fields as special cumulative sum relative to the + /// beginning of the group. It's special in the sense that the previous + /// value is rounded up to the next multiple of four before + /// calculating the new value. The total encoded size of the Blocks + /// in the group is unpadded_sums[last] rounded up to the next + /// multiple of four. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored + /// values are 39, 97 (40 + 57), and 181 (100 + 81). The total + /// encoded size of these Blocks is 184. + /// + /// This encoding is nice from the point of view of lzma_index_locate(). + lzma_vli unpadded_sums[INDEX_GROUP_SIZE]; /// Uncompressed Size fields as cumulative sum relative to the /// beginning of the group. The uncompressed size of the group is @@ -56,19 +66,13 @@ struct lzma_index_s { /// Uncompressed size of the Stream lzma_vli uncompressed_size; - /// Number of non-padding records. This is needed by Index encoder.
+ /// Number of non-padding records. This is needed for Index encoder. lzma_vli count; /// Size of the List of Records field; this is updated every time /// a new non-padding Record is added. lzma_vli index_list_size; - /// This is zero if no Indexes have been combined with - /// lzma_index_cat(). With combined Indexes, this contains the sizes - /// of all but latest the Streams, including possible Stream Padding - /// fields. - lzma_vli padding_size; - /// First group of Records lzma_index_group *head; @@ -80,8 +84,8 @@ struct lzma_index_s { /// Group where the current read position is. lzma_index_group *group; - /// The most recently read record in *group - lzma_vli record; + /// The most recently read Record in *group + size_t record; /// Uncompressed offset of the beginning of *group relative /// to the beginning of the Stream @@ -102,6 +106,10 @@ struct lzma_index_s { /// Stream. This is needed when a new Index is concatenated /// to this lzma_index structure. lzma_vli index_list_size; + + /// Total size of all but the last Stream and all Stream + /// Padding fields. + lzma_vli streams_size; } old; }; @@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator) i->uncompressed_size = 0; i->count = 0; i->index_list_size = 0; - i->padding_size = 0; i->head = NULL; i->tail = NULL; i->current.group = NULL; i->old.count = 0; i->old.index_list_size = 0; + i->old.streams_size = 0; return i; } @@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i) { // If multiple Streams are concatenated, the Stream Header, Index, // and Stream Footer fields of all but the last Stream are already - // included in padding_size. Thus, we need to calculate only the + // included in old.streams_size. Thus, we need to calculate only the // size of the last Index, not all Indexes. 
- return i->total_size + i->padding_size + return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size + index_size(i->count - i->old.count, i->index_list_size - i->old.index_list_size) - + LZMA_STREAM_HEADER_SIZE * 2; + + LZMA_STREAM_HEADER_SIZE; } @@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i) } -/// Helper function for index_append() +/// Appends a new Record to the Index. If needed, this allocates a new +/// Record group. static lzma_ret index_append_real(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size, + lzma_vli unpadded_size, lzma_vli uncompressed_size, bool is_padding) { // Add the new record. @@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, g->prev = i->tail; g->next = NULL; g->last = 0; - g->total_sums[0] = total_size; + g->unpadded_sums[0] = unpadded_size; g->uncompressed_sums[0] = uncompressed_size; g->paddings[0] = is_padding; @@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } else { // i->tail has space left for at least one record. 
- i->tail->total_sums[i->tail->last + 1] - = i->tail->total_sums[i->tail->last] - + total_size; + i->tail->unpadded_sums[i->tail->last + 1] + = unpadded_size + vli_ceil4( + i->tail->unpadded_sums[i->tail->last]); i->tail->uncompressed_sums[i->tail->last + 1] = i->tail->uncompressed_sums[i->tail->last] + uncompressed_size; @@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } -static lzma_ret -index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, - lzma_vli uncompressed_size, bool is_padding) +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) { - if (total_size > LZMA_VLI_MAX + if (unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; // This looks a bit ugly. We want to first validate that the Index // and Stream stay in valid limits after adding this Record. After @@ -280,65 +290,38 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, // slightly more correct to validate before allocating, YMMV). lzma_ret ret; - if (is_padding) { - assert(uncompressed_size == 0); + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); - // First update the info so we can validate it. - i->padding_size += total_size; - - if (i->padding_size > LZMA_VLI_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, true); - - // If something went wrong, undo the updated value. - if (ret != LZMA_OK) - i->padding_size -= total_size; + const lzma_vli total_size = vli_ceil4(unpadded_size); - } else { - // First update the overall info so we can validate it. 
- const lzma_vli index_list_size_add - = lzma_vli_size(total_size / 4 - 1) - + lzma_vli_size(uncompressed_size); - - i->total_size += total_size; - i->uncompressed_size += uncompressed_size; - ++i->count; - i->index_list_size += index_list_size_add; - - if (i->total_size > LZMA_VLI_MAX - || i->uncompressed_size > LZMA_VLI_MAX - || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, false); + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; - if (ret != LZMA_OK) { - // Something went wrong. Undo the updates. - i->total_size -= total_size; - i->uncompressed_size -= uncompressed_size; - --i->count; - i->index_list_size -= index_list_size_add; - } + if (i->total_size > LZMA_VLI_MAX + || i->uncompressed_size > LZMA_VLI_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, unpadded_size, + uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. + i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } return ret; } -extern LZMA_API lzma_ret -lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - return index_append(i, allocator, - total_size, uncompressed_size, false); -} - - /// Initialize i->current to point to the first Record. static bool init_current(lzma_index *i) @@ -370,10 +353,10 @@ previous_group(lzma_index *i) i->current.record = i->current.group->last; // Then update the offsets. 
- i->current.stream_offset -= i->current.group - ->total_sums[i->current.group->last]; - i->current.uncompressed_offset -= i->current.group - ->uncompressed_sums[i->current.group->last]; + i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); + i->current.uncompressed_offset -= i->current.group->uncompressed_sums[ + i->current.group->last]; return; } @@ -386,8 +369,8 @@ next_group(lzma_index *i) assert(i->current.group->next != NULL); // Update the offsets first. - i->current.stream_offset += i->current.group - ->total_sums[i->current.group->last]; + i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); i->current.uncompressed_offset += i->current.group ->uncompressed_sums[i->current.group->last]; @@ -403,30 +386,39 @@ next_group(lzma_index *i) static void set_info(const lzma_index *i, lzma_index_record *info) { - info->total_size = i->current.group->total_sums[i->current.record]; + // First copy the cumulative sizes from the current Record of the + // current group. + info->unpadded_size + = i->current.group->unpadded_sums[i->current.record]; + info->total_size = vli_ceil4(info->unpadded_size); info->uncompressed_size = i->current.group->uncompressed_sums[ i->current.record]; + // Copy the start offsets of this group. info->stream_offset = i->current.stream_offset; info->uncompressed_offset = i->current.uncompressed_offset; // If it's not the first Record in this group, we need to do some // adjustements. if (i->current.record > 0) { - // _sums[] are cumulative, thus we need to substract the - // _previous _sums[] to get the sizes of this Record. - info->total_size -= i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_size -= i->current.group + // Since the _sums[] are cumulative, we substract the sums of + // the previous Record to get the sizes of the current Record, + // and add the sums of the previous Record to the offsets. 
+ // With unpadded_sums[] we need to take into account that it + // uses a bit weird way to do the cumulative summing + const lzma_vli total_sum + = vli_ceil4(i->current.group->unpadded_sums[ + i->current.record - 1]); + + const lzma_vli uncompressed_sum = i->current.group ->uncompressed_sums[i->current.record - 1]; - // i->current.{total,uncompressed}_offsets have the offset - // of the beginning of the group, thus we need to add the - // appropriate amount to get the offsetes of this Record. - info->stream_offset += i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_offset += i->current.group - ->uncompressed_sums[i->current.record - 1]; + info->total_size -= total_sum; + info->unpadded_size -= total_sum; + info->uncompressed_size -= uncompressed_sum; + + info->stream_offset += total_sum; + info->uncompressed_offset += uncompressed_sum; } return; @@ -548,11 +540,22 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Check that the combined size of the Indexes stays within limits. { + const lzma_vli dest_size = index_size_unpadded( + dest->count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Check that the combined size of the "files" (combined total + // encoded sizes) stays within limits. + { const lzma_vli dest_size = lzma_index_file_size(dest); const lzma_vli src_size = lzma_index_file_size(src); - if (dest_size + src_size > LZMA_VLI_UNKNOWN + if (dest_size + src_size > LZMA_VLI_MAX || dest_size + src_size + padding - > LZMA_VLI_UNKNOWN) + > LZMA_VLI_MAX) return LZMA_DATA_ERROR; } @@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // // NOTE: This cannot overflow, because Index Size is always // far smaller than LZMA_VLI_MAX, and adding two VLIs - // (Index Size and padding) doesn't overflow. 
It may become - // an invalid VLI if padding is huge, but that is caught by - // index_append(). + // (Index Size and padding) doesn't overflow. padding += index_size(dest->count - dest->old.count, dest->index_list_size - dest->old.index_list_size) + LZMA_STREAM_HEADER_SIZE * 2; + // While the above cannot overflow, it may become an invalid VLI. + if (padding > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + // Add the padding Record. - return_if_error(index_append( - dest, allocator, padding, 0, true)); + { + lzma_ret ret; + + // First update the info so we can validate it. + dest->old.streams_size += padding; + + if (dest->old.streams_size > LZMA_VLI_MAX + || lzma_index_file_size(dest) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(dest, allocator, + padding, 0, true); + + // If something went wrong, undo the updated value and return + // the error. + if (ret != LZMA_OK) { + dest->old.streams_size -= padding; + return ret; + } + } // Avoid wasting lots of memory if src->head has only a few records // that fit into dest->tail. That is, combine two groups if possible. @@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, if (src->head != NULL && src->head->last + 1 <= INDEX_GROUP_SIZE - dest->tail->last - 1) { // Copy the first Record. - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[0]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[0]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[dest->tail->last] @@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Copy the rest.
for (size_t i = 1; i < src->head->last; ++i) { - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[i + 1] - - src->head->total_sums[i]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[i + 1] + - src->head->unpadded_sums[i]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[ @@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, dest->old.count = dest->count + src->old.count; dest->old.index_list_size = dest->index_list_size + src->old.index_list_size; + dest->old.streams_size += src->old.streams_size; // Update overall information. dest->total_size += src->total_size; dest->uncompressed_size += src->uncompressed_size; dest->count += src->count; dest->index_list_size += src->index_list_size; - dest->padding_size += src->padding_size; // *src has nothing left but the base structure. lzma_free(src, allocator); @@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) // Copy the arrays so that we don't read uninitialized memory. 
const size_t count = src_group->last + 1; - memcpy(dest_group->total_sums, src_group->total_sums, + memcpy(dest_group->unpadded_sums, src_group->unpadded_sums, sizeof(lzma_vli) * count); memcpy(dest_group->uncompressed_sums, src_group->uncompressed_sums, @@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b) while (ag != NULL && bg != NULL) { const size_t count = ag->last + 1; if (ag->last != bg->last - || memcmp(ag->total_sums, - bg->total_sums, + || memcmp(ag->unpadded_sums, + bg->unpadded_sums, sizeof(lzma_vli) * count) != 0 || memcmp(ag->uncompressed_sums, bg->uncompressed_sums, diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h index df897367..79719dd7 100644 --- a/src/liblzma/common/index.h +++ b/src/liblzma/common/index.h @@ -23,14 +23,11 @@ #include "common.h" -/// Maximum encoded value of Total Size. -#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_MAX / 4 - 1) +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) -/// Convert the real Total Size value to a value that is stored to the Index. -#define total_size_encode(size) ((size) / 4 - 1) - -/// Convert the encoded Total Size value from Index to the real Total Size. -#define total_size_decode(size) (((size) + 1) * 4) +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) /// Get the size of the Index Padding field. This is needed by Index encoder @@ -38,6 +35,16 @@ extern uint32_t lzma_index_padding_size(const lzma_index *i); +/// Round the variable-length integer to the next multiple of four. 
+static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding static inline lzma_vli index_size_unpadded(lzma_vli count, lzma_vli index_list_size) { @@ -46,20 +53,20 @@ index_size_unpadded(lzma_vli count, lzma_vli index_list_size) } +/// Calculate the size of the Index field including Index Padding static inline lzma_vli index_size(lzma_vli count, lzma_vli index_list_size) { - // Round up to a mulitiple of four. - return (index_size_unpadded(count, index_list_size) + 3) - & ~LZMA_VLI_C(3); + return vli_ceil4(index_size_unpadded(count, index_list_size)); } +/// Calculate the total size of the Stream static inline lzma_vli -index_stream_size( - lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) { - return LZMA_STREAM_HEADER_SIZE + total_size + return LZMA_STREAM_HEADER_SIZE + blocks_size + index_size(count, index_list_size) + LZMA_STREAM_HEADER_SIZE; } diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c index ae66595a..5faac161 100644 --- a/src/liblzma/common/index_decoder.c +++ b/src/liblzma/common/index_decoder.c @@ -25,7 +25,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -38,8 +38,8 @@ struct lzma_coder_s { /// Number of Records left to decode. lzma_vli count; - /// The most recent Total Size field - lzma_vli total_size; + /// The most recent Unpadded Size field + lzma_vli unpadded_size; /// The most recent Uncompressed Size field lzma_vli uncompressed_size; @@ -91,14 +91,14 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; coder->sequence = coder->count == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? 
SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = coder->sequence == SEQ_TOTAL - ? &coder->total_size + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size : &coder->uncompressed_size; ret = lzma_vli_decode(size, &coder->pos, @@ -109,27 +109,26 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; - if (coder->sequence == SEQ_TOTAL) { - // Validate that encoded Total Size isn't too big. - if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - // Convert the encoded Total Size to the real - // Total Size. - coder->total_size = total_size_decode( - coder->total_size); coder->sequence = SEQ_UNCOMPRESSED; } else { // Add the decoded Record to the Index. return_if_error(lzma_index_append( coder->index, allocator, - coder->total_size, + coder->unpadded_size, coder->uncompressed_size)); // Check if this was the last Record. coder->sequence = --coder->count == 0 ? SEQ_PADDING_INIT - : SEQ_TOTAL; + : SEQ_UNPADDED; } break; diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c index 3005f835..522dbb53 100644 --- a/src/liblzma/common/index_encoder.c +++ b/src/liblzma/common/index_encoder.c @@ -26,7 +26,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_NEXT, SEQ_PADDING, @@ -97,18 +97,20 @@ index_encode(lzma_coder *coder, break; } - // Total Size must be a multiple of four. - if (coder->record.total_size & 3) + // Unpadded Size must be within valid limits. 
+ if (coder->record.unpadded_size < UNPADDED_SIZE_MIN + || coder->record.unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_PROG_ERROR; - coder->sequence = SEQ_TOTAL; + coder->sequence = SEQ_UNPADDED; // Fall through - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - const lzma_vli size = coder->sequence == SEQ_TOTAL - ? total_size_encode(coder->record.total_size) + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->record.unpadded_size : coder->record.uncompressed_size; ret = lzma_vli_encode(size, &coder->pos, diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c index 5e581838..162094d1 100644 --- a/src/liblzma/common/index_hash.c +++ b/src/liblzma/common/index_hash.c @@ -23,8 +23,8 @@ typedef struct { - /// Sum of the Total Size fields - lzma_vli total_size; + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; /// Sum of the Uncompressed Size fields lzma_vli uncompressed_size; @@ -35,7 +35,7 @@ typedef struct { /// Size of the List of Index Records as bytes lzma_vli index_list_size; - /// Check calculated from Total Sizes and Uncompressed Sizes. + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. lzma_check_state check; } lzma_index_hash_info; @@ -45,7 +45,7 @@ struct lzma_index_hash_s { enum { SEQ_BLOCK, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -61,8 +61,8 @@ struct lzma_index_hash_s { /// Number of Records not fully decoded lzma_vli remaining; - /// Total Size currently being read from an Index Record. - lzma_vli total_size; + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; /// Uncompressed Size currently being read from an Index Record. 
lzma_vli uncompressed_size; @@ -86,15 +86,15 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) } index_hash->sequence = SEQ_BLOCK; - index_hash->blocks.total_size = 0; + index_hash->blocks.blocks_size = 0; index_hash->blocks.uncompressed_size = 0; index_hash->blocks.count = 0; index_hash->blocks.index_list_size = 0; - index_hash->records.total_size = 0; + index_hash->records.blocks_size = 0; index_hash->records.uncompressed_size = 0; index_hash->records.count = 0; index_hash->records.index_list_size = 0; - index_hash->total_size = 0; + index_hash->unpadded_size = 0; index_hash->uncompressed_size = 0; index_hash->pos = 0; index_hash->crc32 = 0; @@ -128,16 +128,16 @@ lzma_index_hash_size(const lzma_index_hash *index_hash) /// Updates the sizes and the hash without any validation. static lzma_ret -hash_append(lzma_index_hash_info *info, lzma_vli total_size, +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, lzma_vli uncompressed_size) { - info->total_size += total_size; + info->blocks_size += vli_ceil4(unpadded_size); info->uncompressed_size += uncompressed_size; - info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + info->index_list_size += lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); ++info->count; - const lzma_vli sizes[2] = { total_size, uncompressed_size }; + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; lzma_check_update(&info->check, LZMA_CHECK_BEST, (const uint8_t *)(sizes), sizeof(sizes)); @@ -146,26 +146,27 @@ hash_append(lzma_index_hash_info *info, lzma_vli total_size, extern LZMA_API lzma_ret -lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate the arguments. 
- if (index_hash->sequence != SEQ_BLOCK || total_size == 0 - || total_size > LZMA_VLI_MAX || (total_size & 3) + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; // Update the hash. return_if_error(hash_append(&index_hash->blocks, - total_size, uncompressed_size)); + unpadded_size, uncompressed_size)); // Validate the properties of *info are still in allowed limits. - if (index_hash->blocks.total_size > LZMA_VLI_MAX + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX || index_size(index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_BACKWARD_SIZE_MAX - || index_stream_size(index_hash->blocks.total_size, + || index_stream_size(index_hash->blocks.blocks_size, index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_VLI_MAX) @@ -216,14 +217,14 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Handle the special case when there are no Blocks. index_hash->sequence = index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = index_hash->sequence == SEQ_TOTAL - ? &index_hash->total_size + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? 
&index_hash->unpadded_size : &index_hash->uncompressed_size; ret = lzma_vli_decode(size, &index_hash->pos, @@ -234,18 +235,17 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, ret = LZMA_OK; index_hash->pos = 0; - if (index_hash->sequence == SEQ_TOTAL) { - if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - index_hash->total_size = total_size_decode( - index_hash->total_size); - index_hash->sequence = SEQ_UNCOMPRESSED; } else { // Update the hash. return_if_error(hash_append(&index_hash->records, - index_hash->total_size, + index_hash->unpadded_size, index_hash->uncompressed_size)); // Verify that we don't go over the known sizes. Note @@ -254,8 +254,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // that values in index_hash->blocks are already // validated and we are fine as long as we don't // exceed them in index_hash->records. - if (index_hash->blocks.total_size - < index_hash->records.total_size + if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size || index_hash->blocks.uncompressed_size < index_hash->records.uncompressed_size || index_hash->blocks.index_list_size @@ -264,7 +264,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Check if this was the last Record. index_hash->sequence = --index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; @@ -288,8 +288,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, } // Compare the sizes. 
- if (index_hash->blocks.total_size - != index_hash->records.total_size + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size || index_hash->blocks.uncompressed_size != index_hash->records.uncompressed_size || index_hash->blocks.index_list_size diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index e137685f..9be47893 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -190,7 +190,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so // we don't need to do it here. - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; coder->block_options.filters = filters; // Decode the Block Header. @@ -216,7 +216,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Free the allocated filter options since they are needed // only to initialize the Block decoder. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) lzma_free(filters[i].options, allocator); coder->block_options.filters = NULL; @@ -243,7 +243,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, - lzma_block_total_size_get( + lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); @@ -270,7 +270,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Fall through - case SEQ_STREAM_FOOTER: + case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. 
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); @@ -306,6 +306,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; + } // Fall through diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c index 0376fd3b..e52ad692 100644 --- a/src/liblzma/common/stream_encoder.c +++ b/src/liblzma/common/stream_encoder.c @@ -157,11 +157,11 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; // Add a new Index Record. - const lzma_vli total_size = lzma_block_total_size_get( + const lzma_vli unpadded_size = lzma_block_unpadded_size( &coder->block_options); - assert(total_size != 0); + assert(unpadded_size != 0); return_if_error(lzma_index_append(coder->index, allocator, - total_size, + unpadded_size, coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_INIT; diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index d2a77ba4..53ee1c1e 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -157,14 +157,14 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) uint32_t copy_size = dict->size - copy_pos; if (copy_size < left) { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); dict->pos += copy_size; copy_size = left - copy_size; memcpy(dict->buf + dict->pos, dict->buf, copy_size); dict->pos += copy_size; } else { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, left); dict->pos += left; } diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 7cf06988..3096b442 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -211,7 +211,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } - case FLAG_END_SUBFILTER: + case 
FLAG_END_SUBFILTER: { if (coder->padding != 0 || (in[*in_pos] & 0x0F) || coder->subfilter.code == NULL || !coder->got_output_with_subfilter) @@ -250,6 +250,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++*in_pos; break; + } default: return LZMA_DATA_ERROR; diff --git a/src/lzma/Makefile.am b/src/lzma/Makefile.am index cd8bb771..e5c5c29a 100644 --- a/src/lzma/Makefile.am +++ b/src/lzma/Makefile.am @@ -15,19 +15,16 @@ bin_PROGRAMS = lzma lzma_SOURCES = \ - alloc.c \ - alloc.h \ args.c \ args.h \ - error.c \ - error.h \ hardware.c \ hardware.h \ - help.c \ - help.h \ io.c \ io.h \ main.c \ + main.h \ + message.c \ + message.h \ options.c \ options.h \ private.h \ diff --git a/src/lzma/alloc.c b/src/lzma/alloc.c deleted file mode 100644 index d0fee68b..00000000 --- a/src/lzma/alloc.c +++ /dev/null @@ -1,106 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file alloc.c -/// \brief Memory allocation functions -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" - - -/// Called when memory allocation fails. Prints and error message and -/// quits the application. 
-static void lzma_attribute((noreturn)) -xerror(void) -{ - errmsg(V_ERROR, "%s", strerror(errno)); - my_exit(ERROR); -} - - -extern void * -xmalloc(size_t size) -{ - if (size < 1) { - errno = EINVAL; - xerror(); - } - - void *p = malloc(size); - if (p == NULL) - xerror(); - - return p; -} - - -/* -extern void * -xrealloc(void *ptr, size_t size) -{ - if (size < 1) { - errno = EINVAL; - xerror(); - } - - ptr = realloc(ptr, size); - if (ptr == NULL) - xerror(); - - return ptr; -} -*/ - - -extern char * -xstrdup(const char *src) -{ - if (src == NULL) { - errno = EINVAL; - xerror(); - } - - const size_t size = strlen(src) + 1; - char *dest = malloc(size); - if (dest == NULL) - xerror(); - - memcpy(dest, src, size); - - return dest; -} - - -extern void -xstrcpy(char **dest, const char *src) -{ - size_t len = strlen(src) + 1; - - *dest = realloc(*dest, len); - if (*dest == NULL) - xerror(); - - memcpy(*dest, src, len + 1); - - return; -} - - -extern void * -allocator(void *opaque lzma_attribute((unused)), - size_t nmemb lzma_attribute((unused)), size_t size) -{ - return xmalloc(size); -} diff --git a/src/lzma/alloc.h b/src/lzma/alloc.h deleted file mode 100644 index 80317269..00000000 --- a/src/lzma/alloc.h +++ /dev/null @@ -1,42 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file alloc.h -/// \brief Memory allocation functions -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef ALLOC_H -#define ALLOC_H - -#include "private.h" - - -/// Safe malloc() that never returns NULL. -extern void *xmalloc(size_t size); - -/// Safe realloc() that never returns NULL. -extern void *xrealloc(void *ptr, size_t size); - -/// Safe strdup() that never returns NULL. -extern char *xstrdup(const char *src); - -/// xrealloc()s *dest to the size needed by src, and copies src to *dest. -extern void xstrcpy(char **dest, const char *src); - -/// Function for lzma_allocator.alloc. This uses xmalloc(). -extern void *allocator(void *opaque lzma_attribute((unused)), - size_t nmemb lzma_attribute((unused)), size_t size); - -#endif diff --git a/src/lzma/args.c b/src/lzma/args.c index 14ccfb6d..a2efb277 100644 --- a/src/lzma/args.c +++ b/src/lzma/args.c @@ -25,150 +25,90 @@ #include <ctype.h> -enum tool_mode opt_mode = MODE_COMPRESS; -enum format_type opt_format = FORMAT_AUTO; - -char *opt_suffix = NULL; - -char *opt_files_name = NULL; -char opt_files_split = '\0'; -FILE *opt_files_file = NULL; - bool opt_stdout = false; bool opt_force = false; bool opt_keep_original = false; -bool opt_preserve_name = false; - -lzma_check opt_check = LZMA_CHECK_CRC64; -lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1]; // We don't modify or free() this, but we need to assign it in some // non-const pointers. const char *stdin_filename = "(stdin)"; -static size_t preset_number = 7; -static bool preset_default = true; -static size_t filter_count = 0; - -/// When compressing, which file format to use if --format=auto or no --format -/// at all has been specified. We need a variable because this depends on -/// with which name we are called. All names with "lz" in them makes us to -/// use the legacy .lzma format. 
-static enum format_type format_compress_auto = FORMAT_XZ; - - -enum { - OPT_SUBBLOCK = INT_MIN, - OPT_X86, - OPT_POWERPC, - OPT_IA64, - OPT_ARM, - OPT_ARMTHUMB, - OPT_SPARC, - OPT_DELTA, - OPT_LZMA1, - OPT_LZMA2, - - OPT_FILES, - OPT_FILES0, -}; - - -static const char short_opts[] = "cC:dfF:hlLkM:qrS:tT:vVz123456789"; - - -static const struct option long_opts[] = { - // gzip-like options - { "fast", no_argument, NULL, '1' }, - { "best", no_argument, NULL, '9' }, - { "memory", required_argument, NULL, 'M' }, - { "name", no_argument, NULL, 'N' }, - { "suffix", required_argument, NULL, 'S' }, - { "threads", required_argument, NULL, 'T' }, - { "version", no_argument, NULL, 'V' }, - { "stdout", no_argument, NULL, 'c' }, - { "to-stdout", no_argument, NULL, 'c' }, - { "decompress", no_argument, NULL, 'd' }, - { "uncompress", no_argument, NULL, 'd' }, - { "force", no_argument, NULL, 'f' }, - { "help", no_argument, NULL, 'h' }, - { "list", no_argument, NULL, 'l' }, - { "info", no_argument, NULL, 'l' }, - { "keep", no_argument, NULL, 'k' }, - { "no-name", no_argument, NULL, 'n' }, - { "quiet", no_argument, NULL, 'q' }, -// { "recursive", no_argument, NULL, 'r' }, // TODO - { "test", no_argument, NULL, 't' }, - { "verbose", no_argument, NULL, 'v' }, - { "compress", no_argument, NULL, 'z' }, - - // Filters - { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, - { "x86", no_argument, NULL, OPT_X86 }, - { "bcj", no_argument, NULL, OPT_X86 }, - { "powerpc", no_argument, NULL, OPT_POWERPC }, - { "ppc", no_argument, NULL, OPT_POWERPC }, - { "ia64", no_argument, NULL, OPT_IA64 }, - { "itanium", no_argument, NULL, OPT_IA64 }, - { "arm", no_argument, NULL, OPT_ARM }, - { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, - { "sparc", no_argument, NULL, OPT_SPARC }, - { "delta", optional_argument, NULL, OPT_DELTA }, - { "lzma1", optional_argument, NULL, OPT_LZMA1 }, - { "lzma2", optional_argument, NULL, OPT_LZMA2 }, - - // Other - { "format", required_argument, NULL, 'F' }, - { 
"check", required_argument, NULL, 'C' }, - { "files", optional_argument, NULL, OPT_FILES }, - { "files0", optional_argument, NULL, OPT_FILES0 }, - - { NULL, 0, NULL, 0 } -}; - static void -add_filter(lzma_vli id, const char *opt_str) +parse_real(args_info *args, int argc, char **argv) { - if (filter_count == LZMA_BLOCK_FILTERS_MAX) { - errmsg(V_ERROR, _("Maximum number of filters is seven")); - my_exit(ERROR); - } - - opt_filters[filter_count].id = id; - - switch (id) { - case LZMA_FILTER_SUBBLOCK: - opt_filters[filter_count].options - = parse_options_subblock(opt_str); - break; - - case LZMA_FILTER_DELTA: - opt_filters[filter_count].options - = parse_options_delta(opt_str); - break; - - case LZMA_FILTER_LZMA1: - case LZMA_FILTER_LZMA2: - opt_filters[filter_count].options - = parse_options_lzma(opt_str); - break; - - default: - assert(opt_str == NULL); - opt_filters[filter_count].options = NULL; - break; - } + enum { + OPT_SUBBLOCK = INT_MIN, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_FILES, + OPT_FILES0, + }; + + static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + { "info", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, 
NULL, 'C' }, + { "preset", required_argument, NULL, 'p' }, + { "memory", required_argument, NULL, 'M' }, + { "threads", required_argument, NULL, 'T' }, + + { "fast", no_argument, NULL, '1' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", no_argument, NULL, OPT_X86 }, + { "bcj", no_argument, NULL, OPT_X86 }, + { "powerpc", no_argument, NULL, OPT_POWERPC }, + { "ppc", no_argument, NULL, OPT_POWERPC }, + { "ia64", no_argument, NULL, OPT_IA64 }, + { "itanium", no_argument, NULL, OPT_IA64 }, + { "arm", no_argument, NULL, OPT_ARM }, + { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, + { "sparc", no_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; - ++filter_count; - preset_default = false; - return; -} - - -static void -parse_real(int argc, char **argv) -{ int c; while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) @@ -178,32 +118,28 @@ parse_real(int argc, char **argv) case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - preset_number = c - '0'; - preset_default = false; + coder_set_preset(c - '0'); break; - // --memory - case 'M': - opt_memory = str_to_uint64("memory", optarg, - 1, SIZE_MAX); + case 'p': { + const uint64_t preset = str_to_uint64( + "preset", optarg, 1, 9); + coder_set_preset(preset); break; + } - case 'N': - opt_preserve_name = true; + // --memory + case 'M': + // On 32-bit systems, SIZE_MAX would make more sense + // than UINT64_MAX. But use UINT64_MAX still so that + // scripts that assume > 4 GiB values don't break. 
+ hardware_memlimit_set(str_to_uint64( + "memory", optarg, 0, UINT64_MAX)); break; // --suffix case 'S': - // Empty suffix and suffixes having a slash are - // rejected. Such suffixes would break things later. - if (optarg[0] == '\0' || strchr(optarg, '/') != NULL) { - errmsg(V_ERROR, _("%s: Invalid filename " - "suffix"), optarg); - my_exit(ERROR); - } - - free(opt_suffix); - opt_suffix = xstrdup(optarg); + suffix_set(optarg); break; case 'T': @@ -214,7 +150,7 @@ parse_real(int argc, char **argv) // --version case 'V': // This doesn't return. - show_version(); + message_version(); // --stdout case 'c': @@ -234,7 +170,12 @@ parse_real(int argc, char **argv) // --help case 'h': // This doesn't return. - show_help(); + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); // --list case 'l': @@ -246,15 +187,9 @@ parse_real(int argc, char **argv) opt_keep_original = true; break; - case 'n': - opt_preserve_name = false; - break; - // --quiet case 'q': - if (verbosity > V_SILENT) - --verbosity; - + message_verbosity_decrease(); break; case 't': @@ -263,9 +198,7 @@ parse_real(int argc, char **argv) // --verbose case 'v': - if (verbosity < V_DEBUG) - ++verbosity; - + message_verbosity_increase(); break; case 'z': @@ -275,43 +208,47 @@ parse_real(int argc, char **argv) // Filter setup case OPT_SUBBLOCK: - add_filter(LZMA_FILTER_SUBBLOCK, optarg); + coder_add_filter(LZMA_FILTER_SUBBLOCK, + options_subblock(optarg)); break; case OPT_X86: - add_filter(LZMA_FILTER_X86, NULL); + coder_add_filter(LZMA_FILTER_X86, NULL); break; case OPT_POWERPC: - add_filter(LZMA_FILTER_POWERPC, NULL); + coder_add_filter(LZMA_FILTER_POWERPC, NULL); break; case OPT_IA64: - add_filter(LZMA_FILTER_IA64, NULL); + coder_add_filter(LZMA_FILTER_IA64, NULL); break; case OPT_ARM: - add_filter(LZMA_FILTER_ARM, NULL); + coder_add_filter(LZMA_FILTER_ARM, NULL); break; case OPT_ARMTHUMB: - add_filter(LZMA_FILTER_ARMTHUMB, NULL); + 
coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL); break; case OPT_SPARC: - add_filter(LZMA_FILTER_SPARC, NULL); + coder_add_filter(LZMA_FILTER_SPARC, NULL); break; case OPT_DELTA: - add_filter(LZMA_FILTER_DELTA, optarg); + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); break; case OPT_LZMA1: - add_filter(LZMA_FILTER_LZMA1, optarg); + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); break; case OPT_LZMA2: - add_filter(LZMA_FILTER_LZMA2, optarg); + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); break; // Other @@ -335,14 +272,11 @@ parse_real(int argc, char **argv) }; size_t i = 0; - while (strcmp(types[i].str, optarg) != 0) { - if (++i == ARRAY_SIZE(types)) { - errmsg(V_ERROR, _("%s: Unknown file " + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " "format type"), optarg); - my_exit(ERROR); - } - } opt_format = types[i].format; break; @@ -362,50 +296,43 @@ parse_real(int argc, char **argv) size_t i = 0; while (strcmp(types[i].str, optarg) != 0) { - if (++i == ARRAY_SIZE(types)) { - errmsg(V_ERROR, _("%s: Unknown " - "integrity check " - "type"), optarg); - my_exit(ERROR); - } + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown integrity" + "check type"), optarg); } - opt_check = types[i].check; + coder_set_check(types[i].check); break; } case OPT_FILES: - opt_files_split = '\n'; + args->files_delim = '\n'; // Fall through case OPT_FILES0: - if (opt_files_name != NULL) { - errmsg(V_ERROR, _("Only one file can be " + if (args->files_name != NULL) + message_fatal(_("Only one file can be " "specified with `--files'" "or `--files0'.")); - my_exit(ERROR); - } if (optarg == NULL) { - opt_files_name = (char *)stdin_filename; - opt_files_file = stdin; + args->files_name = (char *)stdin_filename; + args->files_file = stdin; } else { - opt_files_name = optarg; - opt_files_file = fopen(optarg, + args->files_name = optarg; + args->files_file = fopen(optarg, c == 
OPT_FILES ? "r" : "rb"); - if (opt_files_file == NULL) { - errmsg(V_ERROR, "%s: %s", optarg, + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, strerror(errno)); - my_exit(ERROR); - } } break; default: - show_try_help(); - my_exit(ERROR); + message_try_help(); + my_exit(E_ERROR); } } @@ -414,163 +341,124 @@ parse_real(int argc, char **argv) static void -parse_environment(void) +parse_environment(args_info *args, char *argv0) { - char *env = getenv("LZMA_OPT"); + char *env = getenv("XZ_OPT"); if (env == NULL) return; + // We modify the string, so make a copy of it. env = xstrdup(env); - // Calculate the number of arguments in env. - unsigned int argc = 1; + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; bool prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { if (isspace(env[i])) { prev_was_space = true; } else if (prev_was_space) { prev_was_space = false; - if (++argc > (unsigned int)(INT_MAX)) { - errmsg(V_ERROR, _("The environment variable " - "LZMA_OPT contains too many " + + // Keep argc small enough to fit into a singed int + // and to keep it usable for memory allocation. + if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "XZ_OPT contains too many " "arguments")); - my_exit(ERROR); - } } } - char **argv = xmalloc((argc + 1) * sizeof(char*)); + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); argv[0] = argv0; argv[argc] = NULL; + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. 
argc = 1; prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { if (isspace(env[i])) { prev_was_space = true; + env[i] = '\0'; } else if (prev_was_space) { prev_was_space = false; argv[argc++] = env + i; } } - parse_real((int)(argc), argv); + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); free(env); return; } -static void -set_compression_settings(void) +extern void +args_parse(args_info *args, int argc, char **argv) { - static lzma_options_lzma opt_lzma; - - if (filter_count == 0) { - if (lzma_lzma_preset(&opt_lzma, preset_number)) { - errmsg(V_ERROR, _("Internal error (bug)")); - my_exit(ERROR); - } - - opt_filters[0].id = opt_format == FORMAT_LZMA - ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; - opt_filters[0].options = &opt_lzma; - filter_count = 1; - } - - // Terminate the filter options array. - opt_filters[filter_count].id = LZMA_VLI_UNKNOWN; - - // If we are using the LZMA_Alone format, allow exactly one filter - // which has to be LZMA. - if (opt_format == FORMAT_LZMA && (filter_count != 1 - || opt_filters[0].id != LZMA_FILTER_LZMA1)) { - errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter " - "is supported")); - my_exit(ERROR); - } - - // TODO: liblzma probably needs an API to validate the filter chain. - - // If using --format=raw, we can be decoding. - uint64_t memory_usage = opt_mode == MODE_COMPRESS - ? lzma_memusage_encoder(opt_filters) - : lzma_memusage_decoder(opt_filters); - - // Don't go over the memory limits when the default - // setting is used. 
- if (preset_default) { - while (memory_usage > opt_memory) { - if (preset_number == 1) { - errmsg(V_ERROR, _("Memory usage limit is too " - "small for any internal " - "filter preset")); - my_exit(ERROR); - } - - if (lzma_lzma_preset(&opt_lzma, --preset_number)) { - errmsg(V_ERROR, _("Internal error (bug)")); - my_exit(ERROR); - } - - memory_usage = lzma_memusage_encoder(opt_filters); - } - - // TODO: With --format=raw, we should print a warning since - // the presets may change and thus the next version may not - // be able to uncompress the raw stream with the same preset - // number. + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; - } else { - if (memory_usage > opt_memory) { - errmsg(V_ERROR, _("Memory usage limit is too small " - "for the given filter setup")); - my_exit(ERROR); - } - } - - // Limit the number of worked threads so that memory usage - // limit isn't exceeded. - assert(memory_usage > 0); - size_t thread_limit = opt_memory / memory_usage; - if (thread_limit == 0) - thread_limit = 1; - - if (opt_threads > thread_limit) - opt_threads = thread_limit; - - return; -} + // Type of the file format to use when --format=auto or no --format + // was specified. + enum format_type format_compress_auto = FORMAT_XZ; - -extern char ** -parse_args(int argc, char **argv) -{ // Check how we were called. { - const char *name = str_filename(argv[0]); - if (name != NULL) { - // Default file format - if (strstr(name, "lz") != NULL) - format_compress_auto = FORMAT_LZMA; - - // Operation mode - if (strstr(name, "cat") != NULL) { - opt_mode = MODE_DECOMPRESS; - opt_stdout = true; - } else if (strstr(name, "un") != NULL) { - opt_mode = MODE_DECOMPRESS; - } + // Remove the leading path name, if any. 
+ const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // The default file format is .lzma if the command name + // contains "lz". + if (strstr(name, "lz") != NULL) + format_compress_auto = FORMAT_LZMA; + + // Operation mode + if (strstr(name, "cat") != NULL) { + // Imply --decompress --stdout + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "un") != NULL) { + // Imply --decompress + opt_mode = MODE_DECOMPRESS; } } // First the flags from environment - parse_environment(); + parse_environment(args, argv[0]); // Then from the command line optind = 1; - parse_real(argc, argv); + parse_real(args, argc, argv); // Never remove the source file when the destination is not on disk. // In test mode the data is written nowhere, but setting opt_stdout @@ -580,18 +468,33 @@ parse_args(int argc, char **argv) opt_stdout = true; } + // If no --format flag was used, or it was --format=auto, we need to + // decide what is the target file format we are going to use. This + // depends on how we were called (checked earlier in this function). if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) opt_format = format_compress_auto; + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) - set_compression_settings(); + coder_set_compression_settings(); // If no filenames are given, use stdin. - if (argv[optind] == NULL && opt_files_name == NULL) { - // We don't modify or free() the "-" constant. 
- static char *argv_stdin[2] = { (char *)"-", NULL }; - return argv_stdin; + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = argc - optind; } - return argv + optind; + return; } diff --git a/src/lzma/args.h b/src/lzma/args.h index 8d9cd306..6d4e8282 100644 --- a/src/lzma/args.h +++ b/src/lzma/args.h @@ -23,42 +23,34 @@ #include "private.h" -enum tool_mode { - MODE_COMPRESS, - MODE_DECOMPRESS, - MODE_TEST, - MODE_LIST, -}; +typedef struct { + /// Filenames from command line + char **arg_names; -// NOTE: The order of these is significant in suffix.c. -enum format_type { - FORMAT_AUTO, - FORMAT_XZ, - FORMAT_LZMA, - // HEADER_GZIP, - FORMAT_RAW, -}; + /// Number of filenames from command line + size_t arg_count; + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + char *files_name; -extern char *opt_suffix; + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. 
+ FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; -extern char *opt_files_name; -extern char opt_files_split; -extern FILE *opt_files_file; extern bool opt_stdout; extern bool opt_force; extern bool opt_keep_original; -extern bool opt_preserve_name; // extern bool opt_recursive; -extern enum tool_mode opt_mode; -extern enum format_type opt_format; - -extern lzma_check opt_check; -extern lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1]; extern const char *stdin_filename; -extern char **parse_args(int argc, char **argv); +extern void args_parse(args_info *args, int argc, char **argv); #endif diff --git a/src/lzma/error.c b/src/lzma/error.c deleted file mode 100644 index e66fd140..00000000 --- a/src/lzma/error.c +++ /dev/null @@ -1,162 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file error.c -/// \brief Error message printing -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" -#include <stdarg.h> - - -exit_status_type exit_status = SUCCESS; -verbosity_type verbosity = V_WARNING; -char *argv0 = NULL; -volatile sig_atomic_t user_abort = 0; - - -extern const char * -str_strm_error(lzma_ret code) -{ - switch (code) { - case LZMA_OK: - return _("Operation successful"); - - case LZMA_STREAM_END: - return _("Operation finished successfully"); - - case LZMA_PROG_ERROR: - return _("Internal error (bug)"); - - case LZMA_DATA_ERROR: - return _("Compressed data is corrupt"); - - case LZMA_MEM_ERROR: - return strerror(ENOMEM); - - case LZMA_BUF_ERROR: - return _("Unexpected end of input"); - - case LZMA_OPTIONS_ERROR: - return _("Unsupported options"); - - case LZMA_UNSUPPORTED_CHECK: - return _("Unsupported integrity check type"); - - case LZMA_MEMLIMIT_ERROR: - return _("Memory usage limit reached"); - - case LZMA_FORMAT_ERROR: - return _("File format not recognized"); - - default: - return NULL; - } -} - - -extern void -set_exit_status(exit_status_type new_status) -{ - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&mutex); - - if (new_status != WARNING || exit_status == SUCCESS) - exit_status = new_status; - - pthread_mutex_unlock(&mutex); - return; -} - - -extern void lzma_attribute((noreturn)) -my_exit(int status) -{ - // Close stdout. If something goes wrong, print an error message - // to stderr. - { - const int ferror_err = ferror(stdout); - const int fclose_err = fclose(stdout); - if (fclose_err) { - errmsg(V_ERROR, _("Writing to standard output " - "failed: %s"), strerror(errno)); - status = ERROR; - } else if (ferror_err) { - // Some error has occurred but we have no clue about - // the reason since fclose() succeeded. - errmsg(V_ERROR, _("Writing to standard output " - "failed: %s"), "Unknown error"); - status = ERROR; - } - } - - // Close stderr. 
If something goes wrong, there's nothing where we - // could print an error message. Just set the exit status. - { - const int ferror_err = ferror(stderr); - const int fclose_err = fclose(stderr); - if (fclose_err || ferror_err) - status = ERROR; - } - - exit(status); -} - - -extern void lzma_attribute((format(printf, 2, 3))) -errmsg(verbosity_type v, const char *fmt, ...) -{ - va_list ap; - - if (v <= verbosity) { - va_start(ap, fmt); - - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&mutex); - - fprintf(stderr, "%s: ", argv0); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - - pthread_mutex_unlock(&mutex); - - va_end(ap); - } - - if (v == V_ERROR) - set_exit_status(ERROR); - else if (v == V_WARNING) - set_exit_status(WARNING); - - return; -} - - -extern void -out_of_memory(void) -{ - errmsg(V_ERROR, "%s", strerror(ENOMEM)); - user_abort = 1; - return; -} - - -extern void -internal_error(void) -{ - errmsg(V_ERROR, _("Internal error (bug)")); - user_abort = 1; - return; -} diff --git a/src/lzma/error.h b/src/lzma/error.h deleted file mode 100644 index 34ec30e1..00000000 --- a/src/lzma/error.h +++ /dev/null @@ -1,67 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file error.c -/// \brief Error message printing -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef ERROR_H -#define ERROR_H - -#include "private.h" - - -typedef enum { - SUCCESS = 0, - ERROR = 1, - WARNING = 2, -} exit_status_type; - - -typedef enum { - V_SILENT, - V_ERROR, - V_WARNING, - V_VERBOSE, - V_DEBUG, -} verbosity_type; - - -extern exit_status_type exit_status; - -extern verbosity_type verbosity; - -/// Like GNU's program_invocation_name but portable -extern char *argv0; - -/// Once this is non-zero, all threads must shutdown and clean up incomplete -/// output files from the disk. -extern volatile sig_atomic_t user_abort; - - -extern const char * str_strm_error(lzma_ret code); - -extern void errmsg(verbosity_type v, const char *fmt, ...) - lzma_attribute((format(printf, 2, 3))); - -extern void set_exit_status(exit_status_type new_status); - -extern void my_exit(int status) lzma_attribute((noreturn)); - -extern void out_of_memory(void); - -extern void internal_error(void); - -#endif diff --git a/src/lzma/hardware.c b/src/lzma/hardware.c index 6cb3cdfc..63bf0937 100644 --- a/src/lzma/hardware.c +++ b/src/lzma/hardware.c @@ -26,33 +26,15 @@ size_t opt_threads = 1; -/// Number of bytes of memory to use at maximum (only a rough limit). -/// This can be set with the --memory=NUM command line option. -/// If no better value can be determined, the default is 14 MiB, which -/// should be quite safe even for older systems while still allowing -/// reasonable compression ratio. -size_t opt_memory = 14 * 1024 * 1024; +/// Memory usage limit for encoding +static uint64_t memlimit_encoder; +/// Memory usage limit for decoding +static uint64_t memlimit_decoder; -/// Get the amount of physical memory, and set opt_memory to 1/3 of it. -/// User can then override this with --memory command line option. 
-static void -hardware_memory(void) -{ - uint64_t mem = physmem(); - if (mem != 0) { - mem /= 3; - -#if UINT64_MAX > SIZE_MAX - if (mem > SIZE_MAX) - mem = SIZE_MAX; -#endif - - opt_memory = mem; - } - - return; -} +/// Memory usage limit given on the command line or environment variable. +/// Zero indicates the default (memlimit_encoder or memlimit_decoder). +static uint64_t memlimit_custom = 0; /// Get the number of CPU cores, and set opt_threads to default to that value. @@ -90,10 +72,51 @@ hardware_cores(void) } +static void +hardware_memlimit_init(void) +{ + uint64_t mem = physmem(); + + // If we cannot determine the amount of RAM, assume 32 MiB. Maybe + // even that is too much on some systems. But on most systems it's + // far too little, and can be annoying. + if (mem == 0) + mem = UINT64_C(16) * 1024 * 1024; + + // Use at maximum of 90 % of RAM when encoding and 33 % when decoding. + memlimit_encoder = mem - mem / 10; + memlimit_decoder = mem / 3; + + return; +} + + +extern void +hardware_memlimit_set(uint64_t memlimit) +{ + memlimit_custom = memlimit; + return; +} + + +extern uint64_t +hardware_memlimit_encoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_encoder; +} + + +extern uint64_t +hardware_memlimit_decoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_decoder; +} + + extern void hardware_init(void) { - hardware_memory(); + hardware_memlimit_init(); hardware_cores(); return; } diff --git a/src/lzma/hardware.h b/src/lzma/hardware.h index d47bd29f..f604df20 100644 --- a/src/lzma/hardware.h +++ b/src/lzma/hardware.h @@ -24,8 +24,22 @@ extern size_t opt_threads; -extern size_t opt_memory; + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. extern void hardware_init(void); + +/// Set custom memory usage limit. This is used for both encoding and +/// decoding. Zero indicates resetting the limit back to defaults. 
+extern void hardware_memlimit_set(uint64_t memlimit); + +/// Get the memory usage limit for encoding. By default this is 90 % of RAM. +extern uint64_t hardware_memlimit_encoder(void); + + +/// Get the memory usage limit for decoding. By default this is 30 % of RAM. +extern uint64_t hardware_memlimit_decoder(void); + #endif diff --git a/src/lzma/help.c b/src/lzma/help.c deleted file mode 100644 index 2e59f3b5..00000000 --- a/src/lzma/help.c +++ /dev/null @@ -1,170 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file help.c -/// \brief Help messages -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" - - -extern void -show_try_help(void) -{ - // Print this with V_WARNING instead of V_ERROR to prevent it from - // showing up when --quiet has been specified. - errmsg(V_WARNING, _("Try `%s --help' for more information."), argv0); - return; -} - - -extern void lzma_attribute((noreturn)) -show_help(void) -{ - printf(_("Usage: %s [OPTION]... 
[FILE]...\n" - "Compress or decompress FILEs in the .lzma format.\n" - "\n"), argv0); - - puts(_("Mandatory arguments to long options are mandatory for " - "short options too.\n")); - - puts(_( -" Operation mode:\n" -"\n" -" -z, --compress force compression\n" -" -d, --decompress force decompression\n" -" -t, --test test compressed file integrity\n" -" -l, --list list information about files\n" -)); - - puts(_( -" Operation modifiers:\n" -"\n" -" -k, --keep keep (don't delete) input files\n" -" -f, --force force overwrite of output file and (de)compress links\n" -" -c, --stdout write to standard output and don't delete input files\n" -" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" -" -F, --format=FMT file format to encode or decode; possible values are\n" -" `auto' (default), `xz', `lzma', and `raw'\n" -" --files=[FILE] read filenames to process from FILE; if FILE is\n" -" omitted, filenames are read from the standard input;\n" -" filenames must be terminated with the newline character\n" -" --files0=[FILE] like --files but use the nul byte as terminator\n" -)); - - puts(_( -" Compression presets and basic compression options:\n" -"\n" -" -1 .. -2 fast compression\n" -" -3 .. -6 good compression\n" -" -7 .. 
-9 excellent compression, but needs a lot of memory;\n" -" default is -7 if memory limit allows\n" -"\n" -" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" -" or `sha256'\n" -)); - - puts(_( -" Custom filter chain for compression (alternative for using presets):\n" -"\n" -" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" -" --lzma2=[OPTS] more of the following options (valid values; default):\n" -" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" -" lc=NUM number of literal context bits (0-4; 3)\n" -" lp=NUM number of literal position bits (0-4; 0)\n" -" pb=NUM number of position bits (0-4; 2)\n" -" mode=MODE compression mode (fast, normal; normal)\n" -" nice=NUM nice length of a match (2-273; 64)\n" -" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" -" depth=NUM maximum search depth; 0=automatic (default)\n" -"\n" -" --x86 x86 filter (sometimes called BCJ filter)\n" -" --powerpc PowerPC (big endian) filter\n" -" --ia64 IA64 (Itanium) filter\n" -" --arm ARM filter\n" -" --armthumb ARM-Thumb filter\n" -" --sparc SPARC filter\n" -"\n" -" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n" -" dist=NUM distance between bytes being subtracted\n" -" from each other (1-256; 1)\n" -"\n" -" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" -" size=NUM number of bytes of data per subblock\n" -" (1 - 256Mi; 4Ki)\n" -" rle=NUM run-length encoder chunk size (0-256; 0)\n" -)); - - puts(_( -" Resource usage options:\n" -"\n" -" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n" -" -T, --threads=NUM use a maximum of NUM (de)compression threads\n" -// " --threading=STR threading style; possible values are `auto' (default),\n" -// " `files', and `stream' -)); - - puts(_( -" Other options:\n" -"\n" -" -q, --quiet suppress warnings; specify twice to suppress errors too\n" -" -v, --verbose be verbose; specify twice for even more verbose\n" -"\n" -" -h, --help display this 
help and exit\n" -" -V, --version display version and license information and exit\n")); - - puts(_("With no FILE, or when FILE is -, read standard input.\n")); - - size_t mem_limit = opt_memory / (1024 * 1024); - if (mem_limit == 0) - mem_limit = 1; - - // We use PRIu64 instead of %zu to support pre-C99 libc. - puts(_("On this system and configuration, the tool will use")); - printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"), - (uint64_t)(mem_limit)); - printf(N_(" * at maximum of one thread for (de)compression.\n\n", - " * at maximum of %" PRIu64 - " threads for (de)compression.\n\n", - (uint64_t)(opt_threads)), (uint64_t)(opt_threads)); - - printf(_("Report bugs to <%s> (in English or Finnish).\n"), - PACKAGE_BUGREPORT); - - my_exit(SUCCESS); -} - - -extern void lzma_attribute((noreturn)) -show_version(void) -{ - printf( -"lzma (LZMA Utils) " PACKAGE_VERSION "\n" -"\n" -"Copyright (C) 1999-2008 Igor Pavlov\n" -"Copyright (C) 2007-2008 Lasse Collin\n" -"\n" -"This program is free software; you can redistribute it and/or modify\n" -"it under the terms of the GNU General Public License as published by\n" -"the Free Software Foundation; either version 2 of the License, or\n" -"(at your option) any later version.\n" -"\n" -"This program is distributed in the hope that it will be useful,\n" -"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" -"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n" -"GNU General Public License for more details.\n" -"\n"); - my_exit(SUCCESS); -} diff --git a/src/lzma/help.h b/src/lzma/help.h deleted file mode 100644 index 659c66a0..00000000 --- a/src/lzma/help.h +++ /dev/null @@ -1,32 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file help.h -/// \brief Help messages -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef HELP_H -#define HELP_H - -#include "private.h" - - -extern void show_try_help(void); - -extern void show_help(void) lzma_attribute((noreturn)); - -extern void show_version(void) lzma_attribute((noreturn)); - -#endif diff --git a/src/lzma/io.c b/src/lzma/io.c index b972099f..0ec63f03 100644 --- a/src/lzma/io.c +++ b/src/lzma/io.c @@ -19,131 +19,39 @@ #include "private.h" -#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) -# include <sys/time.h> -#endif +#include <fcntl.h> -#ifndef O_SEARCH -# define O_SEARCH O_RDONLY +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include <sys/time.h> +#elif defined(HAVE_UTIME) +# include <utime.h> #endif -/// \brief Number of open file_pairs -/// -/// Once the main() function has requested processing of all files, -/// we wait that open_pairs drops back to zero. Then it is safe to -/// exit from the program. 
-static size_t open_pairs = 0; - - -/// \brief mutex for file system operations -/// -/// All file system operations are done via the functions in this file. -/// They use fchdir() to avoid some race conditions (more portable than -/// openat() & co.). -/// -/// Synchronizing all file system operations shouldn't affect speed notably, -/// since the actual reading from and writing to files is done in parallel. -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - -/// This condition is invoked when a file is closed and the value of -/// the open_files variable has dropped to zero. The only listener for -/// this condition is io_finish() which is called from main(). -static pthread_cond_t io_cond = PTHREAD_COND_INITIALIZER; - - -/// True when stdout is being used by some thread -static bool stdout_in_use = false; - - -/// This condition is signalled when a thread releases stdout (no longer -/// writes data to it). -static pthread_cond_t stdout_cond = PTHREAD_COND_INITIALIZER; - - -/// \brief Directory where we were started -/// -/// This is needed when a new file, whose name was given on command line, -/// is opened. -static int start_dir; - - -static uid_t uid; -static gid_t gid; - - -extern void -io_init(void) -{ - start_dir = open(".", O_SEARCH | O_NOCTTY); - if (start_dir == -1) { - errmsg(V_ERROR, _("Cannot get file descriptor of the current " - "directory: %s"), strerror(errno)); - my_exit(ERROR); - } - - uid = getuid(); - gid = getgid(); - - return; -} - - -/// Waits until the number of open file_pairs has dropped to zero. -extern void -io_finish(void) -{ - pthread_mutex_lock(&mutex); - - while (open_pairs != 0) - pthread_cond_wait(&io_cond, &mutex); - - (void)close(start_dir); - - pthread_mutex_unlock(&mutex); - - return; -} - - /// \brief Unlinks a file /// -/// \param dir_fd File descriptor of the directory containing the file -/// \param name Name of the file with or without path -/// -/// \return Zero on success. 
On error, -1 is returned and errno set. -/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). static void -io_unlink(int dir_fd, const char *name, ino_t ino) +io_unlink(const char *name, const struct stat *known_st) { - const char *base = str_filename(name); - if (base == NULL) { - // This shouldn't happen. - errmsg(V_ERROR, _("%s: Invalid filename"), name); - return; - } + struct stat new_st; - pthread_mutex_lock(&mutex); - - if (fchdir(dir_fd)) { - errmsg(V_ERROR, _("Cannot change directory: %s"), - strerror(errno)); + if (lstat(name, &new_st) + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino) { + message_error(_("%s: File seems to be moved, not removing"), + name); } else { - struct stat st; - if (lstat(base, &st) || st.st_ino != ino) - errmsg(V_ERROR, _("%s: File seems to be moved, " - "not removing"), name); - // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. - else if (unlink(base)) - errmsg(V_ERROR, _("%s: Cannot remove: %s"), + if (unlink(name)) + message_error(_("%s: Cannot remove: %s"), name, strerror(errno)); } - pthread_mutex_unlock(&mutex); - return; } @@ -160,14 +68,31 @@ io_copy_attrs(const file_pair *pair) // destination file who didn't have permission to access the // source file. - if (uid == 0 && fchown(pair->dest_fd, pair->src_st.st_uid, -1)) - errmsg(V_WARNING, _("%s: Cannot set the file owner: %s"), - pair->dest_name, strerror(errno)); + // Simple cache to avoid repeated calls to geteuid(). + static enum { + WARN_FCHOWN_UNKNOWN, + WARN_FCHOWN_NO, + WARN_FCHOWN_YES, + } warn_fchown = WARN_FCHOWN_UNKNOWN; + + // Try changing the owner of the file. 
If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. + if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) + && warn_fchown != WARN_FCHOWN_NO) { + if (warn_fchown == WARN_FCHOWN_UNKNOWN) + warn_fchown = geteuid() == 0 + ? WARN_FCHOWN_YES : WARN_FCHOWN_NO; + + if (warn_fchown == WARN_FCHOWN_YES) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + } mode_t mode; if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { - errmsg(V_WARNING, _("%s: Cannot set the file group: %s"), + message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and @@ -186,192 +111,291 @@ io_copy_attrs(const file_pair *pair) } if (fchmod(pair->dest_fd, mode)) - errmsg(V_WARNING, _("%s: Cannot set the file permissions: %s"), + message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); - // Copy the timestamps only if we have a secure function to do it. -#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) - struct timeval tv[2]; - tv[0].tv_sec = pair->src_st.st_atime; - tv[1].tv_sec = pair->src_st.st_mtime; + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. 
+ long atime_nsec; + long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) - tv[0].tv_usec = pair->src_st.st_atim.tv_nsec / 1000; + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) - tv[0].tv_usec = pair->src_st.st_atimespec.tv_nsec / 1000; -# else - tv[0].tv_usec = 0; -# endif + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; -# if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) - tv[1].tv_usec = pair->src_st.st_mtim.tv_nsec / 1000; -# elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC) - tv[1].tv_usec = pair->src_st.st_mtimespec.tv_nsec / 1000; # else - tv[1].tv_usec = 0; + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; # endif -# ifdef HAVE_FUTIMES + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. 
+ struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); -# else +# elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->src_name, tv); # endif + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. + const struct utimbuf buf = { + .actime = pair->src_st.st_atime; + .modtime = pair->src_st.st_mtime; + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->src_name, &buf); #endif return; } -/// Opens and changes into the directory containing the source file. -static int -io_open_dir(file_pair *pair) +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src(file_pair *pair) { - if (pair->src_name == stdin_filename) - return 0; - - if (fchdir(start_dir)) { - errmsg(V_ERROR, _("Cannot change directory: %s"), - strerror(errno)); - return -1; + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; + return false; } - const char *split = strrchr(pair->src_name, '/'); - if (split == NULL) { - pair->dir_fd = start_dir; - } else { - // Copy also the slash. It's needed to support filenames - // like "/foo" (dirname being "/"), and it never hurts anyway. - const size_t dirname_len = split - pair->src_name + 1; - char dirname[dirname_len + 1]; - memcpy(dirname, pair->src_name, dirname_len); - dirname[dirname_len] = '\0'; - - // Open the directory and change into it. 
- pair->dir_fd = open(dirname, O_SEARCH | O_NOCTTY); - if (pair->dir_fd == -1 || fchdir(pair->dir_fd)) { - errmsg(V_ERROR, _("%s: Cannot open the directory " - "containing the file: %s"), - pair->src_name, strerror(errno)); - (void)close(pair->dir_fd); - return -1; + // We accept only regular files if we are writing the output + // to disk too, and if --force was not given. + const bool reg_files_only = !opt_stdout && !opt_force; + + // Flags for open() + int flags = O_RDONLY | O_NOCTTY; + + // If we accept only regular files, we need to be careful to avoid + // problems with special files like devices and FIFOs. O_NONBLOCK + // prevents blocking when opening such files. When we want to accept + // special files, we must not use O_NONBLOCK, or otherwise we won't + // block waiting e.g. FIFOs to become readable. + if (reg_files_only) + flags |= O_NONBLOCK; + +#ifdef O_NOFOLLOW + if (reg_files_only) + flags |= O_NOFOLLOW; +#else + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. + if (reg_files_only) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; } } +#endif - return 0; -} + // Try to open the file. If we are accepting non-regular files, + // unblock the caught signals so that open() can be interrupted + // if it blocks e.g. due to a FIFO file. + if (!reg_files_only) + signals_unblock(); + + // Maybe this wouldn't need a loop, since all the signal handlers for + // which we don't use SA_RESTART set user_abort to true. But it + // doesn't hurt to have it just in case. 
+ do { + pair->src_fd = open(pair->src_name, flags); + } while (pair->src_fd == -1 && errno == EINTR && !user_abort); + + if (!reg_files_only) + signals_block(); + + if (pair->src_fd == -1) { + // If we were interrupted, don't display any error message. + if (errno == EINTR) { + // All the signals that don't have SA_RESTART + // set user_abort. + assert(user_abort); + return true; + } +#ifdef O_NOFOLLOW + // Give an understandable error message in if reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate if O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW. + bool was_symlink = false; -static void -io_close_dir(file_pair *pair) -{ - if (pair->dir_fd != start_dir) - (void)close(pair->dir_fd); +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; - return; -} +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; +# else + if (errno == ELOOP && reg_files_only) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif -/// Opens the source file. The file is opened using the plain filename without -/// path, thus the file must be in the current working directory. This is -/// ensured because io_open_dir() is always called before this function. 
-static int -io_open_src(file_pair *pair) -{ - if (pair->src_name == stdin_filename) { - pair->src_fd = STDIN_FILENO; - } else { - // Strip the pathname. Thanks to io_open_dir(), the file - // is now in the current working directory. - const char *filename = str_filename(pair->src_name); - if (filename == NULL) - return -1; - - // Symlinks are followed if --stdout or --force has been - // specified. - const bool follow_symlinks = opt_stdout || opt_force; - pair->src_fd = open(filename, O_RDONLY | O_NOCTTY - | (follow_symlinks ? 0 : O_NOFOLLOW)); - if (pair->src_fd == -1) { - // Give an understandable error message in if reason - // for failing was that the file was a symbolic link. - // - Linux, OpenBSD, Solaris: ELOOP - // - FreeBSD: EMLINK - // - Tru64: ENOTSUP - // It seems to be safe to check for all these, since - // those errno values aren't used for other purporses - // on any of the listed operating system *when* the - // above flags are used with open(). - if (!follow_symlinks - && (errno == ELOOP -#ifdef EMLINK - || errno == EMLINK -#endif -#ifdef ENOTSUP - || errno == ENOTSUP + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else #endif - )) { - errmsg(V_WARNING, _("%s: Is a symbolic link, " - "skipping"), pair->src_name); - } else { - errmsg(V_ERROR, "%s: %s", pair->src_name, - strerror(errno)); - } + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); - return -1; - } + return true; + } - if (fstat(pair->src_fd, &pair->src_st)) { - errmsg(V_ERROR, "%s: %s", pair->src_name, - strerror(errno)); + // Drop O_NONBLOCK, which is used only when we are accepting only + // regular files. After the open() call, we want things to block + // instead of giving EAGAIN. 
+ if (reg_files_only) { + flags = fcntl(pair->src_fd, F_GETFL); + if (flags == -1) + goto error_msg; + + flags &= ~O_NONBLOCK; + + if (fcntl(pair->src_fd, F_SETFL, flags)) + goto error_msg; + } + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only) { + if (!S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, " + "skipping"), pair->src_name); goto error; } - if (S_ISDIR(pair->src_st.st_mode)) { - errmsg(V_WARNING, _("%s: Is a directory, skipping"), + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force was used. We drop these bits + // explicitly in io_copy_attr(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), pair->src_name); goto error; } - if (!opt_stdout) { - if (!opt_force && !S_ISREG(pair->src_st.st_mode)) { - errmsg(V_WARNING, _("%s: Not a regular file, " - "skipping"), pair->src_name); - goto error; - } - - if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { - // Setuid and setgid files are rejected even - // with --force. This is good for security - // (hopefully) but it's a bit weird to reject - // file when --force was given. At least this - // matches gzip's behavior. 
- errmsg(V_WARNING, _("%s: File has setuid or " - "setgid bit set, skipping"), - pair->src_name); - goto error; - } - - if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) { - errmsg(V_WARNING, _("%s: File has sticky bit " - "set, skipping"), - pair->src_name); - goto error; - } + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } - if (pair->src_st.st_nlink > 1) { - errmsg(V_WARNING, _("%s: Input file has more " - "than one hard link, " - "skipping"), pair->src_name); - goto error; - } + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; } } - return 0; + return false; +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); - return -1; + return true; } @@ -383,65 +407,73 @@ error: static void io_close_src(file_pair *pair, bool success) { - if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1) - return; - - if (close(pair->src_fd)) { - errmsg(V_ERROR, _("%s: Closing the file failed: %s"), - pair->src_name, strerror(errno)); - } else if (success && !opt_keep_original) { - io_unlink(pair->dir_fd, pair->src_name, pair->src_st.st_ino); + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { + // If we are going to unlink(), do it before closing the file. + // This way there's no risk that someone replaces the file and + // happens to get same inode number, which would make us + // unlink() wrong file. + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + + (void)close(pair->src_fd); } return; } -static int +static bool io_open_dest(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; + return false; + } - // Synchronize the order in which files get written to stdout. 
- // Unlocking the mutex is safe, because opening the file_pair - // can no longer fail. - while (stdout_in_use) - pthread_cond_wait(&stdout_cond, &mutex); + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; - stdout_in_use = true; + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } - } else { - pair->dest_name = get_dest_name(pair->src_name); - if (pair->dest_name == NULL) - return -1; - - // This cannot fail, because get_dest_name() doesn't return - // invalid names. - const char *filename = str_filename(pair->dest_name); - assert(filename != NULL); - - pair->dest_fd = open(filename, O_WRONLY | O_NOCTTY | O_CREAT - | (opt_force ? O_TRUNC : O_EXCL), - S_IRUSR | S_IWUSR); - if (pair->dest_fd == -1) { - errmsg(V_ERROR, "%s: %s", pair->dest_name, + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL; + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + // Don't bother with error message if user requested + // us to exit anyway. + if (!user_abort) + message_error("%s: %s", pair->dest_name, strerror(errno)); - free(pair->dest_name); - return -1; - } - // If this really fails... well, we have a safe fallback. - struct stat st; - if (fstat(pair->dest_fd, &st)) - pair->dest_ino = 0; - else - pair->dest_ino = st.st_ino; + free(pair->dest_name); + return true; } - return 0; + // If this really fails... well, we have a safe fallback. 
+ if (fstat(pair->dest_fd, &pair->dest_st)) { + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; + } + + return false; } @@ -455,22 +487,16 @@ io_open_dest(file_pair *pair) static int io_close_dest(file_pair *pair, bool success) { - if (pair->dest_fd == -1) + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return 0; - if (pair->dest_fd == STDOUT_FILENO) { - stdout_in_use = false; - pthread_cond_signal(&stdout_cond); - return 0; - } - if (close(pair->dest_fd)) { - errmsg(V_ERROR, _("%s: Closing the file failed: %s"), + message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: - io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return -1; } @@ -478,7 +504,7 @@ io_close_dest(file_pair *pair, bool success) // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) - io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); @@ -492,98 +518,63 @@ io_open(const char *src_name) if (is_empty_filename(src_name)) return NULL; - file_pair *pair = malloc(sizeof(file_pair)); - if (pair == NULL) { - out_of_memory(); - return NULL; - } + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; - *pair = (file_pair){ + pair = (file_pair){ .src_name = src_name, .dest_name = NULL, - .dir_fd = -1, .src_fd = -1, .dest_fd = -1, .src_eof = false, }; - pthread_mutex_lock(&mutex); - - ++open_pairs; - - if (io_open_dir(pair)) - goto error_dir; - - if (io_open_src(pair)) - goto error_src; - - if (user_abort || io_open_dest(pair)) - goto error_dest; - - pthread_mutex_unlock(&mutex); + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. 
+ signals_block(); + + file_pair *ret = NULL; + if (!io_open_src(&pair)) { + // io_open_src() may have unblocked the signals temporarily, + // and thus user_abort may have got set even if open() + // succeeded. + if (user_abort || io_open_dest(&pair)) + io_close_src(&pair, false); + else + ret = &pair; + } - return pair; + signals_unblock(); -error_dest: - io_close_src(pair, false); -error_src: - io_close_dir(pair); -error_dir: - --open_pairs; - pthread_mutex_unlock(&mutex); - free(pair); - return NULL; + return ret; } -/// \brief Closes the file descriptors and frees the structure extern void io_close(file_pair *pair, bool success) { + signals_block(); + if (success && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. If it fails, we must not remove // the source file! - if (!io_close_dest(pair, success)) { - // Closing destination file succeeded. Remove the source file - // if the operation using this file pair was successful - // and we haven't been requested to keep the source file. - io_close_src(pair, success); - } else { - // We don't care if operation using this file pair was - // successful or not, since closing the destination file - // failed. Don't remove the original file. - io_close_src(pair, false); - } - - io_close_dir(pair); + if (io_close_dest(pair, success)) + success = false; - free(pair); - - pthread_mutex_lock(&mutex); - - if (--open_pairs == 0) - pthread_cond_signal(&io_cond); + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. 
+ io_close_src(pair, success); - pthread_mutex_unlock(&mutex); + signals_unblock(); return; } -/// \brief Reads from a file to a buffer -/// -/// \param pair File pair having the sourcefile open for reading -/// \param buf Destination buffer to hold the read data -/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX -/// -/// \return On success, number of bytes read is returned. On end of -/// file zero is returned and pair->src_eof set to true. -/// On error, SIZE_MAX is returned and error message printed. -/// -/// \note This does no locking, thus two threads must not read from -/// the same file. This no problem in this program. extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size) { @@ -608,7 +599,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size) continue; } - errmsg(V_ERROR, _("%s: Read error: %s"), + message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); // FIXME Is this needed? @@ -625,18 +616,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size) } -/// \brief Writes a buffer to a file -/// -/// \param pair File pair having the destination file open for writing -/// \param buf Buffer containing the data to be written -/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX -/// -/// \return On success, zero is returned. On error, -1 is returned -/// and error message printed. -/// -/// \note This does no locking, thus two threads must not write to -/// the same file. This no problem in this program. -extern int +extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size) { assert(size < SSIZE_MAX); @@ -660,18 +640,19 @@ io_write(const file_pair *pair, const uint8_t *buf, size_t size) // GNU bash). // // We don't do anything special with --quiet, which - // is what bzip2 does too. However, we print a - // message if --verbose was used (or should that - // only be with double --verbose i.e. debugging?). - errmsg(errno == EPIPE ? 
V_VERBOSE : V_ERROR, - _("%s: Write error: %s"), + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. + if (errno != EPIPE) + message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); - return -1; + + return true; } buf += (size_t)(amount); size -= (size_t)(amount); } - return 0; + return false; } diff --git a/src/lzma/io.h b/src/lzma/io.h index d1aa17f4..4d8e61b2 100644 --- a/src/lzma/io.h +++ b/src/lzma/io.h @@ -22,6 +22,8 @@ #include "private.h" + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. #if BUFSIZ <= 1024 # define IO_BUFFER_SIZE 8192 #else @@ -30,31 +32,66 @@ typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. char *dest_name; - int dir_fd; + /// File descriptor of the source file int src_fd; + + /// File descriptor of the target file int dest_fd; + /// Stat of the source file. struct stat src_st; - ino_t dest_ino; - bool src_eof; -} file_pair; + /// Stat of the destination file. + struct stat dest_st; + /// True once end of the source file has been detected. + bool src_eof; -extern void io_init(void); +} file_pair; -extern void io_finish(void); +/// \brief Opens a file pair extern file_pair *io_open(const char *src_name); + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. 
extern void io_close(file_pair *pair, bool success); + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size); -extern int io_write(const file_pair *pair, const uint8_t *buf, size_t size); +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX +/// +/// \return On success, false is returned. On error, true is returned +/// and error message printed. +extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size); #endif diff --git a/src/lzma/main.c b/src/lzma/main.c index 02891193..4e24b98d 100644 --- a/src/lzma/main.c +++ b/src/lzma/main.c @@ -21,16 +21,30 @@ #include "open_stdxxx.h" #include <ctype.h> -static sig_atomic_t exit_signal = 0; + +volatile sig_atomic_t user_abort = false; + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which we have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// signals_block() and signals_unblock() can be called recursively. 
+static size_t signals_block_count = 0; static void signal_handler(int sig) { - // FIXME Is this thread-safe together with main()? exit_signal = sig; - - user_abort = 1; + user_abort = true; return; } @@ -38,116 +52,226 @@ signal_handler(int sig) static void establish_signal_handlers(void) { - struct sigaction sa; - sa.sa_handler = &signal_handler; - sigfillset(&sa.sa_mask); - sa.sa_flags = 0; - + // List of signals for which we establish the signal handler. static const int sigs[] = { - SIGHUP, SIGINT, - SIGPIPE, SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU SIGXCPU, +#endif +#ifdef SIGXFSZ SIGXFSZ, +#endif }; - for (size_t i = 0; i < sizeof(sigs) / sizeof(sigs[0]); ++i) { - if (sigaction(sigs[i], &sa, NULL)) { - errmsg(V_ERROR, _("Cannot establish signal handlers")); - my_exit(ERROR); - } + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + + struct sigaction sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + sa.sa_flags = 0; + sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); } - /* - SIGINFO/SIGUSR1 for status reporting? 
- */ + return; } -static bool -is_tty_stdin(void) +extern void +signals_block(void) { - const bool ret = isatty(STDIN_FILENO); - if (ret) { - // FIXME: Other threads may print between these lines. - // Maybe that should be fixed. Not a big issue in practice. - errmsg(V_ERROR, _("Compressed data not read from " - "a terminal.")); - errmsg(V_ERROR, _("Use `--force' to force decompression.")); - show_try_help(); + if (signals_block_count++ == 0) { + const int saved_errno = errno; + sigprocmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; } - return ret; + return; } -static bool -is_tty_stdout(void) +extern void +signals_unblock(void) { - const bool ret = isatty(STDOUT_FILENO); - if (ret) { - errmsg(V_ERROR, _("Compressed data not written to " - "a terminal.")); - errmsg(V_ERROR, _("Use `--force' to force compression.")); - show_try_help(); + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; } - return ret; + return; } -static char * -read_name(void) +extern void +set_exit_status(enum exit_status_type new_status) { - size_t size = 256; - size_t pos = 0; - char *name = malloc(size); - if (name == NULL) { - out_of_memory(); - return NULL; + assert(new_status == E_WARNING || new_status == E_ERROR); + + if (exit_status != E_ERROR) + exit_status = new_status; + + return; +} + + +extern void +my_exit(enum exit_status_type status) +{ + // Close stdout. If something goes wrong, print an error message + // to stderr. + { + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (ferror_err || fclose_err) { + // If it was fclose() that failed, we have the reason + // in errno. If only ferror() indicated an error, + // we have no idea what the reason was. + message(V_ERROR, _("Writing to standard output " + "failed: %s"), + fclose_err ? 
strerror(errno) + : _("Unknown error")); + status = E_ERROR; + } + } + + // Close stderr. If something goes wrong, there's nothing where we + // could print an error message. Just set the exit status. + { + const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = E_ERROR; } - while (true) { - const int c = fgetc(opt_files_file); - if (c == EOF) { - free(name); - - if (ferror(opt_files_file)) - errmsg(V_ERROR, _("%s: Error reading " - "filenames: %s"), - opt_files_name, - strerror(errno)); - else if (pos != 0) - errmsg(V_ERROR, _("%s: Unexpected end of " - "input when reading " - "filenames"), opt_files_name); + // If we have got a signal, raise it to kill the program. + const int sig = exit_signal; + if (sig != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(exit_signal); + // If, for some weird reason, the signal doesn't kill us, + // we safely fall to the exit below. + } + + exit(status); +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and uncompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. 
+ while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); return NULL; } - if (c == '\0' || c == opt_files_split) - break; + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } name[pos++] = c; + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. if (pos == size) { size *= 2; - char *tmp = realloc(name, size); - if (tmp == NULL) { - free(name); - out_of_memory(); - return NULL; - } - - name = tmp; + name = xrealloc(name, size); } } - if (name != NULL) - name[pos] = '\0'; - - return name; + return NULL; } @@ -158,35 +282,56 @@ main(int argc, char **argv) // a valid file descriptor. Exit immediatelly with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. - open_stdxxx(ERROR); + open_stdxxx(E_ERROR); - // Set the program invocation name used in various messages. 
- argv0 = argv[0]; + // This has to be done before calling any liblzma functions. + lzma_init(); - setlocale(LC_ALL, "en_US.UTF-8"); + // Set up the locale. + setlocale(LC_ALL, ""); + +#ifdef ENABLE_NLS + // Set up the message translations too. bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); +#endif + + // Set the program invocation name used in various messages, and + // do other message handling related initializations. + message_init(argv[0]); // Set hardware-dependent default values. These can be overriden // on the command line, thus this must be done before parse_args(). hardware_init(); - char **files = parse_args(argc, argv); - - if (opt_mode == MODE_COMPRESS && opt_stdout && is_tty_stdout()) - return ERROR; - - if (opt_mode == MODE_COMPRESS) - lzma_init_encoder(); + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); else - lzma_init_decoder(); - - io_init(); - process_init(); + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal and --force wasn't used. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + my_exit(E_ERROR); + } + } + } if (opt_mode == MODE_LIST) { - errmsg(V_ERROR, "--list is not implemented yet."); - my_exit(ERROR); + message_fatal("--list is not implemented yet."); } // Hook the signal handlers. We don't need these before we start @@ -194,60 +339,63 @@ main(int argc, char **argv) // line arguments. 
establish_signal_handlers(); - while (*files != NULL && !user_abort) { - if (strcmp("-", *files) == 0) { + // Process the files given on the command line. Note that if no names + // were given, parse_args() gave us a fake "-" filename. + for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Unless --force + // was used, check that we aren't writing compressed + // data to a terminal or reading it from terminal. if (!opt_force) { if (opt_mode == MODE_COMPRESS) { - if (is_tty_stdout()) { - ++files; + if (is_tty_stdout()) continue; - } } else if (is_tty_stdin()) { - ++files; continue; } } - if (opt_files_name == stdin_filename) { - errmsg(V_ERROR, _("Cannot read data from " + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " "standard input when " "reading filenames " "from standard input")); - ++files; continue; } - *files = (char *)stdin_filename; + // Replace the "-" with a special pointer, which is + // recognized by process_file() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; } - process_file(*files++); + // Do the actual compression or uncompression. + process_file(args.arg_names[i]); } - if (opt_files_name != NULL) { + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. 
while (true) { - char *name = read_name(); + const char *name = read_name(&args); if (name == NULL) break; - if (name[0] != '\0') - process_file(name); - - free(name); + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + process_file(name); } - if (opt_files_name != stdin_filename) - (void)fclose(opt_files_file); - } - - io_finish(); - - if (exit_signal != 0) { - struct sigaction sa; - sa.sa_handler = SIG_DFL; - sigfillset(&sa.sa_mask); - sa.sa_flags = 0; - sigaction(exit_signal, &sa, NULL); - raise(exit_signal); + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); } my_exit(exit_status); diff --git a/src/lzma/main.h b/src/lzma/main.h new file mode 100644 index 00000000..1e369425 --- /dev/null +++ b/src/lzma/main.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellaneous declarations +// +// Copyright (C) 2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MAIN_H +#define MAIN_H + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// If this is true, we will clean up the possibly incomplete output file and +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places.
+extern volatile sig_atomic_t user_abort; + + +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is E_WARNING and the old exit status was already E_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Exits the program using the given status. This takes care of closing +/// stdout and stderr and catches possible errors. If we had got +/// a signal, this function will raise it so that to the parent process it +/// appears that we were killed by the signal sent by the user. +extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn)); + + +#endif diff --git a/src/lzma/message.c b/src/lzma/message.c new file mode 100644 index 00000000..caba9fbc --- /dev/null +++ b/src/lzma/message.c @@ -0,0 +1,892 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details.
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if defined(HAVE_SYS_TIME_H) +# include <sys/time.h> +#elif defined(SIGALRM) +// FIXME +#endif + +#include <stdarg.h> + + +/// Name of the program which is prefixed to the error messages. +static const char *argv0; + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once a filename has been printed to stderr as part of a progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGALRM. Once this variable is true, we will print an empty line before +/// the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGALRM signals, +/// we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically. +static bool progress_automatic; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time.
+static uint64_t expected_in_size; + +/// Time when we started processing the file +static double start_time; + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((unused))) +{ + progress_needs_updating = true; + return; +} + + +/// Get the current time as double +static double +my_time(void) +{ + struct timeval tv; + + // This really shouldn't fail. I'm not sure what to return if it + // still fails. It doesn't look so useful to check the return value + // everywhere. FIXME? + if (gettimeofday(&tv, NULL)) + return -1.0; + + return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e9; +} + + +/// Wrapper for snprintf() to help constructing a string in pieces. +static void /* lzma_attribute((format(printf, 3, 4))) */ +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= len; + } + + return; +} + + +extern void +message_init(const char *given_argv0) +{ + // Name of the program + argv0 = given_argv0; + + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. 
+ progress_automatic = isatty(STDERR_FILENO); + +/* + if (progress_automatic) { + // stderr is a terminal. Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparseable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + uint64_t columns; + if (columns_str != NULL + && !str_to_uint64_raw(&columns, columns_str) + && columns < 80) + progress_automatic = false; + } +*/ + +#ifdef SIGALRM + // Establish the signal handler for SIGALRM. Since this signal + // doesn't require any quick action, we set SA_RESTART. + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sa.sa_handler = &progress_signal_handler; + if (sigaction(SIGALRM, &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (!current_filename_printed + && (files_total != 1 || filename != stdin_filename)) { + signals_block(); + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', stderr); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. 
+ if (files_total == 0) + fprintf(stderr, "%s (%u)\n", filename, + files_pos); + else + fprintf(stderr, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_progress_start(const char *src_name, uint64_t in_size) +{ + // Store the processing start time of the file and its expected size. + // If we aren't printing any statistics, then these are unused. But + // since it is possible that the user tells us with SIGALRM to show + // statistics, we need to have these available anyway. + start_time = my_time(); + filename = src_name; + expected_in_size = in_size; + + // Indicate the name of this file hasn't been printed to + // stderr yet. + current_filename_printed = false; + + // Start numbering the files starting from one. + ++files_pos; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. As an exception, if there is exactly one file, + // do not print the filename at all. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Print the filename to stderr if that is appropriate with + // the current settings. + print_filename(); + + // Start the timer to set progress_needs_updating to true + // after about one second. An alternative would to be set + // progress_needs_updating to true here immediatelly, but + // setting the timer looks better to me, since extremely + // early progress info is pretty much useless. + alarm(1); + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. 
+ if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + static char buf[sizeof("99.9 %")]; + + // Never show 100.0 % before we actually are finished (that case is + // handled separately in message_progress_end()). + snprintf(buf, sizeof(buf), "%.1f %%", + (double)(in_pos) / (double)(expected_in_size) * 99.9); + + return buf; +} + + +static void +progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final) +{ + if (final) { + // At maximum of four digits is allowed for exact byte count. + if (value < 10000) { + my_snprintf(pos, left, "%'" PRIu64 " B", value); + return; + } + +// // At maximum of four significant digits is allowed for KiB. +// if (value < UINT64_C(1023900)) { + // At maximum of five significant digits is allowed for KiB. + if (value < UINT64_C(10239900)) { + my_snprintf(pos, left, "%'.1f KiB", + (double)(value) / 1024.0); + return; + } + } + + // Otherwise we use MiB. + my_snprintf(pos, left, "%'.1f MiB", + (double)(value) / (1024.0 * 1024.0)); + return; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // This is enough to hold sizes up to about 99 TiB if thousand + // separator is used, or about 1 PiB without thousand separator. + // After that the progress indicator will look a bit silly, since + // the compression ratio no longer fits with three decimal places. + static char buf[44]; + + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this the final message, use more reasonable + // units than MiB if the file was small. + progress_sizes_helper(&pos, &left, compressed_pos, final); + my_snprintf(&pos, &left, " / "); + progress_sizes_helper(&pos, &left, uncompressed_pos, final); + + // Avoid division by zero. 
If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data. +static const char * +progress_speed(uint64_t uncompressed_pos, double elapsed) +{ + // Don't print the speed immediatelly, since the early values look + // like somewhat random. + if (elapsed < 3.0) + return ""; + + static const char unit[][8] = { + "KiB/s", + "MiB/s", + "GiB/s", + }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) / (elapsed * 1024.0); + + // Adjust the unit of the speed if needed. + while (speed > 999.9) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + static char buf[sizeof("999.9 GiB/s")]; + snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed or remaining time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. +static const char * +progress_time(uint32_t seconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + // Don't show anything if the time is zero or ridiculously big. 
+ if (seconds == 0 || seconds > ((UINT32_C(9999) * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Make the string to contain the estimated remaining time, or if the amount +/// of input isn't known, how much time has elapsed. +static const char * +progress_remaining(uint64_t in_pos, double elapsed) +{ + // If we don't know the size of the input, we indicate the time + // spent so far. + if (expected_in_size == 0 || in_pos > expected_in_size) + return progress_time((uint32_t)(elapsed)); + + // If we are at the very beginning of the file or the file is very + // small, don't give any estimate to avoid far too wrong estimations. + if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (double)(expected_in_size - in_pos) + * elapsed / (double)(in_pos); + if (remaining == 0) + remaining = 1; + + return progress_time(remaining); +} + + +extern void +message_progress_update(uint64_t in_pos, uint64_t out_pos) +{ + // If there's nothing to do, return immediatelly. + if (!progress_needs_updating || in_pos == 0) + return; + + // Print the filename if it hasn't been printed yet. + print_filename(); + + // Calculate how long we have been processing this file. + const double elapsed = my_time() - start_time; + + // Set compressed_pos and uncompressed_pos. 
+ uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + signals_block(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + fprintf(stderr, " %7s %43s %11s %10s\r", + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_remaining(in_pos, elapsed)); + + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm() call or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success) +{ + // If we are not in verbose mode, we have nothing to do. + if (verbosity < V_VERBOSE || user_abort) + return; + + // Cancel a pending alarm, if any. 
+ if (progress_automatic) { + alarm(0); + progress_active = false; + } + + const double elapsed = my_time() - start_time; + + uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + // If it took less than a second, don't display the time. + const char *elapsed_str = progress_time((double)(elapsed)); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic && in_pos > 0) { + // Using floating point conversion for the percentage instead + // of static "100.0 %" string, because the decimal separator + // isn't a dot in all locales. + fprintf(stderr, " %5.1f %% %43s %11s %10s\n", + 100.0, + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + elapsed_str); + + // When no automatic progress indicator is used, don't print a verbose + // message at all if we something went wrong and we couldn't produce + // any output. If we did produce output, then it is sometimes useful + // to tell that to the user, especially if we detected an error after + // a time-consuming operation. + } else if (success || out_pos > 0) { + // The filename and size information are always printed. + fprintf(stderr, "%s: %s", filename, progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. 
+ const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + // If there currently is a progress message on the screen, + // print a newline so that the progress message is left + // readable. This is good, because it is nice to be able to + // see where the error occurred. (The alternative would be + // to clear the progress message and replace it with the + // error message.) + if (progress_active) { + progress_active = false; + fputc('\n', stderr); + } + + fprintf(stderr, "%s: ", argv0); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + my_exit(E_ERROR); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + } + + return NULL; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), argv0); + return; +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + printf("xz " PACKAGE_VERSION "\n"); + printf("liblzma %s\n", lzma_version_string()); + my_exit(E_SUCCESS); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... 
[FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + argv0); + + puts(_("Mandatory arguments to long options are mandatory for " + "short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + + if (long_help) + puts(_( +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the null character as terminator")); + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" or `sha256'")); + } + + puts(_( +" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n" +" compression, 7-9 excellent compression; default is 7")); + + puts(_( +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)")); + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + puts(_( +"\n" +" --lzma1=[OPTS] LZMA1 or LZMA2; 
OPTS is a comma-separated list of zero or\n" +" --lzma2=[OPTS] more of the following options (valid values; default):\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86 x86 filter (sometimes called BCJ filter)\n" +" --powerpc PowerPC (big endian) filter\n" +" --ia64 IA64 (Itanium) filter\n" +" --arm ARM filter\n" +" --armthumb ARM-Thumb filter\n" +" --sparc SPARC filter")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each other (1-256; 1)")); +#endif + +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + puts(_( +"\n" +" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)")); +#endif + } + +/* + if (long_help) + puts(_( +"\n" +" Resource usage options:\n" +"\n" +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)\n" +" -T, --threads=NUM use a maximum of NUM (de)compression threads" +// " --threading=STR threading style; possible values are `auto' (default),\n" +// " `files', and `stream' +)); +*/ + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more 
verbose")); + + if (long_help) + puts(_( +"\n" +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help")); + else + puts(_( +" -h, --help display this short help\n" +" -H, --long-help display the long help (lists also the advanced options)")); + + puts(_( +" -V, --version display the version number")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + if (long_help) { + // FIXME !!! + size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024); + if (mem_limit == 0) + mem_limit = 1; + + // We use PRIu64 instead of %zu to support pre-C99 libc. + // FIXME: Use ' but avoid warnings. + puts(_("On this system and configuration, the tool will use")); + printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"), + (uint64_t)(mem_limit)); + printf(N_(" * at maximum of one thread for (de)compression.\n\n", + " * at maximum of %" PRIu64 + " threads for (de)compression.\n\n", + (uint64_t)(opt_threads)), (uint64_t)(opt_threads)); + } + + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + + my_exit(E_SUCCESS); +} diff --git a/src/lzma/message.h b/src/lzma/message.h new file mode 100644 index 00000000..7ef9b165 --- /dev/null +++ b/src/lzma/message.h @@ -0,0 +1,132 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MESSAGE_H +#define MESSAGE_H + + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Debugging, FIXME remove? +}; + + +/// \brief Initializes the message functions +/// +/// \param argv0 Name of the program i.e. argv[0] from main() +/// \param verbosity Verbosity level +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(const char *argv0); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + + +/// Set the total number of files to be processed (stdin is counted as a file +/// here). The default is one. +extern void message_set_files(unsigned int files); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +extern void message(enum message_verbosity verbosity, const char *fmt, ...) + lzma_attribute((format(printf, 2, 3))); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. +extern void message_warning(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +extern void message_error(const char *fmt, ...) 
+ lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and exits with EXIT_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +extern void message_fatal(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))) + lzma_attribute((noreturn)); + + +/// Print an error message that an internal error occurred and exit with +/// EXIT_ERROR. +extern void message_bug(void) lzma_attribute((noreturn)); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +extern void message_signal_handler(void) lzma_attribute((noreturn)); + + +/// Converts lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +extern void message_version(void) lzma_attribute((noreturn)); + + +/// Print the help message. +extern void message_help(bool long_help) lzma_attribute((noreturn)); + + +/// +extern void message_progress_start(const char *filename, uint64_t in_size); + + +/// +extern void message_progress_update(uint64_t in_pos, uint64_t out_pos); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param in_pos Final input position i.e. how much input there was. +/// \param out_pos Final output position +/// \param success True if the operation was successful. We don't +/// print the final progress message if the operation +/// wasn't successful. 
+/// +extern void message_progress_end( + uint64_t in_pos, uint64_t out_pos, bool success); + +#endif diff --git a/src/lzma/options.c b/src/lzma/options.c index f5ebdd8e..77ebddd6 100644 --- a/src/lzma/options.c +++ b/src/lzma/options.c @@ -79,11 +79,9 @@ parse_options(const char *str, const option_map *opts, if (value != NULL) *value++ = '\0'; - if (value == NULL || value[0] == '\0') { - errmsg(V_ERROR, _("%s: Options must be `name=value' " + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " "pairs separated with commas"), str); - my_exit(ERROR); - } // Look for the option name from the option map. bool found = false; @@ -106,11 +104,9 @@ parse_options(const char *str, const option_map *opts, break; } - if (opts[i].map[j].name == NULL) { - errmsg(V_ERROR, _("%s: Invalid option " + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option " "value"), value); - my_exit(ERROR); - } set(filter_options, i, opts[i].map[j].id); } @@ -119,10 +115,8 @@ parse_options(const char *str, const option_map *opts, break; } - if (!found) { - errmsg(V_ERROR, _("%s: Invalid option name"), name); - my_exit(ERROR); - } + if (!found) + message_fatal(_("%s: Invalid option name"), name); if (split == NULL) break; @@ -168,7 +162,7 @@ set_subblock(void *options, uint32_t key, uint64_t value) extern lzma_options_subblock * -parse_options_subblock(const char *str) +options_subblock(const char *str) { static const option_map opts[] = { { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, @@ -217,7 +211,7 @@ set_delta(void *options, uint32_t key, uint64_t value) extern lzma_options_delta * -parse_options_delta(const char *str) +options_delta(const char *str) { static const option_map opts[] = { { "dist", NULL, LZMA_DELTA_DIST_MIN, @@ -225,7 +219,7 @@ parse_options_delta(const char *str) { NULL, NULL, 0, 0 } }; - lzma_options_delta *options = xmalloc(sizeof(lzma_options_subblock)); + lzma_options_delta *options = 
xmalloc(sizeof(lzma_options_delta)); *options = (lzma_options_delta){ // It's hard to give a useful default for this. .type = LZMA_DELTA_TYPE_BYTE, @@ -296,7 +290,7 @@ set_lzma(void *options, uint32_t key, uint64_t value) extern lzma_options_lzma * -parse_options_lzma(const char *str) +options_lzma(const char *str) { static const name_id_map modes[] = { { "fast", LZMA_MODE_FAST }, @@ -345,18 +339,14 @@ parse_options_lzma(const char *str) parse_options(str, opts, &set_lzma, options); - if (options->lc + options->lp > LZMA_LCLP_MAX) { - errmsg(V_ERROR, "The sum of lc and lp must be at " - "maximum of 4"); - exit(ERROR); - } + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must be at " + "maximum of 4")); const uint32_t nice_len_min = options->mf & 0x0F; - if (options->nice_len < nice_len_min) { - errmsg(V_ERROR, "The selected match finder requires at " - "least nice=%" PRIu32, nice_len_min); - exit(ERROR); - } + if (options->nice_len < nice_len_min) + message_fatal(_("The selected match finder requires at " + "least nice=%" PRIu32), nice_len_min); return options; } diff --git a/src/lzma/options.h b/src/lzma/options.h index 885c5969..4253ac3c 100644 --- a/src/lzma/options.h +++ b/src/lzma/options.h @@ -27,20 +27,20 @@ /// /// \return Pointer to allocated options structure. /// Doesn't return on error. -extern lzma_options_subblock *parse_options_subblock(const char *str); +extern lzma_options_subblock *options_subblock(const char *str); /// \brief Parser for Delta options /// /// \return Pointer to allocated options structure. /// Doesn't return on error. -extern lzma_options_delta *parse_options_delta(const char *str); +extern lzma_options_delta *options_delta(const char *str); /// \brief Parser for LZMA options /// /// \return Pointer to allocated options structure. /// Doesn't return on error. 
-extern lzma_options_lzma *parse_options_lzma(const char *str); +extern lzma_options_lzma *options_lzma(const char *str); #endif diff --git a/src/lzma/private.h b/src/lzma/private.h index f6a75645..b463a08e 100644 --- a/src/lzma/private.h +++ b/src/lzma/private.h @@ -22,32 +22,30 @@ #include "sysdefs.h" -#ifdef HAVE_ERRNO_H -# include <errno.h> -#else -extern int errno; -#endif - +#include <sys/types.h> #include <sys/stat.h> +#include <errno.h> #include <signal.h> -#include <pthread.h> #include <locale.h> #include <stdio.h> -#include <fcntl.h> #include <unistd.h> -#include "gettext.h" -#define _(msgid) gettext(msgid) -#define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#ifdef ENABLE_NLS +# include <libintl.h> +# define _(msgid) gettext(msgid) +# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#else +# define _(msgid) (msgid) +# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2)) +#endif -#include "alloc.h" +#include "main.h" +#include "process.h" +#include "message.h" #include "args.h" -#include "error.h" #include "hardware.h" -#include "help.h" #include "io.h" #include "options.h" -#include "process.h" #include "suffix.h" #include "util.h" diff --git a/src/lzma/process.c b/src/lzma/process.c index fc4ef96a..d30878e4 100644 --- a/src/lzma/process.c +++ b/src/lzma/process.c @@ -20,137 +20,158 @@ #include "private.h" -typedef struct { - lzma_stream strm; - void *options; +enum operation_mode opt_mode = MODE_COMPRESS; - file_pair *pair; +enum format_type opt_format = FORMAT_AUTO; - /// We don't need this for *anything* but seems that at least with - /// glibc pthread_create() doesn't allow NULL. - pthread_t thread; - bool in_use; +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; -} thread_data; +/// Filters needed for encoding in all formats, and also for decoding raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; +/// Number of filters. Zero indicates that we are using a preset. 
+static size_t filters_count = 0; -/// Number of available threads -static size_t free_threads; +/// Number of the preset (1-9) +static size_t preset_number = 7; -/// Thread-specific data -static thread_data *threads; +/// Indicate if no preset has been given. In that case, we will auto-adjust +/// the compression preset so that it doesn't use too much RAM. +// FIXME +static bool preset_default = true; -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +/// Integrity check type +static lzma_check check = LZMA_CHECK_CRC64; -/// Attributes of new coder threads. They are created in detached state. -/// Coder threads signal to the service thread themselves when they are done. -static pthread_attr_t thread_attr; +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + return; +} -////////// -// Init // -////////// extern void -process_init(void) +coder_set_preset(size_t new_preset) { - threads = malloc(sizeof(thread_data) * opt_threads); - if (threads == NULL) { - out_of_memory(); - my_exit(ERROR); - } + preset_number = new_preset; + preset_default = false; + return; +} - for (size_t i = 0; i < opt_threads; ++i) - memzero(&threads[i], sizeof(threads[0])); - if (pthread_attr_init(&thread_attr) - || pthread_attr_setdetachstate( - &thread_attr, PTHREAD_CREATE_DETACHED)) { - out_of_memory(); - my_exit(ERROR); - } +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); - free_threads = opt_threads; + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; return; } -////////////////////////// -// Thread-specific data // -////////////////////////// - -static thread_data * -get_thread_data(void) +extern void +coder_set_compression_settings(void) { - pthread_mutex_lock(&mutex); + // Options for LZMA1 or LZMA2 in case we are using a preset. 
+ static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } - while (free_threads == 0) { - pthread_cond_wait(&cond, &mutex); + // Get the preset for LZMA1 or LZMA2. + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); - if (user_abort) { - pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); - return NULL; - } + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; } - thread_data *t = threads; - while (t->in_use) - ++t; + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; - t->in_use = true; - --free_threads; + // If we are using the LZMA_Alone format, allow exactly one filter + // which has to be LZMA. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("With --format=lzma only the LZMA1 filter " + "is supported")); - pthread_mutex_unlock(&mutex); - - return t; -} + // TODO: liblzma probably needs an API to validate the filter chain. + // If using --format=raw, we can be decoding. 
+ uint64_t memory_usage; + uint64_t memory_limit; + if (opt_mode == MODE_COMPRESS) { + memory_usage = lzma_memusage_encoder(filters); + memory_limit = hardware_memlimit_encoder(); + } else { + memory_usage = lzma_memusage_decoder(filters); + memory_limit = hardware_memlimit_decoder(); + } -static void -release_thread_data(thread_data *t) -{ - pthread_mutex_lock(&mutex); + if (memory_usage == UINT64_MAX) + message_bug(); - t->in_use = false; - ++free_threads; + if (preset_default) { + // When no preset was explicitly requested, we use the default + // preset only if the memory usage limit allows. Otherwise we + // select a lower preset automatically. + while (memory_usage > memory_limit) { + if (preset_number == 1) + message_fatal(_("Memory usage limit is too " + "small for any internal " + "filter preset")); - pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); + if (lzma_lzma_preset(&opt_lzma, --preset_number)) + message_bug(); - return; -} - - -static int -create_thread(void *(*func)(thread_data *t), thread_data *t) -{ - if (opt_threads == 1) { - func(t); - } else { - const int err = pthread_create(&t->thread, &thread_attr, - (void *(*)(void *))(func), t); - if (err) { - errmsg(V_ERROR, _("Cannot create a thread: %s"), - strerror(err)); - user_abort = 1; - return -1; + memory_usage = lzma_memusage_encoder(filters); } + } else { + if (memory_usage > memory_limit) + message_fatal(_("Memory usage limit is too small " + "for the given filter setup")); } - return 0; -} + // Limit the number of worker threads so that memory usage + // limit isn't exceeded. 
+ assert(memory_usage > 0); + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + if (opt_threads > thread_limit) + opt_threads = thread_limit; + + return; +} -///////////////////////// -// One thread per file // -///////////////////////// -static int -single_init(thread_data *t) +static bool +coder_init(void) { lzma_ret ret = LZMA_PROG_ERROR; @@ -162,17 +183,15 @@ single_init(thread_data *t) break; case FORMAT_XZ: - ret = lzma_stream_encoder(&t->strm, - opt_filters, opt_check); + ret = lzma_stream_encoder(&strm, filters, check); break; case FORMAT_LZMA: - ret = lzma_alone_encoder(&t->strm, - opt_filters[0].options); + ret = lzma_alone_encoder(&strm, filters[0].options); break; case FORMAT_RAW: - ret = lzma_raw_encoder(&t->strm, opt_filters); + ret = lzma_raw_encoder(&strm, filters); break; } } else { @@ -181,254 +200,192 @@ single_init(thread_data *t) switch (opt_format) { case FORMAT_AUTO: - ret = lzma_auto_decoder(&t->strm, opt_memory, flags); + ret = lzma_auto_decoder(&strm, + hardware_memlimit_decoder(), flags); break; case FORMAT_XZ: - ret = lzma_stream_decoder(&t->strm, opt_memory, flags); + ret = lzma_stream_decoder(&strm, + hardware_memlimit_decoder(), flags); break; case FORMAT_LZMA: - ret = lzma_alone_decoder(&t->strm, opt_memory); + ret = lzma_alone_decoder(&strm, + hardware_memlimit_decoder()); break; case FORMAT_RAW: // Memory usage has already been checked in args.c. 
- ret = lzma_raw_decoder(&t->strm, opt_filters); + // FIXME Comment + ret = lzma_raw_decoder(&strm, filters); break; } } if (ret != LZMA_OK) { if (ret == LZMA_MEM_ERROR) - out_of_memory(); + message_error("%s", message_strm(LZMA_MEM_ERROR)); else - internal_error(); + message_bug(); - return -1; + return true; } - return 0; + return false; } -static void * -single(thread_data *t) +static bool +coder_run(file_pair *pair) { - if (single_init(t)) { - io_close(t->pair, false); - release_thread_data(t); - return NULL; - } + // Buffers to hold input and output data. + uint8_t in_buf[IO_BUFFER_SIZE]; + uint8_t out_buf[IO_BUFFER_SIZE]; + + // Initialize the progress indicator. + const uint64_t in_size = pair->src_st.st_size <= (off_t)(0) + ? 0 : (uint64_t)(pair->src_st.st_size); + message_progress_start(pair->src_name, in_size); - uint8_t in_buf[BUFSIZ]; - uint8_t out_buf[BUFSIZ]; lzma_action action = LZMA_RUN; lzma_ret ret; - bool success = false; - t->strm.avail_in = 0; - t->strm.next_out = out_buf; - t->strm.avail_out = BUFSIZ; + strm.avail_in = 0; + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; while (!user_abort) { - if (t->strm.avail_in == 0 && !t->pair->src_eof) { - t->strm.next_in = in_buf; - t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ); + // Fill the input buffer if it is empty and we haven't reached + // end of file yet. + if (strm.avail_in == 0 && !pair->src_eof) { + strm.next_in = in_buf; + strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE); - if (t->strm.avail_in == SIZE_MAX) + if (strm.avail_in == SIZE_MAX) break; - if (t->pair->src_eof) + // Encoder needs to know when we have given all the + // input to it. The decoders need to know it too when + // we are using LZMA_CONCATENATED. + if (pair->src_eof) action = LZMA_FINISH; } - ret = lzma_code(&t->strm, action); + // Let liblzma do the actual work. 
+ ret = lzma_code(&strm, action); - if ((t->strm.avail_out == 0 || ret != LZMA_OK) - && opt_mode != MODE_TEST) { - if (io_write(t->pair, out_buf, - BUFSIZ - t->strm.avail_out)) - break; + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (opt_mode != MODE_TEST && io_write(pair, out_buf, + IO_BUFFER_SIZE - strm.avail_out)) + return false; - t->strm.next_out = out_buf; - t->strm.avail_out = BUFSIZ; + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; } if (ret != LZMA_OK) { - // Check that there is no trailing garbage. This is - // needed for LZMA_Alone and raw streams. - if (ret == LZMA_STREAM_END && (t->strm.avail_in != 0 - || (!t->pair->src_eof && io_read( - t->pair, in_buf, 1) != 0))) - ret = LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) { - errmsg(V_ERROR, "%s: %s", t->pair->src_name, - str_strm_error(ret)); - break; + // Determine if the return value indicates that we + // won't continue coding. + const bool stop = ret != LZMA_NO_CHECK + && ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // First print the final progress info. + // This way the user sees more accurately + // where the error occurred. Note that we + // print this *before* the possible error + // message. + // + // FIXME: What if something goes wrong + // after this? + message_progress_end(strm.total_in, + strm.total_out, + ret == LZMA_STREAM_END); + + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (opt_mode != MODE_TEST && io_write(pair, + out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + return false; } - assert(t->pair->src_eof); - success = true; - break; - } - } - - io_close(t->pair, success); - release_thread_data(t); - - return NULL; -} + if (ret == LZMA_STREAM_END) { + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. 
+ if (strm.avail_in == 0 && (pair->src_eof + || io_read(pair, in_buf, 1) + == 0)) { + assert(pair->src_eof); + return true; + } + // FIXME: What about io_read() failing? -/////////////////////////////// -// Multiple threads per file // -/////////////////////////////// - -// TODO - -// I'm not sure what would the best way to implement this. Here's one -// possible way: -// - Reader thread would read the input data and control the coders threads. -// - Every coder thread is associated with input and output buffer pools. -// The input buffer pool is filled by reader thread, and the output buffer -// pool is emptied by the writer thread. -// - Writer thread writes the output data of the oldest living coder thread. -// -// The per-file thread started by the application's main thread is used as -// the reader thread. In the beginning, it starts the writer thread and the -// first coder thread. The coder thread would be left waiting for input from -// the reader thread, and the writer thread would be waiting for input from -// the coder thread. -// -// The reader thread reads the input data into a ring buffer, whose size -// depends on the value returned by lzma_chunk_size(). If the ring buffer -// gets full, the buffer is marked "to be finished", which indicates to -// the coder thread that no more input is coming. Then a new coder thread -// would be started. -// -// TODO - -/* -typedef struct { - /// Buffers - uint8_t (*buffers)[BUFSIZ]; - - /// Number of buffers - size_t buffer_count; - - /// buffers[read_pos] is the buffer currently being read. Once finish - /// is true and read_pos == write_pos, end of input has been reached. - size_t read_pos; - - /// buffers[write_pos] is the buffer into which data is currently - /// being written. - size_t write_pos; - - /// This variable matters only when read_pos == write_pos && finish. - /// In that case, this variable will contain the size of the - /// buffers[read_pos]. 
- size_t last_size; - - /// True once no more data is being written to the buffer. When this - /// is set, the last_size variable must have been set too. - bool finish; - - /// Mutex to protect access to the variables in this structure - pthread_mutex_t mutex; - - /// Condition to indicate when another thread can continue - pthread_cond_t cond; -} mem_pool; - - -static foo -multi_reader(thread_data *t) -{ - bool done = false; - - do { - const size_t size = io_read(t->pair, - m->buffers + m->write_pos, BUFSIZ); - if (size == SIZE_MAX) { - // TODO - } else if (t->pair->src_eof) { - m->last_size = size; - } - - pthread_mutex_lock(&m->mutex); - - if (++m->write_pos == m->buffer_count) - m->write_pos = 0; - - if (m->write_pos == m->read_pos || t->pair->src_eof) - m->finish = true; - - pthread_cond_signal(&m->cond); - pthread_mutex_unlock(&m->mutex); - - } while (!m->finish); - - return done ? 0 : -1; -} - - -static foo -multi_code() -{ - lzma_action = LZMA_RUN; - - while (true) { - pthread_mutex_lock(&m->mutex); + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } - while (m->read_pos == m->write_pos && !m->finish) - pthread_cond_wait(&m->cond, &m->mutex); + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. + if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } - pthread_mutex_unlock(&m->mutex); + if (ret == LZMA_MEMLIMIT_ERROR) { + // Figure out how much memory would have + // actually been needed. 
+ // TODO + } - if (m->finish) { - t->strm.avail_in = m->last_size; - if (opt_mode == MODE_COMPRESS) - action = LZMA_FINISH; - } else { - t->strm.avail_in = BUFSIZ; + if (stop) + return false; } - t->strm.next_in = m->buffers + m->read_pos; - - const lzma_ret ret = lzma_code(&t->strm, action); - + // Show progress information if --verbose was specified and + // stderr is a terminal. + message_progress_update(strm.total_in, strm.total_out); } -} - -*/ + return false; +} -/////////////////////// -// Starting new file // -/////////////////////// extern void process_file(const char *filename) { - thread_data *t = get_thread_data(); - if (t == NULL) - return; // User abort - - // If this fails, it shows appropriate error messages too. - t->pair = io_open(filename); - if (t->pair == NULL) { - release_thread_data(t); + // First try initializing the coder. If it fails, it's useless to try + // opening the file. Check also for user_abort just in case if we had + // got a signal while initializing the coder. + if (coder_init() || user_abort) return; - } - // TODO Currently only one-thread-per-file mode is implemented. + // Try to open the input and output files. + file_pair *pair = io_open(filename); + if (pair == NULL) + return; - if (create_thread(&single, t)) { - io_close(t->pair, false); - release_thread_data(t); - } + // Do the actual coding. + const bool success = coder_run(pair); + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); return; } diff --git a/src/lzma/process.h b/src/lzma/process.h index 7fdfbce6..de23eacb 100644 --- a/src/lzma/process.h +++ b/src/lzma/process.h @@ -23,6 +23,46 @@ #include "private.h" +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. 
+enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, + // HEADER_GZIP, + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(size_t new_preset); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// +extern void coder_set_compression_settings(void); + extern void process_init(void); extern void process_file(const char *filename); diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c index 460acee2..0d46855a 100644 --- a/src/lzma/suffix.c +++ b/src/lzma/suffix.c @@ -20,6 +20,9 @@ #include "private.h" +static char *custom_suffix = NULL; + + struct suffix_pair { const char *compressed; const char *uncompressed; @@ -74,8 +77,8 @@ uncompressed_name(const char *src_name, const size_t src_len) if (opt_format == FORMAT_RAW) { // Don't check for known suffixes when --format=raw was used. 
- if (opt_suffix == NULL) { - errmsg(V_ERROR, _("%s: With --format=raw, " + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; @@ -91,21 +94,17 @@ uncompressed_name(const char *src_name, const size_t src_len) } } - if (new_len == 0 && opt_suffix != NULL) - new_len = test_suffix(opt_suffix, src_name, src_len); + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); if (new_len == 0) { - errmsg(V_WARNING, _("%s: Filename has an unknown suffix, " + message_warning(_("%s: Filename has an unknown suffix, " "skipping"), src_name); return NULL; } const size_t new_suffix_len = strlen(new_suffix); - char *dest_name = malloc(new_len + new_suffix_len + 1); - if (dest_name == NULL) { - out_of_memory(); - return NULL; - } + char *dest_name = xmalloc(new_len + new_suffix_len + 1); memcpy(dest_name, src_name, new_len); memcpy(dest_name + new_len, new_suffix, new_suffix_len); @@ -154,7 +153,7 @@ compressed_name(const char *src_name, const size_t src_len) for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { if (test_suffix(suffixes[i].compressed, src_name, src_len) != 0) { - errmsg(V_WARNING, _("%s: File already has `%s' " + message_warning(_("%s: File already has `%s' " "suffix, skipping"), src_name, suffixes[i].compressed); return NULL; @@ -163,22 +162,18 @@ compressed_name(const char *src_name, const size_t src_len) // TODO: Hmm, maybe it would be better to validate this in args.c, // since the suffix handling when decoding is weird now. - if (opt_format == FORMAT_RAW && opt_suffix == NULL) { - errmsg(V_ERROR, _("%s: With --format=raw, " + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; } - const char *suffix = opt_suffix != NULL - ? 
opt_suffix : suffixes[0].compressed; + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0].compressed; const size_t suffix_len = strlen(suffix); - char *dest_name = malloc(src_len + suffix_len + 1); - if (dest_name == NULL) { - out_of_memory(); - return NULL; - } + char *dest_name = xmalloc(src_len + suffix_len + 1); memcpy(dest_name, src_name, src_len); memcpy(dest_name + src_len, suffix, suffix_len); @@ -189,7 +184,7 @@ compressed_name(const char *src_name, const size_t src_len) extern char * -get_dest_name(const char *src_name) +suffix_get_dest_name(const char *src_name) { assert(src_name != NULL); @@ -201,3 +196,18 @@ get_dest_name(const char *src_name) ? compressed_name(src_name, src_len) : uncompressed_name(src_name, src_len); } + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a slash are rejected. Such + // suffixes would break things later. + if (suffix[0] == '\0' || strchr(suffix, '/') != NULL) + message_fatal(_("%s: Invalid filename suffix"), optarg); + + // Replace the old custom_suffix (if any) with the new suffix. + free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} diff --git a/src/lzma/suffix.h b/src/lzma/suffix.h index 08315659..c92b92dc 100644 --- a/src/lzma/suffix.h +++ b/src/lzma/suffix.h @@ -20,6 +20,21 @@ #ifndef SUFFIX_H #define SUFFIX_H -extern char *get_dest_name(const char *src_name); +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. 
There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); #endif diff --git a/src/lzma/util.c b/src/lzma/util.c index 4bdbf8ec..13b67925 100644 --- a/src/lzma/util.c +++ b/src/lzma/util.c @@ -20,17 +20,29 @@ #include "private.h" -/// \brief Fancy version of strtoull() -/// -/// \param name Name of the option to show in case of an error -/// \param value String containing the number to be parsed; may -/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" -/// \param min Minimum valid value -/// \param max Maximum valid value -/// -/// \return Parsed value that is in the range [min, max]. Does not return -/// if an error occurs. -/// +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + ptr = realloc(ptr, size); + if (ptr == NULL) + message_fatal("%s", strerror(errno)); + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + extern uint64_t str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) { @@ -40,12 +52,9 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) while (*value == ' ' || *value == '\t') ++value; - if (*value < '0' || *value > '9') { - errmsg(V_ERROR, _("%s: Value is not a non-negative " - "decimal integer"), - value); - my_exit(ERROR); - } + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); do { // Don't overflow. @@ -86,12 +95,11 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) } if (multiplier == 0) { - errmsg(V_ERROR, _("%s: Invalid multiplier suffix. " + message(V_ERROR, _("%s: Invalid multiplier suffix. 
" "Valid suffixes:"), value); - errmsg(V_ERROR, "`k' (10^3), `M' (10^6), `G' (10^9) " + message_fatal("`k' (10^3), `M' (10^6), `G' (10^9) " "`Ki' (2^10), `Mi' (2^20), " "`Gi' (2^30)"); - my_exit(ERROR); } // Don't overflow here either. @@ -107,32 +115,10 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) return result; error: - errmsg(V_ERROR, _("Value of the option `%s' must be in the range " + message_fatal(_("Value of the option `%s' must be in the range " "[%llu, %llu]"), name, (unsigned long long)(min), (unsigned long long)(max)); - my_exit(ERROR); -} - - -/// \brief Gets filename part from pathname+filename -/// -/// \return Pointer in the filename where the actual filename starts. -/// If the last character is a slash, NULL is returned. -/// -extern const char * -str_filename(const char *name) -{ - const char *base = strrchr(name, '/'); - - if (base == NULL) { - base = name; - } else if (*++base == '\0') { - base = NULL; - errmsg(V_ERROR, _("%s: Invalid filename"), name); - } - - return base; } @@ -179,9 +165,35 @@ extern bool is_empty_filename(const char *filename) { if (filename[0] == '\0') { - errmsg(V_WARNING, _("Empty filename, skipping")); + message_error(_("Empty filename, skipping")); return true; } return false; } + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data not read from a terminal " + "unless `--force' is used.")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data not written to a terminal " + "unless `--force' is used.")); + + return ret; +} diff --git a/src/lzma/util.h b/src/lzma/util.h index 91bd9ba3..dca62b26 100644 --- a/src/lzma/util.h +++ b/src/lzma/util.h @@ -20,13 +20,52 @@ #ifndef UTIL_H #define UTIL_H -#include "private.h" +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() 
must not be used when +/// there are files open for writing that should be cleaned up +/// before exiting. +#define xmalloc(size) xrealloc(NULL, size) + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// extern uint64_t str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max); -extern const char *str_filename(const char *filename); +/// \brief Check if filename is empty and print an error message extern bool is_empty_filename(const char *filename); + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. 
+extern bool is_tty_stdout(void); + #endif diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c index eab00544..0b2adb97 100644 --- a/src/lzmadec/lzmadec.c +++ b/src/lzmadec/lzmadec.c @@ -19,12 +19,7 @@ #include "sysdefs.h" -#ifdef HAVE_ERRNO_H -# include <errno.h> -#else -extern int errno; -#endif - +#include <errno.h> #include <stdio.h> #include <unistd.h> @@ -65,7 +60,7 @@ static uint8_t out_buf[BUFSIZ]; static lzma_stream strm = LZMA_STREAM_INIT; /// Number of bytes to use memory at maximum -static size_t memlimit; +static uint64_t memlimit; /// Program name to be shown in error messages static const char *argv0; @@ -94,8 +89,8 @@ help(void) " -d, --decompress (ignored)\n" " -k, --keep (ignored)\n" " -f, --force allow reading compressed data from a terminal\n" -" -M, --memory=NUM use NUM bytes of memory at maximum; the suffixes\n" -" k, M, G, Ki, Mi, and Gi are supported.\n" +" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default);\n" +" the suffixes k, M, G, Ki, Mi, and Gi are supported.\n" " --format=FMT accept only files in the given file format;\n" " possible FMTs are `auto', `native', and alone',\n" " -h, --help display this help and exit\n" @@ -141,20 +136,14 @@ version(void) static void set_default_memlimit(void) { - uint64_t mem = physmem(); - if (mem != 0) { - mem /= 3; + const uint64_t mem = physmem(); -#if UINT64_MAX > SIZE_MAX - if (mem > SIZE_MAX) - mem = SIZE_MAX; -#endif - - memlimit = mem / 3; - } else { + if (mem == 0) // Cannot autodetect, use 10 MiB as the default limit. memlimit = (1U << 23) + (1U << 21); - } + else + // Limit is 33 % of RAM. + memlimit = mem / 3; return; } @@ -165,7 +154,7 @@ set_default_memlimit(void) /// This is rudely copied from src/lzma/util.c and modified a little. 
:-( /// static size_t -str_to_size(const char *value) +str_to_uint64(const char *value) { size_t result = 0; @@ -263,7 +252,10 @@ parse_options(int argc, char **argv) break; case 'M': - memlimit = str_to_size(optarg); + memlimit = str_to_uint64(optarg); + if (memlimit == 0) + set_default_memlimit(); + break; case 'h': |