aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/bswap.h15
-rw-r--r--src/common/physmem.h4
-rw-r--r--src/common/sysdefs.h12
-rw-r--r--src/liblzma/api/lzma/block.h47
-rw-r--r--src/liblzma/api/lzma/filter.h8
-rw-r--r--src/liblzma/api/lzma/index.h20
-rw-r--r--src/liblzma/api/lzma/index_hash.h4
-rw-r--r--src/liblzma/common/block_decoder.c59
-rw-r--r--src/liblzma/common/block_encoder.c41
-rw-r--r--src/liblzma/common/block_header_decoder.c31
-rw-r--r--src/liblzma/common/block_header_encoder.c69
-rw-r--r--src/liblzma/common/block_util.c45
-rw-r--r--src/liblzma/common/common.h8
-rw-r--r--src/liblzma/common/filter_common.c4
-rw-r--r--src/liblzma/common/index.c259
-rw-r--r--src/liblzma/common/index.h33
-rw-r--r--src/liblzma/common/index_decoder.c31
-rw-r--r--src/liblzma/common/index_encoder.c16
-rw-r--r--src/liblzma/common/index_hash.c68
-rw-r--r--src/liblzma/common/stream_decoder.c9
-rw-r--r--src/liblzma/common/stream_encoder.c6
-rw-r--r--src/liblzma/lz/lz_decoder.h4
-rw-r--r--src/liblzma/subblock/subblock_decoder.c3
-rw-r--r--src/lzma/Makefile.am9
-rw-r--r--src/lzma/alloc.c106
-rw-r--r--src/lzma/alloc.h42
-rw-r--r--src/lzma/args.c531
-rw-r--r--src/lzma/args.h42
-rw-r--r--src/lzma/error.c162
-rw-r--r--src/lzma/error.h67
-rw-r--r--src/lzma/hardware.c75
-rw-r--r--src/lzma/hardware.h16
-rw-r--r--src/lzma/help.c170
-rw-r--r--src/lzma/help.h32
-rw-r--r--src/lzma/io.c757
-rw-r--r--src/lzma/io.h51
-rw-r--r--src/lzma/main.c392
-rw-r--r--src/lzma/main.h60
-rw-r--r--src/lzma/message.c892
-rw-r--r--src/lzma/message.h132
-rw-r--r--src/lzma/options.c42
-rw-r--r--src/lzma/options.h6
-rw-r--r--src/lzma/private.h28
-rw-r--r--src/lzma/process.c525
-rw-r--r--src/lzma/process.h40
-rw-r--r--src/lzma/suffix.c52
-rw-r--r--src/lzma/suffix.h17
-rw-r--r--src/lzma/util.c100
-rw-r--r--src/lzma/util.h43
-rw-r--r--src/lzmadec/lzmadec.c36
50 files changed, 2963 insertions, 2258 deletions
diff --git a/src/common/bswap.h b/src/common/bswap.h
index 8f82a8f4..f5cb8345 100644
--- a/src/common/bswap.h
+++ b/src/common/bswap.h
@@ -16,20 +16,29 @@
// NOTE: We assume that config.h is already #included.
-// byteswap.h is a GNU extension. It contains inline assembly versions
-// for byteswapping. When byteswap.h is not available, we use generic code.
+// At least glibc has byteswap.h which contains inline assembly code for
+// byteswapping. Some systems have byteswap.h but lack one or more of the
+// bswap_xx macros/functions, which is why we check them separately even
+// if byteswap.h is available.
+
#ifdef HAVE_BYTESWAP_H
# include <byteswap.h>
-#else
+#endif
+
+#ifndef HAVE_BSWAP_16
# define bswap_16(num) \
(((num) << 8) | ((num) >> 8))
+#endif
+#ifndef HAVE_BSWAP_32
# define bswap_32(num) \
( (((num) << 24) ) \
| (((num) << 8) & UINT32_C(0x00FF0000)) \
| (((num) >> 8) & UINT32_C(0x0000FF00)) \
| (((num) >> 24) ) )
+#endif
+#ifndef HAVE_BSWAP_64
# define bswap_64(num) \
( (((num) << 56) ) \
| (((num) << 40) & UINT64_C(0x00FF000000000000)) \
diff --git a/src/common/physmem.h b/src/common/physmem.h
index 597227ac..04a7ab4b 100644
--- a/src/common/physmem.h
+++ b/src/common/physmem.h
@@ -23,6 +23,10 @@
# endif
#endif
+#if defined(HAVE_PHYSMEM_SYSCONF) || defined(HAVE_NCPU_SYSCONF)
+# include <unistd.h>
+#endif
+
/// \brief Get the amount of physical memory in bytes
///
diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h
index 7f935f67..47a49fde 100644
--- a/src/common/sysdefs.h
+++ b/src/common/sysdefs.h
@@ -111,6 +111,7 @@
#endif
#include <stdlib.h>
+#include <assert.h>
// Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written
// so that it works with fake bool type, for example:
@@ -134,17 +135,6 @@ typedef unsigned char _Bool;
# define __bool_true_false_are_defined 1
#endif
-#ifdef HAVE_ASSERT_H
-# include <assert.h>
-#else
-# ifdef NDEBUG
-# define assert(x)
-# else
- // TODO: Pretty bad assert macro.
-# define assert(x) (!(x) && abort())
-# endif
-#endif
-
// string.h should be enough but let's include strings.h and memory.h too if
// they exists, since that shouldn't do any harm, but may improve portability.
#ifdef HAVE_STRING_H
diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h
index eb3768e2..06c1633c 100644
--- a/src/liblzma/api/lzma/block.h
+++ b/src/liblzma/api/lzma/block.h
@@ -1,6 +1,6 @@
/**
* \file lzma/block.h
- * \brief .lzma Block handling
+ * \brief .xz Block handling
*
* \author Copyright (C) 1999-2006 Igor Pavlov
* \author Copyright (C) 2007 Lasse Collin
@@ -131,11 +131,10 @@ typedef struct {
*
* \note Because of the array is terminated with
* .id = LZMA_VLI_UNKNOWN, the actual array must
- * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block
+ * have LZMA_FILTERS_MAX + 1 members or the Block
* Header decoder will overflow the buffer.
*/
lzma_filter *filters;
-# define LZMA_BLOCK_FILTERS_MAX 4
} lzma_block;
@@ -148,6 +147,8 @@ typedef struct {
* The size can be calculated from the first byte of a Block using this macro.
* Note that if the first byte is 0x00, it indicates beginning of Index; use
* this macro only when the byte is not 0x00.
+ *
+ * There is no encoding macro, because Block Header encoder is enough for that.
*/
#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4)
@@ -211,38 +212,50 @@ extern lzma_ret lzma_block_header_decode(lzma_block *options,
/**
- * \brief Sets Compressed Size according to Total Size
+ * \brief Sets Compressed Size according to Unpadded Size
*
- * Block Header stores Compressed Size, but Index has Total Size. If the
+ * Block Header stores Compressed Size, but Index has Unpadded Size. If the
* application has already parsed the Index and is now decoding Blocks,
- * it can calculate Compressed Size from Total Size. This function does
+ * it can calculate Compressed Size from Unpadded Size. This function does
* exactly that with error checking, so application doesn't need to check,
* for example, if the value in Index is too small to contain even the
- * Block Header. Note that you need to call this function after decoding
+ * Block Header. Note that you need to call this function _after_ decoding
* the Block Header field.
*
* \return - LZMA_OK: options->compressed_size was set successfully.
- * - LZMA_DATA_ERROR: total_size is too small compared to
+ * - LZMA_DATA_ERROR: unpadded_size is too small compared to
* options->header_size and lzma_check_sizes[options->check].
* - LZMA_PROG_ERROR: Some values are invalid. For example,
- * total_size and options->header_size must be multiples
- * of four, total_size must be at least 12, and
+ * options->header_size must be a multiple of four, and
* options->header_size between 8 and 1024 inclusive.
*/
-extern lzma_ret lzma_block_total_size_set(
- lzma_block *options, lzma_vli total_size)
+extern lzma_ret lzma_block_compressed_size(
+ lzma_block *options, lzma_vli unpadded_size)
lzma_attr_warn_unused_result;
/**
- * \brief Calculates Total Size
+ * \brief Calculates Unpadded Size
*
- * This function can be useful after decoding a Block to get Total Size
+ * This function can be useful after decoding a Block to get Unpadded Size
* that is stored in Index.
*
- * \return Total Size on success, or zero on error.
+ * \return Unpadded Size on success, or zero on error.
+ */
+extern lzma_vli lzma_block_unpadded_size(const lzma_block *options)
+ lzma_attr_pure;
+
+
+/**
+ * \brief Calculates the total encoded size of a Block
+ *
+ * This is equivalent to lzma_block_unpadded_size() except that the returned
+ * value includes the size of the Block Padding field.
+ *
+ * \return On success, total encoded size of the Block. On error,
+ * zero is returned.
*/
-extern lzma_vli lzma_block_total_size_get(const lzma_block *options)
+extern lzma_vli lzma_block_total_size(const lzma_block *options)
lzma_attr_pure;
@@ -255,8 +268,6 @@ extern lzma_vli lzma_block_total_size_get(const lzma_block *options)
* \return - LZMA_OK: All good, continue with lzma_code().
* - LZMA_MEM_ERROR
* - LZMA_OPTIONS_ERROR
- * - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit)
- * have been reached already.
* - LZMA_UNSUPPORTED_CHECK: options->check specfies a Check
* that is not supported by this buid of liblzma. Initializing
* the encoder failed.
diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h
index 53e5737e..b4fb02a7 100644
--- a/src/liblzma/api/lzma/filter.h
+++ b/src/liblzma/api/lzma/filter.h
@@ -55,6 +55,14 @@ typedef struct {
/**
+ * \brief Maximum number of filters in a chain
+ *
+ * FIXME desc
+ */
+#define LZMA_FILTERS_MAX 4
+
+
+/**
* \brief Test if the given Filter ID is supported for encoding
*
* Returns true if the give Filter ID is supported for encoding by this
diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h
index 522969d4..d6072614 100644
--- a/src/liblzma/api/lzma/index.h
+++ b/src/liblzma/api/lzma/index.h
@@ -32,12 +32,24 @@ typedef struct lzma_index_s lzma_index;
*/
typedef struct {
/**
- * Total Size of a Block.
+ * \brief Total encoded size of a Block including Block Padding
+ *
+ * This value is useful if you need to know the actual size of the
+ * Block that the Block decoder will read.
*/
lzma_vli total_size;
/**
- * Uncompressed Size of a Block
+ * \brief Encoded size of a Block excluding Block Padding
+ *
+ * This value is stored in the Index. When doing random-access
+ * reading, you should give this value to the Block decoder along
+ * with uncompressed_size.
+ */
+ lzma_vli unpadded_size;
+
+ /**
+ * \brief Uncompressed Size of a Block
*/
lzma_vli uncompressed_size;
@@ -80,7 +92,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator);
* \brief Add a new Record to an Index
*
* \param index Pointer to a lzma_index structure
- * \param total_size Total Size of a Block
+ * \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block, or
* LZMA_VLI_UNKNOWN to indicate padding.
*
@@ -92,7 +104,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator);
* - LZMA_PROG_ERROR
*/
extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator,
- lzma_vli total_size, lzma_vli uncompressed_size)
+ lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_attr_warn_unused_result;
diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h
index 58fc8061..001e6b5c 100644
--- a/src/liblzma/api/lzma/index_hash.h
+++ b/src/liblzma/api/lzma/index_hash.h
@@ -57,7 +57,7 @@ extern void lzma_index_hash_end(
* \brief Add a new Record to an Index hash
*
* \param index Pointer to a lzma_index_hash structure
- * \param total_size Total Size of a Block
+ * \param unpadded_size Unpadded Size of a Block
* \param uncompressed_size Uncompressed Size of a Block
*
* \return - LZMA_OK
@@ -67,7 +67,7 @@ extern void lzma_index_hash_end(
* used when lzma_index_hash_decode() has already been used.
*/
extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash,
- lzma_vli total_size, lzma_vli uncompressed_size)
+ lzma_vli unpadded_size, lzma_vli uncompressed_size)
lzma_attr_warn_unused_result;
diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c
index f9101c7d..2bfe0b92 100644
--- a/src/liblzma/common/block_decoder.c
+++ b/src/liblzma/common/block_decoder.c
@@ -33,13 +33,13 @@ struct lzma_coder_s {
lzma_next_coder next;
/// Decoding options; we also write Compressed Size and Uncompressed
- /// Size back to this structure when the encoding has been finished.
+ /// Size back to this structure when the decoding has been finished.
lzma_block *options;
- /// Compressed Size calculated while encoding
+ /// Compressed Size calculated while decoding
lzma_vli compressed_size;
- /// Uncompressed Size calculated while encoding
+ /// Uncompressed Size calculated while decoding
lzma_vli uncompressed_size;
/// Maximum allowed Compressed Size; this takes into account the
@@ -110,6 +110,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
if (ret != LZMA_STREAM_END)
return ret;
+ // Compressed and Uncompressed Sizes are now at their final
+ // values. Verify that they match the values given to us.
+ if (!is_size_valid(coder->compressed_size,
+ coder->options->compressed_size)
+ || !is_size_valid(coder->uncompressed_size,
+ coder->options->uncompressed_size))
+ return LZMA_DATA_ERROR;
+
+ // Copy the values into coder->options. The caller
+ // may use this information to construct Index.
+ coder->options->compressed_size = coder->compressed_size;
+ coder->options->uncompressed_size = coder->uncompressed_size;
+
coder->sequence = SEQ_PADDING;
}
@@ -118,30 +131,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator,
case SEQ_PADDING:
// Compressed Data is padded to a multiple of four bytes.
while (coder->compressed_size & 3) {
+ // We use compressed_size here just to get the Padding
+ // right. The actual Compressed Size was stored to
+ // coder->options already, and won't be modified by
+ // us anymore.
+ ++coder->compressed_size;
+
if (*in_pos >= in_size)
return LZMA_OK;
if (in[(*in_pos)++] != 0x00)
return LZMA_DATA_ERROR;
-
- if (update_size(&coder->compressed_size, 1,
- coder->compressed_limit))
- return LZMA_DATA_ERROR;
}
- // Compressed and Uncompressed Sizes are now at their final
- // values. Verify that they match the values given to us.
- if (!is_size_valid(coder->compressed_size,
- coder->options->compressed_size)
- || !is_size_valid(coder->uncompressed_size,
- coder->options->uncompressed_size))
- return LZMA_DATA_ERROR;
-
- // Copy the values into coder->options. The caller
- // may use this information to construct Index.
- coder->options->compressed_size = coder->compressed_size;
- coder->options->uncompressed_size = coder->uncompressed_size;
-
if (coder->options->check == LZMA_CHECK_NONE)
return LZMA_STREAM_END;
@@ -193,14 +195,11 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
{
lzma_next_coder_init(lzma_block_decoder_init, next, allocator);
- // While lzma_block_total_size_get() is meant to calculate the Total
- // Size, it also validates the options excluding the filters.
- if (lzma_block_total_size_get(options) == 0)
- return LZMA_PROG_ERROR;
-
- // options->check is used for array indexing so we need to know that
- // it is in the valid range.
- if ((unsigned)(options->check) > LZMA_CHECK_ID_MAX)
+ // Validate the options. lzma_block_unpadded_size() does that for us
+ // except for Uncompressed Size and filters. Filters are validated
+ // by the raw decoder.
+ if (lzma_block_unpadded_size(options) == 0
+ || !lzma_vli_is_valid(options->uncompressed_size))
return LZMA_PROG_ERROR;
// Allocate and initialize *next->coder if needed.
@@ -221,8 +220,8 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->coder->uncompressed_size = 0;
// If Compressed Size is not known, we calculate the maximum allowed
- // value so that Total Size of the Block still is a valid VLI and
- // a multiple of four.
+ // value so that encoded size of the Block (including Block Padding)
+ // is still a valid VLI and a multiple of four.
next->coder->compressed_limit
= options->compressed_size == LZMA_VLI_UNKNOWN
? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c
index 3c678f7d..6468cb44 100644
--- a/src/liblzma/common/block_encoder.c
+++ b/src/liblzma/common/block_encoder.c
@@ -27,8 +27,8 @@
/// take into account the headers etc. to determine the exact maximum size
/// of the Compressed Data field, but the complexity would give us nothing
/// useful. Instead, limit the size of Compressed Data so that even with
-/// biggest possible Block Header and Check fields the total size of the
-/// Block stays as valid VLI. This way we don't produce incorrect output
+/// biggest possible Block Header and Check fields the total encoded size of
+/// the Block stays as valid VLI. This way we don't produce incorrect output
/// if someone will really try creating a Block of 8 EiB.
///
/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of
@@ -41,9 +41,9 @@ struct lzma_coder_s {
/// The filters in the chain; initialized with lzma_raw_decoder_init().
lzma_next_coder next;
- /// Encoding options; we also write Total Size, Compressed Size, and
- /// Uncompressed Size back to this structure when the encoding has
- /// been finished.
+ /// Encoding options; we also write Unpadded Size, Compressed Size,
+ /// and Uncompressed Size back to this structure when the encoding
+ /// has been finished.
lzma_block *options;
enum {
@@ -58,8 +58,8 @@ struct lzma_coder_s {
/// Uncompressed Size calculated while encoding
lzma_vli uncompressed_size;
- /// Position when writing out the Check field
- size_t check_pos;
+ /// Position in Block Padding and the Check fields
+ size_t pos;
/// Check of the uncompressed data
lzma_check_state check;
@@ -106,6 +106,11 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
assert(*in_pos == in_size);
assert(action == LZMA_FINISH);
+ // Copy the values into coder->options. The caller
+ // may use this information to construct Index.
+ coder->options->compressed_size = coder->compressed_size;
+ coder->options->uncompressed_size = coder->uncompressed_size;
+
coder->sequence = SEQ_PADDING;
}
@@ -113,28 +118,21 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
case SEQ_PADDING:
// Pad Compressed Data to a multiple of four bytes.
- while (coder->compressed_size & 3) {
+ while ((coder->compressed_size + coder->pos) & 3) {
if (*out_pos >= out_size)
return LZMA_OK;
out[*out_pos] = 0x00;
++*out_pos;
-
- // No need to use check for overflow here since we
- // have already checked in SEQ_CODE that Compressed
- // Size will stay in proper limits.
- ++coder->compressed_size;
+ ++coder->pos;
}
- // Copy the values into coder->options. The caller
- // may use this information to construct Index.
- coder->options->compressed_size = coder->compressed_size;
- coder->options->uncompressed_size = coder->uncompressed_size;
-
if (coder->options->check == LZMA_CHECK_NONE)
return LZMA_STREAM_END;
lzma_check_finish(&coder->check, coder->options->check);
+
+ coder->pos = 0;
coder->sequence = SEQ_CHECK;
// Fall through
@@ -144,11 +142,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator,
= lzma_check_size(coder->options->check);
while (*out_pos < out_size) {
- out[*out_pos] = coder->check.buffer.u8[
- coder->check_pos];
+ out[*out_pos] = coder->check.buffer.u8[coder->pos];
++*out_pos;
- if (++coder->check_pos == check_size)
+ if (++coder->pos == check_size)
return LZMA_STREAM_END;
}
@@ -199,9 +196,9 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->coder->options = options;
next->coder->compressed_size = 0;
next->coder->uncompressed_size = 0;
+ next->coder->pos = 0;
// Initialize the check
- next->coder->check_pos = 0;
lzma_check_init(&next->coder->check, options->check);
// Initialize the requested filters.
diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c
index 3b8e9f36..8421ac37 100644
--- a/src/liblzma/common/block_header_decoder.c
+++ b/src/liblzma/common/block_header_decoder.c
@@ -27,7 +27,7 @@ free_properties(lzma_block *options, lzma_allocator *allocator)
// Free allocated filter options. The last array member is not
// touched after the initialization in the beginning of
// lzma_block_header_decode(), so we don't need to touch that here.
- for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) {
+ for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) {
lzma_free(options->filters[i].options, allocator);
options->filters[i].id = LZMA_VLI_UNKNOWN;
options->filters[i].options = NULL;
@@ -48,24 +48,19 @@ lzma_block_header_decode(lzma_block *options,
// Initialize the filter options array. This way the caller can
// safely free() the options even if an error occurs in this function.
- for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) {
+ for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
options->filters[i].id = LZMA_VLI_UNKNOWN;
options->filters[i].options = NULL;
}
- size_t in_size = options->header_size;
-
- // Validate. The caller must have set options->header_size with
- // lzma_block_header_size_decode() macro, so it is a programming error
- // if these tests fail.
- if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN
- || in_size > LZMA_BLOCK_HEADER_SIZE_MAX
- || (in_size & 3)
- || lzma_block_header_size_decode(in[0]) != in_size)
+ // Validate Block Header Size and Check type. The caller must have
+ // already set these, so it is a programming error if this test fails.
+ if (lzma_block_header_size_decode(in[0]) != options->header_size
+ || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
return LZMA_PROG_ERROR;
// Exclude the CRC32 field.
- in_size -= 4;
+ const size_t in_size = options->header_size - 4;
// Verify CRC32
if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size))
@@ -83,15 +78,9 @@ lzma_block_header_decode(lzma_block *options,
return_if_error(lzma_vli_decode(&options->compressed_size,
NULL, in, &in_pos, in_size));
- if (options->compressed_size > LZMA_VLI_MAX / 4 - 1)
- return LZMA_DATA_ERROR;
-
- options->compressed_size = (options->compressed_size + 1) * 4;
-
- // Check that Total Size (that is, size of
- // Block Header + Compressed Data + Check) is
- // representable as a VLI.
- if (lzma_block_total_size_get(options) == 0)
+ // Validate Compressed Size. This checks that it isn't zero
+ // and that the total size of the Block is a valid VLI.
+ if (lzma_block_unpadded_size(options) == 0)
return LZMA_DATA_ERROR;
} else {
options->compressed_size = LZMA_VLI_UNKNOWN;
diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c
index 9326350b..b9980363 100644
--- a/src/liblzma/common/block_header_encoder.c
+++ b/src/liblzma/common/block_header_encoder.c
@@ -25,21 +25,20 @@ extern LZMA_API lzma_ret
lzma_block_header_size(lzma_block *options)
{
// Block Header Size + Block Flags + CRC32.
- size_t size = 1 + 1 + 4;
+ uint32_t size = 1 + 1 + 4;
// Compressed Size
if (options->compressed_size != LZMA_VLI_UNKNOWN) {
- if (options->compressed_size > LZMA_VLI_MAX / 4 - 1
- || options->compressed_size == 0
- || (options->compressed_size & 3))
+ const uint32_t add = lzma_vli_size(options->compressed_size);
+ if (add == 0 || options->compressed_size == 0)
return LZMA_PROG_ERROR;
- size += lzma_vli_size(options->compressed_size / 4 - 1);
+ size += add;
}
// Uncompressed Size
if (options->uncompressed_size != LZMA_VLI_UNKNOWN) {
- const size_t add = lzma_vli_size(options->uncompressed_size);
+ const uint32_t add = lzma_vli_size(options->uncompressed_size);
if (add == 0)
return LZMA_PROG_ERROR;
@@ -51,10 +50,9 @@ lzma_block_header_size(lzma_block *options)
|| options->filters[0].id == LZMA_VLI_UNKNOWN)
return LZMA_PROG_ERROR;
- for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN;
- ++i) {
+ for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
// Don't allow too many filters.
- if (i == 4)
+ if (i == LZMA_FILTERS_MAX)
return LZMA_PROG_ERROR;
uint32_t add;
@@ -65,12 +63,13 @@ lzma_block_header_size(lzma_block *options)
}
// Pad to a multiple of four bytes.
- options->header_size = (size + 3) & ~(size_t)(3);
+ options->header_size = (size + 3) & ~UINT32_C(3);
- // NOTE: We don't verify that Total Size of the Block stays within
- // limits. This is because it is possible that we are called with
- // exaggerated values to reserve space for Block Header, and later
- // called again with lower, real values.
+ // NOTE: We don't verify that the encoded size of the Block stays
+ // within limits. This is because it is possible that we are called
+ // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve
+ // space for Block Header, and later called again with lower,
+ // real values.
return LZMA_OK;
}
@@ -79,9 +78,9 @@ lzma_block_header_size(lzma_block *options)
extern LZMA_API lzma_ret
lzma_block_header_encode(const lzma_block *options, uint8_t *out)
{
- if ((options->header_size & 3)
- || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
- || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
+ // Validate everything but filters.
+ if (lzma_block_unpadded_size(options) == 0
+ || !lzma_vli_is_valid(options->uncompressed_size))
return LZMA_PROG_ERROR;
// Indicate the size of the buffer _excluding_ the CRC32 field.
@@ -90,32 +89,28 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out)
// Store the Block Header Size.
out[0] = out_size / 4;
- // We write Block Flags a little later.
+ // We write Block Flags in pieces.
+ out[1] = 0x00;
size_t out_pos = 2;
// Compressed Size
if (options->compressed_size != LZMA_VLI_UNKNOWN) {
- // Compressed Size must be non-zero, fit into a 63-bit
- // integer and be a multiple of four. Also the Total Size
- // of the Block must fit into 63-bit integer.
- if (options->compressed_size == 0
- || (options->compressed_size & 3)
- || options->compressed_size
- > LZMA_VLI_MAX
- || lzma_block_total_size_get(options) == 0)
- return LZMA_PROG_ERROR;
-
return_if_error(lzma_vli_encode(
- options->compressed_size / 4 - 1, NULL,
+ options->compressed_size, NULL,
out, &out_pos, out_size));
+
+ out[1] |= 0x40;
}
// Uncompressed Size
- if (options->uncompressed_size != LZMA_VLI_UNKNOWN)
+ if (options->uncompressed_size != LZMA_VLI_UNKNOWN) {
return_if_error(lzma_vli_encode(
options->uncompressed_size, NULL,
out, &out_pos, out_size));
+ out[1] |= 0x80;
+ }
+
// Filter Flags
if (options->filters == NULL
|| options->filters[0].id == LZMA_VLI_UNKNOWN)
@@ -124,24 +119,16 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out)
size_t filter_count = 0;
do {
// There can be at maximum of four filters.
- if (filter_count == 4)
+ if (filter_count == LZMA_FILTERS_MAX)
return LZMA_PROG_ERROR;
return_if_error(lzma_filter_flags_encode(
options->filters + filter_count,
out, &out_pos, out_size));
- } while (options->filters[++filter_count].id
- != LZMA_VLI_UNKNOWN);
-
- // Block Flags
- out[1] = filter_count - 1;
+ } while (options->filters[++filter_count].id != LZMA_VLI_UNKNOWN);
- if (options->compressed_size != LZMA_VLI_UNKNOWN)
- out[1] |= 0x40;
-
- if (options->uncompressed_size != LZMA_VLI_UNKNOWN)
- out[1] |= 0x80;
+ out[1] |= filter_count - 1;
// Padding
memzero(out + out_pos, out_size - out_pos);
diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c
index 7b46ba32..66e1cad9 100644
--- a/src/liblzma/common/block_util.c
+++ b/src/liblzma/common/block_util.c
@@ -18,10 +18,11 @@
///////////////////////////////////////////////////////////////////////////////
#include "common.h"
+#include "index.h"
extern LZMA_API lzma_ret
-lzma_block_total_size_set(lzma_block *options, lzma_vli total_size)
+lzma_block_compressed_size(lzma_block *options, lzma_vli total_size)
{
// Validate.
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
@@ -45,29 +46,47 @@ lzma_block_total_size_set(lzma_block *options, lzma_vli total_size)
extern LZMA_API lzma_vli
-lzma_block_total_size_get(const lzma_block *options)
+lzma_block_unpadded_size(const lzma_block *options)
{
- // Validate the values that we are interested in.
+ // Validate the values that we are interested in i.e. all but
+ // Uncompressed Size and the filters.
+ //
+ // NOTE: This function is used for validation too, so it is
+ // essential that these checks are always done even if
+ // Compressed Size is unknown.
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
|| options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
|| (options->header_size & 3)
- || (unsigned)(options->check) > LZMA_CHECK_ID_MAX)
+ || !lzma_vli_is_valid(options->compressed_size)
+ || options->compressed_size == 0
+ || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
return 0;
// If Compressed Size is unknown, return that we cannot know
- // Total Size either.
+ // size of the Block either.
if (options->compressed_size == LZMA_VLI_UNKNOWN)
return LZMA_VLI_UNKNOWN;
- const lzma_vli total_size = options->compressed_size
- + options->header_size
- + lzma_check_size(options->check);
+ // Calculate Unpadded Size and validate it.
+ const lzma_vli unpadded_size = options->compressed_size
+ + options->header_size
+ + lzma_check_size(options->check);
- // Validate the calculated Total Size.
- if (options->compressed_size > LZMA_VLI_MAX
- || (options->compressed_size & 3)
- || total_size > LZMA_VLI_MAX)
+ assert(unpadded_size >= UNPADDED_SIZE_MIN);
+ if (unpadded_size > UNPADDED_SIZE_MAX)
return 0;
- return total_size;
+ return unpadded_size;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_block_total_size(const lzma_block *options)
+{
+ lzma_vli unpadded_size = lzma_block_unpadded_size(options);
+
+ if (unpadded_size != 0 && unpadded_size != LZMA_VLI_UNKNOWN)
+ unpadded_size = vli_ceil4(unpadded_size);
+
+ return unpadded_size;
}
diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h
index 275cf05f..0ee8574c 100644
--- a/src/liblzma/common/common.h
+++ b/src/liblzma/common/common.h
@@ -66,10 +66,6 @@
| LZMA_CONCATENATED )
-///////////
-// Types //
-///////////
-
/// Type of encoder/decoder specific data; the actual structure is defined
/// differently in different coders.
typedef struct lzma_coder_s lzma_coder;
@@ -187,10 +183,6 @@ struct lzma_internal_s {
};
-///////////////
-// Functions //
-///////////////
-
/// Allocates memory
extern void *lzma_alloc(size_t size, lzma_allocator *allocator)
lzma_attribute((malloc));
diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c
index 71ceeca0..03b6859a 100644
--- a/src/liblzma/common/filter_common.c
+++ b/src/liblzma/common/filter_common.c
@@ -164,7 +164,7 @@ validate_chain(const lzma_filter *filters, size_t *count)
// There must be 1-4 filters. The last filter must be usable as
// the last filter in the chain. At maximum of three filters are
// allowed to change the size of the data.
- if (i > LZMA_BLOCK_FILTERS_MAX || !last_ok || changes_size_count > 3)
+ if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3)
return LZMA_OPTIONS_ERROR;
*count = i;
@@ -182,7 +182,7 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator,
return_if_error(validate_chain(options, &count));
// Set the filter functions and copy the options pointer.
- lzma_filter_info filters[LZMA_BLOCK_FILTERS_MAX + 1];
+ lzma_filter_info filters[LZMA_FILTERS_MAX + 1];
if (is_encoder) {
for (size_t i = 0; i < count; ++i) {
// The order of the filters is reversed in the
diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c
index f965749f..1fe65650 100644
--- a/src/liblzma/common/index.c
+++ b/src/liblzma/common/index.c
@@ -20,24 +20,34 @@
#include "index.h"
-/// Number of Records to allocate at once.
+/// Number of Records to allocate at once in the unrolled list.
#define INDEX_GROUP_SIZE 256
typedef struct lzma_index_group_s lzma_index_group;
struct lzma_index_group_s {
- /// Next group
+ /// Previous group
lzma_index_group *prev;
- /// Previous group
+ /// Next group
lzma_index_group *next;
/// Index of the last Record in this group
size_t last;
- /// Total Size fields as cumulative sum relative to the beginning
- /// of the group. The total size of the group is total_sums[last].
- lzma_vli total_sums[INDEX_GROUP_SIZE];
+ /// Unpadded Size fields as special cumulative sum relative to the
+ /// beginning of the group. It's special in the sense that the previous
+ /// value is rounded up to the next multiple of four before
+ /// calculating the new value. The total encoded size of the Blocks
+ /// in the group is unpadded_sums[last] rounded up to the next
+ /// multiple of four.
+ ///
+ /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored
+ /// values are 39, 97 (40 + 57), and 181 (100 + 81). The total
+ /// encoded size of these Blocks is 184.
+ ///
+ /// This encoding is nice from point of view of lzma_index_locate().
+ lzma_vli unpadded_sums[INDEX_GROUP_SIZE];
/// Uncompressed Size fields as cumulative sum relative to the
/// beginning of the group. The uncompressed size of the group is
@@ -56,19 +66,13 @@ struct lzma_index_s {
/// Uncompressed size of the Stream
lzma_vli uncompressed_size;
- /// Number of non-padding records. This is needed by Index encoder.
+ /// Number of non-padding records. This is needed for Index encoder.
lzma_vli count;
/// Size of the List of Records field; this is updated every time
/// a new non-padding Record is added.
lzma_vli index_list_size;
- /// This is zero if no Indexes have been combined with
- /// lzma_index_cat(). With combined Indexes, this contains the sizes
- /// of all but latest the Streams, including possible Stream Padding
- /// fields.
- lzma_vli padding_size;
-
/// First group of Records
lzma_index_group *head;
@@ -80,8 +84,8 @@ struct lzma_index_s {
/// Group where the current read position is.
lzma_index_group *group;
- /// The most recently read record in *group
- lzma_vli record;
+ /// The most recently read Record in *group
+ size_t record;
/// Uncompressed offset of the beginning of *group relative
/// to the beginning of the Stream
@@ -102,6 +106,10 @@ struct lzma_index_s {
/// Stream. This is needed when a new Index is concatenated
/// to this lzma_index structure.
lzma_vli index_list_size;
+
+ /// Total size of all but the last Stream and all Stream
+ /// Padding fields.
+ lzma_vli streams_size;
} old;
};
@@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator)
i->uncompressed_size = 0;
i->count = 0;
i->index_list_size = 0;
- i->padding_size = 0;
i->head = NULL;
i->tail = NULL;
i->current.group = NULL;
i->old.count = 0;
i->old.index_list_size = 0;
+ i->old.streams_size = 0;
return i;
}
@@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i)
{
// If multiple Streams are concatenated, the Stream Header, Index,
// and Stream Footer fields of all but the last Stream are already
- // included in padding_size. Thus, we need to calculate only the
+ // included in old.streams_size. Thus, we need to calculate only the
// size of the last Index, not all Indexes.
- return i->total_size + i->padding_size
+ return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size
+ index_size(i->count - i->old.count,
i->index_list_size - i->old.index_list_size)
- + LZMA_STREAM_HEADER_SIZE * 2;
+ + LZMA_STREAM_HEADER_SIZE;
}
@@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i)
}
-/// Helper function for index_append()
+/// Appends a new Record to the Index. If needed, this allocates a new
+/// Record group.
static lzma_ret
index_append_real(lzma_index *i, lzma_allocator *allocator,
- lzma_vli total_size, lzma_vli uncompressed_size,
+ lzma_vli unpadded_size, lzma_vli uncompressed_size,
bool is_padding)
{
// Add the new record.
@@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
g->prev = i->tail;
g->next = NULL;
g->last = 0;
- g->total_sums[0] = total_size;
+ g->unpadded_sums[0] = unpadded_size;
g->uncompressed_sums[0] = uncompressed_size;
g->paddings[0] = is_padding;
@@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
} else {
// i->tail has space left for at least one record.
- i->tail->total_sums[i->tail->last + 1]
- = i->tail->total_sums[i->tail->last]
- + total_size;
+ i->tail->unpadded_sums[i->tail->last + 1]
+ = unpadded_size + vli_ceil4(
+ i->tail->unpadded_sums[i->tail->last]);
i->tail->uncompressed_sums[i->tail->last + 1]
= i->tail->uncompressed_sums[i->tail->last]
+ uncompressed_size;
@@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator,
}
-static lzma_ret
-index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
- lzma_vli uncompressed_size, bool is_padding)
+extern LZMA_API lzma_ret
+lzma_index_append(lzma_index *i, lzma_allocator *allocator,
+ lzma_vli unpadded_size, lzma_vli uncompressed_size)
{
- if (total_size > LZMA_VLI_MAX
+ if (unpadded_size < UNPADDED_SIZE_MIN
+ || unpadded_size > UNPADDED_SIZE_MAX
|| uncompressed_size > LZMA_VLI_MAX)
- return LZMA_DATA_ERROR;
+ return LZMA_PROG_ERROR;
// This looks a bit ugly. We want to first validate that the Index
// and Stream stay in valid limits after adding this Record. After
@@ -280,65 +290,38 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size,
// slightly more correct to validate before allocating, YMMV).
lzma_ret ret;
- if (is_padding) {
- assert(uncompressed_size == 0);
+ // First update the overall info so we can validate it.
+ const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size)
+ + lzma_vli_size(uncompressed_size);
- // First update the info so we can validate it.
- i->padding_size += total_size;
-
- if (i->padding_size > LZMA_VLI_MAX
- || lzma_index_file_size(i) > LZMA_VLI_MAX)
- ret = LZMA_DATA_ERROR; // Would grow past the limits.
- else
- ret = index_append_real(i, allocator,
- total_size, uncompressed_size, true);
-
- // If something went wrong, undo the updated value.
- if (ret != LZMA_OK)
- i->padding_size -= total_size;
+ const lzma_vli total_size = vli_ceil4(unpadded_size);
- } else {
- // First update the overall info so we can validate it.
- const lzma_vli index_list_size_add
- = lzma_vli_size(total_size / 4 - 1)
- + lzma_vli_size(uncompressed_size);
-
- i->total_size += total_size;
- i->uncompressed_size += uncompressed_size;
- ++i->count;
- i->index_list_size += index_list_size_add;
-
- if (i->total_size > LZMA_VLI_MAX
- || i->uncompressed_size > LZMA_VLI_MAX
- || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
- || lzma_index_file_size(i) > LZMA_VLI_MAX)
- ret = LZMA_DATA_ERROR; // Would grow past the limits.
- else
- ret = index_append_real(i, allocator,
- total_size, uncompressed_size, false);
+ i->total_size += total_size;
+ i->uncompressed_size += uncompressed_size;
+ ++i->count;
+ i->index_list_size += index_list_size_add;
- if (ret != LZMA_OK) {
- // Something went wrong. Undo the updates.
- i->total_size -= total_size;
- i->uncompressed_size -= uncompressed_size;
- --i->count;
- i->index_list_size -= index_list_size_add;
- }
+ if (i->total_size > LZMA_VLI_MAX
+ || i->uncompressed_size > LZMA_VLI_MAX
+ || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX
+ || lzma_index_file_size(i) > LZMA_VLI_MAX)
+ ret = LZMA_DATA_ERROR; // Would grow past the limits.
+ else
+ ret = index_append_real(i, allocator, unpadded_size,
+ uncompressed_size, false);
+
+ if (ret != LZMA_OK) {
+ // Something went wrong. Undo the updates.
+ i->total_size -= total_size;
+ i->uncompressed_size -= uncompressed_size;
+ --i->count;
+ i->index_list_size -= index_list_size_add;
}
return ret;
}
-extern LZMA_API lzma_ret
-lzma_index_append(lzma_index *i, lzma_allocator *allocator,
- lzma_vli total_size, lzma_vli uncompressed_size)
-{
- return index_append(i, allocator,
- total_size, uncompressed_size, false);
-}
-
-
/// Initialize i->current to point to the first Record.
static bool
init_current(lzma_index *i)
@@ -370,10 +353,10 @@ previous_group(lzma_index *i)
i->current.record = i->current.group->last;
// Then update the offsets.
- i->current.stream_offset -= i->current.group
- ->total_sums[i->current.group->last];
- i->current.uncompressed_offset -= i->current.group
- ->uncompressed_sums[i->current.group->last];
+ i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[
+ i->current.group->last]);
+ i->current.uncompressed_offset -= i->current.group->uncompressed_sums[
+ i->current.group->last];
return;
}
@@ -386,8 +369,8 @@ next_group(lzma_index *i)
assert(i->current.group->next != NULL);
// Update the offsets first.
- i->current.stream_offset += i->current.group
- ->total_sums[i->current.group->last];
+ i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[
+ i->current.group->last]);
i->current.uncompressed_offset += i->current.group
->uncompressed_sums[i->current.group->last];
@@ -403,30 +386,39 @@ next_group(lzma_index *i)
static void
set_info(const lzma_index *i, lzma_index_record *info)
{
- info->total_size = i->current.group->total_sums[i->current.record];
+ // First copy the cumulative sizes from the current Record of the
+ // current group.
+ info->unpadded_size
+ = i->current.group->unpadded_sums[i->current.record];
+ info->total_size = vli_ceil4(info->unpadded_size);
info->uncompressed_size = i->current.group->uncompressed_sums[
i->current.record];
+ // Copy the start offsets of this group.
info->stream_offset = i->current.stream_offset;
info->uncompressed_offset = i->current.uncompressed_offset;
// If it's not the first Record in this group, we need to do some
// adjustements.
if (i->current.record > 0) {
- // _sums[] are cumulative, thus we need to substract the
- // _previous _sums[] to get the sizes of this Record.
- info->total_size -= i->current.group
- ->total_sums[i->current.record - 1];
- info->uncompressed_size -= i->current.group
+ // Since the _sums[] are cumulative, we subtract the sums of
+ // the previous Record to get the sizes of the current Record,
+ // and add the sums of the previous Record to the offsets.
+ // With unpadded_sums[] we need to take into account that it
+ // uses a bit weird way to do the cumulative summing.
+ const lzma_vli total_sum
+ = vli_ceil4(i->current.group->unpadded_sums[
+ i->current.record - 1]);
+
+ const lzma_vli uncompressed_sum = i->current.group
->uncompressed_sums[i->current.record - 1];
- // i->current.{total,uncompressed}_offsets have the offset
- // of the beginning of the group, thus we need to add the
- // appropriate amount to get the offsetes of this Record.
- info->stream_offset += i->current.group
- ->total_sums[i->current.record - 1];
- info->uncompressed_offset += i->current.group
- ->uncompressed_sums[i->current.record - 1];
+ info->total_size -= total_sum;
+ info->unpadded_size -= total_sum;
+ info->uncompressed_size -= uncompressed_sum;
+
+ info->stream_offset += total_sum;
+ info->uncompressed_offset += uncompressed_sum;
}
return;
@@ -548,11 +540,22 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
// Check that the combined size of the Indexes stays within limits.
{
+ const lzma_vli dest_size = index_size_unpadded(
+ dest->count, dest->index_list_size);
+ const lzma_vli src_size = index_size_unpadded(
+ src->count, src->index_list_size);
+ if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+ }
+
+ // Check that the combined size of the "files" (combined total
+ // encoded sizes) stays within limits.
+ {
const lzma_vli dest_size = lzma_index_file_size(dest);
const lzma_vli src_size = lzma_index_file_size(src);
- if (dest_size + src_size > LZMA_VLI_UNKNOWN
+ if (dest_size + src_size > LZMA_VLI_MAX
|| dest_size + src_size + padding
- > LZMA_VLI_UNKNOWN)
+ > LZMA_VLI_MAX)
return LZMA_DATA_ERROR;
}
@@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
//
// NOTE: This cannot overflow, because Index Size is always
// far smaller than LZMA_VLI_MAX, and adding two VLIs
- // (Index Size and padding) doesn't overflow. It may become
- // an invalid VLI if padding is huge, but that is caught by
- // index_append().
+ // (Index Size and padding) doesn't overflow.
padding += index_size(dest->count - dest->old.count,
dest->index_list_size
- dest->old.index_list_size)
+ LZMA_STREAM_HEADER_SIZE * 2;
+ // The above cannot overflow, but it may become an invalid VLI.
+ if (padding > LZMA_VLI_MAX)
+ return LZMA_DATA_ERROR;
+
// Add the padding Record.
- return_if_error(index_append(
- dest, allocator, padding, 0, true));
+ {
+ lzma_ret ret;
+
+ // First update the info so we can validate it.
+ dest->old.streams_size += padding;
+
+ if (dest->old.streams_size > LZMA_VLI_MAX
+ || lzma_index_file_size(dest) > LZMA_VLI_MAX)
+ ret = LZMA_DATA_ERROR; // Would grow past the limits.
+ else
+ ret = index_append_real(dest, allocator,
+ padding, 0, true);
+
+ // If something went wrong, undo the updated value and return
+ // the error.
+ if (ret != LZMA_OK) {
+ dest->old.streams_size -= padding;
+ return ret;
+ }
+ }
// Avoid wasting lots of memory if src->head has only a few records
// that fit into dest->tail. That is, combine two groups if possible.
@@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
if (src->head != NULL && src->head->last + 1
<= INDEX_GROUP_SIZE - dest->tail->last - 1) {
// Copy the first Record.
- dest->tail->total_sums[dest->tail->last + 1]
- = dest->tail->total_sums[dest->tail->last]
- + src->head->total_sums[0];
+ dest->tail->unpadded_sums[dest->tail->last + 1]
+ = vli_ceil4(dest->tail->unpadded_sums[
+ dest->tail->last])
+ + src->head->unpadded_sums[0];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[dest->tail->last]
@@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
// Copy the rest.
for (size_t i = 1; i < src->head->last; ++i) {
- dest->tail->total_sums[dest->tail->last + 1]
- = dest->tail->total_sums[dest->tail->last]
- + src->head->total_sums[i + 1]
- - src->head->total_sums[i];
+ dest->tail->unpadded_sums[dest->tail->last + 1]
+ = vli_ceil4(dest->tail->unpadded_sums[
+ dest->tail->last])
+ + src->head->unpadded_sums[i + 1]
+ - src->head->unpadded_sums[i];
dest->tail->uncompressed_sums[dest->tail->last + 1]
= dest->tail->uncompressed_sums[
@@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
dest->old.count = dest->count + src->old.count;
dest->old.index_list_size
= dest->index_list_size + src->old.index_list_size;
+ dest->old.streams_size += src->old.streams_size;
// Update overall information.
dest->total_size += src->total_size;
dest->uncompressed_size += src->uncompressed_size;
dest->count += src->count;
dest->index_list_size += src->index_list_size;
- dest->padding_size += src->padding_size;
// *src has nothing left but the base structure.
lzma_free(src, allocator);
@@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator)
// Copy the arrays so that we don't read uninitialized memory.
const size_t count = src_group->last + 1;
- memcpy(dest_group->total_sums, src_group->total_sums,
+ memcpy(dest_group->unpadded_sums, src_group->unpadded_sums,
sizeof(lzma_vli) * count);
memcpy(dest_group->uncompressed_sums,
src_group->uncompressed_sums,
@@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b)
while (ag != NULL && bg != NULL) {
const size_t count = ag->last + 1;
if (ag->last != bg->last
- || memcmp(ag->total_sums,
- bg->total_sums,
+ || memcmp(ag->unpadded_sums,
+ bg->unpadded_sums,
sizeof(lzma_vli) * count) != 0
|| memcmp(ag->uncompressed_sums,
bg->uncompressed_sums,
diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h
index df897367..79719dd7 100644
--- a/src/liblzma/common/index.h
+++ b/src/liblzma/common/index.h
@@ -23,14 +23,11 @@
#include "common.h"
-/// Maximum encoded value of Total Size.
-#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_MAX / 4 - 1)
+/// Minimum Unpadded Size
+#define UNPADDED_SIZE_MIN LZMA_VLI_C(5)
-/// Convert the real Total Size value to a value that is stored to the Index.
-#define total_size_encode(size) ((size) / 4 - 1)
-
-/// Convert the encoded Total Size value from Index to the real Total Size.
-#define total_size_decode(size) (((size) + 1) * 4)
+/// Maximum Unpadded Size
+#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
/// Get the size of the Index Padding field. This is needed by Index encoder
@@ -38,6 +35,16 @@
extern uint32_t lzma_index_padding_size(const lzma_index *i);
+/// Round the variable-length integer to the next multiple of four.
+static inline lzma_vli
+vli_ceil4(lzma_vli vli)
+{
+ assert(vli <= LZMA_VLI_MAX);
+ return (vli + 3) & ~LZMA_VLI_C(3);
+}
+
+
+/// Calculate the size of the Index field excluding Index Padding
static inline lzma_vli
index_size_unpadded(lzma_vli count, lzma_vli index_list_size)
{
@@ -46,20 +53,20 @@ index_size_unpadded(lzma_vli count, lzma_vli index_list_size)
}
+/// Calculate the size of the Index field including Index Padding
static inline lzma_vli
index_size(lzma_vli count, lzma_vli index_list_size)
{
- // Round up to a mulitiple of four.
- return (index_size_unpadded(count, index_list_size) + 3)
- & ~LZMA_VLI_C(3);
+ return vli_ceil4(index_size_unpadded(count, index_list_size));
}
+/// Calculate the total size of the Stream
static inline lzma_vli
-index_stream_size(
- lzma_vli total_size, lzma_vli count, lzma_vli index_list_size)
+index_stream_size(lzma_vli blocks_size,
+ lzma_vli count, lzma_vli index_list_size)
{
- return LZMA_STREAM_HEADER_SIZE + total_size
+ return LZMA_STREAM_HEADER_SIZE + blocks_size
+ index_size(count, index_list_size)
+ LZMA_STREAM_HEADER_SIZE;
}
diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c
index ae66595a..5faac161 100644
--- a/src/liblzma/common/index_decoder.c
+++ b/src/liblzma/common/index_decoder.c
@@ -25,7 +25,7 @@ struct lzma_coder_s {
enum {
SEQ_INDICATOR,
SEQ_COUNT,
- SEQ_TOTAL,
+ SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_PADDING_INIT,
SEQ_PADDING,
@@ -38,8 +38,8 @@ struct lzma_coder_s {
/// Number of Records left to decode.
lzma_vli count;
- /// The most recent Total Size field
- lzma_vli total_size;
+ /// The most recent Unpadded Size field
+ lzma_vli unpadded_size;
/// The most recent Uncompressed Size field
lzma_vli uncompressed_size;
@@ -91,14 +91,14 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator,
ret = LZMA_OK;
coder->pos = 0;
coder->sequence = coder->count == 0
- ? SEQ_PADDING_INIT : SEQ_TOTAL;
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
break;
}
- case SEQ_TOTAL:
+ case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
- lzma_vli *size = coder->sequence == SEQ_TOTAL
- ? &coder->total_size
+ lzma_vli *size = coder->sequence == SEQ_UNPADDED
+ ? &coder->unpadded_size
: &coder->uncompressed_size;
ret = lzma_vli_decode(size, &coder->pos,
@@ -109,27 +109,26 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator,
ret = LZMA_OK;
coder->pos = 0;
- if (coder->sequence == SEQ_TOTAL) {
- // Validate that encoded Total Size isn't too big.
- if (coder->total_size > TOTAL_SIZE_ENCODED_MAX)
+ if (coder->sequence == SEQ_UNPADDED) {
+ // Validate that encoded Unpadded Size isn't too small
+ // or too big.
+ if (coder->unpadded_size < UNPADDED_SIZE_MIN
+ || coder->unpadded_size
+ > UNPADDED_SIZE_MAX)
return LZMA_DATA_ERROR;
- // Convert the encoded Total Size to the real
- // Total Size.
- coder->total_size = total_size_decode(
- coder->total_size);
coder->sequence = SEQ_UNCOMPRESSED;
} else {
// Add the decoded Record to the Index.
return_if_error(lzma_index_append(
coder->index, allocator,
- coder->total_size,
+ coder->unpadded_size,
coder->uncompressed_size));
// Check if this was the last Record.
coder->sequence = --coder->count == 0
? SEQ_PADDING_INIT
- : SEQ_TOTAL;
+ : SEQ_UNPADDED;
}
break;
diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c
index 3005f835..522dbb53 100644
--- a/src/liblzma/common/index_encoder.c
+++ b/src/liblzma/common/index_encoder.c
@@ -26,7 +26,7 @@ struct lzma_coder_s {
enum {
SEQ_INDICATOR,
SEQ_COUNT,
- SEQ_TOTAL,
+ SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_NEXT,
SEQ_PADDING,
@@ -97,18 +97,20 @@ index_encode(lzma_coder *coder,
break;
}
- // Total Size must be a multiple of four.
- if (coder->record.total_size & 3)
+ // Unpadded Size must be within valid limits.
+ if (coder->record.unpadded_size < UNPADDED_SIZE_MIN
+ || coder->record.unpadded_size
+ > UNPADDED_SIZE_MAX)
return LZMA_PROG_ERROR;
- coder->sequence = SEQ_TOTAL;
+ coder->sequence = SEQ_UNPADDED;
// Fall through
- case SEQ_TOTAL:
+ case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
- const lzma_vli size = coder->sequence == SEQ_TOTAL
- ? total_size_encode(coder->record.total_size)
+ const lzma_vli size = coder->sequence == SEQ_UNPADDED
+ ? coder->record.unpadded_size
: coder->record.uncompressed_size;
ret = lzma_vli_encode(size, &coder->pos,
diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c
index 5e581838..162094d1 100644
--- a/src/liblzma/common/index_hash.c
+++ b/src/liblzma/common/index_hash.c
@@ -23,8 +23,8 @@
typedef struct {
- /// Sum of the Total Size fields
- lzma_vli total_size;
+ /// Sum of the Block sizes (including Block Padding)
+ lzma_vli blocks_size;
/// Sum of the Uncompressed Size fields
lzma_vli uncompressed_size;
@@ -35,7 +35,7 @@ typedef struct {
/// Size of the List of Index Records as bytes
lzma_vli index_list_size;
- /// Check calculated from Total Sizes and Uncompressed Sizes.
+ /// Check calculated from Unpadded Sizes and Uncompressed Sizes.
lzma_check_state check;
} lzma_index_hash_info;
@@ -45,7 +45,7 @@ struct lzma_index_hash_s {
enum {
SEQ_BLOCK,
SEQ_COUNT,
- SEQ_TOTAL,
+ SEQ_UNPADDED,
SEQ_UNCOMPRESSED,
SEQ_PADDING_INIT,
SEQ_PADDING,
@@ -61,8 +61,8 @@ struct lzma_index_hash_s {
/// Number of Records not fully decoded
lzma_vli remaining;
- /// Total Size currently being read from an Index Record.
- lzma_vli total_size;
+ /// Unpadded Size currently being read from an Index Record.
+ lzma_vli unpadded_size;
/// Uncompressed Size currently being read from an Index Record.
lzma_vli uncompressed_size;
@@ -86,15 +86,15 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
}
index_hash->sequence = SEQ_BLOCK;
- index_hash->blocks.total_size = 0;
+ index_hash->blocks.blocks_size = 0;
index_hash->blocks.uncompressed_size = 0;
index_hash->blocks.count = 0;
index_hash->blocks.index_list_size = 0;
- index_hash->records.total_size = 0;
+ index_hash->records.blocks_size = 0;
index_hash->records.uncompressed_size = 0;
index_hash->records.count = 0;
index_hash->records.index_list_size = 0;
- index_hash->total_size = 0;
+ index_hash->unpadded_size = 0;
index_hash->uncompressed_size = 0;
index_hash->pos = 0;
index_hash->crc32 = 0;
@@ -128,16 +128,16 @@ lzma_index_hash_size(const lzma_index_hash *index_hash)
/// Updates the sizes and the hash without any validation.
static lzma_ret
-hash_append(lzma_index_hash_info *info, lzma_vli total_size,
+hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
lzma_vli uncompressed_size)
{
- info->total_size += total_size;
+ info->blocks_size += vli_ceil4(unpadded_size);
info->uncompressed_size += uncompressed_size;
- info->index_list_size += lzma_vli_size(total_size_encode(total_size))
+ info->index_list_size += lzma_vli_size(unpadded_size)
+ lzma_vli_size(uncompressed_size);
++info->count;
- const lzma_vli sizes[2] = { total_size, uncompressed_size };
+ const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
lzma_check_update(&info->check, LZMA_CHECK_BEST,
(const uint8_t *)(sizes), sizeof(sizes));
@@ -146,26 +146,27 @@ hash_append(lzma_index_hash_info *info, lzma_vli total_size,
extern LZMA_API lzma_ret
-lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size,
+lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
lzma_vli uncompressed_size)
{
// Validate the arguments.
- if (index_hash->sequence != SEQ_BLOCK || total_size == 0
- || total_size > LZMA_VLI_MAX || (total_size & 3)
+ if (index_hash->sequence != SEQ_BLOCK
+ || unpadded_size < UNPADDED_SIZE_MIN
+ || unpadded_size > UNPADDED_SIZE_MAX
|| uncompressed_size > LZMA_VLI_MAX)
return LZMA_PROG_ERROR;
// Update the hash.
return_if_error(hash_append(&index_hash->blocks,
- total_size, uncompressed_size));
+ unpadded_size, uncompressed_size));
// Validate the properties of *info are still in allowed limits.
- if (index_hash->blocks.total_size > LZMA_VLI_MAX
+ if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
|| index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
|| index_size(index_hash->blocks.count,
index_hash->blocks.index_list_size)
> LZMA_BACKWARD_SIZE_MAX
- || index_stream_size(index_hash->blocks.total_size,
+ || index_stream_size(index_hash->blocks.blocks_size,
index_hash->blocks.count,
index_hash->blocks.index_list_size)
> LZMA_VLI_MAX)
@@ -216,14 +217,14 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// Handle the special case when there are no Blocks.
index_hash->sequence = index_hash->remaining == 0
- ? SEQ_PADDING_INIT : SEQ_TOTAL;
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
break;
}
- case SEQ_TOTAL:
+ case SEQ_UNPADDED:
case SEQ_UNCOMPRESSED: {
- lzma_vli *size = index_hash->sequence == SEQ_TOTAL
- ? &index_hash->total_size
+ lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
+ ? &index_hash->unpadded_size
: &index_hash->uncompressed_size;
ret = lzma_vli_decode(size, &index_hash->pos,
@@ -234,18 +235,17 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
ret = LZMA_OK;
index_hash->pos = 0;
- if (index_hash->sequence == SEQ_TOTAL) {
- if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX)
+ if (index_hash->sequence == SEQ_UNPADDED) {
+ if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
+ || index_hash->unpadded_size
+ > UNPADDED_SIZE_MAX)
return LZMA_DATA_ERROR;
- index_hash->total_size = total_size_decode(
- index_hash->total_size);
-
index_hash->sequence = SEQ_UNCOMPRESSED;
} else {
// Update the hash.
return_if_error(hash_append(&index_hash->records,
- index_hash->total_size,
+ index_hash->unpadded_size,
index_hash->uncompressed_size));
// Verify that we don't go over the known sizes. Note
@@ -254,8 +254,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// that values in index_hash->blocks are already
// validated and we are fine as long as we don't
// exceed them in index_hash->records.
- if (index_hash->blocks.total_size
- < index_hash->records.total_size
+ if (index_hash->blocks.blocks_size
+ < index_hash->records.blocks_size
|| index_hash->blocks.uncompressed_size
< index_hash->records.uncompressed_size
|| index_hash->blocks.index_list_size
@@ -264,7 +264,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
// Check if this was the last Record.
index_hash->sequence = --index_hash->remaining == 0
- ? SEQ_PADDING_INIT : SEQ_TOTAL;
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
}
break;
@@ -288,8 +288,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
}
// Compare the sizes.
- if (index_hash->blocks.total_size
- != index_hash->records.total_size
+ if (index_hash->blocks.blocks_size
+ != index_hash->records.blocks_size
|| index_hash->blocks.uncompressed_size
!= index_hash->records.uncompressed_size
|| index_hash->blocks.index_list_size
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c
index e137685f..9be47893 100644
--- a/src/liblzma/common/stream_decoder.c
+++ b/src/liblzma/common/stream_decoder.c
@@ -190,7 +190,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Set up a buffer to hold the filter chain. Block Header
// decoder will initialize all members of this array so
// we don't need to do it here.
- lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1];
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
coder->block_options.filters = filters;
// Decode the Block Header.
@@ -216,7 +216,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Free the allocated filter options since they are needed
// only to initialize the Block decoder.
- for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i)
+ for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i)
lzma_free(filters[i].options, allocator);
coder->block_options.filters = NULL;
@@ -243,7 +243,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Block decoded successfully. Add the new size pair to
// the Index hash.
return_if_error(lzma_index_hash_append(coder->index_hash,
- lzma_block_total_size_get(
+ lzma_block_unpadded_size(
&coder->block_options),
coder->block_options.uncompressed_size));
@@ -270,7 +270,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
// Fall through
- case SEQ_STREAM_FOOTER:
+ case SEQ_STREAM_FOOTER: {
// Copy the Stream Footer to the internal buffer.
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
LZMA_STREAM_HEADER_SIZE);
@@ -306,6 +306,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator,
return LZMA_STREAM_END;
coder->sequence = SEQ_STREAM_PADDING;
+ }
// Fall through
diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c
index 0376fd3b..e52ad692 100644
--- a/src/liblzma/common/stream_encoder.c
+++ b/src/liblzma/common/stream_encoder.c
@@ -157,11 +157,11 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator,
return ret;
// Add a new Index Record.
- const lzma_vli total_size = lzma_block_total_size_get(
+ const lzma_vli unpadded_size = lzma_block_unpadded_size(
&coder->block_options);
- assert(total_size != 0);
+ assert(unpadded_size != 0);
return_if_error(lzma_index_append(coder->index, allocator,
- total_size,
+ unpadded_size,
coder->block_options.uncompressed_size));
coder->sequence = SEQ_BLOCK_INIT;
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index d2a77ba4..53ee1c1e 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -157,14 +157,14 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
uint32_t copy_size = dict->size - copy_pos;
if (copy_size < left) {
- memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
+ memmove(dict->buf + dict->pos, dict->buf + copy_pos,
copy_size);
dict->pos += copy_size;
copy_size = left - copy_size;
memcpy(dict->buf + dict->pos, dict->buf, copy_size);
dict->pos += copy_size;
} else {
- memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
+ memmove(dict->buf + dict->pos, dict->buf + copy_pos,
left);
dict->pos += left;
}
diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c
index 7cf06988..3096b442 100644
--- a/src/liblzma/subblock/subblock_decoder.c
+++ b/src/liblzma/subblock/subblock_decoder.c
@@ -211,7 +211,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break;
}
- case FLAG_END_SUBFILTER:
+ case FLAG_END_SUBFILTER: {
if (coder->padding != 0 || (in[*in_pos] & 0x0F)
|| coder->subfilter.code == NULL
|| !coder->got_output_with_subfilter)
@@ -250,6 +250,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
++*in_pos;
break;
+ }
default:
return LZMA_DATA_ERROR;
diff --git a/src/lzma/Makefile.am b/src/lzma/Makefile.am
index cd8bb771..e5c5c29a 100644
--- a/src/lzma/Makefile.am
+++ b/src/lzma/Makefile.am
@@ -15,19 +15,16 @@
bin_PROGRAMS = lzma
lzma_SOURCES = \
- alloc.c \
- alloc.h \
args.c \
args.h \
- error.c \
- error.h \
hardware.c \
hardware.h \
- help.c \
- help.h \
io.c \
io.h \
main.c \
+ main.h \
+ message.c \
+ message.h \
options.c \
options.h \
private.h \
diff --git a/src/lzma/alloc.c b/src/lzma/alloc.c
deleted file mode 100644
index d0fee68b..00000000
--- a/src/lzma/alloc.c
+++ /dev/null
@@ -1,106 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file alloc.c
-/// \brief Memory allocation functions
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "private.h"
-
-
-/// Called when memory allocation fails. Prints and error message and
-/// quits the application.
-static void lzma_attribute((noreturn))
-xerror(void)
-{
- errmsg(V_ERROR, "%s", strerror(errno));
- my_exit(ERROR);
-}
-
-
-extern void *
-xmalloc(size_t size)
-{
- if (size < 1) {
- errno = EINVAL;
- xerror();
- }
-
- void *p = malloc(size);
- if (p == NULL)
- xerror();
-
- return p;
-}
-
-
-/*
-extern void *
-xrealloc(void *ptr, size_t size)
-{
- if (size < 1) {
- errno = EINVAL;
- xerror();
- }
-
- ptr = realloc(ptr, size);
- if (ptr == NULL)
- xerror();
-
- return ptr;
-}
-*/
-
-
-extern char *
-xstrdup(const char *src)
-{
- if (src == NULL) {
- errno = EINVAL;
- xerror();
- }
-
- const size_t size = strlen(src) + 1;
- char *dest = malloc(size);
- if (dest == NULL)
- xerror();
-
- memcpy(dest, src, size);
-
- return dest;
-}
-
-
-extern void
-xstrcpy(char **dest, const char *src)
-{
- size_t len = strlen(src) + 1;
-
- *dest = realloc(*dest, len);
- if (*dest == NULL)
- xerror();
-
- memcpy(*dest, src, len + 1);
-
- return;
-}
-
-
-extern void *
-allocator(void *opaque lzma_attribute((unused)),
- size_t nmemb lzma_attribute((unused)), size_t size)
-{
- return xmalloc(size);
-}
diff --git a/src/lzma/alloc.h b/src/lzma/alloc.h
deleted file mode 100644
index 80317269..00000000
--- a/src/lzma/alloc.h
+++ /dev/null
@@ -1,42 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file alloc.h
-/// \brief Memory allocation functions
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef ALLOC_H
-#define ALLOC_H
-
-#include "private.h"
-
-
-/// Safe malloc() that never returns NULL.
-extern void *xmalloc(size_t size);
-
-/// Safe realloc() that never returns NULL.
-extern void *xrealloc(void *ptr, size_t size);
-
-/// Safe strdup() that never returns NULL.
-extern char *xstrdup(const char *src);
-
-/// xrealloc()s *dest to the size needed by src, and copies src to *dest.
-extern void xstrcpy(char **dest, const char *src);
-
-/// Function for lzma_allocator.alloc. This uses xmalloc().
-extern void *allocator(void *opaque lzma_attribute((unused)),
- size_t nmemb lzma_attribute((unused)), size_t size);
-
-#endif
diff --git a/src/lzma/args.c b/src/lzma/args.c
index 14ccfb6d..a2efb277 100644
--- a/src/lzma/args.c
+++ b/src/lzma/args.c
@@ -25,150 +25,90 @@
#include <ctype.h>
-enum tool_mode opt_mode = MODE_COMPRESS;
-enum format_type opt_format = FORMAT_AUTO;
-
-char *opt_suffix = NULL;
-
-char *opt_files_name = NULL;
-char opt_files_split = '\0';
-FILE *opt_files_file = NULL;
-
bool opt_stdout = false;
bool opt_force = false;
bool opt_keep_original = false;
-bool opt_preserve_name = false;
-
-lzma_check opt_check = LZMA_CHECK_CRC64;
-lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1];
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char *stdin_filename = "(stdin)";
-static size_t preset_number = 7;
-static bool preset_default = true;
-static size_t filter_count = 0;
-
-/// When compressing, which file format to use if --format=auto or no --format
-/// at all has been specified. We need a variable because this depends on
-/// with which name we are called. All names with "lz" in them makes us to
-/// use the legacy .lzma format.
-static enum format_type format_compress_auto = FORMAT_XZ;
-
-
-enum {
- OPT_SUBBLOCK = INT_MIN,
- OPT_X86,
- OPT_POWERPC,
- OPT_IA64,
- OPT_ARM,
- OPT_ARMTHUMB,
- OPT_SPARC,
- OPT_DELTA,
- OPT_LZMA1,
- OPT_LZMA2,
-
- OPT_FILES,
- OPT_FILES0,
-};
-
-
-static const char short_opts[] = "cC:dfF:hlLkM:qrS:tT:vVz123456789";
-
-
-static const struct option long_opts[] = {
- // gzip-like options
- { "fast", no_argument, NULL, '1' },
- { "best", no_argument, NULL, '9' },
- { "memory", required_argument, NULL, 'M' },
- { "name", no_argument, NULL, 'N' },
- { "suffix", required_argument, NULL, 'S' },
- { "threads", required_argument, NULL, 'T' },
- { "version", no_argument, NULL, 'V' },
- { "stdout", no_argument, NULL, 'c' },
- { "to-stdout", no_argument, NULL, 'c' },
- { "decompress", no_argument, NULL, 'd' },
- { "uncompress", no_argument, NULL, 'd' },
- { "force", no_argument, NULL, 'f' },
- { "help", no_argument, NULL, 'h' },
- { "list", no_argument, NULL, 'l' },
- { "info", no_argument, NULL, 'l' },
- { "keep", no_argument, NULL, 'k' },
- { "no-name", no_argument, NULL, 'n' },
- { "quiet", no_argument, NULL, 'q' },
-// { "recursive", no_argument, NULL, 'r' }, // TODO
- { "test", no_argument, NULL, 't' },
- { "verbose", no_argument, NULL, 'v' },
- { "compress", no_argument, NULL, 'z' },
-
- // Filters
- { "subblock", optional_argument, NULL, OPT_SUBBLOCK },
- { "x86", no_argument, NULL, OPT_X86 },
- { "bcj", no_argument, NULL, OPT_X86 },
- { "powerpc", no_argument, NULL, OPT_POWERPC },
- { "ppc", no_argument, NULL, OPT_POWERPC },
- { "ia64", no_argument, NULL, OPT_IA64 },
- { "itanium", no_argument, NULL, OPT_IA64 },
- { "arm", no_argument, NULL, OPT_ARM },
- { "armthumb", no_argument, NULL, OPT_ARMTHUMB },
- { "sparc", no_argument, NULL, OPT_SPARC },
- { "delta", optional_argument, NULL, OPT_DELTA },
- { "lzma1", optional_argument, NULL, OPT_LZMA1 },
- { "lzma2", optional_argument, NULL, OPT_LZMA2 },
-
- // Other
- { "format", required_argument, NULL, 'F' },
- { "check", required_argument, NULL, 'C' },
- { "files", optional_argument, NULL, OPT_FILES },
- { "files0", optional_argument, NULL, OPT_FILES0 },
-
- { NULL, 0, NULL, 0 }
-};
-
static void
-add_filter(lzma_vli id, const char *opt_str)
+parse_real(args_info *args, int argc, char **argv)
{
- if (filter_count == LZMA_BLOCK_FILTERS_MAX) {
- errmsg(V_ERROR, _("Maximum number of filters is seven"));
- my_exit(ERROR);
- }
-
- opt_filters[filter_count].id = id;
-
- switch (id) {
- case LZMA_FILTER_SUBBLOCK:
- opt_filters[filter_count].options
- = parse_options_subblock(opt_str);
- break;
-
- case LZMA_FILTER_DELTA:
- opt_filters[filter_count].options
- = parse_options_delta(opt_str);
- break;
-
- case LZMA_FILTER_LZMA1:
- case LZMA_FILTER_LZMA2:
- opt_filters[filter_count].options
- = parse_options_lzma(opt_str);
- break;
-
- default:
- assert(opt_str == NULL);
- opt_filters[filter_count].options = NULL;
- break;
- }
+ enum {
+ OPT_SUBBLOCK = INT_MIN,
+ OPT_X86,
+ OPT_POWERPC,
+ OPT_IA64,
+ OPT_ARM,
+ OPT_ARMTHUMB,
+ OPT_SPARC,
+ OPT_DELTA,
+ OPT_LZMA1,
+ OPT_LZMA2,
+
+ OPT_FILES,
+ OPT_FILES0,
+ };
+
+ static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789";
+
+ static const struct option long_opts[] = {
+ // Operation mode
+ { "compress", no_argument, NULL, 'z' },
+ { "decompress", no_argument, NULL, 'd' },
+ { "uncompress", no_argument, NULL, 'd' },
+ { "test", no_argument, NULL, 't' },
+ { "list", no_argument, NULL, 'l' },
+ { "info", no_argument, NULL, 'l' },
+
+ // Operation modifiers
+ { "keep", no_argument, NULL, 'k' },
+ { "force", no_argument, NULL, 'f' },
+ { "stdout", no_argument, NULL, 'c' },
+ { "to-stdout", no_argument, NULL, 'c' },
+ { "suffix", required_argument, NULL, 'S' },
+ // { "recursive", no_argument, NULL, 'r' }, // TODO
+ { "files", optional_argument, NULL, OPT_FILES },
+ { "files0", optional_argument, NULL, OPT_FILES0 },
+
+ // Basic compression settings
+ { "format", required_argument, NULL, 'F' },
+ { "check", required_argument, NULL, 'C' },
+ { "preset", required_argument, NULL, 'p' },
+ { "memory", required_argument, NULL, 'M' },
+ { "threads", required_argument, NULL, 'T' },
+
+ { "fast", no_argument, NULL, '1' },
+ { "best", no_argument, NULL, '9' },
+
+ // Filters
+ { "lzma1", optional_argument, NULL, OPT_LZMA1 },
+ { "lzma2", optional_argument, NULL, OPT_LZMA2 },
+ { "x86", no_argument, NULL, OPT_X86 },
+ { "bcj", no_argument, NULL, OPT_X86 },
+ { "powerpc", no_argument, NULL, OPT_POWERPC },
+ { "ppc", no_argument, NULL, OPT_POWERPC },
+ { "ia64", no_argument, NULL, OPT_IA64 },
+ { "itanium", no_argument, NULL, OPT_IA64 },
+ { "arm", no_argument, NULL, OPT_ARM },
+ { "armthumb", no_argument, NULL, OPT_ARMTHUMB },
+ { "sparc", no_argument, NULL, OPT_SPARC },
+ { "delta", optional_argument, NULL, OPT_DELTA },
+ { "subblock", optional_argument, NULL, OPT_SUBBLOCK },
+
+ // Other options
+ { "quiet", no_argument, NULL, 'q' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "help", no_argument, NULL, 'h' },
+ { "long-help", no_argument, NULL, 'H' },
+ { "version", no_argument, NULL, 'V' },
+
+ { NULL, 0, NULL, 0 }
+ };
- ++filter_count;
- preset_default = false;
- return;
-}
-
-
-static void
-parse_real(int argc, char **argv)
-{
int c;
while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
@@ -178,32 +118,28 @@ parse_real(int argc, char **argv)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- preset_number = c - '0';
- preset_default = false;
+ coder_set_preset(c - '0');
break;
- // --memory
- case 'M':
- opt_memory = str_to_uint64("memory", optarg,
- 1, SIZE_MAX);
+ case 'p': {
+ const uint64_t preset = str_to_uint64(
+ "preset", optarg, 1, 9);
+ coder_set_preset(preset);
break;
+ }
- case 'N':
- opt_preserve_name = true;
+ // --memory
+ case 'M':
+ // On 32-bit systems, SIZE_MAX would make more sense
+ // than UINT64_MAX. But use UINT64_MAX still so that
+ // scripts that assume > 4 GiB values don't break.
+ hardware_memlimit_set(str_to_uint64(
+ "memory", optarg, 0, UINT64_MAX));
break;
// --suffix
case 'S':
- // Empty suffix and suffixes having a slash are
- // rejected. Such suffixes would break things later.
- if (optarg[0] == '\0' || strchr(optarg, '/') != NULL) {
- errmsg(V_ERROR, _("%s: Invalid filename "
- "suffix"), optarg);
- my_exit(ERROR);
- }
-
- free(opt_suffix);
- opt_suffix = xstrdup(optarg);
+ suffix_set(optarg);
break;
case 'T':
@@ -214,7 +150,7 @@ parse_real(int argc, char **argv)
// --version
case 'V':
// This doesn't return.
- show_version();
+ message_version();
// --stdout
case 'c':
@@ -234,7 +170,12 @@ parse_real(int argc, char **argv)
// --help
case 'h':
// This doesn't return.
- show_help();
+ message_help(false);
+
+ // --long-help
+ case 'H':
+ // This doesn't return.
+ message_help(true);
// --list
case 'l':
@@ -246,15 +187,9 @@ parse_real(int argc, char **argv)
opt_keep_original = true;
break;
- case 'n':
- opt_preserve_name = false;
- break;
-
// --quiet
case 'q':
- if (verbosity > V_SILENT)
- --verbosity;
-
+ message_verbosity_decrease();
break;
case 't':
@@ -263,9 +198,7 @@ parse_real(int argc, char **argv)
// --verbose
case 'v':
- if (verbosity < V_DEBUG)
- ++verbosity;
-
+ message_verbosity_increase();
break;
case 'z':
@@ -275,43 +208,47 @@ parse_real(int argc, char **argv)
// Filter setup
case OPT_SUBBLOCK:
- add_filter(LZMA_FILTER_SUBBLOCK, optarg);
+ coder_add_filter(LZMA_FILTER_SUBBLOCK,
+ options_subblock(optarg));
break;
case OPT_X86:
- add_filter(LZMA_FILTER_X86, NULL);
+ coder_add_filter(LZMA_FILTER_X86, NULL);
break;
case OPT_POWERPC:
- add_filter(LZMA_FILTER_POWERPC, NULL);
+ coder_add_filter(LZMA_FILTER_POWERPC, NULL);
break;
case OPT_IA64:
- add_filter(LZMA_FILTER_IA64, NULL);
+ coder_add_filter(LZMA_FILTER_IA64, NULL);
break;
case OPT_ARM:
- add_filter(LZMA_FILTER_ARM, NULL);
+ coder_add_filter(LZMA_FILTER_ARM, NULL);
break;
case OPT_ARMTHUMB:
- add_filter(LZMA_FILTER_ARMTHUMB, NULL);
+ coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL);
break;
case OPT_SPARC:
- add_filter(LZMA_FILTER_SPARC, NULL);
+ coder_add_filter(LZMA_FILTER_SPARC, NULL);
break;
case OPT_DELTA:
- add_filter(LZMA_FILTER_DELTA, optarg);
+ coder_add_filter(LZMA_FILTER_DELTA,
+ options_delta(optarg));
break;
case OPT_LZMA1:
- add_filter(LZMA_FILTER_LZMA1, optarg);
+ coder_add_filter(LZMA_FILTER_LZMA1,
+ options_lzma(optarg));
break;
case OPT_LZMA2:
- add_filter(LZMA_FILTER_LZMA2, optarg);
+ coder_add_filter(LZMA_FILTER_LZMA2,
+ options_lzma(optarg));
break;
// Other
@@ -335,14 +272,11 @@ parse_real(int argc, char **argv)
};
size_t i = 0;
- while (strcmp(types[i].str, optarg) != 0) {
- if (++i == ARRAY_SIZE(types)) {
- errmsg(V_ERROR, _("%s: Unknown file "
+ while (strcmp(types[i].str, optarg) != 0)
+ if (++i == ARRAY_SIZE(types))
+ message_fatal(_("%s: Unknown file "
"format type"),
optarg);
- my_exit(ERROR);
- }
- }
opt_format = types[i].format;
break;
@@ -362,50 +296,43 @@ parse_real(int argc, char **argv)
size_t i = 0;
while (strcmp(types[i].str, optarg) != 0) {
- if (++i == ARRAY_SIZE(types)) {
- errmsg(V_ERROR, _("%s: Unknown "
- "integrity check "
- "type"), optarg);
- my_exit(ERROR);
- }
+ if (++i == ARRAY_SIZE(types))
+ message_fatal(_("%s: Unknown integrity "
+ "check type"), optarg);
}
- opt_check = types[i].check;
+ coder_set_check(types[i].check);
break;
}
case OPT_FILES:
- opt_files_split = '\n';
+ args->files_delim = '\n';
// Fall through
case OPT_FILES0:
- if (opt_files_name != NULL) {
- errmsg(V_ERROR, _("Only one file can be "
+ if (args->files_name != NULL)
+ message_fatal(_("Only one file can be "
"specified with `--files'"
"or `--files0'."));
- my_exit(ERROR);
- }
if (optarg == NULL) {
- opt_files_name = (char *)stdin_filename;
- opt_files_file = stdin;
+ args->files_name = (char *)stdin_filename;
+ args->files_file = stdin;
} else {
- opt_files_name = optarg;
- opt_files_file = fopen(optarg,
+ args->files_name = optarg;
+ args->files_file = fopen(optarg,
c == OPT_FILES ? "r" : "rb");
- if (opt_files_file == NULL) {
- errmsg(V_ERROR, "%s: %s", optarg,
+ if (args->files_file == NULL)
+ message_fatal("%s: %s", optarg,
strerror(errno));
- my_exit(ERROR);
- }
}
break;
default:
- show_try_help();
- my_exit(ERROR);
+ message_try_help();
+ my_exit(E_ERROR);
}
}
@@ -414,163 +341,124 @@ parse_real(int argc, char **argv)
static void
-parse_environment(void)
+parse_environment(args_info *args, char *argv0)
{
- char *env = getenv("LZMA_OPT");
+ char *env = getenv("XZ_OPT");
if (env == NULL)
return;
+ // We modify the string, so make a copy of it.
env = xstrdup(env);
- // Calculate the number of arguments in env.
- unsigned int argc = 1;
+ // Calculate the number of arguments in env. argc starts at one
+ // to include space for the program name.
+ int argc = 1;
bool prev_was_space = true;
for (size_t i = 0; env[i] != '\0'; ++i) {
if (isspace(env[i])) {
prev_was_space = true;
} else if (prev_was_space) {
prev_was_space = false;
- if (++argc > (unsigned int)(INT_MAX)) {
- errmsg(V_ERROR, _("The environment variable "
- "LZMA_OPT contains too many "
+
+ // Keep argc small enough to fit into a signed int
+ // and to keep it usable for memory allocation.
+ if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
+ message_fatal(_("The environment variable "
+ "XZ_OPT contains too many "
"arguments"));
- my_exit(ERROR);
- }
}
}
- char **argv = xmalloc((argc + 1) * sizeof(char*));
+ // Allocate memory to hold pointers to the arguments. Add one to get
+ // space for the terminating NULL (if some systems happen to need it).
+ char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
argv[0] = argv0;
argv[argc] = NULL;
+ // Go through the string again. Split the arguments using '\0'
+ // characters and add pointers to the resulting strings to argv.
argc = 1;
prev_was_space = true;
for (size_t i = 0; env[i] != '\0'; ++i) {
if (isspace(env[i])) {
prev_was_space = true;
+ env[i] = '\0';
} else if (prev_was_space) {
prev_was_space = false;
argv[argc++] = env + i;
}
}
- parse_real((int)(argc), argv);
+ // Parse the argument list we got from the environment. All non-option
+ // arguments i.e. filenames are ignored.
+ parse_real(args, argc, argv);
+ // Reset the state of the getopt_long() so that we can parse the
+ // command line options too. There are two incompatible ways to
+ // do it.
+#ifdef HAVE_OPTRESET
+ // BSD
+ optind = 1;
+ optreset = 1;
+#else
+ // GNU, Solaris
+ optind = 0;
+#endif
+
+ // We don't need the argument list from environment anymore.
+ free(argv);
free(env);
return;
}
-static void
-set_compression_settings(void)
+extern void
+args_parse(args_info *args, int argc, char **argv)
{
- static lzma_options_lzma opt_lzma;
-
- if (filter_count == 0) {
- if (lzma_lzma_preset(&opt_lzma, preset_number)) {
- errmsg(V_ERROR, _("Internal error (bug)"));
- my_exit(ERROR);
- }
-
- opt_filters[0].id = opt_format == FORMAT_LZMA
- ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
- opt_filters[0].options = &opt_lzma;
- filter_count = 1;
- }
-
- // Terminate the filter options array.
- opt_filters[filter_count].id = LZMA_VLI_UNKNOWN;
-
- // If we are using the LZMA_Alone format, allow exactly one filter
- // which has to be LZMA.
- if (opt_format == FORMAT_LZMA && (filter_count != 1
- || opt_filters[0].id != LZMA_FILTER_LZMA1)) {
- errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter "
- "is supported"));
- my_exit(ERROR);
- }
-
- // TODO: liblzma probably needs an API to validate the filter chain.
-
- // If using --format=raw, we can be decoding.
- uint64_t memory_usage = opt_mode == MODE_COMPRESS
- ? lzma_memusage_encoder(opt_filters)
- : lzma_memusage_decoder(opt_filters);
-
- // Don't go over the memory limits when the default
- // setting is used.
- if (preset_default) {
- while (memory_usage > opt_memory) {
- if (preset_number == 1) {
- errmsg(V_ERROR, _("Memory usage limit is too "
- "small for any internal "
- "filter preset"));
- my_exit(ERROR);
- }
-
- if (lzma_lzma_preset(&opt_lzma, --preset_number)) {
- errmsg(V_ERROR, _("Internal error (bug)"));
- my_exit(ERROR);
- }
-
- memory_usage = lzma_memusage_encoder(opt_filters);
- }
-
- // TODO: With --format=raw, we should print a warning since
- // the presets may change and thus the next version may not
- // be able to uncompress the raw stream with the same preset
- // number.
+ // Initialize those parts of *args that we need later.
+ args->files_name = NULL;
+ args->files_file = NULL;
+ args->files_delim = '\0';
- } else {
- if (memory_usage > opt_memory) {
- errmsg(V_ERROR, _("Memory usage limit is too small "
- "for the given filter setup"));
- my_exit(ERROR);
- }
- }
-
- // Limit the number of worked threads so that memory usage
- // limit isn't exceeded.
- assert(memory_usage > 0);
- size_t thread_limit = opt_memory / memory_usage;
- if (thread_limit == 0)
- thread_limit = 1;
-
- if (opt_threads > thread_limit)
- opt_threads = thread_limit;
-
- return;
-}
+ // Type of the file format to use when --format=auto or no --format
+ // was specified.
+ enum format_type format_compress_auto = FORMAT_XZ;
-
-extern char **
-parse_args(int argc, char **argv)
-{
// Check how we were called.
{
- const char *name = str_filename(argv[0]);
- if (name != NULL) {
- // Default file format
- if (strstr(name, "lz") != NULL)
- format_compress_auto = FORMAT_LZMA;
-
- // Operation mode
- if (strstr(name, "cat") != NULL) {
- opt_mode = MODE_DECOMPRESS;
- opt_stdout = true;
- } else if (strstr(name, "un") != NULL) {
- opt_mode = MODE_DECOMPRESS;
- }
+ // Remove the leading path name, if any.
+ const char *name = strrchr(argv[0], '/');
+ if (name == NULL)
+ name = argv[0];
+ else
+ ++name;
+
+ // NOTE: It's possible that name[0] is now '\0' if argv[0]
+ // is weird, but it doesn't matter here.
+
+ // The default file format is .lzma if the command name
+ // contains "lz".
+ if (strstr(name, "lz") != NULL)
+ format_compress_auto = FORMAT_LZMA;
+
+ // Operation mode
+ if (strstr(name, "cat") != NULL) {
+ // Imply --decompress --stdout
+ opt_mode = MODE_DECOMPRESS;
+ opt_stdout = true;
+ } else if (strstr(name, "un") != NULL) {
+ // Imply --decompress
+ opt_mode = MODE_DECOMPRESS;
}
}
// First the flags from environment
- parse_environment();
+ parse_environment(args, argv[0]);
// Then from the command line
optind = 1;
- parse_real(argc, argv);
+ parse_real(args, argc, argv);
// Never remove the source file when the destination is not on disk.
// In test mode the data is written nowhere, but setting opt_stdout
@@ -580,18 +468,33 @@ parse_args(int argc, char **argv)
opt_stdout = true;
}
+ // If no --format flag was used, or it was --format=auto, we need to
+ // decide what is the target file format we are going to use. This
+ // depends on how we were called (checked earlier in this function).
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
opt_format = format_compress_auto;
+ // Compression settings need to be validated (options themselves and
+ // their memory usage) when compressing to any file format. It has to
+ // be done also when uncompressing raw data, since for raw decoding
+ // the options given on the command line are used to know what kind
+ // of raw data we are supposed to decode.
if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
- set_compression_settings();
+ coder_set_compression_settings();
// If no filenames are given, use stdin.
- if (argv[optind] == NULL && opt_files_name == NULL) {
- // We don't modify or free() the "-" constant.
- static char *argv_stdin[2] = { (char *)"-", NULL };
- return argv_stdin;
+ if (argv[optind] == NULL && args->files_name == NULL) {
+ // We don't modify or free() the "-" constant. The caller
+ // modifies this so don't make the struct itself const.
+ static char *names_stdin[2] = { (char *)"-", NULL };
+ args->arg_names = names_stdin;
+ args->arg_count = 1;
+ } else {
+ // We got at least one filename from the command line, or
+ // --files or --files0 was specified.
+ args->arg_names = argv + optind;
+ args->arg_count = argc - optind;
}
- return argv + optind;
+ return;
}
diff --git a/src/lzma/args.h b/src/lzma/args.h
index 8d9cd306..6d4e8282 100644
--- a/src/lzma/args.h
+++ b/src/lzma/args.h
@@ -23,42 +23,34 @@
#include "private.h"
-enum tool_mode {
- MODE_COMPRESS,
- MODE_DECOMPRESS,
- MODE_TEST,
- MODE_LIST,
-};
+typedef struct {
+ /// Filenames from command line
+ char **arg_names;
-// NOTE: The order of these is significant in suffix.c.
-enum format_type {
- FORMAT_AUTO,
- FORMAT_XZ,
- FORMAT_LZMA,
- // HEADER_GZIP,
- FORMAT_RAW,
-};
+ /// Number of filenames from command line
+ size_t arg_count;
+ /// Name of the file from which to read filenames. This is NULL
+ /// if --files or --files0 was not used.
+ char *files_name;
-extern char *opt_suffix;
+ /// File opened for reading from which filenames are read. This is
+ /// non-NULL only if files_name is non-NULL.
+ FILE *files_file;
+
+ /// Delimiter for filenames read from files_file
+ char files_delim;
+
+} args_info;
-extern char *opt_files_name;
-extern char opt_files_split;
-extern FILE *opt_files_file;
extern bool opt_stdout;
extern bool opt_force;
extern bool opt_keep_original;
-extern bool opt_preserve_name;
// extern bool opt_recursive;
-extern enum tool_mode opt_mode;
-extern enum format_type opt_format;
-
-extern lzma_check opt_check;
-extern lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1];
extern const char *stdin_filename;
-extern char **parse_args(int argc, char **argv);
+extern void args_parse(args_info *args, int argc, char **argv);
#endif
diff --git a/src/lzma/error.c b/src/lzma/error.c
deleted file mode 100644
index e66fd140..00000000
--- a/src/lzma/error.c
+++ /dev/null
@@ -1,162 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file error.c
-/// \brief Error message printing
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "private.h"
-#include <stdarg.h>
-
-
-exit_status_type exit_status = SUCCESS;
-verbosity_type verbosity = V_WARNING;
-char *argv0 = NULL;
-volatile sig_atomic_t user_abort = 0;
-
-
-extern const char *
-str_strm_error(lzma_ret code)
-{
- switch (code) {
- case LZMA_OK:
- return _("Operation successful");
-
- case LZMA_STREAM_END:
- return _("Operation finished successfully");
-
- case LZMA_PROG_ERROR:
- return _("Internal error (bug)");
-
- case LZMA_DATA_ERROR:
- return _("Compressed data is corrupt");
-
- case LZMA_MEM_ERROR:
- return strerror(ENOMEM);
-
- case LZMA_BUF_ERROR:
- return _("Unexpected end of input");
-
- case LZMA_OPTIONS_ERROR:
- return _("Unsupported options");
-
- case LZMA_UNSUPPORTED_CHECK:
- return _("Unsupported integrity check type");
-
- case LZMA_MEMLIMIT_ERROR:
- return _("Memory usage limit reached");
-
- case LZMA_FORMAT_ERROR:
- return _("File format not recognized");
-
- default:
- return NULL;
- }
-}
-
-
-extern void
-set_exit_status(exit_status_type new_status)
-{
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
- pthread_mutex_lock(&mutex);
-
- if (new_status != WARNING || exit_status == SUCCESS)
- exit_status = new_status;
-
- pthread_mutex_unlock(&mutex);
- return;
-}
-
-
-extern void lzma_attribute((noreturn))
-my_exit(int status)
-{
- // Close stdout. If something goes wrong, print an error message
- // to stderr.
- {
- const int ferror_err = ferror(stdout);
- const int fclose_err = fclose(stdout);
- if (fclose_err) {
- errmsg(V_ERROR, _("Writing to standard output "
- "failed: %s"), strerror(errno));
- status = ERROR;
- } else if (ferror_err) {
- // Some error has occurred but we have no clue about
- // the reason since fclose() succeeded.
- errmsg(V_ERROR, _("Writing to standard output "
- "failed: %s"), "Unknown error");
- status = ERROR;
- }
- }
-
- // Close stderr. If something goes wrong, there's nothing where we
- // could print an error message. Just set the exit status.
- {
- const int ferror_err = ferror(stderr);
- const int fclose_err = fclose(stderr);
- if (fclose_err || ferror_err)
- status = ERROR;
- }
-
- exit(status);
-}
-
-
-extern void lzma_attribute((format(printf, 2, 3)))
-errmsg(verbosity_type v, const char *fmt, ...)
-{
- va_list ap;
-
- if (v <= verbosity) {
- va_start(ap, fmt);
-
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
- pthread_mutex_lock(&mutex);
-
- fprintf(stderr, "%s: ", argv0);
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
-
- pthread_mutex_unlock(&mutex);
-
- va_end(ap);
- }
-
- if (v == V_ERROR)
- set_exit_status(ERROR);
- else if (v == V_WARNING)
- set_exit_status(WARNING);
-
- return;
-}
-
-
-extern void
-out_of_memory(void)
-{
- errmsg(V_ERROR, "%s", strerror(ENOMEM));
- user_abort = 1;
- return;
-}
-
-
-extern void
-internal_error(void)
-{
- errmsg(V_ERROR, _("Internal error (bug)"));
- user_abort = 1;
- return;
-}
diff --git a/src/lzma/error.h b/src/lzma/error.h
deleted file mode 100644
index 34ec30e1..00000000
--- a/src/lzma/error.h
+++ /dev/null
@@ -1,67 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file error.c
-/// \brief Error message printing
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef ERROR_H
-#define ERROR_H
-
-#include "private.h"
-
-
-typedef enum {
- SUCCESS = 0,
- ERROR = 1,
- WARNING = 2,
-} exit_status_type;
-
-
-typedef enum {
- V_SILENT,
- V_ERROR,
- V_WARNING,
- V_VERBOSE,
- V_DEBUG,
-} verbosity_type;
-
-
-extern exit_status_type exit_status;
-
-extern verbosity_type verbosity;
-
-/// Like GNU's program_invocation_name but portable
-extern char *argv0;
-
-/// Once this is non-zero, all threads must shutdown and clean up incomplete
-/// output files from the disk.
-extern volatile sig_atomic_t user_abort;
-
-
-extern const char * str_strm_error(lzma_ret code);
-
-extern void errmsg(verbosity_type v, const char *fmt, ...)
- lzma_attribute((format(printf, 2, 3)));
-
-extern void set_exit_status(exit_status_type new_status);
-
-extern void my_exit(int status) lzma_attribute((noreturn));
-
-extern void out_of_memory(void);
-
-extern void internal_error(void);
-
-#endif
diff --git a/src/lzma/hardware.c b/src/lzma/hardware.c
index 6cb3cdfc..63bf0937 100644
--- a/src/lzma/hardware.c
+++ b/src/lzma/hardware.c
@@ -26,33 +26,15 @@
size_t opt_threads = 1;
-/// Number of bytes of memory to use at maximum (only a rough limit).
-/// This can be set with the --memory=NUM command line option.
-/// If no better value can be determined, the default is 14 MiB, which
-/// should be quite safe even for older systems while still allowing
-/// reasonable compression ratio.
-size_t opt_memory = 14 * 1024 * 1024;
+/// Memory usage limit for encoding
+static uint64_t memlimit_encoder;
+/// Memory usage limit for decoding
+static uint64_t memlimit_decoder;
-/// Get the amount of physical memory, and set opt_memory to 1/3 of it.
-/// User can then override this with --memory command line option.
-static void
-hardware_memory(void)
-{
- uint64_t mem = physmem();
- if (mem != 0) {
- mem /= 3;
-
-#if UINT64_MAX > SIZE_MAX
- if (mem > SIZE_MAX)
- mem = SIZE_MAX;
-#endif
-
- opt_memory = mem;
- }
-
- return;
-}
+/// Memory usage limit given on the command line or environment variable.
+/// Zero indicates the default (memlimit_encoder or memlimit_decoder).
+static uint64_t memlimit_custom = 0;
/// Get the number of CPU cores, and set opt_threads to default to that value.
@@ -90,10 +72,51 @@ hardware_cores(void)
}
+static void
+hardware_memlimit_init(void)
+{
+ uint64_t mem = physmem();
+
+ // If we cannot determine the amount of RAM, assume 16 MiB. Maybe
+ // even that is too much on some systems. But on most systems it's
+ // far too little, and can be annoying.
+ if (mem == 0)
+ mem = UINT64_C(16) * 1024 * 1024;
+
+ // Use at most 90 % of RAM when encoding and 33 % when decoding.
+ memlimit_encoder = mem - mem / 10;
+ memlimit_decoder = mem / 3;
+
+ return;
+}
+
+
+extern void
+hardware_memlimit_set(uint64_t memlimit)
+{
+ memlimit_custom = memlimit;
+ return;
+}
+
+
+extern uint64_t
+hardware_memlimit_encoder(void)
+{
+ return memlimit_custom != 0 ? memlimit_custom : memlimit_encoder;
+}
+
+
+extern uint64_t
+hardware_memlimit_decoder(void)
+{
+ return memlimit_custom != 0 ? memlimit_custom : memlimit_decoder;
+}
+
+
extern void
hardware_init(void)
{
- hardware_memory();
+ hardware_memlimit_init();
hardware_cores();
return;
}
diff --git a/src/lzma/hardware.h b/src/lzma/hardware.h
index d47bd29f..f604df20 100644
--- a/src/lzma/hardware.h
+++ b/src/lzma/hardware.h
@@ -24,8 +24,22 @@
extern size_t opt_threads;
-extern size_t opt_memory;
+
+/// Initialize some hardware-specific variables, which are needed by other
+/// hardware_* functions.
extern void hardware_init(void);
+
+/// Set custom memory usage limit. This is used for both encoding and
+/// decoding. Zero indicates resetting the limit back to defaults.
+extern void hardware_memlimit_set(uint64_t memlimit);
+
+/// Get the memory usage limit for encoding. By default this is 90 % of RAM.
+extern uint64_t hardware_memlimit_encoder(void);
+
+
+/// Get the memory usage limit for decoding. By default this is 33 % of RAM.
+extern uint64_t hardware_memlimit_decoder(void);
+
#endif
diff --git a/src/lzma/help.c b/src/lzma/help.c
deleted file mode 100644
index 2e59f3b5..00000000
--- a/src/lzma/help.c
+++ /dev/null
@@ -1,170 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file help.c
-/// \brief Help messages
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "private.h"
-
-
-extern void
-show_try_help(void)
-{
- // Print this with V_WARNING instead of V_ERROR to prevent it from
- // showing up when --quiet has been specified.
- errmsg(V_WARNING, _("Try `%s --help' for more information."), argv0);
- return;
-}
-
-
-extern void lzma_attribute((noreturn))
-show_help(void)
-{
- printf(_("Usage: %s [OPTION]... [FILE]...\n"
- "Compress or decompress FILEs in the .lzma format.\n"
- "\n"), argv0);
-
- puts(_("Mandatory arguments to long options are mandatory for "
- "short options too.\n"));
-
- puts(_(
-" Operation mode:\n"
-"\n"
-" -z, --compress force compression\n"
-" -d, --decompress force decompression\n"
-" -t, --test test compressed file integrity\n"
-" -l, --list list information about files\n"
-));
-
- puts(_(
-" Operation modifiers:\n"
-"\n"
-" -k, --keep keep (don't delete) input files\n"
-" -f, --force force overwrite of output file and (de)compress links\n"
-" -c, --stdout write to standard output and don't delete input files\n"
-" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
-" -F, --format=FMT file format to encode or decode; possible values are\n"
-" `auto' (default), `xz', `lzma', and `raw'\n"
-" --files=[FILE] read filenames to process from FILE; if FILE is\n"
-" omitted, filenames are read from the standard input;\n"
-" filenames must be terminated with the newline character\n"
-" --files0=[FILE] like --files but use the nul byte as terminator\n"
-));
-
- puts(_(
-" Compression presets and basic compression options:\n"
-"\n"
-" -1 .. -2 fast compression\n"
-" -3 .. -6 good compression\n"
-" -7 .. -9 excellent compression, but needs a lot of memory;\n"
-" default is -7 if memory limit allows\n"
-"\n"
-" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
-" or `sha256'\n"
-));
-
- puts(_(
-" Custom filter chain for compression (alternative for using presets):\n"
-"\n"
-" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
-" --lzma2=[OPTS] more of the following options (valid values; default):\n"
-" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
-" lc=NUM number of literal context bits (0-4; 3)\n"
-" lp=NUM number of literal position bits (0-4; 0)\n"
-" pb=NUM number of position bits (0-4; 2)\n"
-" mode=MODE compression mode (fast, normal; normal)\n"
-" nice=NUM nice length of a match (2-273; 64)\n"
-" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
-" depth=NUM maximum search depth; 0=automatic (default)\n"
-"\n"
-" --x86 x86 filter (sometimes called BCJ filter)\n"
-" --powerpc PowerPC (big endian) filter\n"
-" --ia64 IA64 (Itanium) filter\n"
-" --arm ARM filter\n"
-" --armthumb ARM-Thumb filter\n"
-" --sparc SPARC filter\n"
-"\n"
-" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n"
-" dist=NUM distance between bytes being subtracted\n"
-" from each other (1-256; 1)\n"
-"\n"
-" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
-" size=NUM number of bytes of data per subblock\n"
-" (1 - 256Mi; 4Ki)\n"
-" rle=NUM run-length encoder chunk size (0-256; 0)\n"
-));
-
- puts(_(
-" Resource usage options:\n"
-"\n"
-" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n"
-" -T, --threads=NUM use a maximum of NUM (de)compression threads\n"
-// " --threading=STR threading style; possible values are `auto' (default),\n"
-// " `files', and `stream'
-));
-
- puts(_(
-" Other options:\n"
-"\n"
-" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
-" -v, --verbose be verbose; specify twice for even more verbose\n"
-"\n"
-" -h, --help display this help and exit\n"
-" -V, --version display version and license information and exit\n"));
-
- puts(_("With no FILE, or when FILE is -, read standard input.\n"));
-
- size_t mem_limit = opt_memory / (1024 * 1024);
- if (mem_limit == 0)
- mem_limit = 1;
-
- // We use PRIu64 instead of %zu to support pre-C99 libc.
- puts(_("On this system and configuration, the tool will use"));
- printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"),
- (uint64_t)(mem_limit));
- printf(N_(" * at maximum of one thread for (de)compression.\n\n",
- " * at maximum of %" PRIu64
- " threads for (de)compression.\n\n",
- (uint64_t)(opt_threads)), (uint64_t)(opt_threads));
-
- printf(_("Report bugs to <%s> (in English or Finnish).\n"),
- PACKAGE_BUGREPORT);
-
- my_exit(SUCCESS);
-}
-
-
-extern void lzma_attribute((noreturn))
-show_version(void)
-{
- printf(
-"lzma (LZMA Utils) " PACKAGE_VERSION "\n"
-"\n"
-"Copyright (C) 1999-2008 Igor Pavlov\n"
-"Copyright (C) 2007-2008 Lasse Collin\n"
-"\n"
-"This program is free software; you can redistribute it and/or modify\n"
-"it under the terms of the GNU General Public License as published by\n"
-"the Free Software Foundation; either version 2 of the License, or\n"
-"(at your option) any later version.\n"
-"\n"
-"This program is distributed in the hope that it will be useful,\n"
-"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
-"GNU General Public License for more details.\n"
-"\n");
- my_exit(SUCCESS);
-}
diff --git a/src/lzma/help.h b/src/lzma/help.h
deleted file mode 100644
index 659c66a0..00000000
--- a/src/lzma/help.h
+++ /dev/null
@@ -1,32 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file help.h
-/// \brief Help messages
-//
-// Copyright (C) 2007 Lasse Collin
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef HELP_H
-#define HELP_H
-
-#include "private.h"
-
-
-extern void show_try_help(void);
-
-extern void show_help(void) lzma_attribute((noreturn));
-
-extern void show_version(void) lzma_attribute((noreturn));
-
-#endif
diff --git a/src/lzma/io.c b/src/lzma/io.c
index b972099f..0ec63f03 100644
--- a/src/lzma/io.c
+++ b/src/lzma/io.c
@@ -19,131 +19,39 @@
#include "private.h"
-#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT)
-# include <sys/time.h>
-#endif
+#include <fcntl.h>
-#ifndef O_SEARCH
-# define O_SEARCH O_RDONLY
+#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
+# include <sys/time.h>
+#elif defined(HAVE_UTIME)
+# include <utime.h>
#endif
-/// \brief Number of open file_pairs
-///
-/// Once the main() function has requested processing of all files,
-/// we wait that open_pairs drops back to zero. Then it is safe to
-/// exit from the program.
-static size_t open_pairs = 0;
-
-
-/// \brief mutex for file system operations
-///
-/// All file system operations are done via the functions in this file.
-/// They use fchdir() to avoid some race conditions (more portable than
-/// openat() & co.).
-///
-/// Synchronizing all file system operations shouldn't affect speed notably,
-/// since the actual reading from and writing to files is done in parallel.
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-
-
-/// This condition is invoked when a file is closed and the value of
-/// the open_files variable has dropped to zero. The only listener for
-/// this condition is io_finish() which is called from main().
-static pthread_cond_t io_cond = PTHREAD_COND_INITIALIZER;
-
-
-/// True when stdout is being used by some thread
-static bool stdout_in_use = false;
-
-
-/// This condition is signalled when a thread releases stdout (no longer
-/// writes data to it).
-static pthread_cond_t stdout_cond = PTHREAD_COND_INITIALIZER;
-
-
-/// \brief Directory where we were started
-///
-/// This is needed when a new file, whose name was given on command line,
-/// is opened.
-static int start_dir;
-
-
-static uid_t uid;
-static gid_t gid;
-
-
-extern void
-io_init(void)
-{
- start_dir = open(".", O_SEARCH | O_NOCTTY);
- if (start_dir == -1) {
- errmsg(V_ERROR, _("Cannot get file descriptor of the current "
- "directory: %s"), strerror(errno));
- my_exit(ERROR);
- }
-
- uid = getuid();
- gid = getgid();
-
- return;
-}
-
-
-/// Waits until the number of open file_pairs has dropped to zero.
-extern void
-io_finish(void)
-{
- pthread_mutex_lock(&mutex);
-
- while (open_pairs != 0)
- pthread_cond_wait(&io_cond, &mutex);
-
- (void)close(start_dir);
-
- pthread_mutex_unlock(&mutex);
-
- return;
-}
-
-
/// \brief Unlinks a file
///
-/// \param dir_fd File descriptor of the directory containing the file
-/// \param name Name of the file with or without path
-///
-/// \return Zero on success. On error, -1 is returned and errno set.
-///
+/// This tries to verify that the file being unlinked really is the file that
+/// we want to unlink by verifying device and inode numbers. There's still
+/// a small unavoidable race, but this is much better than nothing (the file
+/// could have been moved/replaced even hours earlier).
static void
-io_unlink(int dir_fd, const char *name, ino_t ino)
+io_unlink(const char *name, const struct stat *known_st)
{
- const char *base = str_filename(name);
- if (base == NULL) {
- // This shouldn't happen.
- errmsg(V_ERROR, _("%s: Invalid filename"), name);
- return;
- }
+ struct stat new_st;
- pthread_mutex_lock(&mutex);
-
- if (fchdir(dir_fd)) {
- errmsg(V_ERROR, _("Cannot change directory: %s"),
- strerror(errno));
+ if (lstat(name, &new_st)
+ || new_st.st_dev != known_st->st_dev
+ || new_st.st_ino != known_st->st_ino) {
+ message_error(_("%s: File seems to be moved, not removing"),
+ name);
} else {
- struct stat st;
- if (lstat(base, &st) || st.st_ino != ino)
- errmsg(V_ERROR, _("%s: File seems to be moved, "
- "not removing"), name);
-
// There's a race condition between lstat() and unlink()
// but at least we have tried to avoid removing wrong file.
- else if (unlink(base))
- errmsg(V_ERROR, _("%s: Cannot remove: %s"),
+ if (unlink(name))
+ message_error(_("%s: Cannot remove: %s"),
name, strerror(errno));
}
- pthread_mutex_unlock(&mutex);
-
return;
}
@@ -160,14 +68,31 @@ io_copy_attrs(const file_pair *pair)
// destination file who didn't have permission to access the
// source file.
- if (uid == 0 && fchown(pair->dest_fd, pair->src_st.st_uid, -1))
- errmsg(V_WARNING, _("%s: Cannot set the file owner: %s"),
- pair->dest_name, strerror(errno));
+ // Simple cache to avoid repeated calls to geteuid().
+ static enum {
+ WARN_FCHOWN_UNKNOWN,
+ WARN_FCHOWN_NO,
+ WARN_FCHOWN_YES,
+ } warn_fchown = WARN_FCHOWN_UNKNOWN;
+
+ // Try changing the owner of the file. If we aren't root or the owner
+ // isn't already us, fchown() probably doesn't succeed. We warn
+ // about failing fchown() only if we are root.
+ if (fchown(pair->dest_fd, pair->src_st.st_uid, -1)
+ && warn_fchown != WARN_FCHOWN_NO) {
+ if (warn_fchown == WARN_FCHOWN_UNKNOWN)
+ warn_fchown = geteuid() == 0
+ ? WARN_FCHOWN_YES : WARN_FCHOWN_NO;
+
+ if (warn_fchown == WARN_FCHOWN_YES)
+ message_warning(_("%s: Cannot set the file owner: %s"),
+ pair->dest_name, strerror(errno));
+ }
mode_t mode;
if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
- errmsg(V_WARNING, _("%s: Cannot set the file group: %s"),
+ message_warning(_("%s: Cannot set the file group: %s"),
pair->dest_name, strerror(errno));
// We can still safely copy some additional permissions:
// `group' must be at least as strict as `other' and
@@ -186,192 +111,291 @@ io_copy_attrs(const file_pair *pair)
}
if (fchmod(pair->dest_fd, mode))
- errmsg(V_WARNING, _("%s: Cannot set the file permissions: %s"),
+ message_warning(_("%s: Cannot set the file permissions: %s"),
pair->dest_name, strerror(errno));
- // Copy the timestamps only if we have a secure function to do it.
-#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT)
- struct timeval tv[2];
- tv[0].tv_sec = pair->src_st.st_atime;
- tv[1].tv_sec = pair->src_st.st_mtime;
+ // Copy the timestamps. We have several possible ways to do this, of
+ // which some are better in both security and precision.
+ //
+ // First, get the nanosecond part of the timestamps. As of writing,
+ // it's not standardized by POSIX, and there are several names for
+ // the same thing in struct stat.
+ long atime_nsec;
+ long mtime_nsec;
# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
- tv[0].tv_usec = pair->src_st.st_atim.tv_nsec / 1000;
+ // GNU and Solaris
+ atime_nsec = pair->src_st.st_atim.tv_nsec;
+ mtime_nsec = pair->src_st.st_mtim.tv_nsec;
+
# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
- tv[0].tv_usec = pair->src_st.st_atimespec.tv_nsec / 1000;
-# else
- tv[0].tv_usec = 0;
-# endif
+ // BSD
+ atime_nsec = pair->src_st.st_atimespec.tv_nsec;
+ mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
+
+# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
+ // GNU and BSD without extensions
+ atime_nsec = pair->src_st.st_atimensec;
+ mtime_nsec = pair->src_st.st_mtimensec;
+
+# elif defined(HAVE_STRUCT_STAT_ST_UATIME)
+ // Tru64
+ atime_nsec = pair->src_st.st_uatime * 1000;
+ mtime_nsec = pair->src_st.st_umtime * 1000;
+
+# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
+ // UnixWare
+ atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
+ mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
-# if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC)
- tv[1].tv_usec = pair->src_st.st_mtim.tv_nsec / 1000;
-# elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC)
- tv[1].tv_usec = pair->src_st.st_mtimespec.tv_nsec / 1000;
# else
- tv[1].tv_usec = 0;
+ // Safe fallback
+ atime_nsec = 0;
+ mtime_nsec = 0;
# endif
-# ifdef HAVE_FUTIMES
+ // Construct a structure to hold the timestamps and call appropriate
+ // function to set the timestamps on the destination file.
+#if defined(HAVE_FUTIMENS)
+ // Use nanosecond precision.
+ struct timespec tv[2];
+ tv[0].tv_sec = pair->src_st.st_atime;
+ tv[0].tv_nsec = atime_nsec;
+ tv[1].tv_sec = pair->src_st.st_mtime;
+ tv[1].tv_nsec = mtime_nsec;
+
+ (void)futimens(pair->dest_fd, tv);
+
+#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
+ // Use microsecond precision.
+ struct timeval tv[2];
+ tv[0].tv_sec = pair->src_st.st_atime;
+ tv[0].tv_usec = atime_nsec / 1000;
+ tv[1].tv_sec = pair->src_st.st_mtime;
+ tv[1].tv_usec = mtime_nsec / 1000;
+
+# if defined(HAVE_FUTIMES)
(void)futimes(pair->dest_fd, tv);
-# else
+# elif defined(HAVE_FUTIMESAT)
(void)futimesat(pair->dest_fd, NULL, tv);
+# else
+ // Argh, no function to use a file descriptor to set the timestamp.
+ // The destination file has to be referred to by its name instead.
+ (void)utimes(pair->dest_name, tv);
# endif
+
+#elif defined(HAVE_UTIME)
+ // Use one-second precision. utime() doesn't support using file
+ // descriptor either, so the destination is referred to by name.
+ const struct utimbuf buf = {
+ .actime = pair->src_st.st_atime,
+ .modtime = pair->src_st.st_mtime,
+ };
+
+ // Avoid warnings.
+ (void)atime_nsec;
+ (void)mtime_nsec;
+
+ (void)utime(pair->dest_name, &buf);
#endif
return;
}
-/// Opens and changes into the directory containing the source file.
-static int
-io_open_dir(file_pair *pair)
+/// Opens the source file. Returns false on success, true on error.
+static bool
+io_open_src(file_pair *pair)
{
- if (pair->src_name == stdin_filename)
- return 0;
-
- if (fchdir(start_dir)) {
- errmsg(V_ERROR, _("Cannot change directory: %s"),
- strerror(errno));
- return -1;
+ // There's nothing to open when reading from stdin.
+ if (pair->src_name == stdin_filename) {
+ pair->src_fd = STDIN_FILENO;
+ return false;
}
- const char *split = strrchr(pair->src_name, '/');
- if (split == NULL) {
- pair->dir_fd = start_dir;
- } else {
- // Copy also the slash. It's needed to support filenames
- // like "/foo" (dirname being "/"), and it never hurts anyway.
- const size_t dirname_len = split - pair->src_name + 1;
- char dirname[dirname_len + 1];
- memcpy(dirname, pair->src_name, dirname_len);
- dirname[dirname_len] = '\0';
-
- // Open the directory and change into it.
- pair->dir_fd = open(dirname, O_SEARCH | O_NOCTTY);
- if (pair->dir_fd == -1 || fchdir(pair->dir_fd)) {
- errmsg(V_ERROR, _("%s: Cannot open the directory "
- "containing the file: %s"),
- pair->src_name, strerror(errno));
- (void)close(pair->dir_fd);
- return -1;
+ // We accept only regular files if we are writing the output
+ // to disk too, and if --force was not given.
+ const bool reg_files_only = !opt_stdout && !opt_force;
+
+ // Flags for open()
+ int flags = O_RDONLY | O_NOCTTY;
+
+ // If we accept only regular files, we need to be careful to avoid
+ // problems with special files like devices and FIFOs. O_NONBLOCK
+ // prevents blocking when opening such files. When we want to accept
+ // special files, we must not use O_NONBLOCK, or otherwise we won't
+ // block waiting for e.g. FIFOs to become readable.
+ if (reg_files_only)
+ flags |= O_NONBLOCK;
+
+#ifdef O_NOFOLLOW
+ if (reg_files_only)
+ flags |= O_NOFOLLOW;
+#else
+ // Some POSIX-like systems lack O_NOFOLLOW (it's not required
+ // by POSIX). Check for symlinks with a separate lstat() on
+ // these systems.
+ if (reg_files_only) {
+ struct stat st;
+ if (lstat(pair->src_name, &st)) {
+ message_error("%s: %s", pair->src_name,
+ strerror(errno));
+ return true;
+
+ } else if (S_ISLNK(st.st_mode)) {
+ message_warning(_("%s: Is a symbolic link, "
+ "skipping"), pair->src_name);
+ return true;
}
}
+#endif
- return 0;
-}
+ // Try to open the file. If we are accepting non-regular files,
+ // unblock the caught signals so that open() can be interrupted
+ // if it blocks e.g. due to a FIFO file.
+ if (!reg_files_only)
+ signals_unblock();
+
+ // Maybe this wouldn't need a loop, since all the signal handlers for
+ // which we don't use SA_RESTART set user_abort to true. But it
+ // doesn't hurt to have it just in case.
+ do {
+ pair->src_fd = open(pair->src_name, flags);
+ } while (pair->src_fd == -1 && errno == EINTR && !user_abort);
+
+ if (!reg_files_only)
+ signals_block();
+
+ if (pair->src_fd == -1) {
+ // If we were interrupted, don't display any error message.
+ if (errno == EINTR) {
+ // All the signals that don't have SA_RESTART
+ // set user_abort.
+ assert(user_abort);
+ return true;
+ }
+#ifdef O_NOFOLLOW
+ // Give an understandable error message if the reason
+ // for failing was that the file was a symbolic link.
+ //
+ // Note that at least Linux, OpenBSD, Solaris, and Darwin
+ // use ELOOP to indicate if O_NOFOLLOW was the reason
+ // that open() failed. Because there may be
+ // directories in the pathname, ELOOP may occur also
+ // because of a symlink loop in the directory part.
+ // So ELOOP doesn't tell us what actually went wrong.
+ //
+ // FreeBSD associates EMLINK with O_NOFOLLOW and
+ // Tru64 uses ENOTSUP. We use these directly here
+ // and skip the lstat() call and the associated race.
+ // I want to hear if there are other kernels that
+ // fail with something else than ELOOP with O_NOFOLLOW.
+ bool was_symlink = false;
-static void
-io_close_dir(file_pair *pair)
-{
- if (pair->dir_fd != start_dir)
- (void)close(pair->dir_fd);
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+ if (errno == EMLINK)
+ was_symlink = true;
- return;
-}
+# elif defined(__digital__) && defined(__unix__)
+ if (errno == ENOTSUP)
+ was_symlink = true;
+# else
+ if (errno == ELOOP && reg_files_only) {
+ const int saved_errno = errno;
+ struct stat st;
+ if (lstat(pair->src_name, &st) == 0
+ && S_ISLNK(st.st_mode))
+ was_symlink = true;
+
+ errno = saved_errno;
+ }
+# endif
-/// Opens the source file. The file is opened using the plain filename without
-/// path, thus the file must be in the current working directory. This is
-/// ensured because io_open_dir() is always called before this function.
-static int
-io_open_src(file_pair *pair)
-{
- if (pair->src_name == stdin_filename) {
- pair->src_fd = STDIN_FILENO;
- } else {
- // Strip the pathname. Thanks to io_open_dir(), the file
- // is now in the current working directory.
- const char *filename = str_filename(pair->src_name);
- if (filename == NULL)
- return -1;
-
- // Symlinks are followed if --stdout or --force has been
- // specified.
- const bool follow_symlinks = opt_stdout || opt_force;
- pair->src_fd = open(filename, O_RDONLY | O_NOCTTY
- | (follow_symlinks ? 0 : O_NOFOLLOW));
- if (pair->src_fd == -1) {
- // Give an understandable error message in if reason
- // for failing was that the file was a symbolic link.
- // - Linux, OpenBSD, Solaris: ELOOP
- // - FreeBSD: EMLINK
- // - Tru64: ENOTSUP
- // It seems to be safe to check for all these, since
- // those errno values aren't used for other purporses
- // on any of the listed operating system *when* the
- // above flags are used with open().
- if (!follow_symlinks
- && (errno == ELOOP
-#ifdef EMLINK
- || errno == EMLINK
-#endif
-#ifdef ENOTSUP
- || errno == ENOTSUP
+ if (was_symlink)
+ message_warning(_("%s: Is a symbolic link, "
+ "skipping"), pair->src_name);
+ else
#endif
- )) {
- errmsg(V_WARNING, _("%s: Is a symbolic link, "
- "skipping"), pair->src_name);
- } else {
- errmsg(V_ERROR, "%s: %s", pair->src_name,
- strerror(errno));
- }
+ // Something else than O_NOFOLLOW failing
+ // (assuming that the race conditions didn't
+ // confuse us).
+ message_error("%s: %s", pair->src_name,
+ strerror(errno));
- return -1;
- }
+ return true;
+ }
- if (fstat(pair->src_fd, &pair->src_st)) {
- errmsg(V_ERROR, "%s: %s", pair->src_name,
- strerror(errno));
+ // Drop O_NONBLOCK, which is used only when we are accepting only
+ // regular files. After the open() call, we want things to block
+ // instead of giving EAGAIN.
+ if (reg_files_only) {
+ flags = fcntl(pair->src_fd, F_GETFL);
+ if (flags == -1)
+ goto error_msg;
+
+ flags &= ~O_NONBLOCK;
+
+ if (fcntl(pair->src_fd, F_SETFL, flags))
+ goto error_msg;
+ }
+
+ // Stat the source file. We need the result also when we copy
+ // the permissions, and when unlinking.
+ if (fstat(pair->src_fd, &pair->src_st))
+ goto error_msg;
+
+ if (S_ISDIR(pair->src_st.st_mode)) {
+ message_warning(_("%s: Is a directory, skipping"),
+ pair->src_name);
+ goto error;
+ }
+
+ if (reg_files_only) {
+ if (!S_ISREG(pair->src_st.st_mode)) {
+ message_warning(_("%s: Not a regular file, "
+ "skipping"), pair->src_name);
goto error;
}
- if (S_ISDIR(pair->src_st.st_mode)) {
- errmsg(V_WARNING, _("%s: Is a directory, skipping"),
+ if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
+ // gzip rejects setuid and setgid files even
+ // when --force was used. bzip2 doesn't check
+ // for them, but calls fchown() after fchmod(),
+ // and many systems automatically drop setuid
+ // and setgid bits there.
+ //
+ // We accept setuid and setgid files if
+ // --force was used. We drop these bits
+ // explicitly in io_copy_attrs().
+ message_warning(_("%s: File has setuid or "
+ "setgid bit set, skipping"),
pair->src_name);
goto error;
}
- if (!opt_stdout) {
- if (!opt_force && !S_ISREG(pair->src_st.st_mode)) {
- errmsg(V_WARNING, _("%s: Not a regular file, "
- "skipping"), pair->src_name);
- goto error;
- }
-
- if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
- // Setuid and setgid files are rejected even
- // with --force. This is good for security
- // (hopefully) but it's a bit weird to reject
- // file when --force was given. At least this
- // matches gzip's behavior.
- errmsg(V_WARNING, _("%s: File has setuid or "
- "setgid bit set, skipping"),
- pair->src_name);
- goto error;
- }
-
- if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) {
- errmsg(V_WARNING, _("%s: File has sticky bit "
- "set, skipping"),
- pair->src_name);
- goto error;
- }
+ if (pair->src_st.st_mode & S_ISVTX) {
+ message_warning(_("%s: File has sticky bit "
+ "set, skipping"),
+ pair->src_name);
+ goto error;
+ }
- if (pair->src_st.st_nlink > 1) {
- errmsg(V_WARNING, _("%s: Input file has more "
- "than one hard link, "
- "skipping"), pair->src_name);
- goto error;
- }
+ if (pair->src_st.st_nlink > 1) {
+ message_warning(_("%s: Input file has more "
+ "than one hard link, "
+ "skipping"), pair->src_name);
+ goto error;
}
}
- return 0;
+ return false;
+error_msg:
+ message_error("%s: %s", pair->src_name, strerror(errno));
error:
(void)close(pair->src_fd);
- return -1;
+ return true;
}
@@ -383,65 +407,73 @@ error:
static void
io_close_src(file_pair *pair, bool success)
{
- if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1)
- return;
-
- if (close(pair->src_fd)) {
- errmsg(V_ERROR, _("%s: Closing the file failed: %s"),
- pair->src_name, strerror(errno));
- } else if (success && !opt_keep_original) {
- io_unlink(pair->dir_fd, pair->src_name, pair->src_st.st_ino);
+ if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
+ // If we are going to unlink(), do it before closing the file.
+ // This way there's no risk that someone replaces the file and
+ // happens to get same inode number, which would make us
+ // unlink() wrong file.
+ if (success && !opt_keep_original)
+ io_unlink(pair->src_name, &pair->src_st);
+
+ (void)close(pair->src_fd);
}
return;
}
+/// Opens the destination file. Returns false on success, true on error.
+/// When writing to stdout, nothing is opened and dest_name points to
+/// a constant string that must not be freed.
-static int
+static bool
io_open_dest(file_pair *pair)
{
if (opt_stdout || pair->src_fd == STDIN_FILENO) {
// We don't modify or free() this.
pair->dest_name = (char *)"(stdout)";
pair->dest_fd = STDOUT_FILENO;
+ return false;
+ }
- // Synchronize the order in which files get written to stdout.
- // Unlocking the mutex is safe, because opening the file_pair
- // can no longer fail.
- while (stdout_in_use)
- pthread_cond_wait(&stdout_cond, &mutex);
+ pair->dest_name = suffix_get_dest_name(pair->src_name);
+ if (pair->dest_name == NULL)
+ return true;
- stdout_in_use = true;
+ // If --force was used, unlink the target file first. A missing
+ // target (ENOENT) is not an error. This is done exactly once; the
+ // file is then created with O_EXCL below.
+ if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
+ message_error(_("%s: Cannot unlink: %s"),
+ pair->dest_name, strerror(errno));
+ free(pair->dest_name);
+ return true;
+ }
- } else {
- pair->dest_name = get_dest_name(pair->src_name);
- if (pair->dest_name == NULL)
- return -1;
-
- // This cannot fail, because get_dest_name() doesn't return
- // invalid names.
- const char *filename = str_filename(pair->dest_name);
- assert(filename != NULL);
-
- pair->dest_fd = open(filename, O_WRONLY | O_NOCTTY | O_CREAT
- | (opt_force ? O_TRUNC : O_EXCL),
- S_IRUSR | S_IWUSR);
- if (pair->dest_fd == -1) {
- errmsg(V_ERROR, "%s: %s", pair->dest_name,
+ // Open the file.
+ const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL;
+ const mode_t mode = S_IRUSR | S_IWUSR;
+ pair->dest_fd = open(pair->dest_name, flags, mode);
+
+ if (pair->dest_fd == -1) {
+ // Don't bother with error message if user requested
+ // us to exit anyway.
+ if (!user_abort)
+ message_error("%s: %s", pair->dest_name,
strerror(errno));
- free(pair->dest_name);
- return -1;
- }
- // If this really fails... well, we have a safe fallback.
- struct stat st;
- if (fstat(pair->dest_fd, &st))
- pair->dest_ino = 0;
- else
- pair->dest_ino = st.st_ino;
+ free(pair->dest_name);
+ return true;
}
- return 0;
+ // If this really fails... well, we have a safe fallback.
+ if (fstat(pair->dest_fd, &pair->dest_st)) {
+ pair->dest_st.st_dev = 0;
+ pair->dest_st.st_ino = 0;
+ }
+
+ return false;
}
@@ -455,22 +487,16 @@ io_open_dest(file_pair *pair)
static int
io_close_dest(file_pair *pair, bool success)
{
- if (pair->dest_fd == -1)
+ if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
return 0;
- if (pair->dest_fd == STDOUT_FILENO) {
- stdout_in_use = false;
- pthread_cond_signal(&stdout_cond);
- return 0;
- }
-
if (close(pair->dest_fd)) {
- errmsg(V_ERROR, _("%s: Closing the file failed: %s"),
+ message_error(_("%s: Closing the file failed: %s"),
pair->dest_name, strerror(errno));
// Closing destination file failed, so we cannot trust its
// contents. Get rid of junk:
- io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino);
+ io_unlink(pair->dest_name, &pair->dest_st);
free(pair->dest_name);
return -1;
}
@@ -478,7 +504,7 @@ io_close_dest(file_pair *pair, bool success)
// If the operation using this file wasn't successful, we get rid
// of the junk file.
if (!success)
- io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino);
+ io_unlink(pair->dest_name, &pair->dest_st);
free(pair->dest_name);
@@ -492,98 +518,63 @@ io_open(const char *src_name)
if (is_empty_filename(src_name))
return NULL;
- file_pair *pair = malloc(sizeof(file_pair));
- if (pair == NULL) {
- out_of_memory();
- return NULL;
- }
+ // Since we have only one file open at a time, we can use
+ // a statically allocated structure.
+ static file_pair pair;
- *pair = (file_pair){
+ pair = (file_pair){
.src_name = src_name,
.dest_name = NULL,
- .dir_fd = -1,
.src_fd = -1,
.dest_fd = -1,
.src_eof = false,
};
- pthread_mutex_lock(&mutex);
-
- ++open_pairs;
-
- if (io_open_dir(pair))
- goto error_dir;
-
- if (io_open_src(pair))
- goto error_src;
-
- if (user_abort || io_open_dest(pair))
- goto error_dest;
-
- pthread_mutex_unlock(&mutex);
+ // Block the signals, for which we have a custom signal handler, so
+ // that we don't need to worry about EINTR.
+ signals_block();
+
+ file_pair *ret = NULL;
+ if (!io_open_src(&pair)) {
+ // io_open_src() may have unblocked the signals temporarily,
+ // and thus user_abort may have got set even if open()
+ // succeeded.
+ if (user_abort || io_open_dest(&pair))
+ io_close_src(&pair, false);
+ else
+ ret = &pair;
+ }
- return pair;
+ signals_unblock();
-error_dest:
- io_close_src(pair, false);
-error_src:
- io_close_dir(pair);
-error_dir:
- --open_pairs;
- pthread_mutex_unlock(&mutex);
- free(pair);
- return NULL;
+ return ret;
}
-/// \brief Closes the file descriptors and frees the structure
extern void
io_close(file_pair *pair, bool success)
{
+ signals_block();
+
if (success && pair->dest_fd != STDOUT_FILENO)
io_copy_attrs(pair);
// Close the destination first. If it fails, we must not remove
// the source file!
- if (!io_close_dest(pair, success)) {
- // Closing destination file succeeded. Remove the source file
- // if the operation using this file pair was successful
- // and we haven't been requested to keep the source file.
- io_close_src(pair, success);
- } else {
- // We don't care if operation using this file pair was
- // successful or not, since closing the destination file
- // failed. Don't remove the original file.
- io_close_src(pair, false);
- }
-
- io_close_dir(pair);
+ if (io_close_dest(pair, success))
+ success = false;
- free(pair);
-
- pthread_mutex_lock(&mutex);
-
- if (--open_pairs == 0)
- pthread_cond_signal(&io_cond);
+ // Close the source file, and unlink it if the operation using this
+ // file pair was successful and we haven't requested to keep the
+ // source file.
+ io_close_src(pair, success);
- pthread_mutex_unlock(&mutex);
+ signals_unblock();
return;
}
-/// \brief Reads from a file to a buffer
-///
-/// \param pair File pair having the sourcefile open for reading
-/// \param buf Destination buffer to hold the read data
-/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX
-///
-/// \return On success, number of bytes read is returned. On end of
-/// file zero is returned and pair->src_eof set to true.
-/// On error, SIZE_MAX is returned and error message printed.
-///
-/// \note This does no locking, thus two threads must not read from
-/// the same file. This no problem in this program.
extern size_t
io_read(file_pair *pair, uint8_t *buf, size_t size)
{
@@ -608,7 +599,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size)
continue;
}
- errmsg(V_ERROR, _("%s: Read error: %s"),
+ message_error(_("%s: Read error: %s"),
pair->src_name, strerror(errno));
// FIXME Is this needed?
@@ -625,18 +616,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size)
}
-/// \brief Writes a buffer to a file
-///
-/// \param pair File pair having the destination file open for writing
-/// \param buf Buffer containing the data to be written
-/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX
-///
-/// \return On success, zero is returned. On error, -1 is returned
-/// and error message printed.
-///
-/// \note This does no locking, thus two threads must not write to
-/// the same file. This no problem in this program.
-extern int
+extern bool
io_write(const file_pair *pair, const uint8_t *buf, size_t size)
{
assert(size < SSIZE_MAX);
@@ -660,18 +640,19 @@ io_write(const file_pair *pair, const uint8_t *buf, size_t size)
// GNU bash).
//
// We don't do anything special with --quiet, which
- // is what bzip2 does too. However, we print a
- // message if --verbose was used (or should that
- // only be with double --verbose i.e. debugging?).
- errmsg(errno == EPIPE ? V_VERBOSE : V_ERROR,
- _("%s: Write error: %s"),
+ // is what bzip2 does too. If we get SIGPIPE, we
+ // will handle it like other signals by setting
+ // user_abort, and get EPIPE here.
+ if (errno != EPIPE)
+ message_error(_("%s: Write error: %s"),
pair->dest_name, strerror(errno));
- return -1;
+
+ return true;
}
buf += (size_t)(amount);
size -= (size_t)(amount);
}
- return 0;
+ return false;
}
diff --git a/src/lzma/io.h b/src/lzma/io.h
index d1aa17f4..4d8e61b2 100644
--- a/src/lzma/io.h
+++ b/src/lzma/io.h
@@ -22,6 +22,8 @@
#include "private.h"
+
+// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
#if BUFSIZ <= 1024
# define IO_BUFFER_SIZE 8192
#else
@@ -30,31 +32,66 @@
typedef struct {
+ /// Name of the source file (as given on the command line) or
+ /// pointer to static "(stdin)" when reading from standard input.
const char *src_name;
+
+ /// Destination filename converted from src_name or pointer to static
+ /// "(stdout)" when writing to standard output.
char *dest_name;
- int dir_fd;
+ /// File descriptor of the source file
int src_fd;
+
+ /// File descriptor of the target file
int dest_fd;
+ /// Stat of the source file.
struct stat src_st;
- ino_t dest_ino;
- bool src_eof;
-} file_pair;
+ /// Stat of the destination file.
+ struct stat dest_st;
+ /// True once end of the source file has been detected.
+ bool src_eof;
-extern void io_init(void);
+} file_pair;
-extern void io_finish(void);
+/// \brief Opens a file pair
extern file_pair *io_open(const char *src_name);
+
+/// \brief Closes the file descriptors and frees possible allocated memory
+///
+/// The success argument determines if source or destination file gets
+/// unlinked:
+/// - false: The destination file is unlinked.
+/// - true: The source file is unlinked unless writing to stdout or --keep
+/// was used.
extern void io_close(file_pair *pair, bool success);
+
+/// \brief Reads from the source file to a buffer
+///
+/// \param pair File pair having the source file open for reading
+/// \param buf Destination buffer to hold the read data
+/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
+///
+/// \return On success, number of bytes read is returned. On end of
+/// file zero is returned and pair->src_eof set to true.
+/// On error, SIZE_MAX is returned and error message printed.
extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);
-extern int io_write(const file_pair *pair, const uint8_t *buf, size_t size);
+/// \brief Writes a buffer to the destination file
+///
+/// \param pair File pair having the destination file open for writing
+/// \param buf Buffer containing the data to be written
+/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
+///
+/// \return On success, false is returned. On error, true is returned
+/// and error message printed.
+extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size);
#endif
diff --git a/src/lzma/main.c b/src/lzma/main.c
index 02891193..4e24b98d 100644
--- a/src/lzma/main.c
+++ b/src/lzma/main.c
@@ -21,16 +21,30 @@
#include "open_stdxxx.h"
#include <ctype.h>
-static sig_atomic_t exit_signal = 0;
+
+volatile sig_atomic_t user_abort = false;
+
+/// Exit status to use. This can be changed with set_exit_status().
+static enum exit_status_type exit_status = E_SUCCESS;
+
+/// If we were interrupted by a signal, we store the signal number so that
+/// we can raise that signal to kill the program when all cleanups have
+/// been done.
+static volatile sig_atomic_t exit_signal = 0;
+
+/// Mask of signals for which we have established a signal handler to set
+/// user_abort to true.
+static sigset_t hooked_signals;
+
+/// signals_block() and signals_unblock() can be called recursively.
+static size_t signals_block_count = 0;
static void
signal_handler(int sig)
{
- // FIXME Is this thread-safe together with main()?
exit_signal = sig;
-
- user_abort = 1;
+ user_abort = true;
return;
}
@@ -38,116 +52,226 @@ signal_handler(int sig)
static void
establish_signal_handlers(void)
{
- struct sigaction sa;
- sa.sa_handler = &signal_handler;
- sigfillset(&sa.sa_mask);
- sa.sa_flags = 0;
-
+ // List of signals for which we establish the signal handler.
static const int sigs[] = {
- SIGHUP,
SIGINT,
- SIGPIPE,
SIGTERM,
+#ifdef SIGHUP
+ SIGHUP,
+#endif
+#ifdef SIGPIPE
+ SIGPIPE,
+#endif
+#ifdef SIGXCPU
SIGXCPU,
+#endif
+#ifdef SIGXFSZ
SIGXFSZ,
+#endif
};
- for (size_t i = 0; i < sizeof(sigs) / sizeof(sigs[0]); ++i) {
- if (sigaction(sigs[i], &sa, NULL)) {
- errmsg(V_ERROR, _("Cannot establish signal handlers"));
- my_exit(ERROR);
- }
+ // Mask of the signals for which we have established a signal handler.
+ sigemptyset(&hooked_signals);
+ for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i)
+ sigaddset(&hooked_signals, sigs[i]);
+
+ struct sigaction sa;
+
+ // All the signals that we handle are also blocked while the signal
+ // handler runs.
+ sa.sa_mask = hooked_signals;
+
+ // Don't set SA_RESTART, because we want EINTR so that we can check
+ // for user_abort and cleanup before exiting. We block the signals
+ // for which we have established a handler when we don't want EINTR.
+ sa.sa_flags = 0;
+ sa.sa_handler = &signal_handler;
+
+ for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) {
+ // If the parent process has left some signals ignored,
+ // we don't unignore them.
+ struct sigaction old;
+ if (sigaction(sigs[i], NULL, &old) == 0
+ && old.sa_handler == SIG_IGN)
+ continue;
+
+ // Establish the signal handler.
+ if (sigaction(sigs[i], &sa, NULL))
+ message_signal_handler();
}
- /*
- SIGINFO/SIGUSR1 for status reporting?
- */
+ return;
}
-static bool
-is_tty_stdin(void)
+extern void
+signals_block(void)
{
- const bool ret = isatty(STDIN_FILENO);
- if (ret) {
- // FIXME: Other threads may print between these lines.
- // Maybe that should be fixed. Not a big issue in practice.
- errmsg(V_ERROR, _("Compressed data not read from "
- "a terminal."));
- errmsg(V_ERROR, _("Use `--force' to force decompression."));
- show_try_help();
+ if (signals_block_count++ == 0) {
+ const int saved_errno = errno;
+ sigprocmask(SIG_BLOCK, &hooked_signals, NULL);
+ errno = saved_errno;
}
- return ret;
+ return;
}
-static bool
-is_tty_stdout(void)
+extern void
+signals_unblock(void)
{
- const bool ret = isatty(STDOUT_FILENO);
- if (ret) {
- errmsg(V_ERROR, _("Compressed data not written to "
- "a terminal."));
- errmsg(V_ERROR, _("Use `--force' to force compression."));
- show_try_help();
+ assert(signals_block_count > 0);
+
+ if (--signals_block_count == 0) {
+ const int saved_errno = errno;
+ sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL);
+ errno = saved_errno;
}
- return ret;
+ return;
}
-static char *
-read_name(void)
+extern void
+set_exit_status(enum exit_status_type new_status)
{
- size_t size = 256;
- size_t pos = 0;
- char *name = malloc(size);
- if (name == NULL) {
- out_of_memory();
- return NULL;
+ assert(new_status == E_WARNING || new_status == E_ERROR);
+
+ if (exit_status != E_ERROR)
+ exit_status = new_status;
+
+ return;
+}
+
+
+extern void
+my_exit(enum exit_status_type status)
+{
+ // Close stdout. If something goes wrong, print an error message
+ // to stderr.
+ {
+ const int ferror_err = ferror(stdout);
+ const int fclose_err = fclose(stdout);
+ if (ferror_err || fclose_err) {
+ // If it was fclose() that failed, we have the reason
+ // in errno. If only ferror() indicated an error,
+ // we have no idea what the reason was.
+ message(V_ERROR, _("Writing to standard output "
+ "failed: %s"),
+ fclose_err ? strerror(errno)
+ : _("Unknown error"));
+ status = E_ERROR;
+ }
+ }
+
+ // Close stderr. If something goes wrong, there's nothing where we
+ // could print an error message. Just set the exit status.
+ {
+ const int ferror_err = ferror(stderr);
+ const int fclose_err = fclose(stderr);
+ if (fclose_err || ferror_err)
+ status = E_ERROR;
}
- while (true) {
- const int c = fgetc(opt_files_file);
- if (c == EOF) {
- free(name);
-
- if (ferror(opt_files_file))
- errmsg(V_ERROR, _("%s: Error reading "
- "filenames: %s"),
- opt_files_name,
- strerror(errno));
- else if (pos != 0)
- errmsg(V_ERROR, _("%s: Unexpected end of "
- "input when reading "
- "filenames"), opt_files_name);
+ // If we got a signal, restore its default action and raise it to kill us.
+ const int sig = exit_signal; // Read the volatile flag exactly once.
+ if (sig != 0) {
+ struct sigaction sa;
+ sa.sa_handler = SIG_DFL;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ sigaction(sig, &sa, NULL);
+ raise(sig);
+ // If, for some weird reason, the signal doesn't kill us,
+ // we safely fall to the exit below.
+ }
+
+ exit(status);
+}
+
+
+static const char *
+read_name(const args_info *args)
+{
+ // FIXME: Maybe we should have some kind of memory usage limit here
+ // like the tool has for the actual compression and uncompression.
+ // Giving some huge text file with --files0 makes us read the
+ // whole file in RAM.
+ static char *name = NULL;
+ static size_t size = 256;
+
+ // Allocate the initial buffer. This is never freed, since after it
+ // is no longer needed, the program exits very soon. It is safe to
+ // use xmalloc() and xrealloc() in this function, because while
+ // executing this function, no files are open for writing, and thus
+ // there's no need to cleanup anything before exiting.
+ if (name == NULL)
+ name = xmalloc(size);
+
+ // Write position in name
+ size_t pos = 0;
+
+ // Read one character at a time into name.
+ while (!user_abort) {
+ const int c = fgetc(args->files_file);
+
+ if (ferror(args->files_file)) {
+ // Take care of EINTR since we have established
+ // the signal handlers already.
+ if (errno == EINTR)
+ continue;
+
+ message_error(_("%s: Error reading filenames: %s"),
+ args->files_name, strerror(errno));
return NULL;
}
- if (c == '\0' || c == opt_files_split)
- break;
+ if (feof(args->files_file)) {
+ if (pos != 0)
+ message_error(_("%s: Unexpected end of input "
+ "when reading filenames"),
+ args->files_name);
+
+ return NULL;
+ }
+
+ if (c == args->files_delim) {
+ // We allow consecutive newline (--files) or '\0'
+ // characters (--files0), and ignore such empty
+ // filenames.
+ if (pos == 0)
+ continue;
+
+ // A non-empty name was read. Terminate it with '\0'
+ // and return it.
+ name[pos] = '\0';
+ return name;
+ }
+
+ if (c == '\0') {
+ // A null character was found when using --files,
+ // which expects plain text input separated with
+ // newlines.
+ message_error(_("%s: Null character found when "
+ "reading filenames; maybe you meant "
+ "to use `--files0' instead "
+ "of `--files'?"), args->files_name);
+ return NULL;
+ }
name[pos++] = c;
+ // Allocate more memory if needed. There must always be space
+ // at least for one character to allow terminating the string
+ // with '\0'.
if (pos == size) {
size *= 2;
- char *tmp = realloc(name, size);
- if (tmp == NULL) {
- free(name);
- out_of_memory();
- return NULL;
- }
-
- name = tmp;
+ name = xrealloc(name, size);
}
}
- if (name != NULL)
- name[pos] = '\0';
-
- return name;
+ return NULL;
}
@@ -158,35 +282,56 @@ main(int argc, char **argv)
// a valid file descriptor. Exit immediatelly with exit code ERROR
// if we cannot make the file descriptors valid. Maybe we should
// print an error message, but our stderr could be screwed anyway.
- open_stdxxx(ERROR);
+ open_stdxxx(E_ERROR);
- // Set the program invocation name used in various messages.
- argv0 = argv[0];
+ // This has to be done before calling any liblzma functions.
+ lzma_init();
- setlocale(LC_ALL, "en_US.UTF-8");
+ // Set up the locale.
+ setlocale(LC_ALL, "");
+
+#ifdef ENABLE_NLS
+ // Set up the message translations too.
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
+#endif
+
+ // Set the program invocation name used in various messages, and
+ // do other message handling related initializations.
+ message_init(argv[0]);
// Set hardware-dependent default values. These can be overriden
// on the command line, thus this must be done before parse_args().
hardware_init();
- char **files = parse_args(argc, argv);
-
- if (opt_mode == MODE_COMPRESS && opt_stdout && is_tty_stdout())
- return ERROR;
-
- if (opt_mode == MODE_COMPRESS)
- lzma_init_encoder();
+ // Parse the command line arguments and get an array of filenames.
+ // This doesn't return if something is wrong with the command line
+ // arguments. If there are no arguments, one filename ("-") is still
+ // returned to indicate stdin.
+ args_info args;
+ args_parse(&args, argc, argv);
+
+ // Tell the message handling code how many input files there are if
+ // we know it. This way the progress indicator can show it.
+ if (args.files_name != NULL)
+ message_set_files(0);
else
- lzma_init_decoder();
-
- io_init();
- process_init();
+ message_set_files(args.arg_count);
+
+ // Refuse to write compressed data to standard output if it is
+ // a terminal and --force wasn't used.
+ if (opt_mode == MODE_COMPRESS) {
+ if (opt_stdout || (args.arg_count == 1
+ && strcmp(args.arg_names[0], "-") == 0)) {
+ if (is_tty_stdout()) {
+ message_try_help();
+ my_exit(E_ERROR);
+ }
+ }
+ }
if (opt_mode == MODE_LIST) {
- errmsg(V_ERROR, "--list is not implemented yet.");
- my_exit(ERROR);
+ message_fatal("--list is not implemented yet.");
}
// Hook the signal handlers. We don't need these before we start
@@ -194,60 +339,63 @@ main(int argc, char **argv)
// line arguments.
establish_signal_handlers();
- while (*files != NULL && !user_abort) {
- if (strcmp("-", *files) == 0) {
+ // Process the files given on the command line. Note that if no names
+ // were given, args_parse() gave us a fake "-" filename.
+ for (size_t i = 0; i < args.arg_count && !user_abort; ++i) {
+ if (strcmp("-", args.arg_names[i]) == 0) {
+ // Processing from stdin to stdout. Unless --force
+ // was used, check that we aren't writing compressed
+ // data to a terminal or reading it from terminal.
if (!opt_force) {
if (opt_mode == MODE_COMPRESS) {
- if (is_tty_stdout()) {
- ++files;
+ if (is_tty_stdout())
continue;
- }
} else if (is_tty_stdin()) {
- ++files;
continue;
}
}
- if (opt_files_name == stdin_filename) {
- errmsg(V_ERROR, _("Cannot read data from "
+ // It doesn't make sense to compress data from stdin
+ // if we are supposed to read filenames from stdin
+ // too (enabled with --files or --files0).
+ if (args.files_name == stdin_filename) {
+ message_error(_("Cannot read data from "
"standard input when "
"reading filenames "
"from standard input"));
- ++files;
continue;
}
- *files = (char *)stdin_filename;
+ // Replace the "-" with a special pointer, which is
+ // recognized by process_file() and other things.
+ // This way error messages get a proper filename
+ // string and the code still knows that it is
+ // handling the special case of stdin.
+ args.arg_names[i] = (char *)stdin_filename;
}
- process_file(*files++);
+ // Do the actual compression or uncompression.
+ process_file(args.arg_names[i]);
}
- if (opt_files_name != NULL) {
+ // If --files or --files0 was used, process the filenames from the
+ // given file or stdin. Note that here we don't consider "-" to
+ // indicate stdin like we do with the command line arguments.
+ if (args.files_name != NULL) {
+ // read_name() checks for user_abort so we don't need to
+ // check it as loop termination condition.
while (true) {
- char *name = read_name();
+ const char *name = read_name(&args);
if (name == NULL)
break;
- if (name[0] != '\0')
- process_file(name);
-
- free(name);
+ // read_name() doesn't return empty names.
+ assert(name[0] != '\0');
+ process_file(name);
}
- if (opt_files_name != stdin_filename)
- (void)fclose(opt_files_file);
- }
-
- io_finish();
-
- if (exit_signal != 0) {
- struct sigaction sa;
- sa.sa_handler = SIG_DFL;
- sigfillset(&sa.sa_mask);
- sa.sa_flags = 0;
- sigaction(exit_signal, &sa, NULL);
- raise(exit_signal);
+ if (args.files_name != stdin_filename)
+ (void)fclose(args.files_file);
}
my_exit(exit_status);
diff --git a/src/lzma/main.h b/src/lzma/main.h
new file mode 100644
index 00000000..1e369425
--- /dev/null
+++ b/src/lzma/main.h
@@ -0,0 +1,60 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file main.h
+/// \brief Miscellaneous declarations
+//
+// Copyright (C) 2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef MAIN_H
+#define MAIN_H
+
+/// Possible exit status values. These are the same as used by gzip and bzip2.
+enum exit_status_type {
+ E_SUCCESS = 0,
+ E_ERROR = 1,
+ E_WARNING = 2,
+};
+
+
+/// If this is true, we will clean up the possibly incomplete output file and
+/// return to main() as soon as practical. That is, the code needs to poll
+/// this variable in various places.
+extern volatile sig_atomic_t user_abort;
+
+
+/// Block the signals which don't have SA_RESTART and which would just set
+/// user_abort to true. This is handy when we don't want to handle EINTR
+/// and don't want SA_RESTART either.
+extern void signals_block(void);
+
+
+/// Unblock the signals blocked by signals_block().
+extern void signals_unblock(void);
+
+
+/// Sets the exit status after a warning or error has occurred. If new_status
+/// is E_WARNING and the old exit status was already E_ERROR, the exit
+/// status is not changed.
+extern void set_exit_status(enum exit_status_type new_status);
+
+
+/// Exits the program using the given status. This takes care of closing
+/// stdout and stderr and catches possible errors. If we had got
+/// a signal, this function will raise it so that to the parent process it
+/// appears that we were killed by the signal sent by the user.
+extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn));
+
+
+#endif
diff --git a/src/lzma/message.c b/src/lzma/message.c
new file mode 100644
index 00000000..caba9fbc
--- /dev/null
+++ b/src/lzma/message.c
@@ -0,0 +1,892 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file message.c
+/// \brief Printing messages to stderr
+//
+// Copyright (C) 2007-2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#if defined(HAVE_SYS_TIME_H)
+# include <sys/time.h>
+#elif defined(SIGALRM)
+// FIXME
+#endif
+
+#include <stdarg.h>
+
+
+/// Name of the program which is prefixed to the error messages.
+static const char *argv0;
+
+/// Number of the current file
+static unsigned int files_pos = 0;
+
+/// Total number of input files; zero if unknown.
+static unsigned int files_total;
+
+/// Verbosity level
+static enum message_verbosity verbosity = V_WARNING;
+
+/// Filename which we will print with the verbose messages
+static const char *filename;
+
+/// True once a filename has been printed to stderr as part of progress
+/// message. If automatic progress updating isn't enabled, this becomes true
+/// after the first progress message has been printed due to user sending
+/// SIGALRM. Once this variable is true, we will print an empty line before
+/// the next filename to make the output more readable.
+static bool first_filename_printed = false;
+
+/// This is set to true when we have printed the current filename to stderr
+/// as part of a progress message. This variable is useful only if not
+/// updating progress automatically: if user sends many SIGALRM signals,
+/// we won't print the name of the same file multiple times.
+static bool current_filename_printed = false;
+
+/// True if we should print progress indicator and update it automatically.
+static bool progress_automatic;
+
+/// This is true when a progress message was printed and the cursor is still
+/// on the same line with the progress message. In that case, a newline has
+/// to be printed before any error messages.
+static bool progress_active = false;
+
+/// Expected size of the input stream is needed to show completion percentage
+/// and estimate remaining time.
+static uint64_t expected_in_size;
+
+/// Time when we started processing the file
+static double start_time;
+
+/// The signal handler for SIGALRM sets this to true. It is set back to false
+/// once the progress message has been updated.
+static volatile sig_atomic_t progress_needs_updating = false;
+
+
+/// Signal handler for SIGALRM
+static void
+progress_signal_handler(int sig lzma_attribute((unused)))
+{
+ progress_needs_updating = true;
+ return;
+}
+
+
+/// Get the current time in seconds as double (-1.0 if gettimeofday() fails)
+static double
+my_time(void)
+{
+ struct timeval tv;
+
+ // This really shouldn't fail. I'm not sure what to return if it
+ // still fails. It doesn't look so useful to check the return value
+ // everywhere. FIXME?
+ if (gettimeofday(&tv, NULL))
+ return -1.0;
+ // tv_usec is in microseconds, hence the division by 1e6 (not 1e9).
+ return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e6;
+}
+
+
+/// Wrapper for snprintf() to help constructing a string in pieces.
+static void /* lzma_attribute((format(printf, 3, 4))) */
+my_snprintf(char **pos, size_t *left, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ const int len = vsnprintf(*pos, *left, fmt, ap);
+ va_end(ap);
+
+ // If an error occurred, we want the caller to think that the whole
+ // buffer was used. This way no more data will be written to the
+ // buffer. We don't need better error handling here.
+ if (len < 0 || (size_t)(len) >= *left) {
+ *left = 0;
+ } else {
+ *pos += len;
+ *left -= len;
+ }
+
+ return;
+}
+
+
+extern void
+message_init(const char *given_argv0)
+{
+ // Name of the program
+ argv0 = given_argv0;
+
+ // If --verbose is used, we use a progress indicator if and only
+ // if stderr is a terminal. If stderr is not a terminal, we print
+ // verbose information only after finishing the file. As a special
+ // exception, even if --verbose was not used, user can send SIGALRM
+ // to make us print progress information once without automatic
+ // updating.
+ progress_automatic = isatty(STDERR_FILENO);
+
+/*
+ if (progress_automatic) {
+ // stderr is a terminal. Check the COLUMNS environment
+ // variable to see if the terminal is wide enough. If COLUMNS
+ // doesn't exist or it has some unparseable value, we assume
+ // that the terminal is wide enough.
+ const char *columns_str = getenv("COLUMNS");
+ uint64_t columns;
+ if (columns_str != NULL
+ && !str_to_uint64_raw(&columns, columns_str)
+ && columns < 80)
+ progress_automatic = false;
+ }
+*/
+
+#ifdef SIGALRM
+ // Establish the signal handler for SIGALRM. Since this signal
+ // doesn't require any quick action, we set SA_RESTART.
+ struct sigaction sa;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ sa.sa_handler = &progress_signal_handler;
+ if (sigaction(SIGALRM, &sa, NULL))
+ message_signal_handler();
+#endif
+
+ return;
+}
+
+
+extern void
+message_verbosity_increase(void)
+{
+ if (verbosity < V_DEBUG)
+ ++verbosity;
+
+ return;
+}
+
+
+extern void
+message_verbosity_decrease(void)
+{
+ if (verbosity > V_SILENT)
+ --verbosity;
+
+ return;
+}
+
+
+extern void
+message_set_files(unsigned int files)
+{
+ files_total = files;
+ return;
+}
+
+
+/// Prints the name of the current file if it hasn't been printed already,
+/// except if we are processing exactly one stream from stdin to stdout.
+/// I think it looks nicer to not print "(stdin)" when --verbose is used
+/// in a pipe and no other files are processed.
+static void
+print_filename(void)
+{
+ if (!current_filename_printed
+ && (files_total != 1 || filename != stdin_filename)) {
+ signals_block();
+
+ // If a file was already processed, put an empty line
+ // before the next filename to improve readability.
+ if (first_filename_printed)
+ fputc('\n', stderr);
+
+ first_filename_printed = true;
+ current_filename_printed = true;
+
+ // If we don't know how many files there will be due
+ // to usage of --files or --files0.
+ if (files_total == 0)
+ fprintf(stderr, "%s (%u)\n", filename,
+ files_pos);
+ else
+ fprintf(stderr, "%s (%u/%u)\n", filename,
+ files_pos, files_total);
+
+ signals_unblock();
+ }
+
+ return;
+}
+
+
+extern void
+message_progress_start(const char *src_name, uint64_t in_size)
+{
+ // Store the processing start time of the file and its expected size.
+ // If we aren't printing any statistics, then these are unused. But
+ // since it is possible that the user tells us with SIGALRM to show
+ // statistics, we need to have these available anyway.
+ start_time = my_time();
+ filename = src_name;
+ expected_in_size = in_size;
+
+ // Indicate the name of this file hasn't been printed to
+ // stderr yet.
+ current_filename_printed = false;
+
+ // Start numbering the files starting from one.
+ ++files_pos;
+
+ // If progress indicator is wanted, print the filename and possibly
+ // the file count now. As an exception, if there is exactly one file,
+ // do not print the filename at all.
+ if (verbosity >= V_VERBOSE && progress_automatic) {
+ // Print the filename to stderr if that is appropriate with
+ // the current settings.
+ print_filename();
+
+ // Start the timer to set progress_needs_updating to true
+ // after about one second. An alternative would be to set
+ // progress_needs_updating to true here immediately, but
+ // setting the timer looks better to me, since extremely
+ // early progress info is pretty much useless.
+ alarm(1);
+ }
+
+ return;
+}
+
+
+/// Make the string indicating completion percentage.
+static const char *
+progress_percentage(uint64_t in_pos)
+{
+ // If the size of the input file is unknown or the size told us is
+ // clearly wrong since we have processed more data than the alleged
+ // size of the file, show a static string indicating that we have
+ // no idea of the completion percentage.
+ if (expected_in_size == 0 || in_pos > expected_in_size)
+ return "--- %";
+
+ static char buf[sizeof("99.9 %")];
+
+ // Never show 100.0 % before we actually are finished (that case is
+ // handled separately in message_progress_end()).
+ snprintf(buf, sizeof(buf), "%.1f %%",
+ (double)(in_pos) / (double)(expected_in_size) * 99.9);
+
+ return buf;
+}
+
+
+static void
+progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final)
+{
+ if (final) {
+ // A maximum of four digits is allowed for exact byte count.
+ if (value < 10000) {
+ my_snprintf(pos, left, "%'" PRIu64 " B", value);
+ return;
+ }
+
+// // At maximum of four significant digits is allowed for KiB.
+// if (value < UINT64_C(1023900)) {
+ // A maximum of five significant digits is allowed for KiB.
+ if (value < UINT64_C(10239900)) {
+ my_snprintf(pos, left, "%'.1f KiB",
+ (double)(value) / 1024.0);
+ return;
+ }
+ }
+
+ // Otherwise we use MiB.
+ my_snprintf(pos, left, "%'.1f MiB",
+ (double)(value) / (1024.0 * 1024.0));
+ return;
+}
+
+
+/// Make the string containing the amount of input processed, amount of
+/// output produced, and the compression ratio.
+static const char *
+progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final)
+{
+ // This is enough to hold sizes up to about 99 TiB if thousand
+ // separator is used, or about 1 PiB without thousand separator.
+ // After that the progress indicator will look a bit silly, since
+ // the compression ratio no longer fits with three decimal places.
+ static char buf[44];
+
+ char *pos = buf;
+ size_t left = sizeof(buf);
+
+ // Print the sizes. If this is the final message, use more reasonable
+ // units than MiB if the file was small.
+ progress_sizes_helper(&pos, &left, compressed_pos, final);
+ my_snprintf(&pos, &left, " / ");
+ progress_sizes_helper(&pos, &left, uncompressed_pos, final);
+
+ // Avoid division by zero. If we cannot calculate the ratio, set
+ // it to some nice number greater than 10.0 so that it gets caught
+ // in the next if-clause.
+ const double ratio = uncompressed_pos > 0
+ ? (double)(compressed_pos) / (double)(uncompressed_pos)
+ : 16.0;
+
+ // If the ratio is very bad, just indicate that it is greater than
+ // 9.999. This way the length of the ratio field stays fixed.
+ if (ratio > 9.999)
+ snprintf(pos, left, " > %.3f", 9.999);
+ else
+ snprintf(pos, left, " = %.3f", ratio);
+
+ return buf;
+}
+
+
+/// Make the string containing the processing speed of uncompressed data.
+static const char *
+progress_speed(uint64_t uncompressed_pos, double elapsed)
+{
+ // Don't print the speed immediately, since the early values look
+ // somewhat random.
+ if (elapsed < 3.0)
+ return "";
+
+ static const char unit[][8] = {
+ "KiB/s",
+ "MiB/s",
+ "GiB/s",
+ };
+
+ size_t unit_index = 0;
+
+ // Calculate the speed as KiB/s.
+ double speed = (double)(uncompressed_pos) / (elapsed * 1024.0);
+
+ // Adjust the unit of the speed if needed.
+ while (speed > 999.9) {
+ speed /= 1024.0;
+ if (++unit_index == ARRAY_SIZE(unit))
+ return ""; // Way too fast ;-)
+ }
+
+ static char buf[sizeof("999.9 GiB/s")];
+ snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]);
+ return buf;
+}
+
+
+/// Make a string indicating elapsed or remaining time. The format is either
+/// M:SS or H:MM:SS depending on if the time is an hour or more.
+static const char *
+progress_time(uint32_t seconds)
+{
+ // 9999 hours = 416 days
+ static char buf[sizeof("9999:59:59")];
+
+ // Don't show anything if the time is zero or ridiculously big.
+ if (seconds == 0 || seconds > ((UINT32_C(9999) * 60) + 59) * 60 + 59)
+ return "";
+
+ uint32_t minutes = seconds / 60;
+ seconds %= 60;
+
+ if (minutes >= 60) {
+ const uint32_t hours = minutes / 60;
+ minutes %= 60;
+ snprintf(buf, sizeof(buf),
+ "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32,
+ hours, minutes, seconds);
+ } else {
+ snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32,
+ minutes, seconds);
+ }
+
+ return buf;
+}
+
+
+/// Convert a duration in seconds from double to uint32_t without invoking
+/// undefined behavior: negative values and NaN become zero and values that
+/// don't fit into uint32_t become UINT32_MAX.
+static uint32_t
+clamp_to_seconds(double value)
+{
+	if (!(value > 0.0))
+		return 0;
+
+	if (value >= (double)(UINT32_MAX))
+		return UINT32_MAX;
+
+	return (uint32_t)(value);
+}
+
+
+/// Make the string to contain the estimated remaining time, or if the amount
+/// of input isn't known, how much time has elapsed.
+///
+/// \param      in_pos      Number of input bytes processed so far
+/// \param      elapsed     Seconds spent on the current file so far
+///
+/// \return     String formatted by progress_time(), or an empty string if
+///             there is nothing sensible to show yet.
+static const char *
+progress_remaining(uint64_t in_pos, double elapsed)
+{
+	// If we don't know the size of the input, we indicate the time
+	// spent so far. The same is done if we have already read more than
+	// was expected, since the expected size is then clearly wrong.
+	if (expected_in_size == 0 || in_pos > expected_in_size)
+		return progress_time(clamp_to_seconds(elapsed));
+
+	// If we are at the very beginning of the file or the file is very
+	// small, don't give any estimate to avoid far too wrong estimations.
+	if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0)
+		return "";
+
+	// Calculate the estimate in floating point. in_pos is non-zero here
+	// because of the check above, so the division is safe. The result
+	// can be huge when a big file has a slow start, so it has to be
+	// clamped before it is converted to an integer; progress_time()
+	// then shows nothing for such a ridiculously big value.
+	const double estimate = (double)(expected_in_size - in_pos)
+			* elapsed / (double)(in_pos);
+	uint32_t remaining = clamp_to_seconds(estimate);
+
+	// Don't give an estimate of zero seconds, since it is possible that
+	// all the input has been already passed to the library, but there
+	// is still quite a bit of output pending.
+	if (remaining == 0)
+		remaining = 1;
+
+	return progress_time(remaining);
+}
+
+
+/// Print one progress line to stderr if an update has been requested.
+///
+/// Nothing is done unless progress_needs_updating is set and at least some
+/// input has been processed.
+///
+/// \param      in_pos      Number of input bytes processed so far
+/// \param      out_pos     Number of output bytes produced so far
+extern void
+message_progress_update(uint64_t in_pos, uint64_t out_pos)
+{
+	// If there's nothing to do, return immediately.
+	if (!progress_needs_updating || in_pos == 0)
+		return;
+
+	// Print the filename if it hasn't been printed yet.
+	print_filename();
+
+	// Calculate how long we have been processing this file.
+	const double elapsed = my_time() - start_time;
+
+	// Which position is the compressed one depends on the direction
+	// of the operation.
+	const bool compressing = opt_mode == MODE_COMPRESS;
+	const uint64_t compressed_pos = compressing ? out_pos : in_pos;
+	const uint64_t uncompressed_pos = compressing ? in_pos : out_pos;
+
+	signals_block();
+
+	// Print the actual progress message. The idea is that there are at
+	// least three spaces between the fields in typical situations, but
+	// even in rare situations there is at least one space.
+	fprintf(stderr, " %7s %43s %11s %10s\r",
+			progress_percentage(in_pos),
+			progress_sizes(compressed_pos, uncompressed_pos, false),
+			progress_speed(uncompressed_pos, elapsed),
+			progress_remaining(in_pos, elapsed));
+
+	// This update is done, so clear the flag and wait for the next
+	// SIGALRM.
+	//
+	// NOTE: The flag has to be cleared before the alarm() call below,
+	// or with (very) bad luck we could be clearing it after the alarm
+	// has already been triggered.
+	progress_needs_updating = false;
+
+	if (!progress_automatic) {
+		// The progress message was printed because user had sent us
+		// SIGALRM. In this case, each progress message is printed
+		// on its own line.
+		fputc('\n', stderr);
+	} else {
+		// Mark that the progress indicator is active, so if an error
+		// occurs, the error message gets printed cleanly.
+		progress_active = true;
+
+		// Restart the timer so that progress_needs_updating gets
+		// set to true after about one second.
+		alarm(1);
+	}
+
+	signals_unblock();
+}
+
+
+/// Print the final statistics of the current file to stderr.
+///
+/// Nothing is printed unless the verbosity is at least V_VERBOSE, and
+/// nothing is printed once the user has aborted the operation.
+///
+/// \param      in_pos      Final input position
+/// \param      out_pos     Final output position
+/// \param      success     True if the operation was successful. Without
+///                         the automatic progress indicator, the statistics
+///                         of a failed operation are printed only if some
+///                         output was produced.
+extern void
+message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success)
+{
+	// If we are not in verbose mode or the user has aborted, we have
+	// nothing to do.
+	if (verbosity < V_VERBOSE || user_abort)
+		return;
+
+	// Cancel a pending alarm, if any.
+	if (progress_automatic) {
+		alarm(0);
+		progress_active = false;
+	}
+
+	const double elapsed = my_time() - start_time;
+
+	uint64_t compressed_pos;
+	uint64_t uncompressed_pos;
+	if (opt_mode == MODE_COMPRESS) {
+		compressed_pos = out_pos;
+		uncompressed_pos = in_pos;
+	} else {
+		compressed_pos = in_pos;
+		uncompressed_pos = out_pos;
+	}
+
+	// progress_time() takes whole seconds as uint32_t. Converting a
+	// double that is negative or too big for uint32_t is undefined
+	// behavior, so clamp the value before the conversion.
+	uint32_t elapsed_seconds = 0;
+	if (elapsed >= (double)(UINT32_MAX))
+		elapsed_seconds = UINT32_MAX;
+	else if (elapsed > 0.0)
+		elapsed_seconds = (uint32_t)(elapsed);
+
+	// If it took less than a second, don't display the time.
+	// progress_time() returns an empty string for zero seconds.
+	const char *elapsed_str = progress_time(elapsed_seconds);
+
+	signals_block();
+
+	// When using the auto-updating progress indicator, the final
+	// statistics are printed in the same format as the progress
+	// indicator itself.
+	if (progress_automatic && in_pos > 0) {
+		// Using floating point conversion for the percentage instead
+		// of static "100.0 %" string, because the decimal separator
+		// isn't a dot in all locales.
+		fprintf(stderr, " %5.1f %% %43s %11s %10s\n",
+				100.0,
+				progress_sizes(compressed_pos, uncompressed_pos, true),
+				progress_speed(uncompressed_pos, elapsed),
+				elapsed_str);
+
+	// When no automatic progress indicator is used, don't print a verbose
+	// message at all if something went wrong and we couldn't produce
+	// any output. If we did produce output, then it is sometimes useful
+	// to tell that to the user, especially if we detected an error after
+	// a time-consuming operation.
+	} else if (success || out_pos > 0) {
+		// The filename and size information are always printed.
+		fprintf(stderr, "%s: %s", filename, progress_sizes(
+				compressed_pos, uncompressed_pos, true));
+
+		// The speed and elapsed time aren't always shown.
+		const char *speed = progress_speed(uncompressed_pos, elapsed);
+		if (speed[0] != '\0')
+			fprintf(stderr, ", %s", speed);
+
+		if (elapsed_str[0] != '\0')
+			fprintf(stderr, ", %s", elapsed_str);
+
+		fputc('\n', stderr);
+	}
+
+	signals_unblock();
+
+	return;
+}
+
+
+/// Print "argv0: message" and a newline to stderr if the level of the
+/// message is enabled by the current verbosity.
+///
+/// \param      v       Verbosity level of the message
+/// \param      fmt     printf-style format string
+/// \param      ap      Arguments for the format string
+static void
+vmessage(enum message_verbosity v, const char *fmt, va_list ap)
+{
+	// Messages above the current verbosity level are dropped.
+	if (v > verbosity)
+		return;
+
+	signals_block();
+
+	// A progress message on the screen is ended with a newline instead
+	// of being erased, so that it stays readable and shows where the
+	// error occurred. (The alternative would be to clear the progress
+	// message and replace it with the error message.)
+	if (progress_active) {
+		progress_active = false;
+		fputc('\n', stderr);
+	}
+
+	fprintf(stderr, "%s: ", argv0);
+	vfprintf(stderr, fmt, ap);
+	fputc('\n', stderr);
+
+	signals_unblock();
+}
+
+
+/// Print a message at verbosity level v. The arguments are handed to
+/// vmessage(), which decides if the message is shown.
+extern void
+message(enum message_verbosity v, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmessage(v, fmt, args);
+	va_end(args);
+}
+
+
+/// Print a message at the V_WARNING level and then request the exit status
+/// E_WARNING. The exit status is requested even if the message itself was
+/// suppressed by the verbosity level.
+extern void
+message_warning(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmessage(V_WARNING, fmt, args);
+	va_end(args);
+
+	set_exit_status(E_WARNING);
+}
+
+
+/// Print a message at the V_ERROR level and then request the exit status
+/// E_ERROR. The exit status is requested even if the message itself was
+/// suppressed by the verbosity level.
+extern void
+message_error(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmessage(V_ERROR, fmt, args);
+	va_end(args);
+
+	set_exit_status(E_ERROR);
+}
+
+
+/// Print a message at the V_ERROR level and then end the program via
+/// my_exit(E_ERROR).
+extern void
+message_fatal(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vmessage(V_ERROR, fmt, args);
+	va_end(args);
+
+	my_exit(E_ERROR);
+}
+
+
+/// Report an internal error (a bug in this program) via message_fatal().
+extern void
+message_bug(void)
+{
+ message_fatal(_("Internal error (bug)"));
+}
+
+
+/// Report via message_fatal() that the signal handlers could not be
+/// established.
+extern void
+message_signal_handler(void)
+{
+ message_fatal(_("Cannot establish signal handlers"));
+}
+
+
+/// Convert a liblzma return value to a message that can be shown to the user.
+///
+/// \param      code    Return value from a liblzma function
+///
+/// \return     Pointer to a message string. This is never NULL, so the
+///             result can be passed directly to a "%s" conversion: values
+///             that aren't errors or warnings, and values unknown to this
+///             function, give the "Internal error (bug)" message.
+extern const char *
+message_strm(lzma_ret code)
+{
+	switch (code) {
+	case LZMA_NO_CHECK:
+		return _("No integrity check; not verifying file integrity");
+
+	case LZMA_UNSUPPORTED_CHECK:
+		return _("Unsupported type of integrity check; "
+				"not verifying file integrity");
+
+	case LZMA_MEM_ERROR:
+		return strerror(ENOMEM);
+
+	case LZMA_MEMLIMIT_ERROR:
+		return _("Memory usage limit reached");
+
+	case LZMA_FORMAT_ERROR:
+		return _("File format not recognized");
+
+	case LZMA_OPTIONS_ERROR:
+		return _("Unsupported options");
+
+	case LZMA_DATA_ERROR:
+		return _("Compressed data is corrupt");
+
+	case LZMA_BUF_ERROR:
+		return _("Unexpected end of input");
+
+	case LZMA_OK:
+	case LZMA_STREAM_END:
+	case LZMA_GET_CHECK:
+	case LZMA_PROG_ERROR:
+		// There is intentionally no "default" label: this way the
+		// compiler can warn if a constant of lzma_ret isn't handled
+		// by this switch.
+		break;
+	}
+
+	// Asking for a message for any other value is a bug in the caller
+	// or in the library. Returning NULL here would make callers pass
+	// a null pointer to "%s".
+	return _("Internal error (bug)");
+}
+
+
+/// Tell the user to try the --help option.
+extern void
+message_try_help(void)
+{
+	// V_WARNING is used instead of V_ERROR so that the hint isn't shown
+	// when --quiet has been specified.
+	message(V_WARNING, _("Try `%s --help' for more information."), argv0);
+}
+
+
+/// Print the version of this tool and the version reported by liblzma to
+/// stdout, and then exit via my_exit(E_SUCCESS).
+extern void
+message_version(void)
+{
+ // It is possible that liblzma version is different than the command
+ // line tool version, so print both.
+ printf("xz " PACKAGE_VERSION "\n");
+ printf("liblzma %s\n", lzma_version_string());
+ my_exit(E_SUCCESS);
+}
+
+
+/// Print the usage text to stdout and exit via my_exit(E_SUCCESS).
+///
+/// \param long_help If true, the section headings, the advanced options,
+/// and the resource usage summary are printed in addition
+/// to the basic options.
+extern void
+message_help(bool long_help)
+{
+ printf(_("Usage: %s [OPTION]... [FILE]...\n"
+ "Compress or decompress FILEs in the .xz format.\n\n"),
+ argv0);
+
+ puts(_("Mandatory arguments to long options are mandatory for "
+ "short options too.\n"));
+
+ if (long_help)
+ puts(_(" Operation mode:\n"));
+
+ puts(_(
+" -z, --compress force compression\n"
+" -d, --decompress force decompression\n"
+" -t, --test test compressed file integrity\n"
+" -l, --list list information about files"));
+
+ if (long_help)
+ puts(_("\n Operation modifiers:\n"));
+
+ puts(_(
+" -k, --keep keep (don't delete) input files\n"
+" -f, --force force overwrite of output file and (de)compress links\n"
+" -c, --stdout write to standard output and don't delete input files"));
+
+ if (long_help)
+ puts(_(
+" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
+" --files=[FILE] read filenames to process from FILE; if FILE is\n"
+" omitted, filenames are read from the standard input;\n"
+" filenames must be terminated with the newline character\n"
+" --files0=[FILE] like --files but use the null character as terminator"));
+
+ if (long_help) {
+ puts(_("\n Basic file format and compression options:\n"));
+ puts(_(
+" -F, --format=FMT file format to encode or decode; possible values are\n"
+" `auto' (default), `xz', `lzma', and `raw'\n"
+" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
+" or `sha256'"));
+ }
+
+ puts(_(
+" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n"
+" compression, 7-9 excellent compression; default is 7"));
+
+ puts(_(
+" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
+" the default setting, which depends on the operation mode\n"
+" and the amount of physical memory (RAM)"));
+
+ if (long_help) {
+ puts(_(
+"\n Custom filter chain for compression (alternative for using presets):"));
+
+ // The help of the LZMA1/LZMA2, Delta, and Subblock filters is
+ // compiled in only if the encoder or the decoder of the filter
+ // has been enabled at build time.
+#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \
+ || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ puts(_(
+"\n"
+" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
+" --lzma2=[OPTS] more of the following options (valid values; default):\n"
+" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
+" lc=NUM number of literal context bits (0-4; 3)\n"
+" lp=NUM number of literal position bits (0-4; 0)\n"
+" pb=NUM number of position bits (0-4; 2)\n"
+" mode=MODE compression mode (fast, normal; normal)\n"
+" nice=NUM nice length of a match (2-273; 64)\n"
+" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
+" depth=NUM maximum search depth; 0=automatic (default)"));
+#endif
+
+ puts(_(
+"\n"
+" --x86 x86 filter (sometimes called BCJ filter)\n"
+" --powerpc PowerPC (big endian) filter\n"
+" --ia64 IA64 (Itanium) filter\n"
+" --arm ARM filter\n"
+" --armthumb ARM-Thumb filter\n"
+" --sparc SPARC filter"));
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ puts(_(
+"\n"
+" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n"
+" dist=NUM distance between bytes being subtracted\n"
+" from each other (1-256; 1)"));
+#endif
+
+#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK)
+ puts(_(
+"\n"
+" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
+" size=NUM number of bytes of data per subblock\n"
+" (1 - 256Mi; 4Ki)\n"
+" rle=NUM run-length encoder chunk size (0-256; 0)"));
+#endif
+ }
+
+/*
+ if (long_help)
+ puts(_(
+"\n"
+" Resource usage options:\n"
+"\n"
+" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
+" the default setting, which depends on the operation mode\n"
+" and the amount of physical memory (RAM)\n"
+" -T, --threads=NUM use a maximum of NUM (de)compression threads"
+// " --threading=STR threading style; possible values are `auto' (default),\n"
+// " `files', and `stream'
+));
+*/
+ if (long_help)
+ puts(_("\n Other options:\n"));
+
+ puts(_(
+" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
+" -v, --verbose be verbose; specify twice for even more verbose"));
+
+ // The descriptions of -h and -H depend on which of the two help
+ // texts is being printed.
+ if (long_help)
+ puts(_(
+"\n"
+" -h, --help display the short help (lists only the basic options)\n"
+" -H, --long-help display this long help"));
+ else
+ puts(_(
+" -h, --help display this short help\n"
+" -H, --long-help display the long help (lists also the advanced options)"));
+
+ puts(_(
+" -V, --version display the version number"));
+
+ puts(_("\nWith no FILE, or when FILE is -, read standard input.\n"));
+
+ if (long_help) {
+ // FIXME !!!
+ // NOTE(review): the encoder limit is shown regardless of the
+ // operation mode; presumably this is what the FIXME is about.
+ // The division assumes that the limit is in bytes (TODO confirm).
+ // A result of zero is shown as one.
+ size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024);
+ if (mem_limit == 0)
+ mem_limit = 1;
+
+ // We use PRIu64 instead of %zu to support pre-C99 libc.
+ // FIXME: Use ' but avoid warnings.
+ puts(_("On this system and configuration, the tool will use"));
+ printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"),
+ (uint64_t)(mem_limit));
+ // N_() picks the singular or the plural form of the message
+ // according to the number of threads.
+ printf(N_(" * at maximum of one thread for (de)compression.\n\n",
+ " * at maximum of %" PRIu64
+ " threads for (de)compression.\n\n",
+ (uint64_t)(opt_threads)), (uint64_t)(opt_threads));
+ }
+
+ printf(_("Report bugs to <%s> (in English or Finnish).\n"),
+ PACKAGE_BUGREPORT);
+
+ my_exit(E_SUCCESS);
+}
diff --git a/src/lzma/message.h b/src/lzma/message.h
new file mode 100644
index 00000000..7ef9b165
--- /dev/null
+++ b/src/lzma/message.h
@@ -0,0 +1,132 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file message.h
+/// \brief Printing messages to stderr
+//
+// Copyright (C) 2007-2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef MESSAGE_H
+#define MESSAGE_H
+
+
+/// Verbosity levels
+///
+/// The levels are listed from the quietest to the most verbose one. The
+/// order matters: a message is printed when its level is less than or
+/// equal to the current verbosity level.
+enum message_verbosity {
+ V_SILENT, ///< No messages
+ V_ERROR, ///< Only error messages
+ V_WARNING, ///< Errors and warnings
+ V_VERBOSE, ///< Errors, warnings, and verbose statistics
+ V_DEBUG, ///< Debugging, FIXME remove?
+};
+
+
+/// \brief Initializes the message functions
+///
+/// \param argv0 Name of the program i.e. argv[0] from main()
+///
+/// If an error occurs, this function doesn't return.
+///
+extern void message_init(const char *argv0);
+
+
+/// Increase verbosity level by one step unless it was at maximum.
+extern void message_verbosity_increase(void);
+
+/// Decrease verbosity level by one step unless it was at minimum.
+extern void message_verbosity_decrease(void);
+
+
+/// Set the total number of files to be processed (stdin is counted as a file
+/// here). The default is one.
+extern void message_set_files(unsigned int files);
+
+
+/// \brief Print a message if verbosity level is at least "verbosity"
+///
+/// This doesn't touch the exit status.
+extern void message(enum message_verbosity verbosity, const char *fmt, ...)
+ lzma_attribute((format(printf, 2, 3)));
+
+
+/// \brief Prints a warning and possibly sets exit status
+///
+/// The message is printed only if verbosity level is at least V_WARNING.
+/// The exit status is set to WARNING unless it was already at ERROR.
+extern void message_warning(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)));
+
+
+/// \brief Prints an error message and sets exit status
+///
+/// The message is printed only if verbosity level is at least V_ERROR.
+/// The exit status is set to ERROR.
+extern void message_error(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)));
+
+
+/// \brief Prints an error message and exits with exit status E_ERROR
+///
+/// The message is printed only if verbosity level is at least V_ERROR.
+extern void message_fatal(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)))
+ lzma_attribute((noreturn));
+
+
+/// Print an error message that an internal error occurred and exit with
+/// exit status E_ERROR.
+extern void message_bug(void) lzma_attribute((noreturn));
+
+
+/// Print a message that establishing signal handlers failed, and exit with
+/// exit status ERROR.
+extern void message_signal_handler(void) lzma_attribute((noreturn));
+
+
+/// Converts lzma_ret to a string.
+extern const char *message_strm(lzma_ret code);
+
+
+/// Print a message that user should try --help.
+extern void message_try_help(void);
+
+
+/// Prints the version number to stdout and exits with exit status SUCCESS.
+extern void message_version(void) lzma_attribute((noreturn));
+
+
+/// Print the help message.
+extern void message_help(bool long_help) lzma_attribute((noreturn));
+
+
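+/// \brief Starts the progress reporting for a new file
+///
+/// \param filename Name of the file that is going to be processed
+/// \param in_size Size of the input file in bytes, or zero if the
+/// size isn't known
+///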
+extern void message_progress_start(const char *filename, uint64_t in_size);
+
+
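+/// \brief Prints an updated progress message if an update is pending
+///
+/// \param in_pos Number of input bytes processed so far. Nothing is
+/// printed while this is zero.
+/// \param out_pos Number of output bytes produced so far
+///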
+extern void message_progress_update(uint64_t in_pos, uint64_t out_pos);
+
+
+/// \brief Finishes the progress message if we were in verbose mode
+///
+/// \param in_pos Final input position i.e. how much input there was.
+/// \param out_pos Final output position
+/// \param success True if the operation was successful. When the
+/// automatic progress indicator isn't in use, the final
+/// statistics of an unsuccessful operation are printed
+/// only if some output was produced.
+///
+extern void message_progress_end(
+ uint64_t in_pos, uint64_t out_pos, bool success);
+
+#endif
diff --git a/src/lzma/options.c b/src/lzma/options.c
index f5ebdd8e..77ebddd6 100644
--- a/src/lzma/options.c
+++ b/src/lzma/options.c
@@ -79,11 +79,9 @@ parse_options(const char *str, const option_map *opts,
if (value != NULL)
*value++ = '\0';
- if (value == NULL || value[0] == '\0') {
- errmsg(V_ERROR, _("%s: Options must be `name=value' "
+ if (value == NULL || value[0] == '\0')
+ message_fatal(_("%s: Options must be `name=value' "
"pairs separated with commas"), str);
- my_exit(ERROR);
- }
// Look for the option name from the option map.
bool found = false;
@@ -106,11 +104,9 @@ parse_options(const char *str, const option_map *opts,
break;
}
- if (opts[i].map[j].name == NULL) {
- errmsg(V_ERROR, _("%s: Invalid option "
+ if (opts[i].map[j].name == NULL)
+ message_fatal(_("%s: Invalid option "
"value"), value);
- my_exit(ERROR);
- }
set(filter_options, i, opts[i].map[j].id);
}
@@ -119,10 +115,8 @@ parse_options(const char *str, const option_map *opts,
break;
}
- if (!found) {
- errmsg(V_ERROR, _("%s: Invalid option name"), name);
- my_exit(ERROR);
- }
+ if (!found)
+ message_fatal(_("%s: Invalid option name"), name);
if (split == NULL)
break;
@@ -168,7 +162,7 @@ set_subblock(void *options, uint32_t key, uint64_t value)
extern lzma_options_subblock *
-parse_options_subblock(const char *str)
+options_subblock(const char *str)
{
static const option_map opts[] = {
{ "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN,
@@ -217,7 +211,7 @@ set_delta(void *options, uint32_t key, uint64_t value)
extern lzma_options_delta *
-parse_options_delta(const char *str)
+options_delta(const char *str)
{
static const option_map opts[] = {
{ "dist", NULL, LZMA_DELTA_DIST_MIN,
@@ -225,7 +219,7 @@ parse_options_delta(const char *str)
{ NULL, NULL, 0, 0 }
};
- lzma_options_delta *options = xmalloc(sizeof(lzma_options_subblock));
+ lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta));
*options = (lzma_options_delta){
// It's hard to give a useful default for this.
.type = LZMA_DELTA_TYPE_BYTE,
@@ -296,7 +290,7 @@ set_lzma(void *options, uint32_t key, uint64_t value)
extern lzma_options_lzma *
-parse_options_lzma(const char *str)
+options_lzma(const char *str)
{
static const name_id_map modes[] = {
{ "fast", LZMA_MODE_FAST },
@@ -345,18 +339,14 @@ parse_options_lzma(const char *str)
parse_options(str, opts, &set_lzma, options);
- if (options->lc + options->lp > LZMA_LCLP_MAX) {
- errmsg(V_ERROR, "The sum of lc and lp must be at "
- "maximum of 4");
- exit(ERROR);
- }
+ if (options->lc + options->lp > LZMA_LCLP_MAX)
+ message_fatal(_("The sum of lc and lp must be at "
+ "maximum of 4"));
const uint32_t nice_len_min = options->mf & 0x0F;
- if (options->nice_len < nice_len_min) {
- errmsg(V_ERROR, "The selected match finder requires at "
- "least nice=%" PRIu32, nice_len_min);
- exit(ERROR);
- }
+ if (options->nice_len < nice_len_min)
+ message_fatal(_("The selected match finder requires at "
+ "least nice=%" PRIu32), nice_len_min);
return options;
}
diff --git a/src/lzma/options.h b/src/lzma/options.h
index 885c5969..4253ac3c 100644
--- a/src/lzma/options.h
+++ b/src/lzma/options.h
@@ -27,20 +27,20 @@
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
-extern lzma_options_subblock *parse_options_subblock(const char *str);
+extern lzma_options_subblock *options_subblock(const char *str);
/// \brief Parser for Delta options
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
-extern lzma_options_delta *parse_options_delta(const char *str);
+extern lzma_options_delta *options_delta(const char *str);
/// \brief Parser for LZMA options
///
/// \return Pointer to allocated options structure.
/// Doesn't return on error.
-extern lzma_options_lzma *parse_options_lzma(const char *str);
+extern lzma_options_lzma *options_lzma(const char *str);
#endif
diff --git a/src/lzma/private.h b/src/lzma/private.h
index f6a75645..b463a08e 100644
--- a/src/lzma/private.h
+++ b/src/lzma/private.h
@@ -22,32 +22,30 @@
#include "sysdefs.h"
-#ifdef HAVE_ERRNO_H
-# include <errno.h>
-#else
-extern int errno;
-#endif
-
+#include <sys/types.h>
#include <sys/stat.h>
+#include <errno.h>
#include <signal.h>
-#include <pthread.h>
#include <locale.h>
#include <stdio.h>
-#include <fcntl.h>
#include <unistd.h>
-#include "gettext.h"
-#define _(msgid) gettext(msgid)
-#define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n)
+#ifdef ENABLE_NLS
+# include <libintl.h>
+# define _(msgid) gettext(msgid)
+# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n)
+#else
+# define _(msgid) (msgid)
+# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
+#endif
-#include "alloc.h"
+#include "main.h"
+#include "process.h"
+#include "message.h"
#include "args.h"
-#include "error.h"
#include "hardware.h"
-#include "help.h"
#include "io.h"
#include "options.h"
-#include "process.h"
#include "suffix.h"
#include "util.h"
diff --git a/src/lzma/process.c b/src/lzma/process.c
index fc4ef96a..d30878e4 100644
--- a/src/lzma/process.c
+++ b/src/lzma/process.c
@@ -20,137 +20,158 @@
#include "private.h"
-typedef struct {
- lzma_stream strm;
- void *options;
+enum operation_mode opt_mode = MODE_COMPRESS;
- file_pair *pair;
+enum format_type opt_format = FORMAT_AUTO;
- /// We don't need this for *anything* but seems that at least with
- /// glibc pthread_create() doesn't allow NULL.
- pthread_t thread;
- bool in_use;
+/// Stream used to communicate with liblzma
+static lzma_stream strm = LZMA_STREAM_INIT;
-} thread_data;
+/// Filters needed for all encoding all formats, and also decoding in raw data
+static lzma_filter filters[LZMA_FILTERS_MAX + 1];
+/// Number of filters. Zero indicates that we are using a preset.
+static size_t filters_count = 0;
-/// Number of available threads
-static size_t free_threads;
+/// Number of the preset (1-9)
+static size_t preset_number = 7;
-/// Thread-specific data
-static thread_data *threads;
+/// Indicate if no preset has been given. In that case, we will auto-adjust
+/// the compression preset so that it doesn't use too much RAM.
+// FIXME
+static bool preset_default = true;
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+/// Integrity check type
+static lzma_check check = LZMA_CHECK_CRC64;
-/// Attributes of new coder threads. They are created in detached state.
-/// Coder threads signal to the service thread themselves when they are done.
-static pthread_attr_t thread_attr;
+extern void
+coder_set_check(lzma_check new_check)
+{
+ check = new_check;
+ return;
+}
-//////////
-// Init //
-//////////
extern void
-process_init(void)
+coder_set_preset(size_t new_preset)
{
- threads = malloc(sizeof(thread_data) * opt_threads);
- if (threads == NULL) {
- out_of_memory();
- my_exit(ERROR);
- }
+ preset_number = new_preset;
+ preset_default = false;
+ return;
+}
- for (size_t i = 0; i < opt_threads; ++i)
- memzero(&threads[i], sizeof(threads[0]));
- if (pthread_attr_init(&thread_attr)
- || pthread_attr_setdetachstate(
- &thread_attr, PTHREAD_CREATE_DETACHED)) {
- out_of_memory();
- my_exit(ERROR);
- }
+extern void
+coder_add_filter(lzma_vli id, void *options)
+{
+ if (filters_count == LZMA_FILTERS_MAX)
+ message_fatal(_("Maximum number of filters is four"));
- free_threads = opt_threads;
+ filters[filters_count].id = id;
+ filters[filters_count].options = options;
+ ++filters_count;
return;
}
-//////////////////////////
-// Thread-specific data //
-//////////////////////////
-
-static thread_data *
-get_thread_data(void)
+extern void
+coder_set_compression_settings(void)
{
- pthread_mutex_lock(&mutex);
+ // Options for LZMA1 or LZMA2 in case we are using a preset.
+ static lzma_options_lzma opt_lzma;
+
+ if (filters_count == 0) {
+ // We are using a preset. This is not a good idea in raw mode
+ // except when playing around with things. Different versions
+ // of this software may use different options in presets, and
+ // thus make uncompressing the raw data difficult.
+ if (opt_format == FORMAT_RAW) {
+ // The message is shown only if warnings are allowed
+ // but the exit status isn't changed.
+ message(V_WARNING, _("Using a preset in raw mode "
+ "is discouraged."));
+ message(V_WARNING, _("The exact options of the "
+ "presets may vary between software "
+ "versions."));
+ }
- while (free_threads == 0) {
- pthread_cond_wait(&cond, &mutex);
+ // Get the preset for LZMA1 or LZMA2.
+ if (lzma_lzma_preset(&opt_lzma, preset_number))
+ message_bug();
- if (user_abort) {
- pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
- return NULL;
- }
+ // Use LZMA2 except with --format=lzma we use LZMA1.
+ filters[0].id = opt_format == FORMAT_LZMA
+ ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
+ filters[0].options = &opt_lzma;
+ filters_count = 1;
}
- thread_data *t = threads;
- while (t->in_use)
- ++t;
+ // Terminate the filter options array.
+ filters[filters_count].id = LZMA_VLI_UNKNOWN;
- t->in_use = true;
- --free_threads;
+ // If we are using the LZMA_Alone format, allow exactly one filter
+ // which has to be LZMA.
+ if (opt_format == FORMAT_LZMA && (filters_count != 1
+ || filters[0].id != LZMA_FILTER_LZMA1))
+ message_fatal(_("With --format=lzma only the LZMA1 filter "
+ "is supported"));
- pthread_mutex_unlock(&mutex);
-
- return t;
-}
+ // TODO: liblzma probably needs an API to validate the filter chain.
+ // If using --format=raw, we can be decoding.
+ uint64_t memory_usage;
+ uint64_t memory_limit;
+ if (opt_mode == MODE_COMPRESS) {
+ memory_usage = lzma_memusage_encoder(filters);
+ memory_limit = hardware_memlimit_encoder();
+ } else {
+ memory_usage = lzma_memusage_decoder(filters);
+ memory_limit = hardware_memlimit_decoder();
+ }
-static void
-release_thread_data(thread_data *t)
-{
- pthread_mutex_lock(&mutex);
+ if (memory_usage == UINT64_MAX)
+ message_bug();
- t->in_use = false;
- ++free_threads;
+ if (preset_default) {
+ // When no preset was explicitly requested, we use the default
+ // preset only if the memory usage limit allows. Otherwise we
+ // select a lower preset automatically.
+ while (memory_usage > memory_limit) {
+ if (preset_number == 1)
+ message_fatal(_("Memory usage limit is too "
+ "small for any internal "
+ "filter preset"));
- pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
+ if (lzma_lzma_preset(&opt_lzma, --preset_number))
+ message_bug();
- return;
-}
-
-
-static int
-create_thread(void *(*func)(thread_data *t), thread_data *t)
-{
- if (opt_threads == 1) {
- func(t);
- } else {
- const int err = pthread_create(&t->thread, &thread_attr,
- (void *(*)(void *))(func), t);
- if (err) {
- errmsg(V_ERROR, _("Cannot create a thread: %s"),
- strerror(err));
- user_abort = 1;
- return -1;
+ memory_usage = lzma_memusage_encoder(filters);
}
+ } else {
+ if (memory_usage > memory_limit)
+ message_fatal(_("Memory usage limit is too small "
+ "for the given filter setup"));
}
- return 0;
-}
+ // Limit the number of worked threads so that memory usage
+ // limit isn't exceeded.
+ assert(memory_usage > 0);
+ size_t thread_limit = memory_limit / memory_usage;
+ if (thread_limit == 0)
+ thread_limit = 1;
+ if (opt_threads > thread_limit)
+ opt_threads = thread_limit;
+
+ return;
+}
-/////////////////////////
-// One thread per file //
-/////////////////////////
-static int
-single_init(thread_data *t)
+static bool
+coder_init(void)
{
lzma_ret ret = LZMA_PROG_ERROR;
@@ -162,17 +183,15 @@ single_init(thread_data *t)
break;
case FORMAT_XZ:
- ret = lzma_stream_encoder(&t->strm,
- opt_filters, opt_check);
+ ret = lzma_stream_encoder(&strm, filters, check);
break;
case FORMAT_LZMA:
- ret = lzma_alone_encoder(&t->strm,
- opt_filters[0].options);
+ ret = lzma_alone_encoder(&strm, filters[0].options);
break;
case FORMAT_RAW:
- ret = lzma_raw_encoder(&t->strm, opt_filters);
+ ret = lzma_raw_encoder(&strm, filters);
break;
}
} else {
@@ -181,254 +200,192 @@ single_init(thread_data *t)
switch (opt_format) {
case FORMAT_AUTO:
- ret = lzma_auto_decoder(&t->strm, opt_memory, flags);
+ ret = lzma_auto_decoder(&strm,
+ hardware_memlimit_decoder(), flags);
break;
case FORMAT_XZ:
- ret = lzma_stream_decoder(&t->strm, opt_memory, flags);
+ ret = lzma_stream_decoder(&strm,
+ hardware_memlimit_decoder(), flags);
break;
case FORMAT_LZMA:
- ret = lzma_alone_decoder(&t->strm, opt_memory);
+ ret = lzma_alone_decoder(&strm,
+ hardware_memlimit_decoder());
break;
case FORMAT_RAW:
// Memory usage has already been checked in args.c.
- ret = lzma_raw_decoder(&t->strm, opt_filters);
+ // FIXME Comment
+ ret = lzma_raw_decoder(&strm, filters);
break;
}
}
if (ret != LZMA_OK) {
if (ret == LZMA_MEM_ERROR)
- out_of_memory();
+ message_error("%s", message_strm(LZMA_MEM_ERROR));
else
- internal_error();
+ message_bug();
- return -1;
+ return true;
}
- return 0;
+ return false;
}
-static void *
-single(thread_data *t)
+static bool
+coder_run(file_pair *pair)
{
- if (single_init(t)) {
- io_close(t->pair, false);
- release_thread_data(t);
- return NULL;
- }
+ // Buffers to hold input and output data.
+ uint8_t in_buf[IO_BUFFER_SIZE];
+ uint8_t out_buf[IO_BUFFER_SIZE];
+
+ // Initialize the progress indicator.
+ const uint64_t in_size = pair->src_st.st_size <= (off_t)(0)
+ ? 0 : (uint64_t)(pair->src_st.st_size);
+ message_progress_start(pair->src_name, in_size);
- uint8_t in_buf[BUFSIZ];
- uint8_t out_buf[BUFSIZ];
lzma_action action = LZMA_RUN;
lzma_ret ret;
- bool success = false;
- t->strm.avail_in = 0;
- t->strm.next_out = out_buf;
- t->strm.avail_out = BUFSIZ;
+ strm.avail_in = 0;
+ strm.next_out = out_buf;
+ strm.avail_out = IO_BUFFER_SIZE;
while (!user_abort) {
- if (t->strm.avail_in == 0 && !t->pair->src_eof) {
- t->strm.next_in = in_buf;
- t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ);
+ // Fill the input buffer if it is empty and we haven't reached
+ // end of file yet.
+ if (strm.avail_in == 0 && !pair->src_eof) {
+ strm.next_in = in_buf;
+ strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
- if (t->strm.avail_in == SIZE_MAX)
+ if (strm.avail_in == SIZE_MAX)
break;
- if (t->pair->src_eof)
+ // Encoder needs to know when we have given all the
+ // input to it. The decoders need to know it too when
+ // we are using LZMA_CONCATENATED.
+ if (pair->src_eof)
action = LZMA_FINISH;
}
- ret = lzma_code(&t->strm, action);
+ // Let liblzma do the actual work.
+ ret = lzma_code(&strm, action);
- if ((t->strm.avail_out == 0 || ret != LZMA_OK)
- && opt_mode != MODE_TEST) {
- if (io_write(t->pair, out_buf,
- BUFSIZ - t->strm.avail_out))
- break;
+ // Write out if the output buffer became full.
+ if (strm.avail_out == 0) {
+ if (opt_mode != MODE_TEST && io_write(pair, out_buf,
+ IO_BUFFER_SIZE - strm.avail_out))
+ return false;
- t->strm.next_out = out_buf;
- t->strm.avail_out = BUFSIZ;
+ strm.next_out = out_buf;
+ strm.avail_out = IO_BUFFER_SIZE;
}
if (ret != LZMA_OK) {
- // Check that there is no trailing garbage. This is
- // needed for LZMA_Alone and raw streams.
- if (ret == LZMA_STREAM_END && (t->strm.avail_in != 0
- || (!t->pair->src_eof && io_read(
- t->pair, in_buf, 1) != 0)))
- ret = LZMA_DATA_ERROR;
-
- if (ret != LZMA_STREAM_END) {
- errmsg(V_ERROR, "%s: %s", t->pair->src_name,
- str_strm_error(ret));
- break;
+ // Determine if the return value indicates that we
+ // won't continue coding.
+ const bool stop = ret != LZMA_NO_CHECK
+ && ret != LZMA_UNSUPPORTED_CHECK;
+
+ if (stop) {
+ // First print the final progress info.
+ // This way the user sees more accurately
+ // where the error occurred. Note that we
+ // print this *before* the possible error
+ // message.
+ //
+ // FIXME: What if something goes wrong
+ // after this?
+ message_progress_end(strm.total_in,
+ strm.total_out,
+ ret == LZMA_STREAM_END);
+
+ // Write the remaining bytes even if something
+ // went wrong, because that way the user gets
+ // as much data as possible, which can be good
+ // when trying to get at least some useful
+ // data out of damaged files.
+ if (opt_mode != MODE_TEST && io_write(pair,
+ out_buf, IO_BUFFER_SIZE
+ - strm.avail_out))
+ return false;
}
- assert(t->pair->src_eof);
- success = true;
- break;
- }
- }
-
- io_close(t->pair, success);
- release_thread_data(t);
-
- return NULL;
-}
+ if (ret == LZMA_STREAM_END) {
+ // Check that there is no trailing garbage.
+ // This is needed for LZMA_Alone and raw
+ // streams.
+ if (strm.avail_in == 0 && (pair->src_eof
+ || io_read(pair, in_buf, 1)
+ == 0)) {
+ assert(pair->src_eof);
+ return true;
+ }
+ // FIXME: What about io_read() failing?
-///////////////////////////////
-// Multiple threads per file //
-///////////////////////////////
-
-// TODO
-
-// I'm not sure what would the best way to implement this. Here's one
-// possible way:
-// - Reader thread would read the input data and control the coders threads.
-// - Every coder thread is associated with input and output buffer pools.
-// The input buffer pool is filled by reader thread, and the output buffer
-// pool is emptied by the writer thread.
-// - Writer thread writes the output data of the oldest living coder thread.
-//
-// The per-file thread started by the application's main thread is used as
-// the reader thread. In the beginning, it starts the writer thread and the
-// first coder thread. The coder thread would be left waiting for input from
-// the reader thread, and the writer thread would be waiting for input from
-// the coder thread.
-//
-// The reader thread reads the input data into a ring buffer, whose size
-// depends on the value returned by lzma_chunk_size(). If the ring buffer
-// gets full, the buffer is marked "to be finished", which indicates to
-// the coder thread that no more input is coming. Then a new coder thread
-// would be started.
-//
-// TODO
-
-/*
-typedef struct {
- /// Buffers
- uint8_t (*buffers)[BUFSIZ];
-
- /// Number of buffers
- size_t buffer_count;
-
- /// buffers[read_pos] is the buffer currently being read. Once finish
- /// is true and read_pos == write_pos, end of input has been reached.
- size_t read_pos;
-
- /// buffers[write_pos] is the buffer into which data is currently
- /// being written.
- size_t write_pos;
-
- /// This variable matters only when read_pos == write_pos && finish.
- /// In that case, this variable will contain the size of the
- /// buffers[read_pos].
- size_t last_size;
-
- /// True once no more data is being written to the buffer. When this
- /// is set, the last_size variable must have been set too.
- bool finish;
-
- /// Mutex to protect access to the variables in this structure
- pthread_mutex_t mutex;
-
- /// Condition to indicate when another thread can continue
- pthread_cond_t cond;
-} mem_pool;
-
-
-static foo
-multi_reader(thread_data *t)
-{
- bool done = false;
-
- do {
- const size_t size = io_read(t->pair,
- m->buffers + m->write_pos, BUFSIZ);
- if (size == SIZE_MAX) {
- // TODO
- } else if (t->pair->src_eof) {
- m->last_size = size;
- }
-
- pthread_mutex_lock(&m->mutex);
-
- if (++m->write_pos == m->buffer_count)
- m->write_pos = 0;
-
- if (m->write_pos == m->read_pos || t->pair->src_eof)
- m->finish = true;
-
- pthread_cond_signal(&m->cond);
- pthread_mutex_unlock(&m->mutex);
-
- } while (!m->finish);
-
- return done ? 0 : -1;
-}
-
-
-static foo
-multi_code()
-{
- lzma_action = LZMA_RUN;
-
- while (true) {
- pthread_mutex_lock(&m->mutex);
+ // We hadn't reached the end of the file.
+ ret = LZMA_DATA_ERROR;
+ assert(stop);
+ }
- while (m->read_pos == m->write_pos && !m->finish)
- pthread_cond_wait(&m->cond, &m->mutex);
+ // If we get here and stop is true, something went
+ // wrong and we print an error. Otherwise it's just
+ // a warning and coding can continue.
+ if (stop) {
+ message_error("%s: %s", pair->src_name,
+ message_strm(ret));
+ } else {
+ message_warning("%s: %s", pair->src_name,
+ message_strm(ret));
+
+ // When compressing, all possible errors set
+ // stop to true.
+ assert(opt_mode != MODE_COMPRESS);
+ }
- pthread_mutex_unlock(&m->mutex);
+ if (ret == LZMA_MEMLIMIT_ERROR) {
+ // Figure out how much memory would have
+ // actually been needed.
+ // TODO
+ }
- if (m->finish) {
- t->strm.avail_in = m->last_size;
- if (opt_mode == MODE_COMPRESS)
- action = LZMA_FINISH;
- } else {
- t->strm.avail_in = BUFSIZ;
+ if (stop)
+ return false;
}
- t->strm.next_in = m->buffers + m->read_pos;
-
- const lzma_ret ret = lzma_code(&t->strm, action);
-
+ // Show progress information if --verbose was specified and
+ // stderr is a terminal.
+ message_progress_update(strm.total_in, strm.total_out);
}
-}
-
-*/
+ return false;
+}
-///////////////////////
-// Starting new file //
-///////////////////////
extern void
process_file(const char *filename)
{
- thread_data *t = get_thread_data();
- if (t == NULL)
- return; // User abort
-
- // If this fails, it shows appropriate error messages too.
- t->pair = io_open(filename);
- if (t->pair == NULL) {
- release_thread_data(t);
+ // First try initializing the coder. If it fails, it's useless to try
+ // opening the file. Also check user_abort in case we received
+ // a signal while initializing the coder.
+ if (coder_init() || user_abort)
return;
- }
- // TODO Currently only one-thread-per-file mode is implemented.
+ // Try to open the input and output files.
+ file_pair *pair = io_open(filename);
+ if (pair == NULL)
+ return;
- if (create_thread(&single, t)) {
- io_close(t->pair, false);
- release_thread_data(t);
- }
+ // Do the actual coding.
+ const bool success = coder_run(pair);
+
+ // Close the file pair. It needs to know if coding was successful to
+ // know if the source or target file should be unlinked.
+ io_close(pair, success);
return;
}
diff --git a/src/lzma/process.h b/src/lzma/process.h
index 7fdfbce6..de23eacb 100644
--- a/src/lzma/process.h
+++ b/src/lzma/process.h
@@ -23,6 +23,46 @@
#include "private.h"
+enum operation_mode {
+ MODE_COMPRESS,
+ MODE_DECOMPRESS,
+ MODE_TEST,
+ MODE_LIST,
+};
+
+
+// NOTE: The order of these is significant in suffix.c.
+enum format_type {
+ FORMAT_AUTO,
+ FORMAT_XZ,
+ FORMAT_LZMA,
+ // HEADER_GZIP,
+ FORMAT_RAW,
+};
+
+
+/// Operation mode of the command line tool. This is set in args.c and read
+/// in several files.
+extern enum operation_mode opt_mode;
+
+/// File format to use when encoding or what format(s) to accept when
+/// decoding. This is a global because it's needed also in suffix.c.
+/// This is set in args.c.
+extern enum format_type opt_format;
+
+
+/// Set the integrity check type used when compressing
+extern void coder_set_check(lzma_check check);
+
+/// Set preset number
+extern void coder_set_preset(size_t new_preset);
+
+/// Add a filter to the custom filter chain
+extern void coder_add_filter(lzma_vli id, void *options);
+
+///
+extern void coder_set_compression_settings(void);
+
extern void process_init(void);
extern void process_file(const char *filename);
diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c
index 460acee2..0d46855a 100644
--- a/src/lzma/suffix.c
+++ b/src/lzma/suffix.c
@@ -20,6 +20,9 @@
#include "private.h"
+static char *custom_suffix = NULL;
+
+
struct suffix_pair {
const char *compressed;
const char *uncompressed;
@@ -74,8 +77,8 @@ uncompressed_name(const char *src_name, const size_t src_len)
if (opt_format == FORMAT_RAW) {
// Don't check for known suffixes when --format=raw was used.
- if (opt_suffix == NULL) {
- errmsg(V_ERROR, _("%s: With --format=raw, "
+ if (custom_suffix == NULL) {
+ message_error(_("%s: With --format=raw, "
"--suffix=.SUF is required unless "
"writing to stdout"), src_name);
return NULL;
@@ -91,21 +94,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
}
}
- if (new_len == 0 && opt_suffix != NULL)
- new_len = test_suffix(opt_suffix, src_name, src_len);
+ if (new_len == 0 && custom_suffix != NULL)
+ new_len = test_suffix(custom_suffix, src_name, src_len);
if (new_len == 0) {
- errmsg(V_WARNING, _("%s: Filename has an unknown suffix, "
+ message_warning(_("%s: Filename has an unknown suffix, "
"skipping"), src_name);
return NULL;
}
const size_t new_suffix_len = strlen(new_suffix);
- char *dest_name = malloc(new_len + new_suffix_len + 1);
- if (dest_name == NULL) {
- out_of_memory();
- return NULL;
- }
+ char *dest_name = xmalloc(new_len + new_suffix_len + 1);
memcpy(dest_name, src_name, new_len);
memcpy(dest_name + new_len, new_suffix, new_suffix_len);
@@ -154,7 +153,7 @@ compressed_name(const char *src_name, const size_t src_len)
for (size_t i = 0; suffixes[i].compressed != NULL; ++i) {
if (test_suffix(suffixes[i].compressed, src_name, src_len)
!= 0) {
- errmsg(V_WARNING, _("%s: File already has `%s' "
+ message_warning(_("%s: File already has `%s' "
"suffix, skipping"), src_name,
suffixes[i].compressed);
return NULL;
@@ -163,22 +162,18 @@ compressed_name(const char *src_name, const size_t src_len)
// TODO: Hmm, maybe it would be better to validate this in args.c,
// since the suffix handling when decoding is weird now.
- if (opt_format == FORMAT_RAW && opt_suffix == NULL) {
- errmsg(V_ERROR, _("%s: With --format=raw, "
+ if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
+ message_error(_("%s: With --format=raw, "
"--suffix=.SUF is required unless "
"writing to stdout"), src_name);
return NULL;
}
- const char *suffix = opt_suffix != NULL
- ? opt_suffix : suffixes[0].compressed;
+ const char *suffix = custom_suffix != NULL
+ ? custom_suffix : suffixes[0].compressed;
const size_t suffix_len = strlen(suffix);
- char *dest_name = malloc(src_len + suffix_len + 1);
- if (dest_name == NULL) {
- out_of_memory();
- return NULL;
- }
+ char *dest_name = xmalloc(src_len + suffix_len + 1);
memcpy(dest_name, src_name, src_len);
memcpy(dest_name + src_len, suffix, suffix_len);
@@ -189,7 +184,7 @@ compressed_name(const char *src_name, const size_t src_len)
extern char *
-get_dest_name(const char *src_name)
+suffix_get_dest_name(const char *src_name)
{
assert(src_name != NULL);
@@ -201,3 +196,18 @@ get_dest_name(const char *src_name)
? compressed_name(src_name, src_len)
: uncompressed_name(src_name, src_len);
}
+
+
+extern void
+suffix_set(const char *suffix)
+{
+ // Empty suffix and suffixes having a slash are rejected. Such
+ // suffixes would break things later.
+ if (suffix[0] == '\0' || strchr(suffix, '/') != NULL)
+ message_fatal(_("%s: Invalid filename suffix"), optarg);
+
+ // Replace the old custom_suffix (if any) with the new suffix.
+ free(custom_suffix);
+ custom_suffix = xstrdup(suffix);
+ return;
+}
diff --git a/src/lzma/suffix.h b/src/lzma/suffix.h
index 08315659..c92b92dc 100644
--- a/src/lzma/suffix.h
+++ b/src/lzma/suffix.h
@@ -20,6 +20,21 @@
#ifndef SUFFIX_H
#define SUFFIX_H
-extern char *get_dest_name(const char *src_name);
+/// \brief Get the name of the destination file
+///
+/// Depending on the global variable opt_mode, this tries to find a matching
+/// counterpart for src_name. If the name can be constructed, it is allocated
+/// and returned (caller must free it). On error, a message is printed and
+/// NULL is returned.
+extern char *suffix_get_dest_name(const char *src_name);
+
+
+/// \brief Set a custom filename suffix
+///
+/// This function calls xstrdup() for the given suffix, thus the caller
+/// doesn't need to keep the memory allocated. There can be only one custom
+/// suffix, thus if this is called multiple times, the old suffixes are freed
+/// and forgotten.
+extern void suffix_set(const char *suffix);
#endif
diff --git a/src/lzma/util.c b/src/lzma/util.c
index 4bdbf8ec..13b67925 100644
--- a/src/lzma/util.c
+++ b/src/lzma/util.c
@@ -20,17 +20,29 @@
#include "private.h"
-/// \brief Fancy version of strtoull()
-///
-/// \param name Name of the option to show in case of an error
-/// \param value String containing the number to be parsed; may
-/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi"
-/// \param min Minimum valid value
-/// \param max Maximum valid value
-///
-/// \return Parsed value that is in the range [min, max]. Does not return
-/// if an error occurs.
-///
+extern void *
+xrealloc(void *ptr, size_t size)
+{
+ assert(size > 0);
+
+ ptr = realloc(ptr, size);
+ if (ptr == NULL)
+ message_fatal("%s", strerror(errno));
+
+ return ptr;
+}
+
+
+extern char *
+xstrdup(const char *src)
+{
+ assert(src != NULL);
+ const size_t size = strlen(src) + 1;
+ char *dest = xmalloc(size);
+ return memcpy(dest, src, size);
+}
+
+
extern uint64_t
str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
{
@@ -40,12 +52,9 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
while (*value == ' ' || *value == '\t')
++value;
- if (*value < '0' || *value > '9') {
- errmsg(V_ERROR, _("%s: Value is not a non-negative "
- "decimal integer"),
- value);
- my_exit(ERROR);
- }
+ if (*value < '0' || *value > '9')
+ message_fatal(_("%s: Value is not a non-negative "
+ "decimal integer"), value);
do {
// Don't overflow.
@@ -86,12 +95,11 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
}
if (multiplier == 0) {
- errmsg(V_ERROR, _("%s: Invalid multiplier suffix. "
+ message(V_ERROR, _("%s: Invalid multiplier suffix. "
"Valid suffixes:"), value);
- errmsg(V_ERROR, "`k' (10^3), `M' (10^6), `G' (10^9) "
+ message_fatal("`k' (10^3), `M' (10^6), `G' (10^9) "
"`Ki' (2^10), `Mi' (2^20), "
"`Gi' (2^30)");
- my_exit(ERROR);
}
// Don't overflow here either.
@@ -107,32 +115,10 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
return result;
error:
- errmsg(V_ERROR, _("Value of the option `%s' must be in the range "
+ message_fatal(_("Value of the option `%s' must be in the range "
"[%llu, %llu]"), name,
(unsigned long long)(min),
(unsigned long long)(max));
- my_exit(ERROR);
-}
-
-
-/// \brief Gets filename part from pathname+filename
-///
-/// \return Pointer in the filename where the actual filename starts.
-/// If the last character is a slash, NULL is returned.
-///
-extern const char *
-str_filename(const char *name)
-{
- const char *base = strrchr(name, '/');
-
- if (base == NULL) {
- base = name;
- } else if (*++base == '\0') {
- base = NULL;
- errmsg(V_ERROR, _("%s: Invalid filename"), name);
- }
-
- return base;
}
@@ -179,9 +165,35 @@ extern bool
is_empty_filename(const char *filename)
{
if (filename[0] == '\0') {
- errmsg(V_WARNING, _("Empty filename, skipping"));
+ message_error(_("Empty filename, skipping"));
return true;
}
return false;
}
+
+
+extern bool
+is_tty_stdin(void)
+{
+ const bool ret = isatty(STDIN_FILENO);
+
+ if (ret)
+ message_error(_("Compressed data not read from a terminal "
+ "unless `--force' is used."));
+
+ return ret;
+}
+
+
+extern bool
+is_tty_stdout(void)
+{
+ const bool ret = isatty(STDOUT_FILENO);
+
+ if (ret)
+ message_error(_("Compressed data not written to a terminal "
+ "unless `--force' is used."));
+
+ return ret;
+}
diff --git a/src/lzma/util.h b/src/lzma/util.h
index 91bd9ba3..dca62b26 100644
--- a/src/lzma/util.h
+++ b/src/lzma/util.h
@@ -20,13 +20,52 @@
#ifndef UTIL_H
#define UTIL_H
-#include "private.h"
+/// \brief Safe malloc() that never returns NULL
+///
+/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when
+/// there are files open for writing that should be cleaned up
+/// before exiting.
+#define xmalloc(size) xrealloc(NULL, size)
+
+/// \brief Safe realloc() that never returns NULL
+extern void *xrealloc(void *ptr, size_t size);
+
+
+/// \brief Safe strdup() that never returns NULL
+extern char *xstrdup(const char *src);
+
+
+/// \brief Fancy version of strtoull()
+///
+/// \param name Name of the option to show in case of an error
+/// \param value String containing the number to be parsed; may
+/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi"
+/// \param min Minimum valid value
+/// \param max Maximum valid value
+///
+/// \return Parsed value that is in the range [min, max]. Does not return
+/// if an error occurs.
+///
extern uint64_t str_to_uint64(const char *name, const char *value,
uint64_t min, uint64_t max);
-extern const char *str_filename(const char *filename);
+/// \brief Check if filename is empty and print an error message
extern bool is_empty_filename(const char *filename);
+
+/// \brief Test if stdin is a terminal
+///
+/// If stdin is a terminal, an error message is printed and exit status set
+/// to EXIT_ERROR.
+extern bool is_tty_stdin(void);
+
+
+/// \brief Test if stdout is a terminal
+///
+/// If stdout is a terminal, an error message is printed and exit status set
+/// to EXIT_ERROR.
+extern bool is_tty_stdout(void);
+
#endif
diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c
index eab00544..0b2adb97 100644
--- a/src/lzmadec/lzmadec.c
+++ b/src/lzmadec/lzmadec.c
@@ -19,12 +19,7 @@
#include "sysdefs.h"
-#ifdef HAVE_ERRNO_H
-# include <errno.h>
-#else
-extern int errno;
-#endif
-
+#include <errno.h>
#include <stdio.h>
#include <unistd.h>
@@ -65,7 +60,7 @@ static uint8_t out_buf[BUFSIZ];
static lzma_stream strm = LZMA_STREAM_INIT;
/// Number of bytes to use memory at maximum
-static size_t memlimit;
+static uint64_t memlimit;
/// Program name to be shown in error messages
static const char *argv0;
@@ -94,8 +89,8 @@ help(void)
" -d, --decompress (ignored)\n"
" -k, --keep (ignored)\n"
" -f, --force allow reading compressed data from a terminal\n"
-" -M, --memory=NUM use NUM bytes of memory at maximum; the suffixes\n"
-" k, M, G, Ki, Mi, and Gi are supported.\n"
+" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default);\n"
+" the suffixes k, M, G, Ki, Mi, and Gi are supported.\n"
" --format=FMT accept only files in the given file format;\n"
" possible FMTs are `auto', `native', and alone',\n"
" -h, --help display this help and exit\n"
@@ -141,20 +136,14 @@ version(void)
static void
set_default_memlimit(void)
{
- uint64_t mem = physmem();
- if (mem != 0) {
- mem /= 3;
+ const uint64_t mem = physmem();
-#if UINT64_MAX > SIZE_MAX
- if (mem > SIZE_MAX)
- mem = SIZE_MAX;
-#endif
-
- memlimit = mem / 3;
- } else {
+ if (mem == 0)
// Cannot autodetect, use 10 MiB as the default limit.
memlimit = (1U << 23) + (1U << 21);
- }
+ else
+ // Limit is 33 % of RAM.
+ memlimit = mem / 3;
return;
}
@@ -165,7 +154,7 @@ set_default_memlimit(void)
/// This is rudely copied from src/lzma/util.c and modified a little. :-(
///
static size_t
-str_to_size(const char *value)
+str_to_uint64(const char *value)
{
size_t result = 0;
@@ -263,7 +252,10 @@ parse_options(int argc, char **argv)
break;
case 'M':
- memlimit = str_to_size(optarg);
+ memlimit = str_to_uint64(optarg);
+ if (memlimit == 0)
+ set_default_memlimit();
+
break;
case 'h':