aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/liblzma/api/lzma/lzma12.h123
-rw-r--r--src/liblzma/common/filter_common.c7
-rw-r--r--src/liblzma/common/filter_decoder.c6
-rw-r--r--src/liblzma/common/filter_encoder.c9
-rw-r--r--src/liblzma/lzma/lzma2_encoder.c2
-rw-r--r--src/liblzma/lzma/lzma_decoder.c26
-rw-r--r--src/liblzma/lzma/lzma_encoder.c40
-rw-r--r--src/liblzma/lzma/lzma_encoder.h3
-rw-r--r--src/liblzma/lzma/lzma_encoder_private.h3
9 files changed, 204 insertions, 15 deletions
diff --git a/src/liblzma/api/lzma/lzma12.h b/src/liblzma/api/lzma/lzma12.h
index df5f23b6..d34e7839 100644
--- a/src/liblzma/api/lzma/lzma12.h
+++ b/src/liblzma/api/lzma/lzma12.h
@@ -18,18 +18,41 @@
/**
- * \brief LZMA1 Filter ID
+ * \brief LZMA1 Filter ID (for raw encoder/decoder only, not in .xz)
*
* LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
* 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
* accidentally using LZMA when they actually want LZMA2.
- *
- * LZMA1 shouldn't be used for new applications unless you _really_ know
- * what you are doing. LZMA2 is almost always a better choice.
*/
#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001)
/**
+ * \brief LZMA1 Filter ID with extended options (for raw encoder/decoder)
+ *
+ * This is like LZMA_FILTER_LZMA1 but with this ID a few extra options
+ * are supported in the lzma_options_lzma structure:
+ *
+ * - A flag to tell the encoder if the end of payload marker (EOPM) alias
+ * end of stream (EOS) marker must be written at the end of the stream.
+ * In contrast, LZMA_FILTER_LZMA1 always writes the end marker.
+ *
+ * - Decoder needs to be told the uncompressed size of the stream
+ * or that it is unknown (using the special value UINT64_MAX).
+ * If the size is known, a flag can be set to allow the presence of
+ * the end marker anyway. In contrast, LZMA_FILTER_LZMA1 always
+ * behaves as if the uncompressed size was unknown.
+ *
+ * This allows handling file formats where LZMA1 streams are used but where
+ * the end marker isn't allowed or where it might not (always) be present.
+ * This extended LZMA1 functionality is provided as a Filter ID for raw
+ * encoder and decoder instead of adding new encoder and decoder initialization
+ * functions because this way it is possible to also use extra filters,
+ * for example, LZMA_FILTER_X86 in a filter chain with LZMA_FILTER_LZMA1EXT,
+ * which might be needed to handle some file formats.
+ */
+#define LZMA_FILTER_LZMA1EXT LZMA_VLI_C(0x4000000000000002)
+
+/**
* \brief LZMA2 Filter ID
*
* Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
@@ -374,6 +397,82 @@ typedef struct {
*/
uint32_t depth;
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Extended flags
+ *
+ * This is used only with LZMA_FILTER_LZMA1EXT.
+ *
+ * Currently only one flag is supported, LZMA_LZMA1EXT_ALLOW_EOPM:
+ *
+ * - Encoder: If the flag is set, then end marker is written just
+ * like it is with LZMA_FILTER_LZMA1. Without this flag the
+ * end marker isn't written and the application has to store
+ * the uncompressed size somewhere outside the compressed stream.
+ * To decompress streams without the end marker, the appliation
+ * has to set the correct uncompressed size in ext_size_low and
+ * ext_size_high.
+ *
+ * - Decoder: If the uncompressed size in ext_size_low and
+ * ext_size_high is set to the special value UINT64_MAX
+ * (indicating unknown uncompressed size) then this flag is
+ * ignored and the end marker must always be present, that is,
+ * the behavior is identical to LZMA_FILTER_LZMA1.
+ *
+ * Otherwise, if this flag isn't set, then the input stream
+ * must not have the end marker; if the end marker is detected
+ * then it will result in LZMA_DATA_ERROR. This is useful when
+ * it is known that the stream must not have the end marker and
+ * strict validation is wanted.
+ *
+ * If this flag is set, then it is autodetected if the end marker
+ * is present after the specified number of uncompressed bytes
+ * has been decompressed (ext_size_low and ext_size_high). The
+ * end marker isn't allowed in any other position. This behavior
+ * is useful when uncompressed size is known but the end marker
+ * may or may not be present. This is the case, for example,
+ * in .7z files (valid .7z files that have the end marker in
+ * LZMA1 streams are rare but they do exist).
+ */
+ uint32_t ext_flags;
+# define LZMA_LZMA1EXT_ALLOW_EOPM UINT32_C(0x01)
+
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (low bits)
+ *
+ * The 64-bit uncompressed size is needed for decompression with
+ * LZMA_FILTER_LZMA1EXT. The size is ignored by the encoder.
+ *
+ * The special value UINT64_MAX indicates that the uncompressed size
+ * is unknown and that the end of payload marker (also known as
+ * end of stream marker) must be present to indicate the end of
+ * the LZMA1 stream. Any other value indicates the expected
+ * uncompressed size of the LZMA1 stream. (If LZMA1 was used together
+ * with filters that change the size of the data then the uncompressed
+ * size of the LZMA1 stream could be different than the final
+ * uncompressed size of the filtered stream.)
+ *
+ * ext_size_low holds the least significant 32 bits of the
+ * uncompressed size. The most significant 32 bits must be set
+ * in ext_size_high. The macro lzma_ext_size_set(opt_lzma, u64size)
+ * can be used to set these members.
+ *
+ * The 64-bit uncompressed size is split into two uint32_t variables
+ * because there were no reserved uint64_t members and using the
+ * same options structure for LZMA_FILTER_LZMA1, LZMA_FILTER_LZMA1EXT,
+ * and LZMA_FILTER_LZMA2 was otherwise more convenient than having
+ * a new options structure for LZMA_FILTER_LZMA1EXT. (Replacing two
+ * uint32_t members with one uint64_t changes the ABI on some systems
+ * as the alignment of this struct can increase from 4 bytes to 8.)
+ */
+ uint32_t ext_size_low;
+
+ /**
+ * \brief For LZMA_FILTER_LZMA1EXT: Uncompressed size (high bits)
+ *
+ * This holds the most significant 32 bits of the uncompressed size.
+ */
+ uint32_t ext_size_high;
+
/*
* Reserved space to allow possible future extensions without
* breaking the ABI. You should not touch these, because the names
@@ -381,9 +480,6 @@ typedef struct {
* with the currently supported options, so it is safe to leave these
* uninitialized.
*/
- uint32_t reserved_int1;
- uint32_t reserved_int2;
- uint32_t reserved_int3;
uint32_t reserved_int4;
uint32_t reserved_int5;
uint32_t reserved_int6;
@@ -400,6 +496,19 @@ typedef struct {
/**
+ * \brief Macro to set the 64-bit uncompressed size in ext_size_*
+ *
+ * This might be convenient when decoding using LZMA_FILTER_LZMA1EXT.
+ * This isn't used with LZMA_FILTER_LZMA1 or LZMA_FILTER_LZMA2.
+ */
+#define lzma_set_ext_size(opt_lzma2, u64size) \
+do { \
+ (opt_lzma2).ext_size_low = (uint32_t)(u64size); \
+ (opt_lzma2).ext_size_high = (uint32_t)((uint64_t)(u64size) >> 32); \
+} while (0)
+
+
+/**
* \brief Set a compression preset to lzma_options_lzma structure
*
* 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c
index a803b4c2..c8694e2c 100644
--- a/src/liblzma/common/filter_common.c
+++ b/src/liblzma/common/filter_common.c
@@ -42,6 +42,13 @@ static const struct {
.last_ok = true,
.changes_size = true,
},
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .options_size = sizeof(lzma_options_lzma),
+ .non_last_ok = false,
+ .last_ok = true,
+ .changes_size = true,
+ },
#endif
#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
{
diff --git a/src/liblzma/common/filter_decoder.c b/src/liblzma/common/filter_decoder.c
index b031ac62..fa53f5bd 100644
--- a/src/liblzma/common/filter_decoder.c
+++ b/src/liblzma/common/filter_decoder.c
@@ -50,6 +50,12 @@ static const lzma_filter_decoder decoders[] = {
.memusage = &lzma_lzma_decoder_memusage,
.props_decode = &lzma_lzma_props_decode,
},
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .memusage = &lzma_lzma_decoder_memusage,
+ .props_decode = &lzma_lzma_props_decode,
+ },
#endif
#ifdef HAVE_DECODER_LZMA2
{
diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c
index 5d6c1a7e..978b7a6b 100644
--- a/src/liblzma/common/filter_encoder.c
+++ b/src/liblzma/common/filter_encoder.c
@@ -64,6 +64,15 @@ static const lzma_filter_encoder encoders[] = {
.props_size_fixed = 5,
.props_encode = &lzma_lzma_props_encode,
},
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_encoder_init,
+ .memusage = &lzma_lzma_encoder_memusage,
+ .block_size = NULL, // Not needed for LZMA1
+ .props_size_get = NULL,
+ .props_size_fixed = 5,
+ .props_encode = &lzma_lzma_props_encode,
+ },
#endif
#ifdef HAVE_ENCODER_LZMA2
{
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
index f1252c57..4b6b2311 100644
--- a/src/liblzma/lzma/lzma2_encoder.c
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -341,7 +341,7 @@ lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
// Initialize LZMA encoder
return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator,
- &coder->opt_cur, lz_options));
+ LZMA_FILTER_LZMA2, &coder->opt_cur, lz_options));
// Make sure that we will always have enough history available in
// case we need to use uncompressed chunks. They are used when the
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 550963d1..26c148a9 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -1018,11 +1018,35 @@ lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
if (!is_lclppb_valid(options))
return LZMA_PROG_ERROR;
+ lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
+ bool allow_eopm = true;
+
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ const lzma_options_lzma *opt = options;
+
+ // Only one flag is supported.
+ if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ // FIXME? Using lzma_vli instead of uint64_t is weird because
+ // this has nothing to do with .xz headers and variable-length
+ // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
+ // instead of UINT64_MAX is clearer when unknown size is
+ // meant. A problem with using lzma_vli is that now we
+ // allow > LZMA_VLI_MAX which is fine in this file but
+ // it's still confusing. Note that alone_decoder.c also
+ // allows > LZMA_VLI_MAX when setting uncompressed size.
+ uncomp_size = opt->ext_size_low
+ + ((uint64_t)(opt->ext_size_high) << 32);
+ allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
+ || uncomp_size == LZMA_VLI_UNKNOWN;
+ }
+
return_if_error(lzma_lzma_decoder_create(
lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options);
- lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN, true);
+ lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
return LZMA_OK;
}
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index e2dbbc03..dc62f44f 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -416,7 +416,7 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
//
// Plain LZMA streams without EOPM aren't supported except when
// output size limiting is enabled.
- if (limit == UINT32_MAX && coder->out_limit == 0)
+ if (coder->use_eopm)
encode_eopm(coder, (uint32_t)(coder->uncomp_size));
// Flush the remaining bytes from the range encoder.
@@ -462,6 +462,7 @@ lzma_lzma_set_out_limit(
lzma_lzma1_encoder *coder = coder_ptr;
coder->out_limit = out_limit;
coder->uncomp_size_ptr = uncomp_size;
+ coder->use_eopm = false;
return LZMA_OK;
}
@@ -599,10 +600,13 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
extern lzma_ret
-lzma_lzma_encoder_create(void **coder_ptr,
- const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options)
+lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator,
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options)
{
+ assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT
+ || id == LZMA_FILTER_LZMA2);
+
// Allocate lzma_lzma1_encoder if it wasn't already allocated.
if (*coder_ptr == NULL) {
*coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator);
@@ -672,6 +676,32 @@ lzma_lzma_encoder_create(void **coder_ptr,
// Output size limitting is disabled by default.
coder->out_limit = 0;
+ // Determine if end marker is wanted:
+ // - It is never used with LZMA2.
+ // - It is always used with LZMA_FILTER_LZMA1 (unless
+ // lzma_lzma_set_out_limit() is called later).
+ // - LZMA_FILTER_LZMA1EXT has a flag for it in the options.
+ coder->use_eopm = (id == LZMA_FILTER_LZMA1);
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ // Check if unsupported flags are present.
+ if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->use_eopm = (options->ext_flags
+ & LZMA_LZMA1EXT_ALLOW_EOPM) != 0;
+
+ // TODO? As long as there are no filters that change the size
+ // of the data, it is enough to look at lzma_stream.total_in
+ // after encoding has been finished to know the uncompressed
+ // size of the LZMA1 stream. But in the future there could be
+ // filters that change the size of the data and then total_in
+ // doesn't work as the LZMA1 stream size might be different
+ // due to another filter in the chain. The problem is simple
+ // to solve: Add another flag to ext_flags and then set
+ // coder->uncomp_size_ptr to the address stored in
+ // lzma_options_lzma.reserved_ptr2 (or _ptr1).
+ }
+
set_lz_options(lz_options, options);
return lzma_lzma_encoder_reset(coder, options);
@@ -685,7 +715,7 @@ lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
lz->code = &lzma_encode;
lz->set_out_limit = &lzma_lzma_set_out_limit;
return lzma_lzma_encoder_create(
- &lz->coder, allocator, options, lz_options);
+ &lz->coder, allocator, id, options, lz_options);
}
diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h
index 6cfdf228..84d8c916 100644
--- a/src/liblzma/lzma/lzma_encoder.h
+++ b/src/liblzma/lzma/lzma_encoder.h
@@ -40,7 +40,8 @@ extern bool lzma_lzma_lclppb_encode(
/// Initializes raw LZMA encoder; this is used by LZMA2.
extern lzma_ret lzma_lzma_encoder_create(
void **coder_ptr, const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options);
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options);
/// Resets an already initialized LZMA encoder; this is used by LZMA2.
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index 8960c52c..b228c577 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -111,6 +111,9 @@ struct lzma_lzma1_encoder_s {
/// have been written to the output buffer yet.
bool is_flushed;
+ /// True if end of payload marker will be written.
+ bool use_eopm;
+
uint32_t pos_mask; ///< (1 << pos_bits) - 1
uint32_t literal_context_bits;
uint32_t literal_pos_mask;