aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2022-11-27 23:16:21 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2022-11-27 23:16:21 +0200
commit: 33b8a24b6646a9dbfd8358405aec466b13078559 (patch)
tree: 166b0977ca46fa664c0b7ceab6e64a59a7abe4d2 /src/liblzma/lzma
parent: liblzma: Avoid unneeded use of void pointer in LZMA decoder. (diff)
download: xz-33b8a24b6646a9dbfd8358405aec466b13078559.tar.xz
liblzma: Add LZMA_FILTER_LZMA1EXT to support LZMA1 without end marker.
Some file formats need support for LZMA1 streams that don't use the end of payload marker (EOPM), also known as the end of stream (EOS) marker. So far the liblzma API has supported decompressing such streams via lzma_alone_decoder() when the .lzma header specifies a known uncompressed size. Encoding support hasn't been available in the API. Instead of adding a new LZMA1-only API for this purpose, this commit adds a new filter ID for use with the raw encoder and decoder. The main benefit of this approach is that filter chains also become possible, for example, if someone wants to implement support for .7z files that use the x86 BCJ filter with LZMA1 (not BCJ2, as that isn't supported in liblzma).
Diffstat (limited to 'src/liblzma/lzma')
-rw-r--r-- src/liblzma/lzma/lzma2_encoder.c 2
-rw-r--r-- src/liblzma/lzma/lzma_decoder.c 26
-rw-r--r-- src/liblzma/lzma/lzma_encoder.c 40
-rw-r--r-- src/liblzma/lzma/lzma_encoder.h 3
-rw-r--r-- src/liblzma/lzma/lzma_encoder_private.h 3
5 files changed, 66 insertions, 8 deletions
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
index f1252c57..4b6b2311 100644
--- a/src/liblzma/lzma/lzma2_encoder.c
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -341,7 +341,7 @@ lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
// Initialize LZMA encoder
return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator,
- &coder->opt_cur, lz_options));
+ LZMA_FILTER_LZMA2, &coder->opt_cur, lz_options));
// Make sure that we will always have enough history available in
// case we need to use uncompressed chunks. They are used when the
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 550963d1..26c148a9 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -1018,11 +1018,35 @@ lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
if (!is_lclppb_valid(options))
return LZMA_PROG_ERROR;
+ lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
+ bool allow_eopm = true;
+
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ const lzma_options_lzma *opt = options;
+
+ // Only one flag is supported.
+ if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ // FIXME? Using lzma_vli instead of uint64_t is weird because
+ // this has nothing to do with .xz headers and variable-length
+ // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
+ // instead of UINT64_MAX is clearer when unknown size is
+ // meant. A problem with using lzma_vli is that now we
+ // allow > LZMA_VLI_MAX which is fine in this file but
+ // it's still confusing. Note that alone_decoder.c also
+ // allows > LZMA_VLI_MAX when setting uncompressed size.
+ uncomp_size = opt->ext_size_low
+ + ((uint64_t)(opt->ext_size_high) << 32);
+ allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
+ || uncomp_size == LZMA_VLI_UNKNOWN;
+ }
+
return_if_error(lzma_lzma_decoder_create(
lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options);
- lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN, true);
+ lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
return LZMA_OK;
}
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index e2dbbc03..dc62f44f 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -416,7 +416,7 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
//
// Plain LZMA streams without EOPM aren't supported except when
// output size limiting is enabled.
- if (limit == UINT32_MAX && coder->out_limit == 0)
+ if (coder->use_eopm)
encode_eopm(coder, (uint32_t)(coder->uncomp_size));
// Flush the remaining bytes from the range encoder.
@@ -462,6 +462,7 @@ lzma_lzma_set_out_limit(
lzma_lzma1_encoder *coder = coder_ptr;
coder->out_limit = out_limit;
coder->uncomp_size_ptr = uncomp_size;
+ coder->use_eopm = false;
return LZMA_OK;
}
@@ -599,10 +600,13 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
extern lzma_ret
-lzma_lzma_encoder_create(void **coder_ptr,
- const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options)
+lzma_lzma_encoder_create(void **coder_ptr, const lzma_allocator *allocator,
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options)
{
+ assert(id == LZMA_FILTER_LZMA1 || id == LZMA_FILTER_LZMA1EXT
+ || id == LZMA_FILTER_LZMA2);
+
// Allocate lzma_lzma1_encoder if it wasn't already allocated.
if (*coder_ptr == NULL) {
*coder_ptr = lzma_alloc(sizeof(lzma_lzma1_encoder), allocator);
@@ -672,6 +676,32 @@ lzma_lzma_encoder_create(void **coder_ptr,
// Output size limiting is disabled by default.
coder->out_limit = 0;
+ // Determine if end marker is wanted:
+ // - It is never used with LZMA2.
+ // - It is always used with LZMA_FILTER_LZMA1 (unless
+ // lzma_lzma_set_out_limit() is called later).
+ // - LZMA_FILTER_LZMA1EXT has a flag for it in the options.
+ coder->use_eopm = (id == LZMA_FILTER_LZMA1);
+ if (id == LZMA_FILTER_LZMA1EXT) {
+ // Check if unsupported flags are present.
+ if (options->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->use_eopm = (options->ext_flags
+ & LZMA_LZMA1EXT_ALLOW_EOPM) != 0;
+
+ // TODO? As long as there are no filters that change the size
+ // of the data, it is enough to look at lzma_stream.total_in
+ // after encoding has been finished to know the uncompressed
+ // size of the LZMA1 stream. But in the future there could be
+ // filters that change the size of the data and then total_in
+ // doesn't work as the LZMA1 stream size might be different
+ // due to another filter in the chain. The problem is simple
+ // to solve: Add another flag to ext_flags and then set
+ // coder->uncomp_size_ptr to the address stored in
+ // lzma_options_lzma.reserved_ptr2 (or _ptr1).
+ }
+
set_lz_options(lz_options, options);
return lzma_lzma_encoder_reset(coder, options);
@@ -685,7 +715,7 @@ lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
lz->code = &lzma_encode;
lz->set_out_limit = &lzma_lzma_set_out_limit;
return lzma_lzma_encoder_create(
- &lz->coder, allocator, options, lz_options);
+ &lz->coder, allocator, id, options, lz_options);
}
diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h
index 6cfdf228..84d8c916 100644
--- a/src/liblzma/lzma/lzma_encoder.h
+++ b/src/liblzma/lzma/lzma_encoder.h
@@ -40,7 +40,8 @@ extern bool lzma_lzma_lclppb_encode(
/// Initializes raw LZMA encoder; this is used by LZMA2.
extern lzma_ret lzma_lzma_encoder_create(
void **coder_ptr, const lzma_allocator *allocator,
- const lzma_options_lzma *options, lzma_lz_options *lz_options);
+ lzma_vli id, const lzma_options_lzma *options,
+ lzma_lz_options *lz_options);
/// Resets an already initialized LZMA encoder; this is used by LZMA2.
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index 8960c52c..b228c577 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -111,6 +111,9 @@ struct lzma_lzma1_encoder_s {
/// have been written to the output buffer yet.
bool is_flushed;
+ /// True if end of payload marker will be written.
+ bool use_eopm;
+
uint32_t pos_mask; ///< (1 << pos_bits) - 1
uint32_t literal_context_bits;
uint32_t literal_pos_mask;