aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/lzma-file-format.txt11
-rw-r--r--src/liblzma/common/alone_decoder.c2
-rw-r--r--src/liblzma/lz/lz_decoder.c10
-rw-r--r--src/liblzma/lz/lz_decoder.h8
-rw-r--r--src/liblzma/lzma/lzma2_decoder.c2
-rw-r--r--src/liblzma/lzma/lzma_decoder.c99
6 files changed, 94 insertions, 38 deletions
diff --git a/doc/lzma-file-format.txt b/doc/lzma-file-format.txt
index 015b0fae..4865defd 100644
--- a/doc/lzma-file-format.txt
+++ b/doc/lzma-file-format.txt
@@ -40,7 +40,11 @@ The .lzma File Format
0.2. Changes
- Last modified: 2011-04-12 11:55+0300
+ Last modified: 2022-07-13 21:00+0300
+
+ Compared to the previous version (2011-04-12 11:55+0300)
+ the section 1.1.3 was modified to allow End of Payload Marker
+ with a known Uncompressed Size.
1. File Format
@@ -129,7 +133,10 @@ The .lzma File Format
Uncompressed Size is stored as unsigned 64-bit little endian
integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates
that Uncompressed Size is unknown. End of Payload Marker (*)
- is used if and only if Uncompressed Size is unknown.
+ is used if Uncompressed Size is unknown. End of Payload Marker
+ is allowed but rarely used if Uncompressed Size is known.
+ XZ Utils 5.2.5 and older don't support .lzma files that have
+ End of Payload Marker together with a known Uncompressed Size.
XZ Utils rejects files whose Uncompressed Size field specifies
a known size that is 256 GiB or more. This is to reject false
diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c
index 239b230e..a3ea20a2 100644
--- a/src/liblzma/common/alone_decoder.c
+++ b/src/liblzma/common/alone_decoder.c
@@ -146,7 +146,7 @@ alone_decode(void *coder_ptr, const lzma_allocator *allocator,
// Use a hack to set the uncompressed size.
lzma_lz_decoder_uncompressed(coder->next.coder,
- coder->uncompressed_size);
+ coder->uncompressed_size, true);
coder->sequence = SEQ_CODE;
break;
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 09b57438..ab6af0dd 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -304,8 +304,14 @@ lzma_lz_decoder_memusage(size_t dictionary_size)
extern void
-lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
+lzma_lz_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
+ bool allow_eopm)
{
lzma_coder *coder = coder_ptr;
- coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
+
+ if (uncompressed_size == LZMA_VLI_UNKNOWN)
+ allow_eopm = true;
+
+ coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size,
+ allow_eopm);
}
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index 754ccf37..e6d7ab2a 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -62,8 +62,10 @@ typedef struct {
void (*reset)(void *coder, const void *options);
- /// Set the uncompressed size
- void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size);
+ /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
+ /// then allow_eopm will always be true.
+ void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
+ bool allow_eopm);
/// Free allocated resources
void (*end)(void *coder, const lzma_allocator *allocator);
@@ -91,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
extern void lzma_lz_decoder_uncompressed(
- void *coder, lzma_vli uncompressed_size);
+ void *coder, lzma_vli uncompressed_size, bool allow_eopm);
//////////////////////
diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
index cf1b5110..105a28dc 100644
--- a/src/liblzma/lzma/lzma2_decoder.c
+++ b/src/liblzma/lzma/lzma2_decoder.c
@@ -139,7 +139,7 @@ lzma2_decode(void *coder_ptr, lzma_dict *restrict dict,
coder->uncompressed_size += in[(*in_pos)++] + 1U;
coder->sequence = SEQ_COMPRESSED_0;
coder->lzma.set_uncompressed(coder->lzma.coder,
- coder->uncompressed_size);
+ coder->uncompressed_size, false);
break;
case SEQ_COMPRESSED_0:
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index e605a0a9..140a44f1 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -238,6 +238,11 @@ typedef struct {
/// payload marker is expected.
lzma_vli uncompressed_size;
+ /// True if end of payload marker (EOPM) is allowed even when
+ /// uncompressed_size is known; false if EOPM must not be present.
+ /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
+ bool allow_eopm;
+
////////////////////////////////
// State of incomplete symbol //
////////////////////////////////
@@ -343,12 +348,19 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
lzma_ret ret = LZMA_OK;
- // If uncompressed size is known, there must be no end of payload
- // marker.
- const bool no_eopm = coder->uncompressed_size
- != LZMA_VLI_UNKNOWN;
- if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
+ // EOPM is always required (not just allowed) when
+ // the uncompressed size isn't known.
+ bool eopm_allowed = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
+
+ // If uncompressed size is known and there is enough output space
+ // to decode all the data, limit the available buffer space so that
+ // the main loop won't try to decode past the end of the stream.
+ bool might_finish_without_eopm = false;
+ if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
+ && coder->uncompressed_size <= dict.limit - dict.pos) {
dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
+ might_finish_without_eopm = true;
+ }
// The main decoder loop. The "switch" is used to restart the decoder at
// correct location. Once restarted, the "switch" is no longer used.
@@ -361,8 +373,32 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
case SEQ_NORMALIZE:
case SEQ_IS_MATCH:
- if (unlikely(no_eopm && dict.pos == dict.limit))
- break;
+ if (unlikely(might_finish_without_eopm
+ && dict.pos == dict.limit)) {
+ // In rare cases there is a useless byte that needs
+ // to be read anyway.
+ rc_normalize(SEQ_NORMALIZE);
+
+ // If the range decoder state is such that we can
+ // be at the end of the LZMA stream, then the
+ // decoding is finished.
+ if (rc_is_finished(rc)) {
+ ret = LZMA_STREAM_END;
+ goto out;
+ }
+
+ // If the caller hasn't allowed EOPM to be present
+ // together with known uncompressed size, then the
+ // LZMA stream is corrupt.
+ if (!coder->allow_eopm) {
+ ret = LZMA_DATA_ERROR;
+ goto out;
+ }
+
+ // Otherwise continue decoding with the expectation
+ // that the next LZMA symbol is EOPM.
+ eopm_allowed = true;
+ }
rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
rc_update_0(coder->is_match[state][pos_state]);
@@ -658,11 +694,18 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
if (rep0 == UINT32_MAX) {
// End of payload marker was
- // found. It must not be
- // present if uncompressed
- // size is known.
- if (coder->uncompressed_size
- != LZMA_VLI_UNKNOWN) {
+ // found. It may only be
+ // present if
+ // - uncompressed size is
+ // unknown or
+ // - after known uncompressed
+ // size amount of bytes has
+ // been decompressed and
+ // caller has indicated
+ // that EOPM might be used
+ // (it's not allowed in
+ // LZMA2).
+ if (!eopm_allowed) {
ret = LZMA_DATA_ERROR;
goto out;
}
@@ -671,7 +714,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// LZMA1 stream with
// end-of-payload marker.
rc_normalize(SEQ_EOPM);
- ret = LZMA_STREAM_END;
+ ret = rc_is_finished(rc)
+ ? LZMA_STREAM_END
+ : LZMA_DATA_ERROR;
goto out;
}
}
@@ -793,9 +838,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
}
}
- rc_normalize(SEQ_NORMALIZE);
- coder->sequence = SEQ_IS_MATCH;
-
out:
// Save state
@@ -822,24 +864,21 @@ out:
if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
coder->uncompressed_size -= dict.pos - dict_start;
- // Since there cannot be end of payload marker if the
- // uncompressed size was known, we check here if we
- // finished decoding.
+ // If we have gotten all the output but the decoder wants
+ // to write more output, the file is corrupt. There are
+ // three SEQ values where output is produced.
if (coder->uncompressed_size == 0 && ret == LZMA_OK
- && coder->sequence != SEQ_NORMALIZE)
- ret = coder->sequence == SEQ_IS_MATCH
- ? LZMA_STREAM_END : LZMA_DATA_ERROR;
+ && (coder->sequence == SEQ_LITERAL_WRITE
+ || coder->sequence == SEQ_SHORTREP
+ || coder->sequence == SEQ_COPY))
+ ret = LZMA_DATA_ERROR;
}
- // We can do an additional check in the range decoder to catch some
- // corrupted files.
if (ret == LZMA_STREAM_END) {
- if (!rc_is_finished(coder->rc))
- ret = LZMA_DATA_ERROR;
-
// Reset the range decoder so that it is ready to reinitialize
// for a new LZMA2 chunk.
rc_reset(coder->rc);
+ coder->sequence = SEQ_IS_MATCH;
}
return ret;
@@ -848,10 +887,12 @@ out:
static void
-lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
+lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
+ bool allow_eopm)
{
lzma_lzma1_decoder *coder = coder_ptr;
coder->uncompressed_size = uncompressed_size;
+ coder->allow_eopm = allow_eopm;
}
@@ -977,7 +1018,7 @@ lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
lz, allocator, options, lz_options));
lzma_decoder_reset(lz->coder, options);
- lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN);
+ lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN, true);
return LZMA_OK;
}