aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2021-01-17 18:53:34 +0200
committerLasse Collin <lasse.collin@tukaani.org>2021-01-17 18:53:34 +0200
commit774cc0118ba2496581cb2621505a04bb6598cc75 (patch)
tree76d2253456c4cc51813623587cfa157a0cf84985
parentliblzma: Fix missing normalization in rc_encode_dummy(). (diff)
downloadxz-774cc0118ba2496581cb2621505a04bb6598cc75.tar.xz
liblzma: Make EROFS LZMA decoder work when exact uncomp_size isn't known.
The caller must still not specify an uncompressed size bigger than the actual uncompressed size. As a downside, this now needs the exact compressed size.
-rw-r--r--src/liblzma/api/lzma/container.h23
-rw-r--r--src/liblzma/common/erofs_decoder.c80
2 files changed, 91 insertions, 12 deletions
diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h
index 581f3507..4ad7ce6a 100644
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@@ -690,11 +690,22 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
* The special behavior of lzma_code() applies to lzma_erofs_encoder() only.
*
* \param strm Pointer to properly prepared lzma_stream
+ * \param comp_size Compressed size of the EROFS LZMA stream.
+ * The caller must somehow know this exactly.
* \param uncomp_size Uncompressed size of the EROFS LZMA stream.
- * The caller must somehow know this. Note that
- * while the EROFS LZMA decoder in XZ Embedded needs
- * also the compressed size, the implementation in
- * liblzma doesn't need to know the compressed size.
+ * If the exact uncompressed size isn't known, this
+ * can be set to a value that is at most as big as
+ * the exact uncompressed size would be, but then the
+ * next argument uncomp_size_is_exact must be false.
+ * \param uncomp_size_is_exact
+ * If true, uncomp_size must be exactly correct.
+ * This will improve error detection at the end of
+ * the stream. If the exact uncompressed size isn't
+ * known, this must be false. uncomp_size must still
+ * be at most as big as the exact uncompressed size
+ * is. Setting this to false when the exact size is
+ * known will work but error detection at the end of
+ * the stream will be weaker.
* \param dict_size LZMA dictionary size that was used when
* compressing the data. It is OK to use a bigger
* value too but liblzma will then allocate more
@@ -705,4 +716,6 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
* dictionary than actually required.)
*/
extern LZMA_API(lzma_ret) lzma_erofs_decoder(
- lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size);
+ lzma_stream *strm, uint64_t comp_size,
+ uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+ uint32_t dict_size);
diff --git a/src/liblzma/common/erofs_decoder.c b/src/liblzma/common/erofs_decoder.c
index ef584373..816e2482 100644
--- a/src/liblzma/common/erofs_decoder.c
+++ b/src/liblzma/common/erofs_decoder.c
@@ -18,12 +18,27 @@ typedef struct {
/// LZMA1 decoder
lzma_next_coder lzma;
- /// Uncompressed size of the stream as given by the application
+ /// Compressed size of the stream as given by the application.
+ /// This must be exactly correct.
+ ///
+ /// This will be decremented when input is read.
+ uint64_t comp_size;
+
+ /// Uncompressed size of the stream as given by the application.
+ /// This may be less than the actual uncompressed size if
+ /// uncomp_size_is_exact is false.
+ ///
+ /// This will be decremented when output is produced.
lzma_vli uncomp_size;
/// LZMA dictionary size as given by the application
uint32_t dict_size;
+ /// If true, the exact uncompressed size is known. If false,
+ /// uncomp_size may be smaller than the real uncompressed size;
+ /// uncomp_size may never be bigger than the real uncompressed size.
+ bool uncomp_size_is_exact;
+
/// True once the first byte of the EROFS LZMA stream
/// has been processed.
bool props_decoded;
@@ -38,6 +53,26 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator,
{
lzma_erofs_coder *coder = coder_ptr;
+ // Remember the in start position so that we can update comp_size.
+ const size_t in_start = *in_pos;
+
+ // Remember the out start position so that we can update uncomp_size.
+ const size_t out_start = *out_pos;
+
+ // Limit the amount of input so that the decoder won't read more than
+ // comp_size. This is required when uncomp_size isn't exact because
+ // in that case the LZMA decoder will try to decode more input even
+ // when it has no output space (it can be looking for EOPM).
+ if (in_size - *in_pos > coder->comp_size)
+ in_size = *in_pos + (size_t)(coder->comp_size);
+
+ // When the exact uncompressed size isn't known, we must limit
+ // the available output space to prevent the LZMA decoder from
+ // trying to decode too much.
+ if (!coder->uncomp_size_is_exact
+ && out_size - *out_pos > coder->uncomp_size)
+ out_size = *out_pos + (size_t)(coder->uncomp_size);
+
if (!coder->props_decoded) {
// There must be at least one byte of input to decode
// the properties byte.
@@ -71,8 +106,9 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator,
allocator, filters));
// Use a hack to set the uncompressed size.
- lzma_lz_decoder_uncompressed(coder->lzma.coder,
- coder->uncomp_size);
+ if (coder->uncomp_size_is_exact)
+ lzma_lz_decoder_uncompressed(coder->lzma.coder,
+ coder->uncomp_size);
// Pass one dummy 0x00 byte to the LZMA decoder since that
// is what it expects the first byte to be.
@@ -88,9 +124,30 @@ erofs_decode(void *coder_ptr, const lzma_allocator *allocator,
}
// The rest is normal LZMA decoding.
- return coder->lzma.code(coder->lzma.coder, allocator,
+ lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
in, in_pos, in_size,
out, out_pos, out_size, action);
+
+ // Update the remaining compressed size.
+ assert(coder->comp_size >= *in_pos - in_start);
+ coder->comp_size -= *in_pos - in_start;
+
+ if (!coder->uncomp_size_is_exact) {
+ // Update the amount of output remaining.
+ assert(coder->uncomp_size >= *out_pos - out_start);
+ coder->uncomp_size -= *out_pos - out_start;
+
+ // - We must not get LZMA_STREAM_END because the stream
+ // shouldn't have EOPM.
+ // - We must use uncomp_size to determine when to
+ // return LZMA_STREAM_END.
+ if (ret == LZMA_STREAM_END)
+ ret = LZMA_DATA_ERROR;
+ else if (coder->uncomp_size == 0)
+ ret = LZMA_STREAM_END;
+ }
+
+ return ret;
}
@@ -106,7 +163,9 @@ erofs_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
static lzma_ret
erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
- uint64_t uncomp_size, uint32_t dict_size)
+ uint64_t comp_size,
+ uint64_t uncomp_size, bool uncomp_size_is_exact,
+ uint32_t dict_size)
{
lzma_next_coder_init(&erofs_decoder_init, next, allocator);
@@ -124,10 +183,14 @@ erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
coder->lzma = LZMA_NEXT_CODER_INIT;
}
+ // The public API is uint64_t but the internal LZ decoder API uses
+ // lzma_vli.
if (uncomp_size > LZMA_VLI_MAX)
return LZMA_OPTIONS_ERROR;
+ coder->comp_size = comp_size;
coder->uncomp_size = uncomp_size;
+ coder->uncomp_size_is_exact = uncomp_size_is_exact;
coder->dict_size = dict_size;
coder->props_decoded = false;
@@ -137,9 +200,12 @@ erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
extern LZMA_API(lzma_ret)
-lzma_erofs_decoder(lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size)
+lzma_erofs_decoder(lzma_stream *strm, uint64_t comp_size,
+ uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+ uint32_t dict_size)
{
- lzma_next_strm_init(erofs_decoder_init, strm, uncomp_size, dict_size);
+ lzma_next_strm_init(erofs_decoder_init, strm, comp_size,
+ uncomp_size, uncomp_size_is_exact, dict_size);
strm->internal->supported_actions[LZMA_RUN] = true;
strm->internal->supported_actions[LZMA_FINISH] = true;