aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lasse Collin <lasse.collin@tukaani.org> 2022-08-18 17:16:49 +0300
committer Lasse Collin <lasse.collin@tukaani.org> 2022-08-18 17:16:49 +0300
commit c4e8e5fb311225b8b48d34157891a640b2535e0c (patch)
tree a7ac08bc9dcf279eb495abc07b49686774d699ed
parent Add NEWS for 5.2.6. (diff)
download xz-c4e8e5fb311225b8b48d34157891a640b2535e0c.tar.xz
liblzma: Threaded decoder: Improve LZMA_FAIL_FAST when LZMA_FINISH is used.
It will now return LZMA_DATA_ERROR (not LZMA_OK or LZMA_BUF_ERROR) if LZMA_FINISH is used and there isn't enough input to finish decoding the Block Header or the Block. The use of LZMA_DATA_ERROR is simpler and less risky than LZMA_BUF_ERROR but this might be changed before 5.4.0.
-rw-r--r-- src/liblzma/api/lzma/container.h 6
-rw-r--r-- src/liblzma/common/stream_decoder_mt.c 42
2 files changed, 48 insertions, 0 deletions
diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h
index 564c6aaf..c0e1f5b4 100644
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@@ -623,6 +623,12 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder(
* decompressed multiple times with this flag, a different amount of output
* may be produced by different runs, and even the error code might vary.
*
+ * When using LZMA_FAIL_FAST, it is recommended to use LZMA_FINISH to tell
+ * the decoder when no more input will be coming because it can help fast
+ * detection and reporting of truncated files. Note that in this situation
+ * truncated files might be diagnosed with LZMA_DATA_ERROR instead of
+ * LZMA_OK or LZMA_BUF_ERROR!
+ *
* Without this flag the threaded decoder will provide as much output as
* possible at first and then report the pending error. This default behavior
* matches the single-threaded decoder and provides repeatable behavior
diff --git a/src/liblzma/common/stream_decoder_mt.c b/src/liblzma/common/stream_decoder_mt.c
index 3786b2aa..840051b0 100644
--- a/src/liblzma/common/stream_decoder_mt.c
+++ b/src/liblzma/common/stream_decoder_mt.c
@@ -1143,6 +1143,35 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
// of "called with an empty input buffer".
assert(*in_pos == in_size);
+ // If LZMA_FINISH was used we know that we won't get
+ // more input, so the file must be truncated if we
+ // get here. If worker threads don't detect any
+ // errors, eventually there will be no more output
+ // while we keep returning LZMA_OK which gets
+ // converted to LZMA_BUF_ERROR in lzma_code().
+ //
+ // If fail-fast is enabled then we will return
+ // immediately using LZMA_DATA_ERROR instead of
+ // LZMA_OK or LZMA_BUF_ERROR. Rationale for the
+ // error code:
+ //
+ // - Worker threads may have a large amount of
+ // not-yet-decoded input data and we don't
+ // know for sure if all data is valid. Bad
+ // data there would result in LZMA_DATA_ERROR
+ // when fail-fast isn't used.
+ //
+ // - Immediate LZMA_BUF_ERROR would be a bit weird
+ // considering the older liblzma code. lzma_code()
+ // even has an assertion to prevent coders from
+ // returning LZMA_BUF_ERROR directly.
+ //
+ // The downside of this is that with fail-fast apps
+ // cannot always distinguish between corrupt and
+ // truncated files.
+ if (action == LZMA_FINISH && coder->fail_fast)
+ return LZMA_DATA_ERROR;
+
return_if_error(read_output_and_wait(coder, allocator,
out, out_pos, out_size,
NULL, waiting_allowed,
@@ -1478,6 +1507,19 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
// Fall through
case SEQ_BLOCK_THR_RUN: {
+ if (action == LZMA_FINISH && coder->fail_fast) {
+ // We know that we won't get more input and that
+ // the caller wants fail-fast behavior. If we see
+ // that we don't have enough input to finish this
+ // Block, return LZMA_DATA_ERROR immediately.
+ // See SEQ_BLOCK_HEADER for the error code rationale.
+ const size_t in_avail = in_size - *in_pos;
+ const size_t in_needed = coder->thr->in_size
+ - coder->thr->in_filled;
+ if (in_avail < in_needed)
+ return LZMA_DATA_ERROR;
+ }
+
// Copy input to the worker thread.
size_t cur_in_filled = coder->thr->in_filled;
lzma_bufcpy(in, in_pos, in_size, coder->thr->in,