liblzma: Make Block decoder catch certain types of errors better.

Now it limits the input and output buffer sizes that are passed to a raw decoder. This way there's no need to check if the sizes can grow too big or overflow when updating Compressed Size and Uncompressed Size counts. This also means that a corrupt file cannot cause the raw decoder to process useless extra input or output that would exceed the size info in Block Header (and thus cause LZMA_DATA_ERROR anyway). More importantly, now the size information is verified more carefully in case raw decoder returns LZMA_OK. This doesn't really matter with the current single-threaded .xz decoder as the errors would be detected slightly later anyway. But this helps avoiding corner cases in the upcoming threaded decompressor, and it might help other Block decoder uses outside liblzma too. The test files bad-1-lzma2-{9,10,11}.xz test these conditions. With the single-threaded .xz decoder the only difference is that LZMA_DATA_ERROR is detected in a difference place now.
author: Lasse Collin <lasse.collin@tukaani.org> 2022-02-20 20:36:27 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2022-02-20 20:36:27 +0200
commit: 1c9a5786d206b4abc8e427326651c8174baea753 (patch)
tree: e504643962b3bd30ffe73fc8d9f67922de193e70 /src
parent: Tests: Add bad-1-lzma2-11.xz. (diff)
download: xz-1c9a5786d206b4abc8e427326651c8174baea753.tar.xz
1 files changed, 54 insertions, 25 deletions
diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c
index 075bd279..4827e0f0 100644
--- a/src/liblzma/common/block_decoder.c
+++ b/src/liblzma/common/block_decoder.c
@@ -40,6 +40,9 @@ typedef struct {
 	/// is unknown.
 	lzma_vli compressed_limit;
 
+	/// Maximum allowed Uncompressed Size.
+	lzma_vli uncompressed_limit;
+
 	/// Position when reading the Check field
 	size_t check_pos;
 
@@ -52,21 +55,6 @@ typedef struct {
 
 
 static inline bool
-update_size(lzma_vli *size, lzma_vli add, lzma_vli limit)
-{
-	if (limit > LZMA_VLI_MAX)
-		limit = LZMA_VLI_MAX;
-
-	if (limit < *size || limit - *size < add)
-		return true;
-
-	*size += add;
-
-	return false;
-}
-
-
-static inline bool
 is_size_valid(lzma_vli size, lzma_vli reference)
 {
 	return reference == LZMA_VLI_UNKNOWN || reference == size;
@@ -86,21 +74,54 @@ block_decode(void *coder_ptr, const lzma_allocator *allocator,
 		const size_t in_start = *in_pos;
 		const size_t out_start = *out_pos;
 
+		// Limit the amount of input and output space that we give
+		// to the raw decoder based on the information we have
+		// (or don't have) from Block Header.
+		const size_t in_stop = *in_pos + (size_t)my_min(
+			in_size - *in_pos,
+			coder->compressed_limit - coder->compressed_size);
+		const size_t out_stop = *out_pos + (size_t)my_min(
+			out_size - *out_pos,
+			coder->uncompressed_limit - coder->uncompressed_size);
+
 		const lzma_ret ret = coder->next.code(coder->next.coder,
-				allocator, in, in_pos, in_size,
-				out, out_pos, out_size, action);
+				allocator, in, in_pos, in_stop,
+				out, out_pos, out_stop, action);
 
 		const size_t in_used = *in_pos - in_start;
 		const size_t out_used = *out_pos - out_start;
 
-		// NOTE: We compare to compressed_limit here, which prevents
-		// the total size of the Block growing past LZMA_VLI_MAX.
-		if (update_size(&coder->compressed_size, in_used,
-					coder->compressed_limit)
-				|| update_size(&coder->uncompressed_size,
-					out_used,
-					coder->block->uncompressed_size))
-			return LZMA_DATA_ERROR;
+		// Because we have limited the input and output sizes,
+		// we know that these cannot grow too big or overflow.
+		coder->compressed_size += in_used;
+		coder->uncompressed_size += out_used;
+
+		if (ret == LZMA_OK) {
+			const bool comp_done = coder->compressed_size
+					== coder->block->compressed_size;
+			const bool uncomp_done = coder->uncompressed_size
+					== coder->block->uncompressed_size;
+
+			// If both input and output amounts match the sizes
+			// in Block Header but we still got LZMA_OK instead
+			// of LZMA_STREAM_END, the file is broken.
+			if (comp_done && uncomp_done)
+				return LZMA_DATA_ERROR;
+
+			// If the decoder has consumed all the input that it
+			// needs but it still couldn't fill the output buffer
+			// or return LZMA_STREAM_END, the file is broken.
+			if (comp_done && *out_pos < out_size)
+				return LZMA_DATA_ERROR;
+
+			// If the decoder has produced all the output but
+			// it still didn't return LZMA_STREAM_END or consume
+			// more input (for example, detecting an end of
+			// payload marker may need more input but produce
+			// no output) the file is broken.
+			if (uncomp_done && *in_pos < in_size)
+				return LZMA_DATA_ERROR;
+		}
 
 		if (!coder->ignore_check)
 			lzma_check_update(&coder->check, coder->block->check,
@@ -230,6 +251,14 @@ lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 					- lzma_check_size(block->check)
 				: block->compressed_size;
 
+	// With Uncompressed Size this is simpler. If Block Header lacks
+	// the size info, then LZMA_VLI_MAX is the maximum possible
+	// Uncompressed Size.
+	coder->uncompressed_limit
+			= block->uncompressed_size == LZMA_VLI_UNKNOWN
+				? LZMA_VLI_MAX
+				: block->uncompressed_size;
+
 	// Initialize the check. It's caller's problem if the Check ID is not
 	// supported, and the Block decoder cannot verify the Check field.
 	// Caller can test lzma_check_is_supported(block->check).
author	Lasse Collin <lasse.collin@tukaani.org>	2022-02-20 20:36:27 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2022-02-20 20:36:27 +0200
commit	1c9a5786d206b4abc8e427326651c8174baea753 (patch)
tree	e504643962b3bd30ffe73fc8d9f67922de193e70 /src
parent	Tests: Add bad-1-lzma2-11.xz. (diff)
download	xz-1c9a5786d206b4abc8e427326651c8174baea753.tar.xz