1 files changed, 174 insertions, 373 deletions
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index ae969d62..5c3f1d18 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -18,351 +18,142 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-#include "lz_decoder.h"
+// liblzma supports multiple LZ77-based filters. The LZ part is shared
+// between these filters. The LZ code takes care of dictionary handling
+// and passing the data between filters in the chain. The filter-specific
+// part decodes from the input buffer to the dictionary.
 
 
-/// Minimum size of allocated dictionary
-#define DICT_SIZE_MIN 8192
+#include "lz_decoder.h"
 
-/// When there is less than this amount of data available for decoding,
-/// it is moved to the temporary buffer which
-///   - protects from reads past the end of the buffer; and
-///   - stored the incomplete data between lzma_code() calls.
-///
-/// \note       TEMP_LIMIT must be at least as much as
-///             REQUIRED_IN_BUFFER_SIZE defined in lzma_decoder.c.
-#define TEMP_LIMIT 32
 
-// lzma_lz_decoder.dict[] must be three times the size of TEMP_LIMIT.
-// 2 * TEMP_LIMIT is used for the actual data, and the third TEMP_LIMIT
-// bytes is needed for safety to allow decode_dummy() in lzma_decoder.c
-// to read past end of the buffer. This way it should be both fast and simple.
-#if LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT
-#	error LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT
-#endif
+struct lzma_coder_s {
+	/// Dictionary (history buffer)
+	lzma_dict dict;
 
+	/// The actual LZ-based decoder e.g. LZMA
+	lzma_lz_decoder lz;
 
-struct lzma_coder_s {
+	/// Next filter in the chain, if any. Note that LZMA and LZMA2 are
+	/// only allowed as the last filter, but the long-range filter in
+	/// future can be in the middle of the chain.
 	lzma_next_coder next;
-	lzma_lz_decoder lz;
 
-	// There are more members in this structure but they are not
-	// visible in LZ coder.
+	/// True if the next filter in the chain has returned LZMA_STREAM_END.
+	bool next_finished;
+
+	/// True if the LZ decoder (e.g. LZMA) has detected end of payload
+	/// marker. This may become true before next_finished becomes true.
+	bool this_finished;
+
+        /// Temporary buffer needed when the LZ-based filter is not the last
+        /// filter in the chain. The output of the next filter is first
+        /// decoded into buffer[], which is then used as input for the actual
+        /// LZ-based decoder.
+        struct {
+                size_t pos;
+                size_t size;
+                uint8_t buffer[LZMA_BUFFER_SIZE];
+        } temp;
 };
 
 
-/// - Copy as much data as possible from lz->dict[] to out[].
-/// - Update *out_pos, lz->start, and lz->end accordingly.
-/// - Wrap lz-pos to the beginning of lz->dict[] if there is a danger that
-///   it may go past the end of the buffer (lz->pos >= lz->must_flush_pos).
-static inline bool
-flush(lzma_lz_decoder *restrict lz, uint8_t *restrict out,
-		size_t *restrict out_pos, size_t out_size)
-{
-	// Flush uncompressed data from the history buffer to
-	// the output buffer. This is done in two phases.
-
-	assert(lz->start <= lz->end);
-
-	// Flush if pos < start < end.
-	if (lz->pos < lz->start && lz->start < lz->end) {
-		bufcpy(lz->dict, &lz->start, lz->end, out, out_pos, out_size);
-
-		// If we reached end of the data in history buffer,
-		// wrap to the beginning.
-		if (lz->start == lz->end)
-			lz->start = 0;
-	}
-
-	// Flush if start start < pos <= end. This is not as `else' for
-	// previous `if' because the previous one may make this one true.
-	if (lz->start < lz->pos) {
-		bufcpy(lz->dict, &lz->start,
-				lz->pos, out, out_pos, out_size);
-
-		if (lz->pos >= lz->must_flush_pos) {
-			// Wrap the flushing position if we have
-			// flushed the whole history buffer.
-			if (lz->pos == lz->start)
-				lz->start = 0;
-
-			// Wrap the write position and store to lz.end
-			// how much there is new data available.
-			lz->end = lz->pos;
-			lz->pos = 0;
-			lz->is_full = true;
-		}
-	}
-
-	assert(lz->pos < lz->must_flush_pos);
-
-	return *out_pos == out_size;
-}
-
-
-/// Calculate safe value for lz->limit. If no safe value can be found,
-/// set lz->limit to zero. When flushing, only as little data will be
-/// decoded as is needed to fill the output buffer (lowers both latency
-/// and throughput).
-///
-/// \return     true if there is no space for new uncompressed data.
-///
-static inline bool
-set_limit(lzma_lz_decoder *lz, size_t out_avail, bool flushing)
-{
-	// Set the limit so that writing to dict[limit + match_max_len - 1]
-	// doesn't overwrite any unflushed data and doesn't write past the
-	// end of the dict buffer.
-	if (lz->start <= lz->pos) {
-		// We can fill the buffer from pos till the end
-		// of the dict buffer.
-		lz->limit = lz->must_flush_pos;
-	} else if (lz->pos + lz->match_max_len < lz->start) {
-		// There's some unflushed data between pos and end of the
-		// buffer. Limit so that we don't overwrite the unflushed data.
-		lz->limit = lz->start - lz->match_max_len;
-	} else {
-		// Buffer is too full.
-		lz->limit = 0;
-		return true;
-	}
-
-	// Finetune the limit a bit if it isn't zero.
-
-	assert(lz->limit > lz->pos);
-	const size_t dict_avail = lz->limit - lz->pos;
-
-	if (lz->uncompressed_size < dict_avail) {
-		// Finishing a stream that doesn't have
-		// an end of stream marker.
-		lz->limit = lz->pos + lz->uncompressed_size;
-
-	} else if (flushing && out_avail < dict_avail) {
-		// Flushing enabled, decoding only as little as needed to
-		// fill the out buffer (if there's enough input, of course).
-		lz->limit = lz->pos + out_avail;
-	}
-
-	return lz->limit == lz->pos;
-}
-
-
-/// Takes care of wrapping the data into temporary buffer when needed,
-/// and calls the actual decoder.
-///
-/// \return     true if error occurred
-///
-static inline bool
-call_process(lzma_coder *restrict coder, const uint8_t *restrict in,
-		size_t *restrict in_pos, size_t in_size)
-{
-	// It would be nice and simple if we could just give in[] to the
-	// decoder, but the requirement of zlib-like API forces us to be
-	// able to make *in_pos == in_size whenever there is enough output
-	// space. If needed, we will append a few bytes from in[] to
-	// a temporary buffer and decode enough to reach the part that
-	// was copied from in[]. Then we can continue with the real in[].
-
-	bool error;
-	const size_t dict_old_pos = coder->lz.pos;
-	const size_t in_avail = in_size - *in_pos;
-
-	if (coder->lz.temp_size + in_avail < 2 * TEMP_LIMIT) {
-		// Copy all the available input from in[] to temp[].
-		memcpy(coder->lz.temp + coder->lz.temp_size,
-				in + *in_pos, in_avail);
-		coder->lz.temp_size += in_avail;
-		*in_pos += in_avail;
-		assert(*in_pos == in_size);
-
-		// Decode as much as possible.
-		size_t temp_used = 0;
-		error = coder->lz.process(coder, coder->lz.temp, &temp_used,
-				coder->lz.temp_size, true);
-		assert(temp_used <= coder->lz.temp_size);
-
-		// Move the remaining data to the beginning of temp[].
-		coder->lz.temp_size -= temp_used;
-		memmove(coder->lz.temp, coder->lz.temp + temp_used,
-				coder->lz.temp_size);
-
-	} else if (coder->lz.temp_size > 0) {
-		// Fill temp[] unless it is already full because we aren't
-		// the last filter in the chain.
-		size_t copy_size = 0;
-		if (coder->lz.temp_size < 2 * TEMP_LIMIT) {
-			assert(*in_pos < in_size);
-			copy_size = 2 * TEMP_LIMIT - coder->lz.temp_size;
-			memcpy(coder->lz.temp + coder->lz.temp_size,
-					in + *in_pos, copy_size);
-			// NOTE: We don't update lz.temp_size or *in_pos yet.
-		}
-
-		size_t temp_used = 0;
-		error = coder->lz.process(coder, coder->lz.temp, &temp_used,
-				coder->lz.temp_size + copy_size, false);
-
-		if (temp_used < coder->lz.temp_size) {
-			// Only very little input data was consumed. Move
-			// the unprocessed data to the beginning temp[].
-			coder->lz.temp_size += copy_size - temp_used;
-			memmove(coder->lz.temp, coder->lz.temp + temp_used,
-					coder->lz.temp_size);
-			*in_pos += copy_size;
-			assert(*in_pos <= in_size);
-
-		} else {
-			// We were able to decode so much data that next time
-			// we can decode directly from in[]. That is, we can
-			// consider temp[] to be empty now.
-			*in_pos += temp_used - coder->lz.temp_size;
-			coder->lz.temp_size = 0;
-			assert(*in_pos <= in_size);
-		}
-
-	} else {
-		// Decode directly from in[].
-		error = coder->lz.process(coder, in, in_pos, in_size, false);
-		assert(*in_pos <= in_size);
-	}
-
-	assert(coder->lz.pos >= dict_old_pos);
-	if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
-		// Update uncompressed size.
-		coder->lz.uncompressed_size -= coder->lz.pos - dict_old_pos;
-
-		// Check that End of Payload Marker hasn't been detected
-		// since it must not be present because uncompressed size
-		// is known.
-		if (coder->lz.eopm_detected)
-			error = true;
-	}
-
-	return error;
-}
-
-
 static lzma_ret
 decode_buffer(lzma_coder *coder,
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
-		size_t *restrict out_pos, size_t out_size,
-		bool flushing)
+		size_t *restrict out_pos, size_t out_size)
 {
-	bool stop = false;
-
 	while (true) {
-		// Flush from coder->lz.dict to out[].
-		flush(&coder->lz, out, out_pos, out_size);
-
-		// All done?
-		if (*out_pos == out_size
-				|| stop
-				|| coder->lz.eopm_detected
-				|| coder->lz.uncompressed_size == 0)
-			break;
-
-		// Set write limit in the dictionary.
-		if (set_limit(&coder->lz, out_size - *out_pos, flushing))
-			break;
-
-		// Decode more data.
-		if (call_process(coder, in, in_pos, in_size))
-			return LZMA_DATA_ERROR;
-
-		// Set stop to true if we must not call call_process() again
-		// during this function call.
-		// FIXME: Can this make the loop exist too early? It wouldn't
-		// cause data corruption so not a critical problem. It can
-		// happen if dictionary gets full and lz.temp still contains
-		// a few bytes data that we could decode right now.
-		if (*in_pos == in_size && coder->lz.temp_size <= TEMP_LIMIT
-				&& coder->lz.pos < coder->lz.limit)
-			stop = true;
+		// Wrap the dictionary if needed.
+		if (coder->dict.pos == coder->dict.size)
+			coder->dict.pos = 0;
+
+		// Store the current dictionary position. It is needed to know
+		// where to start copying to the out[] buffer.
+		const size_t dict_start = coder->dict.pos;
+
+		// Calculate how much we allow the process() function to
+		// decode. It must not decode past the end of the dictionary
+		// buffer, and we don't want it to decode more than is
+		// actually needed to fill the out[] buffer.
+		coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos,
+				coder->dict.size - coder->dict.pos);
+
+		// Call the process() function to do the actual decoding.
+		const lzma_ret ret = coder->lz.code(
+				coder->lz.coder, &coder->dict,
+				in, in_pos, in_size);
+
+		// Copy the decoded data from the dictionary to the out[]
+		// buffer.
+		const size_t copy_size = coder->dict.pos - dict_start;
+		assert(copy_size <= out_size - *out_pos);
+		memcpy(out + *out_pos, coder->dict.buf + dict_start,
+				copy_size);
+		*out_pos += copy_size;
+
+		// Return if everything got decoded or an error occurred, or
+		// if there's no more data to decode.
+		if (ret != LZMA_OK || *out_pos == out_size
+				|| coder->dict.pos < coder->dict.size)
+			return ret;
 	}
-
-	// If we have decoded everything (EOPM detected or uncompressed_size
-	// bytes were processed) to the history buffer, and also flushed
-	// everything from the history buffer, our job is done.
-	if ((coder->lz.eopm_detected
-			|| coder->lz.uncompressed_size == 0)
-			&& coder->lz.start == coder->lz.pos)
-		return LZMA_STREAM_END;
-
-	return LZMA_OK;
 }
 
 
-extern lzma_ret
-lzma_lz_decode(lzma_coder *coder,
+static lzma_ret
+lz_decode(lzma_coder *coder,
 		lzma_allocator *allocator lzma_attribute((unused)),
 		const uint8_t *restrict in, size_t *restrict in_pos,
 		size_t in_size, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size,
 		lzma_action action)
 {
-	if (coder->next.code == NULL) {
-		const lzma_ret ret = decode_buffer(coder, in, in_pos, in_size,
-				out, out_pos, out_size,
-				action == LZMA_SYNC_FLUSH);
-
-		if (*out_pos == out_size || ret == LZMA_STREAM_END) {
-			// Unread to make coder->temp[] empty. This is easy,
-			// because we know that all the data currently in
-			// coder->temp[] has been copied form in[] during this
-			// call to the decoder.
-			//
-			// If we didn't do this, we could have data left in
-			// coder->temp[] when end of stream is reached. That
-			// data could be left there from *previous* call to
-			// the decoder; in that case we wouldn't know where
-			// to put that data.
-			assert(*in_pos >= coder->lz.temp_size);
-			*in_pos -= coder->lz.temp_size;
-			coder->lz.temp_size = 0;
-		}
-
-		return ret;
-	}
+	if (coder->next.code == NULL)
+		return decode_buffer(coder, in, in_pos, in_size,
+				out, out_pos, out_size);
 
 	// We aren't the last coder in the chain, we need to decode
 	// our input to a temporary buffer.
-	const bool flushing = action == LZMA_SYNC_FLUSH;
 	while (*out_pos < out_size) {
-		if (!coder->lz.next_finished
-				&& coder->lz.temp_size < LZMA_BUFFER_SIZE) {
+		// Fill the temporary buffer if it is empty.
+		if (!coder->next_finished
+				&& coder->temp.pos == coder->temp.size) {
+			coder->temp.pos = 0;
+			coder->temp.size = 0;
+
 			const lzma_ret ret = coder->next.code(
 					coder->next.coder,
 					allocator, in, in_pos, in_size,
-					coder->lz.temp, &coder->lz.temp_size,
+					coder->temp.buffer, &coder->temp.size,
 					LZMA_BUFFER_SIZE, action);
 
 			if (ret == LZMA_STREAM_END)
-				coder->lz.next_finished = true;
-			else if (coder->lz.temp_size < LZMA_BUFFER_SIZE
-					|| ret != LZMA_OK)
+				coder->next_finished = true;
+			else if (ret != LZMA_OK || coder->temp.size == 0)
 				return ret;
 		}
 
-		if (coder->lz.this_finished) {
-			if (coder->lz.temp_size != 0)
+		if (coder->this_finished) {
+			if (coder->temp.size != 0)
 				return LZMA_DATA_ERROR;
 
-			if (coder->lz.next_finished)
+			if (coder->next_finished)
 				return LZMA_STREAM_END;
 
 			return LZMA_OK;
 		}
 
-		size_t dummy = 0;
-		const lzma_ret ret = decode_buffer(coder, NULL, &dummy, 0,
-				out, out_pos, out_size, flushing);
+		const lzma_ret ret = decode_buffer(coder, coder->temp.buffer,
+				&coder->temp.pos, coder->temp.size,
+				out, out_pos, out_size);
 
 		if (ret == LZMA_STREAM_END)
-			coder->lz.this_finished = true;
+			coder->this_finished = true;
 		else if (ret != LZMA_OK)
 			return ret;
-		else if (coder->lz.next_finished && *out_pos < out_size)
+		else if (coder->next_finished && *out_pos < out_size)
 			return LZMA_DATA_ERROR;
 	}
 
@@ -370,94 +161,104 @@ lzma_lz_decode(lzma_coder *coder,
 }
 
 
-/// \brief      Initializes LZ part of the LZMA decoder or Inflate
-///
-/// \param      history_size    Number of bytes the LZ out window is
-///                             supposed keep available from the output
-///                             history.
-/// \param      match_max_len   Number of bytes a single decoding loop
-///                             can advance the write position (lz->pos)
-///                             in the history buffer (lz->dict).
-///
-/// \note       This function is called by LZMA decoder and Inflate init()s.
-///             It's up to those functions allocate *lz and initialize it
-///             with LZMA_LZ_DECODER_INIT.
+static void
+lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_next_end(&coder->next, allocator);
+	lzma_free(coder->dict.buf, allocator);
+
+	if (coder->lz.end != NULL)
+		coder->lz.end(coder->lz.coder, allocator);
+	else
+		lzma_free(coder->lz.coder, allocator);
+
+	lzma_free(coder, allocator);
+	return;
+}
+
+
 extern lzma_ret
-lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator,
-		bool (*process)(lzma_coder *restrict coder,
-			const uint8_t *restrict in, size_t *restrict in_pos,
-			size_t in_size, bool has_safe_buffer),
-		size_t history_size, size_t match_max_len)
+lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters,
+		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
+			lzma_allocator *allocator, const void *options,
+			size_t *dict_size))
 {
-	// Known uncompressed size is used only with LZMA_Alone files so we
-	// set it always to unknown by default.
-	lz->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
-
-	// Limit the history size to roughly sane values. This is primarily
-	// to prevent integer overflows.
-	if (history_size > UINT32_MAX / 2)
-		return LZMA_HEADER_ERROR;
-
-	// Store the value actually requested. We use it for sanity checks
-	// when repeating data from the history buffer.
-	lz->requested_size = history_size;
-
-	// Avoid tiny history buffer sizes for performance reasons.
-	// TODO: Test if this actually helps...
-	if (history_size < DICT_SIZE_MIN)
-		history_size = DICT_SIZE_MIN;
-
-	// The real size of the history buffer is a bit bigger than
-	// requested by our caller. This allows us to do some optimizations,
-	// which help not only speed but simplicity of the code; specifically,
-	// we can make sure that there is always at least match_max_len
-	// bytes immediatelly available for writing without a need to wrap
-	// the history buffer.
-	const size_t dict_real_size = history_size + 2 * match_max_len + 1;
-
-	// Reallocate memory if needed.
-	if (history_size != lz->size || match_max_len != lz->match_max_len) {
-		// Destroy the old buffer.
-		lzma_lz_decoder_end(lz, allocator);
-
-		lz->size = history_size;
-		lz->match_max_len = match_max_len;
-		lz->must_flush_pos = history_size + match_max_len + 1;
-
-		lz->dict = lzma_alloc(dict_real_size, allocator);
-		if (lz->dict == NULL)
+	// Allocate the base structure if it isn't already allocated.
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
 			return LZMA_MEM_ERROR;
+
+		next->code = &lz_decode;
+		next->end = &lz_decoder_end;
+
+		next->coder->dict.buf = NULL;
+		next->coder->dict.size = 0;
+		next->coder->lz = LZMA_LZ_DECODER_INIT;
+		next->coder->next = LZMA_NEXT_CODER_INIT;
 	}
 
-	// Reset the variables so that lz_get_byte(lz, 0) will return '\0'.
-	lz->pos = 0;
-	lz->start = 0;
-	lz->end = dict_real_size;
-	lz->dict[dict_real_size - 1] = 0;
-	lz->is_full = false;
-	lz->eopm_detected = false;
-	lz->next_finished = false;
-	lz->this_finished = false;
-	lz->temp_size = 0;
-
-	// Clean up the temporary buffer to make it very sure that there are
-	// no information leaks when multiple steams are decoded with the
-	// same decoder structures.
-	memzero(lz->temp, LZMA_BUFFER_SIZE);
-
-	// Set the process function pointer.
-	lz->process = process;
+	// Allocate and initialize the LZ-based decoder. It will also give
+	// us the dictionary size.
+	size_t dict_size;
+	return_if_error(lz_init(&next->coder->lz, allocator,
+			filters[0].options, &dict_size));
+
+	// If the dictionary size is very small, increase it to 4096 bytes.
+	// This is to prevent constant wrapping of the dictionary, which
+	// would slow things down. The downside is that since we don't check
+	// separately for the real dictionary size, we may happily accept
+	// corrupt files.
+	if (dict_size < 4096)
+		dict_size = 4096;
+
+	// Make dictionary size a multipe of 16. Some LZ-based decoders like
+	// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
+	// data. Aligned buffer is also good when memcpying from the
+	// dictionary to the output buffer, since applications are
+	// recommended to give aligned buffers to liblzma.
+	//
+	// Avoid integer overflow. FIXME Should the return value be
+	// LZMA_HEADER_ERROR or LZMA_MEM_ERROR?
+	if (dict_size > SIZE_MAX - 15)
+		return LZMA_MEM_ERROR;
+
+	dict_size = (dict_size + 15) & (SIZE_MAX - 15);
+
+	// Allocate and initialize the dictionary.
+	if (next->coder->dict.size != dict_size) {
+		lzma_free(next->coder->dict.buf, allocator);
+		next->coder->dict.buf = lzma_alloc(dict_size, allocator);
+		if (next->coder->dict.buf == NULL)
+			return LZMA_MEM_ERROR;
 
-	return LZMA_OK;
+		next->coder->dict.size = dict_size;
+	}
+
+	dict_reset(&next->coder->dict);
+
+	// Miscellaneous initializations
+	next->coder->next_finished = false;
+	next->coder->this_finished = false;
+	next->coder->temp.pos = 0;
+	next->coder->temp.size = 0;
+
+	// Initialize the next filter in the chain, if any.
+	return lzma_next_filter_init(&next->coder->next, allocator,
+			filters + 1);
+}
+
+
+extern uint64_t
+lzma_lz_decoder_memusage(size_t dictionary_size)
+{
+	return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
 }
 
 
 extern void
-lzma_lz_decoder_end(lzma_lz_decoder *lz, lzma_allocator *allocator)
+lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
 {
-	lzma_free(lz->dict, allocator);
-	lz->dict = NULL;
-	lz->size = 0;
-	lz->match_max_len = 0;
-	return;
+	coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
 }