3 files changed, 173 insertions, 56 deletions
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 629f9df2..8d2277ec 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -141,8 +141,9 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator,
 		const uint8_t *preset_dictionary,
 		size_t preset_dictionary_size)
 {
-	// Set uncompressed size.
+	lz->sequence = SEQ_RUN;
 	lz->uncompressed_size = uncompressed_size;
+	lz->temp_size = 0;
 
 	///////////////
 	// In Window //
@@ -187,7 +188,6 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator,
 	lz->read_pos = 0;
 	lz->read_limit = 0;
 	lz->write_pos = 0;
-	lz->stream_end_was_reached = false;
 
 
 	//////////////////
@@ -368,35 +368,59 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 		size_t *in_pos, size_t in_size, lzma_action action)
 {
 	assert(coder->lz.read_pos <= coder->lz.write_pos);
-	lzma_ret ret;
 
 	// Move the sliding window if needed.
 	if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after)
 		move_window(&coder->lz);
 
+	size_t in_used;
+	lzma_ret ret;
 	if (coder->next.code == NULL) {
 		// Not using a filter, simply memcpy() as much as possible.
-		bufcpy(in, in_pos, in_size, coder->lz.buffer,
+		in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer,
 				&coder->lz.write_pos, coder->lz.size);
 
-		if (action == LZMA_FINISH && *in_pos == in_size)
+		if (action != LZMA_RUN && *in_pos == in_size)
 			ret = LZMA_STREAM_END;
 		else
 			ret = LZMA_OK;
 
 	} else {
+		const size_t in_start = *in_pos;
 		ret = coder->next.code(coder->next.coder, allocator,
 				in, in_pos, in_size,
 				coder->lz.buffer, &coder->lz.write_pos,
 				coder->lz.size, action);
+		in_used = *in_pos - in_start;
 	}
 
-	// If end of stream has been reached, we allow the encoder to process
-	// all the input (that is, read_pos is allowed to reach write_pos).
-	// Otherwise we keep keep_size_after bytes available as prebuffer.
+	assert(coder->lz.uncompressed_size >= in_used);
+	if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
+		coder->lz.uncompressed_size -= in_used;
+
+	// If end of stream has been reached or flushing completed, we allow
+	// the encoder to process all the input (that is, read_pos is allowed
+	// to reach write_pos). Otherwise we keep keep_size_after bytes
+	// available as prebuffer.
 	if (ret == LZMA_STREAM_END) {
-		coder->lz.stream_end_was_reached = true;
+		assert(*in_pos == in_size);
 		coder->lz.read_limit = coder->lz.write_pos;
+		ret = LZMA_OK;
+
+		switch (action) {
+		case LZMA_SYNC_FLUSH:
+			coder->lz.sequence = SEQ_FLUSH;
+			break;
+
+		case LZMA_FINISH:
+			coder->lz.sequence = SEQ_FINISH;
+			break;
+
+		default:
+			assert(0);
+			ret = LZMA_PROG_ERROR;
+			break;
+		}
 
 	} else if (coder->lz.write_pos > coder->lz.keep_size_after) {
 		// This needs to be done conditionally, because if we got
@@ -406,6 +430,19 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
 				- coder->lz.keep_size_after;
 	}
 
+	// Switch to finishing mode if we have got all the input data.
+	// lzma_lz_encode() won't return LZMA_STREAM_END until LZMA_FINISH
+	// is used.
+	//
+	// NOTE: When LZMA is used together with other filters, it is possible
+	// that coder->lz.sequence gets set to SEQ_FINISH before the next
+	// encoder has returned LZMA_STREAM_END. This is somewhat ugly, but
+	// works correctly, because the next encoder cannot have any more
+	// output left to be produced. If it had, then our known Uncompressed
+	// Size would be invalid, which would mean that we have a bad bug.
+	if (ret == LZMA_OK && coder->lz.uncompressed_size == 0)
+		coder->lz.sequence = SEQ_FINISH;
+
 	return ret;
 }
 
@@ -417,20 +454,81 @@ lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator,
 		uint8_t *restrict out, size_t *restrict out_pos,
 		size_t out_size, lzma_action action)
 {
-	while (*out_pos < out_size
-			&& (*in_pos < in_size || action == LZMA_FINISH)) {
-		// Fill the input window if there is no more usable data.
-		if (!coder->lz.stream_end_was_reached && coder->lz.read_pos
-				>= coder->lz.read_limit) {
-			const lzma_ret ret = fill_window(coder, allocator,
-					in, in_pos, in_size, action);
-			if (ret != LZMA_OK && ret != LZMA_STREAM_END)
-				return ret;
+	// Flush the temporary output buffer, which may be used when the
+	// encoder runs out of space in the primary output buffer (the out,
+	// *out_pos, and out_size variables).
+	if (coder->lz.temp_size > 0) {
+		const size_t out_avail = out_size - *out_pos;
+		if (out_avail < coder->lz.temp_size) {
+			// Cannot copy everything. Copy as much as possible
+			// and move the data in lz.temp to the beginning of
+			// that buffer.
+			memcpy(out + *out_pos, coder->lz.temp, out_avail);
+			*out_pos += out_avail;
+			memmove(coder->lz.temp, coder->lz.temp + out_avail,
+					coder->lz.temp_size - out_avail);
+			coder->lz.temp_size -= out_avail;
+			return LZMA_OK;
 		}
 
+		// We can copy everything from coder->lz.temp to out.
+		memcpy(out + *out_pos, coder->lz.temp, coder->lz.temp_size);
+		*out_pos += coder->lz.temp_size;
+		coder->lz.temp_size = 0;
+	}
+
+	if (coder->lz.sequence == SEQ_FLUSH_END) {
+		// During an earlier call to this function, flushing was
+		// otherwise finished except some data was left pending
+		// in coder->lz.temp. Now we have copied all that data
+		// to the output buffer and can return LZMA_STREAM_END.
+		coder->lz.sequence = SEQ_RUN;
+		assert(action == LZMA_SYNC_FLUSH);
+		return LZMA_STREAM_END;
+	}
+
+	if (coder->lz.sequence == SEQ_END) {
+		// This is like the above flushing case, but for finishing
+		// the encoding.
+		//
+		// NOTE: action is not necessarily LZMA_FINISH; it can
+		// be LZMA_SYNC_FLUSH too in case it is used at the
+		// end of the stream with known Uncompressed Size.
+		return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK;
+	}
+
+	while (*out_pos < out_size
+			&& (*in_pos < in_size || action != LZMA_RUN)) {
+		// Read more data to coder->lz.buffer if needed.
+		if (coder->lz.sequence == SEQ_RUN
+				&& coder->lz.read_pos >= coder->lz.read_limit)
+			return_if_error(fill_window(coder, allocator,
+					in, in_pos, in_size, action));
+
 		// Encode
-		if (coder->lz.process(coder, out, out_pos, out_size))
-			return LZMA_STREAM_END;
+		if (coder->lz.process(coder, out, out_pos, out_size)) {
+			if (coder->lz.sequence == SEQ_FLUSH) {
+				assert(action == LZMA_SYNC_FLUSH);
+				if (coder->lz.temp_size == 0) {
+					// Flushing was finished successfully.
+					coder->lz.sequence = SEQ_RUN;
+				} else {
+					// Flushing was otherwise finished,
+					// except that some data was left
+					// in coder->lz.temp.
+					coder->lz.sequence = SEQ_FLUSH_END;
+				}
+			} else {
+				// NOTE: action may be LZMA_RUN here in case
+				// Uncompressed Size is known and we have
+				// processed all the data already.
+				assert(coder->lz.sequence == SEQ_FINISH);
+				coder->lz.sequence = SEQ_END;
+			}
+
+			return action != LZMA_RUN && coder->lz.temp_size == 0
+					? LZMA_STREAM_END : LZMA_OK;
+		}
 	}
 
 	return LZMA_OK;
diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h
index fe94618b..11d12722 100644
--- a/src/liblzma/lz/lz_encoder.h
+++ b/src/liblzma/lz/lz_encoder.h
@@ -24,11 +24,15 @@
 #include "common.h"
 
 
+#define LZMA_LZ_TEMP_SIZE 64
+
+
 typedef struct lzma_lz_encoder_s lzma_lz_encoder;
 struct lzma_lz_encoder_s {
 	enum {
-		SEQ_INIT,
 		SEQ_RUN,
+		SEQ_FLUSH,
+		SEQ_FLUSH_END,
 		SEQ_FINISH,
 		SEQ_END
 	} sequence;
@@ -36,8 +40,15 @@ struct lzma_lz_encoder_s {
 	bool (*process)(lzma_coder *coder, uint8_t *restrict out,
 			size_t *restrict out_pos, size_t out_size);
 
+	/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if using EOPM. We need
+	/// to track Uncompressed Size to avoid writing a flush marker at the
+	/// very end of a stream that doesn't use EOPM.
 	lzma_vli uncompressed_size;
 
+	/// Temporary buffer for range encoder.
+	uint8_t temp[LZMA_LZ_TEMP_SIZE];
+	size_t temp_size;
+
 	///////////////
 	// In Window //
 	///////////////
@@ -84,10 +95,6 @@ struct lzma_lz_encoder_s {
 	/// is allowed to reach write_pos).
 	size_t keep_size_after;
 
-	/// This is set to true once the last byte of the input data has
-	/// been copied to buffer.
-	bool stream_end_was_reached;
-
 	//////////////////
 	// Match Finder //
 	//////////////////
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index f9c1e3fe..2c46b0c5 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -149,20 +149,11 @@ extern bool
 lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 		size_t *restrict out_pos, size_t out_size)
 {
-	// Flush the range encoder's temporary buffer to out[].
-	// Return immediatelly if not everything could be flushed.
-	if (rc_flush_buffer(&coder->rc, out, out_pos, out_size))
-		return false;
-
-	// Return immediatelly if we have already finished our work.
-	if (coder->lz.stream_end_was_reached
-			&& coder->is_initialized
-			&& coder->lz.read_pos == coder->lz.write_pos
-			&& coder->additional_offset == 0)
-		return true;
+#define rc_buffer coder->lz.temp
+#define rc_buffer_size coder->lz.temp_size
 
 	// Local copies
-	rc_to_local(coder->rc);
+	lzma_range_encoder rc = coder->rc;
 	size_t out_pos_local = *out_pos;
 	const uint32_t pos_mask = coder->pos_mask;
 	const bool best_compression = coder->best_compression;
@@ -170,13 +161,30 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 	// Initialize the stream if no data has been encoded yet.
 	if (!coder->is_initialized) {
 		if (coder->lz.read_pos == coder->lz.read_limit) {
-			// Cannot initialize, because there is no input data.
-			if (!coder->lz.stream_end_was_reached)
+			switch (coder->lz.sequence) {
+			case SEQ_RUN:
+				// Cannot initialize, because there is
+				// no input data.
 				return false;
 
-			// If we get here, we are encoding an empty file.
-			// Initialization is skipped completely.
-			assert(coder->lz.write_pos == coder->lz.read_pos);
+			case SEQ_FLUSH:
+				// Nothing to flush. There cannot be a flush
+				// marker when no data has been processed
+				// yet (the file format doesn't allow it, and
+				// it would just be a waste of space).
+				return true;
+
+			case SEQ_FINISH:
+				// We are encoding an empty file. No need
+				// to initialize the encoder.
+				assert(coder->lz.write_pos == coder->lz.read_pos);
+				break;
+
+			default:
+				// We never get here.
+				assert(0);
+				return true;
+			}
 
 		} else {
 			// Do the actual initialization.
@@ -214,9 +222,10 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 
 		// Check that there is some input to process.
 		if (coder->lz.read_pos >= coder->lz.read_limit) {
-			// If end of input has been reached, we must keep
-			// encoding until additional_offset becomes zero.
-			if (!coder->lz.stream_end_was_reached
+			// If flushing or finishing, we must keep encoding
+			// until additional_offset becomes zero to make
+			// all the input available at output.
+			if (coder->lz.sequence == SEQ_RUN
 					|| coder->additional_offset == 0)
 				break;
 		}
@@ -224,7 +233,7 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 		assert(coder->lz.read_pos <= coder->lz.write_pos);
 
 #ifndef NDEBUG
-		if (coder->lz.stream_end_was_reached) {
+		if (coder->lz.sequence != SEQ_RUN) {
 			assert(coder->lz.read_limit == coder->lz.write_pos);
 		} else {
 			assert(coder->lz.read_limit + coder->lz.keep_size_after
@@ -363,19 +372,21 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 
 	// Check if everything is done.
 	bool all_done = false;
-	if (coder->lz.stream_end_was_reached
+	if (coder->lz.sequence != SEQ_RUN
 			&& coder->lz.read_pos == coder->lz.write_pos
 			&& coder->additional_offset == 0) {
-		// Write end of stream marker. It is encoded as a match with
-		// distance of UINT32_MAX. Match length is needed but it is
-		// ignored by the decoder.
-		if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) {
+		if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN
+				|| coder->lz.sequence == SEQ_FLUSH) {
+			// Write special marker: flush marker or end of payload
+			// marker. Both are encoded as a match with distance of
+			// UINT32_MAX. The match length codes the type of the marker.
 			const uint32_t pos_state = coder->now_pos & pos_mask;
 			bit_encode_1(coder->is_match[coder->state][pos_state]);
 			bit_encode_0(coder->is_rep[coder->state]);
 			update_match(coder->state);
 
-			const uint32_t len = MATCH_MIN_LEN; // MATCH_MAX_LEN;
+			const uint32_t len = coder->lz.sequence == SEQ_FLUSH
+					? LEN_SPECIAL_FLUSH : LEN_SPECIAL_EOPM;
 			length_encode(coder->len_encoder, len - MATCH_MIN_LEN,
 					pos_state, best_compression);
 
@@ -398,15 +409,16 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 		// the range coder to the output buffer.
 		rc_flush();
 
+		rc_reset(rc);
+
 		// All done. Note that some output bytes might be
-		// pending in coder->buffer. lzma_encode() will
+		// pending in coder->lz.temp. lzma_lz_encode() will
 		// take care of those bytes.
-		if (rc_buffer_size == 0)
-			all_done = true;
+		all_done = true;
 	}
 
 	// Store local variables back to *coder.
-	rc_from_local(coder->rc);
+	coder->rc = rc;
 	*out_pos = out_pos_local;
 
 	return all_done;