1 files changed, 413 insertions, 0 deletions
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
new file mode 100644
index 00000000..f9c1e3fe
--- /dev/null
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -0,0 +1,413 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder.c
+/// \brief      LZMA encoder
+//
+//  Copyright (C) 1999-2006 Igor Pavlov
+//  Copyright (C) 2007 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// NOTE: If you want to keep the line length in 80 characters, set
+//       tab width to 4 or less in your editor when editing this file.
+
+
+#include "lzma_encoder_private.h"
+
+
+////////////
+// Macros //
+////////////
+
+// These are as macros mostly because they use local range encoder variables.
+
+#define literal_encode(subcoder, symbol) \
+do { \
+	uint32_t context = 1; \
+	int i = 8; \
+	do { \
+		--i; \
+		const uint32_t bit = ((symbol) >> i) & 1; \
+		bit_encode(subcoder[context], bit); \
+		context = (context << 1) | bit; \
+	} while (i != 0); \
+} while (0)
+
+
+#define literal_encode_matched(subcoder, match_byte, symbol) \
+do { \
+	uint32_t context = 1; \
+	int i = 8; \
+	do { \
+		--i; \
+		uint32_t bit = ((symbol) >> i) & 1; \
+		const uint32_t match_bit = ((match_byte) >> i) & 1; \
+		const uint32_t subcoder_index = 0x100 + (match_bit << 8) + context; \
+		bit_encode(subcoder[subcoder_index], bit); \
+		context = (context << 1) | bit; \
+		if (match_bit != bit) { \
+			while (i != 0) { \
+				--i; \
+				bit = ((symbol) >> i) & 1; \
+				bit_encode(subcoder[context], bit); \
+				context = (context << 1) | bit; \
+			} \
+			break; \
+		} \
+	} while (i != 0); \
+} while (0)
+
+
+#define length_encode(length_encoder, symbol, pos_state, update_price) \
+do { \
+	\
+	if ((symbol) < LEN_LOW_SYMBOLS) { \
+		bit_encode_0((length_encoder).choice); \
+		bittree_encode((length_encoder).low[pos_state], \
+				LEN_LOW_BITS, symbol); \
+	} else { \
+		bit_encode_1((length_encoder).choice); \
+		if ((symbol) < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) { \
+			bit_encode_0((length_encoder).choice2); \
+			bittree_encode((length_encoder).mid[pos_state], \
+					LEN_MID_BITS, \
+					(symbol) - LEN_LOW_SYMBOLS); \
+		} else { \
+			bit_encode_1((length_encoder).choice2); \
+			bittree_encode((length_encoder).high, LEN_HIGH_BITS, \
+					(symbol) - LEN_LOW_SYMBOLS \
+					- LEN_MID_SYMBOLS); \
+		} \
+	} \
+	if (update_price) \
+		if (--(length_encoder).counters[pos_state] == 0) \
+			lzma_length_encoder_update_table(&(length_encoder), pos_state); \
+} while (0)
+
+
+///////////////
+// Functions //
+///////////////
+
+/// \brief      Updates price table of the length encoder
+///
+/// All all the other prices in LZMA, these are used by lzma_get_optimum().
+///
+extern void
+lzma_length_encoder_update_table(lzma_length_encoder *lencoder,
+		const uint32_t pos_state)
+{
+	const uint32_t num_symbols = lencoder->table_size;
+	const uint32_t a0 = bit_get_price_0(lencoder->choice);
+	const uint32_t a1 = bit_get_price_1(lencoder->choice);
+	const uint32_t b0 = a1 + bit_get_price_0(lencoder->choice2);
+	const uint32_t b1 = a1 + bit_get_price_1(lencoder->choice2);
+
+	uint32_t *prices = lencoder->prices[pos_state];
+	uint32_t i = 0;
+
+	for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) {
+		prices[i] = a0;
+		bittree_get_price(prices[i], lencoder->low[pos_state],
+				LEN_LOW_BITS, i);
+	}
+
+	for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) {
+		prices[i] = b0;
+		bittree_get_price(prices[i], lencoder->mid[pos_state],
+				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
+	}
+
+	for (; i < num_symbols; ++i) {
+		prices[i] = b1;
+		bittree_get_price(prices[i], lencoder->high, LEN_HIGH_BITS,
+				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
+	}
+
+	lencoder->counters[pos_state] = num_symbols;
+
+	return;
+}
+
+
+/**
+ * \brief       LZMA encoder
+ *
+ * \return      true if end of stream was reached, false otherwise.
+ */
+extern bool
+lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size)
+{
+	// Flush the range encoder's temporary buffer to out[].
+	// Return immediatelly if not everything could be flushed.
+	if (rc_flush_buffer(&coder->rc, out, out_pos, out_size))
+		return false;
+
+	// Return immediatelly if we have already finished our work.
+	if (coder->lz.stream_end_was_reached
+			&& coder->is_initialized
+			&& coder->lz.read_pos == coder->lz.write_pos
+			&& coder->additional_offset == 0)
+		return true;
+
+	// Local copies
+	rc_to_local(coder->rc);
+	size_t out_pos_local = *out_pos;
+	const uint32_t pos_mask = coder->pos_mask;
+	const bool best_compression = coder->best_compression;
+
+	// Initialize the stream if no data has been encoded yet.
+	if (!coder->is_initialized) {
+		if (coder->lz.read_pos == coder->lz.read_limit) {
+			// Cannot initialize, because there is no input data.
+			if (!coder->lz.stream_end_was_reached)
+				return false;
+
+			// If we get here, we are encoding an empty file.
+			// Initialization is skipped completely.
+			assert(coder->lz.write_pos == coder->lz.read_pos);
+
+		} else {
+			// Do the actual initialization.
+			uint32_t len;
+			uint32_t num_distance_pairs;
+			lzma_read_match_distances(coder, &len, &num_distance_pairs);
+
+			bit_encode_0(coder->is_match[coder->state][0]);
+			update_char(coder->state);
+
+			const uint8_t cur_byte = coder->lz.buffer[
+					coder->lz.read_pos - coder->additional_offset];
+			probability *subcoder = literal_get_subcoder(coder->literal_coder,
+					coder->now_pos, coder->previous_byte);
+			literal_encode(subcoder, cur_byte);
+
+			coder->previous_byte = cur_byte;
+			--coder->additional_offset;
+			++coder->now_pos;
+
+			assert(coder->additional_offset == 0);
+		}
+
+		// Initialization is done (except if empty file).
+		coder->is_initialized = true;
+	}
+
+	// Encoding loop
+	while (true) {
+		// Check that there is free output space.
+		if (out_pos_local == out_size)
+			break;
+
+		assert(rc_buffer_size == 0);
+
+		// Check that there is some input to process.
+		if (coder->lz.read_pos >= coder->lz.read_limit) {
+			// If end of input has been reached, we must keep
+			// encoding until additional_offset becomes zero.
+			if (!coder->lz.stream_end_was_reached
+					|| coder->additional_offset == 0)
+				break;
+		}
+
+		assert(coder->lz.read_pos <= coder->lz.write_pos);
+
+#ifndef NDEBUG
+		if (coder->lz.stream_end_was_reached) {
+			assert(coder->lz.read_limit == coder->lz.write_pos);
+		} else {
+			assert(coder->lz.read_limit + coder->lz.keep_size_after
+					== coder->lz.write_pos);
+		}
+#endif
+
+		const uint32_t pos_state = coder->now_pos & pos_mask;
+
+		uint32_t pos;
+		uint32_t len;
+
+		// Get optimal match (repeat position and length).
+		// Value ranges for pos:
+		//   - [0, REP_DISTANCES): repeated match
+		//   - [REP_DISTANCES, UINT32_MAX): match at (pos - REP_DISTANCES)
+		//   - UINT32_MAX: not a match but a literal
+		// Value ranges for len:
+		//   - [MATCH_MIN_LEN, MATCH_MAX_LEN]
+		if (best_compression)
+			lzma_get_optimum(coder, &pos, &len);
+		else
+			lzma_get_optimum_fast(coder, &pos, &len);
+
+		if (len == 1 && pos == UINT32_MAX) {
+			// It's a literal.
+			bit_encode_0(coder->is_match[coder->state][pos_state]);
+
+			const uint8_t cur_byte = coder->lz.buffer[
+					coder->lz.read_pos - coder->additional_offset];
+			probability *subcoder = literal_get_subcoder(coder->literal_coder,
+					coder->now_pos, coder->previous_byte);
+
+			if (is_char_state(coder->state)) {
+				literal_encode(subcoder, cur_byte);
+			} else {
+				const uint8_t match_byte = coder->lz.buffer[
+						coder->lz.read_pos
+						- coder->rep_distances[0] - 1
+						- coder->additional_offset];
+				literal_encode_matched(subcoder, match_byte, cur_byte);
+			}
+
+			update_char(coder->state);
+			coder->previous_byte = cur_byte;
+
+		} else {
+			// It's a match.
+			bit_encode_1(coder->is_match[coder->state][pos_state]);
+
+			if (pos < REP_DISTANCES) {
+				// It's a repeated match i.e. the same distance
+				// has been used earlier.
+				bit_encode_1(coder->is_rep[coder->state]);
+
+				if (pos == 0) {
+					bit_encode_0(coder->is_rep0[coder->state]);
+					const uint32_t symbol = (len == 1) ? 0 : 1;
+					bit_encode(coder->is_rep0_long[coder->state][pos_state],
+							symbol);
+				} else {
+					const uint32_t distance = coder->rep_distances[pos];
+					bit_encode_1(coder->is_rep0[coder->state]);
+
+					if (pos == 1) {
+						bit_encode_0(coder->is_rep1[coder->state]);
+					} else {
+						bit_encode_1(coder->is_rep1[coder->state]);
+						bit_encode(coder->is_rep2[coder->state], pos - 2);
+
+						if (pos == 3)
+							coder->rep_distances[3] = coder->rep_distances[2];
+
+						coder->rep_distances[2] = coder->rep_distances[1];
+					}
+
+					coder->rep_distances[1] = coder->rep_distances[0];
+					coder->rep_distances[0] = distance;
+				}
+
+				if (len == 1) {
+					update_short_rep(coder->state);
+				} else {
+					length_encode(coder->rep_match_len_encoder,
+							len - MATCH_MIN_LEN, pos_state,
+							best_compression);
+					update_rep(coder->state);
+				}
+
+			} else {
+				bit_encode_0(coder->is_rep[coder->state]);
+				update_match(coder->state);
+				length_encode(coder->len_encoder, len - MATCH_MIN_LEN,
+						pos_state, best_compression);
+				pos -= REP_DISTANCES;
+
+				const uint32_t pos_slot = get_pos_slot(pos);
+				const uint32_t len_to_pos_state = get_len_to_pos_state(len);
+				bittree_encode(coder->pos_slot_encoder[len_to_pos_state],
+						POS_SLOT_BITS, pos_slot);
+
+				if (pos_slot >= START_POS_MODEL_INDEX) {
+					const uint32_t footer_bits = (pos_slot >> 1) - 1;
+					const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
+					const uint32_t pos_reduced = pos - base;
+
+					if (pos_slot < END_POS_MODEL_INDEX) {
+						bittree_reverse_encode(
+								coder->pos_encoders + base - pos_slot - 1,
+								footer_bits, pos_reduced);
+					} else {
+						rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,
+								footer_bits - ALIGN_BITS);
+						bittree_reverse_encode(coder->pos_align_encoder,
+								ALIGN_BITS, pos_reduced & ALIGN_MASK);
+						++coder->align_price_count;
+					}
+				}
+
+				coder->rep_distances[3] = coder->rep_distances[2];
+				coder->rep_distances[2] = coder->rep_distances[1];
+				coder->rep_distances[1] = coder->rep_distances[0];
+				coder->rep_distances[0] = pos;
+				++coder->match_price_count;
+			}
+
+			coder->previous_byte = coder->lz.buffer[
+					coder->lz.read_pos + len - 1
+					- coder->additional_offset];
+		}
+
+		assert(coder->additional_offset >= len);
+		coder->additional_offset -= len;
+		coder->now_pos += len;
+	}
+
+	// Check if everything is done.
+	bool all_done = false;
+	if (coder->lz.stream_end_was_reached
+			&& coder->lz.read_pos == coder->lz.write_pos
+			&& coder->additional_offset == 0) {
+		// Write end of stream marker. It is encoded as a match with
+		// distance of UINT32_MAX. Match length is needed but it is
+		// ignored by the decoder.
+		if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) {
+			const uint32_t pos_state = coder->now_pos & pos_mask;
+			bit_encode_1(coder->is_match[coder->state][pos_state]);
+			bit_encode_0(coder->is_rep[coder->state]);
+			update_match(coder->state);
+
+			const uint32_t len = MATCH_MIN_LEN; // MATCH_MAX_LEN;
+			length_encode(coder->len_encoder, len - MATCH_MIN_LEN,
+					pos_state, best_compression);
+
+			const uint32_t pos_slot = (1 << POS_SLOT_BITS) - 1;
+			const uint32_t len_to_pos_state = get_len_to_pos_state(len);
+			bittree_encode(coder->pos_slot_encoder[len_to_pos_state],
+					POS_SLOT_BITS, pos_slot);
+
+			const uint32_t footer_bits = 30;
+			const uint32_t pos_reduced
+					= (UINT32_C(1) << footer_bits) - 1;
+			rc_encode_direct_bits(pos_reduced >> ALIGN_BITS,
+					footer_bits - ALIGN_BITS);
+
+			bittree_reverse_encode(coder->pos_align_encoder, ALIGN_BITS,
+					pos_reduced & ALIGN_MASK);
+		}
+
+		// Flush the last bytes of compressed data from
+		// the range coder to the output buffer.
+		rc_flush();
+
+		// All done. Note that some output bytes might be
+		// pending in coder->buffer. lzma_encode() will
+		// take care of those bytes.
+		if (rc_buffer_size == 0)
+			all_done = true;
+	}
+
+	// Store local variables back to *coder.
+	rc_from_local(coder->rc);
+	*out_pos = out_pos_local;
+
+	return all_done;
+}