Sort of garbage collection commit. :-| Many things are still

broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions.
author: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
commit: 3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree: 7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lzma
parent: Fix test_filter_flags to match the new restriction of lc+lp. (diff)
download: xz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz
21 files changed, 3404 insertions, 2343 deletions
diff --git a/src/liblzma/lzma/Makefile.am b/src/liblzma/lzma/Makefile.am
index 59ded214..7aeceb63 100644
--- a/src/liblzma/lzma/Makefile.am
+++ b/src/liblzma/lzma/Makefile.am
@@ -14,37 +14,46 @@
 
 EXTRA_DIST = fastpos_tablegen.c
 
-noinst_LTLIBRARIES = liblzma4.la
-liblzma4_la_CPPFLAGS = \
+## Using liblzma2 since liblzma is already used for the final library.
+noinst_LTLIBRARIES = liblzma2.la
+liblzma2_la_CPPFLAGS = \
 	-I@top_srcdir@/src/liblzma/api \
 	-I@top_srcdir@/src/liblzma/common \
 	-I@top_srcdir@/src/liblzma/lz \
 	-I@top_srcdir@/src/liblzma/rangecoder
 
-liblzma4_la_SOURCES = \
-	lzma_common.h \
-	lzma_literal.c \
-	lzma_literal.h
+liblzma2_la_SOURCES = lzma_common.h
 
-if COND_MAIN_ENCODER
-liblzma4_la_SOURCES += \
+if COND_ENCODER_LZMA
+liblzma2_la_SOURCES += \
 	fastpos.h \
 	lzma_encoder.h \
 	lzma_encoder.c \
 	lzma_encoder_presets.c \
 	lzma_encoder_private.h \
-	lzma_encoder_init.c \
 	lzma_encoder_features.c \
-	lzma_encoder_getoptimum.c \
-	lzma_encoder_getoptimumfast.c
+	lzma_encoder_optimum_fast.c \
+	lzma_encoder_optimum_normal.c
 
 if !COND_SMALL
-liblzma4_la_SOURCES += fastpos_table.c
+liblzma2_la_SOURCES += fastpos_table.c
 endif
 endif
 
-if COND_MAIN_DECODER
-liblzma4_la_SOURCES += \
+if COND_DECODER_LZMA
+liblzma2_la_SOURCES += \
 	lzma_decoder.c \
 	lzma_decoder.h
 endif
+
+if COND_ENCODER_LZMA2
+liblzma2_la_SOURCES += \
+	lzma2_encoder.c \
+	lzma2_encoder.h
+endif
+
+if COND_DECODER_LZMA2
+liblzma2_la_SOURCES += \
+	lzma2_decoder.c \
+	lzma2_decoder.h
+endif
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h
index 57a94556..503be275 100644
--- a/src/liblzma/lzma/fastpos.h
+++ b/src/liblzma/lzma/fastpos.h
@@ -81,8 +81,6 @@
 // I'm making the table version the default, because that has good speed
 // on all systems I have tried. The size optimized version is sometimes
 // slightly faster, but sometimes it is a lot slower.
-//
-// Finally, this code isn't a major bottle neck in LZMA encoding anyway.
 
 #ifdef HAVE_SMALL
 #	include "bsr.h"
@@ -135,11 +133,7 @@ get_pos_slot(uint32_t pos)
 static inline uint32_t
 get_pos_slot_2(uint32_t pos)
 {
-	// FIXME: This assert() cannot be enabled at the moment, because
-	// lzma_getoptimum.c calls this function so that this assertion
-	// fails; however, it ignores the result of this function when
-	// this assert() would have failed.
-	// assert(pos >= FULL_DISTANCES);
+	assert(pos >= FULL_DISTANCES);
 
 	if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
 		return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);
diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
new file mode 100644
index 00000000..b16c40ce
--- /dev/null
+++ b/src/liblzma/lzma/lzma2_decoder.c
@@ -0,0 +1,318 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma2_decoder.c
+/// \brief      LZMA2 decoder
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//  Copyright (C) 2008 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma2_decoder.h"
+#include "lz_decoder.h"
+#include "lzma_decoder.h"
+
+
+struct lzma_coder_s {
+	enum sequence {
+		SEQ_CONTROL,
+		SEQ_UNCOMPRESSED_1,
+		SEQ_UNCOMPRESSED_2,
+		SEQ_COMPRESSED_0,
+		SEQ_COMPRESSED_1,
+		SEQ_PROPERTIES,
+		SEQ_LZMA,
+		SEQ_COPY,
+	} sequence;
+
+	/// Sequence after the size fields have been decoded.
+	enum sequence next_sequence;
+
+	/// LZMA decoder
+	lzma_lz_decoder lzma;
+
+	/// Uncompressed size of LZMA chunk
+	size_t uncompressed_size;
+
+	/// Compressed size of the chunk (naturally equals to uncompressed
+	/// size of uncompressed chunk)
+	size_t compressed_size;
+
+	/// True if properties are needed. This is false before the
+	/// first LZMA chunk.
+	bool need_properties;
+
+	/// True if dictionary reset is needed. This is false before the
+	/// first chunk (LZMA or uncompressed).
+	bool need_dictionary_reset;
+
+	lzma_options_lzma options;
+};
+
+
+static lzma_ret
+lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size)
+{
+	// With SEQ_LZMA it is possible that no new input is needed to do
+	// some progress. The rest of the sequences assume that there is
+	// at least one byte of input.
+	while (*in_pos < in_size || coder->sequence == SEQ_LZMA)
+	switch (coder->sequence) {
+	case SEQ_CONTROL:
+		if (in[*in_pos] & 0x80) {
+			// Get the highest five bits of uncompressed size.
+			coder->uncompressed_size
+					= (uint32_t)(in[*in_pos] & 0x1F) << 16;
+			coder->sequence = SEQ_UNCOMPRESSED_1;
+
+			// See if we need to reset dictionary or state.
+			switch ((in[(*in_pos)++] >> 5) & 3) {
+			case 3:
+				dict_reset(dict);
+				coder->need_dictionary_reset = false;
+
+			// Fall through
+
+			case 2:
+				if (coder->need_dictionary_reset)
+					return LZMA_DATA_ERROR;
+
+				coder->need_properties = false;
+				coder->next_sequence = SEQ_PROPERTIES;
+				break;
+
+			case 1:
+				if (coder->need_properties)
+					return LZMA_DATA_ERROR;
+
+				coder->lzma.reset(coder->lzma.coder,
+						&coder->options);
+
+				coder->next_sequence = SEQ_LZMA;
+				break;
+
+			case 0:
+				if (coder->need_properties)
+					return LZMA_DATA_ERROR;
+
+				coder->next_sequence = SEQ_LZMA;
+				break;
+			}
+
+		} else {
+			switch (in[(*in_pos)++]) {
+			case 0:
+				// End of payload marker
+				return LZMA_STREAM_END;
+
+			case 1:
+				// Dictionary reset
+				dict_reset(dict);
+				coder->need_dictionary_reset = false;
+
+			// Fall through
+
+			case 2:
+				if (coder->need_dictionary_reset)
+					return LZMA_DATA_ERROR;
+
+				// Uncompressed chunk; we need to read total
+				// size first.
+				coder->sequence = SEQ_COMPRESSED_0;
+				coder->next_sequence = SEQ_COPY;
+				break;
+
+			default:
+				return LZMA_DATA_ERROR;
+			}
+		}
+
+		break;
+
+	case SEQ_UNCOMPRESSED_1:
+		coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8;
+		coder->sequence = SEQ_UNCOMPRESSED_2;
+		break;
+
+	case SEQ_UNCOMPRESSED_2:
+		coder->uncompressed_size += in[(*in_pos)++] + 1;
+		coder->sequence = SEQ_COMPRESSED_0;
+		coder->lzma.set_uncompressed(coder->lzma.coder,
+				coder->uncompressed_size);
+		break;
+
+	case SEQ_COMPRESSED_0:
+		coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8;
+		coder->sequence = SEQ_COMPRESSED_1;
+		break;
+
+	case SEQ_COMPRESSED_1:
+		coder->compressed_size += in[(*in_pos)++] + 1;
+		coder->sequence = coder->next_sequence;
+		break;
+
+	case SEQ_PROPERTIES:
+		if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++]))
+			return LZMA_DATA_ERROR;
+
+		coder->lzma.reset(coder->lzma.coder, &coder->options);
+
+		coder->sequence = SEQ_LZMA;
+		break;
+
+	case SEQ_LZMA: {
+		// Store the start offset so that we can update
+		// coder->compressed_size later.
+		const size_t in_start = *in_pos;
+
+		// Decode from in[] to *dict.
+		const lzma_ret ret = coder->lzma.code(coder->lzma.coder,
+				dict, in, in_pos, in_size);
+
+		// Validate and update coder->compressed_size.
+		const size_t in_used = *in_pos - in_start;
+		if (in_used > coder->compressed_size)
+			return LZMA_DATA_ERROR;
+
+		coder->compressed_size -= in_used;
+
+		// Return if we didn't finish the chunk, or an error occurred.
+		if (ret != LZMA_STREAM_END)
+			return ret;
+
+		// The LZMA decoder must have consumed the whole chunk now.
+		// We don't need to worry about uncompressed size since it
+		// is checked by the LZMA decoder.
+		if (coder->compressed_size != 0)
+			return LZMA_DATA_ERROR;
+
+		coder->sequence = SEQ_CONTROL;
+		break;
+	}
+
+	case SEQ_COPY: {
+		// Copy from input to the dictionary as is.
+		// FIXME Can copy too much?
+		dict_write(dict, in, in_pos, in_size, &coder->compressed_size);
+		if (coder->compressed_size != 0)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_CONTROL;
+		break;
+	}
+
+	default:
+		assert(0);
+		return LZMA_PROG_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+static void
+lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	assert(coder->lzma.end == NULL);
+	lzma_free(coder->lzma.coder, allocator);
+
+	lzma_free(coder, allocator);
+
+	return;
+}
+
+
+static lzma_ret
+lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
+		const void *options, size_t *dict_size)
+{
+	if (lz->coder == NULL) {
+		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (lz->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		lz->code = &lzma2_decode;
+		lz->end = &lzma2_decoder_end;
+
+		lz->coder->lzma = LZMA_LZ_DECODER_INIT;
+	}
+
+	lz->coder->sequence = SEQ_CONTROL;
+	lz->coder->need_properties = true;
+	lz->coder->need_dictionary_reset = true;
+
+	return lzma_lzma_decoder_create(&lz->coder->lzma,
+			allocator, options, dict_size);
+}
+
+
+extern lzma_ret
+lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	// LZMA2 can only be the last filter in the chain. This is enforced
+	// by the raw_decoder initialization.
+	assert(filters[1].init == NULL);
+
+	return lzma_lz_decoder_init(next, allocator, filters,
+			&lzma2_decoder_init);
+}
+
+
+extern uint64_t
+lzma_lzma2_decoder_memusage(const void *options)
+{
+	const uint64_t lzma_memusage = lzma_lzma_decoder_memusage(options);
+	if (lzma_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder) + lzma_memusage;
+}
+
+
+extern lzma_ret
+lzma_lzma2_props_decode(void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	if (props_size != 1)
+		return LZMA_HEADER_ERROR;
+
+	// Check that reserved bits are unset.
+	if (props[0] & 0xC0)
+		return LZMA_HEADER_ERROR;
+
+	// Decode the dictionary size.
+	if (props[0] > 40)
+		return LZMA_HEADER_ERROR;
+
+	lzma_options_lzma *opt = lzma_alloc(
+			sizeof(lzma_options_lzma), allocator);
+	if (opt == NULL)
+		return LZMA_MEM_ERROR;
+
+	if (props[0] == 40) {
+		opt->dictionary_size = UINT32_MAX;
+	} else {
+		opt->dictionary_size = 2 | (props[0] & 1);
+		opt->dictionary_size <<= props[0] / 2 + 11;
+	}
+
+	opt->preset_dictionary = NULL;
+	opt->preset_dictionary_size = 0;
+
+	*options = opt;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h
new file mode 100644
index 00000000..a7504863
--- /dev/null
+++ b/src/liblzma/lzma/lzma2_decoder.h
@@ -0,0 +1,35 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma2_decoder.h
+/// \brief      LZMA2 decoder
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//  Copyright (C) 2008 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZMA2_DECODER_H
+#define LZMA_LZMA2_DECODER_H
+
+#include "common.h"
+
+extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next,
+		lzma_allocator *allocator, const lzma_filter_info *filters);
+
+extern uint64_t lzma_lzma2_decoder_memusage(const void *options);
+
+extern lzma_ret lzma_lzma2_props_decode(
+		void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size);
+
+#endif
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
new file mode 100644
index 00000000..b2cd176b
--- /dev/null
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -0,0 +1,406 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma2_encoder.c
+/// \brief      LZMA2 encoder
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//  Copyright (C) 2008 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lzma_encoder.h"
+#include "fastpos.h"
+#include "lzma2_encoder.h"
+
+
+/// Maximum number of bytes of actual data per chunk (no headers)
+#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16)
+
+/// Maximum uncompressed size of LZMA chunk (no headers)
+#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21)
+
+/// Maximum size of LZMA2 headers
+#define LZMA2_HEADER_MAX 6
+
+/// Size of a header for uncompressed chunk
+#define LZMA2_HEADER_UNCOMPRESSED 3
+
+
+struct lzma_coder_s {
+	enum {
+		SEQ_INIT,
+		SEQ_LZMA_ENCODE,
+		SEQ_LZMA_COPY,
+		SEQ_UNCOMPRESSED_HEADER,
+		SEQ_UNCOMPRESSED_COPY,
+	} sequence;
+
+	/// LZMA encoder
+	lzma_coder *lzma;
+
+	/// If this is not NULL, we will check new options from this
+	/// structure when starting a new chunk.
+	const lzma_options_lzma *opt_new;
+
+	/// LZMA options currently in use.
+	lzma_options_lzma opt_cur;
+
+	bool need_properties;
+	bool need_state_reset;
+	bool need_dictionary_reset;
+
+	/// Uncompressed size of a chunk
+	size_t uncompressed_size;
+
+	/// Compressed size of a chunk (excluding headers); this is also used
+	/// to indicate the end of buf[] in SEQ_LZMA_COPY.
+	size_t compressed_size;
+
+	/// Read position in buf[]
+	size_t buf_pos;
+
+	/// Buffer to hold the chunk header and LZMA compressed data
+	uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
+};
+
+
+static void
+lzma2_header_lzma(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+	assert(coder->compressed_size > 0);
+	assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+
+	size_t pos;
+
+	if (coder->need_properties) {
+		pos = 0;
+
+		if (coder->need_dictionary_reset)
+			coder->buf[pos] = 0x80 + (3 << 5);
+		else
+			coder->buf[pos] = 0x80 + (2 << 5);
+	} else {
+		pos = 1;
+
+		if (coder->need_state_reset)
+			coder->buf[pos] = 0x80 + (1 << 5);
+		else
+			coder->buf[pos] = 0x80;
+	}
+
+	// Set the start position for copying.
+	coder->buf_pos = pos;
+
+	// Uncompressed size
+	size_t size = coder->uncompressed_size - 1;
+	coder->buf[pos++] += size >> 16;
+	coder->buf[pos++] = (size >> 8) & 0xFF;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Compressed size
+	size = coder->compressed_size - 1;
+	coder->buf[pos++] = size >> 8;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Properties, if needed
+	if (coder->need_properties)
+		lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
+
+	coder->need_properties = false;
+	coder->need_state_reset = false;
+	coder->need_dictionary_reset = false;
+
+	// The copying code uses coder->compressed_size to indicate the end
+	// of coder->buf[], so we need add the maximum size of the header here.
+	coder->compressed_size += LZMA2_HEADER_MAX;
+
+	return;
+}
+
+
+static void
+lzma2_header_uncompressed(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
+
+	// If this is the first chunk, we need to include dictionary
+	// reset indicator.
+	if (coder->need_dictionary_reset)
+		coder->buf[0] = 1;
+	else
+		coder->buf[0] = 2;
+
+	coder->need_dictionary_reset = false;
+
+	// "Compressed" size
+	coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
+	coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
+
+	// Set the start position for copying.
+	coder->buf_pos = 0;
+	return;
+}
+
+
+static lzma_ret
+lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	while (*out_pos < out_size)
+	switch (coder->sequence) {
+	case SEQ_INIT:
+		// If there's no input left and we are flushing or finishing,
+		// don't start a new chunk.
+		if (mf_unencoded(mf) == 0) {
+			// Write end of payload marker if finishing.
+			if (mf->action == LZMA_FINISH)
+				out[(*out_pos)++] = 0;
+
+			return mf->action == LZMA_RUN
+					? LZMA_OK : LZMA_STREAM_END;
+		}
+
+		// Look if there are new options. At least for now,
+		// only lc/lp/pb can be changed.
+		if (coder->opt_new != NULL
+				&& (coder->opt_cur.literal_context_bits
+					!= coder->opt_new->literal_context_bits
+				|| coder->opt_cur.literal_pos_bits
+					!= coder->opt_new->literal_pos_bits
+				|| coder->opt_cur.pos_bits
+					!= coder->opt_new->pos_bits)) {
+			// Options have been changed, copy them to opt_cur.
+			coder->opt_cur.literal_context_bits
+					= coder->opt_new->literal_context_bits;
+			coder->opt_cur.literal_pos_bits
+					= coder->opt_new->literal_pos_bits;
+			coder->opt_cur.pos_bits
+					= coder->opt_new->pos_bits;
+
+			// We need to write the new options and reset
+			// the encoder state.
+			coder->need_properties = true;
+			coder->need_state_reset = true;
+		}
+
+		if (coder->need_state_reset)
+			lzma_lzma_encoder_reset(coder->lzma, &coder->opt_cur);
+
+		coder->uncompressed_size = 0;
+		coder->compressed_size = 0;
+		coder->sequence = SEQ_LZMA_ENCODE;
+
+	// Fall through
+
+	case SEQ_LZMA_ENCODE: {
+		// Calculate how much more uncompressed data this chunk
+		// could accept.
+		const uint32_t left = LZMA2_UNCOMPRESSED_MAX
+				- coder->uncompressed_size;
+		uint32_t limit;
+
+		if (left < mf->match_len_max) {
+			// Must flush immediatelly since the next LZMA symbol
+			// could make the uncompressed size of the chunk too
+			// big.
+			limit = 0;
+		} else {
+			// Calculate maximum read_limit that is OK from point
+			// of view of LZMA2 chunk size.
+			limit = mf->read_pos - mf->read_ahead
+					+ left - mf->match_len_max;
+		}
+
+		// Save the start position so that we can update
+		// coder->uncompressed_size.
+		const uint32_t read_start = mf->read_pos - mf->read_ahead;
+
+		// Call the LZMA encoder until the chunk is finished.
+		const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
+				coder->buf + LZMA2_HEADER_MAX,
+				&coder->compressed_size,
+				LZMA2_CHUNK_MAX, limit);
+
+		coder->uncompressed_size += mf->read_pos - mf->read_ahead
+				- read_start;
+
+		assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+		assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+
+		if (ret != LZMA_STREAM_END)
+			return LZMA_OK;
+
+		// See if the chunk compressed. If it didn't, we encode it
+		// as uncompressed chunk. This saves a few bytes of space
+		// and makes decoding faster.
+		if (coder->compressed_size >= coder->uncompressed_size) {
+			coder->uncompressed_size += mf->read_ahead;
+			assert(coder->uncompressed_size
+					<= LZMA2_UNCOMPRESSED_MAX);
+			mf->read_ahead = 0;
+			lzma2_header_uncompressed(coder);
+			coder->need_state_reset = true;
+			coder->sequence = SEQ_UNCOMPRESSED_HEADER;
+			break;
+		}
+
+		// The chunk did compress at least by one byte, so we store
+		// the chunk as LZMA.
+		lzma2_header_lzma(coder);
+
+		coder->sequence = SEQ_LZMA_COPY;
+	}
+
+	// Fall through
+
+	case SEQ_LZMA_COPY:
+		// Copy the compressed chunk along its headers to the
+		// output buffer.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				coder->compressed_size,
+				out, out_pos, out_size);
+		if (coder->buf_pos != coder->compressed_size)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+
+	case SEQ_UNCOMPRESSED_HEADER:
+		// Copy the three-byte header to indicate uncompressed chunk.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				LZMA2_HEADER_UNCOMPRESSED,
+				out, out_pos, out_size);
+		if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_UNCOMPRESSED_COPY;
+
+	// Fall through
+
+	case SEQ_UNCOMPRESSED_COPY:
+		// Copy the uncompressed data as is from the dictionary
+		// to the output buffer.
+		mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
+		if (coder->uncompressed_size != 0)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+	}
+
+	return LZMA_OK;
+}
+
+
+static void
+lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_free(coder->lzma, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+static lzma_ret
+lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+		const void *options, lzma_lz_options *lz_options)
+{
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (lz->coder == NULL) {
+		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (lz->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		lz->code = &lzma2_encode;
+		lz->end = &lzma2_encoder_end;
+
+		lz->coder->lzma = NULL;
+	}
+
+	lz->coder->sequence = SEQ_INIT;
+	lz->coder->need_properties = true;
+	lz->coder->need_state_reset = false;
+	lz->coder->need_dictionary_reset = true;
+
+	lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
+	lz->coder->opt_new = lz->coder->opt_cur.persistent
+				? options : NULL;
+
+	// Initialize LZMA encoder
+	return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
+			&lz->coder->opt_cur, lz_options));
+
+	// Make sure that we will always have enough history available in
+	// case we need to use uncompressed chunks. They are used when the
+	// compressed size of a chunk is not smaller than the uncompressed
+	// size, so we need to have at least LZMA2_COMPRESSED_MAX bytes
+	// history available.
+	if (lz_options->before_size + lz_options->dictionary_size
+			< LZMA2_CHUNK_MAX)
+		lz_options->before_size = LZMA2_CHUNK_MAX
+				- lz_options->dictionary_size;
+
+	return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return lzma_lz_encoder_init(
+			next, allocator, filters, &lzma2_encoder_init);
+}
+
+
+extern uint64_t
+lzma_lzma2_encoder_memusage(const void *options)
+{
+	const uint64_t lzma_memusage = lzma_lzma_encoder_memusage(options);
+	if (lzma_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder) + lzma_memusage;
+}
+
+
+extern lzma_ret
+lzma_lzma2_props_encode(const void *options, uint8_t *out)
+{
+	const lzma_options_lzma *const opt = options;
+	uint32_t d = MAX(opt->dictionary_size, LZMA_DICTIONARY_SIZE_MIN);
+
+	// Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
+	// on which one is the next:
+	--d;
+	d |= d >> 2;
+	d |= d >> 3;
+	d |= d >> 4;
+	d |= d >> 8;
+	d |= d >> 16;
+
+	// Get the highest two bits using the proper encoding:
+	if (d == UINT32_MAX)
+		out[0] = 40;
+	else
+		out[0] = get_pos_slot(d + 1) - 24;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/lzma/lzma2_encoder.h
index 0a27f3dc..3e27f680 100644
--- a/src/liblzma/common/raw_common.h
+++ b/src/liblzma/lzma/lzma2_encoder.h
@@ -1,9 +1,10 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
-/// \file       raw_common.h
-/// \brief      Stuff shared between raw encoder and raw decoder
+/// \file       lzma2_encoder.h
+/// \brief      LZMA2 encoder
 //
-//  Copyright (C) 2007 Lasse Collin
+//  Copyright (C) 1999-2008 Igor Pavlov
+//  Copyright (C) 2008 Lasse Collin
 //
 //  This library is free software; you can redistribute it and/or
 //  modify it under the terms of the GNU Lesser General Public
@@ -17,14 +18,17 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-#ifndef LZMA_RAW_COMMON_H
-#define LZMA_RAW_COMMON_H
+#ifndef LZMA_LZMA2_ENCODER_H
+#define LZMA_LZMA2_ENCODER_H
 
 #include "common.h"
 
-extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next,
-		lzma_allocator *allocator, const lzma_options_filter *options,
-		lzma_init_function (*get_function)(lzma_vli id),
-		bool is_encoder);
+extern lzma_ret lzma_lzma2_encoder_init(
+		lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters);
+
+extern uint64_t lzma_lzma2_encoder_memusage(const void *options);
+
+extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out);
 
 #endif
diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h
index f677fcce..6909969b 100644
--- a/src/liblzma/lzma/lzma_common.h
+++ b/src/liblzma/lzma/lzma_common.h
@@ -22,81 +22,31 @@
 #define LZMA_LZMA_COMMON_H
 
 #include "common.h"
-#include "lzma_literal.h"
 #include "range_common.h"
 
 
-///////////////
-// Constants //
-///////////////
-
-#define REP_DISTANCES 4
-
-#define POS_SLOT_BITS 6
-#define DICT_LOG_SIZE_MAX 30
-#define DIST_TABLE_SIZE_MAX (DICT_LOG_SIZE_MAX * 2)
-#if (UINT32_C(1) << DICT_LOG_SIZE_MAX) != LZMA_DICTIONARY_SIZE_MAX
-#	error DICT_LOG_SIZE_MAX is inconsistent with LZMA_DICTIONARY_SIZE_MAX
-#endif
-
-// 2 is for speed optimization
-#define LEN_TO_POS_STATES_BITS 2
-#define LEN_TO_POS_STATES (1 << LEN_TO_POS_STATES_BITS)
-
-#define MATCH_MIN_LEN 2
-
-#define ALIGN_BITS 4
-#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
-#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
-
-#define START_POS_MODEL_INDEX 4
-#define END_POS_MODEL_INDEX 14
-#define POS_MODELS (END_POS_MODEL_INDEX - START_POS_MODEL_INDEX)
-
-#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2)
-#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
+///////////////////
+// Miscellaneous //
+///////////////////
 
+/// Maximum number of position states. A position state is the lowest pos bits
+/// number of bits of the current uncompressed offset. In some places there
+/// are different sets of probabilities for different pos states.
 #define POS_STATES_MAX (1 << LZMA_POS_BITS_MAX)
 
 
-// Length coder & Length price table encoder
-#define LEN_LOW_BITS 3
-#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
-#define LEN_MID_BITS 3
-#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
-#define LEN_HIGH_BITS 8
-#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
-#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
-#define LEN_SPEC_SYMBOLS (LOW_LOW_SYMBOLS + LEN_MID_LEN_SYMBOLS)
-#define MATCH_MAX_LEN (MATCH_MIN_LEN + LEN_SYMBOLS - 1)
-
-// Total number of probs in a Len Encoder
-#define LEN_CODER_TOTAL_PROBS (LEN_HIGH_CODER + LEN_HIGH_SYMBOLS)
-
-// Price table size of Len Encoder
-#define LEN_PRICES (LEN_SYMBOLS << LZMA_POS_BITS_MAX)
-
-// Special lengths used together with distance == UINT32_MAX
-#define LEN_SPECIAL_EOPM MATCH_MIN_LEN
-#define LEN_SPECIAL_FLUSH (LEN_SPECIAL_EOPM + 1)
-
-
-// Optimal - Number of entries in the optimum array.
-#define OPTS (1 << 12)
-
-
-// Miscellaneous
-#define INFINITY_PRICE 0x0FFFFFFF
-
-
-////////////
-// Macros //
-////////////
-
-#define get_len_to_pos_state(len) \
-	((len) < LEN_TO_POS_STATES + MATCH_MIN_LEN \
-		? (len) - MATCH_MIN_LEN \
-		: LEN_TO_POS_STATES - 1)
+/// Validates literal_context_bits, literal_pos_bits, and pos_bits.
+static inline bool
+is_lclppb_valid(const lzma_options_lzma *options)
+{
+	return options->literal_context_bits <= LZMA_LITERAL_CONTEXT_BITS_MAX
+			&& options->literal_pos_bits
+				<= LZMA_LITERAL_POS_BITS_MAX
+			&& options->literal_context_bits
+					+ options->literal_pos_bits
+				<= LZMA_LITERAL_BITS_MAX
+			&& options->pos_bits <= LZMA_POS_BITS_MAX;
+}
 
 
 ///////////
@@ -161,4 +111,126 @@ typedef enum {
 #define is_literal_state(state) \
 	((state) < LIT_STATES)
 
+
+/////////////
+// Literal //
+/////////////
+
+/// Each literal coder is divided in three sections:
+///   - 0x001-0x0FF: Without match byte
+///   - 0x101-0x1FF: With match byte; match bit is 0
+///   - 0x201-0x2FF: With match byte; match bit is 1
+///
+/// Match byte is used when the previous LZMA symbol was something else than
+/// a literal (that is, it was some kind of match).
+#define LITERAL_CODER_SIZE 0x300
+
+/// Maximum number of literal coders
+#define LITERAL_CODERS_MAX (1 << LZMA_LITERAL_BITS_MAX)
+
+/// Locate the literal coder for the next literal byte. The choice depends on
+///   - the lowest literal_pos_bits bits of the position of the current
+///     byte; and
+///   - the highest literal_context_bits bits of the previous byte.
+#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \
+	((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))])
+
+
+static inline void
+literal_init(probability (*probs)[LITERAL_CODER_SIZE],
+		uint32_t literal_context_bits, uint32_t literal_pos_bits)
+{
+	assert(literal_context_bits + literal_pos_bits
+			<= LZMA_LITERAL_BITS_MAX);
+
+	const uint32_t coders
+			= 1U << (literal_context_bits + literal_pos_bits);
+
+	for (uint32_t i = 0; i < coders; ++i)
+		for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j)
+			bit_reset(probs[i][j]);
+
+	return;
+}
+
+
+//////////////////
+// Match length //
+//////////////////
+
+// Minimum length of a match is two bytes.
+#define MATCH_LEN_MIN 2
+
+// Match length is encoded with 4, 5, or 10 bits.
+//
+// Length   Bits
+//  2-9      4 = Choice=0 + 3 bits
+// 10-17     5 = Choice=1 + Choice2=0 + 3 bits
+// 18-273   10 = Choice=1 + Choice2=1 + 8 bits
+#define LEN_LOW_BITS 3
+#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
+#define LEN_MID_BITS 3
+#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
+#define LEN_HIGH_BITS 8
+#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
+
+// Maximum length of a match is 273 which is a result of the encoding
+// described above.
+#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
+
+
+////////////////////
+// Match distance //
+////////////////////
+
+// Different set of probabilities is used for match distances that have very
+// short match length: Lengths of 2, 3, and 4 bytes have a separate set of
+// probabilities for each length. The matches with longer length use a shared
+// set of probabilities.
+#define LEN_TO_POS_STATES 4
+
+// Macro to get the index of the appropriate probability array.
+#define get_len_to_pos_state(len) \
+	((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \
+		? (len) - MATCH_LEN_MIN \
+		: LEN_TO_POS_STATES - 1)
+
+// The highest two bits of a match distance (pos slot) are encoded using six
+// bits. See fastpos.h for more explanation.
+#define POS_SLOT_BITS 6
+#define POS_SLOTS (1 << POS_SLOT_BITS)
+
+// Match distances up to 127 are fully encoded using probabilities. Since
+// the highest two bits (pos slot) are always encoded using six bits, the
+// distances 0-3 don't need any additional bits to encode, since the pos
+// slot itself is the same as the actual distance. START_POS_MODEL_INDEX
+// indicates the first pos slot where at least one additional bit is needed.
+#define START_POS_MODEL_INDEX 4
+
+// Match distances greater than 127 are encoded in three pieces:
+//   - pos slot: the highest two bits
+//   - direct bits: 2-26 bits below the highest two bits
+//   - alignment bits: four lowest bits
+//
+// Direct bits don't use any probabilities.
+//
+// The pos slot value of 14 is for distances 128-191 (see the table in
+// fastpos.h to understand why).
+#define END_POS_MODEL_INDEX 14
+
+// Seven-bit distances use the full FIXME
+#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2)
+#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
+
+// For match distances greater than 127, only the highest two bits and the
+// lowest four bits (alignment) is encoded using probabilities.
+#define ALIGN_BITS 4
+#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
+
+// LZMA remembers the four most recent match distances. Reusing these distances
+// tends to take less space than re-encoding the actual distance value.
+#define REP_DISTANCES 4
+
 #endif
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 68941021..e9d047d3 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -4,7 +4,7 @@
 /// \brief      LZMA decoder
 //
 //  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
+//  Copyright (C) 2007-2008 Lasse Collin
 //
 //  This library is free software; you can redistribute it and/or
 //  modify it under the terms of the GNU Lesser General Public
@@ -18,74 +18,147 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
 
-// NOTE: If you want to keep the line length in 80 characters, set
-//       tab width to 4 or less in your editor when editing this file.
-
+#include "lz_decoder.h"
 #include "lzma_common.h"
 #include "lzma_decoder.h"
-#include "lz_decoder.h"
 #include "range_decoder.h"
 
 
-/// REQUIRED_IN_BUFFER_SIZE is the number of required input bytes
-/// for the worst case: longest match with longest distance.
-/// LZMA_IN_BUFFER_SIZE must be larger than REQUIRED_IN_BUFFER_SIZE.
-/// 23 bits = 2 (match select) + 10 (len) + 6 (distance) + 4 (align)
-///         + 1 (rc_normalize)
-///
-/// \todo       Is this correct for sure?
-///
-#define REQUIRED_IN_BUFFER_SIZE \
-	((23 * (BIT_MODEL_TOTAL_BITS - MOVE_BITS + 1) + 26 + 9) / 8)
+#ifdef HAVE_SMALL
 
+// Macros for (somewhat) size-optimized code.
+#define seq_4(seq) seq
 
-// Length decoders are easiest to have as macros, because they use range
-// decoder macros, which use local variables rc_range and rc_code.
+#define seq_6(seq) seq
 
-#define length_decode(target, len_decoder, pos_state) \
+#define seq_8(seq) seq
+
+#define seq_len(seq) \
+	seq ## _CHOICE, \
+	seq ## _CHOICE2, \
+	seq ## _BITTREE
+
+#define len_decode(target, ld, pos_state, seq) \
 do { \
-	if_bit_0(len_decoder.choice) { \
-		update_bit_0(len_decoder.choice); \
-		target = MATCH_MIN_LEN; \
-		bittree_decode(target, len_decoder.low[pos_state], LEN_LOW_BITS); \
+case seq ## _CHOICE: \
+	rc_if_0(ld.choice, seq ## _CHOICE) { \
+		rc_update_0(ld.choice); \
+		probs = ld.low[pos_state];\
+		limit = LEN_LOW_SYMBOLS; \
+		target = MATCH_LEN_MIN; \
 	} else { \
-		update_bit_1(len_decoder.choice); \
-		if_bit_0(len_decoder.choice2) { \
-			update_bit_0(len_decoder.choice2); \
-			target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \
-			bittree_decode(target, len_decoder.mid[pos_state], LEN_MID_BITS); \
+		rc_update_1(ld.choice); \
+case seq ## _CHOICE2: \
+		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+			rc_update_0(ld.choice2); \
+			probs = ld.mid[pos_state]; \
+			limit = LEN_MID_SYMBOLS; \
+			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
 		} else { \
-			update_bit_1(len_decoder.choice2); \
-			target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
-			bittree_decode(target, len_decoder.high, LEN_HIGH_BITS); \
+			rc_update_1(ld.choice2); \
+			probs = ld.high; \
+			limit = LEN_HIGH_SYMBOLS; \
+			target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
+					+ LEN_MID_SYMBOLS; \
 		} \
 	} \
+	symbol = 1; \
+case seq ## _BITTREE: \
+	do { \
+		rc_bit(probs[symbol], , , seq ## _BITTREE); \
+	} while (symbol < limit); \
+	target += symbol - limit; \
 } while (0)
 
-
-#define length_decode_dummy(target, len_decoder, pos_state) \
+#else // HAVE_SMALL
+
+// Unrolled versions
+#define seq_4(seq) \
+	seq ## 0, \
+	seq ## 1, \
+	seq ## 2, \
+	seq ## 3
+
+#define seq_6(seq) \
+	seq ## 0, \
+	seq ## 1, \
+	seq ## 2, \
+	seq ## 3, \
+	seq ## 4, \
+	seq ## 5
+
+#define seq_8(seq) \
+	seq ## 0, \
+	seq ## 1, \
+	seq ## 2, \
+	seq ## 3, \
+	seq ## 4, \
+	seq ## 5, \
+	seq ## 6, \
+	seq ## 7
+
+#define seq_len(seq) \
+	seq ## _CHOICE, \
+	seq ## _LOW0, \
+	seq ## _LOW1, \
+	seq ## _LOW2, \
+	seq ## _CHOICE2, \
+	seq ## _MID0, \
+	seq ## _MID1, \
+	seq ## _MID2, \
+	seq ## _HIGH0, \
+	seq ## _HIGH1, \
+	seq ## _HIGH2, \
+	seq ## _HIGH3, \
+	seq ## _HIGH4, \
+	seq ## _HIGH5, \
+	seq ## _HIGH6, \
+	seq ## _HIGH7
+
+#define len_decode(target, ld, pos_state, seq) \
 do { \
-	if_bit_0(len_decoder.choice) { \
-		update_bit_0_dummy(); \
-		target = MATCH_MIN_LEN; \
-		bittree_decode_dummy(target, \
-				len_decoder.low[pos_state], LEN_LOW_BITS); \
+	symbol = 1; \
+case seq ## _CHOICE: \
+	rc_if_0(ld.choice, seq ## _CHOICE) { \
+		rc_update_0(ld.choice); \
+		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
+		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
+		rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
+		target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
 	} else { \
-		update_bit_1_dummy(); \
-		if_bit_0(len_decoder.choice2) { \
-			update_bit_0_dummy(); \
-			target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \
-			bittree_decode_dummy(target, len_decoder.mid[pos_state], \
-					LEN_MID_BITS); \
+		rc_update_1(ld.choice); \
+case seq ## _CHOICE2: \
+		rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+			rc_update_0(ld.choice2); \
+			rc_bit_case(ld.mid[pos_state][symbol], , , \
+					seq ## _MID0); \
+			rc_bit_case(ld.mid[pos_state][symbol], , , \
+					seq ## _MID1); \
+			rc_bit_case(ld.mid[pos_state][symbol], , , \
+					seq ## _MID2); \
+			target = symbol - LEN_MID_SYMBOLS \
+					+ MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
 		} else { \
-			update_bit_1_dummy(); \
-			target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
-			bittree_decode_dummy(target, len_decoder.high, LEN_HIGH_BITS); \
+			rc_update_1(ld.choice2); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
+			rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
+			target = symbol - LEN_HIGH_SYMBOLS \
+					+ MATCH_LEN_MIN \
+					+ LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
 		} \
 	} \
 } while (0)
 
+#endif // HAVE_SMALL
 
+
+/// Length decoder probabilities; see comments in lzma_common.h.
 typedef struct {
 	probability choice;
 	probability choice2;
@@ -96,26 +169,12 @@ typedef struct {
 
 
 struct lzma_coder_s {
-	/// Data of the next coder, if any.
-	lzma_next_coder next;
-
-	/// Sliding output window a.k.a. dictionary a.k.a. history buffer.
-	lzma_lz_decoder lz;
-
-	// Range coder
-	lzma_range_decoder rc;
-
-	// State
-	lzma_lzma_state state;
-	uint32_t rep0;      ///< Distance of the latest match
-	uint32_t rep1;      ///< Distance of second latest match
-	uint32_t rep2;      ///< Distance of third latest match
-	uint32_t rep3;      ///< Distance of fourth latest match
-	uint32_t pos_bits;
-	uint32_t pos_mask;
-	uint32_t now_pos; // Lowest 32-bits are enough here.
+	///////////////////
+	// Probabilities //
+	///////////////////
 
-	lzma_literal_coder literal_coder;
+	/// Literals; see comments in lzma_common.h.
+	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
 
 	/// If 1, it's a match. Otherwise it's a single 8-bit literal.
 	probability is_match[STATES][POS_STATES_MAX];
@@ -138,178 +197,107 @@ struct lzma_coder_s {
 	/// the length is decoded from rep_len_decoder.
 	probability is_rep0_long[STATES][POS_STATES_MAX];
 
-	probability pos_slot_decoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS];
-	probability pos_decoders[FULL_DISTANCES - END_POS_MODEL_INDEX];
-	probability pos_align_decoder[1 << ALIGN_BITS];
-
-	/// Length of a match
-	lzma_length_decoder match_len_decoder;
-
-	/// Length of a repeated match.
-	lzma_length_decoder rep_len_decoder;
-
-	/// True when we have produced at least one byte of output since the
-	/// beginning of the stream or the latest flush marker.
-	bool has_produced_output;
-};
-
-
-/// \brief      Check if the next iteration of the decoder loop can be run.
-///
-/// \note       There must always be REQUIRED_IN_BUFFER_SIZE bytes
-///             readable space!
-///
-static bool lzma_attribute((pure))
-decode_dummy(const lzma_coder *restrict coder,
-		const uint8_t *restrict in, size_t in_pos_local,
-		const size_t in_size, lzma_range_decoder rc,
-		uint32_t state, uint32_t rep0, const uint32_t now_pos)
-{
-	uint32_t rc_bound;
-
-	do {
-		const uint32_t pos_state = now_pos & coder->pos_mask;
-
-		if_bit_0(coder->is_match[state][pos_state]) {
-			// It's a literal i.e. a single 8-bit byte.
-
-			update_bit_0_dummy();
-
-			const probability *subcoder = literal_get_subcoder(
-					coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
-			uint32_t symbol = 1;
-
-			if (is_literal_state(state)) {
-				// Decode literal without match byte.
-				do {
-					if_bit_0(subcoder[symbol]) {
-						update_bit_0_dummy();
-						symbol <<= 1;
-					} else {
-						update_bit_1_dummy();
-						symbol = (symbol << 1) | 1;
-					}
-				} while (symbol < 0x100);
-
-			} else {
-				// Decode literal with match byte.
-				uint32_t match_byte = lz_get_byte(coder->lz, rep0);
-				uint32_t subcoder_offset = 0x100;
-
-				do {
-					match_byte <<= 1;
-					const uint32_t match_bit = match_byte & subcoder_offset;
-					const uint32_t subcoder_index
-							= subcoder_offset + match_bit + symbol;
-
-					if_bit_0(subcoder[subcoder_index]) {
-						update_bit_0_dummy();
-						symbol <<= 1;
-						subcoder_offset &= ~match_bit;
-					} else {
-						update_bit_1_dummy();
-						symbol = (symbol << 1) | 1;
-						subcoder_offset &= match_bit;
-					}
-				} while (symbol < 0x100);
-			}
-
-			break;
-		}
-
-		update_bit_1_dummy();
-		uint32_t len;
-
-		if_bit_0(coder->is_rep[state]) {
-			update_bit_0_dummy();
-			length_decode_dummy(len, coder->match_len_decoder, pos_state);
-
-			const uint32_t len_to_pos_state = get_len_to_pos_state(len);
-			uint32_t pos_slot = 0;
-			bittree_decode_dummy(pos_slot,
-					coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS);
-			assert(pos_slot <= 63);
-
-			if (pos_slot >= START_POS_MODEL_INDEX) {
-				uint32_t direct_bits = (pos_slot >> 1) - 1;
-				assert(direct_bits >= 1 && direct_bits <= 31);
-				rep0 = 2 | (pos_slot & 1);
-
-				if (pos_slot < END_POS_MODEL_INDEX) {
-					assert(direct_bits <= 5);
-					rep0 <<= direct_bits;
-					assert(rep0 <= 96);
-					// -1 is fine, because bittree_reverse_decode()
-					// starts from table index [1] (not [0]).
-					assert((int32_t)(rep0 - pos_slot - 1) >= -1);
-					assert((int32_t)(rep0 - pos_slot - 1) <= 82);
-					// We add the result to rep0, so rep0
-					// must not be part of second argument
-					// of the macro.
-					const int32_t offset = rep0 - pos_slot - 1;
-					bittree_reverse_decode_dummy(coder->pos_decoders + offset,
-							direct_bits);
-				} else {
-					assert(pos_slot >= 14);
-					assert(direct_bits >= 6);
-					direct_bits -= ALIGN_BITS;
-					assert(direct_bits >= 2);
-					rc_decode_direct_dummy(direct_bits);
-
-					bittree_reverse_decode_dummy(coder->pos_align_decoder,
-							ALIGN_BITS);
-				}
-			}
+	/// Probability tree for the highest two bits of the match distance.
+	/// There is a separate probability tree for match lengths of
+	/// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
+	probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
 
-		} else {
-			update_bit_1_dummy();
+	/// Probility trees for additional bits for match distance when the
+	/// distance is in the range [4, 127].
+	probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
 
-			if_bit_0(coder->is_rep0[state]) {
-				update_bit_0_dummy();
+	/// Probability tree for the lowest four bits of a match distance
+	/// that is equal to or greater than 128.
+	probability pos_align[ALIGN_TABLE_SIZE];
 
-				if_bit_0(coder->is_rep0_long[state][pos_state]) {
-					update_bit_0_dummy();
-					break;
-				} else {
-					update_bit_1_dummy();
-				}
+	/// Length of a normal match
+	lzma_length_decoder match_len_decoder;
 
-			} else {
-				update_bit_1_dummy();
+	/// Length of a repeated match
+	lzma_length_decoder rep_len_decoder;
 
-				if_bit_0(coder->is_rep1[state]) {
-					update_bit_0_dummy();
-				} else {
-					update_bit_1_dummy();
+	///////////////////
+	// Decoder state //
+	///////////////////
 
-					if_bit_0(coder->is_rep2[state]) {
-						update_bit_0_dummy();
-					} else {
-						update_bit_1_dummy();
-					}
-				}
-			}
+	// Range coder
+	lzma_range_decoder rc;
 
-			length_decode_dummy(len, coder->rep_len_decoder, pos_state);
-		}
-	} while (0);
+	// Types of the most recently seen LZMA symbols
+	lzma_lzma_state state;
 
-	rc_normalize();
+	uint32_t rep0;      ///< Distance of the latest match
+	uint32_t rep1;      ///< Distance of second latest match
+	uint32_t rep2;      ///< Distance of third latest match
+	uint32_t rep3;      ///< Distance of fourth latest match
 
-	return in_pos_local <= in_size;
-}
+	uint32_t pos_mask; // (1U << pos_bits) - 1
+	uint32_t literal_context_bits;
+	uint32_t literal_pos_mask;
+
+	/// Uncompressed size as bytes, or LZMA_VLI_VALUE_UNKNOWN if end of
+	/// payload marker is expected.
+	lzma_vli uncompressed_size;
+
+	////////////////////////////////
+	// State of incomplete symbol //
+	////////////////////////////////
+
+	/// Position where to continue the decoder loop
+	enum {
+		SEQ_NORMALIZE,
+		SEQ_IS_MATCH,
+		seq_8(SEQ_LITERAL),
+		seq_8(SEQ_LITERAL_MATCHED),
+		SEQ_LITERAL_WRITE,
+		SEQ_IS_REP,
+		seq_len(SEQ_MATCH_LEN),
+		seq_6(SEQ_POS_SLOT),
+		SEQ_POS_MODEL,
+		SEQ_DIRECT,
+		seq_4(SEQ_ALIGN),
+		SEQ_EOPM,
+		SEQ_IS_REP0,
+		SEQ_SHORTREP,
+		SEQ_IS_REP0_LONG,
+		SEQ_IS_REP1,
+		SEQ_IS_REP2,
+		seq_len(SEQ_REP_LEN),
+		SEQ_COPY,
+	} sequence;
+
+	/// Base of the current probability tree
+	probability *probs;
+
+	/// Symbol being decoded. This is also used as an index variable in
+	/// bittree decoders: probs[symbol]
+	uint32_t symbol;
+
+	/// Used as a loop termination condition on bittree decoders and
+	/// direct bits decoder.
+	uint32_t limit;
+
+	/// Matched literal decoder: 0x100 or 0 to help avoiding branches.
+	/// Bittree reverse decoders: Offset of the next bit: 1 << offset
+	uint32_t offset;
+
+	/// If decoding a literal: match byte.
+	/// If decoding a match: length of the match.
+	uint32_t len;
+};
 
 
-static bool
-decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
-		size_t *restrict in_pos, size_t in_size, bool has_safe_buffer)
+static lzma_ret
+lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr,
+		const uint8_t *restrict in,
+		size_t *restrict in_pos, size_t in_size)
 {
 	////////////////////
 	// Initialization //
 	////////////////////
 
 	if (!rc_read_init(&coder->rc, in, in_pos, in_size))
-		return false;
+		return LZMA_OK;
 
 	///////////////
 	// Variables //
@@ -318,8 +306,12 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
 	// Making local copies of often-used variables improves both
 	// speed and readability.
 
+	lzma_dict dict = *dictptr;
+
+	const size_t dict_start = dict.pos;
+
 	// Range decoder
-	rc_to_local(coder->rc);
+	rc_to_local(coder->rc, *in_pos);
 
 	// State
 	uint32_t state = coder->state;
@@ -328,87 +320,168 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
 	uint32_t rep2 = coder->rep2;
 	uint32_t rep3 = coder->rep3;
 
-	// Misc
-	uint32_t now_pos = coder->now_pos;
-	bool has_produced_output = coder->has_produced_output;
-
-	// Variables derived from decoder settings
 	const uint32_t pos_mask = coder->pos_mask;
 
-	size_t in_pos_local = *in_pos; // Local copy
-	size_t in_limit;
-	if (in_size <= REQUIRED_IN_BUFFER_SIZE)
-		in_limit = 0;
-	else
-		in_limit = in_size - REQUIRED_IN_BUFFER_SIZE;
-
-
-	while (coder->lz.pos < coder->lz.limit
-			&& (in_pos_local < in_limit || (has_safe_buffer
-				&& decode_dummy(coder, in, in_pos_local, in_size,
-					rc, state, rep0, now_pos)))) {
-
-		/////////////////////
-		// Actual decoding //
-		/////////////////////
-
-		const uint32_t pos_state = now_pos & pos_mask;
+	// These variables are actually needed only if we last time ran
+	// out of input in the middle of the decoder loop.
+	probability *probs = coder->probs;
+	uint32_t symbol = coder->symbol;
+	uint32_t limit = coder->limit;
+	uint32_t offset = coder->offset;
+	uint32_t len = coder->len;
+
+	const uint32_t literal_pos_mask = coder->literal_pos_mask;
+	const uint32_t literal_context_bits = coder->literal_context_bits;
+
+	// Temporary variables
+	uint32_t pos_state = dict.pos & pos_mask;
+
+	lzma_ret ret = LZMA_OK;
+
+	// If uncompressed size is known, there must be no end of payload
+	// marker.
+	const bool no_eopm = coder->uncompressed_size
+			!= LZMA_VLI_VALUE_UNKNOWN;
+	if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos)
+		dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
+
+	// The main decoder loop. The "switch" is used to restart the decoder at
+	// correct location. Once restarted, the "switch" is no longer used.
+	switch (coder->sequence)
+	while (true) {
+		// Calculate new pos_state. This is skipped on the first loop
+		// since we already calculated it when setting up the local
+		// variables.
+		pos_state = dict.pos & pos_mask;
+
+	case SEQ_NORMALIZE:
+	case SEQ_IS_MATCH:
+		if (unlikely(no_eopm && dict.pos == dict.limit))
+			break;
 
-		if_bit_0(coder->is_match[state][pos_state]) {
-			update_bit_0(coder->is_match[state][pos_state]);
+		rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
+			rc_update_0(coder->is_match[state][pos_state]);
 
 			// It's a literal i.e. a single 8-bit byte.
 
-			probability *subcoder = literal_get_subcoder(coder->literal_coder,
-					now_pos, lz_get_byte(coder->lz, 0));
-			uint32_t symbol = 1;
+			probs = literal_subcoder(coder->literal,
+					literal_context_bits, literal_pos_mask,
+					dict.pos, dict_get(&dict, 0));
+			symbol = 1;
 
 			if (is_literal_state(state)) {
 				// Decode literal without match byte.
+#ifdef HAVE_SMALL
+	case SEQ_LITERAL:
 				do {
-					if_bit_0(subcoder[symbol]) {
-						update_bit_0(subcoder[symbol]);
-						symbol <<= 1;
-					} else {
-						update_bit_1(subcoder[symbol]);
-						symbol = (symbol << 1) | 1;
-					}
-				} while (symbol < 0x100);
-
+					rc_bit(probs[symbol], , , SEQ_LITERAL);
+				} while (symbol < (1 << 8));
+#else
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
+				rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
+#endif
 			} else {
 				// Decode literal with match byte.
 				//
-				// The usage of subcoder_offset allows omitting some
-				// branches, which should give tiny speed improvement on
-				// some CPUs. subcoder_offset gets set to zero if match_bit
-				// didn't match.
-				uint32_t match_byte = lz_get_byte(coder->lz, rep0);
-				uint32_t subcoder_offset = 0x100;
-
+				// We store the byte we compare against
+				// ("match byte") to "len" to minimize the
+				// number of variables we need to store
+				// between decoder calls.
+				len = dict_get(&dict, rep0) << 1;
+
+				// The usage of "offset" allows omitting some
+				// branches, which should give tiny speed
+				// improvement on some CPUs. "offset" gets
+				// set to zero if match_bit didn't match.
+				offset = 0x100;
+
+#ifdef HAVE_SMALL
+	case SEQ_LITERAL_MATCHED:
 				do {
-					match_byte <<= 1;
-					const uint32_t match_bit = match_byte & subcoder_offset;
+					const uint32_t match_bit
+							= len & offset;
 					const uint32_t subcoder_index
-							= subcoder_offset + match_bit + symbol;
+							= offset + match_bit
+							+ symbol;
+
+					rc_bit(probs[subcoder_index],
+							offset &= ~match_bit,
+							offset &= match_bit,
+							SEQ_LITERAL_MATCHED);
+
+					// It seems to be faster to do this
+					// here instead of putting it to the
+					// beginning of the loop and then
+					// putting the "case" in the middle
+					// of the loop.
+					len <<= 1;
+
+				} while (symbol < (1 << 8));
+#else
+				// Unroll the loop.
+				uint32_t match_bit;
+				uint32_t subcoder_index;
+
+#	define d(seq) \
+		case seq: \
+			match_bit = len & offset; \
+			subcoder_index = offset + match_bit + symbol; \
+			rc_bit(probs[subcoder_index], \
+					offset &= ~match_bit, \
+					offset &= match_bit, \
+					seq)
+
+				d(SEQ_LITERAL_MATCHED0);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED1);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED2);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED3);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED4);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED5);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED6);
+				len <<= 1;
+				d(SEQ_LITERAL_MATCHED7);
+#	undef d
+#endif
+			}
 
-					if_bit_0(subcoder[subcoder_index]) {
-						update_bit_0(subcoder[subcoder_index]);
-						symbol <<= 1;
-						subcoder_offset &= ~match_bit;
-					} else {
-						update_bit_1(subcoder[subcoder_index]);
-						symbol = (symbol << 1) | 1;
-						subcoder_offset &= match_bit;
-					}
-				} while (symbol < 0x100);
+			//update_literal(state);
+			// Use a lookup table to update to literal state,
+			// since compared to other state updates, this would
+			// need two branches.
+			static const lzma_lzma_state next_state[] = {
+				STATE_LIT_LIT,
+				STATE_LIT_LIT,
+				STATE_LIT_LIT,
+				STATE_LIT_LIT,
+				STATE_MATCH_LIT_LIT,
+				STATE_REP_LIT_LIT,
+				STATE_SHORTREP_LIT_LIT,
+				STATE_MATCH_LIT,
+				STATE_REP_LIT,
+				STATE_SHORTREP_LIT,
+				STATE_MATCH_LIT,
+				STATE_REP_LIT
+			};
+			state = next_state[state];
+
+	case SEQ_LITERAL_WRITE:
+			if (unlikely(dict_put(&dict, symbol))) {
+				coder->sequence = SEQ_LITERAL_WRITE;
+				goto out;
 			}
 
-			// Put the decoded byte to the dictionary, update the
-			// decoder state, and start a new decoding loop.
-			coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);
-			++now_pos;
-			update_literal(state);
-			has_produced_output = true;
 			continue;
 		}
 
@@ -416,115 +489,196 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
 		// (distance and length) which will be repeated from our
 		// output history.
 
-		update_bit_1(coder->is_match[state][pos_state]);
-		uint32_t len;
-
-		if_bit_0(coder->is_rep[state]) {
-			update_bit_0(coder->is_rep[state]);
+		rc_update_1(coder->is_match[state][pos_state]);
 
+	case SEQ_IS_REP:
+		rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
 			// Not a repeated match
-			//
-			// We will decode a new distance and store
-			// the value to distance.
-
-			// Decode the length of the match.
-			length_decode(len, coder->match_len_decoder, pos_state);
-
+			rc_update_0(coder->is_rep[state]);
 			update_match(state);
 
-			const uint32_t len_to_pos_state = get_len_to_pos_state(len);
-			uint32_t pos_slot = 0;
-			bittree_decode(pos_slot,
-					coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS);
-			assert(pos_slot <= 63);
-
-			if (pos_slot >= START_POS_MODEL_INDEX) {
-				uint32_t direct_bits = (pos_slot >> 1) - 1;
-				assert(direct_bits >= 1 && direct_bits <= 30);
-				uint32_t distance = 2 | (pos_slot & 1);
-
-				if (pos_slot < END_POS_MODEL_INDEX) {
-					assert(direct_bits <= 5);
-					distance <<= direct_bits;
-					assert(distance <= 96);
-					// -1 is fine, because
-					// bittree_reverse_decode()
-					// starts from table index [1]
-					// (not [0]).
-					assert((int32_t)(distance - pos_slot - 1) >= -1);
-					assert((int32_t)(distance - pos_slot - 1) <= 82);
-					// We add the result to distance, so distance
-					// must not be part of second argument
-					// of the macro.
-					const int32_t offset = distance - pos_slot - 1;
-					bittree_reverse_decode(distance, coder->pos_decoders + offset,
-							direct_bits);
+			// The latest three match distances are kept in
+			// memory in case there are repeated matches.
+			rep3 = rep2;
+			rep2 = rep1;
+			rep1 = rep0;
+
+			// Decode the length of the match.
+			len_decode(len, coder->match_len_decoder,
+					pos_state, SEQ_MATCH_LEN);
+
+			// Prepare to decode the highest two bits of the
+			// match distance.
+			probs = coder->pos_slot[get_len_to_pos_state(len)];
+			symbol = 1;
+
+#ifdef HAVE_SMALL
+	case SEQ_POS_SLOT:
+			do {
+				rc_bit(probs[symbol], , , SEQ_POS_SLOT);
+			} while (symbol < POS_SLOTS);
+#else
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0);
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1);
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2);
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3);
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4);
+			rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5);
+#endif
+			// Get rid of the highest bit that was needed for
+			// indexing of the probability array.
+			symbol -= POS_SLOTS;
+			assert(symbol <= 63);
+
+			if (symbol < START_POS_MODEL_INDEX) {
+				// Match distances [0, 3] have only two bits.
+				rep0 = symbol;
+			} else {
+				// Decode the lowest [1, 29] bits of
+				// the match distance.
+				limit = (symbol >> 1) - 1;
+				assert(limit >= 1 && limit <= 30);
+				rep0 = 2 + (symbol & 1);
+
+				if (symbol < END_POS_MODEL_INDEX) {
+					// Prepare to decode the low bits for
+					// a distance of [4, 127].
+					assert(limit <= 5);
+					rep0 <<= limit;
+					assert(rep0 <= 96);
+					// -1 is fine, because we start
+					// decoding at probs[1], not probs[0].
+					// NOTE: This violates the C standard,
+					// since we are doing pointer
+					// arithmetic past the beginning of
+					// the array.
+					assert((int32_t)(rep0 - symbol - 1)
+							>= -1);
+					assert((int32_t)(rep0 - symbol - 1)
+							<= 82);
+					probs = coder->pos_special + rep0
+							- symbol - 1;
+					symbol = 1;
+					offset = 0;
+	case SEQ_POS_MODEL:
+#ifdef HAVE_SMALL
+					do {
+						rc_bit(probs[symbol], ,
+							rep0 += 1 << offset,
+							SEQ_POS_MODEL);
+					} while (++offset < limit);
+#else
+					switch (limit) {
+					case 5:
+						assert(offset == 0);
+						rc_bit(probs[symbol], ,
+							rep0 += 1,
+							SEQ_POS_MODEL);
+						++offset;
+						--limit;
+					case 4:
+						rc_bit(probs[symbol], ,
+							rep0 += 1 << offset,
+							SEQ_POS_MODEL);
+						++offset;
+						--limit;
+					case 3:
+						rc_bit(probs[symbol], ,
+							rep0 += 1 << offset,
+							SEQ_POS_MODEL);
+						++offset;
+						--limit;
+					case 2:
+						rc_bit(probs[symbol], ,
+							rep0 += 1 << offset,
+							SEQ_POS_MODEL);
+						++offset;
+						--limit;
+					case 1:
+						// We need "symbol" only for
+						// indexing the probability
+						// array, thus we can use
+						// rc_bit_last() here to omit
+						// the unneeded updating of
+						// "symbol".
+						rc_bit_last(probs[symbol], ,
+							rep0 += 1 << offset,
+							SEQ_POS_MODEL);
+					}
+#endif
 				} else {
-					assert(pos_slot >= 14);
-					assert(direct_bits >= 6);
-					direct_bits -= ALIGN_BITS;
-					assert(direct_bits >= 2);
-					rc_decode_direct(distance, direct_bits);
-					distance <<= ALIGN_BITS;
-
-					bittree_reverse_decode(distance, coder->pos_align_decoder,
-							ALIGN_BITS);
-
-					if (distance == UINT32_MAX) {
-						if (len == LEN_SPECIAL_EOPM) {
-							// End of Payload Marker found.
-							coder->lz.eopm_detected = true;
-							break;
-
-						} else if (len == LEN_SPECIAL_FLUSH) {
-							// Flush marker detected. We must have produced
-							// at least one byte of output since the previous
-							// flush marker or the beginning of the stream.
-							// This is to prevent hanging the decoder with
-							// malicious input files.
-							if (!has_produced_output)
-								return true;
-
-							has_produced_output = false;
-
-							// We know that we have enough input to call
-							// this macro, because it is tested at the
-							// end of decode_dummy().
-							rc_normalize();
-
-							rc_reset(rc);
-
-							// If we don't have enough input here, we jump
-							// out of the loop. Note that while there is a
-							// useless call to rc_normalize(), it does nothing
-							// since we have just reset the range decoder.
-							if (!rc_read_init(&rc, in, &in_pos_local, in_size))
-								break;
-
-							continue;
-
-						} else {
-							return true;
+					// The distace is >= 128. Decode the
+					// lower bits without probabilities
+					// except the lowest four bits.
+					assert(symbol >= 14);
+					assert(limit >= 6);
+					limit -= ALIGN_BITS;
+					assert(limit >= 2);
+	case SEQ_DIRECT:
+					// Not worth manual unrolling
+					do {
+						rc_direct(rep0, SEQ_DIRECT);
+					} while (--limit > 0);
+
+					// Decode the lowest four bits using
+					// probabilities.
+					rep0 <<= ALIGN_BITS;
+					symbol = 1;
+#ifdef HAVE_SMALL
+					offset = 0;
+	case SEQ_ALIGN:
+					do {
+						rc_bit(coder->pos_align[
+								symbol], ,
+							rep0 += 1 << offset,
+							SEQ_ALIGN);
+					} while (++offset < ALIGN_BITS);
+#else
+	case SEQ_ALIGN0:
+					rc_bit(coder->pos_align[symbol], ,
+							rep0 += 1, SEQ_ALIGN0);
+	case SEQ_ALIGN1:
+					rc_bit(coder->pos_align[symbol], ,
+							rep0 += 2, SEQ_ALIGN1);
+	case SEQ_ALIGN2:
+					rc_bit(coder->pos_align[symbol], ,
+							rep0 += 4, SEQ_ALIGN2);
+	case SEQ_ALIGN3:
+					// Like in SEQ_POS_MODEL, we don't
+					// need "symbol" for anything else
+					// than indexing the probability array.
+					rc_bit_last(coder->pos_align[symbol], ,
+							rep0 += 8, SEQ_ALIGN3);
+#endif
+
+					if (rep0 == UINT32_MAX) {
+						// End of payload marker was
+						// found. It must not be
+						// present if uncompressed
+						// size is known.
+						if (coder->uncompressed_size
+						!= LZMA_VLI_VALUE_UNKNOWN) {
+							ret = LZMA_DATA_ERROR;
+							goto out;
 						}
+
+	case SEQ_EOPM:
+						// TODO Comment
+						rc_normalize(SEQ_EOPM);
+						ret = LZMA_STREAM_END;
+						goto out;
 					}
 				}
+			}
 
-				// The latest three match distances are kept in
-				// memory in case there are repeated matches.
-				rep3 = rep2;
-				rep2 = rep1;
-				rep1 = rep0;
-				rep0 = distance;
-
-			} else {
-				rep3 = rep2;
-				rep2 = rep1;
-				rep1 = rep0;
-				rep0 = pos_slot;
+			// Validate the distance we just decoded.
+			if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
+				ret = LZMA_DATA_ERROR;
+				goto out;
 			}
 
 		} else {
-			update_bit_1(coder->is_rep[state]);
+			rc_update_1(coder->is_rep[state]);
 
 			// Repeated match
 			//
@@ -532,242 +686,318 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
 			// earlier. The latest four match distances are
 			// available as rep0, rep1, rep2 and rep3. We will
 			// now decode which of them is the new distance.
+			//
+			// There cannot be a match if we haven't produced
+			// any output, so check that first.
+			if (unlikely(!dict_is_distance_valid(&dict, 0))) {
+				ret = LZMA_DATA_ERROR;
+				goto out;
+			}
 
-			if_bit_0(coder->is_rep0[state]) {
-				update_bit_0(coder->is_rep0[state]);
-
+	case SEQ_IS_REP0:
+			rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
+				rc_update_0(coder->is_rep0[state]);
 				// The distance is rep0.
 
-				if_bit_0(coder->is_rep0_long[state][pos_state]) {
-					update_bit_0(coder->is_rep0_long[state][pos_state]);
+	case SEQ_IS_REP0_LONG:
+				rc_if_0(coder->is_rep0_long[state][pos_state],
+						SEQ_IS_REP0_LONG) {
+					rc_update_0(coder->is_rep0_long[
+							state][pos_state]);
 
 					update_short_rep(state);
 
-					// Repeat exactly one byte and start a new decoding loop.
-					// Note that rep0 is known to have a safe value, thus we
-					// don't need to check if we are wrapping the dictionary
-					// when it isn't full yet.
-					if (unlikely(lz_is_empty(coder->lz)))
-						return true;
-
-					coder->lz.dict[coder->lz.pos]
-							= lz_get_byte(coder->lz, rep0);
-					++coder->lz.pos;
-					++now_pos;
-					has_produced_output = true;
-					continue;
-
-				} else {
-					update_bit_1(coder->is_rep0_long[state][pos_state]);
+	case SEQ_SHORTREP:
+					if (unlikely(dict_put(&dict, dict_get(
+							&dict, rep0)))) {
+						coder->sequence = SEQ_SHORTREP;
+						goto out;
+					}
 
-					// Repeating more than one byte at
-					// distance of rep0.
+					continue;
 				}
 
+				// Repeating more than one byte at
+				// distance of rep0.
+				rc_update_1(coder->is_rep0_long[
+						state][pos_state]);
+
 			} else {
-				update_bit_1(coder->is_rep0[state]);
+				rc_update_1(coder->is_rep0[state]);
 
+	case SEQ_IS_REP1:
 				// The distance is rep1, rep2 or rep3. Once
 				// we find out which one of these three, it
 				// is stored to rep0 and rep1, rep2 and rep3
 				// are updated accordingly.
+				rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
+					rc_update_0(coder->is_rep1[state]);
 
-				uint32_t distance;
+					const uint32_t distance = rep1;
+					rep1 = rep0;
+					rep0 = distance;
 
-				if_bit_0(coder->is_rep1[state]) {
-					update_bit_0(coder->is_rep1[state]);
-					distance = rep1;
 				} else {
-					update_bit_1(coder->is_rep1[state]);
+					rc_update_1(coder->is_rep1[state]);
+	case SEQ_IS_REP2:
+					rc_if_0(coder->is_rep2[state],
+							SEQ_IS_REP2) {
+						rc_update_0(coder->is_rep2[
+								state]);
+
+						const uint32_t distance = rep2;
+						rep2 = rep1;
+						rep1 = rep0;
+						rep0 = distance;
 
-					if_bit_0(coder->is_rep2[state]) {
-						update_bit_0(coder->is_rep2[state]);
-						distance = rep2;
 					} else {
-						update_bit_1(coder->is_rep2[state]);
-						distance = rep3;
+						rc_update_1(coder->is_rep2[
+								state]);
+
+						const uint32_t distance = rep3;
 						rep3 = rep2;
+						rep2 = rep1;
+						rep1 = rep0;
+						rep0 = distance;
 					}
-
-					rep2 = rep1;
 				}
-
-				rep1 = rep0;
-				rep0 = distance;
 			}
 
 			update_long_rep(state);
 
 			// Decode the length of the repeated match.
-			length_decode(len, coder->rep_len_decoder, pos_state);
+			len_decode(len, coder->rep_len_decoder,
+					pos_state, SEQ_REP_LEN);
 		}
 
-
 		/////////////////////////////////
 		// Repeat from history buffer. //
 		/////////////////////////////////
 
 		// The length is always between these limits. There is no way
 		// to trigger the algorithm to set len outside this range.
-		assert(len >= MATCH_MIN_LEN);
-		assert(len <= MATCH_MAX_LEN);
-
-		now_pos += len;
-		has_produced_output = true;
+		assert(len >= MATCH_LEN_MIN);
+		assert(len <= MATCH_LEN_MAX);
 
+	case SEQ_COPY:
 		// Repeat len bytes from distance of rep0.
-		if (!lzma_lz_out_repeat(&coder->lz, rep0, len))
-			return true;
+		if (unlikely(dict_repeat(&dict, rep0, &len))) {
+			coder->sequence = SEQ_COPY;
+			goto out;
+		}
 	}
 
-	rc_normalize();
+	rc_normalize(SEQ_NORMALIZE);
+	coder->sequence = SEQ_IS_MATCH;
 
+out:
+	// Save state
 
-	/////////////////////////////////
-	// Update the *data structure. //
-	/////////////////////////////////
+	// NOTE: Must not copy dict.limit.
+	dictptr->pos = dict.pos;
+	dictptr->full = dict.full;
 
-	// Range decoder
-	rc_from_local(coder->rc);
+	rc_from_local(coder->rc, *in_pos);
 
-	// State
 	coder->state = state;
 	coder->rep0 = rep0;
 	coder->rep1 = rep1;
 	coder->rep2 = rep2;
 	coder->rep3 = rep3;
 
-	// Misc
-	coder->now_pos = now_pos;
-	coder->has_produced_output = has_produced_output;
-	*in_pos = in_pos_local;
+	coder->probs = probs;
+	coder->symbol = symbol;
+	coder->limit = limit;
+	coder->offset = offset;
+	coder->len = len;
+
+	// Update the remaining amount of uncompressed data if uncompressed
+	// size was known.
+	if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
+		coder->uncompressed_size -= dict.pos - dict_start;
+
+		// Since there cannot be end of payload marker if the
+		// uncompressed size was known, we check here if we
+		// finished decoding.
+		if (coder->uncompressed_size == 0 && ret == LZMA_OK
+				&& coder->sequence != SEQ_NORMALIZE)
+			ret = coder->sequence == SEQ_IS_MATCH
+					? LZMA_STREAM_END : LZMA_DATA_ERROR;
+	}
+
+	// We can do an additional check in the range decoder to catch some
+	// corrupted files.
+	if (ret == LZMA_STREAM_END) {
+		if (!rc_is_finished(coder->rc))
+			ret = LZMA_DATA_ERROR;
 
-	return false;
+		// Reset the range decoder so that it is ready to reinitialize
+		// for a new LZMA2 chunk.
+		rc_reset(coder->rc);
+	}
+
+	return ret;
 }
 
 
+
 static void
-lzma_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
 {
-	lzma_next_coder_end(&coder->next, allocator);
-	lzma_lz_decoder_end(&coder->lz, allocator);
-	lzma_free(coder, allocator);
-	return;
+	coder->uncompressed_size = uncompressed_size;
 }
 
-
-extern lzma_ret
-lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
-		const lzma_filter_info *filters)
+/*
+extern void
+lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size)
 {
-	// LZMA can only be the last filter in the chain.
-	assert(filters[1].init == NULL);
-
-	// Validate pos_bits. Other options are validated by the
-	// respective initialization functions.
-	const lzma_options_lzma *options = filters[0].options;
-	if (options->pos_bits > LZMA_POS_BITS_MAX)
-		return LZMA_HEADER_ERROR;
+	// This is hack.
+	(*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size;
+}
+*/
 
-	// Allocate memory for the decoder if needed.
-	if (next->coder == NULL) {
-		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
-		if (next->coder == NULL)
-			return LZMA_MEM_ERROR;
+static void
+lzma_decoder_reset(lzma_coder *coder, const void *opt)
+{
+	const lzma_options_lzma *options = opt;
 
-		next->code = &lzma_lz_decode;
-		next->end = &lzma_decoder_end;
-		next->coder->next = LZMA_NEXT_CODER_INIT;
-		next->coder->lz = LZMA_LZ_DECODER_INIT;
-	}
+	// NOTE: We assume that lc/lp/pb are valid since they were
+	// successfully decoded with lzma_lzma_decode_properties().
+	// FIXME?
 
-	// Store the pos_bits and calculate pos_mask.
-	next->coder->pos_bits = options->pos_bits;
-	next->coder->pos_mask = (1U << next->coder->pos_bits) - 1;
+	// Calculate pos_mask. We don't need pos_bits as is for anything.
+	coder->pos_mask = (1U << options->pos_bits) - 1;
 
 	// Initialize the literal decoder.
-	return_if_error(lzma_literal_init(&next->coder->literal_coder,
-				options->literal_context_bits,
-				options->literal_pos_bits));
+	literal_init(coder->literal, options->literal_context_bits,
+				options->literal_pos_bits);
 
-	// Allocate and initialize the LZ decoder.
-	return_if_error(lzma_lz_decoder_reset(&next->coder->lz, allocator,
-			&decode_real, options->dictionary_size,
-			MATCH_MAX_LEN));
+	coder->literal_context_bits = options->literal_context_bits;
+	coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1;
 
 	// State
-	next->coder->state = 0;
-	next->coder->rep0 = 0;
-	next->coder->rep1 = 0;
-	next->coder->rep2 = 0;
-	next->coder->rep3 = 0;
-	next->coder->pos_bits = options->pos_bits;
-	next->coder->pos_mask = (1 << next->coder->pos_bits) - 1;
-	next->coder->now_pos = 0;
+	coder->state = STATE_LIT_LIT;
+	coder->rep0 = 0;
+	coder->rep1 = 0;
+	coder->rep2 = 0;
+	coder->rep3 = 0;
+	coder->pos_mask = (1 << options->pos_bits) - 1;
 
 	// Range decoder
-	rc_reset(next->coder->rc);
+	rc_reset(coder->rc);
 
 	// Bit and bittree decoders
 	for (uint32_t i = 0; i < STATES; ++i) {
-		for (uint32_t j = 0; j <= next->coder->pos_mask; ++j) {
-			bit_reset(next->coder->is_match[i][j]);
-			bit_reset(next->coder->is_rep0_long[i][j]);
+		for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
+			bit_reset(coder->is_match[i][j]);
+			bit_reset(coder->is_rep0_long[i][j]);
 		}
 
-		bit_reset(next->coder->is_rep[i]);
-		bit_reset(next->coder->is_rep0[i]);
-		bit_reset(next->coder->is_rep1[i]);
-		bit_reset(next->coder->is_rep2[i]);
+		bit_reset(coder->is_rep[i]);
+		bit_reset(coder->is_rep0[i]);
+		bit_reset(coder->is_rep1[i]);
+		bit_reset(coder->is_rep2[i]);
 	}
 
 	for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i)
-		bittree_reset(next->coder->pos_slot_decoder[i], POS_SLOT_BITS);
+		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
 
 	for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
-		bit_reset(next->coder->pos_decoders[i]);
+		bit_reset(coder->pos_special[i]);
 
-	bittree_reset(next->coder->pos_align_decoder, ALIGN_BITS);
+	bittree_reset(coder->pos_align, ALIGN_BITS);
 
 	// Len decoders (also bit/bittree)
-	const uint32_t num_pos_states = 1 << next->coder->pos_bits;
-	bit_reset(next->coder->match_len_decoder.choice);
-	bit_reset(next->coder->match_len_decoder.choice2);
-	bit_reset(next->coder->rep_len_decoder.choice);
-	bit_reset(next->coder->rep_len_decoder.choice2);
+	const uint32_t num_pos_states = 1 << options->pos_bits;
+	bit_reset(coder->match_len_decoder.choice);
+	bit_reset(coder->match_len_decoder.choice2);
+	bit_reset(coder->rep_len_decoder.choice);
+	bit_reset(coder->rep_len_decoder.choice2);
 
 	for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
-		bittree_reset(next->coder->match_len_decoder.low[pos_state],
+		bittree_reset(coder->match_len_decoder.low[pos_state],
 				LEN_LOW_BITS);
-		bittree_reset(next->coder->match_len_decoder.mid[pos_state],
+		bittree_reset(coder->match_len_decoder.mid[pos_state],
 				LEN_MID_BITS);
 
-		bittree_reset(next->coder->rep_len_decoder.low[pos_state],
+		bittree_reset(coder->rep_len_decoder.low[pos_state],
 				LEN_LOW_BITS);
-		bittree_reset(next->coder->rep_len_decoder.mid[pos_state],
+		bittree_reset(coder->rep_len_decoder.mid[pos_state],
 				LEN_MID_BITS);
 	}
 
-	bittree_reset(next->coder->match_len_decoder.high, LEN_HIGH_BITS);
-	bittree_reset(next->coder->rep_len_decoder.high, LEN_HIGH_BITS);
+	bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
+	bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
+
+	coder->sequence = SEQ_IS_MATCH;
+	coder->probs = NULL;
+	coder->symbol = 0;
+	coder->limit = 0;
+	coder->offset = 0;
+	coder->len = 0;
+
+	return;
+}
+
+
+extern lzma_ret
+lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator,
+		const void *opt, size_t *dict_size)
+{
+	if (lz->coder == NULL) {
+		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (lz->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		lz->code = &lzma_decode;
+		lz->reset = &lzma_decoder_reset;
+		lz->set_uncompressed = &lzma_decoder_uncompressed;
+	}
 
-	next->coder->has_produced_output = false;
+	// All dictionary sizes are OK here. LZ decoder will take care of
+	// the special cases.
+	const lzma_options_lzma *options = opt;
+	*dict_size = options->dictionary_size;
 
 	return LZMA_OK;
 }
 
 
-extern void
-lzma_lzma_decoder_uncompressed_size(
-		lzma_next_coder *next, lzma_vli uncompressed_size)
+/// Allocate and initialize LZMA decoder. This is used only via LZ
+/// initialization (lzma_lzma_decoder_init() passes function pointer to
+/// the LZ initialization).
+static lzma_ret
+lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
+		const void *options, size_t *dict_size)
 {
-	next->coder->lz.uncompressed_size = uncompressed_size;
-	return;
+	if (!is_lclppb_valid(options))
+		return LZMA_PROG_ERROR;
+
+	return_if_error(lzma_lzma_decoder_create(
+			lz, allocator, options, dict_size));
+
+	lzma_decoder_reset(lz->coder, options);
+	lzma_decoder_uncompressed(lz->coder, LZMA_VLI_VALUE_UNKNOWN);
+
+	return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	// LZMA can only be the last filter in the chain. This is enforced
+	// by the raw_decoder initialization.
+	assert(filters[1].init == NULL);
+
+	return lzma_lz_decoder_init(next, allocator, filters,
+			&lzma_decoder_init);
 }
 
 
 extern bool
-lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte)
+lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
 {
 	if (byte > (4 * 5 + 4) * 9 + 8)
 		return true;
@@ -781,3 +1011,49 @@ lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte)
 	return options->literal_context_bits + options->literal_pos_bits
 			> LZMA_LITERAL_BITS_MAX;
 }
+
+
+extern uint64_t
+lzma_lzma_decoder_memusage(const void *options)
+{
+	const lzma_options_lzma *const opt = options;
+	const uint64_t lz_memusage
+			= lzma_lz_decoder_memusage(opt->dictionary_size);
+	if (lz_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder) + lz_memusage;
+}
+
+
+extern lzma_ret
+lzma_lzma_props_decode(void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	if (props_size != 5)
+		return LZMA_HEADER_ERROR;
+
+	lzma_options_lzma *opt
+			= lzma_alloc(sizeof(lzma_options_lzma), allocator);
+	if (opt == NULL)
+		return LZMA_MEM_ERROR;
+
+	if (lzma_lzma_lclppb_decode(opt, props[0]))
+		goto error;
+
+	// All dictionary sizes are accepted, including zero. LZ decoder
+	// will automatically use a dictionary at least a few KiB even if
+	// a smaller dictionary is requested.
+	opt->dictionary_size = integer_read_32(props + 1);
+
+	opt->preset_dictionary = NULL;
+	opt->preset_dictionary_size = 0;
+
+	*options = opt;
+
+	return LZMA_OK;
+
+error:
+	lzma_free(opt, allocator);
+	return LZMA_HEADER_ERROR;
+}
diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h
index 9d57c7e5..3792f452 100644
--- a/src/liblzma/lzma/lzma_decoder.h
+++ b/src/liblzma/lzma/lzma_decoder.h
@@ -28,16 +28,27 @@
 extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next,
 		lzma_allocator *allocator, const lzma_filter_info *filters);
 
-/// Set known uncompressed size. This is a hack needed to support
-/// LZMA_Alone files that don't have EOPM.
-extern void lzma_lzma_decoder_uncompressed_size(
-		lzma_next_coder *next, lzma_vli uncompressed_size);
+extern uint64_t lzma_lzma_decoder_memusage(const void *options);
+
+extern lzma_ret lzma_lzma_props_decode(
+		void **options, lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size);
+
 
 /// \brief      Decodes the LZMA Properties byte (lc/lp/pb)
 ///
 /// \return     true if error occorred, false on success
 ///
-extern bool lzma_lzma_decode_properties(
+extern bool lzma_lzma_lclppb_decode(
 		lzma_options_lzma *options, uint8_t byte);
 
+
+#ifdef LZMA_LZ_DECODER_H
+/// Allocate and setup function pointers only. This is used by LZMA1 and
+/// LZMA2 decoders.
+extern lzma_ret lzma_lzma_decoder_create(
+		lzma_lz_decoder *lz, lzma_allocator *allocator,
+		const void *opt, size_t *dict_size);
+#endif
+
 #endif
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index afb1d5ed..a84801e7 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -30,40 +30,33 @@ static inline void
 literal_matched(lzma_range_encoder *rc, probability *subcoder,
 		uint32_t match_byte, uint32_t symbol)
 {
-	uint32_t context = 1;
-	uint32_t bit_count = 8;
+	uint32_t offset = 0x100;
+	symbol += UINT32_C(1) << 8;
 
 	do {
-		uint32_t bit = (symbol >> --bit_count) & 1;
-		const uint32_t match_bit = (match_byte >> bit_count) & 1;
-		rc_bit(rc, &subcoder[(0x100 << match_bit) + context], bit);
-		context = (context << 1) | bit;
-
-		if (match_bit != bit) {
-			// The bit from the literal being encoded and the bit
-			// from the previous match differ. Finish encoding
-			// as a normal literal.
-			while (bit_count != 0) {
-				bit = (symbol >> --bit_count) & 1;
-				rc_bit(rc, &subcoder[context], bit);
-				context = (context << 1) | bit;
-			}
+		match_byte <<= 1;
+		const uint32_t match_bit = match_byte & offset;
+		const uint32_t subcoder_index
+				= offset + match_bit + (symbol >> 8);
+		const uint32_t bit = (symbol >> 7) & 1;
+		rc_bit(rc, &subcoder[subcoder_index], bit);
 
-			break;
-		}
+		symbol <<= 1;
+		offset &= ~(match_byte ^ symbol);
 
-	} while (bit_count != 0);
+	} while (symbol < (UINT32_C(1) << 16));
 }
 
 
 static inline void
-literal(lzma_coder *coder)
+literal(lzma_coder *coder, lzma_mf *mf, uint32_t position)
 {
 	// Locate the literal byte to be encoded and the subcoder.
-	const uint8_t cur_byte = coder->lz.buffer[
-			coder->lz.read_pos - coder->additional_offset];
-	probability *subcoder = literal_get_subcoder(coder->literal_coder,
-			coder->now_pos, coder->previous_byte);
+	const uint8_t cur_byte = mf->buffer[
+			mf->read_pos - mf->read_ahead];
+	probability *subcoder = literal_subcoder(coder->literal,
+			coder->literal_context_bits, coder->literal_pos_mask,
+			position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);
 
 	if (is_literal_state(coder->state)) {
 		// Previous LZMA-symbol was a literal. Encode a normal
@@ -73,14 +66,13 @@ literal(lzma_coder *coder)
 		// Previous LZMA-symbol was a match. Use the last byte of
 		// the match as a "match byte". That is, compare the bits
 		// of the current literal and the match byte.
-		const uint8_t match_byte = coder->lz.buffer[
-				coder->lz.read_pos - coder->reps[0] - 1
-				- coder->additional_offset];
+		const uint8_t match_byte = mf->buffer[
+				mf->read_pos - coder->reps[0] - 1
+				- mf->read_ahead];
 		literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
 	}
 
 	update_literal(coder->state);
-	coder->previous_byte = cur_byte;
 }
 
 
@@ -88,12 +80,41 @@ literal(lzma_coder *coder)
 // Match length //
 //////////////////
 
+static void
+length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state)
+{
+	const uint32_t table_size = lc->table_size;
+	lc->counters[pos_state] = table_size;
+
+	const uint32_t a0 = rc_bit_0_price(lc->choice);
+	const uint32_t a1 = rc_bit_1_price(lc->choice);
+	const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2);
+	const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2);
+	uint32_t *const prices = lc->prices[pos_state];
+
+	uint32_t i;
+	for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i)
+		prices[i] = a0 + rc_bittree_price(lc->low[pos_state],
+				LEN_LOW_BITS, i);
+
+	for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
+		prices[i] = b0 + rc_bittree_price(lc->mid[pos_state],
+				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
+
+	for (; i < table_size; ++i)
+		prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS,
+				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
+
+	return;
+}
+
+
 static inline void
 length(lzma_range_encoder *rc, lzma_length_encoder *lc,
-		const uint32_t pos_state, uint32_t len)
+		const uint32_t pos_state, uint32_t len, const bool fast_mode)
 {
-	assert(len <= MATCH_MAX_LEN);
-	len -= MATCH_MIN_LEN;
+	assert(len <= MATCH_LEN_MAX);
+	len -= MATCH_LEN_MIN;
 
 	if (len < LEN_LOW_SYMBOLS) {
 		rc_bit(rc, &lc->choice, 0);
@@ -111,6 +132,12 @@ length(lzma_range_encoder *rc, lzma_length_encoder *lc,
 			rc_bittree(rc, lc->high, LEN_HIGH_BITS, len);
 		}
 	}
+
+	// Only getoptimum uses the prices so don't update the table when
+	// in fast mode.
+	if (!fast_mode)
+		if (--lc->counters[pos_state] == 0)
+			length_update_prices(lc, pos_state);
 }
 
 
@@ -124,12 +151,12 @@ match(lzma_coder *coder, const uint32_t pos_state,
 {
 	update_match(coder->state);
 
-	length(&coder->rc, &coder->match_len_encoder, pos_state, len);
-	coder->prev_len_encoder = &coder->match_len_encoder;
+	length(&coder->rc, &coder->match_len_encoder, pos_state, len,
+			coder->fast_mode);
 
 	const uint32_t pos_slot = get_pos_slot(distance);
 	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
-	rc_bittree(&coder->rc, coder->pos_slot_encoder[len_to_pos_state],
+	rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state],
 			POS_SLOT_BITS, pos_slot);
 
 	if (pos_slot >= START_POS_MODEL_INDEX) {
@@ -139,13 +166,13 @@ match(lzma_coder *coder, const uint32_t pos_state,
 
 		if (pos_slot < END_POS_MODEL_INDEX) {
 			rc_bittree_reverse(&coder->rc,
-				&coder->pos_encoders[base - pos_slot - 1],
+				&coder->pos_special[base - pos_slot - 1],
 				footer_bits, pos_reduced);
 		} else {
 			rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS,
 					footer_bits - ALIGN_BITS);
 			rc_bittree_reverse(
-					&coder->rc, coder->pos_align_encoder,
+					&coder->rc, coder->pos_align,
 					ALIGN_BITS, pos_reduced & ALIGN_MASK);
 			++coder->align_price_count;
 		}
@@ -196,8 +223,8 @@ rep_match(lzma_coder *coder, const uint32_t pos_state,
 	if (len == 1) {
 		update_short_rep(coder->state);
 	} else {
-		length(&coder->rc, &coder->rep_len_encoder, pos_state, len);
-		coder->prev_len_encoder = &coder->rep_len_encoder;
+		length(&coder->rc, &coder->rep_len_encoder, pos_state, len,
+				coder->fast_mode);
 		update_long_rep(coder->state);
 	}
 }
@@ -208,117 +235,123 @@ rep_match(lzma_coder *coder, const uint32_t pos_state,
 //////////
 
 static void
-encode_symbol(lzma_coder *coder, uint32_t pos, uint32_t len)
+encode_symbol(lzma_coder *coder, lzma_mf *mf,
+		uint32_t back, uint32_t len, uint32_t position)
 {
-	const uint32_t pos_state = coder->now_pos & coder->pos_mask;
+	const uint32_t pos_state = position & coder->pos_mask;
 
-	if (len == 1 && pos == UINT32_MAX) {
+	if (back == UINT32_MAX) {
 		// Literal i.e. eight-bit byte
+		assert(len == 1);
 		rc_bit(&coder->rc,
 				&coder->is_match[coder->state][pos_state], 0);
-		literal(coder);
+		literal(coder, mf, position);
 	} else {
 		// Some type of match
 		rc_bit(&coder->rc,
 			&coder->is_match[coder->state][pos_state], 1);
 
-		if (pos < REP_DISTANCES) {
+		if (back < REP_DISTANCES) {
 			// It's a repeated match i.e. the same distance
 			// has been used earlier.
 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 1);
-			rep_match(coder, pos_state, pos, len);
+			rep_match(coder, pos_state, back, len);
 		} else {
 			// Normal match
 			rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
-			match(coder, pos_state, pos - REP_DISTANCES, len);
+			match(coder, pos_state, back - REP_DISTANCES, len);
 		}
+	}
+
+	assert(mf->read_ahead >= len);
+	mf->read_ahead -= len;
+}
+
+
+static bool
+encode_init(lzma_coder *coder, lzma_mf *mf)
+{
+	if (mf->read_pos == mf->read_limit) {
+		if (mf->action == LZMA_RUN)
+			return false; // We cannot do anything.
 
-		coder->previous_byte = coder->lz.buffer[
-				coder->lz.read_pos + len - 1
-				- coder->additional_offset];
+		// We are finishing (we cannot get here when flushing).
+		assert(mf->write_pos == mf->read_pos);
+		assert(mf->action == LZMA_FINISH);
+	} else {
+		// Do the actual initialization. The first LZMA symbol must
+		// always be a literal.
+		mf_skip(mf, 1);
+		mf->read_ahead = 0;
+		rc_bit(&coder->rc, &coder->is_match[0][0], 0);
+		rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
 	}
 
-	assert(coder->additional_offset >= len);
-	coder->additional_offset -= len;
-	coder->now_pos += len;
+	// Initialization is done (except if empty file).
+	coder->is_initialized = true;
+
+	return true;
 }
 
 
 static void
-encode_eopm(lzma_coder *coder)
+encode_eopm(lzma_coder *coder, uint32_t position)
 {
-	const uint32_t pos_state = coder->now_pos & coder->pos_mask;
+	const uint32_t pos_state = position & coder->pos_mask;
 	rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1);
 	rc_bit(&coder->rc, &coder->is_rep[coder->state], 0);
-	match(coder, pos_state, UINT32_MAX, MATCH_MIN_LEN);
+	match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN);
 }
 
 
-/**
- * \brief       LZMA encoder
- *
- * \return      true if end of stream was reached, false otherwise.
- */
-extern bool
-lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
-		size_t *restrict out_pos, size_t out_size)
+/// Number of bytes that a single encoding loop in lzma_lzma_encode() can
+/// consume from the dictionary. This limit comes from lzma_lzma_optimum()
+/// and may need to be updated if that function is significantly modified.
+#define LOOP_INPUT_MAX (OPTS + 1)
+
+
+extern lzma_ret
+lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size, uint32_t limit)
 {
 	// Initialize the stream if no data has been encoded yet.
-	if (!coder->is_initialized) {
-		if (coder->lz.read_pos == coder->lz.read_limit) {
-			if (coder->lz.sequence == SEQ_RUN)
-				return false; // We cannot do anything.
-
-			// We are finishing (we cannot get here when flushing).
-			assert(coder->lz.write_pos == coder->lz.read_pos);
-			assert(coder->lz.sequence == SEQ_FINISH);
-		} else {
-			// Do the actual initialization.
-			uint32_t len;
-			uint32_t num_distance_pairs;
-			lzma_read_match_distances(coder,
-					&len, &num_distance_pairs);
+	if (!coder->is_initialized && !encode_init(coder, mf))
+		return LZMA_OK;
 
-			encode_symbol(coder, UINT32_MAX, 1);
+	// Get the lowest bits of the uncompressed offset from the LZ layer.
+	uint32_t position = mf_position(mf);
 
-			assert(coder->additional_offset == 0);
+	while (true) {
+		// Encode pending bits, if any. Calling this before encoding
+		// the next symbol is needed only with plain LZMA, since
+		// LZMA2 always provides big enough buffer to flush
+		// everything out from the range encoder. For the same reason,
+		// rc_encode() never returns true when this function is used
+		// as part of LZMA2 encoder.
+		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+			assert(limit == UINT32_MAX);
+			return LZMA_OK;
 		}
 
-		// Initialization is done (except if empty file).
-		coder->is_initialized = true;
-	}
-
-	// Encoding loop
-	while (true) {
-		// Encode pending bits, if any.
-		if (rc_encode(&coder->rc, out, out_pos, out_size))
-			return false;
+		// With LZMA2 we need to take care that compressed size of
+		// a chunk doesn't get too big.
+		// TODO
+		if (limit != UINT32_MAX
+				&& (mf->read_pos - mf->read_ahead >= limit
+					|| *out_pos + rc_pending(&coder->rc)
+						>= (UINT32_C(1) << 16)
+							- LOOP_INPUT_MAX))
+			break;
 
 		// Check that there is some input to process.
-		if (coder->lz.read_pos >= coder->lz.read_limit) {
-			// If flushing or finishing, we must keep encoding
-			// until additional_offset becomes zero to make
-			// all the input available at output.
-			if (coder->lz.sequence == SEQ_RUN)
-				return false;
-
-			if (coder->additional_offset == 0)
-				break;
-		}
-
-		assert(coder->lz.read_pos <= coder->lz.write_pos);
+		if (mf->read_pos >= mf->read_limit) {
+			if (mf->action == LZMA_RUN)
+				return LZMA_OK;
 
-#ifndef NDEBUG
-		if (coder->lz.sequence != SEQ_RUN) {
-			assert(coder->lz.read_limit == coder->lz.write_pos);
-		} else {
-			assert(coder->lz.read_limit + coder->lz.keep_size_after
-					== coder->lz.write_pos);
+			if (mf->read_ahead == 0)
+				break;
 		}
-#endif
-
-		uint32_t pos;
-		uint32_t len;
 
 		// Get optimal match (repeat position and length).
 		// Value ranges for pos:
@@ -327,33 +360,324 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
 		//     match at (pos - REP_DISTANCES)
 		//   - UINT32_MAX: not a match but a literal
 		// Value ranges for len:
-		//   - [MATCH_MIN_LEN, MATCH_MAX_LEN]
-		if (coder->best_compression)
-			lzma_get_optimum(coder, &pos, &len);
+		//   - [MATCH_LEN_MIN, MATCH_LEN_MAX]
+		uint32_t len;
+		uint32_t back;
+
+		if (coder->fast_mode)
+			lzma_lzma_optimum_fast(coder, mf, &back, &len);
 		else
-			lzma_get_optimum_fast(coder, &pos, &len);
+			lzma_lzma_optimum_normal(
+					coder, mf, &back, &len, position);
+
+		encode_symbol(coder, mf, back, len, position);
+
+		position += len;
+	}
+
+	if (!coder->is_flushed) {
+		coder->is_flushed = true;
 
-		encode_symbol(coder, pos, len);
+		// We don't support encoding plain LZMA streams without EOPM,
+		// and LZMA2 doesn't use EOPM at LZMA level.
+		if (limit == UINT32_MAX)
+			encode_eopm(coder, position);
+
+		// Flush the remaining bytes from the range encoder.
+		rc_flush(&coder->rc);
+
+		// Copy the remaining bytes to the output buffer. If there
+		// isn't enough output space, we will copy out the remaining
+		// bytes on the next call to this function by using
+		// the rc_encode() call in the encoding loop above.
+		if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+			assert(limit == UINT32_MAX);
+			return LZMA_OK;
+		}
 	}
 
-	assert(!coder->longest_match_was_found);
+	// Make it ready for the next LZMA2 chunk.
+	coder->is_flushed = false;
+
+	return LZMA_STREAM_END;
+}
+
+
+static lzma_ret
+lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	// Plain LZMA has no support for sync-flushing.
+	if (unlikely(mf->action == LZMA_SYNC_FLUSH))
+		return LZMA_HEADER_ERROR;
+
+	return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX);
+}
+
 
-	if (coder->is_flushed) {
-		coder->is_flushed = false;
+////////////////////
+// Initialization //
+////////////////////
+
+static bool
+set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
+{
+	if (!is_lclppb_valid(options)
+			|| options->fast_bytes < LZMA_FAST_BYTES_MIN
+			|| options->fast_bytes > LZMA_FAST_BYTES_MAX)
 		return true;
+
+	// FIXME validation
+
+	lz_options->before_size = OPTS;
+	lz_options->dictionary_size = options->dictionary_size;
+	lz_options->after_size = LOOP_INPUT_MAX;
+	lz_options->match_len_max = MATCH_LEN_MAX;
+	lz_options->find_len_max = options->fast_bytes;
+	lz_options->match_finder = options->match_finder;
+	lz_options->match_finder_cycles = options->match_finder_cycles;
+	lz_options->preset_dictionary = options->preset_dictionary;
+	lz_options->preset_dictionary_size = options->preset_dictionary_size;
+
+	return false;
+}
+
+
+static void
+length_encoder_reset(lzma_length_encoder *lencoder,
+		const uint32_t num_pos_states, const bool fast_mode)
+{
+	bit_reset(lencoder->choice);
+	bit_reset(lencoder->choice2);
+
+	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
+		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
+		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
 	}
 
-	// We don't support encoding old LZMA streams without EOPM, and LZMA2
-	// doesn't use EOPM at LZMA level.
-	if (coder->write_eopm)
-		encode_eopm(coder);
+	bittree_reset(lencoder->high, LEN_HIGH_BITS);
 
-	rc_flush(&coder->rc);
+	if (!fast_mode)
+		for (size_t pos_state = 0; pos_state < num_pos_states;
+				++pos_state)
+			length_update_prices(lencoder, pos_state);
 
-	if (rc_encode(&coder->rc, out, out_pos, out_size)) {
-		coder->is_flushed = true;
-		return false;
+	return;
+}
+
+
+extern void
+lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
+{
+	assert(!coder->is_flushed);
+
+	coder->pos_mask = (1U << options->pos_bits) - 1;
+	coder->literal_context_bits = options->literal_context_bits;
+	coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1;
+
+
+	// Range coder
+	rc_reset(&coder->rc);
+
+	// State
+	coder->state = 0;
+	for (size_t i = 0; i < REP_DISTANCES; ++i)
+		coder->reps[i] = 0;
+
+	literal_init(coder->literal, options->literal_context_bits,
+			options->literal_pos_bits);
+
+	// Bit encoders
+	for (size_t i = 0; i < STATES; ++i) {
+		for (size_t j = 0; j <= coder->pos_mask; ++j) {
+			bit_reset(coder->is_match[i][j]);
+			bit_reset(coder->is_rep0_long[i][j]);
+		}
+
+		bit_reset(coder->is_rep[i]);
+		bit_reset(coder->is_rep0[i]);
+		bit_reset(coder->is_rep1[i]);
+		bit_reset(coder->is_rep2[i]);
 	}
 
-	return true;
+	for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
+		bit_reset(coder->pos_special[i]);
+
+	// Bit tree encoders
+	for (size_t i = 0; i < LEN_TO_POS_STATES; ++i)
+		bittree_reset(coder->pos_slot[i], POS_SLOT_BITS);
+
+	bittree_reset(coder->pos_align, ALIGN_BITS);
+
+	// Length encoders
+	length_encoder_reset(&coder->match_len_encoder,
+			1U << options->pos_bits, coder->fast_mode);
+
+	length_encoder_reset(&coder->rep_len_encoder,
+			1U << options->pos_bits, coder->fast_mode);
+
+	// FIXME: Too big or too small won't work when resetting in the middle of LZMA2.
+	coder->match_price_count = UINT32_MAX / 2;
+	coder->align_price_count = UINT32_MAX / 2;
+
+	coder->opts_end_index = 0;
+	coder->opts_current_index = 0;
+}
+
+
+extern lzma_ret
+lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
+		const lzma_options_lzma *options, lzma_lz_options *lz_options)
+{
+	if (*coder_ptr == NULL) {
+		*coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (*coder_ptr == NULL)
+			return LZMA_MEM_ERROR;
+	}
+
+	lzma_coder *coder = *coder_ptr;
+
+	// Validate options that aren't validated elsewhere.
+	if (!is_lclppb_valid(options)
+			|| options->fast_bytes < LZMA_FAST_BYTES_MIN
+			|| options->fast_bytes > LZMA_FAST_BYTES_MAX)
+		return LZMA_HEADER_ERROR;
+
+	// Set compression mode.
+	switch (options->mode) {
+		case LZMA_MODE_FAST:
+			coder->fast_mode = true;
+			break;
+
+		case LZMA_MODE_NORMAL: {
+			coder->fast_mode = false;
+
+			// Set dist_table_size.
+			// Round the dictionary size up to next 2^n.
+			uint32_t log_size = 0;
+			while ((UINT32_C(1) << log_size)
+					< options->dictionary_size)
+				++log_size;
+
+			coder->dist_table_size = log_size * 2;
+
+			// Length encoders' price table size
+			coder->match_len_encoder.table_size
+				= options->fast_bytes + 1 - MATCH_LEN_MIN;
+			coder->rep_len_encoder.table_size
+				= options->fast_bytes + 1 - MATCH_LEN_MIN;
+			break;
+		}
+
+		default:
+			return LZMA_HEADER_ERROR;
+	}
+
+	coder->is_initialized = false;
+	coder->is_flushed = false;
+
+	lzma_lzma_encoder_reset(coder, options);
+
+	// LZ encoder options FIXME validation
+	if (set_lz_options(lz_options, options))
+		return LZMA_HEADER_ERROR;
+
+	return LZMA_OK;
+}
+
+
+static lzma_ret
+lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+		const void *options, lzma_lz_options *lz_options)
+{
+	lz->code = &lzma_encode;
+	return lzma_lzma_encoder_create(
+			&lz->coder, allocator, options, lz_options);
+}
+
+
+extern lzma_ret
+lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	// Initialization call chain:
+	//
+	//    lzma_lzma_encoder_init()
+	//      `-- lzma_lz_encoder_init()
+	//            `-- lzma_encoder_init()
+	//                  `-- lzma_encoder_init2()
+	//
+	// The above complexity is to let LZ encoder store the pointer to
+	// the LZMA encoder structure. Encoding call tree:
+	//
+	//    lz_encode()
+	//      |-- fill_window()
+	//      |     `-- Next coder in the chain, if any
+	//      `-- lzma_encode()
+	//            |-- lzma_dict_find()
+	//            `-- lzma_dict_skip()
+	//
+	// FIXME ^
+	//
+	return lzma_lz_encoder_init(
+			next, allocator, filters, &lzma_encoder_init);
+}
+
+
+extern uint64_t
+lzma_lzma_encoder_memusage(const void *options)
+{
+	lzma_lz_options lz_options;
+	if (set_lz_options(&lz_options, options))
+		return UINT64_MAX;
+
+	const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
+	if (lz_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return (uint64_t)(sizeof(lzma_coder)) + lz_memusage;
+}
+
+
+extern bool
+lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte)
+{
+	if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX
+			|| options->literal_pos_bits
+				> LZMA_LITERAL_POS_BITS_MAX
+			|| options->pos_bits > LZMA_POS_BITS_MAX
+			|| options->literal_context_bits
+					+ options->literal_pos_bits
+				> LZMA_LITERAL_BITS_MAX)
+		return true;
+
+	*byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9
+			+ options->literal_context_bits;
+	assert(*byte <= (4 * 5 + 4) * 9 + 8);
+
+	return false;
+}
+
+
+#ifdef HAVE_ENCODER_LZMA
+extern lzma_ret
+lzma_lzma_props_encode(const void *options, uint8_t *out)
+{
+	const lzma_options_lzma *const opt = options;
+
+	if (lzma_lzma_lclppb_encode(opt, out))
+		return LZMA_PROG_ERROR;
+
+	integer_write_32(out + 1, opt->dictionary_size);
+
+	return LZMA_OK;
+}
+#endif
+
+
+extern LZMA_API lzma_bool
+lzma_mode_is_available(lzma_mode mode)
+{
+	return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL;
 }
diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h
index 1c57f80a..e270cc27 100644
--- a/src/liblzma/lzma/lzma_encoder.h
+++ b/src/liblzma/lzma/lzma_encoder.h
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       lzma_encoder.h
-/// \brief      LZMA method handler API
+/// \brief      LZMA encoder API
 //
 //  Copyright (C) 1999-2006 Igor Pavlov
 //  Copyright (C) 2007 Lasse Collin
@@ -23,13 +23,47 @@
 
 #include "common.h"
 
+
 extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next,
 		lzma_allocator *allocator, const lzma_filter_info *filters);
 
-extern bool lzma_lzma_encode_properties(
+
+extern uint64_t lzma_lzma_encoder_memusage(const void *options);
+
+extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out);
+
+
+/// Encodes lc/lp/pb into one byte. Returns false on success and true on error.
+extern bool lzma_lzma_lclppb_encode(
 		const lzma_options_lzma *options, uint8_t *byte);
 
+
+#ifdef HAVE_SMALL
+
 /// Initializes the lzma_fastpos[] array.
 extern void lzma_fastpos_init(void);
 
 #endif
+
+
+#ifdef LZMA_LZ_ENCODER_H
+
+/// Initializes raw LZMA encoder; this is used by LZMA2.
+extern lzma_ret lzma_lzma_encoder_create(
+		lzma_coder **coder_ptr, lzma_allocator *allocator,
+		const lzma_options_lzma *options, lzma_lz_options *lz_options);
+
+
+/// Resets an already initialized LZMA encoder; this is used by LZMA2.
+extern void lzma_lzma_encoder_reset(
+		lzma_coder *coder, const lzma_options_lzma *options);
+
+
+extern lzma_ret lzma_lzma_encode(lzma_coder *restrict coder,
+		lzma_mf *restrict mf, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size,
+		uint32_t read_limit);
+
+#endif
+
+#endif
diff --git a/src/liblzma/lzma/lzma_encoder_features.c b/src/liblzma/lzma/lzma_encoder_features.c
index 56e59c6a..9fecee48 100644
--- a/src/liblzma/lzma/lzma_encoder_features.c
+++ b/src/liblzma/lzma/lzma_encoder_features.c
@@ -22,7 +22,7 @@
 
 static lzma_mode modes[] = {
 	LZMA_MODE_FAST,
-	LZMA_MODE_BEST,
+	LZMA_MODE_NORMAL,
 	LZMA_MODE_INVALID
 };
 
diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c
deleted file mode 100644
index b175e4cb..00000000
--- a/src/liblzma/lzma/lzma_encoder_getoptimum.c
+++ /dev/null
@@ -1,925 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_encoder_getoptimum.c
-//
-//  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-// NOTE: If you want to keep the line length in 80 characters, set
-//       tab width to 4 or less in your editor when editing this file.
-
-
-// "Would you love the monster code?
-//  Could you understand beauty of the beast?"
-//      --Adapted from Lordi's "Would you love a monster man".
-
-
-#include "lzma_encoder_private.h"
-#include "fastpos.h"
-
-
-#define length_get_price(length_encoder, symbol, pos_state) \
-	(length_encoder).prices[pos_state][symbol]
-
-
-#define get_rep_len_1_price(state, pos_state) \
-	bit_get_price_0(coder->is_rep0[state]) \
-			+ bit_get_price_0(coder->is_rep0_long[state][pos_state])
-
-
-// Adds to price_target.
-#define get_pure_rep_price(price_target, rep_index, state, pos_state) \
-do { \
-	if ((rep_index) == 0) { \
-		price_target += bit_get_price_0(coder->is_rep0[state]); \
-		price_target += bit_get_price_1( \
-				coder->is_rep0_long[state][pos_state]); \
-	} else { \
-		price_target += bit_get_price_1(coder->is_rep0[state]); \
-		if ((rep_index) == 1) { \
-			price_target += bit_get_price_0(coder->is_rep1[state]); \
-		} else { \
-			price_target += bit_get_price_1(coder->is_rep1[state]); \
-			price_target += bit_get_price( \
-					coder->is_rep2[state], (rep_index) - 2); \
-		} \
-	} \
-} while (0)
-
-
-// Adds to price_target.
-#define get_rep_price(price_target, rep_index, len, state, pos_state) \
-do { \
-	get_pure_rep_price(price_target, rep_index, state, pos_state); \
-	price_target += length_get_price(coder->rep_len_encoder, \
-			(len) - MATCH_MIN_LEN, pos_state); \
-} while (0)
-
-
-// Adds to price_target.
-#define get_pos_len_price(price_target, pos, len, pos_state) \
-do { \
-	const uint32_t len_to_pos_state_tmp = get_len_to_pos_state(len); \
-	if ((pos) < FULL_DISTANCES) { \
-		price_target += distances_prices[len_to_pos_state_tmp][pos]; \
-	} else { \
-		price_target \
-				+= pos_slot_prices[len_to_pos_state_tmp][get_pos_slot_2(pos)] \
-				+ align_prices[(pos) & ALIGN_MASK]; \
-	} \
-	price_target += length_get_price( \
-			coder->match_len_encoder, (len) - MATCH_MIN_LEN, pos_state); \
-} while (0)
-
-
-// Three macros to manipulate lzma_optimal structures:
-#define make_as_char(opt) \
-do { \
-	(opt).back_prev = UINT32_MAX; \
-	(opt).prev_1_is_char = false; \
-} while (0)
-
-
-#define make_as_short_rep(opt) \
-do { \
-	(opt).back_prev = 0; \
-	(opt).prev_1_is_char = false; \
-} while (0)
-
-
-#define is_short_rep(opt) \
-	((opt).back_prev == 0)
-
-
-static void
-fill_length_prices(lzma_length_encoder *lc, uint32_t pos_state)
-{
-	const uint32_t num_symbols = lc->table_size;
-	const uint32_t a0 = bit_get_price_0(lc->choice);
-	const uint32_t a1 = bit_get_price_1(lc->choice);
-	const uint32_t b0 = a1 + bit_get_price_0(lc->choice2);
-	const uint32_t b1 = a1 + bit_get_price_1(lc->choice2);
-
-	uint32_t *prices = lc->prices[pos_state];
-	uint32_t i = 0;
-
-	for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i)
-		prices[i] = a0 + bittree_get_price(lc->low[pos_state],
-				LEN_LOW_BITS, i);
-
-	for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
-		prices[i] = b0 + bittree_get_price(lc->mid[pos_state],
-				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
-
-	for (; i < num_symbols; ++i)
-		prices[i] = b1 + bittree_get_price(lc->high, LEN_HIGH_BITS,
-				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
-
-	lc->counters[pos_state] = num_symbols;
-
-	return;
-}
-
-
-static void
-fill_distances_prices(lzma_coder *coder)
-{
-	uint32_t temp_prices[FULL_DISTANCES];
-
-	for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) {
-		const uint32_t pos_slot = get_pos_slot(i);
-		const uint32_t footer_bits = ((pos_slot >> 1) - 1);
-		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
-		temp_prices[i] = bittree_reverse_get_price(
-				coder->pos_encoders + base - pos_slot - 1,
-				footer_bits, i - base);
-	}
-
-	const uint32_t dist_table_size = coder->dist_table_size;
-
-	for (uint32_t len_to_pos_state = 0;
-			len_to_pos_state < LEN_TO_POS_STATES;
-			++len_to_pos_state) {
-
-		const probability *encoder = coder->pos_slot_encoder[len_to_pos_state];
-		uint32_t *pos_slot_prices = coder->pos_slot_prices[len_to_pos_state];
-
-		for (uint32_t pos_slot = 0;
-				pos_slot < dist_table_size;
-				++pos_slot) {
-			pos_slot_prices[pos_slot] = bittree_get_price(encoder,
-					POS_SLOT_BITS, pos_slot);
-		}
-
-		for (uint32_t pos_slot = END_POS_MODEL_INDEX;
-				pos_slot < dist_table_size;
-				++pos_slot)
-			pos_slot_prices[pos_slot] += (((pos_slot >> 1) - 1)
-					- ALIGN_BITS) << BIT_PRICE_SHIFT_BITS;
-
-
-		uint32_t *distances_prices
-				= coder->distances_prices[len_to_pos_state];
-
-		uint32_t i;
-		for (i = 0; i < START_POS_MODEL_INDEX; ++i)
-			distances_prices[i] = pos_slot_prices[i];
-
-		for (; i < FULL_DISTANCES; ++i)
-			distances_prices[i] = pos_slot_prices[get_pos_slot(i)]
-					+ temp_prices[i];
-	}
-
-	coder->match_price_count = 0;
-
-	return;
-}
-
-
-static void
-fill_align_prices(lzma_coder *coder)
-{
-	for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i)
-		coder->align_prices[i] = bittree_reverse_get_price(
-				coder->pos_align_encoder, ALIGN_BITS, i);
-
-	coder->align_price_count = 0;
-	return;
-}
-
-
-// The first argument is a pointer returned by literal_get_subcoder().
-static uint32_t
-literal_get_price(const probability *encoders, const bool match_mode,
-		const uint8_t match_byte, const uint8_t symbol)
-{
-	uint32_t price = 0;
-	uint32_t context = 1;
-	int i = 8;
-
-	if (match_mode) {
-		do {
-			--i;
-			const uint32_t match_bit = (match_byte >> i) & 1;
-			const uint32_t bit = (symbol >> i) & 1;
-			const uint32_t subcoder_index
-					= 0x100 + (match_bit << 8) + context;
-
-			price += bit_get_price(encoders[subcoder_index], bit);
-			context = (context << 1) | bit;
-
-			if (match_bit != bit)
-				break;
-
-		} while (i != 0);
-	}
-
-	while (i != 0) {
-		--i;
-		const uint32_t bit = (symbol >> i) & 1;
-		price += bit_get_price(encoders[context], bit);
-		context = (context << 1) | bit;
-	}
-
-	return price;
-}
-
-
-static void
-backward(lzma_coder *restrict coder, uint32_t *restrict len_res,
-		uint32_t *restrict back_res, uint32_t cur)
-{
-	coder->optimum_end_index = cur;
-
-	uint32_t pos_mem = coder->optimum[cur].pos_prev;
-	uint32_t back_mem = coder->optimum[cur].back_prev;
-
-	do {
-		if (coder->optimum[cur].prev_1_is_char) {
-			make_as_char(coder->optimum[pos_mem]);
-			coder->optimum[pos_mem].pos_prev = pos_mem - 1;
-
-			if (coder->optimum[cur].prev_2) {
-				coder->optimum[pos_mem - 1].prev_1_is_char = false;
-				coder->optimum[pos_mem - 1].pos_prev
-						= coder->optimum[cur].pos_prev_2;
-				coder->optimum[pos_mem - 1].back_prev
-						= coder->optimum[cur].back_prev_2;
-			}
-		}
-
-		uint32_t pos_prev = pos_mem;
-		uint32_t back_cur = back_mem;
-
-		back_mem = coder->optimum[pos_prev].back_prev;
-		pos_mem = coder->optimum[pos_prev].pos_prev;
-
-		coder->optimum[pos_prev].back_prev = back_cur;
-		coder->optimum[pos_prev].pos_prev = cur;
-		cur = pos_prev;
-
-	} while (cur != 0);
-
-	coder->optimum_current_index = coder->optimum[0].pos_prev;
-	*len_res = coder->optimum[0].pos_prev;
-	*back_res = coder->optimum[0].back_prev;
-
-	return;
-}
-
-
-extern void
-lzma_get_optimum(lzma_coder *restrict coder,
-		uint32_t *restrict back_res, uint32_t *restrict len_res)
-{
-	uint32_t position = coder->now_pos;
-	uint32_t pos_state = position & coder->pos_mask;
-
-	// Update the price tables. In the C++ LZMA SDK 4.42 this was done in both
-	// initialization function and in the main loop. In liblzma they were
-	// moved into this single place.
-	if (coder->additional_offset == 0) {
-		if (coder->match_price_count >= (1 << 7))
-			fill_distances_prices(coder);
-
-		if (coder->align_price_count >= ALIGN_TABLE_SIZE)
-			fill_align_prices(coder);
-	}
-
-	if (coder->prev_len_encoder != NULL) {
-		if (--coder->prev_len_encoder->counters[pos_state] == 0)
-			fill_length_prices(coder->prev_len_encoder, pos_state);
-
-		coder->prev_len_encoder = NULL;
-	}
-
-
-	if (coder->optimum_end_index != coder->optimum_current_index) {
-		*len_res = coder->optimum[coder->optimum_current_index].pos_prev
-				- coder->optimum_current_index;
-		*back_res = coder->optimum[coder->optimum_current_index].back_prev;
-		coder->optimum_current_index = coder->optimum[
-				coder->optimum_current_index].pos_prev;
-		return;
-	}
-
-	coder->optimum_current_index = 0;
-	coder->optimum_end_index = 0;
-
-
-	const uint32_t fast_bytes = coder->fast_bytes;
-	uint32_t *match_distances = coder->match_distances;
-
-	uint32_t len_main;
-	uint32_t num_distance_pairs;
-
-	if (!coder->longest_match_was_found) {
-		lzma_read_match_distances(coder, &len_main, &num_distance_pairs);
-	} else {
-		len_main = coder->longest_match_length;
-		num_distance_pairs = coder->num_distance_pairs;
-		coder->longest_match_was_found = false;
-	}
-
-
-	const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1;
-	uint32_t num_available_bytes
-			= coder->lz.write_pos - coder->lz.read_pos + 1;
-	if (num_available_bytes < 2) {
-		*back_res = UINT32_MAX;
-		*len_res = 1;
-		return;
-	}
-
-	if (num_available_bytes > MATCH_MAX_LEN)
-		num_available_bytes = MATCH_MAX_LEN;
-
-
-	uint32_t reps[REP_DISTANCES];
-	uint32_t rep_lens[REP_DISTANCES];
-	uint32_t rep_max_index = 0;
-
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-		reps[i] = coder->reps[i];
-		const uint32_t back_offset = reps[i] + 1;
-
-		if (buf[0] != *(buf - back_offset)
-				|| buf[1] != *(buf + 1 - back_offset)) {
-			rep_lens[i] = 0;
-			continue;
-		}
-
-		uint32_t len_test;
-		for (len_test = 2; len_test < num_available_bytes
-				&& buf[len_test] == *(buf + len_test - back_offset);
-				++len_test) ;
-
-		rep_lens[i] = len_test;
-		if (len_test > rep_lens[rep_max_index])
-			rep_max_index = i;
-	}
-
-	if (rep_lens[rep_max_index] >= fast_bytes) {
-		*back_res = rep_max_index;
-		*len_res = rep_lens[rep_max_index];
-		move_pos(*len_res - 1);
-		return;
-	}
-
-
-	if (len_main >= fast_bytes) {
-		*back_res = match_distances[num_distance_pairs] + REP_DISTANCES;
-		*len_res = len_main;
-		move_pos(len_main - 1);
-		return;
-	}
-
-	uint8_t current_byte = *buf;
-	uint8_t match_byte = *(buf - reps[0] - 1);
-
-	if (len_main < 2 && current_byte != match_byte
-			&& rep_lens[rep_max_index] < 2) {
-		*back_res = UINT32_MAX;
-		*len_res = 1;
-		return;
-	}
-
-	coder->optimum[0].state = coder->state;
-
-	coder->optimum[1].price = bit_get_price_0(
-				coder->is_match[coder->state][pos_state])
-			+ literal_get_price(
-				literal_get_subcoder(coder->literal_coder,
-					position, coder->previous_byte),
-				!is_literal_state(coder->state), match_byte, current_byte);
-
-	make_as_char(coder->optimum[1]);
-
-	uint32_t match_price
-			= bit_get_price_1(coder->is_match[coder->state][pos_state]);
-	uint32_t rep_match_price
-			= match_price + bit_get_price_1(coder->is_rep[coder->state]);
-
-
-	if (match_byte == current_byte) {
-		const uint32_t short_rep_price = rep_match_price
-				+ get_rep_len_1_price(coder->state, pos_state);
-
-		if (short_rep_price < coder->optimum[1].price) {
-			coder->optimum[1].price = short_rep_price;
-			make_as_short_rep(coder->optimum[1]);
-		}
-	}
-
-	uint32_t len_end = (len_main >= rep_lens[rep_max_index])
-			? len_main
-			: rep_lens[rep_max_index];
-
-	if (len_end < 2) {
-		*back_res = coder->optimum[1].back_prev;
-		*len_res = 1;
-		return;
-	}
-
-	coder->optimum[1].pos_prev = 0;
-
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
-		coder->optimum[0].backs[i] = reps[i];
-
-	uint32_t len = len_end;
-	do {
-		coder->optimum[len].price = INFINITY_PRICE;
-	} while (--len >= 2);
-
-
-	uint32_t (*distances_prices)[FULL_DISTANCES] = coder->distances_prices;
-	uint32_t (*pos_slot_prices)[DIST_TABLE_SIZE_MAX] = coder->pos_slot_prices;
-	uint32_t *align_prices = coder->align_prices;
-
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-		uint32_t rep_len = rep_lens[i];
-		if (rep_len < 2)
-			continue;
-
-		uint32_t price = rep_match_price;
-		get_pure_rep_price(price, i, coder->state, pos_state);
-
-		do {
-			const uint32_t cur_and_len_price = price
-					+ length_get_price(
-					coder->rep_len_encoder,
-					rep_len - 2, pos_state);
-
-			if (cur_and_len_price < coder->optimum[rep_len].price) {
-				coder->optimum[rep_len].price = cur_and_len_price;
-				coder->optimum[rep_len].pos_prev = 0;
-				coder->optimum[rep_len].back_prev = i;
-				coder->optimum[rep_len].prev_1_is_char = false;
-			}
-		} while (--rep_len >= 2);
-	}
-
-
-	uint32_t normal_match_price = match_price
-			+ bit_get_price_0(coder->is_rep[coder->state]);
-
-	len = (rep_lens[0] >= 2) ? rep_lens[0] + 1 : 2;
-
-	if (len <= len_main) {
-		uint32_t offs = 0;
-
-		while (len > match_distances[offs + 1])
-			offs += 2;
-
-		for(; ; ++len) {
-			const uint32_t distance = match_distances[offs + 2];
-			uint32_t cur_and_len_price = normal_match_price;
-			get_pos_len_price(cur_and_len_price, distance, len, pos_state);
-
-			if (cur_and_len_price < coder->optimum[len].price) {
-				coder->optimum[len].price = cur_and_len_price;
-				coder->optimum[len].pos_prev = 0;
-				coder->optimum[len].back_prev = distance + REP_DISTANCES;
-				coder->optimum[len].prev_1_is_char = false;
-			}
-
-			if (len == match_distances[offs + 1]) {
-				offs += 2;
-				if (offs == num_distance_pairs)
-					break;
-			}
-		}
-	}
-
-
-	//////////////////
-	// Big loop ;-) //
-	//////////////////
-
-	uint32_t cur = 0;
-
-	// The rest of this function is a huge while-loop. To avoid extreme
-	// indentation, the indentation level is not increased here.
-	while (true) {
-
-	++cur;
-
-	assert(cur < OPTS);
-
-	if (cur == len_end) {
-		backward(coder, len_res, back_res, cur);
-		return;
-	}
-
-	uint32_t new_len;
-
-	lzma_read_match_distances(coder, &new_len, &num_distance_pairs);
-
-	if (new_len >= fast_bytes) {
-		coder->num_distance_pairs = num_distance_pairs;
-		coder->longest_match_length = new_len;
-		coder->longest_match_was_found = true;
-		backward(coder, len_res, back_res, cur);
-		return;
-	}
-
-
-	++position;
-
-	uint32_t pos_prev = coder->optimum[cur].pos_prev;
-	uint32_t state;
-
-	if (coder->optimum[cur].prev_1_is_char) {
-		--pos_prev;
-
-		if (coder->optimum[cur].prev_2) {
-			state = coder->optimum[coder->optimum[cur].pos_prev_2].state;
-
-			if (coder->optimum[cur].back_prev_2 < REP_DISTANCES)
-				update_long_rep(state);
-			else
-				update_match(state);
-
-		} else {
-			state = coder->optimum[pos_prev].state;
-		}
-
-		update_literal(state);
-
-	} else {
-		state = coder->optimum[pos_prev].state;
-	}
-
-	if (pos_prev == cur - 1) {
-		if (is_short_rep(coder->optimum[cur]))
-			update_short_rep(state);
-		else
-			update_literal(state);
-	} else {
-		uint32_t pos;
-		if (coder->optimum[cur].prev_1_is_char && coder->optimum[cur].prev_2) {
-			pos_prev = coder->optimum[cur].pos_prev_2;
-			pos = coder->optimum[cur].back_prev_2;
-			update_long_rep(state);
-		} else {
-			pos = coder->optimum[cur].back_prev;
-			if (pos < REP_DISTANCES)
-				update_long_rep(state);
-			else
-				update_match(state);
-		}
-
-		if (pos < REP_DISTANCES) {
-			reps[0] = coder->optimum[pos_prev].backs[pos];
-
-			uint32_t i;
-			for (i = 1; i <= pos; ++i)
-				reps[i] = coder->optimum[pos_prev].backs[i - 1];
-
-			for (; i < REP_DISTANCES; ++i)
-				reps[i] = coder->optimum[pos_prev].backs[i];
-
-		} else {
-			reps[0] = pos - REP_DISTANCES;
-
-			for (uint32_t i = 1; i < REP_DISTANCES; ++i)
-				reps[i] = coder->optimum[pos_prev].backs[i - 1];
-		}
-	}
-
-	coder->optimum[cur].state = state;
-
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
-		coder->optimum[cur].backs[i] = reps[i];
-
-	const uint32_t cur_price = coder->optimum[cur].price;
-
-	buf = coder->lz.buffer + coder->lz.read_pos - 1;
-	current_byte = *buf;
-	match_byte = *(buf - reps[0] - 1);
-
-	pos_state = position & coder->pos_mask;
-
-	const uint32_t cur_and_1_price = cur_price
-			+ bit_get_price_0(coder->is_match[state][pos_state])
-			+ literal_get_price(
-				literal_get_subcoder(coder->literal_coder,
-					position, buf[-1]),
-        		!is_literal_state(state), match_byte, current_byte);
-
-	bool next_is_char = false;
-
-	if (cur_and_1_price < coder->optimum[cur + 1].price) {
-		coder->optimum[cur + 1].price = cur_and_1_price;
-		coder->optimum[cur + 1].pos_prev = cur;
-		make_as_char(coder->optimum[cur + 1]);
-		next_is_char = true;
-	}
-
-	match_price = cur_price
-			+ bit_get_price_1(coder->is_match[state][pos_state]);
-	rep_match_price = match_price
-			+ bit_get_price_1(coder->is_rep[state]);
-
-	if (match_byte == current_byte
-			&& !(coder->optimum[cur + 1].pos_prev < cur
-			&& coder->optimum[cur + 1].back_prev == 0)) {
-
-		const uint32_t short_rep_price = rep_match_price
-				+ get_rep_len_1_price(state, pos_state);
-
-		if (short_rep_price <= coder->optimum[cur + 1].price) {
-			coder->optimum[cur + 1].price = short_rep_price;
-			coder->optimum[cur + 1].pos_prev = cur;
-			make_as_short_rep(coder->optimum[cur + 1]);
-			next_is_char = true;
-		}
-	}
-
-	uint32_t num_available_bytes_full
-			= coder->lz.write_pos - coder->lz.read_pos + 1;
-	num_available_bytes_full = MIN(OPTS - 1 - cur, num_available_bytes_full);
-	num_available_bytes = num_available_bytes_full;
-
-	if (num_available_bytes < 2)
-		continue;
-
-	if (num_available_bytes > fast_bytes)
-		num_available_bytes = fast_bytes;
-
-	if (!next_is_char && match_byte != current_byte) { // speed optimization
-		// try literal + rep0
-		const uint32_t back_offset = reps[0] + 1;
-		const uint32_t limit = MIN(num_available_bytes_full, fast_bytes + 1);
-
-		uint32_t temp;
-		for (temp = 1; temp < limit
-				&& buf[temp] == *(buf + temp - back_offset);
-				++temp) ;
-
-		const uint32_t len_test_2 = temp - 1;
-
-		if (len_test_2 >= 2) {
-			uint32_t state_2 = state;
-			update_literal(state_2);
-
-			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
-			const uint32_t next_rep_match_price = cur_and_1_price
-					+ bit_get_price_1(coder->is_match[state_2][pos_state_next])
-					+ bit_get_price_1(coder->is_rep[state_2]);
-
-			// for (; len_test_2 >= 2; --len_test_2) {
-			const uint32_t offset = cur + 1 + len_test_2;
-
-			while (len_end < offset)
-				coder->optimum[++len_end].price = INFINITY_PRICE;
-
-			uint32_t cur_and_len_price = next_rep_match_price;
-			get_rep_price(cur_and_len_price,
-					0, len_test_2, state_2, pos_state_next);
-
-			if (cur_and_len_price < coder->optimum[offset].price) {
-				coder->optimum[offset].price = cur_and_len_price;
-				coder->optimum[offset].pos_prev = cur + 1;
-				coder->optimum[offset].back_prev = 0;
-				coder->optimum[offset].prev_1_is_char = true;
-				coder->optimum[offset].prev_2 = false;
-			}
-// 			}
-		}
-	}
-
-
-	uint32_t start_len = 2; // speed optimization
-
-	for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) {
-		const uint32_t back_offset = reps[rep_index] + 1;
-
-		if (buf[0] != *(buf - back_offset) || buf[1] != *(buf + 1 - back_offset))
-			continue;
-
-		uint32_t len_test;
-		for (len_test = 2; len_test < num_available_bytes
-				&& buf[len_test] == *(buf + len_test - back_offset);
-				++len_test) ;
-
-		while (len_end < cur + len_test)
-			coder->optimum[++len_end].price = INFINITY_PRICE;
-
-		const uint32_t len_test_temp = len_test;
-		uint32_t price = rep_match_price;
-		get_pure_rep_price(price, rep_index, state, pos_state);
-
-		do {
-			const uint32_t cur_and_len_price = price
-					+ length_get_price(coder->rep_len_encoder,
-							len_test - 2, pos_state);
-
-			if (cur_and_len_price < coder->optimum[cur + len_test].price) {
-				coder->optimum[cur + len_test].price = cur_and_len_price;
-				coder->optimum[cur + len_test].pos_prev = cur;
-				coder->optimum[cur + len_test].back_prev = rep_index;
-				coder->optimum[cur + len_test].prev_1_is_char = false;
-			}
-		} while (--len_test >= 2);
-
-		len_test = len_test_temp;
-
-		if (rep_index == 0)
-			start_len = len_test + 1;
-
-
-		uint32_t len_test_2 = len_test + 1;
-		const uint32_t limit = MIN(num_available_bytes_full,
-				len_test_2 + fast_bytes);
-		for (; len_test_2 < limit
-				&& buf[len_test_2] == *(buf + len_test_2 - back_offset);
-				++len_test_2) ;
-
-		len_test_2 -= len_test + 1;
-
-		if (len_test_2 >= 2) {
-			uint32_t state_2 = state;
-			update_long_rep(state_2);
-
-			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
-
-			const uint32_t cur_and_len_char_price = price
-					+ length_get_price(coder->rep_len_encoder,
-						len_test - 2, pos_state)
-					+ bit_get_price_0(coder->is_match[state_2][pos_state_next])
-					+ literal_get_price(
-						literal_get_subcoder(coder->literal_coder,
-							position + len_test, buf[len_test - 1]),
-						true, *(buf + len_test - back_offset), buf[len_test]);
-
-			update_literal(state_2);
-
-			pos_state_next = (position + len_test + 1) & coder->pos_mask;
-
-			const uint32_t next_rep_match_price = cur_and_len_char_price
-					+ bit_get_price_1(coder->is_match[state_2][pos_state_next])
-					+ bit_get_price_1(coder->is_rep[state_2]);
-
-// 			for(; len_test_2 >= 2; len_test_2--) {
-			const uint32_t offset = cur + len_test + 1 + len_test_2;
-
-			while (len_end < offset)
-				coder->optimum[++len_end].price = INFINITY_PRICE;
-
-			uint32_t cur_and_len_price = next_rep_match_price;
-			get_rep_price(cur_and_len_price,
-					0, len_test_2, state_2, pos_state_next);
-
-			if (cur_and_len_price < coder->optimum[offset].price) {
-				coder->optimum[offset].price = cur_and_len_price;
-				coder->optimum[offset].pos_prev = cur + len_test + 1;
-				coder->optimum[offset].back_prev = 0;
-				coder->optimum[offset].prev_1_is_char = true;
-				coder->optimum[offset].prev_2 = true;
-				coder->optimum[offset].pos_prev_2 = cur;
-				coder->optimum[offset].back_prev_2 = rep_index;
-			}
-// 			}
-		}
-	}
-
-
-// 	for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
-	if (new_len > num_available_bytes) {
-		new_len = num_available_bytes;
-
-		for (num_distance_pairs = 0;
-				new_len > match_distances[num_distance_pairs + 1];
-				num_distance_pairs += 2) ;
-
-		match_distances[num_distance_pairs + 1] = new_len;
-		num_distance_pairs += 2;
-	}
-
-
-	if (new_len >= start_len) {
-		normal_match_price = match_price
-				+ bit_get_price_0(coder->is_rep[state]);
-
-		while (len_end < cur + new_len)
-			coder->optimum[++len_end].price = INFINITY_PRICE;
-
-		uint32_t offs = 0;
-		while (start_len > match_distances[offs + 1])
-			offs += 2;
-
-		uint32_t cur_back = match_distances[offs + 2];
-		uint32_t pos_slot = get_pos_slot_2(cur_back);
-
-		for (uint32_t len_test = start_len; ; ++len_test) {
-			uint32_t cur_and_len_price = normal_match_price;
-			const uint32_t len_to_pos_state = get_len_to_pos_state(len_test);
-
-			if (cur_back < FULL_DISTANCES)
-				cur_and_len_price += distances_prices[
-						len_to_pos_state][cur_back];
-			else
-				cur_and_len_price += pos_slot_prices[
-						len_to_pos_state][pos_slot]
-						+ align_prices[cur_back & ALIGN_MASK];
-
-			cur_and_len_price += length_get_price(coder->match_len_encoder,
-					len_test - MATCH_MIN_LEN, pos_state);
-
-			if (cur_and_len_price < coder->optimum[cur + len_test].price) {
-				coder->optimum[cur + len_test].price = cur_and_len_price;
-				coder->optimum[cur + len_test].pos_prev = cur;
-				coder->optimum[cur + len_test].back_prev
-						= cur_back + REP_DISTANCES;
-				coder->optimum[cur + len_test].prev_1_is_char = false;
-			}
-
-			if (len_test == match_distances[offs + 1]) {
-				// Try Match + Literal + Rep0
-				const uint32_t back_offset = cur_back + 1;
-				uint32_t len_test_2 = len_test + 1;
-				const uint32_t limit = MIN(num_available_bytes_full,
-						len_test_2 + fast_bytes);
-
-				for (; len_test_2 < limit &&
-						buf[len_test_2] == *(buf + len_test_2 - back_offset);
-						++len_test_2) ;
-
-				len_test_2 -= len_test + 1;
-
-				if (len_test_2 >= 2) {
-					uint32_t state_2 = state;
-					update_match(state_2);
-					uint32_t pos_state_next
-							= (position + len_test) & coder->pos_mask;
-
-					const uint32_t cur_and_len_char_price = cur_and_len_price
-							+ bit_get_price_0(
-								coder->is_match[state_2][pos_state_next])
-							+ literal_get_price(
-								literal_get_subcoder(
-									coder->literal_coder,
-									position + len_test,
-									buf[len_test - 1]),
-								true,
-								*(buf + len_test - back_offset),
-								buf[len_test]);
-
-					update_literal(state_2);
-					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
-
-					const uint32_t next_rep_match_price
-							= cur_and_len_char_price
-							+ bit_get_price_1(
-								coder->is_match[state_2][pos_state_next])
-							+ bit_get_price_1(coder->is_rep[state_2]);
-
-					// for(; len_test_2 >= 2; --len_test_2) {
-					const uint32_t offset = cur + len_test + 1 + len_test_2;
-
-					while (len_end < offset)
-						coder->optimum[++len_end].price = INFINITY_PRICE;
-
-					cur_and_len_price = next_rep_match_price;
-					get_rep_price(cur_and_len_price,
-							0, len_test_2, state_2, pos_state_next);
-
-					if (cur_and_len_price < coder->optimum[offset].price) {
-						coder->optimum[offset].price = cur_and_len_price;
-						coder->optimum[offset].pos_prev = cur + len_test + 1;
-						coder->optimum[offset].back_prev = 0;
-						coder->optimum[offset].prev_1_is_char = true;
-						coder->optimum[offset].prev_2 = true;
-						coder->optimum[offset].pos_prev_2 = cur;
-						coder->optimum[offset].back_prev_2
-								= cur_back + REP_DISTANCES;
-					}
-// 					}
-				}
-
-				offs += 2;
-				if (offs == num_distance_pairs)
-					break;
-
-				cur_back = match_distances[offs + 2];
-				if (cur_back >= FULL_DISTANCES)
-					pos_slot = get_pos_slot_2(cur_back);
-			}
-		}
-	}
-
-	} // Closes: while (true)
-}
diff --git a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c b/src/liblzma/lzma/lzma_encoder_getoptimumfast.c
deleted file mode 100644
index fa06be21..00000000
--- a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c
+++ /dev/null
@@ -1,201 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_encoder_getoptimumfast.c
-//
-//  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-// NOTE: If you want to keep the line length in 80 characters, set
-//       tab width to 4 or less in your editor when editing this file.
-
-
-#include "lzma_encoder_private.h"
-
-
-#define change_pair(small_dist, big_dist) \
-	(((big_dist) >> 7) > (small_dist))
-
-
-extern void
-lzma_get_optimum_fast(lzma_coder *restrict coder,
-		uint32_t *restrict back_res, uint32_t *restrict len_res)
-{
-	// Local copies
-	const uint32_t fast_bytes = coder->fast_bytes;
-
-	uint32_t len_main;
-	uint32_t num_distance_pairs;
-	if (!coder->longest_match_was_found) {
-		lzma_read_match_distances(coder, &len_main, &num_distance_pairs);
-	} else {
-		len_main = coder->longest_match_length;
-		num_distance_pairs = coder->num_distance_pairs;
-		coder->longest_match_was_found = false;
-	}
-
-	const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1;
-	uint32_t num_available_bytes
-			= coder->lz.write_pos - coder->lz.read_pos + 1;
-
-	if (num_available_bytes < 2) {
-		// There's not enough input left to encode a match.
-		*back_res = UINT32_MAX;
-		*len_res = 1;
-		return;
-	}
-
-	if (num_available_bytes > MATCH_MAX_LEN)
-		num_available_bytes = MATCH_MAX_LEN;
-
-
-	// Look for repetitive matches; scan the previous four match distances
-	uint32_t rep_lens[REP_DISTANCES];
-	uint32_t rep_max_index = 0;
-
-	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-		const uint32_t back_offset = coder->reps[i] + 1;
-
-		// If the first two bytes (2 == MATCH_MIN_LEN) do not match,
-		// this rep_distance[i] is not useful. This is indicated
-		// using zero as the length of the repetitive match.
-		if (buf[0] != *(buf - back_offset)
-				|| buf[1] != *(buf + 1 - back_offset)) {
-			rep_lens[i] = 0;
-			continue;
-		}
-
-		// The first two bytes matched.
-		// Calculate the length of the match.
-		uint32_t len;
-		for (len = 2; len < num_available_bytes
-				&& buf[len] == *(buf + len - back_offset);
-				++len) ;
-
-		// If we have found a repetitive match that is at least
-		// as long as fast_bytes, return it immediatelly.
-		if (len >= fast_bytes) {
-			*back_res = i;
-			*len_res = len;
-			move_pos(len - 1);
-			return;
-		}
-
-		rep_lens[i] = len;
-
-		// After this loop, rep_lens[rep_max_index] is the biggest
-		// value of all values in rep_lens[].
-		if (len > rep_lens[rep_max_index])
-			rep_max_index = i;
-	}
-
-
-	if (len_main >= fast_bytes) {
-		*back_res = coder->match_distances[num_distance_pairs]
-				+ REP_DISTANCES;
-		*len_res = len_main;
-		move_pos(len_main - 1);
-		return;
-	}
-
-	uint32_t back_main = 0;
-	if (len_main >= 2) {
-		back_main = coder->match_distances[num_distance_pairs];
-
-		while (num_distance_pairs > 2 && len_main ==
-				coder->match_distances[num_distance_pairs - 3] + 1) {
-			if (!change_pair(coder->match_distances[
-					num_distance_pairs - 2], back_main))
-				break;
-
-			num_distance_pairs -= 2;
-			len_main = coder->match_distances[num_distance_pairs - 1];
-			back_main = coder->match_distances[num_distance_pairs];
-		}
-
-		if (len_main == 2 && back_main >= 0x80)
-			len_main = 1;
-	}
-
-	if (rep_lens[rep_max_index] >= 2) {
-		if (rep_lens[rep_max_index] + 1 >= len_main
-				|| (rep_lens[rep_max_index] + 2 >= len_main
-					&& (back_main > (1 << 9)))
-				|| (rep_lens[rep_max_index] + 3 >= len_main
-					&& (back_main > (1 << 15)))) {
-			*back_res = rep_max_index;
-			*len_res = rep_lens[rep_max_index];
-			move_pos(*len_res - 1);
-			return;
-		}
-	}
-
-	if (len_main >= 2 && num_available_bytes > 2) {
-		lzma_read_match_distances(coder, &coder->longest_match_length,
-				&coder->num_distance_pairs);
-
-		if (coder->longest_match_length >= 2) {
-			const uint32_t new_distance = coder->match_distances[
-					coder->num_distance_pairs];
-
-			if ((coder->longest_match_length >= len_main
-						&& new_distance < back_main)
-					|| (coder->longest_match_length == len_main + 1
-						&& !change_pair(back_main, new_distance))
-					|| (coder->longest_match_length > len_main + 1)
-					|| (coder->longest_match_length + 1 >= len_main
-						&& len_main >= 3
-						&& change_pair(new_distance, back_main))) {
-				coder->longest_match_was_found = true;
-				*back_res = UINT32_MAX;
-				*len_res = 1;
-				return;
-			}
-		}
-
-		++buf;
-		--num_available_bytes;
-
-		for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-			const uint32_t back_offset = coder->reps[i] + 1;
-
-			if (buf[1] != *(buf + 1 - back_offset)
-					|| buf[2] != *(buf + 2 - back_offset)) {
-				rep_lens[i] = 0;
-				continue;
-			}
-
-			uint32_t len;
-			for (len = 2; len < num_available_bytes
-					&& buf[len] == *(buf + len - back_offset);
-					++len) ;
-
-			if (len + 1 >= len_main) {
-				coder->longest_match_was_found = true;
-				*back_res = UINT32_MAX;
-				*len_res = 1;
-				return;
-			}
-		}
-
-		*back_res = back_main + REP_DISTANCES;
-		*len_res = len_main;
-		move_pos(len_main - 2);
-		return;
-	}
-
-	*back_res = UINT32_MAX;
-	*len_res = 1;
-	return;
-}
diff --git a/src/liblzma/lzma/lzma_encoder_init.c b/src/liblzma/lzma/lzma_encoder_init.c
deleted file mode 100644
index 21335f95..00000000
--- a/src/liblzma/lzma/lzma_encoder_init.c
+++ /dev/null
@@ -1,228 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_encoder_init.c
-/// \brief      Creating, resetting and destroying the LZMA encoder
-//
-//  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "lzma_encoder_private.h"
-
-
-/// \brief      Initializes the length encoder
-static void
-length_encoder_reset(lzma_length_encoder *lencoder,
-		const uint32_t num_pos_states, const uint32_t table_size)
-{
-	// NLength::CPriceTableEncoder::SetTableSize()
-	lencoder->table_size = table_size;
-
-	// NLength::CEncoder::Init()
-	bit_reset(lencoder->choice);
-	bit_reset(lencoder->choice2);
-
-	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
-		bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS);
-		bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS);
-	}
-
-	bittree_reset(lencoder->high, LEN_HIGH_BITS);
-
-	// NLength::CPriceTableEncoder::UpdateTables()
-	for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state)
-		lencoder->counters[pos_state] = 1;
-
-	return;
-}
-
-
-static void
-lzma_lzma_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
-{
-	lzma_lz_encoder_end(&coder->lz, allocator);
-	lzma_free(coder, allocator);
-	return;
-}
-
-
-extern lzma_ret
-lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
-		const lzma_filter_info *filters)
-{
-	if (next->coder == NULL) {
-		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
-		if (next->coder == NULL)
-			return LZMA_MEM_ERROR;
-
-		next->coder->next = LZMA_NEXT_CODER_INIT;
-		next->coder->lz = LZMA_LZ_ENCODER_INIT;
-	}
-
-	// Validate options that aren't validated elsewhere.
-	const lzma_options_lzma *options = filters[0].options;
-	if (options->pos_bits > LZMA_POS_BITS_MAX
-			|| options->fast_bytes < LZMA_FAST_BYTES_MIN
-			|| options->fast_bytes > LZMA_FAST_BYTES_MAX) {
-		lzma_lzma_encoder_end(next->coder, allocator);
-		return LZMA_HEADER_ERROR;
-	}
-
-	// Set compression mode.
-	switch (options->mode) {
-		case LZMA_MODE_FAST:
-			next->coder->best_compression = false;
-			break;
-
-		case LZMA_MODE_BEST:
-			next->coder->best_compression = true;
-			break;
-
-		default:
-			lzma_lzma_encoder_end(next->coder, allocator);
-			return LZMA_HEADER_ERROR;
-	}
-
-	// Initialize literal coder.
-	{
-		const lzma_ret ret = lzma_literal_init(
-				&next->coder->literal_coder,
-				options->literal_context_bits,
-				options->literal_pos_bits);
-		if (ret != LZMA_OK)
-			return ret;
-	}
-
-	// Initialize LZ encoder.
-	{
-		const lzma_ret ret = lzma_lz_encoder_reset(
-				&next->coder->lz, allocator, &lzma_lzma_encode,
-				options->dictionary_size, OPTS,
-				options->fast_bytes, MATCH_MAX_LEN + 1 + OPTS,
-				options->match_finder,
-				options->match_finder_cycles,
-				options->preset_dictionary,
-				options->preset_dictionary_size);
-		if (ret != LZMA_OK) {
-			lzma_lzma_encoder_end(next->coder, allocator);
-			return ret;
-		}
-	}
-
-	// Set dist_table_size.
-	{
-		// Round the dictionary size up to next 2^n.
-		uint32_t log_size;
-		for (log_size = 0; (UINT32_C(1) << log_size)
-				< options->dictionary_size; ++log_size) ;
-
-		next->coder->dist_table_size = log_size * 2;
-	}
-
-	// Misc FIXME desc
-	next->coder->align_price_count = UINT32_MAX;
-	next->coder->match_price_count = UINT32_MAX;
-	next->coder->dictionary_size = options->dictionary_size;
-	next->coder->pos_mask = (1U << options->pos_bits) - 1;
-	next->coder->fast_bytes = options->fast_bytes;
-
-	// Range coder
-	rc_reset(&next->coder->rc);
-
-	// State
-	next->coder->state = 0;
-	next->coder->previous_byte = 0;
-	for (size_t i = 0; i < REP_DISTANCES; ++i)
-		next->coder->reps[i] = 0;
-
-	// Bit encoders
-	for (size_t i = 0; i < STATES; ++i) {
-		for (size_t j = 0; j <= next->coder->pos_mask; ++j) {
-			bit_reset(next->coder->is_match[i][j]);
-			bit_reset(next->coder->is_rep0_long[i][j]);
-		}
-
-		bit_reset(next->coder->is_rep[i]);
-		bit_reset(next->coder->is_rep0[i]);
-		bit_reset(next->coder->is_rep1[i]);
-		bit_reset(next->coder->is_rep2[i]);
-	}
-
-	for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i)
-		bit_reset(next->coder->pos_encoders[i]);
-
-	// Bit tree encoders
-	for (size_t i = 0; i < LEN_TO_POS_STATES; ++i)
-		bittree_reset(next->coder->pos_slot_encoder[i], POS_SLOT_BITS);
-
-	bittree_reset(next->coder->pos_align_encoder, ALIGN_BITS);
-
-	// Length encoders
-	length_encoder_reset(&next->coder->match_len_encoder,
-			1U << options->pos_bits,
-			options->fast_bytes + 1 - MATCH_MIN_LEN);
-
-	length_encoder_reset(&next->coder->rep_len_encoder,
-			1U << options->pos_bits,
-			next->coder->fast_bytes + 1 - MATCH_MIN_LEN);
-
-	next->coder->prev_len_encoder = NULL;
-
-	// Misc
-	next->coder->longest_match_was_found = false;
-	next->coder->optimum_end_index = 0;
-	next->coder->optimum_current_index = 0;
-	next->coder->additional_offset = 0;
-
-	next->coder->now_pos = 0;
-	next->coder->is_initialized = false;
-	next->coder->is_flushed = false,
-	next->coder->write_eopm = true;
-
-	// Initialize the next decoder in the chain, if any.
-	{
-		const lzma_ret ret = lzma_next_filter_init(&next->coder->next,
-				allocator, filters + 1);
-		if (ret != LZMA_OK) {
-			lzma_lzma_encoder_end(next->coder, allocator);
-			return ret;
-		}
-	}
-
-	// Initialization successful. Set the function pointers.
-	next->code = &lzma_lz_encode;
-	next->end = &lzma_lzma_encoder_end;
-
-	return LZMA_OK;
-}
-
-
-extern bool
-lzma_lzma_encode_properties(const lzma_options_lzma *options, uint8_t *byte)
-{
-	if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX
-			|| options->literal_pos_bits
-				> LZMA_LITERAL_POS_BITS_MAX
-			|| options->pos_bits > LZMA_POS_BITS_MAX
-			|| options->literal_context_bits
-					+ options->literal_pos_bits
-				> LZMA_LITERAL_BITS_MAX)
-		return true;
-
-	*byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9
-			+ options->literal_context_bits;
-	assert(*byte <= (4 * 5 + 4) * 9 + 8);
-
-	return false;
-}
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
new file mode 100644
index 00000000..9da7e79e
--- /dev/null
+++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
@@ -0,0 +1,193 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder_optimum_fast.c
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder_private.h"
+
+
+#define change_pair(small_dist, big_dist) \
+	(((big_dist) >> 7) > (small_dist))
+
+
+static inline void
+literal(const lzma_coder *restrict coder, const uint8_t *restrict buf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res)
+{
+	// Try short rep0 instead of always coding it as a literal.
+	*back_res = *buf == *(buf - coder->reps[0] - 1) ? 0 : UINT32_MAX;
+	*len_res = 1;
+	return;
+}
+
+
+extern void
+lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res)
+{
+	const uint32_t fast_bytes = mf->find_len_max;
+
+	uint32_t len_main;
+	uint32_t matches_count;
+	if (mf->read_ahead == 0) {
+		len_main = mf_find(mf, &matches_count, coder->matches);
+	} else {
+		assert(mf->read_ahead == 1);
+		len_main = coder->longest_match_length;
+		matches_count = coder->matches_count;
+	}
+
+	const uint8_t *buf = mf_ptr(mf) - 1;
+	const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX);
+
+	if (buf_avail < 2) {
+		// There's not enough input left to encode a match.
+		literal(coder, buf, back_res, len_res);
+		return;
+	}
+
+	// Look for repeated matches; scan the previous four match distances
+	uint32_t rep_len = 0;
+	uint32_t rep_index = 0;
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+		// Pointer to the beginning of the match candidate
+		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
+
+		// If the first two bytes (2 == MATCH_LEN_MIN) do not match,
+		// this rep is not useful.
+		if (not_equal_16(buf, buf_back))
+			continue;
+
+		// The first two bytes matched.
+		// Calculate the length of the match.
+		uint32_t len;
+		for (len = 2; len < buf_avail
+				&& buf[len] == buf_back[len]; ++len) ;
+
+		// If we have found a repeated match that is at least
+		// fast_bytes long, return it immediatelly.
+		if (len >= fast_bytes) {
+			*back_res = i;
+			*len_res = len;
+			mf_skip(mf, len - 1);
+			return;
+		}
+
+		if (len > rep_len) {
+			rep_index = i;
+			rep_len = len;
+		}
+	}
+
+	// We didn't find a long enough repeated match. Encode it as a normal
+	// match if the match length is at least fast_bytes.
+	if (len_main >= fast_bytes) {
+		*back_res = coder->matches[matches_count - 1].dist
+				+ REP_DISTANCES;
+		*len_res = len_main;
+		mf_skip(mf, len_main - 1);
+		return;
+	}
+
+	uint32_t back_main = 0;
+	if (len_main >= 2) {
+		back_main = coder->matches[matches_count - 1].dist;
+
+		while (matches_count > 1 && len_main ==
+				coder->matches[matches_count - 2].len + 1) {
+			if (!change_pair(coder->matches[
+						matches_count - 2].dist,
+					back_main))
+				break;
+
+			--matches_count;
+			len_main = coder->matches[matches_count - 1].len;
+			back_main = coder->matches[matches_count - 1].dist;
+		}
+
+		if (len_main == 2 && back_main >= 0x80)
+			len_main = 1;
+	}
+
+	if (rep_len >= 2) {
+		if (rep_len + 1 >= len_main
+				|| (rep_len + 2 >= len_main
+					&& back_main > (UINT32_C(1) << 9))
+				|| (rep_len + 3 >= len_main
+					&& back_main > (UINT32_C(1) << 15))) {
+			*back_res = rep_index;
+			*len_res = rep_len;
+			mf_skip(mf, rep_len - 1);
+			return;
+		}
+	}
+
+	if (len_main < 2 || buf_avail <= 2) {
+		literal(coder, buf, back_res, len_res);
+		return;
+	}
+
+	// Get the matches for the next byte. If we find a better match,
+	// the current byte is encoded as a literal.
+	coder->longest_match_length = mf_find(mf,
+			&coder->matches_count, coder->matches);
+
+	if (coder->longest_match_length >= 2) {
+		const uint32_t new_dist = coder->matches[
+				coder->matches_count - 1].dist;
+
+		if ((coder->longest_match_length >= len_main
+					&& new_dist < back_main)
+				|| (coder->longest_match_length == len_main + 1
+					&& !change_pair(back_main, new_dist))
+				|| (coder->longest_match_length > len_main + 1)
+				|| (coder->longest_match_length + 1 >= len_main
+					&& len_main >= 3
+					&& change_pair(new_dist, back_main))) {
+			literal(coder, buf, back_res, len_res);
+			return;
+		}
+	}
+
+	// In contrast to LZMA SDK, dictionary could not have been moved
+	// between mf_find() calls, thus it is safe to just increment
+	// the old buf pointer instead of recalculating it with mf_ptr().
+	++buf;
+
+	const uint32_t limit = len_main - 1;
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
+
+		if (not_equal_16(buf, buf_back))
+			continue;
+
+		uint32_t len;
+		for (len = 2; len < limit
+				&& buf[len] == buf_back[len]; ++len) ;
+
+		if (len >= limit) {
+			literal(coder, buf - 1, back_res, len_res);
+			return;
+		}
+	}
+
+	*back_res = back_main + REP_DISTANCES;
+	*len_res = len_main;
+	mf_skip(mf, len_main - 2);
+	return;
+}
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
new file mode 100644
index 00000000..f0dd92c9
--- /dev/null
+++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
@@ -0,0 +1,875 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma_encoder_optimum_normal.c
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder_private.h"
+#include "fastpos.h"
+
+
+////////////
+// Prices //
+////////////
+
+static uint32_t
+get_literal_price(const lzma_coder *const coder, const uint32_t pos,
+		const uint32_t prev_byte, const bool match_mode,
+		uint32_t match_byte, uint32_t symbol)
+{
+	const probability *const subcoder = literal_subcoder(coder->literal,
+			coder->literal_context_bits, coder->literal_pos_mask,
+			pos, prev_byte);
+
+	uint32_t price = 0;
+
+	if (!match_mode) {
+		price = rc_bittree_price(subcoder, 8, symbol);
+	} else {
+		uint32_t offset = 0x100;
+		symbol += UINT32_C(1) << 8;
+
+		do {
+			match_byte <<= 1;
+
+			const uint32_t match_bit = match_byte & offset;
+			const uint32_t subcoder_index
+					= offset + match_bit + (symbol >> 8);
+			const uint32_t bit = (symbol >> 7) & 1;
+			price += rc_bit_price(subcoder[subcoder_index], bit);
+
+			symbol <<= 1;
+			offset &= ~(match_byte ^ symbol);
+
+		} while (symbol < (UINT32_C(1) << 16));
+	}
+
+	return price;
+}
+
+
+static inline uint32_t
+get_len_price(const lzma_length_encoder *const lencoder,
+		const uint32_t len, const uint32_t pos_state)
+{
+	// NOTE: Unlike the other price tables, length prices are updated
+	// in lzma_encoder.c
+	return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
+}
+
+
+static inline uint32_t
+get_short_rep_price(const lzma_coder *const coder,
+		const lzma_lzma_state state, const uint32_t pos_state)
+{
+	return rc_bit_0_price(coder->is_rep0[state])
+		+ rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
+}
+
+
+static inline uint32_t
+get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
+		const lzma_lzma_state state, uint32_t pos_state)
+{
+	uint32_t price;
+
+	if (rep_index == 0) {
+		price = rc_bit_0_price(coder->is_rep0[state]);
+		price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
+	} else {
+		price = rc_bit_1_price(coder->is_rep0[state]);
+
+		if (rep_index == 1) {
+			price += rc_bit_0_price(coder->is_rep1[state]);
+		} else {
+			price += rc_bit_1_price(coder->is_rep1[state]);
+			price += rc_bit_price(coder->is_rep2[state],
+					rep_index - 2);
+		}
+	}
+
+	return price;
+}
+
+
+static inline uint32_t
+get_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
+		const uint32_t len, const lzma_lzma_state state,
+		const uint32_t pos_state)
+{
+	return get_len_price(&coder->rep_len_encoder, len, pos_state)
+		+ get_pure_rep_price(coder, rep_index, state, pos_state);
+}
+
+
+static inline uint32_t
+get_pos_len_price(const lzma_coder *const coder, const uint32_t pos,
+		const uint32_t len, const uint32_t pos_state)
+{
+	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
+	uint32_t price;
+
+	if (pos < FULL_DISTANCES) {
+		price = coder->distances_prices[len_to_pos_state][pos];
+	} else {
+		const uint32_t pos_slot = get_pos_slot_2(pos);
+		price = coder->pos_slot_prices[len_to_pos_state][pos_slot]
+				+ coder->align_prices[pos & ALIGN_MASK];
+	}
+
+	price += get_len_price(&coder->match_len_encoder, len, pos_state);
+
+	return price;
+}
+
+
+static void
+fill_distances_prices(lzma_coder *coder)
+{
+	for (uint32_t len_to_pos_state = 0;
+			len_to_pos_state < LEN_TO_POS_STATES;
+			++len_to_pos_state) {
+
+		uint32_t *const pos_slot_prices
+				= coder->pos_slot_prices[len_to_pos_state];
+
+		// Price to encode the pos_slot.
+		for (uint32_t pos_slot = 0;
+				pos_slot < coder->dist_table_size; ++pos_slot)
+			pos_slot_prices[pos_slot] = rc_bittree_price(
+					coder->pos_slot[len_to_pos_state],
+					POS_SLOT_BITS, pos_slot);
+
+		// For matches with distance >= FULL_DISTANCES, add the price
+		// of the direct bits part of the match distance. (Align bits
+		// are handled by fill_align_prices()).
+		for (uint32_t pos_slot = END_POS_MODEL_INDEX;
+				pos_slot < coder->dist_table_size; ++pos_slot)
+			pos_slot_prices[pos_slot] += rc_direct_price(
+					((pos_slot >> 1) - 1) - ALIGN_BITS);
+
+		// Distances in the range [0, 3] are fully encoded with
+		// pos_slot, so they are used for coder->distances_prices
+		// as is.
+		for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i)
+			coder->distances_prices[len_to_pos_state][i]
+					= pos_slot_prices[i];
+	}
+
+	// Distances in the range [4, 127] depend on pos_slot and pos_special.
+	// We do this in a loop separate from the above loop to avoid
+	// redundant calls to get_pos_slot().
+	for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) {
+		const uint32_t pos_slot = get_pos_slot(i);
+		const uint32_t footer_bits = ((pos_slot >> 1) - 1);
+		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
+		const uint32_t price = rc_bittree_reverse_price(
+				coder->pos_special + base - pos_slot - 1,
+				footer_bits, i - base);
+
+		for (uint32_t len_to_pos_state = 0;
+				len_to_pos_state < LEN_TO_POS_STATES;
+				++len_to_pos_state)
+			coder->distances_prices[len_to_pos_state][i]
+					= price + coder->pos_slot_prices[
+						len_to_pos_state][pos_slot];
+	}
+
+	coder->match_price_count = 0;
+	return;
+}
+
+
+static void
+fill_align_prices(lzma_coder *coder)
+{
+	for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i)
+		coder->align_prices[i] = rc_bittree_reverse_price(
+				coder->pos_align, ALIGN_BITS, i);
+
+	coder->align_price_count = 0;
+	return;
+}
+
+
+/////////////
+// Optimal //
+/////////////
+
+static inline void
+make_literal(lzma_optimal *optimal)
+{
+	optimal->back_prev = UINT32_MAX;
+	optimal->prev_1_is_literal = false;
+}
+
+
+static inline void
+make_short_rep(lzma_optimal *optimal)
+{
+	optimal->back_prev = 0;
+	optimal->prev_1_is_literal = false;
+}
+
+
+#define is_short_rep(optimal) \
+	((optimal).back_prev == 0)
+
+
+static void
+backward(lzma_coder *restrict coder, uint32_t *restrict len_res,
+		uint32_t *restrict back_res, uint32_t cur)
+{
+	coder->opts_end_index = cur;
+
+	uint32_t pos_mem = coder->opts[cur].pos_prev;
+	uint32_t back_mem = coder->opts[cur].back_prev;
+
+	do {
+		if (coder->opts[cur].prev_1_is_literal) {
+			make_literal(&coder->opts[pos_mem]);
+			coder->opts[pos_mem].pos_prev = pos_mem - 1;
+
+			if (coder->opts[cur].prev_2) {
+				coder->opts[pos_mem - 1].prev_1_is_literal
+						= false;
+				coder->opts[pos_mem - 1].pos_prev
+						= coder->opts[cur].pos_prev_2;
+				coder->opts[pos_mem - 1].back_prev
+						= coder->opts[cur].back_prev_2;
+			}
+		}
+
+		const uint32_t pos_prev = pos_mem;
+		const uint32_t back_cur = back_mem;
+
+		back_mem = coder->opts[pos_prev].back_prev;
+		pos_mem = coder->opts[pos_prev].pos_prev;
+
+		coder->opts[pos_prev].back_prev = back_cur;
+		coder->opts[pos_prev].pos_prev = cur;
+		cur = pos_prev;
+
+	} while (cur != 0);
+
+	coder->opts_current_index = coder->opts[0].pos_prev;
+	*len_res = coder->opts[0].pos_prev;
+	*back_res = coder->opts[0].back_prev;
+
+	return;
+}
+
+
+//////////
+// Main //
+//////////
+
+static inline uint32_t
+helper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res,
+		uint32_t position)
+{
+	const uint32_t fast_bytes = mf->find_len_max;
+
+	uint32_t len_main;
+	uint32_t matches_count;
+
+	if (mf->read_ahead == 0) {
+		len_main = mf_find(mf, &matches_count, coder->matches);
+	} else {
+		assert(mf->read_ahead == 1);
+		len_main = coder->longest_match_length;
+		matches_count = coder->matches_count;
+	}
+
+	const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX);
+	if (buf_avail < 2) {
+		*back_res = UINT32_MAX;
+		*len_res = 1;
+		return UINT32_MAX;
+	}
+
+	const uint8_t *const buf = mf_ptr(mf) - 1;
+
+	uint32_t rep_lens[REP_DISTANCES];
+	uint32_t rep_max_index = 0;
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
+
+		if (not_equal_16(buf, buf_back)) {
+			rep_lens[i] = 0;
+			continue;
+		}
+
+		uint32_t len_test;
+		for (len_test = 2; len_test < buf_avail
+				&& buf[len_test] == buf_back[len_test];
+				++len_test) ;
+
+		rep_lens[i] = len_test;
+		if (len_test > rep_lens[rep_max_index])
+			rep_max_index = i;
+	}
+
+	if (rep_lens[rep_max_index] >= fast_bytes) {
+		*back_res = rep_max_index;
+		*len_res = rep_lens[rep_max_index];
+		mf_skip(mf, *len_res - 1);
+		return UINT32_MAX;
+	}
+
+
+	if (len_main >= fast_bytes) {
+		*back_res = coder->matches[matches_count - 1].dist
+				+ REP_DISTANCES;
+		*len_res = len_main;
+		mf_skip(mf, len_main - 1);
+		return UINT32_MAX;
+	}
+
+	const uint8_t current_byte = *buf;
+	const uint8_t match_byte = *(buf - coder->reps[0] - 1);
+
+	if (len_main < 2 && current_byte != match_byte
+			&& rep_lens[rep_max_index] < 2) {
+		*back_res = UINT32_MAX;
+		*len_res = 1;
+		return UINT32_MAX;
+	}
+
+	coder->opts[0].state = coder->state;
+
+	const uint32_t pos_state = position & coder->pos_mask;
+
+	coder->opts[1].price = rc_bit_0_price(
+				coder->is_match[coder->state][pos_state])
+			+ get_literal_price(coder, position, buf[-1],
+				!is_literal_state(coder->state),
+				match_byte, current_byte);
+
+	make_literal(&coder->opts[1]);
+
+	const uint32_t match_price = rc_bit_1_price(
+			coder->is_match[coder->state][pos_state]);
+	const uint32_t rep_match_price = match_price
+			+ rc_bit_1_price(coder->is_rep[coder->state]);
+
+	if (match_byte == current_byte) {
+		const uint32_t short_rep_price = rep_match_price
+				+ get_short_rep_price(
+					coder, coder->state, pos_state);
+
+		if (short_rep_price < coder->opts[1].price) {
+			coder->opts[1].price = short_rep_price;
+			make_short_rep(&coder->opts[1]);
+		}
+	}
+
+	const uint32_t len_end = MAX(len_main, rep_lens[rep_max_index]);
+
+	if (len_end < 2) {
+		*back_res = coder->opts[1].back_prev;
+		*len_res = 1;
+		return UINT32_MAX;
+	}
+
+	coder->opts[1].pos_prev = 0;
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
+		coder->opts[0].backs[i] = coder->reps[i];
+
+	uint32_t len = len_end;
+	do {
+		coder->opts[len].price = RC_INFINITY_PRICE;
+	} while (--len >= 2);
+
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
+		uint32_t rep_len = rep_lens[i];
+		if (rep_len < 2)
+			continue;
+
+		const uint32_t price = rep_match_price + get_pure_rep_price(
+				coder, i, coder->state, pos_state);
+
+		do {
+			const uint32_t cur_and_len_price = price
+					+ get_len_price(
+						&coder->rep_len_encoder,
+						rep_len, pos_state);
+
+			if (cur_and_len_price < coder->opts[rep_len].price) {
+				coder->opts[rep_len].price = cur_and_len_price;
+				coder->opts[rep_len].pos_prev = 0;
+				coder->opts[rep_len].back_prev = i;
+				coder->opts[rep_len].prev_1_is_literal = false;
+			}
+		} while (--rep_len >= 2);
+	}
+
+
+	const uint32_t normal_match_price = match_price
+			+ rc_bit_0_price(coder->is_rep[coder->state]);
+
+	len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
+	if (len <= len_main) {
+		uint32_t i = 0;
+		while (len > coder->matches[i].len)
+			++i;
+
+		for(; ; ++len) {
+			const uint32_t dist = coder->matches[i].dist;
+			const uint32_t cur_and_len_price = normal_match_price
+					+ get_pos_len_price(coder,
+						dist, len, pos_state);
+
+			if (cur_and_len_price < coder->opts[len].price) {
+				coder->opts[len].price = cur_and_len_price;
+				coder->opts[len].pos_prev = 0;
+				coder->opts[len].back_prev
+						= dist + REP_DISTANCES;
+				coder->opts[len].prev_1_is_literal = false;
+			}
+
+			if (len == coder->matches[i].len)
+				if (++i == matches_count)
+					break;
+		}
+	}
+
+	return len_end;
+}
+
+
+static inline uint32_t
+helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
+		uint32_t len_end, uint32_t position, const uint32_t cur,
+		const uint32_t fast_bytes, const uint32_t buf_avail_full)
+{
+	uint32_t matches_count = coder->matches_count;
+	uint32_t new_len = coder->longest_match_length;
+	uint32_t pos_prev = coder->opts[cur].pos_prev;
+	uint32_t state;
+
+	if (coder->opts[cur].prev_1_is_literal) {
+		--pos_prev;
+
+		if (coder->opts[cur].prev_2) {
+			state = coder->opts[coder->opts[cur].pos_prev_2].state;
+
+			if (coder->opts[cur].back_prev_2 < REP_DISTANCES)
+				update_long_rep(state);
+			else
+				update_match(state);
+
+		} else {
+			state = coder->opts[pos_prev].state;
+		}
+
+		update_literal(state);
+
+	} else {
+		state = coder->opts[pos_prev].state;
+	}
+
+	if (pos_prev == cur - 1) {
+		if (is_short_rep(coder->opts[cur]))
+			update_short_rep(state);
+		else
+			update_literal(state);
+	} else {
+		uint32_t pos;
+		if (coder->opts[cur].prev_1_is_literal
+				&& coder->opts[cur].prev_2) {
+			pos_prev = coder->opts[cur].pos_prev_2;
+			pos = coder->opts[cur].back_prev_2;
+			update_long_rep(state);
+		} else {
+			pos = coder->opts[cur].back_prev;
+			if (pos < REP_DISTANCES)
+				update_long_rep(state);
+			else
+				update_match(state);
+		}
+
+		if (pos < REP_DISTANCES) {
+			reps[0] = coder->opts[pos_prev].backs[pos];
+
+			uint32_t i;
+			for (i = 1; i <= pos; ++i)
+				reps[i] = coder->opts[pos_prev].backs[i - 1];
+
+			for (; i < REP_DISTANCES; ++i)
+				reps[i] = coder->opts[pos_prev].backs[i];
+
+		} else {
+			reps[0] = pos - REP_DISTANCES;
+
+			for (uint32_t i = 1; i < REP_DISTANCES; ++i)
+				reps[i] = coder->opts[pos_prev].backs[i - 1];
+		}
+	}
+
+	coder->opts[cur].state = state;
+
+	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
+		coder->opts[cur].backs[i] = reps[i];
+
+	const uint32_t cur_price = coder->opts[cur].price;
+
+	const uint8_t current_byte = *buf;
+	const uint8_t match_byte = *(buf - reps[0] - 1);
+
+	const uint32_t pos_state = position & coder->pos_mask;
+
+	const uint32_t cur_and_1_price = cur_price
+			+ rc_bit_0_price(coder->is_match[state][pos_state])
+			+ get_literal_price(coder, position, buf[-1],
+        		!is_literal_state(state), match_byte, current_byte);
+
+	bool next_is_literal = false;
+
+	if (cur_and_1_price < coder->opts[cur + 1].price) {
+		coder->opts[cur + 1].price = cur_and_1_price;
+		coder->opts[cur + 1].pos_prev = cur;
+		make_literal(&coder->opts[cur + 1]);
+		next_is_literal = true;
+	}
+
+	const uint32_t match_price = cur_price
+			+ rc_bit_1_price(coder->is_match[state][pos_state]);
+	const uint32_t rep_match_price = match_price
+			+ rc_bit_1_price(coder->is_rep[state]);
+
+	if (match_byte == current_byte
+			&& !(coder->opts[cur + 1].pos_prev < cur
+				&& coder->opts[cur + 1].back_prev == 0)) {
+
+		const uint32_t short_rep_price = rep_match_price
+				+ get_short_rep_price(coder, state, pos_state);
+
+		if (short_rep_price <= coder->opts[cur + 1].price) {
+			coder->opts[cur + 1].price = short_rep_price;
+			coder->opts[cur + 1].pos_prev = cur;
+			make_short_rep(&coder->opts[cur + 1]);
+			next_is_literal = true;
+		}
+	}
+
+	if (buf_avail_full < 2)
+		return len_end;
+
+	const uint32_t buf_avail = MIN(buf_avail_full, fast_bytes);
+
+	if (!next_is_literal && match_byte != current_byte) { // speed optimization
+		// try literal + rep0
+		const uint8_t *const buf_back = buf - reps[0] - 1;
+		const uint32_t limit = MIN(buf_avail_full, fast_bytes + 1);
+
+		uint32_t len_test = 1;
+		while (len_test < limit && buf[len_test] == buf_back[len_test])
+			++len_test;
+
+		--len_test;
+
+		if (len_test >= 2) {
+			uint32_t state_2 = state;
+			update_literal(state_2);
+
+			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
+			const uint32_t next_rep_match_price = cur_and_1_price
+					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
+					+ rc_bit_1_price(coder->is_rep[state_2]);
+
+			//for (; len_test >= 2; --len_test) {
+			const uint32_t offset = cur + 1 + len_test;
+
+			while (len_end < offset)
+				coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+			const uint32_t cur_and_len_price = next_rep_match_price
+					+ get_rep_price(coder, 0, len_test,
+						state_2, pos_state_next);
+
+			if (cur_and_len_price < coder->opts[offset].price) {
+				coder->opts[offset].price = cur_and_len_price;
+				coder->opts[offset].pos_prev = cur + 1;
+				coder->opts[offset].back_prev = 0;
+				coder->opts[offset].prev_1_is_literal = true;
+				coder->opts[offset].prev_2 = false;
+			}
+			//}
+		}
+	}
+
+
+	uint32_t start_len = 2; // speed optimization
+
+	for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) {
+		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
+		if (not_equal_16(buf, buf_back))
+			continue;
+
+		uint32_t len_test;
+		for (len_test = 2; len_test < buf_avail
+				&& buf[len_test] == buf_back[len_test];
+				++len_test) ;
+
+		while (len_end < cur + len_test)
+			coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+		const uint32_t len_test_temp = len_test;
+		const uint32_t price = rep_match_price + get_pure_rep_price(
+				coder, rep_index, state, pos_state);
+
+		do {
+			const uint32_t cur_and_len_price = price
+					+ get_len_price(&coder->rep_len_encoder,
+							len_test, pos_state);
+
+			if (cur_and_len_price < coder->opts[cur + len_test].price) {
+				coder->opts[cur + len_test].price = cur_and_len_price;
+				coder->opts[cur + len_test].pos_prev = cur;
+				coder->opts[cur + len_test].back_prev = rep_index;
+				coder->opts[cur + len_test].prev_1_is_literal = false;
+			}
+		} while (--len_test >= 2);
+
+		len_test = len_test_temp;
+
+		if (rep_index == 0)
+			start_len = len_test + 1;
+
+
+		uint32_t len_test_2 = len_test + 1;
+		const uint32_t limit = MIN(buf_avail_full,
+				len_test_2 + fast_bytes);
+		for (; len_test_2 < limit
+				&& buf[len_test_2] == buf_back[len_test_2];
+				++len_test_2) ;
+
+		len_test_2 -= len_test + 1;
+
+		if (len_test_2 >= 2) {
+			uint32_t state_2 = state;
+			update_long_rep(state_2);
+
+			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
+
+			const uint32_t cur_and_len_literal_price = price
+					+ get_len_price(&coder->rep_len_encoder,
+						len_test, pos_state)
+					+ rc_bit_0_price(coder->is_match[state_2][pos_state_next])
+					+ get_literal_price(coder, position + len_test,
+						buf[len_test - 1], true,
+						buf_back[len_test], buf[len_test]);
+
+			update_literal(state_2);
+
+			pos_state_next = (position + len_test + 1) & coder->pos_mask;
+
+			const uint32_t next_rep_match_price = cur_and_len_literal_price
+					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
+					+ rc_bit_1_price(coder->is_rep[state_2]);
+
+			//for(; len_test_2 >= 2; len_test_2--) {
+			const uint32_t offset = cur + len_test + 1 + len_test_2;
+
+			while (len_end < offset)
+				coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+			const uint32_t cur_and_len_price = next_rep_match_price
+					+ get_rep_price(coder, 0, len_test_2,
+						state_2, pos_state_next);
+
+			if (cur_and_len_price < coder->opts[offset].price) {
+				coder->opts[offset].price = cur_and_len_price;
+				coder->opts[offset].pos_prev = cur + len_test + 1;
+				coder->opts[offset].back_prev = 0;
+				coder->opts[offset].prev_1_is_literal = true;
+				coder->opts[offset].prev_2 = true;
+				coder->opts[offset].pos_prev_2 = cur;
+				coder->opts[offset].back_prev_2 = rep_index;
+			}
+			//}
+		}
+	}
+
+
+	//for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
+	if (new_len > buf_avail) {
+		new_len = buf_avail;
+
+		matches_count = 0;
+		while (new_len > coder->matches[matches_count].len)
+			++matches_count;
+
+		coder->matches[matches_count++].len = new_len;
+	}
+
+
+	if (new_len >= start_len) {
+		const uint32_t normal_match_price = match_price
+				+ rc_bit_0_price(coder->is_rep[state]);
+
+		while (len_end < cur + new_len)
+			coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+		uint32_t i = 0;
+		while (start_len > coder->matches[i].len)
+			++i;
+
+		for (uint32_t len_test = start_len; ; ++len_test) {
+			const uint32_t cur_back = coder->matches[i].dist;
+			uint32_t cur_and_len_price = normal_match_price
+					+ get_pos_len_price(coder,
+						cur_back, len_test, pos_state);
+
+			if (cur_and_len_price < coder->opts[cur + len_test].price) {
+				coder->opts[cur + len_test].price = cur_and_len_price;
+				coder->opts[cur + len_test].pos_prev = cur;
+				coder->opts[cur + len_test].back_prev
+						= cur_back + REP_DISTANCES;
+				coder->opts[cur + len_test].prev_1_is_literal = false;
+			}
+
+			if (len_test == coder->matches[i].len) {
+				// Try Match + Literal + Rep0
+				const uint8_t *const buf_back = buf - cur_back - 1;
+				uint32_t len_test_2 = len_test + 1;
+				const uint32_t limit = MIN(buf_avail_full,
+						len_test_2 + fast_bytes);
+
+				for (; len_test_2 < limit &&
+						buf[len_test_2] == buf_back[len_test_2];
+						++len_test_2) ;
+
+				len_test_2 -= len_test + 1;
+
+				if (len_test_2 >= 2) {
+					uint32_t state_2 = state;
+					update_match(state_2);
+					uint32_t pos_state_next
+							= (position + len_test) & coder->pos_mask;
+
+					const uint32_t cur_and_len_literal_price = cur_and_len_price
+							+ rc_bit_0_price(
+								coder->is_match[state_2][pos_state_next])
+							+ get_literal_price(coder,
+								position + len_test,
+								buf[len_test - 1],
+								true,
+								buf_back[len_test],
+								buf[len_test]);
+
+					update_literal(state_2);
+					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
+
+					const uint32_t next_rep_match_price
+							= cur_and_len_literal_price
+							+ rc_bit_1_price(
+								coder->is_match[state_2][pos_state_next])
+							+ rc_bit_1_price(coder->is_rep[state_2]);
+
+					// for(; len_test_2 >= 2; --len_test_2) {
+					const uint32_t offset = cur + len_test + 1 + len_test_2;
+
+					while (len_end < offset)
+						coder->opts[++len_end].price = RC_INFINITY_PRICE;
+
+					cur_and_len_price = next_rep_match_price
+							+ get_rep_price(coder, 0, len_test_2,
+								state_2, pos_state_next);
+
+					if (cur_and_len_price < coder->opts[offset].price) {
+						coder->opts[offset].price = cur_and_len_price;
+						coder->opts[offset].pos_prev = cur + len_test + 1;
+						coder->opts[offset].back_prev = 0;
+						coder->opts[offset].prev_1_is_literal = true;
+						coder->opts[offset].prev_2 = true;
+						coder->opts[offset].pos_prev_2 = cur;
+						coder->opts[offset].back_prev_2
+								= cur_back + REP_DISTANCES;
+					}
+					//}
+				}
+
+				if (++i == matches_count)
+					break;
+			}
+		}
+	}
+
+	return len_end;
+}
+
+
+extern void
+lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint32_t *restrict back_res, uint32_t *restrict len_res,
+		uint32_t position)
+{
+	// If we have symbols pending, return the next pending symbol.
+	if (coder->opts_end_index != coder->opts_current_index) {
+		assert(mf->read_ahead > 0);
+		*len_res = coder->opts[coder->opts_current_index].pos_prev
+				- coder->opts_current_index;
+		*back_res = coder->opts[coder->opts_current_index].back_prev;
+		coder->opts_current_index = coder->opts[
+				coder->opts_current_index].pos_prev;
+		return;
+	}
+
+	// Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
+	// this was done in both initialization function and in the main loop.
+	// In liblzma they were moved into this single place.
+	if (mf->read_ahead == 0) {
+		if (coder->match_price_count >= (1 << 7))
+			fill_distances_prices(coder);
+
+		if (coder->align_price_count >= ALIGN_TABLE_SIZE)
+			fill_align_prices(coder);
+	}
+
+	// TODO: This needs quite a bit of cleaning still. But splitting
+	// the oroginal function to two pieces makes it at least a little
+	// more readable, since those two parts don't share many variables.
+
+	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
+	if (len_end == UINT32_MAX)
+		return;
+
+	uint32_t reps[REP_DISTANCES];
+	memcpy(reps, coder->reps, sizeof(reps));
+
+	uint32_t cur;
+	for (cur = 1; cur < len_end; ++cur) {
+		assert(cur < OPTS);
+
+		coder->longest_match_length = mf_find(
+				mf, &coder->matches_count, coder->matches);
+
+		if (coder->longest_match_length >= mf->find_len_max)
+			break;
+
+		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
+				position + cur, cur, mf->find_len_max,
+				MIN(mf_avail(mf) + 1, OPTS - 1 - cur));
+	}
+
+	backward(coder, len_res, back_res, cur);
+	return;
+}
diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c
index 966c7c86..08f339e9 100644
--- a/src/liblzma/lzma/lzma_encoder_presets.c
+++ b/src/liblzma/lzma/lzma_encoder_presets.c
@@ -20,15 +20,47 @@
 #include "common.h"
 
 
+#define pow2(e) (UINT32_C(1) << (e))
+
+
 LZMA_API const lzma_options_lzma lzma_preset_lzma[9] = {
-// dictionary_size  lc lp pb           mode            fb  mf          mfc
-{ UINT32_C(1) << 16, 3, 0, 2, NULL, 0, LZMA_MODE_FAST,  64, LZMA_MF_HC3, 0 },
-{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_FAST,  64, LZMA_MF_HC4, 0 },
-{ UINT32_C(1) << 19, 3, 0, 2, NULL, 0, LZMA_MODE_BEST,  64, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_BEST,  64, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 21, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 22, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 23, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 24, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 },
-{ UINT32_C(1) << 25, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 },
+// dict           lc lp pb           mode              fb   mf          mfc
+{ pow2(16), NULL, 0, 3, 0, 2, false, LZMA_MODE_FAST,    64, LZMA_MF_HC3, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_FAST,    64, LZMA_MF_HC4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(19), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL,  64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL,  64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(21), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(22), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(23), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(24), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
+{ pow2(25), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL },
 };
+
+
+/*
+extern LZMA_API lzma_bool
+lzma_preset_lzma(lzma_options_lzma *options, uint32_t level)
+{
+	*options = (lzma_options_lzma){
+
+	};
+
+	options->literal_context_bits = LZMA_LITERAL_CONTEXT_BITS_DEFAULT
+	options->literal_pos_bits = LZMA_LITERAL_POS_BITS_DEFAULT;
+	options->pos_bits = LZMA_POS_BITS_DEFAULT;
+	options->preset_dictionary = NULL;
+	options->preset_dictionary_size = 0;
+	options->persistent = false;
+
+	options->mode = level <= 2 ? LZMA_MODE_FAST : LZMA_MODE_NORMAL;
+	options->fast_bytes = level <=
+
+	options->match_finder = level == 1 ? LZMA_MF_HC3
+			: (level == 2 ? LZMA_MF_HC4 : LZMA_MF_BT4);
+	options->match_finder_cycles = 0;
+
+
+
+	options->dictionary_size =
+}
+*/
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index a16051f8..7533bc79 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -21,20 +21,27 @@
 #ifndef LZMA_LZMA_ENCODER_PRIVATE_H
 #define LZMA_LZMA_ENCODER_PRIVATE_H
 
-#include "lzma_encoder.h"
-#include "lzma_common.h"
 #include "lz_encoder.h"
 #include "range_encoder.h"
+#include "lzma_common.h"
+#include "lzma_encoder.h"
+
 
+// Macro to compare if the first two bytes in two buffers differ. This is
+// needed in lzma_lzma_optimum_*() to test if the match is at least
+// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no
+// reason to not use it when it is supported.
+#ifdef HAVE_FAST_UNALIGNED_ACCESS
+#	define not_equal_16(a, b) \
+		(*(const uint16_t *)(a) != *(const uint16_t *)(b))
+#else
+#	define not_equal_16(a, b) \
+		((a)[0] != (b)[0] || (a)[1] != (b)[1])
+#endif
 
-#define move_pos(num) \
-do { \
-	assert((int32_t)(num) >= 0); \
-	if ((num) != 0) { \
-		coder->additional_offset += num; \
-		coder->lz.skip(&coder->lz, num); \
-	} \
-} while (0)
+
+// Optimal - Number of entries in the optimum array.
+#define OPTS (1 << 12)
 
 
 typedef struct {
@@ -54,7 +61,7 @@ typedef struct {
 typedef struct {
 	lzma_lzma_state state;
 
-	bool prev_1_is_char;
+	bool prev_1_is_literal;
 	bool prev_2;
 
 	uint32_t pos_prev_2;
@@ -70,132 +77,79 @@ typedef struct {
 
 
 struct lzma_coder_s {
-	// Next coder in the chain
-	lzma_next_coder next;
-
-	// In window and match finder
-	lzma_lz_encoder lz;
-
-	// Range encoder
+	/// Range encoder
 	lzma_range_encoder rc;
 
-	// State
+	/// State
 	lzma_lzma_state state;
-	uint8_t previous_byte;
+
+	/// The four most recent match distances
 	uint32_t reps[REP_DISTANCES];
 
-	// Misc
-	uint32_t match_distances[MATCH_MAX_LEN * 2 + 2 + 1];
-	uint32_t num_distance_pairs;
-	uint32_t additional_offset;
-	uint32_t now_pos; // Lowest 32 bits are enough here.
-	bool best_compression;           ///< True when LZMA_MODE_BEST is used
+	/// Array of match candidates
+	lzma_match matches[MATCH_LEN_MAX + 1];
+
+	/// Number of match candidates in matches[]
+	uint32_t matches_count;
+
+	/// Varibale to hold the length of the longest match between calls
+	/// to lzma_lzma_optimum_*().
+	uint32_t longest_match_length;
+
+	/// True if using getoptimumfast
+	bool fast_mode;
+
+	/// True if the encoder has been initialized by encoding the first
+	/// byte as a literal.
 	bool is_initialized;
+
+	/// True if the range encoder has been flushed, but not all bytes
+	/// have been written to the output buffer yet.
 	bool is_flushed;
-	bool write_eopm;
 
-	// Literal encoder
-	lzma_literal_coder literal_coder;
+	uint32_t pos_mask;         ///< (1 << pos_bits) - 1
+	uint32_t literal_context_bits;
+	uint32_t literal_pos_mask;
 
-	// Bit encoders
+	// These are the same as in lzma_decoder.c. See comments there.
+	probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
 	probability is_match[STATES][POS_STATES_MAX];
 	probability is_rep[STATES];
 	probability is_rep0[STATES];
 	probability is_rep1[STATES];
 	probability is_rep2[STATES];
 	probability is_rep0_long[STATES][POS_STATES_MAX];
-	probability pos_encoders[FULL_DISTANCES - END_POS_MODEL_INDEX];
+	probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS];
+	probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX];
+	probability pos_align[ALIGN_TABLE_SIZE];
 
-	// Bit Tree Encoders
-	probability pos_slot_encoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS];
-	probability pos_align_encoder[1 << ALIGN_BITS];
-
-	// Length encoders
+	// These are the same as in lzma_decoder.c except that the encoders
+	// include also price tables.
 	lzma_length_encoder match_len_encoder;
 	lzma_length_encoder rep_len_encoder;
-	lzma_length_encoder *prev_len_encoder;
 
-	// Optimal
-	lzma_optimal optimum[OPTS];
-	uint32_t optimum_end_index;
-	uint32_t optimum_current_index;
-	uint32_t longest_match_length;
-	bool longest_match_was_found;
-
-	// Prices
-	uint32_t pos_slot_prices[LEN_TO_POS_STATES][DIST_TABLE_SIZE_MAX];
+	// Price tables
+	uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS];
 	uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES];
-	uint32_t align_prices[ALIGN_TABLE_SIZE];
-	uint32_t align_price_count;
 	uint32_t dist_table_size;
 	uint32_t match_price_count;
 
-	// LZMA specific settings
-	uint32_t dictionary_size;        ///< Size in bytes
-	uint32_t fast_bytes;
-	uint32_t pos_state_bits;
-	uint32_t pos_mask;         ///< (1 << pos_state_bits) - 1
-};
-
-
-extern void lzma_length_encoder_update_table(lzma_length_encoder *lencoder,
-		const uint32_t pos_state);
+	uint32_t align_prices[ALIGN_TABLE_SIZE];
+	uint32_t align_price_count;
 
-extern bool lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out,
-		size_t *restrict out_pos, size_t out_size);
+	// Optimal
+	uint32_t opts_end_index;
+	uint32_t opts_current_index;
+	lzma_optimal opts[OPTS];
+};
 
-extern void lzma_get_optimum(lzma_coder *restrict coder,
-		uint32_t *restrict back_res, uint32_t *restrict len_res);
 
-extern void lzma_get_optimum_fast(lzma_coder *restrict coder,
+extern void lzma_lzma_optimum_fast(
+		lzma_coder *restrict coder, lzma_mf *restrict mf,
 		uint32_t *restrict back_res, uint32_t *restrict len_res);
 
-
-// NOTE: Don't add 'restrict'.
-static inline void
-lzma_read_match_distances(lzma_coder *coder,
-		uint32_t *len_res, uint32_t *num_distance_pairs)
-{
-	*len_res = 0;
-
-	coder->lz.get_matches(&coder->lz, coder->match_distances);
-
-	*num_distance_pairs = coder->match_distances[0];
-
-	if (*num_distance_pairs > 0) {
-		*len_res = coder->match_distances[*num_distance_pairs - 1];
-		assert(*len_res <= MATCH_MAX_LEN);
-
-		if (*len_res == coder->fast_bytes) {
-			uint32_t offset = *len_res - 1;
-			const uint32_t distance = coder->match_distances[
-					*num_distance_pairs] + 1;
-			uint32_t limit = MATCH_MAX_LEN - *len_res;
-
-			assert(offset + limit < coder->lz.keep_size_after);
-			assert(coder->lz.read_pos <= coder->lz.write_pos);
-
-			// If we are close to end of the stream, we may need
-			// to limit the length of the match.
-			if (coder->lz.write_pos - coder->lz.read_pos
-					< offset + limit)
-				limit = coder->lz.write_pos
-					- (coder->lz.read_pos + offset);
-
-			offset += coder->lz.read_pos;
-			uint32_t i = 0;
-			while (i < limit && coder->lz.buffer[offset + i]
-					== coder->lz.buffer[
-						offset + i - distance])
-				++i;
-
-			*len_res += i;
-		}
-	}
-
-	++coder->additional_offset;
-
-	return;
-}
+extern void lzma_lzma_optimum_normal(lzma_coder *restrict coder,
+		lzma_mf *restrict mf, uint32_t *restrict back_res,
+		uint32_t *restrict len_res, uint32_t position);
 
 #endif
diff --git a/src/liblzma/lzma/lzma_literal.c b/src/liblzma/lzma/lzma_literal.c
deleted file mode 100644
index 3611a1f7..00000000
--- a/src/liblzma/lzma/lzma_literal.c
+++ /dev/null
@@ -1,51 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_literal.c
-/// \brief      Literal Coder
-//
-//  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "lzma_literal.h"
-
-
-extern lzma_ret
-lzma_literal_init(lzma_literal_coder *coder,
-		uint32_t literal_context_bits, uint32_t literal_pos_bits)
-{
-	// Verify that arguments are sane.
-	if (literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX
-			|| literal_pos_bits > LZMA_LITERAL_POS_BITS_MAX)
-		return LZMA_HEADER_ERROR;
-
-	// Calculate the number of states the literal coder must store.
-	const uint32_t states = literal_states(
-			literal_pos_bits, literal_context_bits);
-
-	// Store the new settings.
-	coder->literal_context_bits = literal_context_bits;
-	coder->literal_pos_bits = literal_pos_bits;
-
-	// Calculate also the literal_pos_mask. It's not changed
-	// anywhere else than here.
-	coder->literal_pos_mask = (1 << literal_pos_bits) - 1;
-
-	// Reset the literal coder.
-	for (uint32_t i = 0; i < states; ++i)
-		for (uint32_t j = 0; j < LIT_SIZE; ++j)
-			bit_reset(coder->coders[i][j]);
-
-	return LZMA_OK;
-}
diff --git a/src/liblzma/lzma/lzma_literal.h b/src/liblzma/lzma/lzma_literal.h
deleted file mode 100644
index 208abd99..00000000
--- a/src/liblzma/lzma/lzma_literal.h
+++ /dev/null
@@ -1,71 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_literal.h
-/// \brief      Literal Coder
-///
-/// This is used as is by both LZMA encoder and decoder.
-//
-//  Copyright (C) 1999-2006 Igor Pavlov
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_LITERAL_H
-#define LZMA_LITERAL_H
-
-#include "common.h"
-
-// We need typedef of `probability'.
-#include "range_common.h"
-
-
-/// Each literal coder is divided in three sections:
-///   - 0x001-0x0FF: Without match byte
-///   - 0x101-0x1FF: With match byte; match bit is 0
-///   - 0x201-0x2FF: With match byte; match bit is 1
-#define LIT_SIZE 0x300
-
-/// Calculate how many states are needed. Each state has
-/// LIT_SIZE `probability' variables.
-#define literal_states(literal_context_bits, literal_pos_bits) \
-	(1U << ((literal_context_bits) + (literal_pos_bits)))
-
-/// Locate the literal coder for the next literal byte. The choice depends on
-///   - the lowest literal_pos_bits bits of the position of the current
-///     byte; and
-///   - the highest literal_context_bits bits of the previous byte.
-#define literal_get_subcoder(literal_coder, pos, prev_byte) \
-	(literal_coder).coders[(((pos) & (literal_coder).literal_pos_mask) \
-			<< (literal_coder).literal_context_bits) \
-		+ ((prev_byte) >> (8 - (literal_coder).literal_context_bits))]
-
-
-typedef struct {
-	uint32_t literal_context_bits;
-	uint32_t literal_pos_bits;
-
-	/// literal_pos_mask is always (1 << literal_pos_bits) - 1.
-	uint32_t literal_pos_mask;
-
-	/// There are (1 << (literal_pos_bits + literal_context_bits))
-	/// literal coders.
-	probability coders[1 << LZMA_LITERAL_BITS_MAX][LIT_SIZE];
-
-} lzma_literal_coder;
-
-
-extern lzma_ret lzma_literal_init(
-		lzma_literal_coder *coder,
-		uint32_t literal_context_bits, uint32_t literal_pos_bits);
-
-#endif
author	Lasse Collin <lasse.collin@tukaani.org>	2008-08-28 22:53:15 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2008-08-28 22:53:15 +0300
commit	3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree	7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lzma
parent	Fix test_filter_flags to match the new restriction of lc+lp. (diff)
download	xz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz