Sort of garbage collection commit. :-| Many things are still

broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions.
author: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
commit: 3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree: 7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lzma/lzma2_encoder.c
parent: Fix test_filter_flags to match the new restriction of lc+lp. (diff)
download: xz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz
1 files changed, 406 insertions, 0 deletions
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
new file mode 100644
index 00000000..b2cd176b
--- /dev/null
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -0,0 +1,406 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lzma2_encoder.c
+/// \brief      LZMA2 encoder
+//
+//  Copyright (C) 1999-2008 Igor Pavlov
+//  Copyright (C) 2008 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lzma_encoder.h"
+#include "fastpos.h"
+#include "lzma2_encoder.h"
+
+
+/// Maximum number of bytes of actual data per chunk (no headers)
+#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16)
+
+/// Maximum uncompressed size of LZMA chunk (no headers)
+#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21)
+
+/// Maximum size of LZMA2 headers
+#define LZMA2_HEADER_MAX 6
+
+/// Size of a header for uncompressed chunk
+#define LZMA2_HEADER_UNCOMPRESSED 3
+
+
+struct lzma_coder_s {
+	enum {
+		SEQ_INIT,
+		SEQ_LZMA_ENCODE,
+		SEQ_LZMA_COPY,
+		SEQ_UNCOMPRESSED_HEADER,
+		SEQ_UNCOMPRESSED_COPY,
+	} sequence;
+
+	/// LZMA encoder
+	lzma_coder *lzma;
+
+	/// If this is not NULL, we will check new options from this
+	/// structure when starting a new chunk.
+	const lzma_options_lzma *opt_new;
+
+	/// LZMA options currently in use.
+	lzma_options_lzma opt_cur;
+
+	bool need_properties;
+	bool need_state_reset;
+	bool need_dictionary_reset;
+
+	/// Uncompressed size of a chunk
+	size_t uncompressed_size;
+
+	/// Compressed size of a chunk (excluding headers); this is also used
+	/// to indicate the end of buf[] in SEQ_LZMA_COPY.
+	size_t compressed_size;
+
+	/// Read position in buf[]
+	size_t buf_pos;
+
+	/// Buffer to hold the chunk header and LZMA compressed data
+	uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
+};
+
+
+static void
+lzma2_header_lzma(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+	assert(coder->compressed_size > 0);
+	assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+
+	size_t pos;
+
+	if (coder->need_properties) {
+		pos = 0;
+
+		if (coder->need_dictionary_reset)
+			coder->buf[pos] = 0x80 + (3 << 5);
+		else
+			coder->buf[pos] = 0x80 + (2 << 5);
+	} else {
+		pos = 1;
+
+		if (coder->need_state_reset)
+			coder->buf[pos] = 0x80 + (1 << 5);
+		else
+			coder->buf[pos] = 0x80;
+	}
+
+	// Set the start position for copying.
+	coder->buf_pos = pos;
+
+	// Uncompressed size
+	size_t size = coder->uncompressed_size - 1;
+	coder->buf[pos++] += size >> 16;
+	coder->buf[pos++] = (size >> 8) & 0xFF;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Compressed size
+	size = coder->compressed_size - 1;
+	coder->buf[pos++] = size >> 8;
+	coder->buf[pos++] = size & 0xFF;
+
+	// Properties, if needed
+	if (coder->need_properties)
+		lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
+
+	coder->need_properties = false;
+	coder->need_state_reset = false;
+	coder->need_dictionary_reset = false;
+
+	// The copying code uses coder->compressed_size to indicate the end
+	// of coder->buf[], so we need add the maximum size of the header here.
+	coder->compressed_size += LZMA2_HEADER_MAX;
+
+	return;
+}
+
+
+static void
+lzma2_header_uncompressed(lzma_coder *coder)
+{
+	assert(coder->uncompressed_size > 0);
+	assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
+
+	// If this is the first chunk, we need to include dictionary
+	// reset indicator.
+	if (coder->need_dictionary_reset)
+		coder->buf[0] = 1;
+	else
+		coder->buf[0] = 2;
+
+	coder->need_dictionary_reset = false;
+
+	// "Compressed" size
+	coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
+	coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
+
+	// Set the start position for copying.
+	coder->buf_pos = 0;
+	return;
+}
+
+
+static lzma_ret
+lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	while (*out_pos < out_size)
+	switch (coder->sequence) {
+	case SEQ_INIT:
+		// If there's no input left and we are flushing or finishing,
+		// don't start a new chunk.
+		if (mf_unencoded(mf) == 0) {
+			// Write end of payload marker if finishing.
+			if (mf->action == LZMA_FINISH)
+				out[(*out_pos)++] = 0;
+
+			return mf->action == LZMA_RUN
+					? LZMA_OK : LZMA_STREAM_END;
+		}
+
+		// Look if there are new options. At least for now,
+		// only lc/lp/pb can be changed.
+		if (coder->opt_new != NULL
+				&& (coder->opt_cur.literal_context_bits
+					!= coder->opt_new->literal_context_bits
+				|| coder->opt_cur.literal_pos_bits
+					!= coder->opt_new->literal_pos_bits
+				|| coder->opt_cur.pos_bits
+					!= coder->opt_new->pos_bits)) {
+			// Options have been changed, copy them to opt_cur.
+			coder->opt_cur.literal_context_bits
+					= coder->opt_new->literal_context_bits;
+			coder->opt_cur.literal_pos_bits
+					= coder->opt_new->literal_pos_bits;
+			coder->opt_cur.pos_bits
+					= coder->opt_new->pos_bits;
+
+			// We need to write the new options and reset
+			// the encoder state.
+			coder->need_properties = true;
+			coder->need_state_reset = true;
+		}
+
+		if (coder->need_state_reset)
+			lzma_lzma_encoder_reset(coder->lzma, &coder->opt_cur);
+
+		coder->uncompressed_size = 0;
+		coder->compressed_size = 0;
+		coder->sequence = SEQ_LZMA_ENCODE;
+
+	// Fall through
+
+	case SEQ_LZMA_ENCODE: {
+		// Calculate how much more uncompressed data this chunk
+		// could accept.
+		const uint32_t left = LZMA2_UNCOMPRESSED_MAX
+				- coder->uncompressed_size;
+		uint32_t limit;
+
+		if (left < mf->match_len_max) {
+			// Must flush immediatelly since the next LZMA symbol
+			// could make the uncompressed size of the chunk too
+			// big.
+			limit = 0;
+		} else {
+			// Calculate maximum read_limit that is OK from point
+			// of view of LZMA2 chunk size.
+			limit = mf->read_pos - mf->read_ahead
+					+ left - mf->match_len_max;
+		}
+
+		// Save the start position so that we can update
+		// coder->uncompressed_size.
+		const uint32_t read_start = mf->read_pos - mf->read_ahead;
+
+		// Call the LZMA encoder until the chunk is finished.
+		const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
+				coder->buf + LZMA2_HEADER_MAX,
+				&coder->compressed_size,
+				LZMA2_CHUNK_MAX, limit);
+
+		coder->uncompressed_size += mf->read_pos - mf->read_ahead
+				- read_start;
+
+		assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+		assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+
+		if (ret != LZMA_STREAM_END)
+			return LZMA_OK;
+
+		// See if the chunk compressed. If it didn't, we encode it
+		// as uncompressed chunk. This saves a few bytes of space
+		// and makes decoding faster.
+		if (coder->compressed_size >= coder->uncompressed_size) {
+			coder->uncompressed_size += mf->read_ahead;
+			assert(coder->uncompressed_size
+					<= LZMA2_UNCOMPRESSED_MAX);
+			mf->read_ahead = 0;
+			lzma2_header_uncompressed(coder);
+			coder->need_state_reset = true;
+			coder->sequence = SEQ_UNCOMPRESSED_HEADER;
+			break;
+		}
+
+		// The chunk did compress at least by one byte, so we store
+		// the chunk as LZMA.
+		lzma2_header_lzma(coder);
+
+		coder->sequence = SEQ_LZMA_COPY;
+	}
+
+	// Fall through
+
+	case SEQ_LZMA_COPY:
+		// Copy the compressed chunk along its headers to the
+		// output buffer.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				coder->compressed_size,
+				out, out_pos, out_size);
+		if (coder->buf_pos != coder->compressed_size)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+
+	case SEQ_UNCOMPRESSED_HEADER:
+		// Copy the three-byte header to indicate uncompressed chunk.
+		lzma_bufcpy(coder->buf, &coder->buf_pos,
+				LZMA2_HEADER_UNCOMPRESSED,
+				out, out_pos, out_size);
+		if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_UNCOMPRESSED_COPY;
+
+	// Fall through
+
+	case SEQ_UNCOMPRESSED_COPY:
+		// Copy the uncompressed data as is from the dictionary
+		// to the output buffer.
+		mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
+		if (coder->uncompressed_size != 0)
+			return LZMA_OK;
+
+		coder->sequence = SEQ_INIT;
+		break;
+	}
+
+	return LZMA_OK;
+}
+
+
+static void
+lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	lzma_free(coder->lzma, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+static lzma_ret
+lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+		const void *options, lzma_lz_options *lz_options)
+{
+	if (options == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (lz->coder == NULL) {
+		lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (lz->coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		lz->code = &lzma2_encode;
+		lz->end = &lzma2_encoder_end;
+
+		lz->coder->lzma = NULL;
+	}
+
+	lz->coder->sequence = SEQ_INIT;
+	lz->coder->need_properties = true;
+	lz->coder->need_state_reset = false;
+	lz->coder->need_dictionary_reset = true;
+
+	lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
+	lz->coder->opt_new = lz->coder->opt_cur.persistent
+				? options : NULL;
+
+	// Initialize LZMA encoder
+	return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
+			&lz->coder->opt_cur, lz_options));
+
+	// Make sure that we will always have enough history available in
+	// case we need to use uncompressed chunks. They are used when the
+	// compressed size of a chunk is not smaller than the uncompressed
+	// size, so we need to have at least LZMA2_COMPRESSED_MAX bytes
+	// history available.
+	if (lz_options->before_size + lz_options->dictionary_size
+			< LZMA2_CHUNK_MAX)
+		lz_options->before_size = LZMA2_CHUNK_MAX
+				- lz_options->dictionary_size;
+
+	return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	return lzma_lz_encoder_init(
+			next, allocator, filters, &lzma2_encoder_init);
+}
+
+
+extern uint64_t
+lzma_lzma2_encoder_memusage(const void *options)
+{
+	const uint64_t lzma_memusage = lzma_lzma_encoder_memusage(options);
+	if (lzma_memusage == UINT64_MAX)
+		return UINT64_MAX;
+
+	return sizeof(lzma_coder) + lzma_memusage;
+}
+
+
+extern lzma_ret
+lzma_lzma2_props_encode(const void *options, uint8_t *out)
+{
+	const lzma_options_lzma *const opt = options;
+	uint32_t d = MAX(opt->dictionary_size, LZMA_DICTIONARY_SIZE_MIN);
+
+	// Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
+	// on which one is the next:
+	--d;
+	d |= d >> 2;
+	d |= d >> 3;
+	d |= d >> 4;
+	d |= d >> 8;
+	d |= d >> 16;
+
+	// Get the highest two bits using the proper encoding:
+	if (d == UINT32_MAX)
+		out[0] = 40;
+	else
+		out[0] = get_pos_slot(d + 1) - 24;
+
+	return LZMA_OK;
+}
author	Lasse Collin <lasse.collin@tukaani.org>	2008-08-28 22:53:15 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2008-08-28 22:53:15 +0300
commit	3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree	7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lzma/lzma2_encoder.c
parent	Fix test_filter_flags to match the new restriction of lc+lp. (diff)
download	xz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz