Imported to git.

author: Lasse Collin <lasse.collin@tukaani.org> 2007-12-09 00:42:33 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2007-12-09 00:42:33 +0200
commit: 5d018dc03549c1ee4958364712fb0c94e1bf2741 (patch)
tree: 1b211911fb33fddb3f04b77f99e81df23623ffc4 /src/liblzma/lz/lz_decoder.h
download: xz-5d018dc03549c1ee4958364712fb0c94e1bf2741.tar.xz
1 files changed, 214 insertions, 0 deletions
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
new file mode 100644
index 00000000..a8a585cd
--- /dev/null
+++ b/src/liblzma/lz/lz_decoder.h
@@ -0,0 +1,214 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       lz_decoder.h
+/// \brief      LZ out window
+//
+//  Copyright (C) 1999-2006 Igor Pavlov
+//  Copyright (C) 2007 Lasse Collin
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZ_OUT_H
+#define LZMA_LZ_OUT_H
+
+#include "common.h"
+
+
+/// Get a byte from the history buffer.
+#define lz_get_byte(lz, distance) \
+	((distance) < (lz).pos \
+		? (lz).dict[(lz).pos - (distance) - 1] \
+		: (lz).dict[(lz).pos - (distance) - 1 + (lz).end])
+
+
+#define LZMA_LZ_DECODER_INIT \
+	(lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }
+
+
+typedef struct {
+	/// Function to do the actual decoding (LZMA or Inflate)
+	bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
+			size_t *restrict in_pos, size_t size_in,
+			bool has_safe_buffer);
+
+	/// Pointer to dictionary (history) buffer.
+	/// \note Not 'restrict' because can alias next_out.
+	uint8_t *dict;
+
+	/// Next write goes to dict[pos].
+	size_t pos;
+
+	/// Next byte to flush is buffer[start].
+	size_t start;
+
+	/// First byte to not flush is buffer[end].
+	size_t end;
+
+	/// First position to which data must not be written.
+	size_t limit;
+
+	/// True if dictionary has needed wrapping.
+	bool is_full;
+
+	/// True if process() has detected End of Payload Marker.
+	bool eopm_detected;
+
+	/// True if the next coder in the chain has returned LZMA_STREAM_END.
+	bool next_finished;
+
+	/// True if the LZ decoder (e.g. LZMA) has detected End of Payload
+	/// Marker. This may become true before next_finished becomes true.
+	bool this_finished;
+
+	/// When pos >= must_flush_pos, we must not call process().
+	size_t must_flush_pos;
+
+	/// Maximum number of bytes that a single decoding loop inside
+	/// process() can produce data into dict. This amount is kept
+	/// always available at dict + pos i.e. it is safe to write a byte
+	/// to dict[pos + match_max_len - 1].
+	size_t match_max_len;
+
+	/// Number of bytes allocated to dict.
+	size_t size;
+
+	/// Requested size of the dictionary. This is needed because we avoid
+	/// using extremely tiny history buffers.
+	size_t requested_size;
+
+	/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
+	lzma_vli uncompressed_size;
+
+	/// Number of bytes currently in temp[].
+	size_t temp_size;
+
+	/// Temporary buffer needed when
+	/// 1) we cannot make the input buffer completely empty; or
+	/// 2) we are not the last filter in the chain.
+	uint8_t temp[LZMA_BUFFER_SIZE];
+
+} lzma_lz_decoder;
+
+
+/////////////////////////
+// Function prototypes //
+/////////////////////////
+
+extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
+		lzma_allocator *allocator, bool (*process)(
+			lzma_coder *restrict coder, const uint8_t *restrict in,
+			size_t *restrict in_pos, size_t in_size,
+			bool has_safe_buffer),
+		lzma_vli uncompressed_size,
+		size_t history_size, size_t match_max_len);
+
+extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size,
+		lzma_action action);
+
+/// Deallocates the history buffer if one exists.
+extern void lzma_lz_decoder_end(
+		lzma_lz_decoder *lz, lzma_allocator *allocator);
+
+//////////////////////
+// Inline functions //
+//////////////////////
+
+// Repeat a block of data from the history. Because memcpy() is faster
+// than copying byte by byte in a loop, the copying process gets split
+// into three cases:
+// 1. distance < length
+//    Source and target areas overlap, thus we can't use memcpy()
+//    (nor memmove()) safely.
+//    TODO: If this is common enough, it might be worth optimizing this
+//    more e.g. by checking if distance > sizeof(uint8_t*) and using
+//    memcpy in small chunks.
+// 2. distance < pos
+//    This is the easiest and the fastest case. The block being copied
+//    is a contiguous piece in the history buffer. The buffer offset
+//    doesn't need wrapping.
+// 3. distance >= pos
+//    We need to wrap the position, because otherwise we would try copying
+//    behind the first byte of the allocated buffer. It is possible that
+//    the block is fragmeneted into two pieces, thus we might need to call
+//    memcpy() twice.
+// NOTE: The function using this macro must ensure that length is positive
+// and that distance is FIXME
+static inline bool
+lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
+{
+	// Validate offset of the block to be repeated. It doesn't
+	// make sense to copy data behind the beginning of the stream.
+	// Leaving this check away would lead to a security problem,
+	// in which e.g. the data of the previously decoded file(s)
+	// would be leaked (or whatever happens to be in unused
+	// part of the dictionary buffer).
+	if (distance >= lz->pos && !lz->is_full)
+		return false;
+
+	// It also doesn't make sense to copy data farer than
+	// the dictionary size.
+	if (distance >= lz->requested_size)
+		return false;
+
+	// The caller must have checked these!
+	assert(distance <= lz->size);
+	assert(length > 0);
+	assert(length <= lz->match_max_len);
+
+	// Copy the amount of data requested by the decoder.
+	if (distance < length) {
+		// Source and target areas overlap, thus we can't use
+		// memcpy() nor even memmove() safely. :-(
+		// TODO: Copying byte by byte is slow. It might be
+		// worth optimizing this more if this case is common.
+		do {
+			lz->dict[lz->pos] = lz_get_byte(*lz, distance);
+			++lz->pos;
+		} while (--length > 0);
+
+	} else if (distance < lz->pos) {
+		// The easiest and fastest case
+		memcpy(lz->dict + lz->pos,
+				lz->dict + lz->pos - distance - 1,
+				length);
+		lz->pos += length;
+
+	} else {
+		// The bigger the dictionary, the more rare this
+		// case occurs. We need to "wrap" the dict, thus
+		// we might need two memcpy() to copy all the data.
+		assert(lz->is_full);
+		const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;
+		uint32_t copy_size = lz->end - copy_pos;
+
+		if (copy_size < length) {
+			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
+					copy_size);
+			lz->pos += copy_size;
+			copy_size = length - copy_size;
+			memcpy(lz->dict + lz->pos, lz->dict, copy_size);
+			lz->pos += copy_size;
+		} else {
+			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
+					length);
+			lz->pos += length;
+		}
+	}
+
+	return true;
+}
+
+#endif
author	Lasse Collin <lasse.collin@tukaani.org>	2007-12-09 00:42:33 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2007-12-09 00:42:33 +0200
commit	5d018dc03549c1ee4958364712fb0c94e1bf2741 (patch)
tree	1b211911fb33fddb3f04b77f99e81df23623ffc4 /src/liblzma/lz/lz_decoder.h
download	xz-5d018dc03549c1ee4958364712fb0c94e1bf2741.tar.xz