diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2007-12-09 00:42:33 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2007-12-09 00:42:33 +0200 |
commit | 5d018dc03549c1ee4958364712fb0c94e1bf2741 (patch) | |
tree | 1b211911fb33fddb3f04b77f99e81df23623ffc4 /src/liblzma/lz/lz_decoder.h | |
download | xz-5d018dc03549c1ee4958364712fb0c94e1bf2741.tar.xz |
Imported to git.
Diffstat (limited to 'src/liblzma/lz/lz_decoder.h')
-rw-r--r-- | src/liblzma/lz/lz_decoder.h | 214 |
1 files changed, 214 insertions, 0 deletions
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h new file mode 100644 index 00000000..a8a585cd --- /dev/null +++ b/src/liblzma/lz/lz_decoder.h @@ -0,0 +1,214 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_OUT_H +#define LZMA_LZ_OUT_H + +#include "common.h" + + +/// Get a byte from the history buffer. +#define lz_get_byte(lz, distance) \ + ((distance) < (lz).pos \ + ? (lz).dict[(lz).pos - (distance) - 1] \ + : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } + + +typedef struct { + /// Function to do the actual decoding (LZMA or Inflate) + bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t size_in, + bool has_safe_buffer); + + /// Pointer to dictionary (history) buffer. + /// \note Not 'restrict' because can alias next_out. + uint8_t *dict; + + /// Next write goes to dict[pos]. + size_t pos; + + /// Next byte to flush is buffer[start]. + size_t start; + + /// First byte to not flush is buffer[end]. + size_t end; + + /// First position to which data must not be written. + size_t limit; + + /// True if dictionary has needed wrapping. + bool is_full; + + /// True if process() has detected End of Payload Marker. + bool eopm_detected; + + /// True if the next coder in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected End of Payload + /// Marker. This may become true before next_finished becomes true. + bool this_finished; + + /// When pos >= must_flush_pos, we must not call process(). + size_t must_flush_pos; + + /// Maximum number of bytes that a single decoding loop inside + /// process() can produce data into dict. This amount is kept + /// always available at dict + pos i.e. it is safe to write a byte + /// to dict[pos + match_max_len - 1]. + size_t match_max_len; + + /// Number of bytes allocated to dict. + size_t size; + + /// Requested size of the dictionary. This is needed because we avoid + /// using extremely tiny history buffers. + size_t requested_size; + + /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown. + lzma_vli uncompressed_size; + + /// Number of bytes currently in temp[]. + size_t temp_size; + + /// Temporary buffer needed when + /// 1) we cannot make the input buffer completely empty; or + /// 2) we are not the last filter in the chain. + uint8_t temp[LZMA_BUFFER_SIZE]; + +} lzma_lz_decoder; + + +///////////////////////// +// Function prototypes // +///////////////////////// + +extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, + lzma_allocator *allocator, bool (*process)( + lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + bool has_safe_buffer), + lzma_vli uncompressed_size, + size_t history_size, size_t match_max_len); + +extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Deallocates the history buffer if one exists. +extern void lzma_lz_decoder_end( + lzma_lz_decoder *lz, lzma_allocator *allocator); + +////////////////////// +// Inline functions // +////////////////////// + +// Repeat a block of data from the history. Because memcpy() is faster +// than copying byte by byte in a loop, the copying process gets split +// into three cases: +// 1. distance < length +// Source and target areas overlap, thus we can't use memcpy() +// (nor memmove()) safely. +// TODO: If this is common enough, it might be worth optimizing this +// more e.g. by checking if distance > sizeof(uint8_t*) and using +// memcpy in small chunks. +// 2. distance < pos +// This is the easiest and the fastest case. The block being copied +// is a contiguous piece in the history buffer. The buffer offset +// doesn't need wrapping. +// 3. distance >= pos +// We need to wrap the position, because otherwise we would try copying +// behind the first byte of the allocated buffer. It is possible that +// the block is fragmeneted into two pieces, thus we might need to call +// memcpy() twice. +// NOTE: The function using this macro must ensure that length is positive +// and that distance is FIXME +static inline bool +lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) +{ + // Validate offset of the block to be repeated. It doesn't + // make sense to copy data behind the beginning of the stream. + // Leaving this check away would lead to a security problem, + // in which e.g. the data of the previously decoded file(s) + // would be leaked (or whatever happens to be in unused + // part of the dictionary buffer). + if (distance >= lz->pos && !lz->is_full) + return false; + + // It also doesn't make sense to copy data farer than + // the dictionary size. + if (distance >= lz->requested_size) + return false; + + // The caller must have checked these! + assert(distance <= lz->size); + assert(length > 0); + assert(length <= lz->match_max_len); + + // Copy the amount of data requested by the decoder. + if (distance < length) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. :-( + // TODO: Copying byte by byte is slow. It might be + // worth optimizing this more if this case is common. + do { + lz->dict[lz->pos] = lz_get_byte(*lz, distance); + ++lz->pos; + } while (--length > 0); + + } else if (distance < lz->pos) { + // The easiest and fastest case + memcpy(lz->dict + lz->pos, + lz->dict + lz->pos - distance - 1, + length); + lz->pos += length; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. + assert(lz->is_full); + const uint32_t copy_pos = lz->pos - distance - 1 + lz->end; + uint32_t copy_size = lz->end - copy_pos; + + if (copy_size < length) { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + copy_size); + lz->pos += copy_size; + copy_size = length - copy_size; + memcpy(lz->dict + lz->pos, lz->dict, copy_size); + lz->pos += copy_size; + } else { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + length); + lz->pos += length; + } + } + + return true; +} + +#endif |