diff options
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/lz/lz_decoder.h | 308 |
1 file changed, 161 insertions, 147 deletions
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index 1acf9831..d2a77ba4 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -18,201 +18,215 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_LZ_OUT_H -#define LZMA_LZ_OUT_H +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H #include "common.h" -/// Get a byte from the history buffer. -#define lz_get_byte(lz, distance) \ - ((distance) < (lz).pos \ - ? (lz).dict[(lz).pos - (distance) - 1] \ - : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) - - -/// Test if dictionary is empty. -#define lz_is_empty(lz) \ - ((lz).pos == 0 && !(lz).is_full) - - -#define LZMA_LZ_DECODER_INIT \ - (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } - - typedef struct { - /// Function to do the actual decoding (LZMA or Inflate) - bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t size_in, - bool has_safe_buffer); + /// Pointer to the dictionary buffer. It can be an allocated buffer + /// internal to liblzma, or it can a be a buffer given by the + /// application when in single-call mode (not implemented yet). + uint8_t *buf; - /// Pointer to dictionary (history) buffer. - /// \note Not 'restrict' because can alias next_out. - uint8_t *dict; - - /// Next write goes to dict[pos]. + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. size_t pos; - /// Next byte to flush is buffer[start]. - size_t start; - - /// First byte to not flush is buffer[end]. - size_t end; + /// Indicates how full the dictionary is. This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; - /// First position to which data must not be written. + /// Write limit size_t limit; - /// True if dictionary has needed wrapping. 
- bool is_full; - - /// True if process() has detected End of Payload Marker. - bool eopm_detected; + /// Size of the dictionary + size_t size; - /// True if the next coder in the chain has returned LZMA_STREAM_END. - bool next_finished; +} lzma_dict; - /// True if the LZ decoder (e.g. LZMA) has detected End of Payload - /// Marker. This may become true before next_finished becomes true. - bool this_finished; - /// When pos >= must_flush_pos, we must not call process(). - size_t must_flush_pos; +typedef struct { + /// Data specific to the LZ-based decoder + lzma_coder *coder; - /// Maximum number of bytes that a single decoding loop inside - /// process() can produce data into dict. This amount is kept - /// always available at dict + pos i.e. it is safe to write a byte - /// to dict[pos + match_max_len - 1]. - size_t match_max_len; + /// Function to decode from in[] to *dict + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); - /// Number of bytes allocated to dict. - size_t size; + void (*reset)(lzma_coder *coder, const void *options); - /// Requested size of the dictionary. This is needed because we avoid - /// using extremely tiny history buffers. - size_t requested_size; + /// Set the uncompressed size + void (*set_uncompressed)(lzma_coder *coder, + lzma_vli uncompressed_size); - /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown. - lzma_vli uncompressed_size; + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); - /// Number of bytes currently in temp[]. - size_t temp_size; +} lzma_lz_decoder; - /// Temporary buffer needed when - /// 1) we cannot make the input buffer completely empty; or - /// 2) we are not the last filter in the chain. 
- uint8_t temp[LZMA_BUFFER_SIZE]; -} lzma_lz_decoder; +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } -///////////////////////// -// Function prototypes // -///////////////////////// +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + size_t *dict_size)); -extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, - lzma_allocator *allocator, bool (*process)( - lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size, - bool has_safe_buffer), - size_t history_size, size_t match_max_len); +extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); -extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action); +extern void lzma_lz_decoder_uncompressed( + lzma_coder *coder, lzma_vli uncompressed_size); -/// Deallocates the history buffer if one exists. -extern void lzma_lz_decoder_end( - lzma_lz_decoder *lz, lzma_allocator *allocator); ////////////////////// // Inline functions // ////////////////////// -// Repeat a block of data from the history. Because memcpy() is faster -// than copying byte by byte in a loop, the copying process gets split -// into three cases: -// 1. distance < length -// Source and target areas overlap, thus we can't use memcpy() -// (nor memmove()) safely. -// TODO: If this is common enough, it might be worth optimizing this -// more e.g. by checking if distance > sizeof(uint8_t*) and using -// memcpy in small chunks. -// 2. distance < pos -// This is the easiest and the fastest case. 
The block being copied -// is a contiguous piece in the history buffer. The buffer offset -// doesn't need wrapping. -// 3. distance >= pos -// We need to wrap the position, because otherwise we would try copying -// behind the first byte of the allocated buffer. It is possible that -// the block is fragmeneted into two pieces, thus we might need to call -// memcpy() twice. -// NOTE: The function using this macro must ensure that length is positive -// and that distance is FIXME +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos ? 0 : dict->size)]; +} + + +/// Test if dictionary is empty. +static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full >= distance; +} + + +/// Repeat *len bytes at distance. static inline bool -lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) +dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) { - // Validate offset of the block to be repeated. It doesn't - // make sense to copy data behind the beginning of the stream. - // Leaving this check away would lead to a security problem, - // in which e.g. the data of the previously decoded file(s) - // would be leaked (or whatever happens to be in unused - // part of the dictionary buffer). - if (unlikely(distance >= lz->pos && !lz->is_full)) - return false; - - // It also doesn't make sense to copy data farer than - // the dictionary size. - if (unlikely(distance >= lz->requested_size)) - return false; - - // The caller must have checked these! - assert(distance <= lz->size); - assert(length > 0); - assert(length <= lz->match_max_len); - - // Copy the amount of data requested by the decoder. 
- if (distance < length) { + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = MIN(dict_avail, *len); + *len -= left; + + // Repeat a block of data from the history. Because memcpy() is faster + // than copying byte by byte in a loop, the copying process gets split + // into three cases. + if (distance < left) { // Source and target areas overlap, thus we can't use - // memcpy() nor even memmove() safely. :-( - // TODO: Copying byte by byte is slow. It might be - // worth optimizing this more if this case is common. + // memcpy() nor even memmove() safely. do { - lz->dict[lz->pos] = lz_get_byte(*lz, distance); - ++lz->pos; - } while (--length > 0); + dict->buf[dict->pos] = dict_get(dict, distance); + ++dict->pos; + } while (--left > 0); - } else if (distance < lz->pos) { + } else if (distance < dict->pos) { // The easiest and fastest case - memcpy(lz->dict + lz->pos, - lz->dict + lz->pos - distance - 1, - length); - lz->pos += length; + memcpy(dict->buf + dict->pos, + dict->buf + dict->pos - distance - 1, + left); + dict->pos += left; } else { // The bigger the dictionary, the more rare this // case occurs. We need to "wrap" the dict, thus // we might need two memcpy() to copy all the data. 
- assert(lz->is_full); - const uint32_t copy_pos = lz->pos - distance - 1 + lz->end; - uint32_t copy_size = lz->end - copy_pos; + assert(dict->full == dict->size); + const uint32_t copy_pos + = dict->pos - distance - 1 + dict->size; + uint32_t copy_size = dict->size - copy_pos; - if (copy_size < length) { - memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + if (copy_size < left) { + memcpy(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); - lz->pos += copy_size; - copy_size = length - copy_size; - memcpy(lz->dict + lz->pos, lz->dict, copy_size); - lz->pos += copy_size; + dict->pos += copy_size; + copy_size = left - copy_size; + memcpy(dict->buf + dict->pos, dict->buf, copy_size); + dict->pos += copy_size; } else { - memcpy(lz->dict + lz->pos, lz->dict + copy_pos, - length); - lz->pos += length; + memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + left); + dict->pos += left; } } - return true; + // Update how full the dictionary is. + if (dict->full < dict->pos) + dict->full = dict->pos; + + return unlikely(*len != 0); +} + + +/// Puts one byte into the dictionary. Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put(lzma_dict *dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict->buf[dict->pos++] = byte; + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. 
+ // This shouldn't be very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. + + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->pos = 0; + dict->full = 0; + dict->buf[dict->size - 1] = '\0'; } #endif |