Diffstat
 src/liblzma/lz/lz_encoder.c | 780
 1 file changed, 388 insertions(+), 392 deletions(-)
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 82b9103f..d5f84826 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -3,8 +3,8 @@
 /// \file lz_encoder.c
 /// \brief LZ in window
 //
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
+// Copyright (C) 1999-2008 Igor Pavlov
+// Copyright (C) 2007-2008 Lasse Collin
 //
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@@ -18,496 +18,492 @@
 //
 ///////////////////////////////////////////////////////////////////////////////
-#include "lz_encoder_private.h"
+#include "lz_encoder.h"
+#include "lz_encoder_hash.h"
-// Hash Chains
-#ifdef HAVE_HC3
-#	include "hc3.h"
-#endif
-#ifdef HAVE_HC4
-#	include "hc4.h"
-#endif
-// Binary Trees
-#ifdef HAVE_BT2
-#	include "bt2.h"
-#endif
-#ifdef HAVE_BT3
-#	include "bt3.h"
-#endif
-#ifdef HAVE_BT4
-#	include "bt4.h"
-#endif
+struct lzma_coder_s {
+	/// LZ-based encoder e.g. LZMA
+	lzma_lz_encoder lz;
+	/// History buffer and match finder
+	lzma_mf mf;
-/// This is needed in two places so provide a macro.
-#define get_cyclic_buffer_size(history_size) ((history_size) + 1)
+	/// Next coder in the chain
+	lzma_next_coder next;
+};
-/// Calculate certain match finder properties and validate the calculated
-/// values. This is as its own function, because *num_items is needed to
-/// calculate memory requirements in common/memory.c.
-extern bool
-lzma_lz_encoder_hash_properties(lzma_match_finder match_finder,
-		uint32_t history_size, uint32_t *restrict hash_mask,
-		uint32_t *restrict hash_size_sum, uint32_t *restrict num_items)
+/// \brief Moves the data in the input window to free space for new data
+///
+/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
+/// bytes of input history available all the time. Now and then we need to
+/// "slide" the buffer to make space for the new data to the end of the
+/// buffer. At the same time, data older than keep_size_before is dropped.
+///
+static void
+move_window(lzma_mf *mf)
 {
-	uint32_t fix_hash_size;
-	uint32_t sons;
+	// Align the move to a multiple of 16 bytes. Some LZ-based encoders
+	// like LZMA use the lowest bits of mf->read_pos to know the
+	// alignment of the uncompressed data. We also get better speed
+	// for memmove() with aligned buffers.
+	assert(mf->read_pos > mf->keep_size_before);
+	const uint32_t move_offset
+			= (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
-	switch (match_finder) {
-#ifdef HAVE_HC3
-	case LZMA_MF_HC3:
-		fix_hash_size = LZMA_HC3_FIX_HASH_SIZE;
-		sons = 1;
-		break;
-#endif
-#ifdef HAVE_HC4
-	case LZMA_MF_HC4:
-		fix_hash_size = LZMA_HC4_FIX_HASH_SIZE;
-		sons = 1;
-		break;
-#endif
-#ifdef HAVE_BT2
-	case LZMA_MF_BT2:
-		fix_hash_size = LZMA_BT2_FIX_HASH_SIZE;
-		sons = 2;
-		break;
-#endif
-#ifdef HAVE_BT3
-	case LZMA_MF_BT3:
-		fix_hash_size = LZMA_BT3_FIX_HASH_SIZE;
-		sons = 2;
-		break;
-#endif
-#ifdef HAVE_BT4
-	case LZMA_MF_BT4:
-		fix_hash_size = LZMA_BT4_FIX_HASH_SIZE;
-		sons = 2;
-		break;
-#endif
-	default:
-		return true;
-	}
+	assert(mf->write_pos > move_offset);
+	const size_t move_size = mf->write_pos - move_offset;
-	uint32_t hs;
+	assert(move_offset + move_size <= mf->size);
-#ifdef HAVE_LZMA_BT2
-	if (match_finder == LZMA_BT2) {
-		// NOTE: hash_mask is not used by the BT2 match finder,
-		// but it is initialized just in case.
-		hs = LZMA_BT2_HASH_SIZE;
-		*hash_mask = 0;
-	} else
-#endif
-	{
-		hs = history_size - 1;
-		hs |= (hs >> 1);
-		hs |= (hs >> 2);
-		hs |= (hs >> 4);
-		hs |= (hs >> 8);
-		hs >>= 1;
-		hs |= 0xFFFF;
+	memmove(mf->buffer, mf->buffer + move_offset, move_size);
-		if (hs > (UINT32_C(1) << 24)) {
-			if (match_finder == LZMA_MF_HC4
-					|| match_finder == LZMA_MF_BT4)
-				hs >>= 1;
-			else
-				hs = (1 << 24) - 1;
-		}
+	mf->offset += move_offset;
+	mf->read_pos -= move_offset;
+	mf->read_limit -= move_offset;
+	mf->write_pos -= move_offset;
+
+	return;
+}
-		*hash_mask = hs;
-		++hs;
-	}
-	*hash_size_sum = hs + fix_hash_size;
+/// \brief Tries to fill the input window (mf->buffer)
+///
+/// If we are the last encoder in the chain, our input data is in in[].
+/// Otherwise we call the next filter in the chain to process in[] and
+/// write its output to mf->buffer.
+///
+/// This function must not be called once it has returned LZMA_STREAM_END.
+///
+static lzma_ret
+fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
+		size_t *in_pos, size_t in_size, lzma_action action)
+{
+	assert(coder->mf.read_pos <= coder->mf.write_pos);
-	*num_items = *hash_size_sum
-			+ get_cyclic_buffer_size(history_size) * sons;
+	// Move the sliding window if needed.
+	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
+		move_window(&coder->mf);
-	return false;
-}
+	size_t in_used;
+	lzma_ret ret;
+	if (coder->next.code == NULL) {
+		// Not using a filter, simply memcpy() as much as possible.
+		in_used = lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
+				&coder->mf.write_pos, coder->mf.size);
+		ret = action != LZMA_RUN && *in_pos == in_size
+				? LZMA_STREAM_END : LZMA_OK;
-extern lzma_ret
-lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator,
-		bool (*process)(lzma_coder *coder, uint8_t *restrict out,
-			size_t *restrict out_pos, size_t out_size),
-		size_t history_size, size_t additional_buffer_before,
-		size_t match_max_len, size_t additional_buffer_after,
-		lzma_match_finder match_finder, uint32_t match_finder_cycles,
-		const uint8_t *preset_dictionary,
-		size_t preset_dictionary_size)
-{
-	lz->sequence = SEQ_RUN;
+	} else {
+		const size_t in_start = *in_pos;
+		ret = coder->next.code(coder->next.coder, allocator,
+				in, in_pos, in_size,
+				coder->mf.buffer, &coder->mf.write_pos,
+				coder->mf.size, action);
+		in_used = *in_pos - in_start;
+	}
-	///////////////
-	// In Window //
-	///////////////
+	// If end of stream has been reached or flushing completed, we allow
+	// the encoder to process all the input (that is, read_pos is allowed
+	// to reach write_pos). Otherwise we keep keep_size_after bytes
+	// available as prebuffer.
+	if (ret == LZMA_STREAM_END) {
+		assert(*in_pos == in_size);
+		ret = LZMA_OK;
+		coder->mf.action = action;
+		coder->mf.read_limit = coder->mf.write_pos;
-	// Validate history size.
-	if (history_size < LZMA_DICTIONARY_SIZE_MIN
-			|| history_size > LZMA_DICTIONARY_SIZE_MAX) {
-		lzma_lz_encoder_end(lz, allocator);
-		return LZMA_HEADER_ERROR;
+	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
+		// This needs to be done conditionally, because if we got
+		// only little new input, there may be too little input
+		// to do any encoding yet.
+		coder->mf.read_limit = coder->mf.write_pos
+				- coder->mf.keep_size_after;
 	}
-	assert(history_size <= MAX_VAL_FOR_NORMALIZE - 256);
-	assert(LZMA_DICTIONARY_SIZE_MAX <= MAX_VAL_FOR_NORMALIZE - 256);
+	// Restart the match finder after finished LZMA_SYNC_FLUSH.
+	if (coder->mf.pending > 0
+			&& coder->mf.read_pos < coder->mf.read_limit) {
+		// Match finder may update coder->pending and expects it to
+		// start from zero, so use a temporary variable.
+		const size_t pending = coder->mf.pending;
+		coder->mf.pending = 0;
-	// Calculate the size of the history buffer to allocate.
-	// TODO: Get a reason for magic constant of 256.
-	const size_t size_reserv = (history_size + additional_buffer_before
-			+ match_max_len + additional_buffer_after) / 2 + 256;
+		// Rewind read_pos so that the match finder can hash
+		// the pending bytes.
+		assert(coder->mf.read_pos >= pending);
+		coder->mf.read_pos -= pending;
-	lz->keep_size_before = history_size + additional_buffer_before;
-	lz->keep_size_after = match_max_len + additional_buffer_after;
+		// Call the skip function directly instead of using
+		// lz_dict_skip(), since we don't want to touch
+		// mf->read_ahead.
+		coder->mf.skip(&coder->mf, pending);
+	}
-	const size_t buffer_size = lz->keep_size_before + lz->keep_size_after
-			+ size_reserv;
+	return ret;
+}
-	// Allocate history buffer if its size has changed.
-	if (buffer_size != lz->size) {
-		lzma_free(lz->buffer, allocator);
-		lz->buffer = lzma_alloc(buffer_size, allocator);
-		if (lz->buffer == NULL) {
-			lzma_lz_encoder_end(lz, allocator);
-			return LZMA_MEM_ERROR;
+
+static lzma_ret
+lz_encode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size, lzma_action action)
+{
+	while (*out_pos < out_size
+			&& (*in_pos < in_size || action != LZMA_RUN)) {
+		// Read more data to coder->mf.buffer if needed.
+		if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
+				>= coder->mf.read_limit)
+			return_if_error(fill_window(coder, allocator,
+					in, in_pos, in_size, action));
+
+		// Encode
+		const lzma_ret ret = coder->lz.code(coder->lz.coder,
+				&coder->mf, out, out_pos, out_size);
+		if (ret != LZMA_OK) {
+			// Setting this to LZMA_RUN for cases when we are
+			// flushing. It doesn't matter when finishing or if
+			// an error occurred.
+			coder->mf.action = LZMA_RUN;
+			return ret;
 		}
 	}
-	// Allocation successful. Store the new size.
-	lz->size = buffer_size;
+	return LZMA_OK;
+}
+
+
+static bool
+lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
+		const lzma_lz_options *lz_options)
+{
+	if (lz_options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN
+			|| lz_options->dictionary_size
+				> LZMA_DICTIONARY_SIZE_MAX
+			|| lz_options->find_len_max
+				> lz_options->match_len_max)
+		return true;
+
+	mf->keep_size_before = lz_options->before_size
+			+ lz_options->dictionary_size;
-	// Reset in window variables.
-	lz->offset = 0;
-	lz->read_pos = 0;
-	lz->read_limit = 0;
-	lz->write_pos = 0;
-	lz->pending = 0;
+	mf->keep_size_after = lz_options->after_size
+			+ lz_options->match_len_max;
+	// To avoid constant memmove()s, allocate some extra space. Since
+	// memmove()s become more expensive when the size of the buffer
+	// increases, we reserve more space when a large dictionary is
+	// used to make the memmove() calls rarer.
+	uint32_t reserve = lz_options->dictionary_size / 2;
+	if (reserve > (UINT32_C(1) << 30))
+		reserve /= 2;
-	//////////////////
-	// Match Finder //
-	//////////////////
+	reserve += (lz_options->before_size + lz_options->match_len_max
+			+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);
-	// Validate match_finder, set function pointers and a few match
-	// finder specific variables.
-	switch (match_finder) {
-#ifdef HAVE_HC3
+	const uint32_t old_size = mf->size;
+	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
+
+	// FIXME Integer overflows
+
+	// Deallocate the old history buffer if it exists but has different
+	// size than what is needed now.
+	if (mf->buffer != NULL && old_size != mf->size) {
+		lzma_free(mf->buffer, allocator);
+		mf->buffer = NULL;
+	}
+
+	// Match finder options
+	mf->match_len_max = lz_options->match_len_max;
+	mf->find_len_max = lz_options->find_len_max;
+	mf->cyclic_buffer_size = lz_options->dictionary_size + 1;
+
+	// Validate the match finder ID and setup the function pointers.
+	switch (lz_options->match_finder) {
+#ifdef HAVE_MF_HC3
 	case LZMA_MF_HC3:
-		lz->get_matches = &lzma_hc3_get_matches;
-		lz->skip = &lzma_hc3_skip;
-		lz->cut_value = 8 + (match_max_len >> 2);
+		mf->find = &lzma_mf_hc3_find;
+		mf->skip = &lzma_mf_hc3_skip;
 		break;
 #endif
-#ifdef HAVE_HC4
+#ifdef HAVE_MF_HC4
 	case LZMA_MF_HC4:
-		lz->get_matches = &lzma_hc4_get_matches;
-		lz->skip = &lzma_hc4_skip;
-		lz->cut_value = 8 + (match_max_len >> 2);
+		mf->find = &lzma_mf_hc4_find;
+		mf->skip = &lzma_mf_hc4_skip;
 		break;
 #endif
-#ifdef HAVE_BT2
+#ifdef HAVE_MF_BT2
 	case LZMA_MF_BT2:
-		lz->get_matches = &lzma_bt2_get_matches;
-		lz->skip = &lzma_bt2_skip;
-		lz->cut_value = 16 + (match_max_len >> 1);
+		mf->find = &lzma_mf_bt2_find;
+		mf->skip = &lzma_mf_bt2_skip;
 		break;
 #endif
-#ifdef HAVE_BT3
+#ifdef HAVE_MF_BT3
 	case LZMA_MF_BT3:
-		lz->get_matches = &lzma_bt3_get_matches;
-		lz->skip = &lzma_bt3_skip;
-		lz->cut_value = 16 + (match_max_len >> 1);
+		mf->find = &lzma_mf_bt3_find;
+		mf->skip = &lzma_mf_bt3_skip;
 		break;
 #endif
-#ifdef HAVE_BT4
+#ifdef HAVE_MF_BT4
 	case LZMA_MF_BT4:
-		lz->get_matches = &lzma_bt4_get_matches;
-		lz->skip = &lzma_bt4_skip;
-		lz->cut_value = 16 + (match_max_len >> 1);
+		mf->find = &lzma_mf_bt4_find;
+		mf->skip = &lzma_mf_bt4_skip;
 		break;
 #endif
+
 	default:
-		lzma_lz_encoder_end(lz, allocator);
-		return LZMA_HEADER_ERROR;
+		return true;
 	}
-	// Check if we have been requested to use a non-default cut_value.
-	if (match_finder_cycles > 0)
-		lz->cut_value = match_finder_cycles;
-
-	lz->match_max_len = match_max_len;
-	lz->cyclic_buffer_size = get_cyclic_buffer_size(history_size);
+	// Calculate the sizes of mf->hash and mf->son.
+	const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
+	const bool is_bt = (lz_options->match_finder & 0x10) != 0;
+	uint32_t hs;
-	uint32_t hash_size_sum;
-	uint32_t num_items;
-	if (lzma_lz_encoder_hash_properties(match_finder, history_size,
-			&lz->hash_mask, &hash_size_sum, &num_items)) {
-		lzma_lz_encoder_end(lz, allocator);
-		return LZMA_HEADER_ERROR;
-	}
+	if (hash_bytes == 2) {
+		hs = 0xFFFF;
+	} else {
+		// Round dictionary size up to the next 2^n - 1 so it can
+		// be used as a hash mask.
+		hs = lz_options->dictionary_size - 1;
+		hs |= hs >> 1;
+		hs |= hs >> 2;
+		hs |= hs >> 4;
+		hs |= hs >> 8;
+		hs >>= 1;
+		hs |= 0xFFFF;
-	if (num_items != lz->num_items) {
-#if UINT32_MAX >= SIZE_MAX / 4
-		// Check for integer overflow. (Huge dictionaries are not
-		// possible on 32-bit CPU.)
-		if (num_items > SIZE_MAX / sizeof(uint32_t)) {
-			lzma_lz_encoder_end(lz, allocator);
-			return LZMA_MEM_ERROR;
+		if (hs > (UINT32_C(1) << 24)) {
+			if (hash_bytes == 3)
+				hs = (UINT32_C(1) << 24) - 1;
+			else
+				hs >>= 1;
 		}
-#endif
-
-		const size_t size_in_bytes
-				= (size_t)(num_items) * sizeof(uint32_t);
+	}
-		lzma_free(lz->hash, allocator);
-		lz->hash = lzma_alloc(size_in_bytes, allocator);
-		if (lz->hash == NULL) {
-			lzma_lz_encoder_end(lz, allocator);
-			return LZMA_MEM_ERROR;
-		}
+	mf->hash_mask = hs;
+
+	++hs;
+	if (hash_bytes > 2)
+		hs += HASH_2_SIZE;
+	if (hash_bytes > 3)
+		hs += HASH_3_SIZE;
+/*
+	No match finder uses this at the moment.
+	if (mf->hash_bytes > 4)
+		hs += HASH_4_SIZE;
+*/
+
+	const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
+	mf->hash_size_sum = hs;
+	mf->sons_count = mf->cyclic_buffer_size;
+	if (is_bt)
+		mf->sons_count *= 2;
+
+	const uint32_t new_count = mf->hash_size_sum + mf->sons_count;
+
+	// Deallocate the old hash array if it exists and has different size
+	// than what is needed now.
+	if (mf->hash != NULL && old_count != new_count) {
+		lzma_free(mf->hash, allocator);
+		mf->hash = NULL;
+	}
-		lz->num_items = num_items;
+	// Maximum number of match finder cycles
+	mf->loops = lz_options->match_finder_cycles;
+	if (mf->loops == 0) {
+		mf->loops = 16 + (lz_options->find_len_max / 2);
+		if (!is_bt)
+			mf->loops /= 2;
 	}
-	lz->son = lz->hash + hash_size_sum;
+	return false;
+}
-	// Reset the hash table to empty hash values.
-	{
-		uint32_t *restrict items = lz->hash;
-		for (uint32_t i = 0; i < hash_size_sum; ++i)
-			items[i] = EMPTY_HASH_VALUE;
+static bool
+lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
+{
+	// Allocate the history buffer.
+	if (mf->buffer == NULL) {
+		mf->buffer = lzma_alloc(mf->size, allocator);
+		if (mf->buffer == NULL)
+			return true;
 	}
-	lz->cyclic_buffer_pos = 0;
+	// Use cyclic_buffer_size as initial mf->offset. This allows
+	// avoiding a few branches in the match finders. The downside is
+	// that match finder needs to be normalized more often, which may
+	// hurt performance with huge dictionaries.
+	mf->offset = mf->cyclic_buffer_size;
+	mf->read_pos = 0;
+	mf->read_ahead = 0;
+	mf->read_limit = 0;
+	mf->write_pos = 0;
+	mf->pending = 0;
-	// Because zero is used as empty hash value, make the first byte
-	// appear at buffer[1 - offset].
-	++lz->offset;
+	// Allocate match finder's hash array.
+	const size_t alloc_count = mf->hash_size_sum + mf->sons_count;
-	// If we are using a preset dictionary, read it now.
-	// TODO: This isn't implemented yet so return LZMA_HEADER_ERROR.
-	if (preset_dictionary != NULL && preset_dictionary_size > 0) {
-		lzma_lz_encoder_end(lz, allocator);
-		return LZMA_HEADER_ERROR;
+#if UINT32_MAX >= SIZE_MAX / 4
+	// Check for integer overflow. (Huge dictionaries are not
+	// possible on 32-bit CPU.)
+	if (alloc_count > SIZE_MAX / sizeof(uint32_t))
+		return true;
+#endif
+
+	if (mf->hash == NULL) {
+		mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
+				allocator);
+		if (mf->hash == NULL)
+			return true;
 	}
-	// Set the process function pointer.
-	lz->process = process;
+	mf->son = mf->hash + mf->hash_size_sum;
+	mf->cyclic_buffer_pos = 0;
+
+	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
+	// can use memset().
+/*
+	for (uint32_t i = 0; i < hash_size_sum; ++i)
+		mf->hash[i] = EMPTY_HASH_VALUE;
+*/
+	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));
+
+	// We don't need to initialize mf->son, but not doing that will
+	// make Valgrind complain in normalization (see normalize() in
+	// lz_encoder_mf.c).
+	//
+	// Skipping this initialization is *very* good when big dictionary is
+	// used but only small amount of data gets actually compressed: most
+	// of the mf->hash won't get actually allocated by the kernel, so
+	// we avoid wasting RAM and improve initialization speed a lot.
+	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
+
+	mf->action = LZMA_RUN;
-	return LZMA_OK;
+	return false;
 }
-extern void
-lzma_lz_encoder_end(lzma_lz_encoder *lz, lzma_allocator *allocator)
+extern uint64_t
+lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
 {
-	lzma_free(lz->hash, allocator);
-	lz->hash = NULL;
-	lz->num_items = 0;
-
-	lzma_free(lz->buffer, allocator);
-	lz->buffer = NULL;
-	lz->size = 0;
-
-	return;
+	// Old buffers must not exist when calling lz_encoder_prepare().
+	lzma_mf mf = {
+		.buffer = NULL,
+		.hash = NULL,
+	};
+
+	// Setup the size information into mf.
+	if (lz_encoder_prepare(&mf, NULL, lz_options))
+		return UINT64_MAX;
+
+	// Calculate the memory usage.
+	return (uint64_t)(mf.hash_size_sum + mf.sons_count)
+			* sizeof(uint32_t)
+			+ (uint64_t)(mf.size) + sizeof(lzma_coder);
 }
-/// \brief Moves the data in the input window to free space for new data
-///
-/// lz->buffer is a sliding input window, which keeps lz->keep_size_before
-/// bytes of input history available all the time. Now and then we need to
-/// "slide" the buffer to make space for the new data to the end of the
-/// buffer. At the same time, data older than keep_size_before is dropped.
-///
 static void
-move_window(lzma_lz_encoder *lz)
+lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
 {
-	// buffer[move_offset] will become buffer[0].
-	assert(lz->read_pos > lz->keep_size_after);
-	size_t move_offset = lz->read_pos - lz->keep_size_before;
-
-	// We need one additional byte, since move_pos() moves on 1 byte.
-	// TODO: Clean up? At least document more.
-	if (move_offset > 0)
-		--move_offset;
-
-	assert(lz->write_pos > move_offset);
-	const size_t move_size = lz->write_pos - move_offset;
+	lzma_next_end(&coder->next, allocator);
-	assert(move_offset + move_size <= lz->size);
+	lzma_free(coder->mf.hash, allocator);
+	lzma_free(coder->mf.buffer, allocator);
-	memmove(lz->buffer, lz->buffer + move_offset, move_size);
-
-	lz->offset += move_offset;
-	lz->read_pos -= move_offset;
-	lz->read_limit -= move_offset;
-	lz->write_pos -= move_offset;
+	if (coder->lz.end != NULL)
+		coder->lz.end(coder->lz.coder, allocator);
+	else
+		lzma_free(coder->lz.coder, allocator);
+	lzma_free(coder, allocator);
 	return;
 }
-/// \brief Tries to fill the input window (lz->buffer)
-///
-/// If we are the last encoder in the chain, our input data is in in[].
-/// Otherwise we call the next filter in the chain to process in[] and
-/// write its output to lz->buffer.
-///
-/// This function must not be called once it has returned LZMA_STREAM_END.
-///
-static lzma_ret
-fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
-		size_t *in_pos, size_t in_size, lzma_action action)
+extern lzma_ret
+lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		const lzma_filter_info *filters,
+		lzma_ret (*lz_init)(lzma_lz_encoder *lz,
+			lzma_allocator *allocator, const void *options,
+			lzma_lz_options *lz_options))
 {
-	assert(coder->lz.read_pos <= coder->lz.write_pos);
+	// Allocate and initialize the base data structure.
+	if (next->coder == NULL) {
+		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (next->coder == NULL)
+			return LZMA_MEM_ERROR;
-	// Move the sliding window if needed.
-	if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after)
-		move_window(&coder->lz);
+		next->code = &lz_encode;
+		next->end = &lz_encoder_end;
-	size_t in_used;
-	lzma_ret ret;
-	if (coder->next.code == NULL) {
-		// Not using a filter, simply memcpy() as much as possible.
-		in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer,
-				&coder->lz.write_pos, coder->lz.size);
+		next->coder->lz.coder = NULL;
+		next->coder->lz.code = NULL;
+		next->coder->lz.end = NULL;
-		if (action != LZMA_RUN && *in_pos == in_size)
-			ret = LZMA_STREAM_END;
-		else
-			ret = LZMA_OK;
+		next->coder->mf.buffer = NULL;
+		next->coder->mf.hash = NULL;
-	} else {
-		const size_t in_start = *in_pos;
-		ret = coder->next.code(coder->next.coder, allocator,
-				in, in_pos, in_size,
-				coder->lz.buffer, &coder->lz.write_pos,
-				coder->lz.size, action);
-		in_used = *in_pos - in_start;
+		next->coder->next = LZMA_NEXT_CODER_INIT;
 	}
-	// If end of stream has been reached or flushing completed, we allow
-	// the encoder to process all the input (that is, read_pos is allowed
-	// to reach write_pos). Otherwise we keep keep_size_after bytes
-	// available as prebuffer.
-	if (ret == LZMA_STREAM_END) {
-		assert(*in_pos == in_size);
-		coder->lz.read_limit = coder->lz.write_pos;
-		ret = LZMA_OK;
+	// Initialize the LZ-based encoder.
+	lzma_lz_options lz_options;
+	return_if_error(lz_init(&next->coder->lz, allocator,
+			filters[0].options, &lz_options));
-		switch (action) {
-		case LZMA_SYNC_FLUSH:
-			coder->lz.sequence = SEQ_FLUSH;
-			break;
-
-		case LZMA_FINISH:
-			coder->lz.sequence = SEQ_FINISH;
-			break;
-
-		default:
-			assert(0);
-			ret = LZMA_PROG_ERROR;
-			break;
-		}
-
-	} else if (coder->lz.write_pos > coder->lz.keep_size_after) {
-		// This needs to be done conditionally, because if we got
-		// only little new input, there may be too little input
-		// to do any encoding yet.
-		coder->lz.read_limit = coder->lz.write_pos
-				- coder->lz.keep_size_after;
-	}
+	// Setup the size information into next->coder->mf and deallocate
+	// old buffers if they have wrong size.
+	if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options))
+		return LZMA_HEADER_ERROR;
-	// Restart the match finder after finished LZMA_SYNC_FLUSH.
-	if (coder->lz.pending > 0
-			&& coder->lz.read_pos < coder->lz.read_limit) {
-		// Match finder may update coder->pending and expects it to
-		// start from zero, so use a temporary variable.
-		const size_t pending = coder->lz.pending;
-		coder->lz.pending = 0;
+	// Allocate new buffers if needed, and do the rest of
+	// the initialization.
+	if (lz_encoder_init(&next->coder->mf, allocator))
+		return LZMA_MEM_ERROR;
-		// Rewind read_pos so that the match finder can hash
-		// the pending bytes.
-		assert(coder->lz.read_pos >= pending);
-		coder->lz.read_pos -= pending;
-		coder->lz.skip(&coder->lz, pending);
-	}
+	// Initialize the next filter in the chain, if any.
+	return lzma_next_filter_init(&next->coder->next, allocator,
+			filters + 1);
-	return ret;
 }
-extern lzma_ret
-lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator,
-		const uint8_t *restrict in, size_t *restrict in_pos,
-		size_t in_size,
-		uint8_t *restrict out, size_t *restrict out_pos,
-		size_t out_size, lzma_action action)
+extern LZMA_API lzma_bool
+lzma_mf_is_supported(lzma_match_finder mf)
 {
-	while (*out_pos < out_size
-			&& (*in_pos < in_size || action != LZMA_RUN)) {
-		// Read more data to coder->lz.buffer if needed.
-		if (coder->lz.sequence == SEQ_RUN
-				&& coder->lz.read_pos >= coder->lz.read_limit)
-			return_if_error(fill_window(coder, allocator,
-					in, in_pos, in_size, action));
+	bool ret = false;
-		// Encode
-		if (coder->lz.process(coder, out, out_pos, out_size)) {
-			// Setting this to SEQ_RUN for cases when we are
-			// flushing. It doesn't matter when finishing.
-			coder->lz.sequence = SEQ_RUN;
-			return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK;
-		}
-	}
+#ifdef HAVE_MF_HC3
+	if (mf == LZMA_MF_HC3)
+		ret = true;
+#endif
-	return LZMA_OK;
-}
+#ifdef HAVE_MF_HC4
+	if (mf == LZMA_MF_HC4)
+		ret = true;
+#endif
+#ifdef HAVE_MF_BT2
+	if (mf == LZMA_MF_BT2)
+		ret = true;
+#endif
-/// \brief Normalizes hash values
-///
-/// lzma_lz_normalize is called when lz->pos hits MAX_VAL_FOR_NORMALIZE,
-/// which currently happens once every 2 GiB of input data (to be exact,
-/// after the first 2 GiB it happens once every 2 GiB minus dictionary_size
-/// bytes). lz->pos is incremented by lzma_lz_move_pos().
-///
-/// lz->hash contains big amount of offsets relative to lz->buffer.
-/// The offsets are stored as uint32_t, which is the only reasonable
-/// datatype for these offsets; uint64_t would waste far too much RAM
-/// and uint16_t would limit the dictionary to 64 KiB (far too small).
-///
-/// When compressing files over 2 GiB, lz->buffer needs to be moved forward
-/// to avoid integer overflows. We scan the lz->hash array and fix every
-/// value to match the updated lz->buffer.
-extern void
-lzma_lz_encoder_normalize(lzma_lz_encoder *lz)
-{
-	const uint32_t subvalue = lz->read_pos - lz->cyclic_buffer_size;
-	assert(subvalue <= INT32_MAX);
-
-	{
-		const uint32_t num_items = lz->num_items;
-		uint32_t *restrict items = lz->hash;
-
-		for (uint32_t i = 0; i < num_items; ++i) {
-			// If the distance is greater than the dictionary
-			// size, we can simply mark the item as empty.
-			if (items[i] <= subvalue)
-				items[i] = EMPTY_HASH_VALUE;
-			else
-				items[i] -= subvalue;
-		}
-	}
+#ifdef HAVE_MF_BT3
+	if (mf == LZMA_MF_BT3)
+		ret = true;
+#endif
-	// Update offset to match the new locations.
-	lz->offset -= subvalue;
+#ifdef HAVE_MF_BT4
+	if (mf == LZMA_MF_BT4)
+		ret = true;
+#endif
-	return;
+	return ret;
 }
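Side note: the hash-mask rounding that the new lz_encoder_prepare() performs (bit smearing to 2^n - 1 with a 0xFFFF floor and a cap near 2^24) can be studied in isolation. The following standalone C sketch reproduces that rounding; hash_mask_sketch() and the sample dictionary sizes are illustrative names and values chosen here, not part of liblzma.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

// Rounds a dictionary size to a hash mask the way lz_encoder_prepare()
// does above: smear the top bit right so hs becomes 2^n - 1, halve it,
// enforce a 0xFFFF floor, and cap it near 2^24 (3-byte hashes are clamped
// to 2^24 - 1, 4-byte hashes are just halved once more). In the diff,
// hash_bytes comes from the low nibble of the match finder ID.
static uint32_t
hash_mask_sketch(uint32_t dictionary_size, uint32_t hash_bytes)
{
	if (hash_bytes == 2)
		return 0xFFFF;

	uint32_t hs = dictionary_size - 1;
	hs |= hs >> 1;
	hs |= hs >> 2;
	hs |= hs >> 4;
	hs |= hs >> 8;
	hs >>= 1;
	hs |= 0xFFFF;

	if (hs > (UINT32_C(1) << 24)) {
		if (hash_bytes == 3)
			hs = (UINT32_C(1) << 24) - 1;
		else
			hs >>= 1;
	}

	return hs;
}

int
main(void)
{
	// Illustrative sizes only; non-powers of two also end up as some
	// 2^n - 1 because the low 16 bits are filled by the 0xFFFF floor.
	const uint32_t sizes[] = {
		UINT32_C(1) << 16,
		UINT32_C(1) << 20,
		UINT32_C(1) << 26,
	};

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i)
		printf("dictionary %10" PRIu32 " -> hash_mask 0x%08" PRIX32
				"\n", sizes[i], hash_mask_sketch(sizes[i], 4));

	return 0;
}

Note the extra hs >>= 1 before the floor: above 64 KiB the hash table ends up with roughly half as many slots as the dictionary has bytes, the same trade-off the removed lzma_lz_encoder_hash_properties() made.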