From 3b34851de1eaf358cf9268922fa0eeed8278d680 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Thu, 28 Aug 2008 22:53:15 +0300 Subject: Sort of garbage collection commit. :-| Many things are still broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions. --- src/liblzma/lzma/Makefile.am | 37 +- src/liblzma/lzma/fastpos.h | 8 +- src/liblzma/lzma/lzma2_decoder.c | 318 ++++++ src/liblzma/lzma/lzma2_decoder.h | 35 + src/liblzma/lzma/lzma2_encoder.c | 406 ++++++++ src/liblzma/lzma/lzma2_encoder.h | 34 + src/liblzma/lzma/lzma_common.h | 208 ++-- src/liblzma/lzma/lzma_decoder.c | 1306 ++++++++++++++---------- src/liblzma/lzma/lzma_decoder.h | 21 +- src/liblzma/lzma/lzma_encoder.c | 576 ++++++++--- src/liblzma/lzma/lzma_encoder.h | 38 +- src/liblzma/lzma/lzma_encoder_features.c | 2 +- src/liblzma/lzma/lzma_encoder_getoptimum.c | 925 ----------------- src/liblzma/lzma/lzma_encoder_getoptimumfast.c | 201 ---- src/liblzma/lzma/lzma_encoder_init.c | 228 ----- src/liblzma/lzma/lzma_encoder_optimum_fast.c | 193 ++++ src/liblzma/lzma/lzma_encoder_optimum_normal.c | 875 ++++++++++++++++ src/liblzma/lzma/lzma_encoder_presets.c | 52 +- src/liblzma/lzma/lzma_encoder_private.h | 174 ++-- src/liblzma/lzma/lzma_literal.c | 51 - src/liblzma/lzma/lzma_literal.h | 71 -- 21 files changed, 3425 insertions(+), 2334 deletions(-) create mode 100644 src/liblzma/lzma/lzma2_decoder.c create mode 100644 src/liblzma/lzma/lzma2_decoder.h create mode 100644 src/liblzma/lzma/lzma2_encoder.c create mode 100644 src/liblzma/lzma/lzma2_encoder.h delete mode 100644 src/liblzma/lzma/lzma_encoder_getoptimum.c delete mode 100644 src/liblzma/lzma/lzma_encoder_getoptimumfast.c delete mode 100644 src/liblzma/lzma/lzma_encoder_init.c create mode 100644 src/liblzma/lzma/lzma_encoder_optimum_fast.c create mode 100644 src/liblzma/lzma/lzma_encoder_optimum_normal.c delete mode 100644 src/liblzma/lzma/lzma_literal.c delete mode 100644 src/liblzma/lzma/lzma_literal.h (limited to 'src/liblzma/lzma') diff --git a/src/liblzma/lzma/Makefile.am b/src/liblzma/lzma/Makefile.am index 59ded214..7aeceb63 100644 --- a/src/liblzma/lzma/Makefile.am +++ b/src/liblzma/lzma/Makefile.am @@ -14,37 +14,46 @@ EXTRA_DIST = fastpos_tablegen.c -noinst_LTLIBRARIES = liblzma4.la -liblzma4_la_CPPFLAGS = \ +## Using liblzma2 since liblzma is already used for the final library. +noinst_LTLIBRARIES = liblzma2.la +liblzma2_la_CPPFLAGS = \ -I@top_srcdir@/src/liblzma/api \ -I@top_srcdir@/src/liblzma/common \ -I@top_srcdir@/src/liblzma/lz \ -I@top_srcdir@/src/liblzma/rangecoder -liblzma4_la_SOURCES = \ - lzma_common.h \ - lzma_literal.c \ - lzma_literal.h +liblzma2_la_SOURCES = lzma_common.h -if COND_MAIN_ENCODER -liblzma4_la_SOURCES += \ +if COND_ENCODER_LZMA +liblzma2_la_SOURCES += \ fastpos.h \ lzma_encoder.h \ lzma_encoder.c \ lzma_encoder_presets.c \ lzma_encoder_private.h \ - lzma_encoder_init.c \ lzma_encoder_features.c \ - lzma_encoder_getoptimum.c \ - lzma_encoder_getoptimumfast.c + lzma_encoder_optimum_fast.c \ + lzma_encoder_optimum_normal.c if !COND_SMALL -liblzma4_la_SOURCES += fastpos_table.c +liblzma2_la_SOURCES += fastpos_table.c endif endif -if COND_MAIN_DECODER -liblzma4_la_SOURCES += \ +if COND_DECODER_LZMA +liblzma2_la_SOURCES += \ lzma_decoder.c \ lzma_decoder.h endif + +if COND_ENCODER_LZMA2 +liblzma2_la_SOURCES += \ + lzma2_encoder.c \ + lzma2_encoder.h +endif + +if COND_DECODER_LZMA2 +liblzma2_la_SOURCES += \ + lzma2_decoder.c \ + lzma2_decoder.h +endif diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h index 57a94556..503be275 100644 --- a/src/liblzma/lzma/fastpos.h +++ b/src/liblzma/lzma/fastpos.h @@ -81,8 +81,6 @@ // I'm making the table version the default, because that has good speed // on all systems I have tried. The size optimized version is sometimes // slightly faster, but sometimes it is a lot slower. -// -// Finally, this code isn't a major bottle neck in LZMA encoding anyway. #ifdef HAVE_SMALL # include "bsr.h" @@ -135,11 +133,7 @@ get_pos_slot(uint32_t pos) static inline uint32_t get_pos_slot_2(uint32_t pos) { - // FIXME: This assert() cannot be enabled at the moment, because - // lzma_getoptimum.c calls this function so that this assertion - // fails; however, it ignores the result of this function when - // this assert() would have failed. - // assert(pos >= FULL_DISTANCES); + assert(pos >= FULL_DISTANCES); if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0); diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c new file mode 100644 index 00000000..b16c40ce --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.c @@ -0,0 +1,318 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). + bool need_dictionary_reset; + + lzma_options_lzma options; +}; + + +static lzma_ret +lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: + if (in[*in_pos] & 0x80) { + // Get the highest five bits of uncompressed size. + coder->uncompressed_size + = (uint32_t)(in[*in_pos] & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if we need to reset dictionary or state. + switch ((in[(*in_pos)++] >> 5) & 3) { + case 3: + dict_reset(dict); + coder->need_dictionary_reset = false; + + // Fall through + + case 2: + if (coder->need_dictionary_reset) + return LZMA_DATA_ERROR; + + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + break; + + case 1: + if (coder->need_properties) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, + &coder->options); + + coder->next_sequence = SEQ_LZMA; + break; + + case 0: + if (coder->need_properties) + return LZMA_DATA_ERROR; + + coder->next_sequence = SEQ_LZMA; + break; + } + + } else { + switch (in[(*in_pos)++]) { + case 0: + // End of payload marker + return LZMA_STREAM_END; + + case 1: + // Dictionary reset + dict_reset(dict); + coder->need_dictionary_reset = false; + + // Fall through + + case 2: + if (coder->need_dictionary_reset) + return LZMA_DATA_ERROR; + + // Uncompressed chunk; we need to read total + // size first. + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + break; + + default: + return LZMA_DATA_ERROR; + } + } + + break; + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. + if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + // FIXME Can copy too much? + dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, size_t *dict_size) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + lz->coder->lzma = LZMA_LZ_DECODER_INIT; + } + + lz->coder->sequence = SEQ_CONTROL; + lz->coder->need_properties = true; + lz->coder->need_dictionary_reset = true; + + return lzma_lzma_decoder_create(&lz->coder->lzma, + allocator, options, dict_size); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + const uint64_t lzma_memusage = lzma_lzma_decoder_memusage(options); + if (lzma_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lzma_memusage; +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_HEADER_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_HEADER_ERROR; + + // Decode the dictionary size. + if (props[0] > 40) + return LZMA_HEADER_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dictionary_size = UINT32_MAX; + } else { + opt->dictionary_size = 2 | (props[0] & 1); + opt->dictionary_size <<= props[0] / 2 + 11; + } + + opt->preset_dictionary = NULL; + opt->preset_dictionary_size = 0; + + *options = opt; + + return LZMA_OK; +} diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h new file mode 100644 index 00000000..a7504863 --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.h @@ -0,0 +1,35 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c new file mode 100644 index 00000000..b2cd176b --- /dev/null +++ b/src/liblzma/lzma/lzma2_encoder.c @@ -0,0 +1,406 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.c +/// \brief LZMA2 encoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lzma_encoder.h" +#include "fastpos.h" +#include "lzma2_encoder.h" + + +/// Maximum number of bytes of actual data per chunk (no headers) +#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16) + +/// Maximum uncompressed size of LZMA chunk (no headers) +#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21) + +/// Maximum size of LZMA2 headers +#define LZMA2_HEADER_MAX 6 + +/// Size of a header for uncompressed chunk +#define LZMA2_HEADER_UNCOMPRESSED 3 + + +struct lzma_coder_s { + enum { + SEQ_INIT, + SEQ_LZMA_ENCODE, + SEQ_LZMA_COPY, + SEQ_UNCOMPRESSED_HEADER, + SEQ_UNCOMPRESSED_COPY, + } sequence; + + /// LZMA encoder + lzma_coder *lzma; + + /// If this is not NULL, we will check new options from this + /// structure when starting a new chunk. + const lzma_options_lzma *opt_new; + + /// LZMA options currently in use. + lzma_options_lzma opt_cur; + + bool need_properties; + bool need_state_reset; + bool need_dictionary_reset; + + /// Uncompressed size of a chunk + size_t uncompressed_size; + + /// Compressed size of a chunk (excluding headers); this is also used + /// to indicate the end of buf[] in SEQ_LZMA_COPY. + size_t compressed_size; + + /// Read position in buf[] + size_t buf_pos; + + /// Buffer to hold the chunk header and LZMA compressed data + uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; +}; + + +static void +lzma2_header_lzma(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + assert(coder->compressed_size > 0); + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + + size_t pos; + + if (coder->need_properties) { + pos = 0; + + if (coder->need_dictionary_reset) + coder->buf[pos] = 0x80 + (3 << 5); + else + coder->buf[pos] = 0x80 + (2 << 5); + } else { + pos = 1; + + if (coder->need_state_reset) + coder->buf[pos] = 0x80 + (1 << 5); + else + coder->buf[pos] = 0x80; + } + + // Set the start position for copying. + coder->buf_pos = pos; + + // Uncompressed size + size_t size = coder->uncompressed_size - 1; + coder->buf[pos++] += size >> 16; + coder->buf[pos++] = (size >> 8) & 0xFF; + coder->buf[pos++] = size & 0xFF; + + // Compressed size + size = coder->compressed_size - 1; + coder->buf[pos++] = size >> 8; + coder->buf[pos++] = size & 0xFF; + + // Properties, if needed + if (coder->need_properties) + lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); + + coder->need_properties = false; + coder->need_state_reset = false; + coder->need_dictionary_reset = false; + + // The copying code uses coder->compressed_size to indicate the end + // of coder->buf[], so we need add the maximum size of the header here. + coder->compressed_size += LZMA2_HEADER_MAX; + + return; +} + + +static void +lzma2_header_uncompressed(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); + + // If this is the first chunk, we need to include dictionary + // reset indicator. + if (coder->need_dictionary_reset) + coder->buf[0] = 1; + else + coder->buf[0] = 2; + + coder->need_dictionary_reset = false; + + // "Compressed" size + coder->buf[1] = (coder->uncompressed_size - 1) >> 8; + coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; + + // Set the start position for copying. + coder->buf_pos = 0; + return; +} + + +static lzma_ret +lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INIT: + // If there's no input left and we are flushing or finishing, + // don't start a new chunk. + if (mf_unencoded(mf) == 0) { + // Write end of payload marker if finishing. + if (mf->action == LZMA_FINISH) + out[(*out_pos)++] = 0; + + return mf->action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + } + + // Look if there are new options. At least for now, + // only lc/lp/pb can be changed. + if (coder->opt_new != NULL + && (coder->opt_cur.literal_context_bits + != coder->opt_new->literal_context_bits + || coder->opt_cur.literal_pos_bits + != coder->opt_new->literal_pos_bits + || coder->opt_cur.pos_bits + != coder->opt_new->pos_bits)) { + // Options have been changed, copy them to opt_cur. + coder->opt_cur.literal_context_bits + = coder->opt_new->literal_context_bits; + coder->opt_cur.literal_pos_bits + = coder->opt_new->literal_pos_bits; + coder->opt_cur.pos_bits + = coder->opt_new->pos_bits; + + // We need to write the new options and reset + // the encoder state. + coder->need_properties = true; + coder->need_state_reset = true; + } + + if (coder->need_state_reset) + lzma_lzma_encoder_reset(coder->lzma, &coder->opt_cur); + + coder->uncompressed_size = 0; + coder->compressed_size = 0; + coder->sequence = SEQ_LZMA_ENCODE; + + // Fall through + + case SEQ_LZMA_ENCODE: { + // Calculate how much more uncompressed data this chunk + // could accept. + const uint32_t left = LZMA2_UNCOMPRESSED_MAX + - coder->uncompressed_size; + uint32_t limit; + + if (left < mf->match_len_max) { + // Must flush immediatelly since the next LZMA symbol + // could make the uncompressed size of the chunk too + // big. + limit = 0; + } else { + // Calculate maximum read_limit that is OK from point + // of view of LZMA2 chunk size. + limit = mf->read_pos - mf->read_ahead + + left - mf->match_len_max; + } + + // Save the start position so that we can update + // coder->uncompressed_size. + const uint32_t read_start = mf->read_pos - mf->read_ahead; + + // Call the LZMA encoder until the chunk is finished. + const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, + coder->buf + LZMA2_HEADER_MAX, + &coder->compressed_size, + LZMA2_CHUNK_MAX, limit); + + coder->uncompressed_size += mf->read_pos - mf->read_ahead + - read_start; + + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + + if (ret != LZMA_STREAM_END) + return LZMA_OK; + + // See if the chunk compressed. If it didn't, we encode it + // as uncompressed chunk. This saves a few bytes of space + // and makes decoding faster. + if (coder->compressed_size >= coder->uncompressed_size) { + coder->uncompressed_size += mf->read_ahead; + assert(coder->uncompressed_size + <= LZMA2_UNCOMPRESSED_MAX); + mf->read_ahead = 0; + lzma2_header_uncompressed(coder); + coder->need_state_reset = true; + coder->sequence = SEQ_UNCOMPRESSED_HEADER; + break; + } + + // The chunk did compress at least by one byte, so we store + // the chunk as LZMA. + lzma2_header_lzma(coder); + + coder->sequence = SEQ_LZMA_COPY; + } + + // Fall through + + case SEQ_LZMA_COPY: + // Copy the compressed chunk along its headers to the + // output buffer. + lzma_bufcpy(coder->buf, &coder->buf_pos, + coder->compressed_size, + out, out_pos, out_size); + if (coder->buf_pos != coder->compressed_size) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + + case SEQ_UNCOMPRESSED_HEADER: + // Copy the three-byte header to indicate uncompressed chunk. + lzma_bufcpy(coder->buf, &coder->buf_pos, + LZMA2_HEADER_UNCOMPRESSED, + out, out_pos, out_size); + if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) + return LZMA_OK; + + coder->sequence = SEQ_UNCOMPRESSED_COPY; + + // Fall through + + case SEQ_UNCOMPRESSED_COPY: + // Copy the uncompressed data as is from the dictionary + // to the output buffer. + mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); + if (coder->uncompressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + } + + return LZMA_OK; +} + + +static void +lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_encode; + lz->end = &lzma2_encoder_end; + + lz->coder->lzma = NULL; + } + + lz->coder->sequence = SEQ_INIT; + lz->coder->need_properties = true; + lz->coder->need_state_reset = false; + lz->coder->need_dictionary_reset = true; + + lz->coder->opt_cur = *(const lzma_options_lzma *)(options); + lz->coder->opt_new = lz->coder->opt_cur.persistent + ? options : NULL; + + // Initialize LZMA encoder + return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, + &lz->coder->opt_cur, lz_options)); + + // Make sure that we will always have enough history available in + // case we need to use uncompressed chunks. They are used when the + // compressed size of a chunk is not smaller than the uncompressed + // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes + // history available. + if (lz_options->before_size + lz_options->dictionary_size + < LZMA2_CHUNK_MAX) + lz_options->before_size = LZMA2_CHUNK_MAX + - lz_options->dictionary_size; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma2_encoder_init); +} + + +extern uint64_t +lzma_lzma2_encoder_memusage(const void *options) +{ + const uint64_t lzma_memusage = lzma_lzma_encoder_memusage(options); + if (lzma_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lzma_memusage; +} + + +extern lzma_ret +lzma_lzma2_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + uint32_t d = MAX(opt->dictionary_size, LZMA_DICTIONARY_SIZE_MIN); + + // Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending + // on which one is the next: + --d; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + + // Get the highest two bits using the proper encoding: + if (d == UINT32_MAX) + out[0] = 40; + else + out[0] = get_pos_slot(d + 1) - 24; + + return LZMA_OK; +} diff --git a/src/liblzma/lzma/lzma2_encoder.h b/src/liblzma/lzma/lzma2_encoder.h new file mode 100644 index 00000000..3e27f680 --- /dev/null +++ b/src/liblzma/lzma/lzma2_encoder.h @@ -0,0 +1,34 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.h +/// \brief LZMA2 encoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_ENCODER_H +#define LZMA_LZMA2_ENCODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out); + +#endif diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h index f677fcce..6909969b 100644 --- a/src/liblzma/lzma/lzma_common.h +++ b/src/liblzma/lzma/lzma_common.h @@ -22,81 +22,31 @@ #define LZMA_LZMA_COMMON_H #include "common.h" -#include "lzma_literal.h" #include "range_common.h" -/////////////// -// Constants // -/////////////// - -#define REP_DISTANCES 4 - -#define POS_SLOT_BITS 6 -#define DICT_LOG_SIZE_MAX 30 -#define DIST_TABLE_SIZE_MAX (DICT_LOG_SIZE_MAX * 2) -#if (UINT32_C(1) << DICT_LOG_SIZE_MAX) != LZMA_DICTIONARY_SIZE_MAX -# error DICT_LOG_SIZE_MAX is inconsistent with LZMA_DICTIONARY_SIZE_MAX -#endif - -// 2 is for speed optimization -#define LEN_TO_POS_STATES_BITS 2 -#define LEN_TO_POS_STATES (1 << LEN_TO_POS_STATES_BITS) - -#define MATCH_MIN_LEN 2 - -#define ALIGN_BITS 4 -#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) -#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) - -#define START_POS_MODEL_INDEX 4 -#define END_POS_MODEL_INDEX 14 -#define POS_MODELS (END_POS_MODEL_INDEX - START_POS_MODEL_INDEX) - -#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) -#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) +/////////////////// +// Miscellaneous // +/////////////////// +/// Maximum number of position states. A position state is the lowest pos bits +/// number of bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. #define POS_STATES_MAX (1 << LZMA_POS_BITS_MAX) -// Length coder & Length price table encoder -#define LEN_LOW_BITS 3 -#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) -#define LEN_MID_BITS 3 -#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) -#define LEN_HIGH_BITS 8 -#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) -#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) -#define LEN_SPEC_SYMBOLS (LOW_LOW_SYMBOLS + LEN_MID_LEN_SYMBOLS) -#define MATCH_MAX_LEN (MATCH_MIN_LEN + LEN_SYMBOLS - 1) - -// Total number of probs in a Len Encoder -#define LEN_CODER_TOTAL_PROBS (LEN_HIGH_CODER + LEN_HIGH_SYMBOLS) - -// Price table size of Len Encoder -#define LEN_PRICES (LEN_SYMBOLS << LZMA_POS_BITS_MAX) - -// Special lengths used together with distance == UINT32_MAX -#define LEN_SPECIAL_EOPM MATCH_MIN_LEN -#define LEN_SPECIAL_FLUSH (LEN_SPECIAL_EOPM + 1) - - -// Optimal - Number of entries in the optimum array. -#define OPTS (1 << 12) - - -// Miscellaneous -#define INFINITY_PRICE 0x0FFFFFFF - - -//////////// -// Macros // -//////////// - -#define get_len_to_pos_state(len) \ - ((len) < LEN_TO_POS_STATES + MATCH_MIN_LEN \ - ? (len) - MATCH_MIN_LEN \ - : LEN_TO_POS_STATES - 1) +/// Validates literal_context_bits, literal_pos_bits, and pos_bits. +static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->literal_context_bits <= LZMA_LITERAL_CONTEXT_BITS_MAX + && options->literal_pos_bits + <= LZMA_LITERAL_POS_BITS_MAX + && options->literal_context_bits + + options->literal_pos_bits + <= LZMA_LITERAL_BITS_MAX + && options->pos_bits <= LZMA_POS_BITS_MAX; +} /////////// @@ -161,4 +111,126 @@ typedef enum { #define is_literal_state(state) \ ((state) < LIT_STATES) + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something else than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE 0x300 + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LITERAL_BITS_MAX) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte. +#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ + ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + + +static inline void +literal_init(probability (*probs)[LITERAL_CODER_SIZE], + uint32_t literal_context_bits, uint32_t literal_pos_bits) +{ + assert(literal_context_bits + literal_pos_bits + <= LZMA_LITERAL_BITS_MAX); + + const uint32_t coders + = 1U << (literal_context_bits + literal_pos_bits); + + for (uint32_t i = 0; i < coders; ++i) + for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. +// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different set of probabilities is used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define LEN_TO_POS_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_len_to_pos_state(len) \ + ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : LEN_TO_POS_STATES - 1) + +// The highest two bits of a match distance (pos slot) are encoded using six +// bits. See fastpos.h for more explanation. +#define POS_SLOT_BITS 6 +#define POS_SLOTS (1 << POS_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (pos slot) are always encoded using six bits, the +// distances 0-3 don't need any additional bits to encode, since the pos +// slot itself is the same as the actual distance. START_POS_MODEL_INDEX +// indicates the first pos slot where at least one additional bit is needed. +#define START_POS_MODEL_INDEX 4 + +// Match distances greater than 127 are encoded in three pieces: +// - pos slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. +// +// The pos slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define END_POS_MODEL_INDEX 14 + +// Seven-bit distances use the full FIXME +#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) is encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value. +#define REP_DISTANCES 4 + #endif diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 68941021..e9d047d3 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -4,7 +4,7 @@ /// \brief LZMA decoder // // Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -18,74 +18,147 @@ // /////////////////////////////////////////////////////////////////////////////// -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - +#include "lz_decoder.h" #include "lzma_common.h" #include "lzma_decoder.h" -#include "lz_decoder.h" #include "range_decoder.h" -/// REQUIRED_IN_BUFFER_SIZE is the number of required input bytes -/// for the worst case: longest match with longest distance. -/// LZMA_IN_BUFFER_SIZE must be larger than REQUIRED_IN_BUFFER_SIZE. -/// 23 bits = 2 (match select) + 10 (len) + 6 (distance) + 4 (align) -/// + 1 (rc_normalize) -/// -/// \todo Is this correct for sure? -/// -#define REQUIRED_IN_BUFFER_SIZE \ - ((23 * (BIT_MODEL_TOTAL_BITS - MOVE_BITS + 1) + 26 + 9) / 8) +#ifdef HAVE_SMALL +// Macros for (somewhat) size-optimized code. +#define seq_4(seq) seq -// Length decoders are easiest to have as macros, because they use range -// decoder macros, which use local variables rc_range and rc_code. +#define seq_6(seq) seq -#define length_decode(target, len_decoder, pos_state) \ +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0(len_decoder.choice); \ - target = MATCH_MIN_LEN; \ - bittree_decode(target, len_decoder.low[pos_state], LEN_LOW_BITS); \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ } else { \ - update_bit_1(len_decoder.choice); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode(target, len_decoder.mid[pos_state], LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ } \ } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ } while (0) - -#define length_decode_dummy(target, len_decoder, pos_state) \ +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN; \ - bittree_decode_dummy(target, \ - len_decoder.low[pos_state], LEN_LOW_BITS); \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ } else { \ - update_bit_1_dummy(); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.mid[pos_state], \ - LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ } \ } \ } while (0) +#endif // HAVE_SMALL + +/// Length decoder probabilities; see comments in lzma_common.h. typedef struct { probability choice; probability choice2; @@ -96,26 +169,12 @@ typedef struct { struct lzma_coder_s { - /// Data of the next coder, if any. - lzma_next_coder next; - - /// Sliding output window a.k.a. dictionary a.k.a. history buffer. - lzma_lz_decoder lz; - - // Range coder - lzma_range_decoder rc; - - // State - lzma_lzma_state state; - uint32_t rep0; ///< Distance of the latest match - uint32_t rep1; ///< Distance of second latest match - uint32_t rep2; ///< Distance of third latest match - uint32_t rep3; ///< Distance of fourth latest match - uint32_t pos_bits; - uint32_t pos_mask; - uint32_t now_pos; // Lowest 32-bits are enough here. + /////////////////// + // Probabilities // + /////////////////// - lzma_literal_coder literal_coder; + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; /// If 1, it's a match. Otherwise it's a single 8-bit literal. probability is_match[STATES][POS_STATES_MAX]; @@ -138,178 +197,107 @@ struct lzma_coder_s { /// the length is decoded from rep_len_decoder. probability is_rep0_long[STATES][POS_STATES_MAX]; - probability pos_slot_decoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; - probability pos_decoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; - probability pos_align_decoder[1 << ALIGN_BITS]; - - /// Length of a match - lzma_length_decoder match_len_decoder; - - /// Length of a repeated match. - lzma_length_decoder rep_len_decoder; - - /// True when we have produced at least one byte of output since the - /// beginning of the stream or the latest flush marker. - bool has_produced_output; -}; - - -/// \brief Check if the next iteration of the decoder loop can be run. -/// -/// \note There must always be REQUIRED_IN_BUFFER_SIZE bytes -/// readable space! -/// -static bool lzma_attribute((pure)) -decode_dummy(const lzma_coder *restrict coder, - const uint8_t *restrict in, size_t in_pos_local, - const size_t in_size, lzma_range_decoder rc, - uint32_t state, uint32_t rep0, const uint32_t now_pos) -{ - uint32_t rc_bound; - - do { - const uint32_t pos_state = now_pos & coder->pos_mask; - - if_bit_0(coder->is_match[state][pos_state]) { - // It's a literal i.e. a single 8-bit byte. - - update_bit_0_dummy(); - - const probability *subcoder = literal_get_subcoder( - coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; - - if (is_literal_state(state)) { - // Decode literal without match byte. - do { - if_bit_0(subcoder[symbol]) { - update_bit_0_dummy(); - symbol <<= 1; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - - } else { - // Decode literal with match byte. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - - do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; - const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; - - if_bit_0(subcoder[subcoder_index]) { - update_bit_0_dummy(); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); - } - - break; - } - - update_bit_1_dummy(); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0_dummy(); - length_decode_dummy(len, coder->match_len_decoder, pos_state); - - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode_dummy(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 31); - rep0 = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - rep0 <<= direct_bits; - assert(rep0 <= 96); - // -1 is fine, because bittree_reverse_decode() - // starts from table index [1] (not [0]). - assert((int32_t)(rep0 - pos_slot - 1) >= -1); - assert((int32_t)(rep0 - pos_slot - 1) <= 82); - // We add the result to rep0, so rep0 - // must not be part of second argument - // of the macro. - const int32_t offset = rep0 - pos_slot - 1; - bittree_reverse_decode_dummy(coder->pos_decoders + offset, - direct_bits); - } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct_dummy(direct_bits); - - bittree_reverse_decode_dummy(coder->pos_align_decoder, - ALIGN_BITS); - } - } + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; - } else { - update_bit_1_dummy(); + /// Probility trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; - if_bit_0(coder->is_rep0[state]) { - update_bit_0_dummy(); + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_TABLE_SIZE]; - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0_dummy(); - break; - } else { - update_bit_1_dummy(); - } + /// Length of a normal match + lzma_length_decoder match_len_decoder; - } else { - update_bit_1_dummy(); + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; - if_bit_0(coder->is_rep1[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); + /////////////////// + // Decoder state // + /////////////////// - if_bit_0(coder->is_rep2[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); - } - } - } + // Range coder + lzma_range_decoder rc; - length_decode_dummy(len, coder->rep_len_decoder, pos_state); - } - } while (0); + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; - rc_normalize(); + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match - return in_pos_local <= in_size; -} + uint32_t pos_mask; // (1U << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_VALUE_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. + uint32_t len; +}; -static bool -decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size, bool has_safe_buffer) +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) { //////////////////// // Initialization // //////////////////// if (!rc_read_init(&coder->rc, in, in_pos, in_size)) - return false; + return LZMA_OK; /////////////// // Variables // @@ -318,8 +306,12 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // Making local copies of often-used variables improves both // speed and readability. + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + // Range decoder - rc_to_local(coder->rc); + rc_to_local(coder->rc, *in_pos); // State uint32_t state = coder->state; @@ -328,87 +320,168 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, uint32_t rep2 = coder->rep2; uint32_t rep3 = coder->rep3; - // Misc - uint32_t now_pos = coder->now_pos; - bool has_produced_output = coder->has_produced_output; - - // Variables derived from decoder settings const uint32_t pos_mask = coder->pos_mask; - size_t in_pos_local = *in_pos; // Local copy - size_t in_limit; - if (in_size <= REQUIRED_IN_BUFFER_SIZE) - in_limit = 0; - else - in_limit = in_size - REQUIRED_IN_BUFFER_SIZE; - - - while (coder->lz.pos < coder->lz.limit - && (in_pos_local < in_limit || (has_safe_buffer - && decode_dummy(coder, in, in_pos_local, in_size, - rc, state, rep0, now_pos)))) { - - ///////////////////// - // Actual decoding // - ///////////////////// - - const uint32_t pos_state = now_pos & pos_mask; + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; - if_bit_0(coder->is_match[state][pos_state]) { - update_bit_0(coder->is_match[state][pos_state]); + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); // It's a literal i.e. a single 8-bit byte. - probability *subcoder = literal_get_subcoder(coder->literal_coder, - now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; if (is_literal_state(state)) { // Decode literal without match byte. +#ifdef HAVE_SMALL + case SEQ_LITERAL: do { - if_bit_0(subcoder[symbol]) { - update_bit_0(subcoder[symbol]); - symbol <<= 1; - } else { - update_bit_1(subcoder[symbol]); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif } else { // Decode literal with match byte. // - // The usage of subcoder_offset allows omitting some - // branches, which should give tiny speed improvement on - // some CPUs. subcoder_offset gets set to zero if match_bit - // didn't match. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t match_bit + = len & offset; const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. + uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } - if_bit_0(subcoder[subcoder_index]) { - update_bit_0(subcoder[subcoder_index]); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1(subcoder[subcoder_index]); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. + static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; } - // Put the decoded byte to the dictionary, update the - // decoder state, and start a new decoding loop. - coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol); - ++now_pos; - update_literal(state); - has_produced_output = true; continue; } @@ -416,115 +489,196 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // (distance and length) which will be repeated from our // output history. - update_bit_1(coder->is_match[state][pos_state]); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0(coder->is_rep[state]); + rc_update_1(coder->is_match[state][pos_state]); + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { // Not a repeated match - // - // We will decode a new distance and store - // the value to distance. - - // Decode the length of the match. - length_decode(len, coder->match_len_decoder, pos_state); - + rc_update_0(coder->is_rep[state]); update_match(state); - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 30); - uint32_t distance = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - distance <<= direct_bits; - assert(distance <= 96); - // -1 is fine, because - // bittree_reverse_decode() - // starts from table index [1] - // (not [0]). - assert((int32_t)(distance - pos_slot - 1) >= -1); - assert((int32_t)(distance - pos_slot - 1) <= 82); - // We add the result to distance, so distance - // must not be part of second argument - // of the macro. - const int32_t offset = distance - pos_slot - 1; - bittree_reverse_decode(distance, coder->pos_decoders + offset, - direct_bits); + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. + probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 29] bits of + // the match distance. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct(distance, direct_bits); - distance <<= ALIGN_BITS; - - bittree_reverse_decode(distance, coder->pos_align_decoder, - ALIGN_BITS); - - if (distance == UINT32_MAX) { - if (len == LEN_SPECIAL_EOPM) { - // End of Payload Marker found. - coder->lz.eopm_detected = true; - break; - - } else if (len == LEN_SPECIAL_FLUSH) { - // Flush marker detected. We must have produced - // at least one byte of output since the previous - // flush marker or the beginning of the stream. - // This is to prevent hanging the decoder with - // malicious input files. - if (!has_produced_output) - return true; - - has_produced_output = false; - - // We know that we have enough input to call - // this macro, because it is tested at the - // end of decode_dummy(). - rc_normalize(); - - rc_reset(rc); - - // If we don't have enough input here, we jump - // out of the loop. Note that while there is a - // useless call to rc_normalize(), it does nothing - // since we have just reset the range decoder. - if (!rc_read_init(&rc, in, &in_pos_local, in_size)) - break; - - continue; - - } else { - return true; + // The distace is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. + rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; } + + case SEQ_EOPM: + // TODO Comment + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; } } + } - // The latest three match distances are kept in - // memory in case there are repeated matches. - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance; - - } else { - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = pos_slot; + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; } } else { - update_bit_1(coder->is_rep[state]); + rc_update_1(coder->is_rep[state]); // Repeated match // @@ -532,242 +686,318 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // earlier. The latest four match distances are // available as rep0, rep1, rep2 and rep3. We will // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } - if_bit_0(coder->is_rep0[state]) { - update_bit_0(coder->is_rep0[state]); - + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); // The distance is rep0. - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0(coder->is_rep0_long[state][pos_state]); + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); update_short_rep(state); - // Repeat exactly one byte and start a new decoding loop. - // Note that rep0 is known to have a safe value, thus we - // don't need to check if we are wrapping the dictionary - // when it isn't full yet. - if (unlikely(lz_is_empty(coder->lz))) - return true; - - coder->lz.dict[coder->lz.pos] - = lz_get_byte(coder->lz, rep0); - ++coder->lz.pos; - ++now_pos; - has_produced_output = true; - continue; - - } else { - update_bit_1(coder->is_rep0_long[state][pos_state]); + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } - // Repeating more than one byte at - // distance of rep0. + continue; } + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + } else { - update_bit_1(coder->is_rep0[state]); + rc_update_1(coder->is_rep0[state]); + case SEQ_IS_REP1: // The distance is rep1, rep2 or rep3. Once // we find out which one of these three, it // is stored to rep0 and rep1, rep2 and rep3 // are updated accordingly. + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); - uint32_t distance; + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep1[state]) { - update_bit_0(coder->is_rep1[state]); - distance = rep1; } else { - update_bit_1(coder->is_rep1[state]); + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep2[state]) { - update_bit_0(coder->is_rep2[state]); - distance = rep2; } else { - update_bit_1(coder->is_rep2[state]); - distance = rep3; + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; } - - rep2 = rep1; } - - rep1 = rep0; - rep0 = distance; } update_long_rep(state); // Decode the length of the repeated match. - length_decode(len, coder->rep_len_decoder, pos_state); + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); } - ///////////////////////////////// // Repeat from history buffer. // ///////////////////////////////// // The length is always between these limits. There is no way // to trigger the algorithm to set len outside this range. - assert(len >= MATCH_MIN_LEN); - assert(len <= MATCH_MAX_LEN); - - now_pos += len; - has_produced_output = true; + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + case SEQ_COPY: // Repeat len bytes from distance of rep0. - if (!lzma_lz_out_repeat(&coder->lz, rep0, len)) - return true; + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } } - rc_normalize(); + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; +out: + // Save state - ///////////////////////////////// - // Update the *data structure. // - ///////////////////////////////// + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; - // Range decoder - rc_from_local(coder->rc); + rc_from_local(coder->rc, *in_pos); - // State coder->state = state; coder->rep0 = rep0; coder->rep1 = rep1; coder->rep2 = rep2; coder->rep3 = rep3; - // Misc - coder->now_pos = now_pos; - coder->has_produced_output = has_produced_output; - *in_pos = in_pos_local; + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; - return false; + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + } + + return ret; } + static void -lzma_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) { - lzma_next_coder_end(&coder->next, allocator); - lzma_lz_decoder_end(&coder->lz, allocator); - lzma_free(coder, allocator); - return; + coder->uncompressed_size = uncompressed_size; } - -extern lzma_ret -lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) { - // LZMA can only be the last filter in the chain. - assert(filters[1].init == NULL); - - // Validate pos_bits. Other options are validated by the - // respective initialization functions. - const lzma_options_lzma *options = filters[0].options; - if (options->pos_bits > LZMA_POS_BITS_MAX) - return LZMA_HEADER_ERROR; + // This is hack. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ - // Allocate memory for the decoder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; - next->code = &lzma_lz_decode; - next->end = &lzma_decoder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->lz = LZMA_LZ_DECODER_INIT; - } + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + // FIXME? - // Store the pos_bits and calculate pos_mask. - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1U << next->coder->pos_bits) - 1; + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pos_bits) - 1; // Initialize the literal decoder. - return_if_error(lzma_literal_init(&next->coder->literal_coder, - options->literal_context_bits, - options->literal_pos_bits)); + literal_init(coder->literal, options->literal_context_bits, + options->literal_pos_bits); - // Allocate and initialize the LZ decoder. - return_if_error(lzma_lz_decoder_reset(&next->coder->lz, allocator, - &decode_real, options->dictionary_size, - MATCH_MAX_LEN)); + coder->literal_context_bits = options->literal_context_bits; + coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1; // State - next->coder->state = 0; - next->coder->rep0 = 0; - next->coder->rep1 = 0; - next->coder->rep2 = 0; - next->coder->rep3 = 0; - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1 << next->coder->pos_bits) - 1; - next->coder->now_pos = 0; + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1 << options->pos_bits) - 1; // Range decoder - rc_reset(next->coder->rc); + rc_reset(coder->rc); // Bit and bittree decoders for (uint32_t i = 0; i < STATES; ++i) { - for (uint32_t j = 0; j <= next->coder->pos_mask; ++j) { - bit_reset(next->coder->is_match[i][j]); - bit_reset(next->coder->is_rep0_long[i][j]); + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); } - bit_reset(next->coder->is_rep[i]); - bit_reset(next->coder->is_rep0[i]); - bit_reset(next->coder->is_rep1[i]); - bit_reset(next->coder->is_rep2[i]); + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); } for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) - bittree_reset(next->coder->pos_slot_decoder[i], POS_SLOT_BITS); + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) - bit_reset(next->coder->pos_decoders[i]); + bit_reset(coder->pos_special[i]); - bittree_reset(next->coder->pos_align_decoder, ALIGN_BITS); + bittree_reset(coder->pos_align, ALIGN_BITS); // Len decoders (also bit/bittree) - const uint32_t num_pos_states = 1 << next->coder->pos_bits; - bit_reset(next->coder->match_len_decoder.choice); - bit_reset(next->coder->match_len_decoder.choice2); - bit_reset(next->coder->rep_len_decoder.choice); - bit_reset(next->coder->rep_len_decoder.choice2); + const uint32_t num_pos_states = 1 << options->pos_bits; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { - bittree_reset(next->coder->match_len_decoder.low[pos_state], + bittree_reset(coder->match_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->match_len_decoder.mid[pos_state], + bittree_reset(coder->match_len_decoder.mid[pos_state], LEN_MID_BITS); - bittree_reset(next->coder->rep_len_decoder.low[pos_state], + bittree_reset(coder->rep_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->rep_len_decoder.mid[pos_state], + bittree_reset(coder->rep_len_decoder.mid[pos_state], LEN_MID_BITS); } - bittree_reset(next->coder->match_len_decoder.high, LEN_HIGH_BITS); - bittree_reset(next->coder->rep_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, size_t *dict_size) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } - next->coder->has_produced_output = false; + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + *dict_size = options->dictionary_size; return LZMA_OK; } -extern void -lzma_lzma_decoder_uncompressed_size( - lzma_next_coder *next, lzma_vli uncompressed_size) +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, size_t *dict_size) { - next->coder->lz.uncompressed_size = uncompressed_size; - return; + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, dict_size)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_VALUE_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); } extern bool -lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) { if (byte > (4 * 5 + 4) * 9 + 8) return true; @@ -781,3 +1011,49 @@ lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) return options->literal_context_bits + options->literal_pos_bits > LZMA_LITERAL_BITS_MAX; } + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + const lzma_options_lzma *const opt = options; + const uint64_t lz_memusage + = lzma_lz_decoder_memusage(opt->dictionary_size); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lz_memusage; +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_HEADER_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dictionary_size = integer_read_32(props + 1); + + opt->preset_dictionary = NULL; + opt->preset_dictionary_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_HEADER_ERROR; +} diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h index 9d57c7e5..3792f452 100644 --- a/src/liblzma/lzma/lzma_decoder.h +++ b/src/liblzma/lzma/lzma_decoder.h @@ -28,16 +28,27 @@ extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); -/// Set known uncompressed size. This is a hack needed to support -/// LZMA_Alone files that don't have EOPM. -extern void lzma_lzma_decoder_uncompressed_size( - lzma_next_coder *next, lzma_vli uncompressed_size); +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + /// \brief Decodes the LZMA Properties byte (lc/lp/pb) /// /// \return true if error occorred, false on success /// -extern bool lzma_lzma_decode_properties( +extern bool lzma_lzma_lclppb_decode( lzma_options_lzma *options, uint8_t byte); + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, size_t *dict_size); +#endif + #endif diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c index afb1d5ed..a84801e7 100644 --- a/src/liblzma/lzma/lzma_encoder.c +++ b/src/liblzma/lzma/lzma_encoder.c @@ -30,40 +30,33 @@ static inline void literal_matched(lzma_range_encoder *rc, probability *subcoder, uint32_t match_byte, uint32_t symbol) { - uint32_t context = 1; - uint32_t bit_count = 8; + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; do { - uint32_t bit = (symbol >> --bit_count) & 1; - const uint32_t match_bit = (match_byte >> bit_count) & 1; - rc_bit(rc, &subcoder[(0x100 << match_bit) + context], bit); - context = (context << 1) | bit; - - if (match_bit != bit) { - // The bit from the literal being encoded and the bit - // from the previous match differ. Finish encoding - // as a normal literal. - while (bit_count != 0) { - bit = (symbol >> --bit_count) & 1; - rc_bit(rc, &subcoder[context], bit); - context = (context << 1) | bit; - } + match_byte <<= 1; + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + rc_bit(rc, &subcoder[subcoder_index], bit); - break; - } + symbol <<= 1; + offset &= ~(match_byte ^ symbol); - } while (bit_count != 0); + } while (symbol < (UINT32_C(1) << 16)); } static inline void -literal(lzma_coder *coder) +literal(lzma_coder *coder, lzma_mf *mf, uint32_t position) { // Locate the literal byte to be encoded and the subcoder. - const uint8_t cur_byte = coder->lz.buffer[ - coder->lz.read_pos - coder->additional_offset]; - probability *subcoder = literal_get_subcoder(coder->literal_coder, - coder->now_pos, coder->previous_byte); + const uint8_t cur_byte = mf->buffer[ + mf->read_pos - mf->read_ahead]; + probability *subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); if (is_literal_state(coder->state)) { // Previous LZMA-symbol was a literal. Encode a normal @@ -73,14 +66,13 @@ literal(lzma_coder *coder) // Previous LZMA-symbol was a match. Use the last byte of // the match as a "match byte". That is, compare the bits // of the current literal and the match byte. - const uint8_t match_byte = coder->lz.buffer[ - coder->lz.read_pos - coder->reps[0] - 1 - - coder->additional_offset]; + const uint8_t match_byte = mf->buffer[ + mf->read_pos - coder->reps[0] - 1 + - mf->read_ahead]; literal_matched(&coder->rc, subcoder, match_byte, cur_byte); } update_literal(coder->state); - coder->previous_byte = cur_byte; } @@ -88,12 +80,41 @@ literal(lzma_coder *coder) // Match length // ////////////////// +static void +length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) +{ + const uint32_t table_size = lc->table_size; + lc->counters[pos_state] = table_size; + + const uint32_t a0 = rc_bit_0_price(lc->choice); + const uint32_t a1 = rc_bit_1_price(lc->choice); + const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); + const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); + uint32_t *const prices = lc->prices[pos_state]; + + uint32_t i; + for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) + prices[i] = a0 + rc_bittree_price(lc->low[pos_state], + LEN_LOW_BITS, i); + + for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) + prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + + for (; i < table_size; ++i) + prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + + return; +} + + static inline void length(lzma_range_encoder *rc, lzma_length_encoder *lc, - const uint32_t pos_state, uint32_t len) + const uint32_t pos_state, uint32_t len, const bool fast_mode) { - assert(len <= MATCH_MAX_LEN); - len -= MATCH_MIN_LEN; + assert(len <= MATCH_LEN_MAX); + len -= MATCH_LEN_MIN; if (len < LEN_LOW_SYMBOLS) { rc_bit(rc, &lc->choice, 0); @@ -111,6 +132,12 @@ length(lzma_range_encoder *rc, lzma_length_encoder *lc, rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); } } + + // Only getoptimum uses the prices so don't update the table when + // in fast mode. + if (!fast_mode) + if (--lc->counters[pos_state] == 0) + length_update_prices(lc, pos_state); } @@ -124,12 +151,12 @@ match(lzma_coder *coder, const uint32_t pos_state, { update_match(coder->state); - length(&coder->rc, &coder->match_len_encoder, pos_state, len); - coder->prev_len_encoder = &coder->match_len_encoder; + length(&coder->rc, &coder->match_len_encoder, pos_state, len, + coder->fast_mode); const uint32_t pos_slot = get_pos_slot(distance); const uint32_t len_to_pos_state = get_len_to_pos_state(len); - rc_bittree(&coder->rc, coder->pos_slot_encoder[len_to_pos_state], + rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state], POS_SLOT_BITS, pos_slot); if (pos_slot >= START_POS_MODEL_INDEX) { @@ -139,13 +166,13 @@ match(lzma_coder *coder, const uint32_t pos_state, if (pos_slot < END_POS_MODEL_INDEX) { rc_bittree_reverse(&coder->rc, - &coder->pos_encoders[base - pos_slot - 1], + &coder->pos_special[base - pos_slot - 1], footer_bits, pos_reduced); } else { rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS, footer_bits - ALIGN_BITS); rc_bittree_reverse( - &coder->rc, coder->pos_align_encoder, + &coder->rc, coder->pos_align, ALIGN_BITS, pos_reduced & ALIGN_MASK); ++coder->align_price_count; } @@ -196,8 +223,8 @@ rep_match(lzma_coder *coder, const uint32_t pos_state, if (len == 1) { update_short_rep(coder->state); } else { - length(&coder->rc, &coder->rep_len_encoder, pos_state, len); - coder->prev_len_encoder = &coder->rep_len_encoder; + length(&coder->rc, &coder->rep_len_encoder, pos_state, len, + coder->fast_mode); update_long_rep(coder->state); } } @@ -208,117 +235,123 @@ rep_match(lzma_coder *coder, const uint32_t pos_state, ////////// static void -encode_symbol(lzma_coder *coder, uint32_t pos, uint32_t len) +encode_symbol(lzma_coder *coder, lzma_mf *mf, + uint32_t back, uint32_t len, uint32_t position) { - const uint32_t pos_state = coder->now_pos & coder->pos_mask; + const uint32_t pos_state = position & coder->pos_mask; - if (len == 1 && pos == UINT32_MAX) { + if (back == UINT32_MAX) { // Literal i.e. eight-bit byte + assert(len == 1); rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 0); - literal(coder); + literal(coder, mf, position); } else { // Some type of match rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); - if (pos < REP_DISTANCES) { + if (back < REP_DISTANCES) { // It's a repeated match i.e. the same distance // has been used earlier. rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); - rep_match(coder, pos_state, pos, len); + rep_match(coder, pos_state, back, len); } else { // Normal match rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); - match(coder, pos_state, pos - REP_DISTANCES, len); + match(coder, pos_state, back - REP_DISTANCES, len); } + } + + assert(mf->read_ahead >= len); + mf->read_ahead -= len; +} + + +static bool +encode_init(lzma_coder *coder, lzma_mf *mf) +{ + if (mf->read_pos == mf->read_limit) { + if (mf->action == LZMA_RUN) + return false; // We cannot do anything. - coder->previous_byte = coder->lz.buffer[ - coder->lz.read_pos + len - 1 - - coder->additional_offset]; + // We are finishing (we cannot get here when flushing). + assert(mf->write_pos == mf->read_pos); + assert(mf->action == LZMA_FINISH); + } else { + // Do the actual initialization. The first LZMA symbol must + // always be a literal. + mf_skip(mf, 1); + mf->read_ahead = 0; + rc_bit(&coder->rc, &coder->is_match[0][0], 0); + rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); } - assert(coder->additional_offset >= len); - coder->additional_offset -= len; - coder->now_pos += len; + // Initialization is done (except if empty file). + coder->is_initialized = true; + + return true; } static void -encode_eopm(lzma_coder *coder) +encode_eopm(lzma_coder *coder, uint32_t position) { - const uint32_t pos_state = coder->now_pos & coder->pos_mask; + const uint32_t pos_state = position & coder->pos_mask; rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); - match(coder, pos_state, UINT32_MAX, MATCH_MIN_LEN); + match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); } -/** - * \brief LZMA encoder - * - * \return true if end of stream was reached, false otherwise. - */ -extern bool -lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size) +/// Number of bytes that a single encoding loop in lzma_lzma_encode() can +/// consume from the dictionary. This limit comes from lzma_lzma_optimum() +/// and may need to be updated if that function is significantly modified. +#define LOOP_INPUT_MAX (OPTS + 1) + + +extern lzma_ret +lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, uint32_t limit) { // Initialize the stream if no data has been encoded yet. - if (!coder->is_initialized) { - if (coder->lz.read_pos == coder->lz.read_limit) { - if (coder->lz.sequence == SEQ_RUN) - return false; // We cannot do anything. - - // We are finishing (we cannot get here when flushing). - assert(coder->lz.write_pos == coder->lz.read_pos); - assert(coder->lz.sequence == SEQ_FINISH); - } else { - // Do the actual initialization. - uint32_t len; - uint32_t num_distance_pairs; - lzma_read_match_distances(coder, - &len, &num_distance_pairs); + if (!coder->is_initialized && !encode_init(coder, mf)) + return LZMA_OK; - encode_symbol(coder, UINT32_MAX, 1); + // Get the lowest bits of the uncompressed offset from the LZ layer. + uint32_t position = mf_position(mf); - assert(coder->additional_offset == 0); + while (true) { + // Encode pending bits, if any. Calling this before encoding + // the next symbol is needed only with plain LZMA, since + // LZMA2 always provides big enough buffer to flush + // everything out from the range encoder. For the same reason, + // rc_encode() never returns true when this function is used + // as part of LZMA2 encoder. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; } - // Initialization is done (except if empty file). - coder->is_initialized = true; - } - - // Encoding loop - while (true) { - // Encode pending bits, if any. - if (rc_encode(&coder->rc, out, out_pos, out_size)) - return false; + // With LZMA2 we need to take care that compressed size of + // a chunk doesn't get too big. + // TODO + if (limit != UINT32_MAX + && (mf->read_pos - mf->read_ahead >= limit + || *out_pos + rc_pending(&coder->rc) + >= (UINT32_C(1) << 16) + - LOOP_INPUT_MAX)) + break; // Check that there is some input to process. - if (coder->lz.read_pos >= coder->lz.read_limit) { - // If flushing or finishing, we must keep encoding - // until additional_offset becomes zero to make - // all the input available at output. - if (coder->lz.sequence == SEQ_RUN) - return false; - - if (coder->additional_offset == 0) - break; - } - - assert(coder->lz.read_pos <= coder->lz.write_pos); + if (mf->read_pos >= mf->read_limit) { + if (mf->action == LZMA_RUN) + return LZMA_OK; -#ifndef NDEBUG - if (coder->lz.sequence != SEQ_RUN) { - assert(coder->lz.read_limit == coder->lz.write_pos); - } else { - assert(coder->lz.read_limit + coder->lz.keep_size_after - == coder->lz.write_pos); + if (mf->read_ahead == 0) + break; } -#endif - - uint32_t pos; - uint32_t len; // Get optimal match (repeat position and length). // Value ranges for pos: @@ -327,33 +360,324 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // match at (pos - REP_DISTANCES) // - UINT32_MAX: not a match but a literal // Value ranges for len: - // - [MATCH_MIN_LEN, MATCH_MAX_LEN] - if (coder->best_compression) - lzma_get_optimum(coder, &pos, &len); + // - [MATCH_LEN_MIN, MATCH_LEN_MAX] + uint32_t len; + uint32_t back; + + if (coder->fast_mode) + lzma_lzma_optimum_fast(coder, mf, &back, &len); else - lzma_get_optimum_fast(coder, &pos, &len); + lzma_lzma_optimum_normal( + coder, mf, &back, &len, position); + + encode_symbol(coder, mf, back, len, position); + + position += len; + } + + if (!coder->is_flushed) { + coder->is_flushed = true; - encode_symbol(coder, pos, len); + // We don't support encoding plain LZMA streams without EOPM, + // and LZMA2 doesn't use EOPM at LZMA level. + if (limit == UINT32_MAX) + encode_eopm(coder, position); + + // Flush the remaining bytes from the range encoder. + rc_flush(&coder->rc); + + // Copy the remaining bytes to the output buffer. If there + // isn't enough output space, we will copy out the remaining + // bytes on the next call to this function by using + // the rc_encode() call in the encoding loop above. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; + } } - assert(!coder->longest_match_was_found); + // Make it ready for the next LZMA2 chunk. + coder->is_flushed = false; + + return LZMA_STREAM_END; +} + + +static lzma_ret +lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // Plain LZMA has no support for sync-flushing. + if (unlikely(mf->action == LZMA_SYNC_FLUSH)) + return LZMA_HEADER_ERROR; + + return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); +} + - if (coder->is_flushed) { - coder->is_flushed = false; +//////////////////// +// Initialization // +//////////////////// + +static bool +set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) +{ + if (!is_lclppb_valid(options) + || options->fast_bytes < LZMA_FAST_BYTES_MIN + || options->fast_bytes > LZMA_FAST_BYTES_MAX) return true; + + // FIXME validation + + lz_options->before_size = OPTS; + lz_options->dictionary_size = options->dictionary_size; + lz_options->after_size = LOOP_INPUT_MAX; + lz_options->match_len_max = MATCH_LEN_MAX; + lz_options->find_len_max = options->fast_bytes; + lz_options->match_finder = options->match_finder; + lz_options->match_finder_cycles = options->match_finder_cycles; + lz_options->preset_dictionary = options->preset_dictionary; + lz_options->preset_dictionary_size = options->preset_dictionary_size; + + return false; +} + + +static void +length_encoder_reset(lzma_length_encoder *lencoder, + const uint32_t num_pos_states, const bool fast_mode) +{ + bit_reset(lencoder->choice); + bit_reset(lencoder->choice2); + + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); + bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); } - // We don't support encoding old LZMA streams without EOPM, and LZMA2 - // doesn't use EOPM at LZMA level. - if (coder->write_eopm) - encode_eopm(coder); + bittree_reset(lencoder->high, LEN_HIGH_BITS); - rc_flush(&coder->rc); + if (!fast_mode) + for (size_t pos_state = 0; pos_state < num_pos_states; + ++pos_state) + length_update_prices(lencoder, pos_state); - if (rc_encode(&coder->rc, out, out_pos, out_size)) { - coder->is_flushed = true; - return false; + return; +} + + +extern void +lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) +{ + assert(!coder->is_flushed); + + coder->pos_mask = (1U << options->pos_bits) - 1; + coder->literal_context_bits = options->literal_context_bits; + coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1; + + + // Range coder + rc_reset(&coder->rc); + + // State + coder->state = 0; + for (size_t i = 0; i < REP_DISTANCES; ++i) + coder->reps[i] = 0; + + literal_init(coder->literal, options->literal_context_bits, + options->literal_pos_bits); + + // Bit encoders + for (size_t i = 0; i < STATES; ++i) { + for (size_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); } - return true; + for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); + + // Bit tree encoders + for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Length encoders + length_encoder_reset(&coder->match_len_encoder, + 1U << options->pos_bits, coder->fast_mode); + + length_encoder_reset(&coder->rep_len_encoder, + 1U << options->pos_bits, coder->fast_mode); + + // FIXME: Too big or too small won't work when resetting in the middle of LZMA2. + coder->match_price_count = UINT32_MAX / 2; + coder->align_price_count = UINT32_MAX / 2; + + coder->opts_end_index = 0; + coder->opts_current_index = 0; +} + + +extern lzma_ret +lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options) +{ + if (*coder_ptr == NULL) { + *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); + if (*coder_ptr == NULL) + return LZMA_MEM_ERROR; + } + + lzma_coder *coder = *coder_ptr; + + // Validate options that aren't validated elsewhere. + if (!is_lclppb_valid(options) + || options->fast_bytes < LZMA_FAST_BYTES_MIN + || options->fast_bytes > LZMA_FAST_BYTES_MAX) + return LZMA_HEADER_ERROR; + + // Set compression mode. + switch (options->mode) { + case LZMA_MODE_FAST: + coder->fast_mode = true; + break; + + case LZMA_MODE_NORMAL: { + coder->fast_mode = false; + + // Set dist_table_size. + // Round the dictionary size up to next 2^n. + uint32_t log_size = 0; + while ((UINT32_C(1) << log_size) + < options->dictionary_size) + ++log_size; + + coder->dist_table_size = log_size * 2; + + // Length encoders' price table size + coder->match_len_encoder.table_size + = options->fast_bytes + 1 - MATCH_LEN_MIN; + coder->rep_len_encoder.table_size + = options->fast_bytes + 1 - MATCH_LEN_MIN; + break; + } + + default: + return LZMA_HEADER_ERROR; + } + + coder->is_initialized = false; + coder->is_flushed = false; + + lzma_lzma_encoder_reset(coder, options); + + // LZ encoder options FIXME validation + if (set_lz_options(lz_options, options)) + return LZMA_HEADER_ERROR; + + return LZMA_OK; +} + + +static lzma_ret +lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + lz->code = &lzma_encode; + return lzma_lzma_encoder_create( + &lz->coder, allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Initialization call chain: + // + // lzma_lzma_encoder_init() + // `-- lzma_lz_encoder_init() + // `-- lzma_encoder_init() + // `-- lzma_encoder_init2() + // + // The above complexity is to let LZ encoder store the pointer to + // the LZMA encoder structure. Encoding call tree: + // + // lz_encode() + // |-- fill_window() + // | `-- Next coder in the chain, if any + // `-- lzma_encode() + // |-- lzma_dict_find() + // `-- lzma_dict_skip() + // + // FIXME ^ + // + return lzma_lz_encoder_init( + next, allocator, filters, &lzma_encoder_init); +} + + +extern uint64_t +lzma_lzma_encoder_memusage(const void *options) +{ + lzma_lz_options lz_options; + if (set_lz_options(&lz_options, options)) + return UINT64_MAX; + + const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; +} + + +extern bool +lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) +{ + if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX + || options->literal_pos_bits + > LZMA_LITERAL_POS_BITS_MAX + || options->pos_bits > LZMA_POS_BITS_MAX + || options->literal_context_bits + + options->literal_pos_bits + > LZMA_LITERAL_BITS_MAX) + return true; + + *byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9 + + options->literal_context_bits; + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +} + + +#ifdef HAVE_ENCODER_LZMA +extern lzma_ret +lzma_lzma_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + + if (lzma_lzma_lclppb_encode(opt, out)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 1, opt->dictionary_size); + + return LZMA_OK; +} +#endif + + +extern LZMA_API lzma_bool +lzma_mode_is_available(lzma_mode mode) +{ + return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; } diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h index 1c57f80a..e270cc27 100644 --- a/src/liblzma/lzma/lzma_encoder.h +++ b/src/liblzma/lzma/lzma_encoder.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.h -/// \brief LZMA method handler API +/// \brief LZMA encoder API // // Copyright (C) 1999-2006 Igor Pavlov // Copyright (C) 2007 Lasse Collin @@ -23,13 +23,47 @@ #include "common.h" + extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); -extern bool lzma_lzma_encode_properties( + +extern uint64_t lzma_lzma_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out); + + +/// Encodes lc/lp/pb into one byte. Returns false on success and true on error. +extern bool lzma_lzma_lclppb_encode( const lzma_options_lzma *options, uint8_t *byte); + +#ifdef HAVE_SMALL + /// Initializes the lzma_fastpos[] array. extern void lzma_fastpos_init(void); #endif + + +#ifdef LZMA_LZ_ENCODER_H + +/// Initializes raw LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_create( + lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options); + + +/// Resets an already initialized LZMA encoder; this is used by LZMA2. +extern void lzma_lzma_encoder_reset( + lzma_coder *coder, const lzma_options_lzma *options); + + +extern lzma_ret lzma_lzma_encode(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + uint32_t read_limit); + +#endif + +#endif diff --git a/src/liblzma/lzma/lzma_encoder_features.c b/src/liblzma/lzma/lzma_encoder_features.c index 56e59c6a..9fecee48 100644 --- a/src/liblzma/lzma/lzma_encoder_features.c +++ b/src/liblzma/lzma/lzma_encoder_features.c @@ -22,7 +22,7 @@ static lzma_mode modes[] = { LZMA_MODE_FAST, - LZMA_MODE_BEST, + LZMA_MODE_NORMAL, LZMA_MODE_INVALID }; diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c deleted file mode 100644 index b175e4cb..00000000 --- a/src/liblzma/lzma/lzma_encoder_getoptimum.c +++ /dev/null @@ -1,925 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_getoptimum.c -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - - -// "Would you love the monster code? -// Could you understand beauty of the beast?" -// --Adapted from Lordi's "Would you love a monster man". - - -#include "lzma_encoder_private.h" -#include "fastpos.h" - - -#define length_get_price(length_encoder, symbol, pos_state) \ - (length_encoder).prices[pos_state][symbol] - - -#define get_rep_len_1_price(state, pos_state) \ - bit_get_price_0(coder->is_rep0[state]) \ - + bit_get_price_0(coder->is_rep0_long[state][pos_state]) - - -// Adds to price_target. -#define get_pure_rep_price(price_target, rep_index, state, pos_state) \ -do { \ - if ((rep_index) == 0) { \ - price_target += bit_get_price_0(coder->is_rep0[state]); \ - price_target += bit_get_price_1( \ - coder->is_rep0_long[state][pos_state]); \ - } else { \ - price_target += bit_get_price_1(coder->is_rep0[state]); \ - if ((rep_index) == 1) { \ - price_target += bit_get_price_0(coder->is_rep1[state]); \ - } else { \ - price_target += bit_get_price_1(coder->is_rep1[state]); \ - price_target += bit_get_price( \ - coder->is_rep2[state], (rep_index) - 2); \ - } \ - } \ -} while (0) - - -// Adds to price_target. -#define get_rep_price(price_target, rep_index, len, state, pos_state) \ -do { \ - get_pure_rep_price(price_target, rep_index, state, pos_state); \ - price_target += length_get_price(coder->rep_len_encoder, \ - (len) - MATCH_MIN_LEN, pos_state); \ -} while (0) - - -// Adds to price_target. -#define get_pos_len_price(price_target, pos, len, pos_state) \ -do { \ - const uint32_t len_to_pos_state_tmp = get_len_to_pos_state(len); \ - if ((pos) < FULL_DISTANCES) { \ - price_target += distances_prices[len_to_pos_state_tmp][pos]; \ - } else { \ - price_target \ - += pos_slot_prices[len_to_pos_state_tmp][get_pos_slot_2(pos)] \ - + align_prices[(pos) & ALIGN_MASK]; \ - } \ - price_target += length_get_price( \ - coder->match_len_encoder, (len) - MATCH_MIN_LEN, pos_state); \ -} while (0) - - -// Three macros to manipulate lzma_optimal structures: -#define make_as_char(opt) \ -do { \ - (opt).back_prev = UINT32_MAX; \ - (opt).prev_1_is_char = false; \ -} while (0) - - -#define make_as_short_rep(opt) \ -do { \ - (opt).back_prev = 0; \ - (opt).prev_1_is_char = false; \ -} while (0) - - -#define is_short_rep(opt) \ - ((opt).back_prev == 0) - - -static void -fill_length_prices(lzma_length_encoder *lc, uint32_t pos_state) -{ - const uint32_t num_symbols = lc->table_size; - const uint32_t a0 = bit_get_price_0(lc->choice); - const uint32_t a1 = bit_get_price_1(lc->choice); - const uint32_t b0 = a1 + bit_get_price_0(lc->choice2); - const uint32_t b1 = a1 + bit_get_price_1(lc->choice2); - - uint32_t *prices = lc->prices[pos_state]; - uint32_t i = 0; - - for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) - prices[i] = a0 + bittree_get_price(lc->low[pos_state], - LEN_LOW_BITS, i); - - for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) - prices[i] = b0 + bittree_get_price(lc->mid[pos_state], - LEN_MID_BITS, i - LEN_LOW_SYMBOLS); - - for (; i < num_symbols; ++i) - prices[i] = b1 + bittree_get_price(lc->high, LEN_HIGH_BITS, - i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); - - lc->counters[pos_state] = num_symbols; - - return; -} - - -static void -fill_distances_prices(lzma_coder *coder) -{ - uint32_t temp_prices[FULL_DISTANCES]; - - for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { - const uint32_t pos_slot = get_pos_slot(i); - const uint32_t footer_bits = ((pos_slot >> 1) - 1); - const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; - temp_prices[i] = bittree_reverse_get_price( - coder->pos_encoders + base - pos_slot - 1, - footer_bits, i - base); - } - - const uint32_t dist_table_size = coder->dist_table_size; - - for (uint32_t len_to_pos_state = 0; - len_to_pos_state < LEN_TO_POS_STATES; - ++len_to_pos_state) { - - const probability *encoder = coder->pos_slot_encoder[len_to_pos_state]; - uint32_t *pos_slot_prices = coder->pos_slot_prices[len_to_pos_state]; - - for (uint32_t pos_slot = 0; - pos_slot < dist_table_size; - ++pos_slot) { - pos_slot_prices[pos_slot] = bittree_get_price(encoder, - POS_SLOT_BITS, pos_slot); - } - - for (uint32_t pos_slot = END_POS_MODEL_INDEX; - pos_slot < dist_table_size; - ++pos_slot) - pos_slot_prices[pos_slot] += (((pos_slot >> 1) - 1) - - ALIGN_BITS) << BIT_PRICE_SHIFT_BITS; - - - uint32_t *distances_prices - = coder->distances_prices[len_to_pos_state]; - - uint32_t i; - for (i = 0; i < START_POS_MODEL_INDEX; ++i) - distances_prices[i] = pos_slot_prices[i]; - - for (; i < FULL_DISTANCES; ++i) - distances_prices[i] = pos_slot_prices[get_pos_slot(i)] - + temp_prices[i]; - } - - coder->match_price_count = 0; - - return; -} - - -static void -fill_align_prices(lzma_coder *coder) -{ - for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) - coder->align_prices[i] = bittree_reverse_get_price( - coder->pos_align_encoder, ALIGN_BITS, i); - - coder->align_price_count = 0; - return; -} - - -// The first argument is a pointer returned by literal_get_subcoder(). -static uint32_t -literal_get_price(const probability *encoders, const bool match_mode, - const uint8_t match_byte, const uint8_t symbol) -{ - uint32_t price = 0; - uint32_t context = 1; - int i = 8; - - if (match_mode) { - do { - --i; - const uint32_t match_bit = (match_byte >> i) & 1; - const uint32_t bit = (symbol >> i) & 1; - const uint32_t subcoder_index - = 0x100 + (match_bit << 8) + context; - - price += bit_get_price(encoders[subcoder_index], bit); - context = (context << 1) | bit; - - if (match_bit != bit) - break; - - } while (i != 0); - } - - while (i != 0) { - --i; - const uint32_t bit = (symbol >> i) & 1; - price += bit_get_price(encoders[context], bit); - context = (context << 1) | bit; - } - - return price; -} - - -static void -backward(lzma_coder *restrict coder, uint32_t *restrict len_res, - uint32_t *restrict back_res, uint32_t cur) -{ - coder->optimum_end_index = cur; - - uint32_t pos_mem = coder->optimum[cur].pos_prev; - uint32_t back_mem = coder->optimum[cur].back_prev; - - do { - if (coder->optimum[cur].prev_1_is_char) { - make_as_char(coder->optimum[pos_mem]); - coder->optimum[pos_mem].pos_prev = pos_mem - 1; - - if (coder->optimum[cur].prev_2) { - coder->optimum[pos_mem - 1].prev_1_is_char = false; - coder->optimum[pos_mem - 1].pos_prev - = coder->optimum[cur].pos_prev_2; - coder->optimum[pos_mem - 1].back_prev - = coder->optimum[cur].back_prev_2; - } - } - - uint32_t pos_prev = pos_mem; - uint32_t back_cur = back_mem; - - back_mem = coder->optimum[pos_prev].back_prev; - pos_mem = coder->optimum[pos_prev].pos_prev; - - coder->optimum[pos_prev].back_prev = back_cur; - coder->optimum[pos_prev].pos_prev = cur; - cur = pos_prev; - - } while (cur != 0); - - coder->optimum_current_index = coder->optimum[0].pos_prev; - *len_res = coder->optimum[0].pos_prev; - *back_res = coder->optimum[0].back_prev; - - return; -} - - -extern void -lzma_get_optimum(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res) -{ - uint32_t position = coder->now_pos; - uint32_t pos_state = position & coder->pos_mask; - - // Update the price tables. In the C++ LZMA SDK 4.42 this was done in both - // initialization function and in the main loop. In liblzma they were - // moved into this single place. - if (coder->additional_offset == 0) { - if (coder->match_price_count >= (1 << 7)) - fill_distances_prices(coder); - - if (coder->align_price_count >= ALIGN_TABLE_SIZE) - fill_align_prices(coder); - } - - if (coder->prev_len_encoder != NULL) { - if (--coder->prev_len_encoder->counters[pos_state] == 0) - fill_length_prices(coder->prev_len_encoder, pos_state); - - coder->prev_len_encoder = NULL; - } - - - if (coder->optimum_end_index != coder->optimum_current_index) { - *len_res = coder->optimum[coder->optimum_current_index].pos_prev - - coder->optimum_current_index; - *back_res = coder->optimum[coder->optimum_current_index].back_prev; - coder->optimum_current_index = coder->optimum[ - coder->optimum_current_index].pos_prev; - return; - } - - coder->optimum_current_index = 0; - coder->optimum_end_index = 0; - - - const uint32_t fast_bytes = coder->fast_bytes; - uint32_t *match_distances = coder->match_distances; - - uint32_t len_main; - uint32_t num_distance_pairs; - - if (!coder->longest_match_was_found) { - lzma_read_match_distances(coder, &len_main, &num_distance_pairs); - } else { - len_main = coder->longest_match_length; - num_distance_pairs = coder->num_distance_pairs; - coder->longest_match_was_found = false; - } - - - const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1; - uint32_t num_available_bytes - = coder->lz.write_pos - coder->lz.read_pos + 1; - if (num_available_bytes < 2) { - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - if (num_available_bytes > MATCH_MAX_LEN) - num_available_bytes = MATCH_MAX_LEN; - - - uint32_t reps[REP_DISTANCES]; - uint32_t rep_lens[REP_DISTANCES]; - uint32_t rep_max_index = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - reps[i] = coder->reps[i]; - const uint32_t back_offset = reps[i] + 1; - - if (buf[0] != *(buf - back_offset) - || buf[1] != *(buf + 1 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - uint32_t len_test; - for (len_test = 2; len_test < num_available_bytes - && buf[len_test] == *(buf + len_test - back_offset); - ++len_test) ; - - rep_lens[i] = len_test; - if (len_test > rep_lens[rep_max_index]) - rep_max_index = i; - } - - if (rep_lens[rep_max_index] >= fast_bytes) { - *back_res = rep_max_index; - *len_res = rep_lens[rep_max_index]; - move_pos(*len_res - 1); - return; - } - - - if (len_main >= fast_bytes) { - *back_res = match_distances[num_distance_pairs] + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 1); - return; - } - - uint8_t current_byte = *buf; - uint8_t match_byte = *(buf - reps[0] - 1); - - if (len_main < 2 && current_byte != match_byte - && rep_lens[rep_max_index] < 2) { - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - coder->optimum[0].state = coder->state; - - coder->optimum[1].price = bit_get_price_0( - coder->is_match[coder->state][pos_state]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position, coder->previous_byte), - !is_literal_state(coder->state), match_byte, current_byte); - - make_as_char(coder->optimum[1]); - - uint32_t match_price - = bit_get_price_1(coder->is_match[coder->state][pos_state]); - uint32_t rep_match_price - = match_price + bit_get_price_1(coder->is_rep[coder->state]); - - - if (match_byte == current_byte) { - const uint32_t short_rep_price = rep_match_price - + get_rep_len_1_price(coder->state, pos_state); - - if (short_rep_price < coder->optimum[1].price) { - coder->optimum[1].price = short_rep_price; - make_as_short_rep(coder->optimum[1]); - } - } - - uint32_t len_end = (len_main >= rep_lens[rep_max_index]) - ? len_main - : rep_lens[rep_max_index]; - - if (len_end < 2) { - *back_res = coder->optimum[1].back_prev; - *len_res = 1; - return; - } - - coder->optimum[1].pos_prev = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) - coder->optimum[0].backs[i] = reps[i]; - - uint32_t len = len_end; - do { - coder->optimum[len].price = INFINITY_PRICE; - } while (--len >= 2); - - - uint32_t (*distances_prices)[FULL_DISTANCES] = coder->distances_prices; - uint32_t (*pos_slot_prices)[DIST_TABLE_SIZE_MAX] = coder->pos_slot_prices; - uint32_t *align_prices = coder->align_prices; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - uint32_t rep_len = rep_lens[i]; - if (rep_len < 2) - continue; - - uint32_t price = rep_match_price; - get_pure_rep_price(price, i, coder->state, pos_state); - - do { - const uint32_t cur_and_len_price = price - + length_get_price( - coder->rep_len_encoder, - rep_len - 2, pos_state); - - if (cur_and_len_price < coder->optimum[rep_len].price) { - coder->optimum[rep_len].price = cur_and_len_price; - coder->optimum[rep_len].pos_prev = 0; - coder->optimum[rep_len].back_prev = i; - coder->optimum[rep_len].prev_1_is_char = false; - } - } while (--rep_len >= 2); - } - - - uint32_t normal_match_price = match_price - + bit_get_price_0(coder->is_rep[coder->state]); - - len = (rep_lens[0] >= 2) ? rep_lens[0] + 1 : 2; - - if (len <= len_main) { - uint32_t offs = 0; - - while (len > match_distances[offs + 1]) - offs += 2; - - for(; ; ++len) { - const uint32_t distance = match_distances[offs + 2]; - uint32_t cur_and_len_price = normal_match_price; - get_pos_len_price(cur_and_len_price, distance, len, pos_state); - - if (cur_and_len_price < coder->optimum[len].price) { - coder->optimum[len].price = cur_and_len_price; - coder->optimum[len].pos_prev = 0; - coder->optimum[len].back_prev = distance + REP_DISTANCES; - coder->optimum[len].prev_1_is_char = false; - } - - if (len == match_distances[offs + 1]) { - offs += 2; - if (offs == num_distance_pairs) - break; - } - } - } - - - ////////////////// - // Big loop ;-) // - ////////////////// - - uint32_t cur = 0; - - // The rest of this function is a huge while-loop. To avoid extreme - // indentation, the indentation level is not increased here. - while (true) { - - ++cur; - - assert(cur < OPTS); - - if (cur == len_end) { - backward(coder, len_res, back_res, cur); - return; - } - - uint32_t new_len; - - lzma_read_match_distances(coder, &new_len, &num_distance_pairs); - - if (new_len >= fast_bytes) { - coder->num_distance_pairs = num_distance_pairs; - coder->longest_match_length = new_len; - coder->longest_match_was_found = true; - backward(coder, len_res, back_res, cur); - return; - } - - - ++position; - - uint32_t pos_prev = coder->optimum[cur].pos_prev; - uint32_t state; - - if (coder->optimum[cur].prev_1_is_char) { - --pos_prev; - - if (coder->optimum[cur].prev_2) { - state = coder->optimum[coder->optimum[cur].pos_prev_2].state; - - if (coder->optimum[cur].back_prev_2 < REP_DISTANCES) - update_long_rep(state); - else - update_match(state); - - } else { - state = coder->optimum[pos_prev].state; - } - - update_literal(state); - - } else { - state = coder->optimum[pos_prev].state; - } - - if (pos_prev == cur - 1) { - if (is_short_rep(coder->optimum[cur])) - update_short_rep(state); - else - update_literal(state); - } else { - uint32_t pos; - if (coder->optimum[cur].prev_1_is_char && coder->optimum[cur].prev_2) { - pos_prev = coder->optimum[cur].pos_prev_2; - pos = coder->optimum[cur].back_prev_2; - update_long_rep(state); - } else { - pos = coder->optimum[cur].back_prev; - if (pos < REP_DISTANCES) - update_long_rep(state); - else - update_match(state); - } - - if (pos < REP_DISTANCES) { - reps[0] = coder->optimum[pos_prev].backs[pos]; - - uint32_t i; - for (i = 1; i <= pos; ++i) - reps[i] = coder->optimum[pos_prev].backs[i - 1]; - - for (; i < REP_DISTANCES; ++i) - reps[i] = coder->optimum[pos_prev].backs[i]; - - } else { - reps[0] = pos - REP_DISTANCES; - - for (uint32_t i = 1; i < REP_DISTANCES; ++i) - reps[i] = coder->optimum[pos_prev].backs[i - 1]; - } - } - - coder->optimum[cur].state = state; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) - coder->optimum[cur].backs[i] = reps[i]; - - const uint32_t cur_price = coder->optimum[cur].price; - - buf = coder->lz.buffer + coder->lz.read_pos - 1; - current_byte = *buf; - match_byte = *(buf - reps[0] - 1); - - pos_state = position & coder->pos_mask; - - const uint32_t cur_and_1_price = cur_price - + bit_get_price_0(coder->is_match[state][pos_state]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position, buf[-1]), - !is_literal_state(state), match_byte, current_byte); - - bool next_is_char = false; - - if (cur_and_1_price < coder->optimum[cur + 1].price) { - coder->optimum[cur + 1].price = cur_and_1_price; - coder->optimum[cur + 1].pos_prev = cur; - make_as_char(coder->optimum[cur + 1]); - next_is_char = true; - } - - match_price = cur_price - + bit_get_price_1(coder->is_match[state][pos_state]); - rep_match_price = match_price - + bit_get_price_1(coder->is_rep[state]); - - if (match_byte == current_byte - && !(coder->optimum[cur + 1].pos_prev < cur - && coder->optimum[cur + 1].back_prev == 0)) { - - const uint32_t short_rep_price = rep_match_price - + get_rep_len_1_price(state, pos_state); - - if (short_rep_price <= coder->optimum[cur + 1].price) { - coder->optimum[cur + 1].price = short_rep_price; - coder->optimum[cur + 1].pos_prev = cur; - make_as_short_rep(coder->optimum[cur + 1]); - next_is_char = true; - } - } - - uint32_t num_available_bytes_full - = coder->lz.write_pos - coder->lz.read_pos + 1; - num_available_bytes_full = MIN(OPTS - 1 - cur, num_available_bytes_full); - num_available_bytes = num_available_bytes_full; - - if (num_available_bytes < 2) - continue; - - if (num_available_bytes > fast_bytes) - num_available_bytes = fast_bytes; - - if (!next_is_char && match_byte != current_byte) { // speed optimization - // try literal + rep0 - const uint32_t back_offset = reps[0] + 1; - const uint32_t limit = MIN(num_available_bytes_full, fast_bytes + 1); - - uint32_t temp; - for (temp = 1; temp < limit - && buf[temp] == *(buf + temp - back_offset); - ++temp) ; - - const uint32_t len_test_2 = temp - 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_literal(state_2); - - const uint32_t pos_state_next = (position + 1) & coder->pos_mask; - const uint32_t next_rep_match_price = cur_and_1_price - + bit_get_price_1(coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - - // for (; len_test_2 >= 2; --len_test_2) { - const uint32_t offset = cur + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = false; - } -// } - } - } - - - uint32_t start_len = 2; // speed optimization - - for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { - const uint32_t back_offset = reps[rep_index] + 1; - - if (buf[0] != *(buf - back_offset) || buf[1] != *(buf + 1 - back_offset)) - continue; - - uint32_t len_test; - for (len_test = 2; len_test < num_available_bytes - && buf[len_test] == *(buf + len_test - back_offset); - ++len_test) ; - - while (len_end < cur + len_test) - coder->optimum[++len_end].price = INFINITY_PRICE; - - const uint32_t len_test_temp = len_test; - uint32_t price = rep_match_price; - get_pure_rep_price(price, rep_index, state, pos_state); - - do { - const uint32_t cur_and_len_price = price - + length_get_price(coder->rep_len_encoder, - len_test - 2, pos_state); - - if (cur_and_len_price < coder->optimum[cur + len_test].price) { - coder->optimum[cur + len_test].price = cur_and_len_price; - coder->optimum[cur + len_test].pos_prev = cur; - coder->optimum[cur + len_test].back_prev = rep_index; - coder->optimum[cur + len_test].prev_1_is_char = false; - } - } while (--len_test >= 2); - - len_test = len_test_temp; - - if (rep_index == 0) - start_len = len_test + 1; - - - uint32_t len_test_2 = len_test + 1; - const uint32_t limit = MIN(num_available_bytes_full, - len_test_2 + fast_bytes); - for (; len_test_2 < limit - && buf[len_test_2] == *(buf + len_test_2 - back_offset); - ++len_test_2) ; - - len_test_2 -= len_test + 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_long_rep(state_2); - - uint32_t pos_state_next = (position + len_test) & coder->pos_mask; - - const uint32_t cur_and_len_char_price = price - + length_get_price(coder->rep_len_encoder, - len_test - 2, pos_state) - + bit_get_price_0(coder->is_match[state_2][pos_state_next]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position + len_test, buf[len_test - 1]), - true, *(buf + len_test - back_offset), buf[len_test]); - - update_literal(state_2); - - pos_state_next = (position + len_test + 1) & coder->pos_mask; - - const uint32_t next_rep_match_price = cur_and_len_char_price - + bit_get_price_1(coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - -// for(; len_test_2 >= 2; len_test_2--) { - const uint32_t offset = cur + len_test + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + len_test + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = true; - coder->optimum[offset].pos_prev_2 = cur; - coder->optimum[offset].back_prev_2 = rep_index; - } -// } - } - } - - -// for (uint32_t len_test = 2; len_test <= new_len; ++len_test) - if (new_len > num_available_bytes) { - new_len = num_available_bytes; - - for (num_distance_pairs = 0; - new_len > match_distances[num_distance_pairs + 1]; - num_distance_pairs += 2) ; - - match_distances[num_distance_pairs + 1] = new_len; - num_distance_pairs += 2; - } - - - if (new_len >= start_len) { - normal_match_price = match_price - + bit_get_price_0(coder->is_rep[state]); - - while (len_end < cur + new_len) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t offs = 0; - while (start_len > match_distances[offs + 1]) - offs += 2; - - uint32_t cur_back = match_distances[offs + 2]; - uint32_t pos_slot = get_pos_slot_2(cur_back); - - for (uint32_t len_test = start_len; ; ++len_test) { - uint32_t cur_and_len_price = normal_match_price; - const uint32_t len_to_pos_state = get_len_to_pos_state(len_test); - - if (cur_back < FULL_DISTANCES) - cur_and_len_price += distances_prices[ - len_to_pos_state][cur_back]; - else - cur_and_len_price += pos_slot_prices[ - len_to_pos_state][pos_slot] - + align_prices[cur_back & ALIGN_MASK]; - - cur_and_len_price += length_get_price(coder->match_len_encoder, - len_test - MATCH_MIN_LEN, pos_state); - - if (cur_and_len_price < coder->optimum[cur + len_test].price) { - coder->optimum[cur + len_test].price = cur_and_len_price; - coder->optimum[cur + len_test].pos_prev = cur; - coder->optimum[cur + len_test].back_prev - = cur_back + REP_DISTANCES; - coder->optimum[cur + len_test].prev_1_is_char = false; - } - - if (len_test == match_distances[offs + 1]) { - // Try Match + Literal + Rep0 - const uint32_t back_offset = cur_back + 1; - uint32_t len_test_2 = len_test + 1; - const uint32_t limit = MIN(num_available_bytes_full, - len_test_2 + fast_bytes); - - for (; len_test_2 < limit && - buf[len_test_2] == *(buf + len_test_2 - back_offset); - ++len_test_2) ; - - len_test_2 -= len_test + 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_match(state_2); - uint32_t pos_state_next - = (position + len_test) & coder->pos_mask; - - const uint32_t cur_and_len_char_price = cur_and_len_price - + bit_get_price_0( - coder->is_match[state_2][pos_state_next]) - + literal_get_price( - literal_get_subcoder( - coder->literal_coder, - position + len_test, - buf[len_test - 1]), - true, - *(buf + len_test - back_offset), - buf[len_test]); - - update_literal(state_2); - pos_state_next = (pos_state_next + 1) & coder->pos_mask; - - const uint32_t next_rep_match_price - = cur_and_len_char_price - + bit_get_price_1( - coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - - // for(; len_test_2 >= 2; --len_test_2) { - const uint32_t offset = cur + len_test + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + len_test + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = true; - coder->optimum[offset].pos_prev_2 = cur; - coder->optimum[offset].back_prev_2 - = cur_back + REP_DISTANCES; - } -// } - } - - offs += 2; - if (offs == num_distance_pairs) - break; - - cur_back = match_distances[offs + 2]; - if (cur_back >= FULL_DISTANCES) - pos_slot = get_pos_slot_2(cur_back); - } - } - } - - } // Closes: while (true) -} diff --git a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c b/src/liblzma/lzma/lzma_encoder_getoptimumfast.c deleted file mode 100644 index fa06be21..00000000 --- a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c +++ /dev/null @@ -1,201 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_getoptimumfast.c -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - - -#include "lzma_encoder_private.h" - - -#define change_pair(small_dist, big_dist) \ - (((big_dist) >> 7) > (small_dist)) - - -extern void -lzma_get_optimum_fast(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res) -{ - // Local copies - const uint32_t fast_bytes = coder->fast_bytes; - - uint32_t len_main; - uint32_t num_distance_pairs; - if (!coder->longest_match_was_found) { - lzma_read_match_distances(coder, &len_main, &num_distance_pairs); - } else { - len_main = coder->longest_match_length; - num_distance_pairs = coder->num_distance_pairs; - coder->longest_match_was_found = false; - } - - const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1; - uint32_t num_available_bytes - = coder->lz.write_pos - coder->lz.read_pos + 1; - - if (num_available_bytes < 2) { - // There's not enough input left to encode a match. - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - if (num_available_bytes > MATCH_MAX_LEN) - num_available_bytes = MATCH_MAX_LEN; - - - // Look for repetitive matches; scan the previous four match distances - uint32_t rep_lens[REP_DISTANCES]; - uint32_t rep_max_index = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - const uint32_t back_offset = coder->reps[i] + 1; - - // If the first two bytes (2 == MATCH_MIN_LEN) do not match, - // this rep_distance[i] is not useful. This is indicated - // using zero as the length of the repetitive match. - if (buf[0] != *(buf - back_offset) - || buf[1] != *(buf + 1 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - // The first two bytes matched. - // Calculate the length of the match. - uint32_t len; - for (len = 2; len < num_available_bytes - && buf[len] == *(buf + len - back_offset); - ++len) ; - - // If we have found a repetitive match that is at least - // as long as fast_bytes, return it immediatelly. - if (len >= fast_bytes) { - *back_res = i; - *len_res = len; - move_pos(len - 1); - return; - } - - rep_lens[i] = len; - - // After this loop, rep_lens[rep_max_index] is the biggest - // value of all values in rep_lens[]. - if (len > rep_lens[rep_max_index]) - rep_max_index = i; - } - - - if (len_main >= fast_bytes) { - *back_res = coder->match_distances[num_distance_pairs] - + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 1); - return; - } - - uint32_t back_main = 0; - if (len_main >= 2) { - back_main = coder->match_distances[num_distance_pairs]; - - while (num_distance_pairs > 2 && len_main == - coder->match_distances[num_distance_pairs - 3] + 1) { - if (!change_pair(coder->match_distances[ - num_distance_pairs - 2], back_main)) - break; - - num_distance_pairs -= 2; - len_main = coder->match_distances[num_distance_pairs - 1]; - back_main = coder->match_distances[num_distance_pairs]; - } - - if (len_main == 2 && back_main >= 0x80) - len_main = 1; - } - - if (rep_lens[rep_max_index] >= 2) { - if (rep_lens[rep_max_index] + 1 >= len_main - || (rep_lens[rep_max_index] + 2 >= len_main - && (back_main > (1 << 9))) - || (rep_lens[rep_max_index] + 3 >= len_main - && (back_main > (1 << 15)))) { - *back_res = rep_max_index; - *len_res = rep_lens[rep_max_index]; - move_pos(*len_res - 1); - return; - } - } - - if (len_main >= 2 && num_available_bytes > 2) { - lzma_read_match_distances(coder, &coder->longest_match_length, - &coder->num_distance_pairs); - - if (coder->longest_match_length >= 2) { - const uint32_t new_distance = coder->match_distances[ - coder->num_distance_pairs]; - - if ((coder->longest_match_length >= len_main - && new_distance < back_main) - || (coder->longest_match_length == len_main + 1 - && !change_pair(back_main, new_distance)) - || (coder->longest_match_length > len_main + 1) - || (coder->longest_match_length + 1 >= len_main - && len_main >= 3 - && change_pair(new_distance, back_main))) { - coder->longest_match_was_found = true; - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - } - - ++buf; - --num_available_bytes; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - const uint32_t back_offset = coder->reps[i] + 1; - - if (buf[1] != *(buf + 1 - back_offset) - || buf[2] != *(buf + 2 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - uint32_t len; - for (len = 2; len < num_available_bytes - && buf[len] == *(buf + len - back_offset); - ++len) ; - - if (len + 1 >= len_main) { - coder->longest_match_was_found = true; - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - } - - *back_res = back_main + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 2); - return; - } - - *back_res = UINT32_MAX; - *len_res = 1; - return; -} diff --git a/src/liblzma/lzma/lzma_encoder_init.c b/src/liblzma/lzma/lzma_encoder_init.c deleted file mode 100644 index 21335f95..00000000 --- a/src/liblzma/lzma/lzma_encoder_init.c +++ /dev/null @@ -1,228 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_init.c -/// \brief Creating, resetting and destroying the LZMA encoder -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "lzma_encoder_private.h" - - -/// \brief Initializes the length encoder -static void -length_encoder_reset(lzma_length_encoder *lencoder, - const uint32_t num_pos_states, const uint32_t table_size) -{ - // NLength::CPriceTableEncoder::SetTableSize() - lencoder->table_size = table_size; - - // NLength::CEncoder::Init() - bit_reset(lencoder->choice); - bit_reset(lencoder->choice2); - - for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { - bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); - bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); - } - - bittree_reset(lencoder->high, LEN_HIGH_BITS); - - // NLength::CPriceTableEncoder::UpdateTables() - for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) - lencoder->counters[pos_state] = 1; - - return; -} - - -static void -lzma_lzma_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_lz_encoder_end(&coder->lz, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->lz = LZMA_LZ_ENCODER_INIT; - } - - // Validate options that aren't validated elsewhere. - const lzma_options_lzma *options = filters[0].options; - if (options->pos_bits > LZMA_POS_BITS_MAX - || options->fast_bytes < LZMA_FAST_BYTES_MIN - || options->fast_bytes > LZMA_FAST_BYTES_MAX) { - lzma_lzma_encoder_end(next->coder, allocator); - return LZMA_HEADER_ERROR; - } - - // Set compression mode. - switch (options->mode) { - case LZMA_MODE_FAST: - next->coder->best_compression = false; - break; - - case LZMA_MODE_BEST: - next->coder->best_compression = true; - break; - - default: - lzma_lzma_encoder_end(next->coder, allocator); - return LZMA_HEADER_ERROR; - } - - // Initialize literal coder. - { - const lzma_ret ret = lzma_literal_init( - &next->coder->literal_coder, - options->literal_context_bits, - options->literal_pos_bits); - if (ret != LZMA_OK) - return ret; - } - - // Initialize LZ encoder. - { - const lzma_ret ret = lzma_lz_encoder_reset( - &next->coder->lz, allocator, &lzma_lzma_encode, - options->dictionary_size, OPTS, - options->fast_bytes, MATCH_MAX_LEN + 1 + OPTS, - options->match_finder, - options->match_finder_cycles, - options->preset_dictionary, - options->preset_dictionary_size); - if (ret != LZMA_OK) { - lzma_lzma_encoder_end(next->coder, allocator); - return ret; - } - } - - // Set dist_table_size. - { - // Round the dictionary size up to next 2^n. - uint32_t log_size; - for (log_size = 0; (UINT32_C(1) << log_size) - < options->dictionary_size; ++log_size) ; - - next->coder->dist_table_size = log_size * 2; - } - - // Misc FIXME desc - next->coder->align_price_count = UINT32_MAX; - next->coder->match_price_count = UINT32_MAX; - next->coder->dictionary_size = options->dictionary_size; - next->coder->pos_mask = (1U << options->pos_bits) - 1; - next->coder->fast_bytes = options->fast_bytes; - - // Range coder - rc_reset(&next->coder->rc); - - // State - next->coder->state = 0; - next->coder->previous_byte = 0; - for (size_t i = 0; i < REP_DISTANCES; ++i) - next->coder->reps[i] = 0; - - // Bit encoders - for (size_t i = 0; i < STATES; ++i) { - for (size_t j = 0; j <= next->coder->pos_mask; ++j) { - bit_reset(next->coder->is_match[i][j]); - bit_reset(next->coder->is_rep0_long[i][j]); - } - - bit_reset(next->coder->is_rep[i]); - bit_reset(next->coder->is_rep0[i]); - bit_reset(next->coder->is_rep1[i]); - bit_reset(next->coder->is_rep2[i]); - } - - for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) - bit_reset(next->coder->pos_encoders[i]); - - // Bit tree encoders - for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) - bittree_reset(next->coder->pos_slot_encoder[i], POS_SLOT_BITS); - - bittree_reset(next->coder->pos_align_encoder, ALIGN_BITS); - - // Length encoders - length_encoder_reset(&next->coder->match_len_encoder, - 1U << options->pos_bits, - options->fast_bytes + 1 - MATCH_MIN_LEN); - - length_encoder_reset(&next->coder->rep_len_encoder, - 1U << options->pos_bits, - next->coder->fast_bytes + 1 - MATCH_MIN_LEN); - - next->coder->prev_len_encoder = NULL; - - // Misc - next->coder->longest_match_was_found = false; - next->coder->optimum_end_index = 0; - next->coder->optimum_current_index = 0; - next->coder->additional_offset = 0; - - next->coder->now_pos = 0; - next->coder->is_initialized = false; - next->coder->is_flushed = false, - next->coder->write_eopm = true; - - // Initialize the next decoder in the chain, if any. - { - const lzma_ret ret = lzma_next_filter_init(&next->coder->next, - allocator, filters + 1); - if (ret != LZMA_OK) { - lzma_lzma_encoder_end(next->coder, allocator); - return ret; - } - } - - // Initialization successful. Set the function pointers. - next->code = &lzma_lz_encode; - next->end = &lzma_lzma_encoder_end; - - return LZMA_OK; -} - - -extern bool -lzma_lzma_encode_properties(const lzma_options_lzma *options, uint8_t *byte) -{ - if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX - || options->literal_pos_bits - > LZMA_LITERAL_POS_BITS_MAX - || options->pos_bits > LZMA_POS_BITS_MAX - || options->literal_context_bits - + options->literal_pos_bits - > LZMA_LITERAL_BITS_MAX) - return true; - - *byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9 - + options->literal_context_bits; - assert(*byte <= (4 * 5 + 4) * 9 + 8); - - return false; -} diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c new file mode 100644 index 00000000..9da7e79e --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c @@ -0,0 +1,193 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_fast.c +// +// Copyright (C) 1999-2008 Igor Pavlov +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" + + +#define change_pair(small_dist, big_dist) \ + (((big_dist) >> 7) > (small_dist)) + + +static inline void +literal(const lzma_coder *restrict coder, const uint8_t *restrict buf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + // Try short rep0 instead of always coding it as a literal. + *back_res = *buf == *(buf - coder->reps[0] - 1) ? 0 : UINT32_MAX; + *len_res = 1; + return; +} + + +extern void +lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + const uint32_t fast_bytes = mf->find_len_max; + + uint32_t len_main; + uint32_t matches_count; + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint8_t *buf = mf_ptr(mf) - 1; + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + + if (buf_avail < 2) { + // There's not enough input left to encode a match. + literal(coder, buf, back_res, len_res); + return; + } + + // Look for repeated matches; scan the previous four match distances + uint32_t rep_len = 0; + uint32_t rep_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + // Pointer to the beginning of the match candidate + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + // If the first two bytes (2 == MATCH_LEN_MIN) do not match, + // this rep is not useful. + if (not_equal_16(buf, buf_back)) + continue; + + // The first two bytes matched. + // Calculate the length of the match. + uint32_t len; + for (len = 2; len < buf_avail + && buf[len] == buf_back[len]; ++len) ; + + // If we have found a repeated match that is at least + // fast_bytes long, return it immediatelly. + if (len >= fast_bytes) { + *back_res = i; + *len_res = len; + mf_skip(mf, len - 1); + return; + } + + if (len > rep_len) { + rep_index = i; + rep_len = len; + } + } + + // We didn't find a long enough repeated match. Encode it as a normal + // match if the match length is at least fast_bytes. + if (len_main >= fast_bytes) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return; + } + + uint32_t back_main = 0; + if (len_main >= 2) { + back_main = coder->matches[matches_count - 1].dist; + + while (matches_count > 1 && len_main == + coder->matches[matches_count - 2].len + 1) { + if (!change_pair(coder->matches[ + matches_count - 2].dist, + back_main)) + break; + + --matches_count; + len_main = coder->matches[matches_count - 1].len; + back_main = coder->matches[matches_count - 1].dist; + } + + if (len_main == 2 && back_main >= 0x80) + len_main = 1; + } + + if (rep_len >= 2) { + if (rep_len + 1 >= len_main + || (rep_len + 2 >= len_main + && back_main > (UINT32_C(1) << 9)) + || (rep_len + 3 >= len_main + && back_main > (UINT32_C(1) << 15))) { + *back_res = rep_index; + *len_res = rep_len; + mf_skip(mf, rep_len - 1); + return; + } + } + + if (len_main < 2 || buf_avail <= 2) { + literal(coder, buf, back_res, len_res); + return; + } + + // Get the matches for the next byte. If we find a better match, + // the current byte is encoded as a literal. + coder->longest_match_length = mf_find(mf, + &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= 2) { + const uint32_t new_dist = coder->matches[ + coder->matches_count - 1].dist; + + if ((coder->longest_match_length >= len_main + && new_dist < back_main) + || (coder->longest_match_length == len_main + 1 + && !change_pair(back_main, new_dist)) + || (coder->longest_match_length > len_main + 1) + || (coder->longest_match_length + 1 >= len_main + && len_main >= 3 + && change_pair(new_dist, back_main))) { + literal(coder, buf, back_res, len_res); + return; + } + } + + // In contrast to LZMA SDK, dictionary could not have been moved + // between mf_find() calls, thus it is safe to just increment + // the old buf pointer instead of recalculating it with mf_ptr(). + ++buf; + + const uint32_t limit = len_main - 1; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len; + for (len = 2; len < limit + && buf[len] == buf_back[len]; ++len) ; + + if (len >= limit) { + literal(coder, buf - 1, back_res, len_res); + return; + } + } + + *back_res = back_main + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 2); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c new file mode 100644 index 00000000..f0dd92c9 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -0,0 +1,875 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_normal.c +// +// Copyright (C) 1999-2008 Igor Pavlov +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +//////////// +// Prices // +//////////// + +static uint32_t +get_literal_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t prev_byte, const bool match_mode, + uint32_t match_byte, uint32_t symbol) +{ + const probability *const subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + pos, prev_byte); + + uint32_t price = 0; + + if (!match_mode) { + price = rc_bittree_price(subcoder, 8, symbol); + } else { + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + price += rc_bit_price(subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); + } + + return price; +} + + +static inline uint32_t +get_len_price(const lzma_length_encoder *const lencoder, + const uint32_t len, const uint32_t pos_state) +{ + // NOTE: Unlike the other price tables, length prices are updated + // in lzma_encoder.c + return lencoder->prices[pos_state][len - MATCH_LEN_MIN]; +} + + +static inline uint32_t +get_short_rep_price(const lzma_coder *const coder, + const lzma_lzma_state state, const uint32_t pos_state) +{ + return rc_bit_0_price(coder->is_rep0[state]) + + rc_bit_0_price(coder->is_rep0_long[state][pos_state]); +} + + +static inline uint32_t +get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const lzma_lzma_state state, uint32_t pos_state) +{ + uint32_t price; + + if (rep_index == 0) { + price = rc_bit_0_price(coder->is_rep0[state]); + price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]); + } else { + price = rc_bit_1_price(coder->is_rep0[state]); + + if (rep_index == 1) { + price += rc_bit_0_price(coder->is_rep1[state]); + } else { + price += rc_bit_1_price(coder->is_rep1[state]); + price += rc_bit_price(coder->is_rep2[state], + rep_index - 2); + } + } + + return price; +} + + +static inline uint32_t +get_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const uint32_t len, const lzma_lzma_state state, + const uint32_t pos_state) +{ + return get_len_price(&coder->rep_len_encoder, len, pos_state) + + get_pure_rep_price(coder, rep_index, state, pos_state); +} + + +static inline uint32_t +get_pos_len_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t len, const uint32_t pos_state) +{ + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + uint32_t price; + + if (pos < FULL_DISTANCES) { + price = coder->distances_prices[len_to_pos_state][pos]; + } else { + const uint32_t pos_slot = get_pos_slot_2(pos); + price = coder->pos_slot_prices[len_to_pos_state][pos_slot] + + coder->align_prices[pos & ALIGN_MASK]; + } + + price += get_len_price(&coder->match_len_encoder, len, pos_state); + + return price; +} + + +static void +fill_distances_prices(lzma_coder *coder) +{ + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) { + + uint32_t *const pos_slot_prices + = coder->pos_slot_prices[len_to_pos_state]; + + // Price to encode the pos_slot. + for (uint32_t pos_slot = 0; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] = rc_bittree_price( + coder->pos_slot[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + // For matches with distance >= FULL_DISTANCES, add the price + // of the direct bits part of the match distance. (Align bits + // are handled by fill_align_prices()). + for (uint32_t pos_slot = END_POS_MODEL_INDEX; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] += rc_direct_price( + ((pos_slot >> 1) - 1) - ALIGN_BITS); + + // Distances in the range [0, 3] are fully encoded with + // pos_slot, so they are used for coder->distances_prices + // as is. + for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i) + coder->distances_prices[len_to_pos_state][i] + = pos_slot_prices[i]; + } + + // Distances in the range [4, 127] depend on pos_slot and pos_special. + // We do this in a loop separate from the above loop to avoid + // redundant calls to get_pos_slot(). + for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { + const uint32_t pos_slot = get_pos_slot(i); + const uint32_t footer_bits = ((pos_slot >> 1) - 1); + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t price = rc_bittree_reverse_price( + coder->pos_special + base - pos_slot - 1, + footer_bits, i - base); + + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) + coder->distances_prices[len_to_pos_state][i] + = price + coder->pos_slot_prices[ + len_to_pos_state][pos_slot]; + } + + coder->match_price_count = 0; + return; +} + + +static void +fill_align_prices(lzma_coder *coder) +{ + for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) + coder->align_prices[i] = rc_bittree_reverse_price( + coder->pos_align, ALIGN_BITS, i); + + coder->align_price_count = 0; + return; +} + + +///////////// +// Optimal // +///////////// + +static inline void +make_literal(lzma_optimal *optimal) +{ + optimal->back_prev = UINT32_MAX; + optimal->prev_1_is_literal = false; +} + + +static inline void +make_short_rep(lzma_optimal *optimal) +{ + optimal->back_prev = 0; + optimal->prev_1_is_literal = false; +} + + +#define is_short_rep(optimal) \ + ((optimal).back_prev == 0) + + +static void +backward(lzma_coder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t cur) +{ + coder->opts_end_index = cur; + + uint32_t pos_mem = coder->opts[cur].pos_prev; + uint32_t back_mem = coder->opts[cur].back_prev; + + do { + if (coder->opts[cur].prev_1_is_literal) { + make_literal(&coder->opts[pos_mem]); + coder->opts[pos_mem].pos_prev = pos_mem - 1; + + if (coder->opts[cur].prev_2) { + coder->opts[pos_mem - 1].prev_1_is_literal + = false; + coder->opts[pos_mem - 1].pos_prev + = coder->opts[cur].pos_prev_2; + coder->opts[pos_mem - 1].back_prev + = coder->opts[cur].back_prev_2; + } + } + + const uint32_t pos_prev = pos_mem; + const uint32_t back_cur = back_mem; + + back_mem = coder->opts[pos_prev].back_prev; + pos_mem = coder->opts[pos_prev].pos_prev; + + coder->opts[pos_prev].back_prev = back_cur; + coder->opts[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->opts_current_index = coder->opts[0].pos_prev; + *len_res = coder->opts[0].pos_prev; + *back_res = coder->opts[0].back_prev; + + return; +} + + +////////// +// Main // +////////// + +static inline uint32_t +helper1(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + const uint32_t fast_bytes = mf->find_len_max; + + uint32_t len_main; + uint32_t matches_count; + + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + if (buf_avail < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + const uint8_t *const buf = mf_ptr(mf) - 1; + + uint32_t rep_lens[REP_DISTANCES]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) { + rep_lens[i] = 0; + continue; + } + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + rep_lens[i] = len_test; + if (len_test > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= fast_bytes) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + mf_skip(mf, *len_res - 1); + return UINT32_MAX; + } + + + if (len_main >= fast_bytes) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return UINT32_MAX; + } + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - coder->reps[0] - 1); + + if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[0].state = coder->state; + + const uint32_t pos_state = position & coder->pos_mask; + + coder->opts[1].price = rc_bit_0_price( + coder->is_match[coder->state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(coder->state), + match_byte, current_byte); + + make_literal(&coder->opts[1]); + + const uint32_t match_price = rc_bit_1_price( + coder->is_match[coder->state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[coder->state]); + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price( + coder, coder->state, pos_state); + + if (short_rep_price < coder->opts[1].price) { + coder->opts[1].price = short_rep_price; + make_short_rep(&coder->opts[1]); + } + } + + const uint32_t len_end = MAX(len_main, rep_lens[rep_max_index]); + + if (len_end < 2) { + *back_res = coder->opts[1].back_prev; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[1].pos_prev = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[0].backs[i] = coder->reps[i]; + + uint32_t len = len_end; + do { + coder->opts[len].price = RC_INFINITY_PRICE; + } while (--len >= 2); + + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price( + &coder->rep_len_encoder, + rep_len, pos_state); + + if (cur_and_len_price < coder->opts[rep_len].price) { + coder->opts[rep_len].price = cur_and_len_price; + coder->opts[rep_len].pos_prev = 0; + coder->opts[rep_len].back_prev = i; + coder->opts[rep_len].prev_1_is_literal = false; + } + } while (--rep_len >= 2); + } + + + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[coder->state]); + + len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2; + if (len <= len_main) { + uint32_t i = 0; + while (len > coder->matches[i].len) + ++i; + + for(; ; ++len) { + const uint32_t dist = coder->matches[i].dist; + const uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + dist, len, pos_state); + + if (cur_and_len_price < coder->opts[len].price) { + coder->opts[len].price = cur_and_len_price; + coder->opts[len].pos_prev = 0; + coder->opts[len].back_prev + = dist + REP_DISTANCES; + coder->opts[len].prev_1_is_literal = false; + } + + if (len == coder->matches[i].len) + if (++i == matches_count) + break; + } + } + + return len_end; +} + + +static inline uint32_t +helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf, + uint32_t len_end, uint32_t position, const uint32_t cur, + const uint32_t fast_bytes, const uint32_t buf_avail_full) +{ + uint32_t matches_count = coder->matches_count; + uint32_t new_len = coder->longest_match_length; + uint32_t pos_prev = coder->opts[cur].pos_prev; + uint32_t state; + + if (coder->opts[cur].prev_1_is_literal) { + --pos_prev; + + if (coder->opts[cur].prev_2) { + state = coder->opts[coder->opts[cur].pos_prev_2].state; + + if (coder->opts[cur].back_prev_2 < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + + } else { + state = coder->opts[pos_prev].state; + } + + update_literal(state); + + } else { + state = coder->opts[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->opts[cur])) + update_short_rep(state); + else + update_literal(state); + } else { + uint32_t pos; + if (coder->opts[cur].prev_1_is_literal + && coder->opts[cur].prev_2) { + pos_prev = coder->opts[cur].pos_prev_2; + pos = coder->opts[cur].back_prev_2; + update_long_rep(state); + } else { + pos = coder->opts[cur].back_prev; + if (pos < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + } + + if (pos < REP_DISTANCES) { + reps[0] = coder->opts[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + + for (; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i]; + + } else { + reps[0] = pos - REP_DISTANCES; + + for (uint32_t i = 1; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + } + } + + coder->opts[cur].state = state; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->opts[cur].price; + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - reps[0] - 1); + + const uint32_t pos_state = position & coder->pos_mask; + + const uint32_t cur_and_1_price = cur_price + + rc_bit_0_price(coder->is_match[state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(state), match_byte, current_byte); + + bool next_is_literal = false; + + if (cur_and_1_price < coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = cur_and_1_price; + coder->opts[cur + 1].pos_prev = cur; + make_literal(&coder->opts[cur + 1]); + next_is_literal = true; + } + + const uint32_t match_price = cur_price + + rc_bit_1_price(coder->is_match[state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[state]); + + if (match_byte == current_byte + && !(coder->opts[cur + 1].pos_prev < cur + && coder->opts[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price(coder, state, pos_state); + + if (short_rep_price <= coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = short_rep_price; + coder->opts[cur + 1].pos_prev = cur; + make_short_rep(&coder->opts[cur + 1]); + next_is_literal = true; + } + } + + if (buf_avail_full < 2) + return len_end; + + const uint32_t buf_avail = MIN(buf_avail_full, fast_bytes); + + if (!next_is_literal && match_byte != current_byte) { // speed optimization + // try literal + rep0 + const uint8_t *const buf_back = buf - reps[0] - 1; + const uint32_t limit = MIN(buf_avail_full, fast_bytes + 1); + + uint32_t len_test = 1; + while (len_test < limit && buf[len_test] == buf_back[len_test]) + ++len_test; + + --len_test; + + if (len_test >= 2) { + uint32_t state_2 = state; + update_literal(state_2); + + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for (; len_test >= 2; --len_test) { + const uint32_t offset = cur + 1 + len_test; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = false; + } + //} + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { + const uint8_t *const buf_back = buf - reps[rep_index] - 1; + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + while (len_end < cur + len_test) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev = rep_index; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + fast_bytes); + for (; len_test_2 < limit + && buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_long_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state) + + rc_bit_0_price(coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, position + len_test, + buf[len_test - 1], true, + buf_back[len_test], buf[len_test]); + + update_literal(state_2); + + pos_state_next = (position + len_test + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_literal_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 = rep_index; + } + //} + } + } + + + //for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + if (new_len > buf_avail) { + new_len = buf_avail; + + matches_count = 0; + while (new_len > coder->matches[matches_count].len) + ++matches_count; + + coder->matches[matches_count++].len = new_len; + } + + + if (new_len >= start_len) { + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + uint32_t i = 0; + while (start_len > coder->matches[i].len) + ++i; + + for (uint32_t len_test = start_len; ; ++len_test) { + const uint32_t cur_back = coder->matches[i].dist; + uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + cur_back, len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev + = cur_back + REP_DISTANCES; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + + if (len_test == coder->matches[i].len) { + // Try Match + Literal + Rep0 + const uint8_t *const buf_back = buf - cur_back - 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + fast_bytes); + + for (; len_test_2 < limit && + buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = cur_and_len_price + + rc_bit_0_price( + coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, + position + len_test, + buf[len_test - 1], + true, + buf_back[len_test], + buf[len_test]); + + update_literal(state_2); + pos_state_next = (pos_state_next + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_literal_price + + rc_bit_1_price( + coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 + = cur_back + REP_DISTANCES; + } + //} + } + + if (++i == matches_count) + break; + } + } + } + + return len_end; +} + + +extern void +lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + // If we have symbols pending, return the next pending symbol. + if (coder->opts_end_index != coder->opts_current_index) { + assert(mf->read_ahead > 0); + *len_res = coder->opts[coder->opts_current_index].pos_prev + - coder->opts_current_index; + *back_res = coder->opts[coder->opts_current_index].back_prev; + coder->opts_current_index = coder->opts[ + coder->opts_current_index].pos_prev; + return; + } + + // Update the price tables. In LZMA SDK <= 4.60 (and possibly later) + // this was done in both initialization function and in the main loop. + // In liblzma they were moved into this single place. + if (mf->read_ahead == 0) { + if (coder->match_price_count >= (1 << 7)) + fill_distances_prices(coder); + + if (coder->align_price_count >= ALIGN_TABLE_SIZE) + fill_align_prices(coder); + } + + // TODO: This needs quite a bit of cleaning still. But splitting + // the oroginal function to two pieces makes it at least a little + // more readable, since those two parts don't share many variables. + + uint32_t len_end = helper1(coder, mf, back_res, len_res, position); + if (len_end == UINT32_MAX) + return; + + uint32_t reps[REP_DISTANCES]; + memcpy(reps, coder->reps, sizeof(reps)); + + uint32_t cur; + for (cur = 1; cur < len_end; ++cur) { + assert(cur < OPTS); + + coder->longest_match_length = mf_find( + mf, &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= mf->find_len_max) + break; + + len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end, + position + cur, cur, mf->find_len_max, + MIN(mf_avail(mf) + 1, OPTS - 1 - cur)); + } + + backward(coder, len_res, back_res, cur); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c index 966c7c86..08f339e9 100644 --- a/src/liblzma/lzma/lzma_encoder_presets.c +++ b/src/liblzma/lzma/lzma_encoder_presets.c @@ -20,15 +20,47 @@ #include "common.h" +#define pow2(e) (UINT32_C(1) << (e)) + + LZMA_API const lzma_options_lzma lzma_preset_lzma[9] = { -// dictionary_size lc lp pb mode fb mf mfc -{ UINT32_C(1) << 16, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC3, 0 }, -{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC4, 0 }, -{ UINT32_C(1) << 19, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 21, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 22, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 23, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 24, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 25, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 }, +// dict lc lp pb mode fb mf mfc +{ pow2(16), NULL, 0, 3, 0, 2, false, LZMA_MODE_FAST, 64, LZMA_MF_HC3, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_FAST, 64, LZMA_MF_HC4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(19), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(21), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(22), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(23), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(24), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(25), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, }; + + +/* +extern LZMA_API lzma_bool +lzma_preset_lzma(lzma_options_lzma *options, uint32_t level) +{ + *options = (lzma_options_lzma){ + + }; + + options->literal_context_bits = LZMA_LITERAL_CONTEXT_BITS_DEFAULT + options->literal_pos_bits = LZMA_LITERAL_POS_BITS_DEFAULT; + options->pos_bits = LZMA_POS_BITS_DEFAULT; + options->preset_dictionary = NULL; + options->preset_dictionary_size = 0; + options->persistent = false; + + options->mode = level <= 2 ? LZMA_MODE_FAST : LZMA_MODE_NORMAL; + options->fast_bytes = level <= + + options->match_finder = level == 1 ? LZMA_MF_HC3 + : (level == 2 ? LZMA_MF_HC4 : LZMA_MF_BT4); + options->match_finder_cycles = 0; + + + + options->dictionary_size = +} +*/ diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h index a16051f8..7533bc79 100644 --- a/src/liblzma/lzma/lzma_encoder_private.h +++ b/src/liblzma/lzma/lzma_encoder_private.h @@ -21,20 +21,27 @@ #ifndef LZMA_LZMA_ENCODER_PRIVATE_H #define LZMA_LZMA_ENCODER_PRIVATE_H -#include "lzma_encoder.h" -#include "lzma_common.h" #include "lz_encoder.h" #include "range_encoder.h" +#include "lzma_common.h" +#include "lzma_encoder.h" + +// Macro to compare if the first two bytes in two buffers differ. This is +// needed in lzma_lzma_optimum_*() to test if the match is at least +// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no +// reason to not use it when it is supported. +#ifdef HAVE_FAST_UNALIGNED_ACCESS +# define not_equal_16(a, b) \ + (*(const uint16_t *)(a) != *(const uint16_t *)(b)) +#else +# define not_equal_16(a, b) \ + ((a)[0] != (b)[0] || (a)[1] != (b)[1]) +#endif -#define move_pos(num) \ -do { \ - assert((int32_t)(num) >= 0); \ - if ((num) != 0) { \ - coder->additional_offset += num; \ - coder->lz.skip(&coder->lz, num); \ - } \ -} while (0) + +// Optimal - Number of entries in the optimum array. +#define OPTS (1 << 12) typedef struct { @@ -54,7 +61,7 @@ typedef struct { typedef struct { lzma_lzma_state state; - bool prev_1_is_char; + bool prev_1_is_literal; bool prev_2; uint32_t pos_prev_2; @@ -70,132 +77,79 @@ typedef struct { struct lzma_coder_s { - // Next coder in the chain - lzma_next_coder next; - - // In window and match finder - lzma_lz_encoder lz; - - // Range encoder + /// Range encoder lzma_range_encoder rc; - // State + /// State lzma_lzma_state state; - uint8_t previous_byte; + + /// The four most recent match distances uint32_t reps[REP_DISTANCES]; - // Misc - uint32_t match_distances[MATCH_MAX_LEN * 2 + 2 + 1]; - uint32_t num_distance_pairs; - uint32_t additional_offset; - uint32_t now_pos; // Lowest 32 bits are enough here. - bool best_compression; ///< True when LZMA_MODE_BEST is used + /// Array of match candidates + lzma_match matches[MATCH_LEN_MAX + 1]; + + /// Number of match candidates in matches[] + uint32_t matches_count; + + /// Varibale to hold the length of the longest match between calls + /// to lzma_lzma_optimum_*(). + uint32_t longest_match_length; + + /// True if using getoptimumfast + bool fast_mode; + + /// True if the encoder has been initialized by encoding the first + /// byte as a literal. bool is_initialized; + + /// True if the range encoder has been flushed, but not all bytes + /// have been written to the output buffer yet. bool is_flushed; - bool write_eopm; - // Literal encoder - lzma_literal_coder literal_coder; + uint32_t pos_mask; ///< (1 << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; - // Bit encoders + // These are the same as in lzma_decoder.c. See comments there. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; probability is_match[STATES][POS_STATES_MAX]; probability is_rep[STATES]; probability is_rep0[STATES]; probability is_rep1[STATES]; probability is_rep2[STATES]; probability is_rep0_long[STATES][POS_STATES_MAX]; - probability pos_encoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_align[ALIGN_TABLE_SIZE]; - // Bit Tree Encoders - probability pos_slot_encoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; - probability pos_align_encoder[1 << ALIGN_BITS]; - - // Length encoders + // These are the same as in lzma_decoder.c except that the encoders + // include also price tables. lzma_length_encoder match_len_encoder; lzma_length_encoder rep_len_encoder; - lzma_length_encoder *prev_len_encoder; - // Optimal - lzma_optimal optimum[OPTS]; - uint32_t optimum_end_index; - uint32_t optimum_current_index; - uint32_t longest_match_length; - bool longest_match_was_found; - - // Prices - uint32_t pos_slot_prices[LEN_TO_POS_STATES][DIST_TABLE_SIZE_MAX]; + // Price tables + uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS]; uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES]; - uint32_t align_prices[ALIGN_TABLE_SIZE]; - uint32_t align_price_count; uint32_t dist_table_size; uint32_t match_price_count; - // LZMA specific settings - uint32_t dictionary_size; ///< Size in bytes - uint32_t fast_bytes; - uint32_t pos_state_bits; - uint32_t pos_mask; ///< (1 << pos_state_bits) - 1 -}; - - -extern void lzma_length_encoder_update_table(lzma_length_encoder *lencoder, - const uint32_t pos_state); + uint32_t align_prices[ALIGN_TABLE_SIZE]; + uint32_t align_price_count; -extern bool lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size); + // Optimal + uint32_t opts_end_index; + uint32_t opts_current_index; + lzma_optimal opts[OPTS]; +}; -extern void lzma_get_optimum(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res); -extern void lzma_get_optimum_fast(lzma_coder *restrict coder, +extern void lzma_lzma_optimum_fast( + lzma_coder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res); - -// NOTE: Don't add 'restrict'. -static inline void -lzma_read_match_distances(lzma_coder *coder, - uint32_t *len_res, uint32_t *num_distance_pairs) -{ - *len_res = 0; - - coder->lz.get_matches(&coder->lz, coder->match_distances); - - *num_distance_pairs = coder->match_distances[0]; - - if (*num_distance_pairs > 0) { - *len_res = coder->match_distances[*num_distance_pairs - 1]; - assert(*len_res <= MATCH_MAX_LEN); - - if (*len_res == coder->fast_bytes) { - uint32_t offset = *len_res - 1; - const uint32_t distance = coder->match_distances[ - *num_distance_pairs] + 1; - uint32_t limit = MATCH_MAX_LEN - *len_res; - - assert(offset + limit < coder->lz.keep_size_after); - assert(coder->lz.read_pos <= coder->lz.write_pos); - - // If we are close to end of the stream, we may need - // to limit the length of the match. - if (coder->lz.write_pos - coder->lz.read_pos - < offset + limit) - limit = coder->lz.write_pos - - (coder->lz.read_pos + offset); - - offset += coder->lz.read_pos; - uint32_t i = 0; - while (i < limit && coder->lz.buffer[offset + i] - == coder->lz.buffer[ - offset + i - distance]) - ++i; - - *len_res += i; - } - } - - ++coder->additional_offset; - - return; -} +extern void lzma_lzma_optimum_normal(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint32_t *restrict back_res, + uint32_t *restrict len_res, uint32_t position); #endif diff --git a/src/liblzma/lzma/lzma_literal.c b/src/liblzma/lzma/lzma_literal.c deleted file mode 100644 index 3611a1f7..00000000 --- a/src/liblzma/lzma/lzma_literal.c +++ /dev/null @@ -1,51 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_literal.c -/// \brief Literal Coder -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "lzma_literal.h" - - -extern lzma_ret -lzma_literal_init(lzma_literal_coder *coder, - uint32_t literal_context_bits, uint32_t literal_pos_bits) -{ - // Verify that arguments are sane. - if (literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX - || literal_pos_bits > LZMA_LITERAL_POS_BITS_MAX) - return LZMA_HEADER_ERROR; - - // Calculate the number of states the literal coder must store. - const uint32_t states = literal_states( - literal_pos_bits, literal_context_bits); - - // Store the new settings. - coder->literal_context_bits = literal_context_bits; - coder->literal_pos_bits = literal_pos_bits; - - // Calculate also the literal_pos_mask. It's not changed - // anywhere else than here. - coder->literal_pos_mask = (1 << literal_pos_bits) - 1; - - // Reset the literal coder. - for (uint32_t i = 0; i < states; ++i) - for (uint32_t j = 0; j < LIT_SIZE; ++j) - bit_reset(coder->coders[i][j]); - - return LZMA_OK; -} diff --git a/src/liblzma/lzma/lzma_literal.h b/src/liblzma/lzma/lzma_literal.h deleted file mode 100644 index 208abd99..00000000 --- a/src/liblzma/lzma/lzma_literal.h +++ /dev/null @@ -1,71 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_literal.h -/// \brief Literal Coder -/// -/// This is used as is by both LZMA encoder and decoder. -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_LITERAL_H -#define LZMA_LITERAL_H - -#include "common.h" - -// We need typedef of `probability'. -#include "range_common.h" - - -/// Each literal coder is divided in three sections: -/// - 0x001-0x0FF: Without match byte -/// - 0x101-0x1FF: With match byte; match bit is 0 -/// - 0x201-0x2FF: With match byte; match bit is 1 -#define LIT_SIZE 0x300 - -/// Calculate how many states are needed. Each state has -/// LIT_SIZE `probability' variables. -#define literal_states(literal_context_bits, literal_pos_bits) \ - (1U << ((literal_context_bits) + (literal_pos_bits))) - -/// Locate the literal coder for the next literal byte. The choice depends on -/// - the lowest literal_pos_bits bits of the position of the current -/// byte; and -/// - the highest literal_context_bits bits of the previous byte. -#define literal_get_subcoder(literal_coder, pos, prev_byte) \ - (literal_coder).coders[(((pos) & (literal_coder).literal_pos_mask) \ - << (literal_coder).literal_context_bits) \ - + ((prev_byte) >> (8 - (literal_coder).literal_context_bits))] - - -typedef struct { - uint32_t literal_context_bits; - uint32_t literal_pos_bits; - - /// literal_pos_mask is always (1 << literal_pos_bits) - 1. - uint32_t literal_pos_mask; - - /// There are (1 << (literal_pos_bits + literal_context_bits)) - /// literal coders. - probability coders[1 << LZMA_LITERAL_BITS_MAX][LIT_SIZE]; - -} lzma_literal_coder; - - -extern lzma_ret lzma_literal_init( - lzma_literal_coder *coder, - uint32_t literal_context_bits, uint32_t literal_pos_bits); - -#endif -- cgit v1.2.3