Diffstat (limited to 'src/liblzma/lzma/lzma_decoder.c')
-rw-r--r--  src/liblzma/lzma/lzma_decoder.c | 1306
1 file changed, 791 insertions, 515 deletions
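
Editor's note: the patch below replaces the old two-pass decoder (decode_dummy() plus decode_real()) with a single resumable decoder. Every probability-coded bit gets a SEQ_* label, the current label is stored in coder->sequence together with the partially decoded symbol, and a switch at the top of lzma_decode() jumps back to that label on the next call. The following minimal sketch only illustrates that switch-into-a-loop technique; the names (struct toy_decoder, toy_run, SEQ_FIRST, SEQ_SECOND) are hypothetical and not part of liblzma.

/* A minimal sketch of the "switch into a loop" coroutine pattern used by
 * the new decoder. The switch jumps to the case label recorded in
 * d->sequence, resuming exactly where the previous call ran out of input;
 * once inside the loop, the case labels have no further effect. */
#include <stddef.h>
#include <stdint.h>

struct toy_decoder {
	/* Where to continue on the next call */
	enum { SEQ_FIRST, SEQ_SECOND } sequence;

	/* Partially decoded symbol kept across calls */
	uint32_t partial;
};

/* Decode 16-bit big-endian symbols from "in". Returns the number of input
 * bytes consumed; stops when either the input or the output space runs out
 * and records the resume point in d->sequence. */
static size_t
toy_run(struct toy_decoder *d, const uint8_t *in, size_t in_size,
		uint32_t *out, size_t out_max)
{
	size_t in_pos = 0;
	size_t out_pos = 0;

	switch (d->sequence)
	while (out_pos < out_max) {
	case SEQ_FIRST:
		if (in_pos == in_size) {
			d->sequence = SEQ_FIRST;
			goto out;
		}

		d->partial = (uint32_t)(in[in_pos++]) << 8;

	/* Fall through: the second byte is decoded right after the first. */
	case SEQ_SECOND:
		if (in_pos == in_size) {
			d->sequence = SEQ_SECOND;
			goto out;
		}

		out[out_pos++] = d->partial | in[in_pos++];
	}

	d->sequence = SEQ_FIRST;
out:
	return in_pos;
}

Because the resume point is saved whenever input runs out, the decoder loop no longer needs a guaranteed minimum of readable input bytes, which is why the old REQUIRED_IN_BUFFER_SIZE logic and decode_dummy() disappear in this commit.
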
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 68941021..e9d047d3 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -4,7 +4,7 @@ /// \brief LZMA decoder // // Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -18,74 +18,147 @@ // /////////////////////////////////////////////////////////////////////////////// -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - +#include "lz_decoder.h" #include "lzma_common.h" #include "lzma_decoder.h" -#include "lz_decoder.h" #include "range_decoder.h" -/// REQUIRED_IN_BUFFER_SIZE is the number of required input bytes -/// for the worst case: longest match with longest distance. -/// LZMA_IN_BUFFER_SIZE must be larger than REQUIRED_IN_BUFFER_SIZE. -/// 23 bits = 2 (match select) + 10 (len) + 6 (distance) + 4 (align) -/// + 1 (rc_normalize) -/// -/// \todo Is this correct for sure? -/// -#define REQUIRED_IN_BUFFER_SIZE \ - ((23 * (BIT_MODEL_TOTAL_BITS - MOVE_BITS + 1) + 26 + 9) / 8) +#ifdef HAVE_SMALL +// Macros for (somewhat) size-optimized code. +#define seq_4(seq) seq -// Length decoders are easiest to have as macros, because they use range -// decoder macros, which use local variables rc_range and rc_code. +#define seq_6(seq) seq -#define length_decode(target, len_decoder, pos_state) \ +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0(len_decoder.choice); \ - target = MATCH_MIN_LEN; \ - bittree_decode(target, len_decoder.low[pos_state], LEN_LOW_BITS); \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ } else { \ - update_bit_1(len_decoder.choice); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode(target, len_decoder.mid[pos_state], LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ } \ } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ } while (0) - -#define length_decode_dummy(target, len_decoder, pos_state) \ +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ 
+ seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN; \ - bittree_decode_dummy(target, \ - len_decoder.low[pos_state], LEN_LOW_BITS); \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ } else { \ - update_bit_1_dummy(); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.mid[pos_state], \ - LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ } \ } \ } while (0) +#endif // HAVE_SMALL + +/// Length decoder probabilities; see comments in lzma_common.h. typedef struct { probability choice; probability choice2; @@ -96,26 +169,12 @@ typedef struct { struct lzma_coder_s { - /// Data of the next coder, if any. - lzma_next_coder next; - - /// Sliding output window a.k.a. dictionary a.k.a. history buffer. - lzma_lz_decoder lz; - - // Range coder - lzma_range_decoder rc; - - // State - lzma_lzma_state state; - uint32_t rep0; ///< Distance of the latest match - uint32_t rep1; ///< Distance of second latest match - uint32_t rep2; ///< Distance of third latest match - uint32_t rep3; ///< Distance of fourth latest match - uint32_t pos_bits; - uint32_t pos_mask; - uint32_t now_pos; // Lowest 32-bits are enough here. + /////////////////// + // Probabilities // + /////////////////// - lzma_literal_coder literal_coder; + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; /// If 1, it's a match. Otherwise it's a single 8-bit literal. probability is_match[STATES][POS_STATES_MAX]; @@ -138,178 +197,107 @@ struct lzma_coder_s { /// the length is decoded from rep_len_decoder. 
probability is_rep0_long[STATES][POS_STATES_MAX]; - probability pos_slot_decoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; - probability pos_decoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; - probability pos_align_decoder[1 << ALIGN_BITS]; - - /// Length of a match - lzma_length_decoder match_len_decoder; - - /// Length of a repeated match. - lzma_length_decoder rep_len_decoder; - - /// True when we have produced at least one byte of output since the - /// beginning of the stream or the latest flush marker. - bool has_produced_output; -}; - - -/// \brief Check if the next iteration of the decoder loop can be run. -/// -/// \note There must always be REQUIRED_IN_BUFFER_SIZE bytes -/// readable space! -/// -static bool lzma_attribute((pure)) -decode_dummy(const lzma_coder *restrict coder, - const uint8_t *restrict in, size_t in_pos_local, - const size_t in_size, lzma_range_decoder rc, - uint32_t state, uint32_t rep0, const uint32_t now_pos) -{ - uint32_t rc_bound; - - do { - const uint32_t pos_state = now_pos & coder->pos_mask; - - if_bit_0(coder->is_match[state][pos_state]) { - // It's a literal i.e. a single 8-bit byte. - - update_bit_0_dummy(); - - const probability *subcoder = literal_get_subcoder( - coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; - - if (is_literal_state(state)) { - // Decode literal without match byte. - do { - if_bit_0(subcoder[symbol]) { - update_bit_0_dummy(); - symbol <<= 1; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - - } else { - // Decode literal with match byte. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - - do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; - const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; - - if_bit_0(subcoder[subcoder_index]) { - update_bit_0_dummy(); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); - } - - break; - } - - update_bit_1_dummy(); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0_dummy(); - length_decode_dummy(len, coder->match_len_decoder, pos_state); - - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode_dummy(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 31); - rep0 = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - rep0 <<= direct_bits; - assert(rep0 <= 96); - // -1 is fine, because bittree_reverse_decode() - // starts from table index [1] (not [0]). - assert((int32_t)(rep0 - pos_slot - 1) >= -1); - assert((int32_t)(rep0 - pos_slot - 1) <= 82); - // We add the result to rep0, so rep0 - // must not be part of second argument - // of the macro. - const int32_t offset = rep0 - pos_slot - 1; - bittree_reverse_decode_dummy(coder->pos_decoders + offset, - direct_bits); - } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct_dummy(direct_bits); - - bittree_reverse_decode_dummy(coder->pos_align_decoder, - ALIGN_BITS); - } - } + /// Probability tree for the highest two bits of the match distance. 
+ /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; - } else { - update_bit_1_dummy(); + /// Probility trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; - if_bit_0(coder->is_rep0[state]) { - update_bit_0_dummy(); + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_TABLE_SIZE]; - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0_dummy(); - break; - } else { - update_bit_1_dummy(); - } + /// Length of a normal match + lzma_length_decoder match_len_decoder; - } else { - update_bit_1_dummy(); + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; - if_bit_0(coder->is_rep1[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); + /////////////////// + // Decoder state // + /////////////////// - if_bit_0(coder->is_rep2[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); - } - } - } + // Range coder + lzma_range_decoder rc; - length_decode_dummy(len, coder->rep_len_decoder, pos_state); - } - } while (0); + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; - rc_normalize(); + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match - return in_pos_local <= in_size; -} + uint32_t pos_mask; // (1U << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_VALUE_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. 
+ uint32_t len; +}; -static bool -decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size, bool has_safe_buffer) +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) { //////////////////// // Initialization // //////////////////// if (!rc_read_init(&coder->rc, in, in_pos, in_size)) - return false; + return LZMA_OK; /////////////// // Variables // @@ -318,8 +306,12 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // Making local copies of often-used variables improves both // speed and readability. + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + // Range decoder - rc_to_local(coder->rc); + rc_to_local(coder->rc, *in_pos); // State uint32_t state = coder->state; @@ -328,87 +320,168 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, uint32_t rep2 = coder->rep2; uint32_t rep3 = coder->rep3; - // Misc - uint32_t now_pos = coder->now_pos; - bool has_produced_output = coder->has_produced_output; - - // Variables derived from decoder settings const uint32_t pos_mask = coder->pos_mask; - size_t in_pos_local = *in_pos; // Local copy - size_t in_limit; - if (in_size <= REQUIRED_IN_BUFFER_SIZE) - in_limit = 0; - else - in_limit = in_size - REQUIRED_IN_BUFFER_SIZE; - - - while (coder->lz.pos < coder->lz.limit - && (in_pos_local < in_limit || (has_safe_buffer - && decode_dummy(coder, in, in_pos_local, in_size, - rc, state, rep0, now_pos)))) { - - ///////////////////// - // Actual decoding // - ///////////////////// - - const uint32_t pos_state = now_pos & pos_mask; + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; - if_bit_0(coder->is_match[state][pos_state]) { - update_bit_0(coder->is_match[state][pos_state]); + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); // It's a literal i.e. a single 8-bit byte. 
- probability *subcoder = literal_get_subcoder(coder->literal_coder, - now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; if (is_literal_state(state)) { // Decode literal without match byte. +#ifdef HAVE_SMALL + case SEQ_LITERAL: do { - if_bit_0(subcoder[symbol]) { - update_bit_0(subcoder[symbol]); - symbol <<= 1; - } else { - update_bit_1(subcoder[symbol]); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif } else { // Decode literal with match byte. // - // The usage of subcoder_offset allows omitting some - // branches, which should give tiny speed improvement on - // some CPUs. subcoder_offset gets set to zero if match_bit - // didn't match. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t match_bit + = len & offset; const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. + uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } - if_bit_0(subcoder[subcoder_index]) { - update_bit_0(subcoder[subcoder_index]); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1(subcoder[subcoder_index]); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. 
+ static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; } - // Put the decoded byte to the dictionary, update the - // decoder state, and start a new decoding loop. - coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol); - ++now_pos; - update_literal(state); - has_produced_output = true; continue; } @@ -416,115 +489,196 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // (distance and length) which will be repeated from our // output history. - update_bit_1(coder->is_match[state][pos_state]); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0(coder->is_rep[state]); + rc_update_1(coder->is_match[state][pos_state]); + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { // Not a repeated match - // - // We will decode a new distance and store - // the value to distance. - - // Decode the length of the match. - length_decode(len, coder->match_len_decoder, pos_state); - + rc_update_0(coder->is_rep[state]); update_match(state); - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 30); - uint32_t distance = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - distance <<= direct_bits; - assert(distance <= 96); - // -1 is fine, because - // bittree_reverse_decode() - // starts from table index [1] - // (not [0]). - assert((int32_t)(distance - pos_slot - 1) >= -1); - assert((int32_t)(distance - pos_slot - 1) <= 82); - // We add the result to distance, so distance - // must not be part of second argument - // of the macro. - const int32_t offset = distance - pos_slot - 1; - bittree_reverse_decode(distance, coder->pos_decoders + offset, - direct_bits); + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. + probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 29] bits of + // the match distance. 
+ limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct(distance, direct_bits); - distance <<= ALIGN_BITS; - - bittree_reverse_decode(distance, coder->pos_align_decoder, - ALIGN_BITS); - - if (distance == UINT32_MAX) { - if (len == LEN_SPECIAL_EOPM) { - // End of Payload Marker found. - coder->lz.eopm_detected = true; - break; - - } else if (len == LEN_SPECIAL_FLUSH) { - // Flush marker detected. We must have produced - // at least one byte of output since the previous - // flush marker or the beginning of the stream. - // This is to prevent hanging the decoder with - // malicious input files. - if (!has_produced_output) - return true; - - has_produced_output = false; - - // We know that we have enough input to call - // this macro, because it is tested at the - // end of decode_dummy(). - rc_normalize(); - - rc_reset(rc); - - // If we don't have enough input here, we jump - // out of the loop. Note that while there is a - // useless call to rc_normalize(), it does nothing - // since we have just reset the range decoder. - if (!rc_read_init(&rc, in, &in_pos_local, in_size)) - break; - - continue; - - } else { - return true; + // The distace is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. 
+ rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; } + + case SEQ_EOPM: + // TODO Comment + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; } } + } - // The latest three match distances are kept in - // memory in case there are repeated matches. - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance; - - } else { - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = pos_slot; + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; } } else { - update_bit_1(coder->is_rep[state]); + rc_update_1(coder->is_rep[state]); // Repeated match // @@ -532,242 +686,318 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // earlier. The latest four match distances are // available as rep0, rep1, rep2 and rep3. We will // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } - if_bit_0(coder->is_rep0[state]) { - update_bit_0(coder->is_rep0[state]); - + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); // The distance is rep0. - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0(coder->is_rep0_long[state][pos_state]); + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); update_short_rep(state); - // Repeat exactly one byte and start a new decoding loop. - // Note that rep0 is known to have a safe value, thus we - // don't need to check if we are wrapping the dictionary - // when it isn't full yet. - if (unlikely(lz_is_empty(coder->lz))) - return true; - - coder->lz.dict[coder->lz.pos] - = lz_get_byte(coder->lz, rep0); - ++coder->lz.pos; - ++now_pos; - has_produced_output = true; - continue; - - } else { - update_bit_1(coder->is_rep0_long[state][pos_state]); + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } - // Repeating more than one byte at - // distance of rep0. + continue; } + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + } else { - update_bit_1(coder->is_rep0[state]); + rc_update_1(coder->is_rep0[state]); + case SEQ_IS_REP1: // The distance is rep1, rep2 or rep3. Once // we find out which one of these three, it // is stored to rep0 and rep1, rep2 and rep3 // are updated accordingly. 
+ rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); - uint32_t distance; + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep1[state]) { - update_bit_0(coder->is_rep1[state]); - distance = rep1; } else { - update_bit_1(coder->is_rep1[state]); + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep2[state]) { - update_bit_0(coder->is_rep2[state]); - distance = rep2; } else { - update_bit_1(coder->is_rep2[state]); - distance = rep3; + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; } - - rep2 = rep1; } - - rep1 = rep0; - rep0 = distance; } update_long_rep(state); // Decode the length of the repeated match. - length_decode(len, coder->rep_len_decoder, pos_state); + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); } - ///////////////////////////////// // Repeat from history buffer. // ///////////////////////////////// // The length is always between these limits. There is no way // to trigger the algorithm to set len outside this range. - assert(len >= MATCH_MIN_LEN); - assert(len <= MATCH_MAX_LEN); - - now_pos += len; - has_produced_output = true; + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + case SEQ_COPY: // Repeat len bytes from distance of rep0. - if (!lzma_lz_out_repeat(&coder->lz, rep0, len)) - return true; + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } } - rc_normalize(); + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; +out: + // Save state - ///////////////////////////////// - // Update the *data structure. // - ///////////////////////////////// + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; - // Range decoder - rc_from_local(coder->rc); + rc_from_local(coder->rc, *in_pos); - // State coder->state = state; coder->rep0 = rep0; coder->rep1 = rep1; coder->rep2 = rep2; coder->rep3 = rep3; - // Misc - coder->now_pos = now_pos; - coder->has_produced_output = has_produced_output; - *in_pos = in_pos_local; + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; - return false; + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. 
+ rc_reset(coder->rc); + } + + return ret; } + static void -lzma_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) { - lzma_next_coder_end(&coder->next, allocator); - lzma_lz_decoder_end(&coder->lz, allocator); - lzma_free(coder, allocator); - return; + coder->uncompressed_size = uncompressed_size; } - -extern lzma_ret -lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) { - // LZMA can only be the last filter in the chain. - assert(filters[1].init == NULL); - - // Validate pos_bits. Other options are validated by the - // respective initialization functions. - const lzma_options_lzma *options = filters[0].options; - if (options->pos_bits > LZMA_POS_BITS_MAX) - return LZMA_HEADER_ERROR; + // This is hack. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ - // Allocate memory for the decoder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; - next->code = &lzma_lz_decode; - next->end = &lzma_decoder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->lz = LZMA_LZ_DECODER_INIT; - } + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + // FIXME? - // Store the pos_bits and calculate pos_mask. - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1U << next->coder->pos_bits) - 1; + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pos_bits) - 1; // Initialize the literal decoder. - return_if_error(lzma_literal_init(&next->coder->literal_coder, - options->literal_context_bits, - options->literal_pos_bits)); + literal_init(coder->literal, options->literal_context_bits, + options->literal_pos_bits); - // Allocate and initialize the LZ decoder. 
- return_if_error(lzma_lz_decoder_reset(&next->coder->lz, allocator, - &decode_real, options->dictionary_size, - MATCH_MAX_LEN)); + coder->literal_context_bits = options->literal_context_bits; + coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1; // State - next->coder->state = 0; - next->coder->rep0 = 0; - next->coder->rep1 = 0; - next->coder->rep2 = 0; - next->coder->rep3 = 0; - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1 << next->coder->pos_bits) - 1; - next->coder->now_pos = 0; + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1 << options->pos_bits) - 1; // Range decoder - rc_reset(next->coder->rc); + rc_reset(coder->rc); // Bit and bittree decoders for (uint32_t i = 0; i < STATES; ++i) { - for (uint32_t j = 0; j <= next->coder->pos_mask; ++j) { - bit_reset(next->coder->is_match[i][j]); - bit_reset(next->coder->is_rep0_long[i][j]); + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); } - bit_reset(next->coder->is_rep[i]); - bit_reset(next->coder->is_rep0[i]); - bit_reset(next->coder->is_rep1[i]); - bit_reset(next->coder->is_rep2[i]); + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); } for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) - bittree_reset(next->coder->pos_slot_decoder[i], POS_SLOT_BITS); + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) - bit_reset(next->coder->pos_decoders[i]); + bit_reset(coder->pos_special[i]); - bittree_reset(next->coder->pos_align_decoder, ALIGN_BITS); + bittree_reset(coder->pos_align, ALIGN_BITS); // Len decoders (also bit/bittree) - const uint32_t num_pos_states = 1 << next->coder->pos_bits; - bit_reset(next->coder->match_len_decoder.choice); - bit_reset(next->coder->match_len_decoder.choice2); - bit_reset(next->coder->rep_len_decoder.choice); - bit_reset(next->coder->rep_len_decoder.choice2); + const uint32_t num_pos_states = 1 << options->pos_bits; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { - bittree_reset(next->coder->match_len_decoder.low[pos_state], + bittree_reset(coder->match_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->match_len_decoder.mid[pos_state], + bittree_reset(coder->match_len_decoder.mid[pos_state], LEN_MID_BITS); - bittree_reset(next->coder->rep_len_decoder.low[pos_state], + bittree_reset(coder->rep_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->rep_len_decoder.mid[pos_state], + bittree_reset(coder->rep_len_decoder.mid[pos_state], LEN_MID_BITS); } - bittree_reset(next->coder->match_len_decoder.high, LEN_HIGH_BITS); - bittree_reset(next->coder->rep_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, size_t *dict_size) +{ + if (lz->coder == NULL) { + lz->coder = 
lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } - next->coder->has_produced_output = false; + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + *dict_size = options->dictionary_size; return LZMA_OK; } -extern void -lzma_lzma_decoder_uncompressed_size( - lzma_next_coder *next, lzma_vli uncompressed_size) +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, size_t *dict_size) { - next->coder->lz.uncompressed_size = uncompressed_size; - return; + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, dict_size)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_VALUE_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); } extern bool -lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) { if (byte > (4 * 5 + 4) * 9 + 8) return true; @@ -781,3 +1011,49 @@ lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) return options->literal_context_bits + options->literal_pos_bits > LZMA_LITERAL_BITS_MAX; } + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + const lzma_options_lzma *const opt = options; + const uint64_t lz_memusage + = lzma_lz_decoder_memusage(opt->dictionary_size); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lz_memusage; +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_HEADER_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dictionary_size = integer_read_32(props + 1); + + opt->preset_dictionary = NULL; + opt->preset_dictionary_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_HEADER_ERROR; +} |
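
For reference, the properties byte validated at the top of lzma_lzma_lclppb_decode() packs the three literal/position parameters as (pb * 5 + lp) * 9 + lc, which is why (4 * 5 + 4) * 9 + 8, i.e. 224, is the largest legal value. The middle of that function is elided from the hunk above, so the following is only a hedged sketch of such an unpacking with hypothetical names (toy_lclppb, toy_lclppb_decode), not the lzma_options_lzma fields themselves.

/* Hypothetical illustration only: unpacking an LZMA properties byte of the
 * form (pb * 5 + lp) * 9 + lc. The range check matches the one visible in
 * lzma_lzma_lclppb_decode() above. */
#include <stdbool.h>
#include <stdint.h>

struct toy_lclppb {
	uint32_t lc;	/* literal_context_bits, 0..8 */
	uint32_t lp;	/* literal_pos_bits,     0..4 */
	uint32_t pb;	/* pos_bits,             0..4 */
};

/* Returns true on error, matching the convention used in the patch. */
static bool
toy_lclppb_decode(struct toy_lclppb *opt, uint8_t byte)
{
	if (byte > (4 * 5 + 4) * 9 + 8)
		return true;

	opt->lc = byte % 9;
	byte /= 9;
	opt->lp = byte % 5;
	opt->pb = byte / 5;

	return false;
}

As the hunk shows, the real function additionally rejects combinations where literal_context_bits + literal_pos_bits exceed LZMA_LITERAL_BITS_MAX, and lzma_lzma_props_decode() reads the remaining four bytes of the 5-byte properties blob as the dictionary size via integer_read_32().
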