aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2024-02-12 17:09:10 +0200
committerLasse Collin <lasse.collin@tukaani.org>2024-02-14 18:31:16 +0200
commite0c0ee475c0800c08291ae45e0d66aa00d5ce604 (patch)
tree2bd2240b7bba1739fa8c54f1acff3a3eba621dc8 /src/liblzma/lzma
parentliblzma: Creates Non-resumable and Resumable modes for lzma_decoder. (diff)
downloadxz-e0c0ee475c0800c08291ae45e0d66aa00d5ce604.tar.xz
liblzma: LZMA decoder improvements.
This adds macros for bittree decoding which prepares the code for alternative C versions and inline assembly.
Diffstat (limited to 'src/liblzma/lzma')
-rw-r--r--src/liblzma/lzma/lzma_decoder.c264
1 files changed, 78 insertions, 186 deletions
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 6203bae9..0788558f 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -24,9 +24,8 @@
// Minimum number of input bytes to safely decode one LZMA symbol.
// The worst case is that we decode 22 bits using probabilities and 26
-// direct bits. This may decode at maximum 20 bytes of input plus one
-// extra byte after the final EOPM normalization.
-#define LZMA_IN_REQUIRED 21
+// direct bits. This may decode at maximum 20 bytes of input.
+#define LZMA_IN_REQUIRED 20
// Macros for (somewhat) size-optimized code.
@@ -73,32 +72,22 @@ do { \
symbol = 1; \
rc_if_0(ld.choice) { \
rc_update_0(ld.choice); \
- rc_bit(ld.low[pos_state][symbol], , ); \
- rc_bit(ld.low[pos_state][symbol], , ); \
- rc_bit(ld.low[pos_state][symbol], , ); \
- target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
+ rc_bittree3(ld.low[pos_state], \
+ -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \
+ target = symbol; \
} else { \
rc_update_1(ld.choice); \
rc_if_0(ld.choice2) { \
rc_update_0(ld.choice2); \
- rc_bit(ld.mid[pos_state][symbol], , ); \
- rc_bit(ld.mid[pos_state][symbol], , ); \
- rc_bit(ld.mid[pos_state][symbol], , ); \
- target = symbol - LEN_MID_SYMBOLS \
- + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
+ rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \
+ + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \
+ target = symbol; \
} else { \
rc_update_1(ld.choice2); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- rc_bit(ld.high[symbol], , ); \
- target = symbol - LEN_HIGH_SYMBOLS \
+ rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \
+ MATCH_LEN_MIN \
- + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
+ + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \
+ target = symbol; \
} \
} \
} while (0)
@@ -369,8 +358,8 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// If there is not enough room for another LZMA symbol
// go to Resumable mode.
- if (rc_in_pos + LZMA_IN_REQUIRED > in_size
- || dict.pos == dict.limit)
+ if (unlikely(rc_in_end - rc_in_ptr < LZMA_IN_REQUIRED
+ || dict.pos == dict.limit))
goto slow;
// Decode the first bit from the next LZMA symbol.
@@ -390,64 +379,14 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
probs = literal_subcoder(coder->literal,
literal_context_bits, literal_pos_mask,
dict.pos, dict_get(&dict, 0));
- symbol = 1;
if (is_literal_state(state)) {
// Decode literal without match byte.
- // We need to decode 8 bits, so instead
- // of looping from 1 - 8, we unroll the
- // loop for a speed optimization.
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
+ rc_bittree8(probs, 0);
} else {
// Decode literal with match byte.
- //
- // We store the byte we compare against
- // ("match byte") to "len" to minimize the
- // number of variables we need to store
- // between decoder calls.
-
- len = (uint32_t)(dict_get(&dict, rep0)) << 1;
-
- // The usage of "offset" allows omitting some
- // branches, which should give tiny speed
- // improvement on some CPUs. "offset" gets
- // set to zero if match_bit didn't match.
- offset = 0x100;
-
- // Unroll the loop.
- uint32_t match_bit;
- uint32_t subcoder_index;
-
-# define decode_with_match_bit \
- match_bit = len & offset; \
- subcoder_index = offset + match_bit + symbol; \
- rc_bit(probs[subcoder_index], \
- offset &= ~match_bit, \
- offset &= match_bit)
-
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
- len <<= 1;
- decode_with_match_bit;
-# undef decode_match_bit
+ rc_matched_literal(probs,
+ dict_get(&dict, rep0));
}
state = next_state[state];
@@ -501,18 +440,8 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// The next 6 bits determine how to decode the
// rest of the distance.
probs = coder->dist_slot[get_dist_state(len)];
- symbol = 1;
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
- rc_bit(probs[symbol], , );
-
- // Get rid of the highest bit that was needed for
- // indexing of the probability array.
- symbol -= DIST_SLOTS;
+ rc_bittree6(probs, -DIST_SLOTS);
assert(symbol <= 63);
if (symbol < DIST_MODEL_START) {
@@ -540,6 +469,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
assert(limit <= 5);
rep0 <<= limit;
assert(rep0 <= 96);
+
// -1 is fine, because we start
// decoding at probs[1], not probs[0].
// NOTE: This violates the C standard,
@@ -553,106 +483,51 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
probs = coder->pos_special + rep0
- symbol - 1;
symbol = 1;
- offset = 0;
+ offset = 1;
- switch (limit) {
- case 5:
- assert(offset == 0);
- rc_bit(probs[symbol], ,
- rep0 += 1U);
- ++offset;
- --limit;
- case 4:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset);
- ++offset;
- --limit;
- case 3:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset);
- ++offset;
- --limit;
- case 2:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset);
- ++offset;
- --limit;
- case 1:
- // We need "symbol" only for
- // indexing the probability
- // array, thus we can use
- // rc_bit_last() here to
- // omit the unneeded updating
- // of "symbol".
- rc_bit_last(probs[symbol], ,
- rep0 += 1U << offset);
- }
+ // Variable number (1-5) of bits
+ // from a reverse bittree. This
+ // isn't worth manual unrolling.
+ do {
+ rc_bit_add_if_1(probs,
+ rep0, offset);
+ offset <<= 1;
+ } while (--limit > 0);
} else {
// The distance is >= 128. Decode the
// lower bits without probabilities
// except the lowest four bits.
assert(symbol >= 14);
assert(limit >= 6);
+
limit -= ALIGN_BITS;
assert(limit >= 2);
- // Not worth manual unrolling
- do {
- rc_direct(rep0);
- } while (--limit > 0);
+ rc_direct(rep0, limit);
// Decode the lowest four bits using
// probabilities.
rep0 <<= ALIGN_BITS;
- symbol = 1;
-
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 1);
-
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 2);
-
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 4);
-
- // Like when distance [4, 127], we
- // don't need "symbol" for anything
- // other than indexing the probability
- // array.
- rc_bit_last(
- coder->pos_align[symbol], ,
- rep0 += 8);
-
- if (rep0 == UINT32_MAX) {
- ///////////////////////////
- // End of payload marker //
- ///////////////////////////
-
- // End of payload marker was
- // found. It may only be
- // present if
- // - uncompressed size is
- // unknown or
- // - after known uncompressed
- // size amount of bytes has
- // been decompressed and
- // caller has indicated
- // that EOPM might be used
- // (it's not allowed in
- // LZMA2).
- if (!eopm_is_valid) {
- ret = LZMA_DATA_ERROR;
- goto out;
- }
-
- // LZMA1 stream with
- // end-of-payload marker.
- rc_normalize();
- ret = rc_is_finished(rc)
- ? LZMA_STREAM_END
- : LZMA_DATA_ERROR;
- goto out;
- }
+ rc_bittree_rev4(coder->pos_align);
+ rep0 += symbol;
+
+ // If the end of payload marker (EOPM)
+ // is detected, jump to the safe code.
+ // The EOPM handling isn't speed
+ // critical at all.
+ //
+ // A final normalization is needed
+ // after the EOPM (there can be a
+ // dummy byte to read in some cases).
+ // If the normalization was done here
+ // in the fast code, it would need to
+ // be taken into account in the value
+ // of LZMA_IN_REQUIRED. Using the
+ // safe code allows keeping
+ // LZMA_IN_REQUIRED as 20 instead of
+ // 21.
+ if (rep0 == UINT32_MAX)
+ goto eopm;
}
}
@@ -948,31 +823,48 @@ slow:
limit -= ALIGN_BITS;
assert(limit >= 2);
case SEQ_DIRECT:
- do {
- rc_direct_safe(rep0,
- SEQ_DIRECT);
- } while (--limit > 0);
+ rc_direct_safe(rep0, limit,
+ SEQ_DIRECT);
rep0 <<= ALIGN_BITS;
- symbol = 1;
-
- offset = 0;
+ symbol = 0;
+ offset = 1;
case SEQ_ALIGN:
do {
- rc_bit_safe(coder->pos_align[
- symbol], ,
- rep0 += 1U << offset,
+ rc_bit_last_safe(
+ coder->pos_align[
+ offset
+ + symbol],
+ ,
+ symbol += offset,
SEQ_ALIGN);
- } while (++offset < ALIGN_BITS);
+ offset <<= 1;
+ } while (offset < ALIGN_SIZE);
+
+ rep0 += symbol;
- // End of payload marker
if (rep0 == UINT32_MAX) {
+ // End of payload marker was
+ // found. It may only be
+ // present if
+ // - uncompressed size is
+ // unknown or
+ // - after known uncompressed
+ // size amount of bytes has
+ // been decompressed and
+ // caller has indicated
+ // that EOPM might be used
+ // (it's not allowed in
+ // LZMA2).
+eopm:
if (!eopm_is_valid) {
ret = LZMA_DATA_ERROR;
goto out;
}
case SEQ_EOPM:
+ // LZMA1 stream with
+ // end-of-payload marker.
rc_normalize_safe(SEQ_EOPM);
ret = rc_is_finished(rc)
? LZMA_STREAM_END