aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-03-11 15:35:34 +0200
committerLasse Collin <lasse.collin@tukaani.org>2008-03-11 15:35:34 +0200
commitbfde3b24a5ae25ce53c854762b6148952386b025 (patch)
tree9ef18ef7954c62042396cd5dd1c00969e2b1f2ce
parentInitialize the last byte of the dictionary to zero so that (diff)
downloadxz-bfde3b24a5ae25ce53c854762b6148952386b025.tar.xz
Apply a minor speed optimization to LZMA decoder.
Diffstat (limited to '')
-rw-r--r--src/liblzma/lzma/lzma_decoder.c85
1 files changed, 43 insertions, 42 deletions
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 9e1226f7..fce9594a 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder,
coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
uint32_t symbol = 1;
- if (!is_char_state(state)) {
- // Decode literal with match byte.
+ if (is_char_state(state)) {
+ // Decode literal without match byte.
+ do {
+ if_bit_0(subcoder[symbol]) {
+ update_bit_0_dummy();
+ symbol <<= 1;
+ } else {
+ update_bit_1_dummy();
+ symbol = (symbol << 1) | 1;
+ }
+ } while (symbol < 0x100);
- assert(rep0 != UINT32_MAX);
+ } else {
+ // Decode literal with match byte.
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
+ uint32_t subcoder_offset = 0x100;
do {
match_byte <<= 1;
- const uint32_t match_bit = match_byte & 0x100;
- const uint32_t subcoder_index = 0x100 + match_bit + symbol;
+ const uint32_t match_bit = match_byte & subcoder_offset;
+ const uint32_t subcoder_index
+ = subcoder_offset + match_bit + symbol;
if_bit_0(subcoder[subcoder_index]) {
update_bit_0_dummy();
symbol <<= 1;
- if (match_bit != 0)
- break;
+ subcoder_offset &= ~match_bit;
} else {
update_bit_1_dummy();
symbol = (symbol << 1) | 1;
- if (match_bit == 0)
- break;
+ subcoder_offset &= match_bit;
}
} while (symbol < 0x100);
}
- // Decode literal without match byte. This is also
- // the tail of the with-match-byte function.
- while (symbol < 0x100) {
- if_bit_0(subcoder[symbol]) {
- update_bit_0_dummy();
- symbol <<= 1;
- } else {
- update_bit_1_dummy();
- symbol = (symbol << 1) | 1;
- }
- }
-
break;
}
@@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
now_pos, lz_get_byte(coder->lz, 0));
uint32_t symbol = 1;
- if (!is_char_state(state)) {
- // Decode literal with match byte.
+ if (is_char_state(state)) {
+ // Decode literal without match byte.
+ do {
+ if_bit_0(subcoder[symbol]) {
+ update_bit_0(subcoder[symbol]);
+ symbol <<= 1;
+ } else {
+ update_bit_1(subcoder[symbol]);
+ symbol = (symbol << 1) | 1;
+ }
+ } while (symbol < 0x100);
- assert(rep0 != UINT32_MAX);
+ } else {
+ // Decode literal with match byte.
+ //
+ // The usage of subcoder_offset allows omitting some
+ // branches, which should give tiny speed improvement on
+ // some CPUs. subcoder_offset gets set to zero if match_bit
+ // didn't match.
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
+ uint32_t subcoder_offset = 0x100;
do {
match_byte <<= 1;
- const uint32_t match_bit = match_byte & 0x100;
- const uint32_t subcoder_index = 0x100 + match_bit + symbol;
+ const uint32_t match_bit = match_byte & subcoder_offset;
+ const uint32_t subcoder_index
+ = subcoder_offset + match_bit + symbol;
if_bit_0(subcoder[subcoder_index]) {
update_bit_0(subcoder[subcoder_index]);
symbol <<= 1;
- if (match_bit != 0)
- break;
+ subcoder_offset &= ~match_bit;
} else {
update_bit_1(subcoder[subcoder_index]);
symbol = (symbol << 1) | 1;
- if (match_bit == 0)
- break;
+ subcoder_offset &= match_bit;
}
} while (symbol < 0x100);
}
- // Decode literal without match byte. This is also
- // the tail of the with-match-byte function.
- while (symbol < 0x100) {
- if_bit_0(subcoder[symbol]) {
- update_bit_0(subcoder[symbol]);
- symbol <<= 1;
- } else {
- update_bit_1(subcoder[symbol]);
- symbol = (symbol << 1) | 1;
- }
- }
-
// Put the decoded byte to the dictionary, update the
// decoder state, and start a new decoding loop.
coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);