From 369f72fd656f537a9a8e06f13e6d0d4c242be22f Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 1 Jun 2008 12:48:17 +0300 Subject: Fix a buffer overflow in the LZMA encoder. It was due to my misunderstanding of the code. There's no tiny fix for this problem, so I also cleaned up the code in general. This reduces the speed of the encoder 2-5 % in the fastest compression mode ("lzma -1"). High compression modes should have no noticeable performance difference. This commit breaks things (especially LZMA_SYNC_FLUSH) but I will fix them once the new format and LZMA2 has been roughly implemented. Plain LZMA won't support LZMA_SYNC_FLUSH at all and won't be supported in the new .lzma format. This may change still but this is what it looks like now. Support for known uncompressed size (that is, LZMA or LZMA2 without EOPM) is likely to go away. This means there will be API changes. --- src/liblzma/lzma/lzma_encoder_getoptimum.c | 59 ++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'src/liblzma/lzma/lzma_encoder_getoptimum.c') diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c index 535508ee..b175e4cb 100644 --- a/src/liblzma/lzma/lzma_encoder_getoptimum.c +++ b/src/liblzma/lzma/lzma_encoder_getoptimum.c @@ -103,6 +103,36 @@ do { \ ((opt).back_prev == 0) +static void +fill_length_prices(lzma_length_encoder *lc, uint32_t pos_state) +{ + const uint32_t num_symbols = lc->table_size; + const uint32_t a0 = bit_get_price_0(lc->choice); + const uint32_t a1 = bit_get_price_1(lc->choice); + const uint32_t b0 = a1 + bit_get_price_0(lc->choice2); + const uint32_t b1 = a1 + bit_get_price_1(lc->choice2); + + uint32_t *prices = lc->prices[pos_state]; + uint32_t i = 0; + + for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) + prices[i] = a0 + bittree_get_price(lc->low[pos_state], + LEN_LOW_BITS, i); + + for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) + prices[i] = b0 + bittree_get_price(lc->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + + for (; i < num_symbols; ++i) + prices[i] = b1 + bittree_get_price(lc->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + + lc->counters[pos_state] = num_symbols; + + return; +} + + static void fill_distances_prices(lzma_coder *coder) { @@ -254,6 +284,9 @@ extern void lzma_get_optimum(lzma_coder *restrict coder, uint32_t *restrict back_res, uint32_t *restrict len_res) { + uint32_t position = coder->now_pos; + uint32_t pos_state = position & coder->pos_mask; + // Update the price tables. In the C++ LZMA SDK 4.42 this was done in both // initialization function and in the main loop. In liblzma they were // moved into this single place. @@ -265,6 +298,13 @@ lzma_get_optimum(lzma_coder *restrict coder, fill_align_prices(coder); } + if (coder->prev_len_encoder != NULL) { + if (--coder->prev_len_encoder->counters[pos_state] == 0) + fill_length_prices(coder->prev_len_encoder, pos_state); + + coder->prev_len_encoder = NULL; + } + if (coder->optimum_end_index != coder->optimum_current_index) { *len_res = coder->optimum[coder->optimum_current_index].pos_prev @@ -312,7 +352,7 @@ lzma_get_optimum(lzma_coder *restrict coder, uint32_t rep_max_index = 0; for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - reps[i] = coder->rep_distances[i]; + reps[i] = coder->reps[i]; const uint32_t back_offset = reps[i] + 1; if (buf[0] != *(buf - back_offset) @@ -356,13 +396,8 @@ lzma_get_optimum(lzma_coder *restrict coder, return; } - const uint32_t pos_mask = coder->pos_mask; - coder->optimum[0].state = coder->state; - uint32_t position = coder->now_pos; - uint32_t pos_state = (position & pos_mask); - coder->optimum[1].price = bit_get_price_0( coder->is_match[coder->state][pos_state]) + literal_get_price( @@ -575,7 +610,7 @@ lzma_get_optimum(lzma_coder *restrict coder, current_byte = *buf; match_byte = *(buf - reps[0] - 1); - pos_state = position & pos_mask; + pos_state = position & coder->pos_mask; const uint32_t cur_and_1_price = cur_price + bit_get_price_0(coder->is_match[state][pos_state]) @@ -640,7 +675,7 @@ lzma_get_optimum(lzma_coder *restrict coder, uint32_t state_2 = state; update_literal(state_2); - const uint32_t pos_state_next = (position + 1) & pos_mask; + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; const uint32_t next_rep_match_price = cur_and_1_price + bit_get_price_1(coder->is_match[state_2][pos_state_next]) + bit_get_price_1(coder->is_rep[state_2]); @@ -719,7 +754,7 @@ lzma_get_optimum(lzma_coder *restrict coder, uint32_t state_2 = state; update_long_rep(state_2); - uint32_t pos_state_next = (position + len_test) & pos_mask; + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; const uint32_t cur_and_len_char_price = price + length_get_price(coder->rep_len_encoder, @@ -732,7 +767,7 @@ lzma_get_optimum(lzma_coder *restrict coder, update_literal(state_2); - pos_state_next = (position + len_test + 1) & pos_mask; + pos_state_next = (position + len_test + 1) & coder->pos_mask; const uint32_t next_rep_match_price = cur_and_len_char_price + bit_get_price_1(coder->is_match[state_2][pos_state_next]) @@ -829,7 +864,7 @@ lzma_get_optimum(lzma_coder *restrict coder, uint32_t state_2 = state; update_match(state_2); uint32_t pos_state_next - = (position + len_test) & pos_mask; + = (position + len_test) & coder->pos_mask; const uint32_t cur_and_len_char_price = cur_and_len_price + bit_get_price_0( @@ -844,7 +879,7 @@ lzma_get_optimum(lzma_coder *restrict coder, buf[len_test]); update_literal(state_2); - pos_state_next = (pos_state_next + 1) & pos_mask; + pos_state_next = (pos_state_next + 1) & coder->pos_mask; const uint32_t next_rep_match_price = cur_and_len_char_price -- cgit v1.2.3