Fix a buffer overflow in the LZMA encoder. It was due to my

misunderstanding of the code. There's no tiny fix for this problem, so I also cleaned up the code in general. This reduces the speed of the encoder 2-5 % in the fastest compression mode ("lzma -1"). High compression modes should have no noticeable performance difference. This commit breaks things (especially LZMA_SYNC_FLUSH) but I will fix them once the new format and LZMA2 has been roughly implemented. Plain LZMA won't support LZMA_SYNC_FLUSH at all and won't be supported in the new .lzma format. This may change still but this is what it looks like now. Support for known uncompressed size (that is, LZMA or LZMA2 without EOPM) is likely to go away. This means there will be API changes.
author: Lasse Collin <lasse.collin@tukaani.org> 2008-06-01 12:48:17 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-06-01 12:48:17 +0300
commit: 369f72fd656f537a9a8e06f13e6d0d4c242be22f (patch)
tree: 7b0d983e6be1ebb4d1361b2efcd125eeacad97a0 /src/liblzma/lzma/lzma_encoder_getoptimum.c
parent: Typo fixes from meyering. (diff)
download: xz-369f72fd656f537a9a8e06f13e6d0d4c242be22f.tar.xz
1 files changed, 47 insertions, 12 deletions
diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c
index 535508ee..b175e4cb 100644
--- a/src/liblzma/lzma/lzma_encoder_getoptimum.c
+++ b/src/liblzma/lzma/lzma_encoder_getoptimum.c
@@ -104,6 +104,36 @@ do { \
 
 
 static void
+fill_length_prices(lzma_length_encoder *lc, uint32_t pos_state)
+{
+	const uint32_t num_symbols = lc->table_size;
+	const uint32_t a0 = bit_get_price_0(lc->choice);
+	const uint32_t a1 = bit_get_price_1(lc->choice);
+	const uint32_t b0 = a1 + bit_get_price_0(lc->choice2);
+	const uint32_t b1 = a1 + bit_get_price_1(lc->choice2);
+
+	uint32_t *prices = lc->prices[pos_state];
+	uint32_t i = 0;
+
+	for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i)
+		prices[i] = a0 + bittree_get_price(lc->low[pos_state],
+				LEN_LOW_BITS, i);
+
+	for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i)
+		prices[i] = b0 + bittree_get_price(lc->mid[pos_state],
+				LEN_MID_BITS, i - LEN_LOW_SYMBOLS);
+
+	for (; i < num_symbols; ++i)
+		prices[i] = b1 + bittree_get_price(lc->high, LEN_HIGH_BITS,
+				i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS);
+
+	lc->counters[pos_state] = num_symbols;
+
+	return;
+}
+
+
+static void
 fill_distances_prices(lzma_coder *coder)
 {
 	uint32_t temp_prices[FULL_DISTANCES];
@@ -254,6 +284,9 @@ extern void
 lzma_get_optimum(lzma_coder *restrict coder,
 		uint32_t *restrict back_res, uint32_t *restrict len_res)
 {
+	uint32_t position = coder->now_pos;
+	uint32_t pos_state = position & coder->pos_mask;
+
 	// Update the price tables. In the C++ LZMA SDK 4.42 this was done in both
 	// initialization function and in the main loop. In liblzma they were
 	// moved into this single place.
@@ -265,6 +298,13 @@ lzma_get_optimum(lzma_coder *restrict coder,
 			fill_align_prices(coder);
 	}
 
+	if (coder->prev_len_encoder != NULL) {
+		if (--coder->prev_len_encoder->counters[pos_state] == 0)
+			fill_length_prices(coder->prev_len_encoder, pos_state);
+
+		coder->prev_len_encoder = NULL;
+	}
+
 
 	if (coder->optimum_end_index != coder->optimum_current_index) {
 		*len_res = coder->optimum[coder->optimum_current_index].pos_prev
@@ -312,7 +352,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 	uint32_t rep_max_index = 0;
 
 	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
-		reps[i] = coder->rep_distances[i];
+		reps[i] = coder->reps[i];
 		const uint32_t back_offset = reps[i] + 1;
 
 		if (buf[0] != *(buf - back_offset)
@@ -356,13 +396,8 @@ lzma_get_optimum(lzma_coder *restrict coder,
 		return;
 	}
 
-	const uint32_t pos_mask = coder->pos_mask;
-
 	coder->optimum[0].state = coder->state;
 
-	uint32_t position = coder->now_pos;
-	uint32_t pos_state = (position & pos_mask);
-
 	coder->optimum[1].price = bit_get_price_0(
 				coder->is_match[coder->state][pos_state])
 			+ literal_get_price(
@@ -575,7 +610,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 	current_byte = *buf;
 	match_byte = *(buf - reps[0] - 1);
 
-	pos_state = position & pos_mask;
+	pos_state = position & coder->pos_mask;
 
 	const uint32_t cur_and_1_price = cur_price
 			+ bit_get_price_0(coder->is_match[state][pos_state])
@@ -640,7 +675,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 			uint32_t state_2 = state;
 			update_literal(state_2);
 
-			const uint32_t pos_state_next = (position + 1) & pos_mask;
+			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
 			const uint32_t next_rep_match_price = cur_and_1_price
 					+ bit_get_price_1(coder->is_match[state_2][pos_state_next])
 					+ bit_get_price_1(coder->is_rep[state_2]);
@@ -719,7 +754,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 			uint32_t state_2 = state;
 			update_long_rep(state_2);
 
-			uint32_t pos_state_next = (position + len_test) & pos_mask;
+			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
 
 			const uint32_t cur_and_len_char_price = price
 					+ length_get_price(coder->rep_len_encoder,
@@ -732,7 +767,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 
 			update_literal(state_2);
 
-			pos_state_next = (position + len_test + 1) & pos_mask;
+			pos_state_next = (position + len_test + 1) & coder->pos_mask;
 
 			const uint32_t next_rep_match_price = cur_and_len_char_price
 					+ bit_get_price_1(coder->is_match[state_2][pos_state_next])
@@ -829,7 +864,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 					uint32_t state_2 = state;
 					update_match(state_2);
 					uint32_t pos_state_next
-							= (position + len_test) & pos_mask;
+							= (position + len_test) & coder->pos_mask;
 
 					const uint32_t cur_and_len_char_price = cur_and_len_price
 							+ bit_get_price_0(
@@ -844,7 +879,7 @@ lzma_get_optimum(lzma_coder *restrict coder,
 								buf[len_test]);
 
 					update_literal(state_2);
-					pos_state_next = (pos_state_next + 1) & pos_mask;
+					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
 
 					const uint32_t next_rep_match_price
 							= cur_and_len_char_price
author	Lasse Collin <lasse.collin@tukaani.org>	2008-06-01 12:48:17 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2008-06-01 12:48:17 +0300
commit	369f72fd656f537a9a8e06f13e6d0d4c242be22f (patch)
tree	7b0d983e6be1ebb4d1361b2efcd125eeacad97a0 /src/liblzma/lzma/lzma_encoder_getoptimum.c
parent	Typo fixes from meyering. (diff)
download	xz-369f72fd656f537a9a8e06f13e6d0d4c242be22f.tar.xz