liblzma: Optimize LZ decoder slightly.

Now extra buffer space is reserved so that repeating bytes for any single match will never need to copy from two places (both the beginning and the end of the buffer). This simplifies dict_repeat() and helps a little with speed. This seems to reduce .lzma decompression time by about 2 %, so with .xz and CRC the relative improvement could be slightly smaller. The small things still add up.
author: Lasse Collin <lasse.collin@tukaani.org> 2024-02-12 17:09:10 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2024-02-14 18:31:16 +0200
commit: f3872a59475456c5d365cad9f1c5be514cfa54b5 (patch)
tree: 38070fa3421729f881bb73d6c6053391482523ea /src/liblzma/lz/lz_decoder.c
parent: liblzma: LZMA decoder: Get rid of next_state[]. (diff)
download: xz-f3872a59475456c5d365cad9f1c5be514cfa54b5.tar.xz
1 files changed, 32 insertions, 11 deletions
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 73bf20d9..92913f22 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -53,9 +53,10 @@ typedef struct {
 static void
 lz_decoder_reset(lzma_coder *coder)
 {
-	coder->dict.pos = 0;
+	coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;
 	coder->dict.full = 0;
-	coder->dict.buf[coder->dict.size - 1] = '\0';
+	coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';
+	coder->dict.has_wrapped = false;
 	coder->dict.need_reset = false;
 	return;
 }
@@ -69,8 +70,15 @@ decode_buffer(lzma_coder *coder,
 {
 	while (true) {
 		// Wrap the dictionary if needed.
-		if (coder->dict.pos == coder->dict.size)
-			coder->dict.pos = 0;
+		if (coder->dict.pos == coder->dict.size) {
+			// See the comment of #define LZ_DICT_REPEAT_MAX.
+			coder->dict.pos = LZ_DICT_REPEAT_MAX;
+			coder->dict.has_wrapped = true;
+			memcpy(coder->dict.buf, coder->dict.buf
+						+ coder->dict.size
+						- LZ_DICT_REPEAT_MAX,
+					LZ_DICT_REPEAT_MAX);
+		}
 
 		// Store the current dictionary position. It is needed to know
 		// where to start copying to the out[] buffer.
@@ -252,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 	// dictionary to the output buffer, since applications are
 	// recommended to give aligned buffers to liblzma.
 	//
+	// Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
+	// needed for alloc_size.
+	//
 	// Avoid integer overflow.
-	if (lz_options.dict_size > SIZE_MAX - 15)
+	if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)
 		return LZMA_MEM_ERROR;
 
 	lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
 
+	// Reserve extra space as explained in the comment
+	// of #define LZ_DICT_REPEAT_MAX.
+	const size_t alloc_size
+			= lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX;
+
 	// Allocate and initialize the dictionary.
-	if (coder->dict.size != lz_options.dict_size) {
+	if (coder->dict.size != alloc_size) {
 		lzma_free(coder->dict.buf, allocator);
-		coder->dict.buf
-				= lzma_alloc(lz_options.dict_size, allocator);
+		coder->dict.buf = lzma_alloc(alloc_size, allocator);
 		if (coder->dict.buf == NULL)
 			return LZMA_MEM_ERROR;
 
-		coder->dict.size = lz_options.dict_size;
+		// NOTE: Yes, alloc_size, not lz_options.dict_size. The way
+		// coder->dict.full is updated will take care that we will
+		// still reject distances larger than lz_options.dict_size.
+		coder->dict.size = alloc_size;
 	}
 
 	lz_decoder_reset(next->coder);
@@ -279,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		const size_t copy_size = my_min(lz_options.preset_dict_size,
 				lz_options.dict_size);
 		const size_t offset = lz_options.preset_dict_size - copy_size;
-		memcpy(coder->dict.buf, lz_options.preset_dict + offset,
+		memcpy(coder->dict.buf + coder->dict.pos,
+				lz_options.preset_dict + offset,
 				copy_size);
-		coder->dict.pos = copy_size;
+
+		// dict.pos isn't zero after lz_decoder_reset().
+		coder->dict.pos += copy_size;
 		coder->dict.full = copy_size;
 	}
author	Lasse Collin <lasse.collin@tukaani.org>	2024-02-12 17:09:10 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2024-02-14 18:31:16 +0200
commit	f3872a59475456c5d365cad9f1c5be514cfa54b5 (patch)
tree	38070fa3421729f881bb73d6c6053391482523ea /src/liblzma/lz/lz_decoder.c
parent	liblzma: LZMA decoder: Get rid of next_state[]. (diff)
download	xz-f3872a59475456c5d365cad9f1c5be514cfa54b5.tar.xz