aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lz/lz_decoder.c
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2024-02-12 17:09:10 +0200
committerLasse Collin <lasse.collin@tukaani.org>2024-02-14 18:31:16 +0200
commitf3872a59475456c5d365cad9f1c5be514cfa54b5 (patch)
tree38070fa3421729f881bb73d6c6053391482523ea /src/liblzma/lz/lz_decoder.c
parentliblzma: LZMA decoder: Get rid of next_state[]. (diff)
downloadxz-f3872a59475456c5d365cad9f1c5be514cfa54b5.tar.xz
liblzma: Optimize LZ decoder slightly.
Now extra buffer space is reserved so that repeating bytes for any single match will never need to copy from two places (both the beginning and the end of the buffer). This simplifies dict_repeat() and helps a little with speed. This seems to reduce .lzma decompression time about 2 %, so with .xz and CRC it could be slightly less. The small things add up still.
Diffstat (limited to 'src/liblzma/lz/lz_decoder.c')
-rw-r--r--src/liblzma/lz/lz_decoder.c43
1 files changed, 32 insertions, 11 deletions
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 73bf20d9..92913f22 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -53,9 +53,10 @@ typedef struct {
static void
lz_decoder_reset(lzma_coder *coder)
{
- coder->dict.pos = 0;
+ coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;
coder->dict.full = 0;
- coder->dict.buf[coder->dict.size - 1] = '\0';
+ coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';
+ coder->dict.has_wrapped = false;
coder->dict.need_reset = false;
return;
}
@@ -69,8 +70,15 @@ decode_buffer(lzma_coder *coder,
{
while (true) {
// Wrap the dictionary if needed.
- if (coder->dict.pos == coder->dict.size)
- coder->dict.pos = 0;
+ if (coder->dict.pos == coder->dict.size) {
+ // See the comment of #define LZ_DICT_REPEAT_MAX.
+ coder->dict.pos = LZ_DICT_REPEAT_MAX;
+ coder->dict.has_wrapped = true;
+ memcpy(coder->dict.buf, coder->dict.buf
+ + coder->dict.size
+ - LZ_DICT_REPEAT_MAX,
+ LZ_DICT_REPEAT_MAX);
+ }
// Store the current dictionary position. It is needed to know
// where to start copying to the out[] buffer.
@@ -252,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// dictionary to the output buffer, since applications are
// recommended to give aligned buffers to liblzma.
//
+ // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
+ // needed for alloc_size.
+ //
// Avoid integer overflow.
- if (lz_options.dict_size > SIZE_MAX - 15)
+ if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)
return LZMA_MEM_ERROR;
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
+ // Reserve extra space as explained in the comment
+ // of #define LZ_DICT_REPEAT_MAX.
+ const size_t alloc_size
+ = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX;
+
// Allocate and initialize the dictionary.
- if (coder->dict.size != lz_options.dict_size) {
+ if (coder->dict.size != alloc_size) {
lzma_free(coder->dict.buf, allocator);
- coder->dict.buf
- = lzma_alloc(lz_options.dict_size, allocator);
+ coder->dict.buf = lzma_alloc(alloc_size, allocator);
if (coder->dict.buf == NULL)
return LZMA_MEM_ERROR;
- coder->dict.size = lz_options.dict_size;
+ // NOTE: Yes, alloc_size, not lz_options.dict_size. The way
+ // coder->dict.full is updated will take care that we will
+ // still reject distances larger than lz_options.dict_size.
+ coder->dict.size = alloc_size;
}
lz_decoder_reset(next->coder);
@@ -279,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
const size_t copy_size = my_min(lz_options.preset_dict_size,
lz_options.dict_size);
const size_t offset = lz_options.preset_dict_size - copy_size;
- memcpy(coder->dict.buf, lz_options.preset_dict + offset,
+ memcpy(coder->dict.buf + coder->dict.pos,
+ lz_options.preset_dict + offset,
copy_size);
- coder->dict.pos = copy_size;
+
+ // dict.pos isn't zero after lz_decoder_reset().
+ coder->dict.pos += copy_size;
coder->dict.full = copy_size;
}