aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lz
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-08-28 22:53:15 +0300
commit: 3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree: 7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lz
parent: Fix test_filter_flags to match the new restriction of lc+lp. (diff)
download: xz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz
Sort of garbage collection commit. :-| Many things are still
broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions.
Diffstat (limited to 'src/liblzma/lz')
-rw-r--r--src/liblzma/lz/Makefile.am35
-rw-r--r--src/liblzma/lz/bt2.c27
-rw-r--r--src/liblzma/lz/bt2.h31
-rw-r--r--src/liblzma/lz/bt3.c29
-rw-r--r--src/liblzma/lz/bt3.h31
-rw-r--r--src/liblzma/lz/bt4.c30
-rw-r--r--src/liblzma/lz/bt4.h31
-rw-r--r--src/liblzma/lz/hc3.c30
-rw-r--r--src/liblzma/lz/hc3.h31
-rw-r--r--src/liblzma/lz/hc4.c31
-rw-r--r--src/liblzma/lz/hc4.h31
-rw-r--r--src/liblzma/lz/lz_decoder.c547
-rw-r--r--src/liblzma/lz/lz_decoder.h308
-rw-r--r--src/liblzma/lz/lz_encoder.c780
-rw-r--r--src/liblzma/lz/lz_encoder.h334
-rw-r--r--src/liblzma/lz/lz_encoder_hash.h104
-rw-r--r--src/liblzma/lz/lz_encoder_mf.c780
-rw-r--r--src/liblzma/lz/lz_encoder_private.h40
-rw-r--r--src/liblzma/lz/match_c.h412
-rw-r--r--src/liblzma/lz/match_h.h69
20 files changed, 1865 insertions, 1846 deletions
diff --git a/src/liblzma/lz/Makefile.am b/src/liblzma/lz/Makefile.am
index 5c27e2f2..bf41d8e6 100644
--- a/src/liblzma/lz/Makefile.am
+++ b/src/liblzma/lz/Makefile.am
@@ -20,43 +20,16 @@ liblz_la_CPPFLAGS = \
liblz_la_SOURCES =
-if COND_MAIN_ENCODER
+if COND_ENCODER_LZ
liblz_la_SOURCES += \
lz_encoder.c \
lz_encoder.h \
- lz_encoder_private.h \
- match_c.h \
- match_h.h
-
-if COND_MF_HC3
-liblz_la_SOURCES += hc3.c hc3.h
-liblz_la_CPPFLAGS += -DHAVE_HC3
-endif
-
-if COND_MF_HC4
-liblz_la_SOURCES += hc4.c hc4.h
-liblz_la_CPPFLAGS += -DHAVE_HC4
-endif
-
-if COND_MF_BT2
-liblz_la_SOURCES += bt2.c bt2.h
-liblz_la_CPPFLAGS += -DHAVE_BT2
-endif
-
-if COND_MF_BT3
-liblz_la_SOURCES += bt3.c bt3.h
-liblz_la_CPPFLAGS += -DHAVE_BT3
-endif
-
-if COND_MF_BT4
-liblz_la_SOURCES += bt4.c bt4.h
-liblz_la_CPPFLAGS += -DHAVE_BT4
-endif
-
+ lz_encoder_hash.h \
+ lz_encoder_mf.c
endif
-if COND_MAIN_DECODER
+if COND_DECODER_LZ
liblz_la_SOURCES += \
lz_decoder.c \
lz_decoder.h
diff --git a/src/liblzma/lz/bt2.c b/src/liblzma/lz/bt2.c
deleted file mode 100644
index 7dc4cb80..00000000
--- a/src/liblzma/lz/bt2.c
+++ /dev/null
@@ -1,27 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt2.c
-/// \brief Binary Tree 2
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "bt2.h"
-
-#undef IS_HASH_CHAIN
-#undef HASH_ARRAY_2
-#undef HASH_ARRAY_3
-
-#include "match_c.h"
diff --git a/src/liblzma/lz/bt2.h b/src/liblzma/lz/bt2.h
deleted file mode 100644
index 33cb52cd..00000000
--- a/src/liblzma/lz/bt2.h
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt2.h
-/// \brief Binary Tree 2
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_BT2_H
-#define LZMA_BT2_H
-
-#undef LZMA_MATCH_FINDER_NAME_LOWER
-#undef LZMA_MATCH_FINDER_NAME_UPPER
-#define LZMA_MATCH_FINDER_NAME_LOWER bt2
-#define LZMA_MATCH_FINDER_NAME_UPPER BT2
-
-#include "match_h.h"
-
-#endif
diff --git a/src/liblzma/lz/bt3.c b/src/liblzma/lz/bt3.c
deleted file mode 100644
index d44310f3..00000000
--- a/src/liblzma/lz/bt3.c
+++ /dev/null
@@ -1,29 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt3.c
-/// \brief Binary Tree 3
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "bt3.h"
-
-#undef IS_HASH_CHAIN
-#undef HASH_ARRAY_2
-#undef HASH_ARRAY_3
-
-#define HASH_ARRAY_2
-
-#include "match_c.h"
diff --git a/src/liblzma/lz/bt3.h b/src/liblzma/lz/bt3.h
deleted file mode 100644
index 247c7e5f..00000000
--- a/src/liblzma/lz/bt3.h
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt3.h
-/// \brief Binary Tree 3
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_BT3_H
-#define LZMA_BT3_H
-
-#undef LZMA_MATCH_FINDER_NAME_LOWER
-#undef LZMA_MATCH_FINDER_NAME_UPPER
-#define LZMA_MATCH_FINDER_NAME_LOWER bt3
-#define LZMA_MATCH_FINDER_NAME_UPPER BT3
-
-#include "match_h.h"
-
-#endif
diff --git a/src/liblzma/lz/bt4.c b/src/liblzma/lz/bt4.c
deleted file mode 100644
index 6e1042c9..00000000
--- a/src/liblzma/lz/bt4.c
+++ /dev/null
@@ -1,30 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt4.c
-/// \brief Binary Tree 4
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "bt4.h"
-
-#undef IS_HASH_CHAIN
-#undef HASH_ARRAY_2
-#undef HASH_ARRAY_3
-
-#define HASH_ARRAY_2
-#define HASH_ARRAY_3
-
-#include "match_c.h"
diff --git a/src/liblzma/lz/bt4.h b/src/liblzma/lz/bt4.h
deleted file mode 100644
index e3fcf6ac..00000000
--- a/src/liblzma/lz/bt4.h
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file bt4.h
-/// \brief Binary Tree 4
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_BT4_H
-#define LZMA_BT4_H
-
-#undef LZMA_MATCH_FINDER_NAME_LOWER
-#undef LZMA_MATCH_FINDER_NAME_UPPER
-#define LZMA_MATCH_FINDER_NAME_LOWER bt4
-#define LZMA_MATCH_FINDER_NAME_UPPER BT4
-
-#include "match_h.h"
-
-#endif
diff --git a/src/liblzma/lz/hc3.c b/src/liblzma/lz/hc3.c
deleted file mode 100644
index 22b5689b..00000000
--- a/src/liblzma/lz/hc3.c
+++ /dev/null
@@ -1,30 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file hc3.c
-/// \brief Hash Chain 3
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "hc3.h"
-
-#undef IS_HASH_CHAIN
-#undef HASH_ARRAY_2
-#undef HASH_ARRAY_3
-
-#define IS_HASH_CHAIN
-#define HASH_ARRAY_2
-
-#include "match_c.h"
diff --git a/src/liblzma/lz/hc3.h b/src/liblzma/lz/hc3.h
deleted file mode 100644
index 97be0b1d..00000000
--- a/src/liblzma/lz/hc3.h
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file hc3.h
-/// \brief Hash Chain 3
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_HC3_H
-#define LZMA_HC3_H
-
-#undef LZMA_MATCH_FINDER_NAME_LOWER
-#undef LZMA_MATCH_FINDER_NAME_UPPER
-#define LZMA_MATCH_FINDER_NAME_LOWER hc3
-#define LZMA_MATCH_FINDER_NAME_UPPER HC3
-
-#include "match_h.h"
-
-#endif
diff --git a/src/liblzma/lz/hc4.c b/src/liblzma/lz/hc4.c
deleted file mode 100644
index a55cfd09..00000000
--- a/src/liblzma/lz/hc4.c
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file hc4.c
-/// \brief Hash Chain 4
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "hc4.h"
-
-#undef IS_HASH_CHAIN
-#undef HASH_ARRAY_2
-#undef HASH_ARRAY_3
-
-#define IS_HASH_CHAIN
-#define HASH_ARRAY_2
-#define HASH_ARRAY_3
-
-#include "match_c.h"
diff --git a/src/liblzma/lz/hc4.h b/src/liblzma/lz/hc4.h
deleted file mode 100644
index dc072e2f..00000000
--- a/src/liblzma/lz/hc4.h
+++ /dev/null
@@ -1,31 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file hc4.h
-/// \brief Hash Chain 4
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_HC4_H
-#define LZMA_HC4_H
-
-#undef LZMA_MATCH_FINDER_NAME_LOWER
-#undef LZMA_MATCH_FINDER_NAME_UPPER
-#define LZMA_MATCH_FINDER_NAME_LOWER hc4
-#define LZMA_MATCH_FINDER_NAME_UPPER HC4
-
-#include "match_h.h"
-
-#endif
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index ae969d62..5c3f1d18 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -18,351 +18,142 @@
//
///////////////////////////////////////////////////////////////////////////////
-#include "lz_decoder.h"
+// liblzma supports multiple LZ77-based filters. The LZ part is shared
+// between these filters. The LZ code takes care of dictionary handling
+// and passing the data between filters in the chain. The filter-specific
+// part decodes from the input buffer to the dictionary.
-/// Minimum size of allocated dictionary
-#define DICT_SIZE_MIN 8192
+#include "lz_decoder.h"
-/// When there is less than this amount of data available for decoding,
-/// it is moved to the temporary buffer which
-/// - protects from reads past the end of the buffer; and
-/// - stored the incomplete data between lzma_code() calls.
-///
-/// \note TEMP_LIMIT must be at least as much as
-/// REQUIRED_IN_BUFFER_SIZE defined in lzma_decoder.c.
-#define TEMP_LIMIT 32
-// lzma_lz_decoder.dict[] must be three times the size of TEMP_LIMIT.
-// 2 * TEMP_LIMIT is used for the actual data, and the third TEMP_LIMIT
-// bytes is needed for safety to allow decode_dummy() in lzma_decoder.c
-// to read past end of the buffer. This way it should be both fast and simple.
-#if LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT
-# error LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT
-#endif
+struct lzma_coder_s {
+ /// Dictionary (history buffer)
+ lzma_dict dict;
+ /// The actual LZ-based decoder e.g. LZMA
+ lzma_lz_decoder lz;
-struct lzma_coder_s {
+ /// Next filter in the chain, if any. Note that LZMA and LZMA2 are
+ /// only allowed as the last filter, but a future long-range filter
+ /// may be in the middle of the chain.
lzma_next_coder next;
- lzma_lz_decoder lz;
- // There are more members in this structure but they are not
- // visible in LZ coder.
+ /// True if the next filter in the chain has returned LZMA_STREAM_END.
+ bool next_finished;
+
+ /// True if the LZ decoder (e.g. LZMA) has detected end of payload
+ /// marker. This may become true before next_finished becomes true.
+ bool this_finished;
+
+ /// Temporary buffer needed when the LZ-based filter is not the last
+ /// filter in the chain. The output of the next filter is first
+ /// decoded into buffer[], which is then used as input for the actual
+ /// LZ-based decoder.
+ struct {
+ size_t pos;
+ size_t size;
+ uint8_t buffer[LZMA_BUFFER_SIZE];
+ } temp;
};
-/// - Copy as much data as possible from lz->dict[] to out[].
-/// - Update *out_pos, lz->start, and lz->end accordingly.
-/// - Wrap lz-pos to the beginning of lz->dict[] if there is a danger that
-/// it may go past the end of the buffer (lz->pos >= lz->must_flush_pos).
-static inline bool
-flush(lzma_lz_decoder *restrict lz, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size)
-{
- // Flush uncompressed data from the history buffer to
- // the output buffer. This is done in two phases.
-
- assert(lz->start <= lz->end);
-
- // Flush if pos < start < end.
- if (lz->pos < lz->start && lz->start < lz->end) {
- bufcpy(lz->dict, &lz->start, lz->end, out, out_pos, out_size);
-
- // If we reached end of the data in history buffer,
- // wrap to the beginning.
- if (lz->start == lz->end)
- lz->start = 0;
- }
-
- // Flush if start start < pos <= end. This is not as `else' for
- // previous `if' because the previous one may make this one true.
- if (lz->start < lz->pos) {
- bufcpy(lz->dict, &lz->start,
- lz->pos, out, out_pos, out_size);
-
- if (lz->pos >= lz->must_flush_pos) {
- // Wrap the flushing position if we have
- // flushed the whole history buffer.
- if (lz->pos == lz->start)
- lz->start = 0;
-
- // Wrap the write position and store to lz.end
- // how much there is new data available.
- lz->end = lz->pos;
- lz->pos = 0;
- lz->is_full = true;
- }
- }
-
- assert(lz->pos < lz->must_flush_pos);
-
- return *out_pos == out_size;
-}
-
-
-/// Calculate safe value for lz->limit. If no safe value can be found,
-/// set lz->limit to zero. When flushing, only as little data will be
-/// decoded as is needed to fill the output buffer (lowers both latency
-/// and throughput).
-///
-/// \return true if there is no space for new uncompressed data.
-///
-static inline bool
-set_limit(lzma_lz_decoder *lz, size_t out_avail, bool flushing)
-{
- // Set the limit so that writing to dict[limit + match_max_len - 1]
- // doesn't overwrite any unflushed data and doesn't write past the
- // end of the dict buffer.
- if (lz->start <= lz->pos) {
- // We can fill the buffer from pos till the end
- // of the dict buffer.
- lz->limit = lz->must_flush_pos;
- } else if (lz->pos + lz->match_max_len < lz->start) {
- // There's some unflushed data between pos and end of the
- // buffer. Limit so that we don't overwrite the unflushed data.
- lz->limit = lz->start - lz->match_max_len;
- } else {
- // Buffer is too full.
- lz->limit = 0;
- return true;
- }
-
- // Finetune the limit a bit if it isn't zero.
-
- assert(lz->limit > lz->pos);
- const size_t dict_avail = lz->limit - lz->pos;
-
- if (lz->uncompressed_size < dict_avail) {
- // Finishing a stream that doesn't have
- // an end of stream marker.
- lz->limit = lz->pos + lz->uncompressed_size;
-
- } else if (flushing && out_avail < dict_avail) {
- // Flushing enabled, decoding only as little as needed to
- // fill the out buffer (if there's enough input, of course).
- lz->limit = lz->pos + out_avail;
- }
-
- return lz->limit == lz->pos;
-}
-
-
-/// Takes care of wrapping the data into temporary buffer when needed,
-/// and calls the actual decoder.
-///
-/// \return true if error occurred
-///
-static inline bool
-call_process(lzma_coder *restrict coder, const uint8_t *restrict in,
- size_t *restrict in_pos, size_t in_size)
-{
- // It would be nice and simple if we could just give in[] to the
- // decoder, but the requirement of zlib-like API forces us to be
- // able to make *in_pos == in_size whenever there is enough output
- // space. If needed, we will append a few bytes from in[] to
- // a temporary buffer and decode enough to reach the part that
- // was copied from in[]. Then we can continue with the real in[].
-
- bool error;
- const size_t dict_old_pos = coder->lz.pos;
- const size_t in_avail = in_size - *in_pos;
-
- if (coder->lz.temp_size + in_avail < 2 * TEMP_LIMIT) {
- // Copy all the available input from in[] to temp[].
- memcpy(coder->lz.temp + coder->lz.temp_size,
- in + *in_pos, in_avail);
- coder->lz.temp_size += in_avail;
- *in_pos += in_avail;
- assert(*in_pos == in_size);
-
- // Decode as much as possible.
- size_t temp_used = 0;
- error = coder->lz.process(coder, coder->lz.temp, &temp_used,
- coder->lz.temp_size, true);
- assert(temp_used <= coder->lz.temp_size);
-
- // Move the remaining data to the beginning of temp[].
- coder->lz.temp_size -= temp_used;
- memmove(coder->lz.temp, coder->lz.temp + temp_used,
- coder->lz.temp_size);
-
- } else if (coder->lz.temp_size > 0) {
- // Fill temp[] unless it is already full because we aren't
- // the last filter in the chain.
- size_t copy_size = 0;
- if (coder->lz.temp_size < 2 * TEMP_LIMIT) {
- assert(*in_pos < in_size);
- copy_size = 2 * TEMP_LIMIT - coder->lz.temp_size;
- memcpy(coder->lz.temp + coder->lz.temp_size,
- in + *in_pos, copy_size);
- // NOTE: We don't update lz.temp_size or *in_pos yet.
- }
-
- size_t temp_used = 0;
- error = coder->lz.process(coder, coder->lz.temp, &temp_used,
- coder->lz.temp_size + copy_size, false);
-
- if (temp_used < coder->lz.temp_size) {
- // Only very little input data was consumed. Move
- // the unprocessed data to the beginning temp[].
- coder->lz.temp_size += copy_size - temp_used;
- memmove(coder->lz.temp, coder->lz.temp + temp_used,
- coder->lz.temp_size);
- *in_pos += copy_size;
- assert(*in_pos <= in_size);
-
- } else {
- // We were able to decode so much data that next time
- // we can decode directly from in[]. That is, we can
- // consider temp[] to be empty now.
- *in_pos += temp_used - coder->lz.temp_size;
- coder->lz.temp_size = 0;
- assert(*in_pos <= in_size);
- }
-
- } else {
- // Decode directly from in[].
- error = coder->lz.process(coder, in, in_pos, in_size, false);
- assert(*in_pos <= in_size);
- }
-
- assert(coder->lz.pos >= dict_old_pos);
- if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
- // Update uncompressed size.
- coder->lz.uncompressed_size -= coder->lz.pos - dict_old_pos;
-
- // Check that End of Payload Marker hasn't been detected
- // since it must not be present because uncompressed size
- // is known.
- if (coder->lz.eopm_detected)
- error = true;
- }
-
- return error;
-}
-
-
static lzma_ret
decode_buffer(lzma_coder *coder,
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size,
- bool flushing)
+ size_t *restrict out_pos, size_t out_size)
{
- bool stop = false;
-
while (true) {
- // Flush from coder->lz.dict to out[].
- flush(&coder->lz, out, out_pos, out_size);
-
- // All done?
- if (*out_pos == out_size
- || stop
- || coder->lz.eopm_detected
- || coder->lz.uncompressed_size == 0)
- break;
-
- // Set write limit in the dictionary.
- if (set_limit(&coder->lz, out_size - *out_pos, flushing))
- break;
-
- // Decode more data.
- if (call_process(coder, in, in_pos, in_size))
- return LZMA_DATA_ERROR;
-
- // Set stop to true if we must not call call_process() again
- // during this function call.
- // FIXME: Can this make the loop exist too early? It wouldn't
- // cause data corruption so not a critical problem. It can
- // happen if dictionary gets full and lz.temp still contains
- // a few bytes data that we could decode right now.
- if (*in_pos == in_size && coder->lz.temp_size <= TEMP_LIMIT
- && coder->lz.pos < coder->lz.limit)
- stop = true;
+ // Wrap the dictionary if needed.
+ if (coder->dict.pos == coder->dict.size)
+ coder->dict.pos = 0;
+
+ // Store the current dictionary position. It is needed to know
+ // where to start copying to the out[] buffer.
+ const size_t dict_start = coder->dict.pos;
+
+ // Calculate how much we allow the process() function to
+ // decode. It must not decode past the end of the dictionary
+ // buffer, and we don't want it to decode more than is
+ // actually needed to fill the out[] buffer.
+ coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos,
+ coder->dict.size - coder->dict.pos);
+
+ // Call the process() function to do the actual decoding.
+ const lzma_ret ret = coder->lz.code(
+ coder->lz.coder, &coder->dict,
+ in, in_pos, in_size);
+
+ // Copy the decoded data from the dictionary to the out[]
+ // buffer.
+ const size_t copy_size = coder->dict.pos - dict_start;
+ assert(copy_size <= out_size - *out_pos);
+ memcpy(out + *out_pos, coder->dict.buf + dict_start,
+ copy_size);
+ *out_pos += copy_size;
+
+ // Return if everything got decoded or an error occurred, or
+ // if there's no more data to decode.
+ if (ret != LZMA_OK || *out_pos == out_size
+ || coder->dict.pos < coder->dict.size)
+ return ret;
}
-
- // If we have decoded everything (EOPM detected or uncompressed_size
- // bytes were processed) to the history buffer, and also flushed
- // everything from the history buffer, our job is done.
- if ((coder->lz.eopm_detected
- || coder->lz.uncompressed_size == 0)
- && coder->lz.start == coder->lz.pos)
- return LZMA_STREAM_END;
-
- return LZMA_OK;
}
-extern lzma_ret
-lzma_lz_decode(lzma_coder *coder,
+static lzma_ret
+lz_decode(lzma_coder *coder,
lzma_allocator *allocator lzma_attribute((unused)),
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size,
lzma_action action)
{
- if (coder->next.code == NULL) {
- const lzma_ret ret = decode_buffer(coder, in, in_pos, in_size,
- out, out_pos, out_size,
- action == LZMA_SYNC_FLUSH);
-
- if (*out_pos == out_size || ret == LZMA_STREAM_END) {
- // Unread to make coder->temp[] empty. This is easy,
- // because we know that all the data currently in
- // coder->temp[] has been copied form in[] during this
- // call to the decoder.
- //
- // If we didn't do this, we could have data left in
- // coder->temp[] when end of stream is reached. That
- // data could be left there from *previous* call to
- // the decoder; in that case we wouldn't know where
- // to put that data.
- assert(*in_pos >= coder->lz.temp_size);
- *in_pos -= coder->lz.temp_size;
- coder->lz.temp_size = 0;
- }
-
- return ret;
- }
+ if (coder->next.code == NULL)
+ return decode_buffer(coder, in, in_pos, in_size,
+ out, out_pos, out_size);
// We aren't the last coder in the chain, we need to decode
// our input to a temporary buffer.
- const bool flushing = action == LZMA_SYNC_FLUSH;
while (*out_pos < out_size) {
- if (!coder->lz.next_finished
- && coder->lz.temp_size < LZMA_BUFFER_SIZE) {
+ // Fill the temporary buffer if it is empty.
+ if (!coder->next_finished
+ && coder->temp.pos == coder->temp.size) {
+ coder->temp.pos = 0;
+ coder->temp.size = 0;
+
const lzma_ret ret = coder->next.code(
coder->next.coder,
allocator, in, in_pos, in_size,
- coder->lz.temp, &coder->lz.temp_size,
+ coder->temp.buffer, &coder->temp.size,
LZMA_BUFFER_SIZE, action);
if (ret == LZMA_STREAM_END)
- coder->lz.next_finished = true;
- else if (coder->lz.temp_size < LZMA_BUFFER_SIZE
- || ret != LZMA_OK)
+ coder->next_finished = true;
+ else if (ret != LZMA_OK || coder->temp.size == 0)
return ret;
}
- if (coder->lz.this_finished) {
- if (coder->lz.temp_size != 0)
+ if (coder->this_finished) {
+ if (coder->temp.size != 0)
return LZMA_DATA_ERROR;
- if (coder->lz.next_finished)
+ if (coder->next_finished)
return LZMA_STREAM_END;
return LZMA_OK;
}
- size_t dummy = 0;
- const lzma_ret ret = decode_buffer(coder, NULL, &dummy, 0,
- out, out_pos, out_size, flushing);
+ const lzma_ret ret = decode_buffer(coder, coder->temp.buffer,
+ &coder->temp.pos, coder->temp.size,
+ out, out_pos, out_size);
if (ret == LZMA_STREAM_END)
- coder->lz.this_finished = true;
+ coder->this_finished = true;
else if (ret != LZMA_OK)
return ret;
- else if (coder->lz.next_finished && *out_pos < out_size)
+ else if (coder->next_finished && *out_pos < out_size)
return LZMA_DATA_ERROR;
}
@@ -370,94 +161,104 @@ lzma_lz_decode(lzma_coder *coder,
}
-/// \brief Initializes LZ part of the LZMA decoder or Inflate
-///
-/// \param history_size Number of bytes the LZ out window is
-/// supposed keep available from the output
-/// history.
-/// \param match_max_len Number of bytes a single decoding loop
-/// can advance the write position (lz->pos)
-/// in the history buffer (lz->dict).
-///
-/// \note This function is called by LZMA decoder and Inflate init()s.
-/// It's up to those functions allocate *lz and initialize it
-/// with LZMA_LZ_DECODER_INIT.
+static void
+lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder->dict.buf, allocator);
+
+ if (coder->lz.end != NULL)
+ coder->lz.end(coder->lz.coder, allocator);
+ else
+ lzma_free(coder->lz.coder, allocator);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
extern lzma_ret
-lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator,
- bool (*process)(lzma_coder *restrict coder,
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size, bool has_safe_buffer),
- size_t history_size, size_t match_max_len)
+lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_filter_info *filters,
+ lzma_ret (*lz_init)(lzma_lz_decoder *lz,
+ lzma_allocator *allocator, const void *options,
+ size_t *dict_size))
{
- // Known uncompressed size is used only with LZMA_Alone files so we
- // set it always to unknown by default.
- lz->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
-
- // Limit the history size to roughly sane values. This is primarily
- // to prevent integer overflows.
- if (history_size > UINT32_MAX / 2)
- return LZMA_HEADER_ERROR;
-
- // Store the value actually requested. We use it for sanity checks
- // when repeating data from the history buffer.
- lz->requested_size = history_size;
-
- // Avoid tiny history buffer sizes for performance reasons.
- // TODO: Test if this actually helps...
- if (history_size < DICT_SIZE_MIN)
- history_size = DICT_SIZE_MIN;
-
- // The real size of the history buffer is a bit bigger than
- // requested by our caller. This allows us to do some optimizations,
- // which help not only speed but simplicity of the code; specifically,
- // we can make sure that there is always at least match_max_len
- // bytes immediatelly available for writing without a need to wrap
- // the history buffer.
- const size_t dict_real_size = history_size + 2 * match_max_len + 1;
-
- // Reallocate memory if needed.
- if (history_size != lz->size || match_max_len != lz->match_max_len) {
- // Destroy the old buffer.
- lzma_lz_decoder_end(lz, allocator);
-
- lz->size = history_size;
- lz->match_max_len = match_max_len;
- lz->must_flush_pos = history_size + match_max_len + 1;
-
- lz->dict = lzma_alloc(dict_real_size, allocator);
- if (lz->dict == NULL)
+ // Allocate the base structure if it isn't already allocated.
+ if (next->coder == NULL) {
+ next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+ if (next->coder == NULL)
return LZMA_MEM_ERROR;
+
+ next->code = &lz_decode;
+ next->end = &lz_decoder_end;
+
+ next->coder->dict.buf = NULL;
+ next->coder->dict.size = 0;
+ next->coder->lz = LZMA_LZ_DECODER_INIT;
+ next->coder->next = LZMA_NEXT_CODER_INIT;
}
- // Reset the variables so that lz_get_byte(lz, 0) will return '\0'.
- lz->pos = 0;
- lz->start = 0;
- lz->end = dict_real_size;
- lz->dict[dict_real_size - 1] = 0;
- lz->is_full = false;
- lz->eopm_detected = false;
- lz->next_finished = false;
- lz->this_finished = false;
- lz->temp_size = 0;
-
- // Clean up the temporary buffer to make it very sure that there are
- // no information leaks when multiple steams are decoded with the
- // same decoder structures.
- memzero(lz->temp, LZMA_BUFFER_SIZE);
-
- // Set the process function pointer.
- lz->process = process;
+ // Allocate and initialize the LZ-based decoder. It will also give
+ // us the dictionary size.
+ size_t dict_size;
+ return_if_error(lz_init(&next->coder->lz, allocator,
+ filters[0].options, &dict_size));
+
+ // If the dictionary size is very small, increase it to 4096 bytes.
+ // This is to prevent constant wrapping of the dictionary, which
+ // would slow things down. The downside is that since we don't check
+ // separately for the real dictionary size, we may happily accept
+ // corrupt files.
+ if (dict_size < 4096)
+ dict_size = 4096;
+
+ // Make dictionary size a multiple of 16. Some LZ-based decoders like
+ // LZMA use the lowest bits of lzma_dict.pos to know the alignment of the
+ // data. Aligned buffer is also good when memcpying from the
+ // dictionary to the output buffer, since applications are
+ // recommended to give aligned buffers to liblzma.
+ //
+ // Avoid integer overflow. FIXME Should the return value be
+ // LZMA_HEADER_ERROR or LZMA_MEM_ERROR?
+ if (dict_size > SIZE_MAX - 15)
+ return LZMA_MEM_ERROR;
+
+ dict_size = (dict_size + 15) & (SIZE_MAX - 15);
+
+ // Allocate and initialize the dictionary.
+ if (next->coder->dict.size != dict_size) {
+ lzma_free(next->coder->dict.buf, allocator);
+ next->coder->dict.buf = lzma_alloc(dict_size, allocator);
+ if (next->coder->dict.buf == NULL)
+ return LZMA_MEM_ERROR;
- return LZMA_OK;
+ next->coder->dict.size = dict_size;
+ }
+
+ dict_reset(&next->coder->dict);
+
+ // Miscellaneous initializations
+ next->coder->next_finished = false;
+ next->coder->this_finished = false;
+ next->coder->temp.pos = 0;
+ next->coder->temp.size = 0;
+
+ // Initialize the next filter in the chain, if any.
+ return lzma_next_filter_init(&next->coder->next, allocator,
+ filters + 1);
+}
+
+
+extern uint64_t
+lzma_lz_decoder_memusage(size_t dictionary_size)
+{
+ return sizeof(lzma_coder) + (uint64_t)(dictionary_size);
}
extern void
-lzma_lz_decoder_end(lzma_lz_decoder *lz, lzma_allocator *allocator)
+lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size)
{
- lzma_free(lz->dict, allocator);
- lz->dict = NULL;
- lz->size = 0;
- lz->match_max_len = 0;
- return;
+ coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size);
}
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index 1acf9831..d2a77ba4 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -18,201 +18,215 @@
//
///////////////////////////////////////////////////////////////////////////////
-#ifndef LZMA_LZ_OUT_H
-#define LZMA_LZ_OUT_H
+#ifndef LZMA_LZ_DECODER_H
+#define LZMA_LZ_DECODER_H
#include "common.h"
-/// Get a byte from the history buffer.
-#define lz_get_byte(lz, distance) \
- ((distance) < (lz).pos \
- ? (lz).dict[(lz).pos - (distance) - 1] \
- : (lz).dict[(lz).pos - (distance) - 1 + (lz).end])
-
-
-/// Test if dictionary is empty.
-#define lz_is_empty(lz) \
- ((lz).pos == 0 && !(lz).is_full)
-
-
-#define LZMA_LZ_DECODER_INIT \
- (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 }
-
-
typedef struct {
- /// Function to do the actual decoding (LZMA or Inflate)
- bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
- size_t *restrict in_pos, size_t size_in,
- bool has_safe_buffer);
+ /// Pointer to the dictionary buffer. It can be an allocated buffer
+ /// internal to liblzma, or it can be a buffer given by the
+ /// application when in single-call mode (not implemented yet).
+ uint8_t *buf;
- /// Pointer to dictionary (history) buffer.
- /// \note Not 'restrict' because can alias next_out.
- uint8_t *dict;
-
- /// Next write goes to dict[pos].
+ /// Write position in dictionary. The next byte will be written to
+ /// buf[pos].
size_t pos;
- /// Next byte to flush is buffer[start].
- size_t start;
-
- /// First byte to not flush is buffer[end].
- size_t end;
+ /// Indicates how full the dictionary is. This is used by
+ /// dict_is_distance_valid() to detect corrupt files that would
+ /// read beyond the beginning of the dictionary.
+ size_t full;
- /// First position to which data must not be written.
+ /// Write limit
size_t limit;
- /// True if dictionary has needed wrapping.
- bool is_full;
-
- /// True if process() has detected End of Payload Marker.
- bool eopm_detected;
+ /// Size of the dictionary
+ size_t size;
- /// True if the next coder in the chain has returned LZMA_STREAM_END.
- bool next_finished;
+} lzma_dict;
- /// True if the LZ decoder (e.g. LZMA) has detected End of Payload
- /// Marker. This may become true before next_finished becomes true.
- bool this_finished;
- /// When pos >= must_flush_pos, we must not call process().
- size_t must_flush_pos;
+typedef struct {
+ /// Data specific to the LZ-based decoder
+ lzma_coder *coder;
- /// Maximum number of bytes that a single decoding loop inside
- /// process() can produce data into dict. This amount is kept
- /// always available at dict + pos i.e. it is safe to write a byte
- /// to dict[pos + match_max_len - 1].
- size_t match_max_len;
+ /// Function to decode from in[] to *dict
+ lzma_ret (*code)(lzma_coder *restrict coder,
+ lzma_dict *restrict dict, const uint8_t *restrict in,
+ size_t *restrict in_pos, size_t in_size);
- /// Number of bytes allocated to dict.
- size_t size;
+ void (*reset)(lzma_coder *coder, const void *options);
- /// Requested size of the dictionary. This is needed because we avoid
- /// using extremely tiny history buffers.
- size_t requested_size;
+ /// Set the uncompressed size
+ void (*set_uncompressed)(lzma_coder *coder,
+ lzma_vli uncompressed_size);
- /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
- lzma_vli uncompressed_size;
+ /// Free allocated resources
+ void (*end)(lzma_coder *coder, lzma_allocator *allocator);
- /// Number of bytes currently in temp[].
- size_t temp_size;
+} lzma_lz_decoder;
- /// Temporary buffer needed when
- /// 1) we cannot make the input buffer completely empty; or
- /// 2) we are not the last filter in the chain.
- uint8_t temp[LZMA_BUFFER_SIZE];
-} lzma_lz_decoder;
+#define LZMA_LZ_DECODER_INIT \
+ (lzma_lz_decoder){ \
+ .coder = NULL, \
+ .code = NULL, \
+ .reset = NULL, \
+ .set_uncompressed = NULL, \
+ .end = NULL, \
+ }
-/////////////////////////
-// Function prototypes //
-/////////////////////////
+extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
+ lzma_allocator *allocator, const lzma_filter_info *filters,
+ lzma_ret (*lz_init)(lzma_lz_decoder *lz,
+ lzma_allocator *allocator, const void *options,
+ size_t *dict_size));
-extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
- lzma_allocator *allocator, bool (*process)(
- lzma_coder *restrict coder, const uint8_t *restrict in,
- size_t *restrict in_pos, size_t in_size,
- bool has_safe_buffer),
- size_t history_size, size_t match_max_len);
+extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
-extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size,
- lzma_action action);
+extern void lzma_lz_decoder_uncompressed(
+ lzma_coder *coder, lzma_vli uncompressed_size);
-/// Deallocates the history buffer if one exists.
-extern void lzma_lz_decoder_end(
- lzma_lz_decoder *lz, lzma_allocator *allocator);
//////////////////////
// Inline functions //
//////////////////////
-// Repeat a block of data from the history. Because memcpy() is faster
-// than copying byte by byte in a loop, the copying process gets split
-// into three cases:
-// 1. distance < length
-// Source and target areas overlap, thus we can't use memcpy()
-// (nor memmove()) safely.
-// TODO: If this is common enough, it might be worth optimizing this
-// more e.g. by checking if distance > sizeof(uint8_t*) and using
-// memcpy in small chunks.
-// 2. distance < pos
-// This is the easiest and the fastest case. The block being copied
-// is a contiguous piece in the history buffer. The buffer offset
-// doesn't need wrapping.
-// 3. distance >= pos
-// We need to wrap the position, because otherwise we would try copying
-// behind the first byte of the allocated buffer. It is possible that
-// the block is fragmeneted into two pieces, thus we might need to call
-// memcpy() twice.
-// NOTE: The function using this macro must ensure that length is positive
-// and that distance is FIXME
+/// Get a byte from the history buffer.
+static inline uint8_t
+dict_get(const lzma_dict *const dict, const uint32_t distance)
+{
+ return dict->buf[dict->pos - distance - 1
+ + (distance < dict->pos ? 0 : dict->size)];
+}
+
+
+/// Test if dictionary is empty.
+static inline bool
+dict_is_empty(const lzma_dict *const dict)
+{
+ return dict->full == 0;
+}
+
+
+/// Validate the match distance
+static inline bool
+dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
+{
+ return dict->full >= distance;
+}
+
+
+/// Repeat *len bytes at distance.
static inline bool
-lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
+dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
{
- // Validate offset of the block to be repeated. It doesn't
- // make sense to copy data behind the beginning of the stream.
- // Leaving this check away would lead to a security problem,
- // in which e.g. the data of the previously decoded file(s)
- // would be leaked (or whatever happens to be in unused
- // part of the dictionary buffer).
- if (unlikely(distance >= lz->pos && !lz->is_full))
- return false;
-
- // It also doesn't make sense to copy data farer than
- // the dictionary size.
- if (unlikely(distance >= lz->requested_size))
- return false;
-
- // The caller must have checked these!
- assert(distance <= lz->size);
- assert(length > 0);
- assert(length <= lz->match_max_len);
-
- // Copy the amount of data requested by the decoder.
- if (distance < length) {
+ // Don't write past the end of the dictionary.
+ const size_t dict_avail = dict->limit - dict->pos;
+ uint32_t left = MIN(dict_avail, *len);
+ *len -= left;
+
+ // Repeat a block of data from the history. Because memcpy() is faster
+ // than copying byte by byte in a loop, the copying process gets split
+ // into three cases.
+ if (distance < left) {
// Source and target areas overlap, thus we can't use
- // memcpy() nor even memmove() safely. :-(
- // TODO: Copying byte by byte is slow. It might be
- // worth optimizing this more if this case is common.
+ // memcpy() nor even memmove() safely.
do {
- lz->dict[lz->pos] = lz_get_byte(*lz, distance);
- ++lz->pos;
- } while (--length > 0);
+ dict->buf[dict->pos] = dict_get(dict, distance);
+ ++dict->pos;
+ } while (--left > 0);
- } else if (distance < lz->pos) {
+ } else if (distance < dict->pos) {
// The easiest and fastest case
- memcpy(lz->dict + lz->pos,
- lz->dict + lz->pos - distance - 1,
- length);
- lz->pos += length;
+ memcpy(dict->buf + dict->pos,
+ dict->buf + dict->pos - distance - 1,
+ left);
+ dict->pos += left;
} else {
// The bigger the dictionary, the more rare this
// case occurs. We need to "wrap" the dict, thus
// we might need two memcpy() to copy all the data.
- assert(lz->is_full);
- const uint32_t copy_pos = lz->pos - distance - 1 + lz->end;
- uint32_t copy_size = lz->end - copy_pos;
+ assert(dict->full == dict->size);
+ const uint32_t copy_pos
+ = dict->pos - distance - 1 + dict->size;
+ uint32_t copy_size = dict->size - copy_pos;
- if (copy_size < length) {
- memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
+ if (copy_size < left) {
+ memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
copy_size);
- lz->pos += copy_size;
- copy_size = length - copy_size;
- memcpy(lz->dict + lz->pos, lz->dict, copy_size);
- lz->pos += copy_size;
+ dict->pos += copy_size;
+ copy_size = left - copy_size;
+ memcpy(dict->buf + dict->pos, dict->buf, copy_size);
+ dict->pos += copy_size;
} else {
- memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
- length);
- lz->pos += length;
+ memcpy(dict->buf + dict->pos, dict->buf + copy_pos,
+ left);
+ dict->pos += left;
}
}
- return true;
+ // Update how full the dictionary is.
+ if (dict->full < dict->pos)
+ dict->full = dict->pos;
+
+ return unlikely(*len != 0);
+}
+
+
+/// Puts one byte into the dictionary. Returns true if the dictionary was
+/// already full and the byte couldn't be added.
+static inline bool
+dict_put(lzma_dict *dict, uint8_t byte)
+{
+ if (unlikely(dict->pos == dict->limit))
+ return true;
+
+ dict->buf[dict->pos++] = byte;
+
+ if (dict->pos > dict->full)
+ dict->full = dict->pos;
+
+ return false;
+}
+
+
+/// Copies arbitrary amount of data into the dictionary.
+static inline void
+dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
+ size_t *restrict in_pos, size_t in_size,
+ size_t *restrict left)
+{
+ // NOTE: If we are being given more data than the size of the
+ // dictionary, it could be possible to optimize the LZ decoder
+ // so that not everything needs to go through the dictionary.
+ // This shouldn't be a very common thing in practice though, and
+ // the slowdown of one extra memcpy() isn't bad compared to how
+ // much time it would have taken if the data were compressed.
+
+ if (in_size - *in_pos > *left)
+ in_size = *in_pos + *left;
+
+ *left -= lzma_bufcpy(in, in_pos, in_size,
+ dict->buf, &dict->pos, dict->limit);
+
+ if (dict->pos > dict->full)
+ dict->full = dict->pos;
+
+ return;
+}
+
+
+static inline void
+dict_reset(lzma_dict *dict)
+{
+ dict->pos = 0;
+ dict->full = 0;
+ dict->buf[dict->size - 1] = '\0';
}
#endif
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 82b9103f..d5f84826 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -3,8 +3,8 @@
/// \file lz_encoder.c
/// \brief LZ in window
//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
+// Copyright (C) 1999-2008 Igor Pavlov
+// Copyright (C) 2007-2008 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -18,496 +18,492 @@
//
///////////////////////////////////////////////////////////////////////////////
-#include "lz_encoder_private.h"
+#include "lz_encoder.h"
+#include "lz_encoder_hash.h"
-// Hash Chains
-#ifdef HAVE_HC3
-# include "hc3.h"
-#endif
-#ifdef HAVE_HC4
-# include "hc4.h"
-#endif
-// Binary Trees
-#ifdef HAVE_BT2
-# include "bt2.h"
-#endif
-#ifdef HAVE_BT3
-# include "bt3.h"
-#endif
-#ifdef HAVE_BT4
-# include "bt4.h"
-#endif
+struct lzma_coder_s {
+ /// LZ-based encoder e.g. LZMA
+ lzma_lz_encoder lz;
+ /// History buffer and match finder
+ lzma_mf mf;
-/// This is needed in two places so provide a macro.
-#define get_cyclic_buffer_size(history_size) ((history_size) + 1)
+ /// Next coder in the chain
+ lzma_next_coder next;
+};
-/// Calculate certain match finder properties and validate the calculated
-/// values. This is as its own function, because *num_items is needed to
-/// calculate memory requirements in common/memory.c.
-extern bool
-lzma_lz_encoder_hash_properties(lzma_match_finder match_finder,
- uint32_t history_size, uint32_t *restrict hash_mask,
- uint32_t *restrict hash_size_sum, uint32_t *restrict num_items)
+/// \brief Moves the data in the input window to free space for new data
+///
+/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
+/// bytes of input history available all the time. Now and then we need to
+/// "slide" the buffer to make space for the new data to the end of the
+/// buffer. At the same time, data older than keep_size_before is dropped.
+///
+static void
+move_window(lzma_mf *mf)
{
- uint32_t fix_hash_size;
- uint32_t sons;
+ // Align the move to a multiple of 16 bytes. Some LZ-based encoders
+ // like LZMA use the lowest bits of mf->read_pos to know the
+ // alignment of the uncompressed data. We also get better speed
+ // for memmove() with aligned buffers.
+ assert(mf->read_pos > mf->keep_size_before);
+ const uint32_t move_offset
+ = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
- switch (match_finder) {
-#ifdef HAVE_HC3
- case LZMA_MF_HC3:
- fix_hash_size = LZMA_HC3_FIX_HASH_SIZE;
- sons = 1;
- break;
-#endif
-#ifdef HAVE_HC4
- case LZMA_MF_HC4:
- fix_hash_size = LZMA_HC4_FIX_HASH_SIZE;
- sons = 1;
- break;
-#endif
-#ifdef HAVE_BT2
- case LZMA_MF_BT2:
- fix_hash_size = LZMA_BT2_FIX_HASH_SIZE;
- sons = 2;
- break;
-#endif
-#ifdef HAVE_BT3
- case LZMA_MF_BT3:
- fix_hash_size = LZMA_BT3_FIX_HASH_SIZE;
- sons = 2;
- break;
-#endif
-#ifdef HAVE_BT4
- case LZMA_MF_BT4:
- fix_hash_size = LZMA_BT4_FIX_HASH_SIZE;
- sons = 2;
- break;
-#endif
- default:
- return true;
- }
+ assert(mf->write_pos > move_offset);
+ const size_t move_size = mf->write_pos - move_offset;
- uint32_t hs;
+ assert(move_offset + move_size <= mf->size);
-#ifdef HAVE_LZMA_BT2
- if (match_finder == LZMA_BT2) {
- // NOTE: hash_mask is not used by the BT2 match finder,
- // but it is initialized just in case.
- hs = LZMA_BT2_HASH_SIZE;
- *hash_mask = 0;
- } else
-#endif
- {
- hs = history_size - 1;
- hs |= (hs >> 1);
- hs |= (hs >> 2);
- hs |= (hs >> 4);
- hs |= (hs >> 8);
- hs >>= 1;
- hs |= 0xFFFF;
+ memmove(mf->buffer, mf->buffer + move_offset, move_size);
- if (hs > (UINT32_C(1) << 24)) {
- if (match_finder == LZMA_MF_HC4
- || match_finder == LZMA_MF_BT4)
- hs >>= 1;
- else
- hs = (1 << 24) - 1;
- }
+ mf->offset += move_offset;
+ mf->read_pos -= move_offset;
+ mf->read_limit -= move_offset;
+ mf->write_pos -= move_offset;
+
+ return;
+}
- *hash_mask = hs;
- ++hs;
- }
- *hash_size_sum = hs + fix_hash_size;
+/// \brief Tries to fill the input window (mf->buffer)
+///
+/// If we are the last encoder in the chain, our input data is in in[].
+/// Otherwise we call the next filter in the chain to process in[] and
+/// write its output to mf->buffer.
+///
+/// This function must not be called once it has returned LZMA_STREAM_END.
+///
+static lzma_ret
+fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
+ size_t *in_pos, size_t in_size, lzma_action action)
+{
+ assert(coder->mf.read_pos <= coder->mf.write_pos);
- *num_items = *hash_size_sum
- + get_cyclic_buffer_size(history_size) * sons;
+ // Move the sliding window if needed.
+ if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
+ move_window(&coder->mf);
- return false;
-}
+ size_t in_used;
+ lzma_ret ret;
+ if (coder->next.code == NULL) {
+ // Not using a filter, simply memcpy() as much as possible.
+ in_used = lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
+ &coder->mf.write_pos, coder->mf.size);
+ ret = action != LZMA_RUN && *in_pos == in_size
+ ? LZMA_STREAM_END : LZMA_OK;
-extern lzma_ret
-lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator,
- bool (*process)(lzma_coder *coder, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size),
- size_t history_size, size_t additional_buffer_before,
- size_t match_max_len, size_t additional_buffer_after,
- lzma_match_finder match_finder, uint32_t match_finder_cycles,
- const uint8_t *preset_dictionary,
- size_t preset_dictionary_size)
-{
- lz->sequence = SEQ_RUN;
+ } else {
+ const size_t in_start = *in_pos;
+ ret = coder->next.code(coder->next.coder, allocator,
+ in, in_pos, in_size,
+ coder->mf.buffer, &coder->mf.write_pos,
+ coder->mf.size, action);
+ in_used = *in_pos - in_start;
+ }
- ///////////////
- // In Window //
- ///////////////
+ // If end of stream has been reached or flushing completed, we allow
+ // the encoder to process all the input (that is, read_pos is allowed
+ // to reach write_pos). Otherwise we keep keep_size_after bytes
+ // available as prebuffer.
+ if (ret == LZMA_STREAM_END) {
+ assert(*in_pos == in_size);
+ ret = LZMA_OK;
+ coder->mf.action = action;
+ coder->mf.read_limit = coder->mf.write_pos;
- // Validate history size.
- if (history_size < LZMA_DICTIONARY_SIZE_MIN
- || history_size > LZMA_DICTIONARY_SIZE_MAX) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_HEADER_ERROR;
+ } else if (coder->mf.write_pos > coder->mf.keep_size_after) {
+ // This needs to be done conditionally, because if we got
+ // only little new input, there may be too little input
+ // to do any encoding yet.
+ coder->mf.read_limit = coder->mf.write_pos
+ - coder->mf.keep_size_after;
}
- assert(history_size <= MAX_VAL_FOR_NORMALIZE - 256);
- assert(LZMA_DICTIONARY_SIZE_MAX <= MAX_VAL_FOR_NORMALIZE - 256);
+ // Restart the match finder after finished LZMA_SYNC_FLUSH.
+ if (coder->mf.pending > 0
+ && coder->mf.read_pos < coder->mf.read_limit) {
+ // Match finder may update coder->mf.pending and expects it to
+ // start from zero, so use a temporary variable.
+ const size_t pending = coder->mf.pending;
+ coder->mf.pending = 0;
- // Calculate the size of the history buffer to allocate.
- // TODO: Get a reason for magic constant of 256.
- const size_t size_reserv = (history_size + additional_buffer_before
- + match_max_len + additional_buffer_after) / 2 + 256;
+ // Rewind read_pos so that the match finder can hash
+ // the pending bytes.
+ assert(coder->mf.read_pos >= pending);
+ coder->mf.read_pos -= pending;
- lz->keep_size_before = history_size + additional_buffer_before;
- lz->keep_size_after = match_max_len + additional_buffer_after;
+ // Call the skip function directly instead of using
+ // lz_dict_skip(), since we don't want to touch
+ // mf->read_ahead.
+ coder->mf.skip(&coder->mf, pending);
+ }
- const size_t buffer_size = lz->keep_size_before + lz->keep_size_after
- + size_reserv;
+ return ret;
+}
- // Allocate history buffer if its size has changed.
- if (buffer_size != lz->size) {
- lzma_free(lz->buffer, allocator);
- lz->buffer = lzma_alloc(buffer_size, allocator);
- if (lz->buffer == NULL) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_MEM_ERROR;
+
+static lzma_ret
+lz_encode(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, lzma_action action)
+{
+ while (*out_pos < out_size
+ && (*in_pos < in_size || action != LZMA_RUN)) {
+ // Read more data to coder->mf.buffer if needed.
+ if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
+ >= coder->mf.read_limit)
+ return_if_error(fill_window(coder, allocator,
+ in, in_pos, in_size, action));
+
+ // Encode
+ const lzma_ret ret = coder->lz.code(coder->lz.coder,
+ &coder->mf, out, out_pos, out_size);
+ if (ret != LZMA_OK) {
+ // Setting this to LZMA_RUN for cases when we are
+ // flushing. It doesn't matter when finishing or if
+ // an error occurred.
+ coder->mf.action = LZMA_RUN;
+ return ret;
}
}
- // Allocation successful. Store the new size.
- lz->size = buffer_size;
+ return LZMA_OK;
+}
+
+
+static bool
+lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
+ const lzma_lz_options *lz_options)
+{
+ if (lz_options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN
+ || lz_options->dictionary_size
+ > LZMA_DICTIONARY_SIZE_MAX
+ || lz_options->find_len_max
+ > lz_options->match_len_max)
+ return true;
+
+ mf->keep_size_before = lz_options->before_size
+ + lz_options->dictionary_size;
- // Reset in window variables.
- lz->offset = 0;
- lz->read_pos = 0;
- lz->read_limit = 0;
- lz->write_pos = 0;
- lz->pending = 0;
+ mf->keep_size_after = lz_options->after_size
+ + lz_options->match_len_max;
+ // To avoid constant memmove()s, allocate some extra space. Since
+ // memmove()s become more expensive when the size of the buffer
+ // increases, we reserve more space when a large dictionary is
+ // used to make the memmove() calls rarer.
+ uint32_t reserve = lz_options->dictionary_size / 2;
+ if (reserve > (UINT32_C(1) << 30))
+ reserve /= 2;
- //////////////////
- // Match Finder //
- //////////////////
+ reserve += (lz_options->before_size + lz_options->match_len_max
+ + lz_options->after_size) / 2 + (UINT32_C(1) << 19);
- // Validate match_finder, set function pointers and a few match
- // finder specific variables.
- switch (match_finder) {
-#ifdef HAVE_HC3
+ const uint32_t old_size = mf->size;
+ mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
+
+ // FIXME Integer overflows
+
+ // Deallocate the old history buffer if it exists but has different
+ // size than what is needed now.
+ if (mf->buffer != NULL && old_size != mf->size) {
+ lzma_free(mf->buffer, allocator);
+ mf->buffer = NULL;
+ }
+
+ // Match finder options
+ mf->match_len_max = lz_options->match_len_max;
+ mf->find_len_max = lz_options->find_len_max;
+ mf->cyclic_buffer_size = lz_options->dictionary_size + 1;
+
+ // Validate the match finder ID and setup the function pointers.
+ switch (lz_options->match_finder) {
+#ifdef HAVE_MF_HC3
case LZMA_MF_HC3:
- lz->get_matches = &lzma_hc3_get_matches;
- lz->skip = &lzma_hc3_skip;
- lz->cut_value = 8 + (match_max_len >> 2);
+ mf->find = &lzma_mf_hc3_find;
+ mf->skip = &lzma_mf_hc3_skip;
break;
#endif
-#ifdef HAVE_HC4
+#ifdef HAVE_MF_HC4
case LZMA_MF_HC4:
- lz->get_matches = &lzma_hc4_get_matches;
- lz->skip = &lzma_hc4_skip;
- lz->cut_value = 8 + (match_max_len >> 2);
+ mf->find = &lzma_mf_hc4_find;
+ mf->skip = &lzma_mf_hc4_skip;
break;
#endif
-#ifdef HAVE_BT2
+#ifdef HAVE_MF_BT2
case LZMA_MF_BT2:
- lz->get_matches = &lzma_bt2_get_matches;
- lz->skip = &lzma_bt2_skip;
- lz->cut_value = 16 + (match_max_len >> 1);
+ mf->find = &lzma_mf_bt2_find;
+ mf->skip = &lzma_mf_bt2_skip;
break;
#endif
-#ifdef HAVE_BT3
+#ifdef HAVE_MF_BT3
case LZMA_MF_BT3:
- lz->get_matches = &lzma_bt3_get_matches;
- lz->skip = &lzma_bt3_skip;
- lz->cut_value = 16 + (match_max_len >> 1);
+ mf->find = &lzma_mf_bt3_find;
+ mf->skip = &lzma_mf_bt3_skip;
break;
#endif
-#ifdef HAVE_BT4
+#ifdef HAVE_MF_BT4
case LZMA_MF_BT4:
- lz->get_matches = &lzma_bt4_get_matches;
- lz->skip = &lzma_bt4_skip;
- lz->cut_value = 16 + (match_max_len >> 1);
+ mf->find = &lzma_mf_bt4_find;
+ mf->skip = &lzma_mf_bt4_skip;
break;
#endif
+
default:
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_HEADER_ERROR;
+ return true;
}
- // Check if we have been requested to use a non-default cut_value.
- if (match_finder_cycles > 0)
- lz->cut_value = match_finder_cycles;
-
- lz->match_max_len = match_max_len;
- lz->cyclic_buffer_size = get_cyclic_buffer_size(history_size);
+ // Calculate the sizes of mf->hash and mf->son.
+ const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
+ const bool is_bt = (lz_options->match_finder & 0x10) != 0;
+ uint32_t hs;
- uint32_t hash_size_sum;
- uint32_t num_items;
- if (lzma_lz_encoder_hash_properties(match_finder, history_size,
- &lz->hash_mask, &hash_size_sum, &num_items)) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_HEADER_ERROR;
- }
+ if (hash_bytes == 2) {
+ hs = 0xFFFF;
+ } else {
+ // Round dictionary size up to the next 2^n - 1 so it can
+ // be used as a hash mask.
+ hs = lz_options->dictionary_size - 1;
+ hs |= hs >> 1;
+ hs |= hs >> 2;
+ hs |= hs >> 4;
+ hs |= hs >> 8;
+ hs >>= 1;
+ hs |= 0xFFFF;
- if (num_items != lz->num_items) {
-#if UINT32_MAX >= SIZE_MAX / 4
- // Check for integer overflow. (Huge dictionaries are not
- // possible on 32-bit CPU.)
- if (num_items > SIZE_MAX / sizeof(uint32_t)) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_MEM_ERROR;
+ if (hs > (UINT32_C(1) << 24)) {
+ if (hash_bytes == 3)
+ hs = (UINT32_C(1) << 24) - 1;
+ else
+ hs >>= 1;
}
-#endif
-
- const size_t size_in_bytes
- = (size_t)(num_items) * sizeof(uint32_t);
+ }
- lzma_free(lz->hash, allocator);
- lz->hash = lzma_alloc(size_in_bytes, allocator);
- if (lz->hash == NULL) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_MEM_ERROR;
- }
+ mf->hash_mask = hs;
+
+ ++hs;
+ if (hash_bytes > 2)
+ hs += HASH_2_SIZE;
+ if (hash_bytes > 3)
+ hs += HASH_3_SIZE;
+/*
+ No match finder uses this at the moment.
+ if (mf->hash_bytes > 4)
+ hs += HASH_4_SIZE;
+*/
+
+ const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
+ mf->hash_size_sum = hs;
+ mf->sons_count = mf->cyclic_buffer_size;
+ if (is_bt)
+ mf->sons_count *= 2;
+
+ const uint32_t new_count = mf->hash_size_sum + mf->sons_count;
+
+ // Deallocate the old hash array if it exists and has different size
+ // than what is needed now.
+ if (mf->hash != NULL && old_count != new_count) {
+ lzma_free(mf->hash, allocator);
+ mf->hash = NULL;
+ }
- lz->num_items = num_items;
+ // Maximum number of match finder cycles
+ mf->loops = lz_options->match_finder_cycles;
+ if (mf->loops == 0) {
+ mf->loops = 16 + (lz_options->find_len_max / 2);
+ if (!is_bt)
+ mf->loops /= 2;
}
- lz->son = lz->hash + hash_size_sum;
+ return false;
+}
- // Reset the hash table to empty hash values.
- {
- uint32_t *restrict items = lz->hash;
- for (uint32_t i = 0; i < hash_size_sum; ++i)
- items[i] = EMPTY_HASH_VALUE;
+static bool
+lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
+{
+ // Allocate the history buffer.
+ if (mf->buffer == NULL) {
+ mf->buffer = lzma_alloc(mf->size, allocator);
+ if (mf->buffer == NULL)
+ return true;
}
- lz->cyclic_buffer_pos = 0;
+ // Use cyclic_buffer_size as initial mf->offset. This allows
+ // avoiding a few branches in the match finders. The downside is
+ // that match finder needs to be normalized more often, which may
+ // hurt performance with huge dictionaries.
+ mf->offset = mf->cyclic_buffer_size;
+ mf->read_pos = 0;
+ mf->read_ahead = 0;
+ mf->read_limit = 0;
+ mf->write_pos = 0;
+ mf->pending = 0;
- // Because zero is used as empty hash value, make the first byte
- // appear at buffer[1 - offset].
- ++lz->offset;
+ // Allocate match finder's hash array.
+ const size_t alloc_count = mf->hash_size_sum + mf->sons_count;
- // If we are using a preset dictionary, read it now.
- // TODO: This isn't implemented yet so return LZMA_HEADER_ERROR.
- if (preset_dictionary != NULL && preset_dictionary_size > 0) {
- lzma_lz_encoder_end(lz, allocator);
- return LZMA_HEADER_ERROR;
+#if UINT32_MAX >= SIZE_MAX / 4
+ // Check for integer overflow. (Huge dictionaries are not
+ // possible on 32-bit CPU.)
+ if (alloc_count > SIZE_MAX / sizeof(uint32_t))
+ return true;
+#endif
+
+ if (mf->hash == NULL) {
+ mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),
+ allocator);
+ if (mf->hash == NULL)
+ return true;
}
- // Set the process function pointer.
- lz->process = process;
+ mf->son = mf->hash + mf->hash_size_sum;
+ mf->cyclic_buffer_pos = 0;
+
+ // Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we
+ // can use memset().
+/*
+ for (uint32_t i = 0; i < hash_size_sum; ++i)
+ mf->hash[i] = EMPTY_HASH_VALUE;
+*/
+ memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));
+
+ // We don't need to initialize mf->son, but not doing that will
+ // make Valgrind complain in normalization (see normalize() in
+ // lz_encoder_mf.c).
+ //
+ // Skipping this initialization is *very* good when a big dictionary is
+ // used but only a small amount of data gets actually compressed: most
+ // of the mf->hash won't get actually allocated by the kernel, so
+ // we avoid wasting RAM and improve initialization speed a lot.
+ //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
+
+ mf->action = LZMA_RUN;
- return LZMA_OK;
+ return false;
}
-extern void
-lzma_lz_encoder_end(lzma_lz_encoder *lz, lzma_allocator *allocator)
+extern uint64_t
+lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
{
- lzma_free(lz->hash, allocator);
- lz->hash = NULL;
- lz->num_items = 0;
-
- lzma_free(lz->buffer, allocator);
- lz->buffer = NULL;
- lz->size = 0;
-
- return;
+ // Old buffers must not exist when calling lz_encoder_prepare().
+ lzma_mf mf = {
+ .buffer = NULL,
+ .hash = NULL,
+ };
+
+ // Setup the size information into mf.
+ if (lz_encoder_prepare(&mf, NULL, lz_options))
+ return UINT64_MAX;
+
+ // Calculate the memory usage.
+ return (uint64_t)(mf.hash_size_sum + mf.sons_count)
+ * sizeof(uint32_t)
+ + (uint64_t)(mf.size) + sizeof(lzma_coder);
}
-/// \brief Moves the data in the input window to free space for new data
-///
-/// lz->buffer is a sliding input window, which keeps lz->keep_size_before
-/// bytes of input history available all the time. Now and then we need to
-/// "slide" the buffer to make space for the new data to the end of the
-/// buffer. At the same time, data older than keep_size_before is dropped.
-///
static void
-move_window(lzma_lz_encoder *lz)
+lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
- // buffer[move_offset] will become buffer[0].
- assert(lz->read_pos > lz->keep_size_after);
- size_t move_offset = lz->read_pos - lz->keep_size_before;
-
- // We need one additional byte, since move_pos() moves on 1 byte.
- // TODO: Clean up? At least document more.
- if (move_offset > 0)
- --move_offset;
-
- assert(lz->write_pos > move_offset);
- const size_t move_size = lz->write_pos - move_offset;
+ lzma_next_end(&coder->next, allocator);
- assert(move_offset + move_size <= lz->size);
+ lzma_free(coder->mf.hash, allocator);
+ lzma_free(coder->mf.buffer, allocator);
- memmove(lz->buffer, lz->buffer + move_offset, move_size);
-
- lz->offset += move_offset;
- lz->read_pos -= move_offset;
- lz->read_limit -= move_offset;
- lz->write_pos -= move_offset;
+ if (coder->lz.end != NULL)
+ coder->lz.end(coder->lz.coder, allocator);
+ else
+ lzma_free(coder->lz.coder, allocator);
+ lzma_free(coder, allocator);
return;
}
-/// \brief Tries to fill the input window (lz->buffer)
-///
-/// If we are the last encoder in the chain, our input data is in in[].
-/// Otherwise we call the next filter in the chain to process in[] and
-/// write its output to lz->buffer.
-///
-/// This function must not be called once it has returned LZMA_STREAM_END.
-///
-static lzma_ret
-fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in,
- size_t *in_pos, size_t in_size, lzma_action action)
+extern lzma_ret
+lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_filter_info *filters,
+ lzma_ret (*lz_init)(lzma_lz_encoder *lz,
+ lzma_allocator *allocator, const void *options,
+ lzma_lz_options *lz_options))
{
- assert(coder->lz.read_pos <= coder->lz.write_pos);
+ // Allocate and initialize the base data structure.
+ if (next->coder == NULL) {
+ next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+ if (next->coder == NULL)
+ return LZMA_MEM_ERROR;
- // Move the sliding window if needed.
- if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after)
- move_window(&coder->lz);
+ next->code = &lz_encode;
+ next->end = &lz_encoder_end;
- size_t in_used;
- lzma_ret ret;
- if (coder->next.code == NULL) {
- // Not using a filter, simply memcpy() as much as possible.
- in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer,
- &coder->lz.write_pos, coder->lz.size);
+ next->coder->lz.coder = NULL;
+ next->coder->lz.code = NULL;
+ next->coder->lz.end = NULL;
- if (action != LZMA_RUN && *in_pos == in_size)
- ret = LZMA_STREAM_END;
- else
- ret = LZMA_OK;
+ next->coder->mf.buffer = NULL;
+ next->coder->mf.hash = NULL;
- } else {
- const size_t in_start = *in_pos;
- ret = coder->next.code(coder->next.coder, allocator,
- in, in_pos, in_size,
- coder->lz.buffer, &coder->lz.write_pos,
- coder->lz.size, action);
- in_used = *in_pos - in_start;
+ next->coder->next = LZMA_NEXT_CODER_INIT;
}
- // If end of stream has been reached or flushing completed, we allow
- // the encoder to process all the input (that is, read_pos is allowed
- // to reach write_pos). Otherwise we keep keep_size_after bytes
- // available as prebuffer.
- if (ret == LZMA_STREAM_END) {
- assert(*in_pos == in_size);
- coder->lz.read_limit = coder->lz.write_pos;
- ret = LZMA_OK;
+ // Initialize the LZ-based encoder.
+ lzma_lz_options lz_options;
+ return_if_error(lz_init(&next->coder->lz, allocator,
+ filters[0].options, &lz_options));
- switch (action) {
- case LZMA_SYNC_FLUSH:
- coder->lz.sequence = SEQ_FLUSH;
- break;
-
- case LZMA_FINISH:
- coder->lz.sequence = SEQ_FINISH;
- break;
-
- default:
- assert(0);
- ret = LZMA_PROG_ERROR;
- break;
- }
-
- } else if (coder->lz.write_pos > coder->lz.keep_size_after) {
- // This needs to be done conditionally, because if we got
- // only little new input, there may be too little input
- // to do any encoding yet.
- coder->lz.read_limit = coder->lz.write_pos
- - coder->lz.keep_size_after;
- }
-
- // Restart the match finder after finished LZMA_SYNC_FLUSH.
- if (coder->lz.pending > 0
- && coder->lz.read_pos < coder->lz.read_limit) {
- // Match finder may update coder->pending and expects it to
- // start from zero, so use a temporary variable.
- const size_t pending = coder->lz.pending;
- coder->lz.pending = 0;
+ // Setup the size information into next->coder->mf and deallocate
+ // old buffers if they have wrong size.
+ if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options))
+ return LZMA_HEADER_ERROR;
- // Rewind read_pos so that the match finder can hash
- // the pending bytes.
- assert(coder->lz.read_pos >= pending);
- coder->lz.read_pos -= pending;
- coder->lz.skip(&coder->lz, pending);
- }
+ // Allocate new buffers if needed, and do the rest of
+ // the initialization.
+ if (lz_encoder_init(&next->coder->mf, allocator))
+ return LZMA_MEM_ERROR;
- return ret;
+ // Initialize the next filter in the chain, if any.
+ return lzma_next_filter_init(&next->coder->next, allocator,
+ filters + 1);
}
-extern lzma_ret
-lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator,
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size,
- uint8_t *restrict out, size_t *restrict out_pos,
- size_t out_size, lzma_action action)
+extern LZMA_API lzma_bool
+lzma_mf_is_supported(lzma_match_finder mf)
{
- while (*out_pos < out_size
- && (*in_pos < in_size || action != LZMA_RUN)) {
- // Read more data to coder->lz.buffer if needed.
- if (coder->lz.sequence == SEQ_RUN
- && coder->lz.read_pos >= coder->lz.read_limit)
- return_if_error(fill_window(coder, allocator,
- in, in_pos, in_size, action));
+ bool ret = false;
- // Encode
- if (coder->lz.process(coder, out, out_pos, out_size)) {
- // Setting this to SEQ_RUN for cases when we are
- // flushing. It doesn't matter when finishing.
- coder->lz.sequence = SEQ_RUN;
- return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK;
- }
- }
+#ifdef HAVE_MF_HC3
+ if (mf == LZMA_MF_HC3)
+ ret = true;
+#endif
- return LZMA_OK;
-}
+#ifdef HAVE_MF_HC4
+ if (mf == LZMA_MF_HC4)
+ ret = true;
+#endif
+#ifdef HAVE_MF_BT2
+ if (mf == LZMA_MF_BT2)
+ ret = true;
+#endif
-/// \brief Normalizes hash values
-///
-/// lzma_lz_normalize is called when lz->pos hits MAX_VAL_FOR_NORMALIZE,
-/// which currently happens once every 2 GiB of input data (to be exact,
-/// after the first 2 GiB it happens once every 2 GiB minus dictionary_size
-/// bytes). lz->pos is incremented by lzma_lz_move_pos().
-///
-/// lz->hash contains big amount of offsets relative to lz->buffer.
-/// The offsets are stored as uint32_t, which is the only reasonable
-/// datatype for these offsets; uint64_t would waste far too much RAM
-/// and uint16_t would limit the dictionary to 64 KiB (far too small).
-///
-/// When compressing files over 2 GiB, lz->buffer needs to be moved forward
-/// to avoid integer overflows. We scan the lz->hash array and fix every
-/// value to match the updated lz->buffer.
-extern void
-lzma_lz_encoder_normalize(lzma_lz_encoder *lz)
-{
- const uint32_t subvalue = lz->read_pos - lz->cyclic_buffer_size;
- assert(subvalue <= INT32_MAX);
-
- {
- const uint32_t num_items = lz->num_items;
- uint32_t *restrict items = lz->hash;
-
- for (uint32_t i = 0; i < num_items; ++i) {
- // If the distance is greater than the dictionary
- // size, we can simply mark the item as empty.
- if (items[i] <= subvalue)
- items[i] = EMPTY_HASH_VALUE;
- else
- items[i] -= subvalue;
- }
- }
+#ifdef HAVE_MF_BT3
+ if (mf == LZMA_MF_BT3)
+ ret = true;
+#endif
- // Update offset to match the new locations.
- lz->offset -= subvalue;
+#ifdef HAVE_MF_BT4
+ if (mf == LZMA_MF_BT4)
+ ret = true;
+#endif
- return;
+ return ret;
}
diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h
index da0e0804..45bb8462 100644
--- a/src/liblzma/lz/lz_encoder.h
+++ b/src/liblzma/lz/lz_encoder.h
@@ -3,8 +3,8 @@
/// \file lz_encoder.h
/// \brief LZ in window and match finder API
//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
+// Copyright (C) 1999-2008 Igor Pavlov
+// Copyright (C) 2008 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
@@ -24,19 +24,16 @@
#include "common.h"
-typedef struct lzma_lz_encoder_s lzma_lz_encoder;
-struct lzma_lz_encoder_s {
- enum {
- SEQ_RUN,
- SEQ_FLUSH,
- SEQ_FINISH,
- } sequence;
+/// A table of these is used by the LZ-based encoder to hold
+/// the length-distance pairs found by the match finder.
+typedef struct {
+ /// Length of the match in bytes
+ uint32_t len;
+
+ /// Distance of the match.
+ /// NOTE(review): the debug check in lzma_mf_find() compares against
+ /// the data dist + 1 bytes back, so this looks like it is stored as
+ /// "distance minus one" - confirm.
+ uint32_t dist;
+} lzma_match;
- /// Function to do the actual encoding from the sliding input window
- /// to the output stream.
- bool (*process)(lzma_coder *coder, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size);
+typedef struct lzma_mf_s lzma_mf;
+struct lzma_mf_s {
///////////////
// In Window //
///////////////
@@ -46,17 +43,33 @@ struct lzma_lz_encoder_s {
/// Total size of the allocated buffer (that is, including all
/// the extra space)
- size_t size;
+ uint32_t size;
+
+ /// Number of bytes that must be kept available in our input history.
+ /// That is, once keep_size_before bytes have been processed,
+ /// buffer[read_pos - keep_size_before] is the oldest byte that
+ /// must be available for reading.
+ uint32_t keep_size_before;
+
+ /// Number of bytes that must be kept in buffer after read_pos.
+ /// That is, read_pos <= write_pos - keep_size_after as long as
+ /// stream_end_was_reached is false (once it is true, read_pos
+ /// is allowed to reach write_pos).
+ uint32_t keep_size_after;
/// Match finders store locations of matches using 32-bit integers.
/// To avoid adjusting several megabytes of integers every time the
/// input window is moved with move_window(), we only adjust the
/// offset of the buffer. Thus, buffer[match_finder_pos - offset]
/// is the byte pointed by match_finder_pos.
- size_t offset;
+ uint32_t offset;
/// buffer[read_pos] is the current byte.
- size_t read_pos;
+ uint32_t read_pos;
+
+ /// Number of bytes that have been run through the match finder, but
+ /// which haven't been encoded by the LZ-based encoder yet.
+ uint32_t read_ahead;
/// As long as read_pos is less than read_limit, there is enough
/// input available in buffer for at least one encoding loop.
@@ -64,92 +77,253 @@ struct lzma_lz_encoder_s {
/// Because of the stateful API, read_limit may and will get greater
/// than read_pos quite often. This is taken into account when
/// calculating the value for keep_size_after.
- size_t read_limit;
+ uint32_t read_limit;
/// buffer[write_pos] is the first byte that doesn't contain valid
/// uncompressed data; that is, the next input byte will be copied
/// to buffer[write_pos].
- size_t write_pos;
+ uint32_t write_pos;
/// Number of bytes not hashed before read_pos. This is needed to
/// restart the match finder after LZMA_SYNC_FLUSH.
- size_t pending;
-
- /// Number of bytes that must be kept available in our input history.
- /// That is, once keep_size_before bytes have been processed,
- /// buffer[read_pos - keep_size_before] is the oldest byte that
- /// must be available for reading.
- size_t keep_size_before;
-
- /// Number of bytes that must be kept in buffer after read_pos.
- /// That is, read_pos <= write_pos - keep_size_after as long as
- /// stream_end_was_reached is false (once it is true, read_pos
- /// is allowed to reach write_pos).
- size_t keep_size_after;
+ uint32_t pending;
//////////////////
// Match Finder //
//////////////////
- // Pointers to match finder functions
- void (*get_matches)(lzma_lz_encoder *restrict lz,
- uint32_t *restrict distances);
- void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num);
+ /// Find matches. Returns the number of distance-length pairs written
+ /// to the matches array. This is called only via lzma_mf_find.
+ uint32_t (*find)(lzma_mf *mf, lzma_match *matches);
+
+ /// Skips num bytes. This is like find() but doesn't make the
+ /// distance-length pairs available, thus being a little faster.
+ /// This is called only via mf_skip function.
+ void (*skip)(lzma_mf *mf, uint32_t num);
- // Match finder data
- uint32_t *hash; // TODO: Check if hash aliases son
- uint32_t *son; // and add 'restrict' if possible.
+ uint32_t *hash;
+ uint32_t *son;
uint32_t cyclic_buffer_pos;
uint32_t cyclic_buffer_size; // Must be dictionary_size + 1.
uint32_t hash_mask;
- uint32_t cut_value;
+
+ /// Maximum number of loops in the match finder
+ uint32_t loops;
+
+ /// Maximum length of a match that the match finder will try to find.
+ uint32_t find_len_max;
+
+ /// Maximum length of a match supported by the LZ-based encoder.
+ /// If the longest match found by the match finder is find_len_max,
+ /// lz_dict_find() tries to expand it up to match_len_max bytes.
+ uint32_t match_len_max;
+
+ /// When running out of input, binary tree match finders need to know
+ /// if it is due to flushing or finishing. The action is used also
+ /// by the LZ-based encoders themselves.
+ lzma_action action;
+
+ /// Number of elements in hash[]
uint32_t hash_size_sum;
- uint32_t num_items;
- uint32_t match_max_len;
+
+ /// Number of elements in son[]
+ uint32_t sons_count;
};
-#define LZMA_LZ_ENCODER_INIT \
- (lzma_lz_encoder){ \
- .buffer = NULL, \
- .size = 0, \
- .hash = NULL, \
- .num_items = 0, \
+/// Options describing the buffer sizes and the match finder that an
+/// LZ-based encoder needs. lzma_lz_encoder_init() passes one of these to
+/// the lz_init callback and then uses it to set up the match finder;
+/// lzma_lz_encoder_memusage() takes one to calculate the memory usage.
+typedef struct {
+ /// Extra amount of data to keep available before the "actual"
+ /// dictionary.
+ size_t before_size;
+
+ /// Size of the history buffer
+ size_t dictionary_size;
+
+ /// Extra amount of data to keep available after the "actual"
+ /// dictionary.
+ size_t after_size;
+
+ /// Maximum length of a match that the LZ-based encoder can accept.
+ /// This is used to extend matches of length find_len_max to the
+ /// maximum possible length.
+ size_t match_len_max;
+
+ /// Match finder will search matches of at maximum of this length.
+ /// This must be less than or equal to match_len_max.
+ size_t find_len_max;
+
+ /// Type of the match finder to use
+ lzma_match_finder match_finder;
+
+ /// TODO: Comment
+ /// NOTE(review): presumably limits how many loops the match finder
+ /// may run per position (compare lzma_mf.loops) - confirm.
+ uint32_t match_finder_cycles;
+
+ /// TODO: Comment
+ /// NOTE(review): presumably data to preload into the history
+ /// buffer before encoding starts - confirm.
+ const uint8_t *preset_dictionary;
+
+ /// NOTE(review): presumably the size of preset_dictionary in
+ /// bytes - confirm.
+ uint32_t preset_dictionary_size;
+
+} lzma_lz_options;
+
+
+// The total usable buffer space at any moment outside the match finder:
+// before_size + dictionary_size + after_size + match_len_max
+//
+// In reality, there's some extra space allocated to keep the number of
+// memmove() calls reasonable. The bigger the dictionary_size is, the bigger
+// this extra buffer will be since with bigger dictionaries memmove() would
+// also take longer.
+//
+// A single encoder loop in the LZ-based encoder may call the match finder
+// (lz_dict_find() or lz_dict_skip()) at maximum of after_size times.
+// In other words, a single encoder loop may advance lz_dict.read_pos at
+// maximum of after_size times. Since matches are looked up to
+// lz_dict.buffer[lz_dict.read_pos + match_len_max - 1], the total
+// amount of extra buffer needed after dictionary_size becomes
+// after_size + match_len_max.
+//
+// before_size has two uses. The first one is to keep literals available
+// in cases when the LZ-based encoder has made some read ahead.
+// TODO: Maybe this could be changed by making the LZ-based encoders to
+// store the actual literals as they do with length-distance pairs.
+//
+// Algorithms such as LZMA2 first try to compress a chunk, and then check
+// if the encoded result is smaller than the uncompressed one. If the chunk
+// was uncompressible, it is better to store it in uncompressed form in
+// the output stream. To do this, the whole uncompressed chunk has to be
+// still available in the history buffer. before_size achieves that.
+
+
+/// Interface of the actual LZ-based encoder (e.g. LZMA) as seen by the
+/// generic LZ encoder layer. lzma_lz_encoder_init() sets all members to
+/// NULL for a new coder and then hands the structure to the lz_init
+/// callback.
+typedef struct {
+ /// Data specific to the LZ-based encoder
+ lzma_coder *coder;
+
+ /// Function to encode from *mf to out[]
+ lzma_ret (*code)(lzma_coder *restrict coder,
+ lzma_mf *restrict mf, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size);
+
+ /// Free allocated resources. This may be NULL, in which case
+ /// lz_encoder_end() frees coder with lzma_free().
+ void (*end)(lzma_coder *coder, lzma_allocator *allocator);
+
+} lzma_lz_encoder;
+
+
+// Basic steps:
+// 1. Input gets copied into the dictionary.
+// 2. Data in dictionary gets run through the match finder byte by byte.
+// 3. The literals and matches are encoded using e.g. LZMA.
+//
+// The bytes that have been run through the match finder, but not encoded yet,
+// are called `read ahead'.
+
+
+/// Return a pointer to buffer[read_pos], that is, to the first byte that
+/// hasn't been run through the match finder yet.
+static inline const uint8_t *
+mf_ptr(const lzma_mf *mf)
+{
+	return &mf->buffer[mf->read_pos];
+}
+
+
+/// Number of bytes between read_pos and write_pos, that is, the bytes in
+/// the buffer that haven't been run through the match finder yet.
+static inline uint32_t
+mf_avail(const lzma_mf *mf)
+{
+	const uint32_t buffered_end = mf->write_pos;
+	const uint32_t next_unread = mf->read_pos;
+
+	return buffered_end - next_unread;
+}
+
+
+/// Get the number of bytes that haven't been encoded yet. This is the sum
+/// of the bytes not yet run through the match finder (write_pos - read_pos)
+/// and the read_ahead bytes, which have already been run through the match
+/// finder but haven't been encoded by the LZ-based encoder.
+static inline uint32_t
+mf_unencoded(const lzma_mf *mf)
+{
+	// read_ahead bytes are located before read_pos and are still
+	// waiting to be encoded, so they are added, not subtracted.
+	return mf->write_pos - mf->read_pos + mf->read_ahead;
+}
+
+
+/// Calculate the absolute offset from the beginning of the most recent
+/// dictionary reset: read_pos with the read_ahead bytes subtracted. Only
+/// the lowest four bits are important, so there's no problem that we don't
+/// know the 64-bit size of the data encoded so far.
+///
+/// NOTE: When moving the input window, we need to do it so that the lowest
+/// bits of mf->read_pos are not modified to keep this function working
+/// as intended.
+static inline uint32_t
+mf_position(const lzma_mf *mf)
+{
+	const uint32_t ahead = mf->read_ahead;
+
+	return mf->read_pos - ahead;
+}
+
+
+/// Since everything else begins with mf_, use it also for lzma_mf_find().
+#define mf_find lzma_mf_find
+
+
+/// Skip the given number of bytes. This is used when a good match was found.
+/// For example, if mf_find() finds a match of 200 bytes long, the first byte
+/// of that match was already consumed by mf_find(), and the rest 199 bytes
+/// have to be skipped with mf_skip(mf, 199).
+///
+/// The amount is passed to the match finder's skip() callback and added to
+/// mf->read_ahead. Calling this with amount == 0 does nothing.
+static inline void
+mf_skip(lzma_mf *mf, uint32_t amount)
+{
+ if (amount != 0) {
+ mf->skip(mf, amount);
+ mf->read_ahead += amount;
}
+}
+
+
+/// Copy bytes from the history buffer to out[]. This is needed by LZMA2 to
+/// encode uncompressed chunks.
+///
+/// The source is the *left bytes that precede buffer[read_pos]. At most
+/// *left bytes are copied, and never more than what fits between
+/// out[*out_pos] and out[out_size]. *out_pos is advanced and *left is
+/// reduced by the number of bytes actually copied.
+static inline void
+mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size,
+		size_t *left)
+{
+	// There must be no read ahead, and the requested bytes must
+	// still be in the buffer.
+	assert(mf->read_ahead == 0);
+	assert(mf->read_pos >= *left);
+
+	// Limit the copy to the free space in out[].
+	const size_t space = out_size - *out_pos;
+	const size_t n = space < *left ? space : *left;
+
+	const uint8_t *src = mf->buffer + mf->read_pos - *left;
+	memcpy(&out[*out_pos], src, n);
+
+	*left -= n;
+	*out_pos += n;
+}
+
+
+extern lzma_ret lzma_lz_encoder_init(
+ lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_filter_info *filters,
+ lzma_ret (*lz_init)(lzma_lz_encoder *lz,
+ lzma_allocator *allocator, const void *options,
+ lzma_lz_options *lz_options));
+
+
+extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options);
+
+
+// These are only for LZ encoder's internal use.
+extern uint32_t lzma_mf_find(
+ lzma_mf *mf, uint32_t *count, lzma_match *matches);
+
+extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches);
+extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount);
+
+extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches);
+extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount);
+
+extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches);
+extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount);
+extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches);
+extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount);
-/// Calculates
-extern bool lzma_lz_encoder_hash_properties(lzma_match_finder match_finder,
- uint32_t history_size, uint32_t *restrict hash_mask,
- uint32_t *restrict hash_size_sum,
- uint32_t *restrict num_items);
-
-// NOTE: liblzma doesn't use callback API like LZMA SDK does. The caller
-// must make sure that keep_size_after is big enough for single encoding pass
-// i.e. keep_size_after >= maximum number of bytes possibly needed after
-// the current position between calls to lzma_lz_read().
-extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz,
- lzma_allocator *allocator,
- bool (*process)(lzma_coder *coder, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size),
- size_t history_size, size_t additional_buffer_before,
- size_t match_max_len, size_t additional_buffer_after,
- lzma_match_finder match_finder, uint32_t match_finder_cycles,
- const uint8_t *preset_dictionary,
- size_t preset_dictionary_size);
-
-/// Frees memory allocated for in window and match finder buffers.
-extern void lzma_lz_encoder_end(
- lzma_lz_encoder *lz, lzma_allocator *allocator);
-
-extern lzma_ret lzma_lz_encode(lzma_coder *coder,
- lzma_allocator *allocator lzma_attribute((unused)),
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size, uint8_t *restrict out,
- size_t *restrict out_pos, size_t out_size,
- lzma_action action);
-
-/// This should not be called directly, but only via move_pos() macro.
-extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz);
+extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches);
+extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount);
#endif
diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h
new file mode 100644
index 00000000..0841c38f
--- /dev/null
+++ b/src/liblzma/lz/lz_encoder_hash.h
@@ -0,0 +1,104 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lz_encoder_hash.h
+/// \brief Hash macros for match finders
+//
+// Copyright (C) 1999-2008 Igor Pavlov
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZ_ENCODER_HASH_H
+#define LZMA_LZ_ENCODER_HASH_H
+
+#define HASH_2_SIZE (UINT32_C(1) << 10)
+#define HASH_3_SIZE (UINT32_C(1) << 16)
+#define HASH_4_SIZE (UINT32_C(1) << 20)
+
+#define HASH_2_MASK (HASH_2_SIZE - 1)
+#define HASH_3_MASK (HASH_3_SIZE - 1)
+#define HASH_4_MASK (HASH_4_SIZE - 1)
+
+#define FIX_3_HASH_SIZE (HASH_2_SIZE)
+#define FIX_4_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
+#define FIX_5_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE + HASH_4_SIZE)
+
+// TODO Benchmark, and probably doesn't need to be endian dependent.
+#if !defined(WORDS_BIGENDIAN) && defined(HAVE_FAST_UNALIGNED_ACCESS)
+# define hash_2_calc() \
+ const uint32_t hash_value = *(const uint16_t *)(cur);
+#else
+# define hash_2_calc() \
+ const uint32_t hash_value \
+ = (uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)
+#endif
+
+#define hash_3_calc() \
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \
+ const uint32_t hash_2_value = temp & HASH_2_MASK; \
+ const uint32_t hash_value \
+ = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask
+
+#define hash_4_calc() \
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \
+ const uint32_t hash_2_value = temp & HASH_2_MASK; \
+ const uint32_t hash_3_value \
+ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+ const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
+ ^ (lzma_crc32_table[0][cur[3]] << 5)) & mf->hash_mask
+
+
+// The following are not currently used.
+
+// Calculate the 2-, 3-, 4-, and 5-byte hashes of the bytes at cur. This
+// declares temp, hash_2_value, hash_3_value, hash_4_value, and hash_value
+// in the scope where the macro is used, and expects cur and mf to be
+// available there. hash_4_value is masked only after it has been used to
+// calculate hash_value.
+#define hash_5_calc() \
+	const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \
+	const uint32_t hash_2_value = temp & HASH_2_MASK; \
+	const uint32_t hash_3_value \
+			= (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+	uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
+			^ (lzma_crc32_table[0][cur[3]] << 5)); \
+	const uint32_t hash_value \
+			= (hash_4_value ^ (lzma_crc32_table[0][cur[4]] << 3)) \
+			& mf->hash_mask; \
+	hash_4_value &= HASH_4_MASK
+
+/*
+#define hash_zip_calc() \
+ const uint32_t hash_value \
+ = (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \
+ ^ lzma_crc32_table[0][cur[2]]) & 0xFFFF
+*/
+
+#define hash_zip_calc() \
+ const uint32_t hash_value \
+ = (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \
+ ^ lzma_crc32_table[0][cur[1]]) & 0xFFFF
+
+#define mt_hash_2_calc() \
+ const uint32_t hash_2_value \
+ = (lzma_crc32_table[0][cur[0]] ^ cur[1]) & HASH_2_MASK
+
+#define mt_hash_3_calc() \
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \
+ const uint32_t hash_2_value = temp & HASH_2_MASK; \
+ const uint32_t hash_3_value \
+ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK
+
+#define mt_hash_4_calc() \
+ const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \
+ const uint32_t hash_2_value = temp & HASH_2_MASK; \
+ const uint32_t hash_3_value \
+ = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \
+ const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \
+ (lzma_crc32_table[0][cur[3]] << 5)) & HASH_4_MASK
+
+#endif
diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c
new file mode 100644
index 00000000..b1c20f50
--- /dev/null
+++ b/src/liblzma/lz/lz_encoder_mf.c
@@ -0,0 +1,780 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lz_encoder_mf.c
+/// \brief Match finders
+//
+// Copyright (C) 1999-2008 Igor Pavlov
+// Copyright (C) 2008 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lz_encoder_hash.h"
+#include "check.h"
+
+
+/// \brief      Run the match finder for the current byte
+///
+/// Calls mf->find() to fill matches[]. If the longest reported match is
+/// exactly mf->find_len_max bytes long, it is extended byte by byte up to
+/// mf->match_len_max or the end of the available data.
+///
+/// \param      mf          Match finder state
+/// \param      count_ptr   Receives the number of length-distance pairs
+///                         that were stored into matches[]
+/// \param      matches     Array that receives the length-distance pairs
+///
+/// \return     Length of the longest match, or zero if nothing was found
+extern uint32_t
+lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
+{
+	// The match finder returns how many length-distance pairs it stored.
+	// FIXME: Minimum count is zero, what _exactly_ is the maximum?
+	const uint32_t pairs = mf->find(mf, matches);
+
+#ifndef NDEBUG
+	// Sanity check every pair: the length is within the search limit,
+	// the distance stays inside the buffer, and the bytes really match.
+	for (uint32_t k = 0; k < pairs; ++k) {
+		assert(matches[k].len <= mf->find_len_max);
+		assert(matches[k].dist < mf->read_pos);
+		assert(memcmp(mf_ptr(mf) - 1,
+				mf_ptr(mf) - matches[k].dist - 2,
+				matches[k].len) == 0);
+	}
+#endif
+
+	// Stays zero when the match finder found nothing.
+	uint32_t longest = 0;
+
+	if (pairs != 0) {
+		// The longest match is stored last in the array.
+		const lzma_match *const last = &matches[pairs - 1];
+		longest = last->len;
+
+		// A match of the maximum search length may continue further,
+		// so try to grow it.
+		if (longest == mf->find_len_max) {
+			// Don't go past the data that is available nor past
+			// the longest match the LZ-based encoder supports.
+			const uint32_t avail = mf_avail(mf) + 1;
+			const uint32_t limit = avail > mf->match_len_max
+					? mf->match_len_max : avail;
+
+			// The byte that was just given to the match finder.
+			const uint8_t *const here = mf_ptr(mf) - 1;
+
+			// Start of the match. Distances are zero-based,
+			// thus the extra -1.
+			const uint8_t *const there = here - last->dist - 1;
+
+			while (longest < limit
+					&& here[longest] == there[longest])
+				++longest;
+		}
+	}
+
+	*count_ptr = pairs;
+
+	// Record that the match finder has been run for one more byte
+	// than the encoder has consumed.
+	++mf->read_ahead;
+
+	return longest;
+}
+
+
+/// Hash value to indicate unused element in the hash. Since we start the
+/// positions from dictionary_size + 1, zero is always too far to qualify
+/// as usable match position.
+#define EMPTY_HASH_VALUE 0
+
+
+/// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos
+/// reaches MUST_NORMALIZE_POS.
+#define MUST_NORMALIZE_POS UINT32_MAX
+
+
+/// \brief      Rebase the positions stored in the hash and son arrays
+///
+/// The arrays hold positions of match candidates relative to an arbitrary
+/// base: the current byte is at lzma_mf.offset + lzma_mf.read_pos, and a
+/// match distance is the difference between that and a stored position.
+///
+/// Before the 32-bit positions can overflow, every stored value is lowered
+/// by the same amount. Values that would end up at or below zero refer to
+/// positions farther away than the cyclic buffer size; they are replaced
+/// with EMPTY_HASH_VALUE.
+static void
+normalize(lzma_mf *mf)
+{
+	assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);
+
+	// In future we may not want to touch the lowest bits, because there
+	// may be match finders that use larger resolution than one byte.
+	// (That would mean masking this with
+	// & (~(UINT32_C(1) << 10) - 1) or similar.)
+	const uint32_t subvalue
+			= MUST_NORMALIZE_POS - mf->cyclic_buffer_size;
+
+	// mf->hash and mf->son are walked as one array of this many elements.
+	const uint32_t total = mf->hash_size_sum + mf->sons_count;
+	uint32_t *const first = mf->hash;
+
+	// NOTE: Only the first mf->hash_size_sum elements are initialized
+	// for sure. mf->son may contain uninitialized elements, so Valgrind
+	// may complain that the comparison below depends on uninitialized
+	// value. That warning is safe to ignore here. See also the comments
+	// in lz_encoder_init() in lz_encoder.c.
+	for (uint32_t *slot = first; slot != first + total; ++slot)
+		*slot = *slot <= subvalue
+				? EMPTY_HASH_VALUE : *slot - subvalue;
+
+	// Keep offset in sync with the rebased positions.
+	mf->offset -= subvalue;
+}
+
+
+/// Mark the current byte as processed from point of view of the match finder.
+///
+/// Advances cyclic_buffer_pos (wrapping to zero at cyclic_buffer_size) and
+/// read_pos by one. When read_pos + offset reaches MUST_NORMALIZE_POS, the
+/// stored positions are rebased with normalize().
+static void
+move_pos(lzma_mf *mf)
+{
+	if (++mf->cyclic_buffer_pos == mf->cyclic_buffer_size)
+		mf->cyclic_buffer_pos = 0;
+
+	++mf->read_pos;
+	assert(mf->read_pos <= mf->write_pos);
+
+	// Use the same named constant that normalize() asserts against so
+	// that the trigger and the assertion cannot get out of sync.
+	if (unlikely(mf->read_pos + mf->offset == MUST_NORMALIZE_POS))
+		normalize(mf);
+}
+
+
+/// Skip the current byte without running the match finder on it.
+///
+/// While flushing, the match finder cannot be run unless find_len_max bytes
+/// are available in the dictionary. In that situation the byte is reported
+/// as "no match found" and merely counted in mf->pending.
+///
+/// Once more input arrives after the flush, the match finder is restarted
+/// by rewinding mf->read_pos by mf->pending, and the missed bytes are fed
+/// to the hash through the match finder's skip function. (With only a small
+/// amount of new input while flushing, mf->pending may get used again.)
+///
+/// Because of that rewinding, neither cyclic_buffer_pos nor the
+/// normalization check is handled here; the skip function takes care of
+/// them when it catches up after the flush.
+static void
+move_pending(lzma_mf *mf)
+{
+	++mf->pending;
+
+	++mf->read_pos;
+	assert(mf->read_pos <= mf->write_pos);
+}
+
+
+/// Calculate len_limit and determine if there is enough input to run
+/// the actual match finder code. Sets up "cur" and "pos". This macro
+/// is used by all find functions and binary tree skip functions. Hash
+/// chain skip function doesn't need len_limit so a simpler code is used
+/// in them.
+///
+/// \param      is_bt       True for binary tree match finders; those are
+///                         not run at all while mf->action is
+///                         LZMA_SYNC_FLUSH and fewer than find_len_max
+///                         bytes are available.
+/// \param      len_min     Minimum number of available bytes the match
+///                         finder needs.
+/// \param      ret_op      Statement to run after move_pending() when
+///                         the match finder cannot be run (for example
+///                         "return 0" or "continue").
+///
+/// Declares len_limit, cur and pos in the enclosing scope and expects "mf"
+/// to be in scope.
+#define header(is_bt, len_min, ret_op) \
+	uint32_t len_limit = mf_avail(mf); \
+	if (mf->find_len_max <= len_limit) { \
+		len_limit = mf->find_len_max; \
+	} else if (len_limit < (len_min) \
+			|| ((is_bt) && mf->action == LZMA_SYNC_FLUSH)) { \
+		assert(mf->action != LZMA_RUN); \
+		move_pending(mf); \
+		ret_op; \
+	} \
+	const uint8_t *cur = mf_ptr(mf); \
+	const uint32_t pos = mf->read_pos + mf->offset
+
+
+/// Header for find functions. "return 0" indicates that zero matches
+/// were found.
+///
+/// In addition to what header() declares, this declares matches_count
+/// initialized to zero; the find functions and call_find() update it.
+#define header_find(is_bt, len_min) \
+ header(is_bt, len_min, return 0); \
+ uint32_t matches_count = 0
+
+
+/// Header for a loop in a skip function. "continue" tells to skip the rest
+/// of the code in the loop.
+///
+/// Because of the "continue", this has to be expanded directly inside the
+/// loop body of the skip function.
+#define header_skip(is_bt, len_min) \
+ header(is_bt, len_min, continue)
+
+
+/// Calls hc_find_func() or bt_find_func() and calculates the total number
+/// of matches found. Updates the dictionary position and returns the number
+/// of matches found.
+///
+/// func is given matches + matches_count as its output position and returns
+/// a pointer just past the last match it stored, so subtracting "matches"
+/// gives the total count including matches stored before the call.
+///
+/// Expects mf, matches, matches_count, len_limit, pos, cur and cur_match to
+/// be in scope. The expansion ends with a return statement, so nothing
+/// after it in the calling function is executed.
+#define call_find(func, len_best) \
+do { \
+ matches_count = func(len_limit, pos, cur, cur_match, mf->loops, \
+ mf->son, mf->cyclic_buffer_pos, \
+ mf->cyclic_buffer_size, \
+ matches + matches_count, len_best) \
+ - matches; \
+ move_pos(mf); \
+ return matches_count; \
+} while (0)
+
+
+////////////////
+// Hash Chain //
+////////////////
+
+#if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4)
+/// \brief Walk a hash chain and collect matches longer than len_best
+///
+/// Links the current position into the chain and then follows the chain,
+/// storing every candidate that gives a longer match than the previous
+/// best. The walk ends when "loops" candidates have been tried, when a
+/// candidate is cyclic_buffer_size or more positions away, or when a match
+/// of len_limit bytes has been stored.
+///
+/// \param len_limit Don't look for matches longer than len_limit.
+/// \param pos lzma_mf.read_pos + lzma_mf.offset
+/// \param cur Pointer to current byte (mf_ptr(mf))
+/// \param cur_match Start position of the current match candidate
+/// \param loops Maximum length of the hash chain
+/// \param son lzma_mf.son (contains the hash chain)
+/// \param cyclic_buffer_pos Index in son[] that belongs to the current byte
+/// \param cyclic_buffer_size Number of positions in the cyclic buffer;
+/// candidates at this distance or farther are not used
+/// \param matches Array to hold the matches.
+/// \param len_best The length of the longest match found so far.
+///
+/// \return Pointer to the element after the last match stored in matches[]
+static lzma_match *
+hc_find_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t loops,
+ uint32_t *const son,
+ const uint32_t cyclic_buffer_pos,
+ const uint32_t cyclic_buffer_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{
+ // Link the current position to the previous head of the chain.
+ son[cyclic_buffer_pos] = cur_match;
+
+ while (true) {
+ // How far behind the current position the candidate is.
+ const uint32_t delta = pos - cur_match;
+ if (loops-- == 0 || delta >= cyclic_buffer_size)
+ return matches;
+
+ const uint8_t *const pb = cur - delta;
+ // Fetch the next candidate now; the index wraps around
+ // the cyclic buffer when delta exceeds cyclic_buffer_pos.
+ cur_match = son[cyclic_buffer_pos - delta
+ + (delta > cyclic_buffer_pos
+ ? cyclic_buffer_size : 0)];
+
+ // Quick rejection: a candidate can beat len_best only if the
+ // byte at offset len_best matches too.
+ if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
+ uint32_t len = 0;
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+
+ if (len_best < len) {
+ len_best = len;
+ matches->len = len;
+ // Distances are stored zero-based.
+ matches->dist = delta - 1;
+ ++matches;
+
+ if (len == len_limit)
+ return matches;
+ }
+ }
+ }
+}
+
+/*
+#define hc_header_find(len_min, ret_op) \
+ uint32_t len_limit = mf_avail(mf); \
+ if (mf->find_len_max <= len_limit) { \
+ len_limit = mf->find_len_max; \
+ } else if (len_limit < (len_min)) { \
+ move_pending(mf); \
+ ret_op; \
+ } \
+#define header_hc(len_min, ret_op) \
+do { \
+ if (mf_avail(mf) < (len_min)) { \
+ move_pending(mf); \
+ ret_op; \
+ } \
+} while (0)
+*/
+
+/// Runs hc_find_func() through call_find(); the expansion returns from the
+/// calling function with the total number of matches.
+#define hc_find(len_best) \
+ call_find(hc_find_func, len_best)
+
+
+/// Adds the current position to the hash chain without searching for
+/// matches: cur_match is stored into son[] at cyclic_buffer_pos and the
+/// position is advanced with move_pos(). Expects mf and cur_match to be
+/// in scope.
+#define hc_skip() \
+do { \
+ mf->son[mf->cyclic_buffer_pos] = cur_match; \
+ move_pos(mf); \
+} while (0)
+
+#endif
+
+
+#ifdef HAVE_MF_HC3
+/// Hash chain match finder with 2- and 3-byte hashing: find matches for
+/// the current byte. Returns the number of pairs stored in matches[].
+extern uint32_t
+lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(false, 3);
+
+	hash_3_calc();
+
+	// Read the previous occurrences before overwriting the hash heads.
+	const uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 2;
+
+	// Candidate from the 2-byte hash.
+	if (delta2 < mf->cyclic_buffer_size && cur[0] == *(cur - delta2)) {
+		const uint8_t *const ref = cur - delta2;
+
+		while (len_best != len_limit
+				&& ref[len_best] == cur[len_best])
+			++len_best;
+
+		matches[0].len = len_best;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+
+		// Already the longest possible match: only link the
+		// position into the chain and skip the chain walk.
+		if (len_best == len_limit) {
+			mf->son[mf->cyclic_buffer_pos] = cur_match;
+			move_pos(mf);
+			return matches_count;
+		}
+	}
+
+	call_find(hc_find_func, len_best);
+}
+
+
+/// Hash chain match finder with 2- and 3-byte hashing: add "amount" bytes
+/// to the hash without searching for matches.
+extern void
+lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		if (mf_avail(mf) >= 3) {
+			const uint8_t *cur = mf_ptr(mf);
+			const uint32_t pos = mf->read_pos + mf->offset;
+
+			hash_3_calc();
+
+			const uint32_t cur_match
+					= mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+			mf->hash[hash_2_value] = pos;
+			mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+			// Link into the chain and advance.
+			mf->son[mf->cyclic_buffer_pos] = cur_match;
+			move_pos(mf);
+		} else {
+			// Too little data to hash three bytes.
+			move_pending(mf);
+		}
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_HC4
+/// Hash chain match finder with 2-, 3- and 4-byte hashing: find matches
+/// for the current byte. Returns the number of pairs stored in matches[].
+extern uint32_t
+lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(false, 4);
+
+	hash_4_calc();
+
+	// Read the previous occurrences before overwriting the hash heads.
+	uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t delta3
+			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
+	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 1;
+
+	// Candidate from the 2-byte hash.
+	if (delta2 < mf->cyclic_buffer_size && cur[0] == *(cur - delta2)) {
+		matches[0].len = 2;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+		len_best = 2;
+	}
+
+	// Candidate from the 3-byte hash, if it is a different position.
+	// Its length is filled in below.
+	if (delta2 != delta3 && delta3 < mf->cyclic_buffer_size
+			&& cur[0] == *(cur - delta3)) {
+		matches[matches_count].dist = delta3 - 1;
+		++matches_count;
+		len_best = 3;
+		delta2 = delta3;
+	}
+
+	if (matches_count > 0) {
+		// Extend the most recently added candidate.
+		const uint8_t *const ref = cur - delta2;
+
+		while (len_best != len_limit
+				&& ref[len_best] == cur[len_best])
+			++len_best;
+
+		matches[matches_count - 1].len = len_best;
+
+		// Already the longest possible match: only link the
+		// position into the chain and skip the chain walk.
+		if (len_best == len_limit) {
+			mf->son[mf->cyclic_buffer_pos] = cur_match;
+			move_pos(mf);
+			return matches_count;
+		}
+	}
+
+	if (len_best < 3)
+		len_best = 3;
+
+	call_find(hc_find_func, len_best);
+}
+
+
+/// Hash chain match finder with 2-, 3- and 4-byte hashing: add "amount"
+/// bytes to the hash without searching for matches.
+extern void
+lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		if (mf_avail(mf) >= 4) {
+			const uint8_t *cur = mf_ptr(mf);
+			const uint32_t pos = mf->read_pos + mf->offset;
+
+			hash_4_calc();
+
+			const uint32_t cur_match
+					= mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+			mf->hash[hash_2_value] = pos;
+			mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+			mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+			// Link into the chain and advance.
+			mf->son[mf->cyclic_buffer_pos] = cur_match;
+			move_pos(mf);
+		} else {
+			// Too little data to hash four bytes.
+			move_pending(mf);
+		}
+	} while (--amount != 0);
+}
+#endif
+
+
+/////////////////
+// Binary Tree //
+/////////////////
+
+#if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4)
+/// \brief Search the binary tree for matches and insert the current position
+///
+/// Follows the tree starting from cur_match. Every candidate that gives a
+/// longer match than len_best is stored in matches[]. While descending,
+/// the two son[] entries of the current position (and of the nodes visited)
+/// are rewritten so that the current position gets linked into the tree.
+///
+/// The parameters have the same meaning as in hc_find_func(), except that
+/// son[] holds two entries per position here.
+///
+/// \return Pointer to the element after the last match stored in matches[]
+static lzma_match *
+bt_find_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t loops,
+ uint32_t *const son,
+ const uint32_t cyclic_buffer_pos,
+ const uint32_t cyclic_buffer_size,
+ lzma_match *matches,
+ uint32_t len_best)
+{
+ // The two son[] entries of the current position. They are filled in
+ // (or moved to entries of visited nodes) as the search descends.
+ uint32_t *ptr0 = son + (cyclic_buffer_pos << 1) + 1;
+ uint32_t *ptr1 = son + (cyclic_buffer_pos << 1);
+
+ // Match lengths recorded when ptr0 and ptr1 were last advanced.
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (true) {
+ const uint32_t delta = pos - cur_match;
+ if (loops-- == 0 || delta >= cyclic_buffer_size) {
+ // Search budget used up or candidate too far away:
+ // terminate both pending links.
+ *ptr0 = EMPTY_HASH_VALUE;
+ *ptr1 = EMPTY_HASH_VALUE;
+ return matches;
+ }
+
+ // The two son[] entries of the candidate; the index wraps
+ // around the cyclic buffer.
+ uint32_t *const pair = son + ((cyclic_buffer_pos - delta
+ + (delta > cyclic_buffer_pos
+ ? cyclic_buffer_size : 0)) << 1);
+
+ const uint8_t *const pb = cur - delta;
+ // Comparison starts at this offset; the bytes before it are
+ // not compared again.
+ uint32_t len = MIN(len0, len1);
+
+ if (pb[len] == cur[len]) {
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+
+ if (len_best < len) {
+ len_best = len;
+ matches->len = len;
+ // Distances are stored zero-based.
+ matches->dist = delta - 1;
+ ++matches;
+
+ if (len == len_limit) {
+ // Maximum length reached: take over
+ // the candidate's links and stop.
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return matches;
+ }
+ }
+ }
+
+ // Descend: the first differing byte decides which of the
+ // candidate's two links is followed next.
+ if (pb[len] < cur[len]) {
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ } else {
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+
+
+/// \brief Insert the current position into the binary tree
+///
+/// Performs the same tree walk and son[] updates as bt_find_func() but
+/// does not record any matches.
+static void
+bt_skip_func(
+ const uint32_t len_limit,
+ const uint32_t pos,
+ const uint8_t *const cur,
+ uint32_t cur_match,
+ uint32_t loops,
+ uint32_t *const son,
+ const uint32_t cyclic_buffer_pos,
+ const uint32_t cyclic_buffer_size)
+{
+ // The two son[] entries of the current position.
+ uint32_t *ptr0 = son + (cyclic_buffer_pos << 1) + 1;
+ uint32_t *ptr1 = son + (cyclic_buffer_pos << 1);
+
+ // Match lengths recorded when ptr0 and ptr1 were last advanced.
+ uint32_t len0 = 0;
+ uint32_t len1 = 0;
+
+ while (true) {
+ const uint32_t delta = pos - cur_match;
+ if (loops-- == 0 || delta >= cyclic_buffer_size) {
+ // Search budget used up or candidate too far away:
+ // terminate both pending links.
+ *ptr0 = EMPTY_HASH_VALUE;
+ *ptr1 = EMPTY_HASH_VALUE;
+ return;
+ }
+
+ // The two son[] entries of the candidate; the index wraps
+ // around the cyclic buffer.
+ uint32_t *pair = son + ((cyclic_buffer_pos - delta
+ + (delta > cyclic_buffer_pos
+ ? cyclic_buffer_size : 0)) << 1);
+ const uint8_t *pb = cur - delta;
+ // Comparison starts at this offset; the bytes before it are
+ // not compared again.
+ uint32_t len = MIN(len0, len1);
+
+ if (pb[len] == cur[len]) {
+ while (++len != len_limit)
+ if (pb[len] != cur[len])
+ break;
+
+ if (len == len_limit) {
+ // Maximum length reached: take over the
+ // candidate's links and stop.
+ *ptr1 = pair[0];
+ *ptr0 = pair[1];
+ return;
+ }
+ }
+
+ // Descend: the first differing byte decides which of the
+ // candidate's two links is followed next.
+ if (pb[len] < cur[len]) {
+ *ptr1 = cur_match;
+ ptr1 = pair + 1;
+ cur_match = *ptr1;
+ len1 = len;
+ } else {
+ *ptr0 = cur_match;
+ ptr0 = pair;
+ cur_match = *ptr0;
+ len0 = len;
+ }
+ }
+}
+
+
+/// Runs bt_find_func() through call_find(); the expansion returns from the
+/// calling function with the total number of matches.
+#define bt_find(len_best) \
+ call_find(bt_find_func, len_best)
+
+/// Inserts the current position into the binary tree with bt_skip_func()
+/// and advances with move_pos(). Expects mf, len_limit, pos, cur and
+/// cur_match to be in scope.
+#define bt_skip() \
+do { \
+ bt_skip_func(len_limit, pos, cur, cur_match, mf->loops, \
+ mf->son, mf->cyclic_buffer_pos, \
+ mf->cyclic_buffer_size); \
+ move_pos(mf); \
+} while (0)
+
+#endif
+
+
+#ifdef HAVE_MF_BT2
+/// Binary tree match finder with 2-byte hashing: find matches for the
+/// current byte. Returns the number of pairs stored in matches[].
+extern uint32_t
+lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 2);
+
+	hash_2_calc();
+
+	// Take the previous head of this hash bucket as the first
+	// candidate and make the current position the new head.
+	const uint32_t cur_match = mf->hash[hash_value];
+	mf->hash[hash_value] = pos;
+
+	call_find(bt_find_func, 1);
+}
+
+
+/// Binary tree match finder with 2-byte hashing: add "amount" bytes to
+/// the tree without collecting matches.
+extern void
+lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 2);
+
+		hash_2_calc();
+
+		const uint32_t cur_match = mf->hash[hash_value];
+		mf->hash[hash_value] = pos;
+
+		// Insert into the tree and advance.
+		bt_skip_func(len_limit, pos, cur, cur_match, mf->loops,
+				mf->son, mf->cyclic_buffer_pos,
+				mf->cyclic_buffer_size);
+		move_pos(mf);
+
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_BT3
+/// Binary tree match finder with 2- and 3-byte hashing: find matches for
+/// the current byte. Returns the number of pairs stored in matches[].
+extern uint32_t
+lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 3);
+
+	hash_3_calc();
+
+	// Read the previous occurrences before overwriting the hash heads.
+	const uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 2;
+
+	// Candidate from the 2-byte hash.
+	if (delta2 < mf->cyclic_buffer_size && cur[0] == *(cur - delta2)) {
+		const uint8_t *const ref = cur - delta2;
+
+		while (len_best != len_limit
+				&& ref[len_best] == cur[len_best])
+			++len_best;
+
+		matches[0].len = len_best;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+
+		// Already the longest possible match: only insert the
+		// position into the tree and skip the search.
+		if (len_best == len_limit) {
+			bt_skip_func(len_limit, pos, cur, cur_match,
+					mf->loops, mf->son,
+					mf->cyclic_buffer_pos,
+					mf->cyclic_buffer_size);
+			move_pos(mf);
+			return matches_count;
+		}
+	}
+
+	call_find(bt_find_func, len_best);
+}
+
+
+/// Binary tree match finder with 2- and 3-byte hashing: add "amount" bytes
+/// to the tree without collecting matches.
+extern void
+lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 3);
+
+		hash_3_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_3_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
+
+		// Insert into the tree and advance.
+		bt_skip_func(len_limit, pos, cur, cur_match, mf->loops,
+				mf->son, mf->cyclic_buffer_pos,
+				mf->cyclic_buffer_size);
+		move_pos(mf);
+
+	} while (--amount != 0);
+}
+#endif
+
+
+#ifdef HAVE_MF_BT4
+/// Binary tree match finder with 2-, 3- and 4-byte hashing: find matches
+/// for the current byte. Returns the number of pairs stored in matches[].
+extern uint32_t
+lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches)
+{
+	header_find(true, 4);
+
+	hash_4_calc();
+
+	// Read the previous occurrences before overwriting the hash heads.
+	uint32_t delta2 = pos - mf->hash[hash_2_value];
+	const uint32_t delta3
+			= pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value];
+	const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+	mf->hash[hash_2_value] = pos;
+	mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+	mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+	uint32_t len_best = 1;
+
+	// Candidate from the 2-byte hash.
+	if (delta2 < mf->cyclic_buffer_size && cur[0] == *(cur - delta2)) {
+		matches[0].len = 2;
+		matches[0].dist = delta2 - 1;
+		matches_count = 1;
+		len_best = 2;
+	}
+
+	// Candidate from the 3-byte hash, if it is a different position.
+	// Its length is filled in below.
+	if (delta2 != delta3 && delta3 < mf->cyclic_buffer_size
+			&& cur[0] == *(cur - delta3)) {
+		matches[matches_count].dist = delta3 - 1;
+		++matches_count;
+		len_best = 3;
+		delta2 = delta3;
+	}
+
+	if (matches_count > 0) {
+		// Extend the most recently added candidate.
+		const uint8_t *const ref = cur - delta2;
+
+		while (len_best != len_limit
+				&& ref[len_best] == cur[len_best])
+			++len_best;
+
+		matches[matches_count - 1].len = len_best;
+
+		// Already the longest possible match: only insert the
+		// position into the tree and skip the search.
+		if (len_best == len_limit) {
+			bt_skip_func(len_limit, pos, cur, cur_match,
+					mf->loops, mf->son,
+					mf->cyclic_buffer_pos,
+					mf->cyclic_buffer_size);
+			move_pos(mf);
+			return matches_count;
+		}
+	}
+
+	if (len_best < 3)
+		len_best = 3;
+
+	call_find(bt_find_func, len_best);
+}
+
+
+/// Binary tree match finder with 2-, 3- and 4-byte hashing: add "amount"
+/// bytes to the tree without collecting matches.
+extern void
+lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount)
+{
+	do {
+		header_skip(true, 4);
+
+		hash_4_calc();
+
+		const uint32_t cur_match
+				= mf->hash[FIX_4_HASH_SIZE + hash_value];
+
+		mf->hash[hash_2_value] = pos;
+		mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos;
+		mf->hash[FIX_4_HASH_SIZE + hash_value] = pos;
+
+		// Insert into the tree and advance.
+		bt_skip_func(len_limit, pos, cur, cur_match, mf->loops,
+				mf->son, mf->cyclic_buffer_pos,
+				mf->cyclic_buffer_size);
+		move_pos(mf);
+
+	} while (--amount != 0);
+}
+#endif
diff --git a/src/liblzma/lz/lz_encoder_private.h b/src/liblzma/lz/lz_encoder_private.h
deleted file mode 100644
index 638fcb2d..00000000
--- a/src/liblzma/lz/lz_encoder_private.h
+++ /dev/null
@@ -1,40 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file lz_encoder_private.h
-/// \brief Private definitions for LZ encoder
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZMA_LZ_ENCODER_PRIVATE_H
-#define LZMA_LZ_ENCODER_PRIVATE_H
-
-#include "lz_encoder.h"
-
-/// Value used to indicate unused slot
-#define EMPTY_HASH_VALUE 0
-
-/// When the dictionary and hash variables need to be adjusted to prevent
-/// integer overflows. Since we use uint32_t to store the offsets, half
-/// of it is the biggest safe limit.
-#define MAX_VAL_FOR_NORMALIZE (UINT32_MAX / 2)
-
-
-struct lzma_coder_s {
- lzma_next_coder next;
- lzma_lz_encoder lz;
-};
-
-#endif
diff --git a/src/liblzma/lz/match_c.h b/src/liblzma/lz/match_c.h
deleted file mode 100644
index 664db290..00000000
--- a/src/liblzma/lz/match_c.h
+++ /dev/null
@@ -1,412 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file match_c.h
-/// \brief Template for different match finders
-///
-/// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
-/// sets slighly different #defines, resulting the different match finders.
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-//////////////
-// Includes //
-//////////////
-
-#include "check.h"
-
-
-///////////////
-// Constants //
-///////////////
-
-#define START_MAX_LEN 1
-
-#ifdef HASH_ARRAY_2
-# define NUM_HASH_DIRECT_BYTES 0
-# define HASH_2_SIZE (1 << 10)
-# ifdef HASH_ARRAY_3
-# define NUM_HASH_BYTES 4
-# define HASH_3_SIZE (1 << 16)
-# define HASH_3_OFFSET HASH_2_SIZE
-# define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
-# else
-# define NUM_HASH_BYTES 3
-# define FIX_HASH_SIZE HASH_2_SIZE
-# endif
-# define HASH_SIZE 0
-# define MIN_MATCH_CHECK NUM_HASH_BYTES
-#else
-# define NUM_HASH_DIRECT_BYTES 2
-# define NUM_HASH_BYTES 2
-# define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
-# define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
-# define FIX_HASH_SIZE 0
-#endif
-
-
-////////////
-// Macros //
-////////////
-
-#ifdef HASH_ARRAY_2
-# ifdef HASH_ARRAY_3
-# define HASH_CALC() \
- do { \
- const uint32_t temp = lzma_crc32_table[0][ \
- cur[0]] ^ cur[1]; \
- hash_2_value = temp & (HASH_2_SIZE - 1); \
- hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
- & (HASH_3_SIZE - 1); \
- hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
- ^ (lzma_crc32_table[0][cur[3]] << 5)) \
- & lz->hash_mask; \
- } while (0)
-# else
-# define HASH_CALC() \
- do { \
- const uint32_t temp = lzma_crc32_table[0][ \
- cur[0]] ^ cur[1]; \
- hash_2_value = temp & (HASH_2_SIZE - 1); \
- hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
- & lz->hash_mask; \
- } while (0)
-# endif
-#else
-# define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
-#endif
-
-
-// Moves the current read position forward by one byte. In LZMA SDK,
-// CLZInWindow::MovePos() can read more input data if needed, because of
-// the callback style API. In liblzma we must have ensured earlier, that
-// there is enough data available in lz->buffer.
-#define move_pos() \
-do { \
- if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
- lz->cyclic_buffer_pos = 0; \
- ++lz->read_pos; \
- assert(lz->read_pos <= lz->write_pos); \
- if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
- lzma_lz_encoder_normalize(lz); \
-} while (0)
-
-
-#define move_pending() \
-do { \
- ++lz->read_pos; \
- assert(lz->read_pos <= lz->write_pos); \
- ++lz->pending; \
-} while (0)
-
-
-//////////////////////
-// Global constants //
-//////////////////////
-
-LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
-LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
-
-
-///////////////////
-// API functions //
-///////////////////
-
-LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
-{
- uint32_t len_limit;
- if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
- len_limit = lz->match_max_len;
- } else {
- len_limit = lz->write_pos - lz->read_pos;
- if (len_limit < MIN_MATCH_CHECK || lz->sequence == SEQ_FLUSH) {
- distances[0] = 0;
- move_pending();
- return;
- }
- }
-
- assert(lz->pending == 0);
-
- int32_t offset = 1;
- const uint32_t match_min_pos
- = lz->read_pos + lz->offset > lz->cyclic_buffer_size
- ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
- : 0;
- const uint8_t *cur = lz->buffer + lz->read_pos;
- uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
-
-#ifdef HASH_ARRAY_2
- uint32_t hash_2_value;
-# ifdef HASH_ARRAY_3
- uint32_t hash_3_value;
-# endif
-#endif
- uint32_t hash_value;
- HASH_CALC();
-
- uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
-#ifdef HASH_ARRAY_2
- uint32_t cur_match2 = lz->hash[hash_2_value];
-# ifdef HASH_ARRAY_3
- uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
-# endif
- lz->hash[hash_2_value] = lz->read_pos + lz->offset;
-
- if (cur_match2 > match_min_pos) {
- if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
- max_len = 2;
- distances[offset++] = 2;
- distances[offset++] = lz->read_pos + lz->offset
- - cur_match2 - 1;
- }
- }
-
-# ifdef HASH_ARRAY_3
- lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
- if (cur_match3 > match_min_pos) {
- if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
- if (cur_match3 == cur_match2)
- offset -= 2;
-
- max_len = 3;
- distances[offset++] = 3;
- distances[offset++] = lz->read_pos + lz->offset
- - cur_match3 - 1;
- cur_match2 = cur_match3;
- }
- }
-# endif
-
- if (offset != 1 && cur_match2 == cur_match) {
- offset -= 2;
- max_len = START_MAX_LEN;
- }
-#endif
-
- lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
-
-#ifdef IS_HASH_CHAIN
- lz->son[lz->cyclic_buffer_pos] = cur_match;
-#else
- uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
- uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
-
- uint32_t len0 = NUM_HASH_DIRECT_BYTES;
- uint32_t len1 = NUM_HASH_DIRECT_BYTES;
-#endif
-
-#if NUM_HASH_DIRECT_BYTES != 0
- if (cur_match > match_min_pos) {
- if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
- != cur[NUM_HASH_DIRECT_BYTES]) {
- max_len = NUM_HASH_DIRECT_BYTES;
- distances[offset++] = NUM_HASH_DIRECT_BYTES;
- distances[offset++] = lz->read_pos + lz->offset
- - cur_match - 1;
- }
- }
-#endif
-
- uint32_t count = lz->cut_value;
-
- while (true) {
- if (cur_match <= match_min_pos || count-- == 0) {
-#ifndef IS_HASH_CHAIN
- *ptr0 = EMPTY_HASH_VALUE;
- *ptr1 = EMPTY_HASH_VALUE;
-#endif
- break;
- }
-
- const uint32_t delta = lz->read_pos + lz->offset - cur_match;
- const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
- ? lz->cyclic_buffer_pos - delta
- : lz->cyclic_buffer_pos - delta
- + lz->cyclic_buffer_size;
- uint32_t *pair = lz->son +
-#ifdef IS_HASH_CHAIN
- cyclic_pos;
-#else
- (cyclic_pos << 1);
-#endif
-
- const uint8_t *pb = lz->buffer + cur_match - lz->offset;
- uint32_t len =
-#ifdef IS_HASH_CHAIN
- NUM_HASH_DIRECT_BYTES;
- if (pb[max_len] == cur[max_len])
-#else
- MIN(len0, len1);
-#endif
-
- if (pb[len] == cur[len]) {
- while (++len != len_limit)
- if (pb[len] != cur[len])
- break;
-
- if (max_len < len) {
- max_len = len;
- distances[offset++] = len;
- distances[offset++] = delta - 1;
- if (len == len_limit) {
-#ifndef IS_HASH_CHAIN
- *ptr1 = pair[0];
- *ptr0 = pair[1];
-#endif
- break;
- }
- }
- }
-
-#ifdef IS_HASH_CHAIN
- cur_match = *pair;
-#else
- if (pb[len] < cur[len]) {
- *ptr1 = cur_match;
- ptr1 = pair + 1;
- cur_match = *ptr1;
- len1 = len;
- } else {
- *ptr0 = cur_match;
- ptr0 = pair;
- cur_match = *ptr0;
- len0 = len;
- }
-#endif
- }
-
- distances[0] = offset - 1;
-
- move_pos();
-
- return;
-}
-
-
-LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
-{
- do {
-#ifdef IS_HASH_CHAIN
- if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
- move_pending();
- continue;
- }
-#else
- uint32_t len_limit;
- if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
- len_limit = lz->match_max_len;
- } else {
- len_limit = lz->write_pos - lz->read_pos;
- if (len_limit < MIN_MATCH_CHECK
- || lz->sequence == SEQ_FLUSH) {
- move_pending();
- continue;
- }
- }
- const uint32_t match_min_pos
- = lz->read_pos + lz->offset > lz->cyclic_buffer_size
- ? lz->read_pos + lz->offset - lz->cyclic_buffer_size
- : 0;
-#endif
-
- assert(lz->pending == 0);
-
- const uint8_t *cur = lz->buffer + lz->read_pos;
-
-#ifdef HASH_ARRAY_2
- uint32_t hash_2_value;
-# ifdef HASH_ARRAY_3
- uint32_t hash_3_value;
- uint32_t hash_value;
- HASH_CALC();
- lz->hash[HASH_3_OFFSET + hash_3_value]
- = lz->read_pos + lz->offset;
-# else
- uint32_t hash_value;
- HASH_CALC();
-# endif
- lz->hash[hash_2_value] = lz->read_pos + lz->offset;
-#else
- uint32_t hash_value;
- HASH_CALC();
-#endif
-
- uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
- lz->hash[FIX_HASH_SIZE + hash_value]
- = lz->read_pos + lz->offset;
-
-#ifdef IS_HASH_CHAIN
- lz->son[lz->cyclic_buffer_pos] = cur_match;
-#else
- uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
- uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
-
- uint32_t len0 = NUM_HASH_DIRECT_BYTES;
- uint32_t len1 = NUM_HASH_DIRECT_BYTES;
- uint32_t count = lz->cut_value;
-
- while (true) {
- if (cur_match <= match_min_pos || count-- == 0) {
- *ptr0 = EMPTY_HASH_VALUE;
- *ptr1 = EMPTY_HASH_VALUE;
- break;
- }
-
- const uint32_t delta = lz->read_pos
- + lz->offset - cur_match;
- const uint32_t cyclic_pos
- = delta <= lz->cyclic_buffer_pos
- ? lz->cyclic_buffer_pos - delta
- : lz->cyclic_buffer_pos - delta
- + lz->cyclic_buffer_size;
- uint32_t *pair = lz->son + (cyclic_pos << 1);
-
- const uint8_t *pb = lz->buffer + cur_match
- - lz->offset;
- uint32_t len = MIN(len0, len1);
-
- if (pb[len] == cur[len]) {
- while (++len != len_limit)
- if (pb[len] != cur[len])
- break;
-
- if (len == len_limit) {
- *ptr1 = pair[0];
- *ptr0 = pair[1];
- break;
- }
- }
-
- if (pb[len] < cur[len]) {
- *ptr1 = cur_match;
- ptr1 = pair + 1;
- cur_match = *ptr1;
- len1 = len;
- } else {
- *ptr0 = cur_match;
- ptr0 = pair;
- cur_match = *ptr0;
- len0 = len;
- }
- }
-#endif
-
- move_pos();
-
- } while (--num != 0);
-
- return;
-}
diff --git a/src/liblzma/lz/match_h.h b/src/liblzma/lz/match_h.h
deleted file mode 100644
index 2eae90ba..00000000
--- a/src/liblzma/lz/match_h.h
+++ /dev/null
@@ -1,69 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file match_h.h
-/// \brief Header template for different match finders
-//
-// Copyright (C) 1999-2006 Igor Pavlov
-// Copyright (C) 2007 Lasse Collin
-//
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 2.1 of the License, or (at your option) any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "lz_encoder_private.h"
-
-
-//////////////////////
-// Global constants //
-//////////////////////
-
-#undef LZMA_HASH_SIZE
-#undef LZMA_FIX_HASH_SIZE
-#undef LZMA_HASH_SIZE_C
-#undef LZMA_FIX_HASH_SIZE_C
-
-#define LZMA_HASH_SIZE(mf_name) LZMA_HASH_SIZE_C(mf_name)
-#define LZMA_FIX_HASH_SIZE(mf_name) LZMA_FIX_HASH_SIZE_C(mf_name)
-
-#define LZMA_HASH_SIZE_C(mf_name) \
- const uint32_t LZMA_ ## mf_name ## _HASH_SIZE
-
-#define LZMA_FIX_HASH_SIZE_C(mf_name) \
- const uint32_t LZMA_ ## mf_name ## _FIX_HASH_SIZE
-
-extern LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER);
-extern LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER);
-
-
-///////////////
-// Functions //
-///////////////
-
-#undef LZMA_GET_MATCHES
-#undef LZMA_SKIP
-#undef LZMA_GET_MATCHES_C
-#undef LZMA_SKIP_C
-
-#define LZMA_GET_MATCHES(mf_name) LZMA_GET_MATCHES_C(mf_name)
-#define LZMA_SKIP(mf_name) LZMA_SKIP_C(mf_name)
-
-#define LZMA_GET_MATCHES_C(mf_name) \
- extern void lzma_ ## mf_name ## _get_matches( \
- lzma_lz_encoder *restrict lz, \
- uint32_t *restrict distances)
-
-#define LZMA_SKIP_C(mf_name) \
- extern void lzma_ ## mf_name ## _skip( \
- lzma_lz_encoder *lz, uint32_t num)
-
-LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER);
-
-LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER);