diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2007-12-09 00:42:33 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2007-12-09 00:42:33 +0200 |
commit | 5d018dc03549c1ee4958364712fb0c94e1bf2741 (patch) | |
tree | 1b211911fb33fddb3f04b77f99e81df23623ffc4 /src/liblzma/lz | |
download | xz-5d018dc03549c1ee4958364712fb0c94e1bf2741.tar.xz |
Imported to git.
Diffstat (limited to 'src/liblzma/lz')
-rw-r--r-- | src/liblzma/lz/Makefile.am | 63 | ||||
-rw-r--r-- | src/liblzma/lz/bt2.c | 27 | ||||
-rw-r--r-- | src/liblzma/lz/bt2.h | 31 | ||||
-rw-r--r-- | src/liblzma/lz/bt3.c | 29 | ||||
-rw-r--r-- | src/liblzma/lz/bt3.h | 31 | ||||
-rw-r--r-- | src/liblzma/lz/bt4.c | 30 | ||||
-rw-r--r-- | src/liblzma/lz/bt4.h | 31 | ||||
-rw-r--r-- | src/liblzma/lz/hc3.c | 30 | ||||
-rw-r--r-- | src/liblzma/lz/hc3.h | 31 | ||||
-rw-r--r-- | src/liblzma/lz/hc4.c | 31 | ||||
-rw-r--r-- | src/liblzma/lz/hc4.h | 31 | ||||
-rw-r--r-- | src/liblzma/lz/lz_decoder.c | 462 | ||||
-rw-r--r-- | src/liblzma/lz/lz_decoder.h | 214 | ||||
-rw-r--r-- | src/liblzma/lz/lz_encoder.c | 481 | ||||
-rw-r--r-- | src/liblzma/lz/lz_encoder.h | 161 | ||||
-rw-r--r-- | src/liblzma/lz/lz_encoder_private.h | 40 | ||||
-rw-r--r-- | src/liblzma/lz/match_c.h | 401 | ||||
-rw-r--r-- | src/liblzma/lz/match_h.h | 69 |
18 files changed, 2193 insertions, 0 deletions
diff --git a/src/liblzma/lz/Makefile.am b/src/liblzma/lz/Makefile.am new file mode 100644 index 00000000..5c27e2f2 --- /dev/null +++ b/src/liblzma/lz/Makefile.am @@ -0,0 +1,63 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This library is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This library is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +noinst_LTLIBRARIES = liblz.la +liblz_la_CPPFLAGS = \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_srcdir@/src/liblzma/common \ + -I@top_srcdir@/src/liblzma/check +liblz_la_SOURCES = + + +if COND_MAIN_ENCODER +liblz_la_SOURCES += \ + lz_encoder.c \ + lz_encoder.h \ + lz_encoder_private.h \ + match_c.h \ + match_h.h + +if COND_MF_HC3 +liblz_la_SOURCES += hc3.c hc3.h +liblz_la_CPPFLAGS += -DHAVE_HC3 +endif + +if COND_MF_HC4 +liblz_la_SOURCES += hc4.c hc4.h +liblz_la_CPPFLAGS += -DHAVE_HC4 +endif + +if COND_MF_BT2 +liblz_la_SOURCES += bt2.c bt2.h +liblz_la_CPPFLAGS += -DHAVE_BT2 +endif + +if COND_MF_BT3 +liblz_la_SOURCES += bt3.c bt3.h +liblz_la_CPPFLAGS += -DHAVE_BT3 +endif + +if COND_MF_BT4 +liblz_la_SOURCES += bt4.c bt4.h +liblz_la_CPPFLAGS += -DHAVE_BT4 +endif + +endif + + +if COND_MAIN_DECODER +liblz_la_SOURCES += \ + lz_decoder.c \ + lz_decoder.h +endif diff --git a/src/liblzma/lz/bt2.c b/src/liblzma/lz/bt2.c new file mode 100644 index 00000000..7dc4cb80 --- /dev/null +++ b/src/liblzma/lz/bt2.c @@ -0,0 +1,27 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt2.c +/// \brief Binary Tree 2 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt2.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt2.h b/src/liblzma/lz/bt2.h new file mode 100644 index 00000000..33cb52cd --- /dev/null +++ b/src/liblzma/lz/bt2.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt2.h +/// \brief Binary Tree 2 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT2_H +#define LZMA_BT2_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt2 +#define LZMA_MATCH_FINDER_NAME_UPPER BT2 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/bt3.c b/src/liblzma/lz/bt3.c new file mode 100644 index 00000000..d44310f3 --- /dev/null +++ b/src/liblzma/lz/bt3.c @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt3.c +/// \brief Binary Tree 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt3.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define HASH_ARRAY_2 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt3.h b/src/liblzma/lz/bt3.h new file mode 100644 index 00000000..247c7e5f --- /dev/null +++ b/src/liblzma/lz/bt3.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt3.h +/// \brief Binary Tree 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT3_H +#define LZMA_BT3_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt3 +#define LZMA_MATCH_FINDER_NAME_UPPER BT3 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/bt4.c b/src/liblzma/lz/bt4.c new file mode 100644 index 00000000..6e1042c9 --- /dev/null +++ b/src/liblzma/lz/bt4.c @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt4.c +/// \brief Binary Tree 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "bt4.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define HASH_ARRAY_2 +#define HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/bt4.h b/src/liblzma/lz/bt4.h new file mode 100644 index 00000000..e3fcf6ac --- /dev/null +++ b/src/liblzma/lz/bt4.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file bt4.h +/// \brief Binary Tree 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_BT4_H +#define LZMA_BT4_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER bt4 +#define LZMA_MATCH_FINDER_NAME_UPPER BT4 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/hc3.c b/src/liblzma/lz/hc3.c new file mode 100644 index 00000000..22b5689b --- /dev/null +++ b/src/liblzma/lz/hc3.c @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc3.c +/// \brief Hash Chain 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "hc3.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define IS_HASH_CHAIN +#define HASH_ARRAY_2 + +#include "match_c.h" diff --git a/src/liblzma/lz/hc3.h b/src/liblzma/lz/hc3.h new file mode 100644 index 00000000..97be0b1d --- /dev/null +++ b/src/liblzma/lz/hc3.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc3.h +/// \brief Hash Chain 3 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_HC3_H +#define LZMA_HC3_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER hc3 +#define LZMA_MATCH_FINDER_NAME_UPPER HC3 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/hc4.c b/src/liblzma/lz/hc4.c new file mode 100644 index 00000000..a55cfd09 --- /dev/null +++ b/src/liblzma/lz/hc4.c @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc4.c +/// \brief Hash Chain 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "hc4.h" + +#undef IS_HASH_CHAIN +#undef HASH_ARRAY_2 +#undef HASH_ARRAY_3 + +#define IS_HASH_CHAIN +#define HASH_ARRAY_2 +#define HASH_ARRAY_3 + +#include "match_c.h" diff --git a/src/liblzma/lz/hc4.h b/src/liblzma/lz/hc4.h new file mode 100644 index 00000000..dc072e2f --- /dev/null +++ b/src/liblzma/lz/hc4.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hc4.h +/// \brief Hash Chain 4 +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_HC4_H +#define LZMA_HC4_H + +#undef LZMA_MATCH_FINDER_NAME_LOWER +#undef LZMA_MATCH_FINDER_NAME_UPPER +#define LZMA_MATCH_FINDER_NAME_LOWER hc4 +#define LZMA_MATCH_FINDER_NAME_UPPER HC4 + +#include "match_h.h" + +#endif diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c new file mode 100644 index 00000000..9c110dec --- /dev/null +++ b/src/liblzma/lz/lz_decoder.c @@ -0,0 +1,462 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.c +/// \brief LZ out window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_decoder.h" + + +/// Minimum size of allocated dictionary +#define DICT_SIZE_MIN 8192 + +/// When there is less than this amount of data available for decoding, +/// it is moved to the temporary buffer which +/// - protects from reads past the end of the buffer; and +/// - stored the incomplete data between lzma_code() calls. +/// +/// \note TEMP_LIMIT must be at least as much as +/// REQUIRED_IN_BUFFER_SIZE defined in lzma_decoder.c. +#define TEMP_LIMIT 32 + +// lzma_lz_decoder.dict[] must be three times the size of TEMP_LIMIT. +// 2 * TEMP_LIMIT is used for the actual data, and the third TEMP_LIMIT +// bytes is needed for safety to allow decode_dummy() in lzma_decoder.c +// to read past end of the buffer. This way it should be both fast and simple. +#if LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT +# error LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT +#endif + + +struct lzma_coder_s { + lzma_next_coder next; + lzma_lz_decoder lz; + + // There are more members in this structure but they are not + // visible in LZ coder. +}; + + +/// - Copy as much data as possible from lz->dict[] to out[]. +/// - Update *out_pos, lz->start, and lz->end accordingly. +/// - Wrap lz-pos to the beginning of lz->dict[] if there is a danger that +/// it may go past the end of the buffer (lz->pos >= lz->must_flush_pos). +static inline bool +flush(lzma_lz_decoder *restrict lz, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size) +{ + // Flush uncompressed data from the history buffer to + // the output buffer. This is done in two phases. + + assert(lz->start <= lz->end); + + // Flush if pos < start < end. + if (lz->pos < lz->start && lz->start < lz->end) { + bufcpy(lz->dict, &lz->start, lz->end, out, out_pos, out_size); + + // If we reached end of the data in history buffer, + // wrap to the beginning. + if (lz->start == lz->end) + lz->start = 0; + } + + // Flush if start start < pos <= end. This is not as `else' for + // previous `if' because the previous one may make this one true. + if (lz->start < lz->pos) { + bufcpy(lz->dict, &lz->start, + lz->pos, out, out_pos, out_size); + + if (lz->pos >= lz->must_flush_pos) { + // Wrap the flushing position if we have + // flushed the whole history buffer. + if (lz->pos == lz->start) + lz->start = 0; + + // Wrap the write position and store to lz.end + // how much there is new data available. + lz->end = lz->pos; + lz->pos = 0; + lz->is_full = true; + } + } + + assert(lz->pos < lz->must_flush_pos); + + return *out_pos == out_size; +} + + +/// Calculate safe value for lz->limit. If no safe value can be found, +/// set lz->limit to zero. When flushing, only as little data will be +/// decoded as is needed to fill the output buffer (lowers both latency +/// and throughput). +/// +/// \return true if there is no space for new uncompressed data. +/// +static inline bool +set_limit(lzma_lz_decoder *lz, size_t out_avail, bool flushing) +{ + // Set the limit so that writing to dict[limit + match_max_len - 1] + // doesn't overwrite any unflushed data and doesn't write past the + // end of the dict buffer. + if (lz->start <= lz->pos) { + // We can fill the buffer from pos till the end + // of the dict buffer. + lz->limit = lz->must_flush_pos; + } else if (lz->pos + lz->match_max_len < lz->start) { + // There's some unflushed data between pos and end of the + // buffer. Limit so that we don't overwrite the unflushed data. + lz->limit = lz->start - lz->match_max_len; + } else { + // Buffer is too full. + lz->limit = 0; + return true; + } + + // Finetune the limit a bit if it isn't zero. + + assert(lz->limit > lz->pos); + const size_t dict_avail = lz->limit - lz->pos; + + if (lz->uncompressed_size < dict_avail) { + // Finishing a stream that doesn't have + // an end of stream marker. + lz->limit = lz->pos + lz->uncompressed_size; + + } else if (flushing && out_avail < dict_avail) { + // Flushing enabled, decoding only as little as needed to + // fill the out buffer (if there's enough input, of course). + lz->limit = lz->pos + out_avail; + } + + return lz->limit == lz->pos; +} + + +/// Takes care of wrapping the data into temporary buffer when needed, +/// and calls the actual decoder. +/// +/// \return true if error occurred +/// +static inline bool +call_process(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) +{ + // It would be nice and simple if we could just give in[] to the + // decoder, but the requirement of zlib-like API forces us to be + // able to make *in_pos == in_size whenever there is enough output + // space. If needed, we will append a few bytes from in[] to + // a temporary buffer and decode enough to reach the part that + // was copied from in[]. Then we can continue with the real in[]. + + bool error; + const size_t dict_old_pos = coder->lz.pos; + const size_t in_avail = in_size - *in_pos; + + if (coder->lz.temp_size + in_avail < 2 * TEMP_LIMIT) { + // Copy all the available input from in[] to temp[]. + memcpy(coder->lz.temp + coder->lz.temp_size, + in + *in_pos, in_avail); + coder->lz.temp_size += in_avail; + *in_pos += in_avail; + assert(*in_pos == in_size); + + // Decode as much as possible. + size_t temp_used = 0; + error = coder->lz.process(coder, coder->lz.temp, &temp_used, + coder->lz.temp_size, true); + assert(temp_used <= coder->lz.temp_size); + + // Move the remaining data to the beginning of temp[]. + coder->lz.temp_size -= temp_used; + memmove(coder->lz.temp, coder->lz.temp + temp_used, + coder->lz.temp_size); + + } else if (coder->lz.temp_size > 0) { + // Fill temp[] unless it is already full because we aren't + // the last filter in the chain. + size_t copy_size = 0; + if (coder->lz.temp_size < 2 * TEMP_LIMIT) { + assert(*in_pos < in_size); + copy_size = 2 * TEMP_LIMIT - coder->lz.temp_size; + memcpy(coder->lz.temp + coder->lz.temp_size, + in + *in_pos, copy_size); + // NOTE: We don't update lz.temp_size or *in_pos yet. + } + + size_t temp_used = 0; + error = coder->lz.process(coder, coder->lz.temp, &temp_used, + coder->lz.temp_size + copy_size, false); + + if (temp_used < coder->lz.temp_size) { + // Only very little input data was consumed. Move + // the unprocessed data to the beginning temp[]. + coder->lz.temp_size += copy_size - temp_used; + memmove(coder->lz.temp, coder->lz.temp + temp_used, + coder->lz.temp_size); + *in_pos += copy_size; + assert(*in_pos <= in_size); + + } else { + // We were able to decode so much data that next time + // we can decode directly from in[]. That is, we can + // consider temp[] to be empty now. + *in_pos += temp_used - coder->lz.temp_size; + coder->lz.temp_size = 0; + assert(*in_pos <= in_size); + } + + } else { + // Decode directly from in[]. + error = coder->lz.process(coder, in, in_pos, in_size, false); + assert(*in_pos <= in_size); + } + + assert(coder->lz.pos >= dict_old_pos); + if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + // Update uncompressed size. + coder->lz.uncompressed_size -= coder->lz.pos - dict_old_pos; + + // Check that End of Payload Marker hasn't been detected + // since it must not be present because uncompressed size + // is known. + if (coder->lz.eopm_detected) + error = true; + } + + return error; +} + + +static lzma_ret +decode_buffer(lzma_coder *coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + bool flushing) +{ + bool stop = false; + + while (true) { + // Flush from coder->lz.dict to out[]. + flush(&coder->lz, out, out_pos, out_size); + + // All done? + if (*out_pos == out_size + || stop + || coder->lz.eopm_detected + || coder->lz.uncompressed_size == 0) + break; + + // Set write limit in the dictionary. + if (set_limit(&coder->lz, out_size - *out_pos, flushing)) + break; + + // Decode more data. + if (call_process(coder, in, in_pos, in_size)) + return LZMA_DATA_ERROR; + + // Set stop to true if we must not call call_process() again + // during this function call. + // FIXME: Can this make the loop exist too early? It wouldn't + // cause data corruption so not a critical problem. It can + // happen if dictionary gets full and lz.temp still contains + // a few bytes data that we could decode right now. + if (*in_pos == in_size && coder->lz.temp_size <= TEMP_LIMIT + && coder->lz.pos < coder->lz.limit) + stop = true; + } + + // If we have decoded everything (EOPM detected or uncompressed_size + // bytes were processed) to the history buffer, and also flushed + // everything from the history buffer, our job is done. + if ((coder->lz.eopm_detected + || coder->lz.uncompressed_size == 0) + && coder->lz.start == coder->lz.pos) + return LZMA_STREAM_END; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lz_decode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action) +{ + if (coder->next.code == NULL) { + const lzma_ret ret = decode_buffer(coder, in, in_pos, in_size, + out, out_pos, out_size, + action == LZMA_SYNC_FLUSH); + + if (*out_pos == out_size || ret == LZMA_STREAM_END) { + // Unread to make coder->temp[] empty. This is easy, + // because we know that all the data currently in + // coder->temp[] has been copied form in[] during this + // call to the decoder. + // + // If we didn't do this, we could have data left in + // coder->temp[] when end of stream is reached. That + // data could be left there from *previous* call to + // the decoder; in that case we wouldn't know where + // to put that data. + assert(*in_pos >= coder->lz.temp_size); + *in_pos -= coder->lz.temp_size; + coder->lz.temp_size = 0; + } + + return ret; + } + + // We aren't the last coder in the chain, we need to decode + // our input to a temporary buffer. + const bool flushing = action == LZMA_SYNC_FLUSH; + while (*out_pos < out_size) { + if (!coder->lz.next_finished + && coder->lz.temp_size < LZMA_BUFFER_SIZE) { + const lzma_ret ret = coder->next.code( + coder->next.coder, + allocator, in, in_pos, in_size, + coder->lz.temp, &coder->lz.temp_size, + LZMA_BUFFER_SIZE, action); + + if (ret == LZMA_STREAM_END) + coder->lz.next_finished = true; + else if (coder->lz.temp_size < LZMA_BUFFER_SIZE + || ret != LZMA_OK) + return ret; + } + + if (coder->lz.this_finished) { + if (coder->lz.temp_size != 0) + return LZMA_DATA_ERROR; + + if (coder->lz.next_finished) + return LZMA_STREAM_END; + + return LZMA_OK; + } + + size_t dummy = 0; + const lzma_ret ret = decode_buffer(coder, NULL, &dummy, 0, + out, out_pos, out_size, flushing); + + if (ret == LZMA_STREAM_END) + coder->lz.this_finished = true; + else if (ret != LZMA_OK) + return ret; + else if (coder->lz.next_finished && *out_pos < out_size) + return LZMA_DATA_ERROR; + } + + return LZMA_OK; +} + + +/// \brief Initializes LZ part of the LZMA decoder or Inflate +/// +/// \param history_size Number of bytes the LZ out window is +/// supposed keep available from the output +/// history. +/// \param match_max_len Number of bytes a single decoding loop +/// can advance the write position (lz->pos) +/// in the history buffer (lz->dict). +/// +/// \note This function is called by LZMA decoder and Inflate init()s. +/// It's up to those functions allocate *lz and initialize it +/// with LZMA_LZ_DECODER_INIT. +extern lzma_ret +lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator, + bool (*process)(lzma_coder *restrict coder, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, bool has_safe_buffer), + lzma_vli uncompressed_size, + size_t history_size, size_t match_max_len) +{ + // Set uncompressed size. + lz->uncompressed_size = uncompressed_size; + + // Limit the history size to roughly sane values. This is primarily + // to prevent integer overflows. + if (history_size > UINT32_MAX / 2) + return LZMA_HEADER_ERROR; + + // Store the value actually requested. We use it for sanity checks + // when repeating data from the history buffer. + lz->requested_size = history_size; + + // Avoid tiny history buffer sizes for performance reasons. + // TODO: Test if this actually helps... + if (history_size < DICT_SIZE_MIN) + history_size = DICT_SIZE_MIN; + + // The real size of the history buffer is a bit bigger than + // requested by our caller. This allows us to do some optimizations, + // which help not only speed but simplicity of the code; specifically, + // we can make sure that there is always at least match_max_len + // bytes immediatelly available for writing without a need to wrap + // the history buffer. + const size_t dict_real_size = history_size + 2 * match_max_len + 1; + + // Reallocate memory if needed. + if (history_size != lz->size || match_max_len != lz->match_max_len) { + // Destroy the old buffer. + lzma_lz_decoder_end(lz, allocator); + + lz->size = history_size; + lz->match_max_len = match_max_len; + lz->must_flush_pos = history_size + match_max_len + 1; + + lz->dict = lzma_alloc(dict_real_size, allocator); + if (lz->dict == NULL) + return LZMA_MEM_ERROR; + } + + // Clean up the buffers to make it very sure that there are + // no information leaks when multiple steams are decoded + // with the same decoder structures. + memzero(lz->dict, dict_real_size); + memzero(lz->temp, LZMA_BUFFER_SIZE); + + // Reset the variables so that lz_get_byte(lz, 0) will return '\0'. + lz->pos = 0; + lz->start = 0; + lz->end = dict_real_size; + lz->is_full = false; + lz->eopm_detected = false; + lz->next_finished = false; + lz->this_finished = false; + + // Set the process function pointer. + lz->process = process; + + return LZMA_OK; +} + + +extern void +lzma_lz_decoder_end(lzma_lz_decoder *lz, lzma_allocator *allocator) +{ + lzma_free(lz->dict, allocator); + lz->dict = NULL; + lz->size = 0; + lz->match_max_len = 0; + return; +} diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h new file mode 100644 index 00000000..a8a585cd --- /dev/null +++ b/src/liblzma/lz/lz_decoder.h @@ -0,0 +1,214 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_decoder.h +/// \brief LZ out window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_OUT_H +#define LZMA_LZ_OUT_H + +#include "common.h" + + +/// Get a byte from the history buffer. +#define lz_get_byte(lz, distance) \ + ((distance) < (lz).pos \ + ? (lz).dict[(lz).pos - (distance) - 1] \ + : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) + + +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } + + +typedef struct { + /// Function to do the actual decoding (LZMA or Inflate) + bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t size_in, + bool has_safe_buffer); + + /// Pointer to dictionary (history) buffer. + /// \note Not 'restrict' because can alias next_out. + uint8_t *dict; + + /// Next write goes to dict[pos]. + size_t pos; + + /// Next byte to flush is buffer[start]. + size_t start; + + /// First byte to not flush is buffer[end]. + size_t end; + + /// First position to which data must not be written. + size_t limit; + + /// True if dictionary has needed wrapping. + bool is_full; + + /// True if process() has detected End of Payload Marker. + bool eopm_detected; + + /// True if the next coder in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected End of Payload + /// Marker. This may become true before next_finished becomes true. + bool this_finished; + + /// When pos >= must_flush_pos, we must not call process(). + size_t must_flush_pos; + + /// Maximum number of bytes that a single decoding loop inside + /// process() can produce data into dict. This amount is kept + /// always available at dict + pos i.e. it is safe to write a byte + /// to dict[pos + match_max_len - 1]. + size_t match_max_len; + + /// Number of bytes allocated to dict. + size_t size; + + /// Requested size of the dictionary. This is needed because we avoid + /// using extremely tiny history buffers. + size_t requested_size; + + /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown. + lzma_vli uncompressed_size; + + /// Number of bytes currently in temp[]. + size_t temp_size; + + /// Temporary buffer needed when + /// 1) we cannot make the input buffer completely empty; or + /// 2) we are not the last filter in the chain. + uint8_t temp[LZMA_BUFFER_SIZE]; + +} lzma_lz_decoder; + + +///////////////////////// +// Function prototypes // +///////////////////////// + +extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, + lzma_allocator *allocator, bool (*process)( + lzma_coder *restrict coder, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + bool has_safe_buffer), + lzma_vli uncompressed_size, + size_t history_size, size_t match_max_len); + +extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// Deallocates the history buffer if one exists. +extern void lzma_lz_decoder_end( + lzma_lz_decoder *lz, lzma_allocator *allocator); + +////////////////////// +// Inline functions // +////////////////////// + +// Repeat a block of data from the history. Because memcpy() is faster +// than copying byte by byte in a loop, the copying process gets split +// into three cases: +// 1. distance < length +// Source and target areas overlap, thus we can't use memcpy() +// (nor memmove()) safely. +// TODO: If this is common enough, it might be worth optimizing this +// more e.g. by checking if distance > sizeof(uint8_t*) and using +// memcpy in small chunks. +// 2. distance < pos +// This is the easiest and the fastest case. The block being copied +// is a contiguous piece in the history buffer. The buffer offset +// doesn't need wrapping. +// 3. distance >= pos +// We need to wrap the position, because otherwise we would try copying +// behind the first byte of the allocated buffer. It is possible that +// the block is fragmeneted into two pieces, thus we might need to call +// memcpy() twice. +// NOTE: The function using this macro must ensure that length is positive +// and that distance is FIXME +static inline bool +lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) +{ + // Validate offset of the block to be repeated. It doesn't + // make sense to copy data behind the beginning of the stream. + // Leaving this check away would lead to a security problem, + // in which e.g. the data of the previously decoded file(s) + // would be leaked (or whatever happens to be in unused + // part of the dictionary buffer). + if (distance >= lz->pos && !lz->is_full) + return false; + + // It also doesn't make sense to copy data farer than + // the dictionary size. + if (distance >= lz->requested_size) + return false; + + // The caller must have checked these! + assert(distance <= lz->size); + assert(length > 0); + assert(length <= lz->match_max_len); + + // Copy the amount of data requested by the decoder. + if (distance < length) { + // Source and target areas overlap, thus we can't use + // memcpy() nor even memmove() safely. :-( + // TODO: Copying byte by byte is slow. It might be + // worth optimizing this more if this case is common. + do { + lz->dict[lz->pos] = lz_get_byte(*lz, distance); + ++lz->pos; + } while (--length > 0); + + } else if (distance < lz->pos) { + // The easiest and fastest case + memcpy(lz->dict + lz->pos, + lz->dict + lz->pos - distance - 1, + length); + lz->pos += length; + + } else { + // The bigger the dictionary, the more rare this + // case occurs. We need to "wrap" the dict, thus + // we might need two memcpy() to copy all the data. + assert(lz->is_full); + const uint32_t copy_pos = lz->pos - distance - 1 + lz->end; + uint32_t copy_size = lz->end - copy_pos; + + if (copy_size < length) { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + copy_size); + lz->pos += copy_size; + copy_size = length - copy_size; + memcpy(lz->dict + lz->pos, lz->dict, copy_size); + lz->pos += copy_size; + } else { + memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + length); + lz->pos += length; + } + } + + return true; +} + +#endif diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c new file mode 100644 index 00000000..bc38cae2 --- /dev/null +++ b/src/liblzma/lz/lz_encoder.c @@ -0,0 +1,481 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.c +/// \brief LZ in window +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder_private.h" + +// Hash Chains +#ifdef HAVE_HC3 +# include "hc3.h" +#endif +#ifdef HAVE_HC4 +# include "hc4.h" +#endif + +// Binary Trees +#ifdef HAVE_BT2 +# include "bt2.h" +#endif +#ifdef HAVE_BT3 +# include "bt3.h" +#endif +#ifdef HAVE_BT4 +# include "bt4.h" +#endif + + +/// This is needed in two places so provide a macro. +#define get_cyclic_buffer_size(history_size) ((history_size) + 1) + + +/// Calculate certain match finder properties and validate the calculated +/// values. This is as its own function, because *num_items is needed to +/// calculate memory requirements in common/memory.c. +extern uint32_t +lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, + uint32_t history_size, uint32_t *restrict hash_mask, + uint32_t *restrict hash_size_sum, uint32_t *restrict num_items) +{ + uint32_t fix_hash_size; + uint32_t sons; + + switch (match_finder) { +#ifdef HAVE_HC3 + case LZMA_MF_HC3: + fix_hash_size = LZMA_HC3_FIX_HASH_SIZE; + sons = 1; + break; +#endif +#ifdef HAVE_HC4 + case LZMA_MF_HC4: + fix_hash_size = LZMA_HC4_FIX_HASH_SIZE; + sons = 1; + break; +#endif +#ifdef HAVE_BT2 + case LZMA_MF_BT2: + fix_hash_size = LZMA_BT2_FIX_HASH_SIZE; + sons = 2; + break; +#endif +#ifdef HAVE_BT3 + case LZMA_MF_BT3: + fix_hash_size = LZMA_BT3_FIX_HASH_SIZE; + sons = 2; + break; +#endif +#ifdef HAVE_BT4 + case LZMA_MF_BT4: + fix_hash_size = LZMA_BT4_FIX_HASH_SIZE; + sons = 2; + break; +#endif + default: + return true; + } + + uint32_t hs; + +#ifdef HAVE_LZMA_BT2 + if (match_finder == LZMA_BT2) { + // NOTE: hash_mask is not used by the BT2 match finder, + // but it is initialized just in case. + hs = LZMA_BT2_HASH_SIZE; + *hash_mask = 0; + } else +#endif + { + hs = history_size - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; + + if (hs > (UINT32_C(1) << 24)) { + if (match_finder == LZMA_MF_HC4 + || match_finder == LZMA_MF_BT4) + hs >>= 1; + else + hs = (1 << 24) - 1; + } + + *hash_mask = hs; + ++hs; + } + + *hash_size_sum = hs + fix_hash_size; + + *num_items = *hash_size_sum + + get_cyclic_buffer_size(history_size) * sons; + + return false; +} + + +extern lzma_ret +lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size), + lzma_vli uncompressed_size, + size_t history_size, size_t additional_buffer_before, + size_t match_max_len, size_t additional_buffer_after, + lzma_match_finder match_finder, uint32_t match_finder_cycles, + const uint8_t *preset_dictionary, + size_t preset_dictionary_size) +{ + // Set uncompressed size. + lz->uncompressed_size = uncompressed_size; + + /////////////// + // In Window // + /////////////// + + // Validate history size. + if (history_size < LZMA_DICTIONARY_SIZE_MIN + || history_size > LZMA_DICTIONARY_SIZE_MAX) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + assert(history_size <= MAX_VAL_FOR_NORMALIZE - 256); + assert(LZMA_DICTIONARY_SIZE_MAX <= MAX_VAL_FOR_NORMALIZE - 256); + + // Calculate the size of the history buffer to allocate. + // TODO: Get a reason for magic constant of 256. + const size_t size_reserv = (history_size + additional_buffer_before + + match_max_len + additional_buffer_after) / 2 + 256; + + lz->keep_size_before = history_size + additional_buffer_before; + lz->keep_size_after = match_max_len + additional_buffer_after; + + const size_t buffer_size = lz->keep_size_before + lz->keep_size_after + + size_reserv; + + // Allocate history buffer if its size has changed. + if (buffer_size != lz->size) { + lzma_free(lz->buffer, allocator); + lz->buffer = lzma_alloc(buffer_size, allocator); + if (lz->buffer == NULL) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } + } + + // Allocation successful. Store the new size and calculate + // must_move_pos. + lz->size = buffer_size; + lz->must_move_pos = lz->size - lz->keep_size_after; + + // Reset in window variables. + lz->offset = 0; + lz->read_pos = 0; + lz->read_limit = 0; + lz->write_pos = 0; + lz->stream_end_was_reached = false; + + + ////////////////// + // Match Finder // + ////////////////// + + // Validate match_finder, set function pointers and a few match + // finder specific variables. + switch (match_finder) { +#ifdef HAVE_HC3 + case LZMA_MF_HC3: + lz->get_matches = &lzma_hc3_get_matches; + lz->skip = &lzma_hc3_skip; + lz->cut_value = 8 + (match_max_len >> 2); + break; +#endif +#ifdef HAVE_HC4 + case LZMA_MF_HC4: + lz->get_matches = &lzma_hc4_get_matches; + lz->skip = &lzma_hc4_skip; + lz->cut_value = 8 + (match_max_len >> 2); + break; +#endif +#ifdef HAVE_BT2 + case LZMA_MF_BT2: + lz->get_matches = &lzma_bt2_get_matches; + lz->skip = &lzma_bt2_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif +#ifdef HAVE_BT3 + case LZMA_MF_BT3: + lz->get_matches = &lzma_bt3_get_matches; + lz->skip = &lzma_bt3_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif +#ifdef HAVE_BT4 + case LZMA_MF_BT4: + lz->get_matches = &lzma_bt4_get_matches; + lz->skip = &lzma_bt4_skip; + lz->cut_value = 16 + (match_max_len >> 1); + break; +#endif + default: + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + // Check if we have been requested to use a non-default cut_value. + if (match_finder_cycles > 0) + lz->cut_value = match_finder_cycles; + + lz->match_max_len = match_max_len; + lz->cyclic_buffer_size = get_cyclic_buffer_size(history_size); + + uint32_t hash_size_sum; + uint32_t num_items; + if (lzma_lz_encoder_hash_properties(match_finder, history_size, + &lz->hash_mask, &hash_size_sum, &num_items)) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + if (num_items != lz->num_items) { +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (num_items > SIZE_MAX / sizeof(uint32_t)) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } +#endif + + const size_t size_in_bytes + = (size_t)(num_items) * sizeof(uint32_t); + + lzma_free(lz->hash, allocator); + lz->hash = lzma_alloc(size_in_bytes, allocator); + if (lz->hash == NULL) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_MEM_ERROR; + } + + lz->num_items = num_items; + } + + lz->son = lz->hash + hash_size_sum; + + // Reset the hash table to empty hash values. + { + uint32_t *restrict items = lz->hash; + + for (uint32_t i = 0; i < hash_size_sum; ++i) + items[i] = EMPTY_HASH_VALUE; + } + + lz->cyclic_buffer_pos = 0; + + // Because zero is used as empty hash value, make the first byte + // appear at buffer[1 - offset]. + ++lz->offset; + + // If we are using a preset dictionary, read it now. + // TODO: This isn't implemented yet so return LZMA_HEADER_ERROR. + if (preset_dictionary != NULL && preset_dictionary_size > 0) { + lzma_lz_encoder_end(lz, allocator); + return LZMA_HEADER_ERROR; + } + + // Set the process function pointer. + lz->process = process; + + return LZMA_OK; +} + + +extern void +lzma_lz_encoder_end(lzma_lz_encoder *lz, lzma_allocator *allocator) +{ + lzma_free(lz->hash, allocator); + lz->hash = NULL; + lz->num_items = 0; + + lzma_free(lz->buffer, allocator); + lz->buffer = NULL; + lz->size = 0; + + return; +} + + +/// \brief Moves the data in the input window to free space for new data +/// +/// lz->buffer is a sliding input window, which keeps lz->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_lz_encoder *lz) +{ + // buffer[move_offset] will become buffer[0]. + assert(lz->read_pos > lz->keep_size_after); + size_t move_offset = lz->read_pos - lz->keep_size_before; + + // We need one additional byte, since move_pos() moves on 1 byte. + // TODO: Clean up? At least document more. + if (move_offset > 0) + --move_offset; + + assert(lz->write_pos > move_offset); + const size_t move_size = lz->write_pos - move_offset; + + assert(move_offset + move_size <= lz->size); + + memmove(lz->buffer, lz->buffer + move_offset, move_size); + + lz->offset += move_offset; + lz->read_pos -= move_offset; + lz->read_limit -= move_offset; + lz->write_pos -= move_offset; + + return; +} + + +/// \brief Tries to fill the input window (lz->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to lz->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. +/// +static lzma_ret +fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, + size_t *in_pos, size_t in_size, lzma_action action) +{ + assert(coder->lz.read_pos <= coder->lz.write_pos); + lzma_ret ret; + + // Move the sliding window if needed. + if (coder->lz.read_pos >= coder->lz.must_move_pos) + move_window(&coder->lz); + + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + bufcpy(in, in_pos, in_size, coder->lz.buffer, + &coder->lz.write_pos, coder->lz.size); + + if (action == LZMA_FINISH && *in_pos == in_size) + ret = LZMA_STREAM_END; + else + ret = LZMA_OK; + + } else { + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->lz.buffer, &coder->lz.write_pos, + coder->lz.size, action); + } + + // If end of stream has been reached, we allow the encoder to process + // all the input (that is, read_pos is allowed to reach write_pos). + // Otherwise we keep keep_size_after bytes available as prebuffer. + if (ret == LZMA_STREAM_END) { + coder->lz.stream_end_was_reached = true; + coder->lz.read_limit = coder->lz.write_pos; + + } else if (coder->lz.write_pos > coder->lz.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->lz.read_limit = coder->lz.write_pos + - coder->lz.keep_size_after; + } + + return ret; +} + + +extern lzma_ret +lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + while (*out_pos < out_size + && (*in_pos < in_size || action == LZMA_FINISH)) { + // Fill the input window if there is no more usable data. + if (!coder->lz.stream_end_was_reached && coder->lz.read_pos + >= coder->lz.read_limit) { + const lzma_ret ret = fill_window(coder, allocator, + in, in_pos, in_size, action); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + return ret; + } + + // Encode + if (coder->lz.process(coder, out, out_pos, out_size)) + return LZMA_STREAM_END; + } + + return LZMA_OK; +} + + +/// \brief Normalizes hash values +/// +/// lzma_lz_normalize is called when lz->pos hits MAX_VAL_FOR_NORMALIZE, +/// which currently happens once every 2 GiB of input data (to be exact, +/// after the first 2 GiB it happens once every 2 GiB minus dictionary_size +/// bytes). lz->pos is incremented by lzma_lz_move_pos(). +/// +/// lz->hash contains big amount of offsets relative to lz->buffer. +/// The offsets are stored as uint32_t, which is the only reasonable +/// datatype for these offsets; uint64_t would waste far too much RAM +/// and uint16_t would limit the dictionary to 64 KiB (far too small). +/// +/// When compressing files over 2 GiB, lz->buffer needs to be moved forward +/// to avoid integer overflows. We scan the lz->hash array and fix every +/// value to match the updated lz->buffer. +extern void +lzma_lz_encoder_normalize(lzma_lz_encoder *lz) +{ + const uint32_t subvalue = lz->read_pos - lz->cyclic_buffer_size; + assert(subvalue <= INT32_MAX); + + { + const uint32_t num_items = lz->num_items; + uint32_t *restrict items = lz->hash; + + for (uint32_t i = 0; i < num_items; ++i) { + // If the distance is greater than the dictionary + // size, we can simply mark the item as empty. + if (items[i] <= subvalue) + items[i] = EMPTY_HASH_VALUE; + else + items[i] -= subvalue; + } + } + + // Update offset to match the new locations. + lz->offset -= subvalue; + + return; +} diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h new file mode 100644 index 00000000..b39c88e5 --- /dev/null +++ b/src/liblzma/lz/lz_encoder.h @@ -0,0 +1,161 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder.h +/// \brief LZ in window and match finder API +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_H +#define LZMA_LZ_ENCODER_H + +#include "common.h" + + +typedef struct lzma_lz_encoder_s lzma_lz_encoder; +struct lzma_lz_encoder_s { + enum { + SEQ_INIT, + SEQ_RUN, + SEQ_FINISH, + SEQ_END + } sequence; + + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + lzma_vli uncompressed_size; + + /////////////// + // In Window // + /////////////// + + /// Pointer to buffer with data to be compressed + uint8_t *buffer; + + /// Total size of the allocated buffer (that is, including all + /// the extra space) + size_t size; + + /// Match finders store locations of matches using 32-bit integers. + /// To avoid adjusting several megabytes of integers every time the + /// input window is moved with move_window(), we only adjust the + /// offset of the buffer. Thus, buffer[match_finder_pos - offset] + /// is the byte pointed by match_finder_pos. + size_t offset; + + /// buffer[read_pos] is the current byte. + size_t read_pos; + + /// As long as read_pos is less than read_limit, there is enough + /// input available in buffer for at least one encoding loop. + /// + /// Because of the stateful API, read_limit may and will get greater + /// than read_pos quite often. This is taken into account when + /// calculating the value for keep_size_after. + size_t read_limit; + + /// buffer[write_pos] is the first byte that doesn't contain valid + /// uncompressed data; that is, the next input byte will be copied + /// to buffer[write_pos]. + size_t write_pos; + + /// When read_pos >= must_move_pos, move_window() must be called + /// to make more space for the input data. + size_t must_move_pos; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + size_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// stream_end_was_reached is false (once it is true, read_pos + /// is allowed to reach write_pos). + size_t keep_size_after; + + /// This is set to true once the last byte of the input data has + /// been copied to buffer. + bool stream_end_was_reached; + + ////////////////// + // Match Finder // + ////////////////// + + // Pointers to match finder functions + void (*get_matches)(lzma_lz_encoder *restrict lz, + uint32_t *restrict distances); + void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num); + + // Match finder data + uint32_t *hash; // TODO: Check if hash aliases son + uint32_t *son; // and add 'restrict' if possible. + uint32_t cyclic_buffer_pos; + uint32_t cyclic_buffer_size; // Must be dictionary_size + 1. + uint32_t hash_mask; + uint32_t cut_value; + uint32_t hash_size_sum; + uint32_t num_items; + uint32_t match_max_len; +}; + + +#define LZMA_LZ_ENCODER_INIT \ + (lzma_lz_encoder){ \ + .buffer = NULL, \ + .size = 0, \ + .hash = NULL, \ + .num_items = 0, \ + } + + +/// Calculates +extern uint32_t lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, + uint32_t history_size, uint32_t *restrict hash_mask, + uint32_t *restrict hash_size_sum, + uint32_t *restrict num_items); + +// NOTE: liblzma doesn't use callback API like LZMA SDK does. The caller +// must make sure that keep_size_after is big enough for single encoding pass +// i.e. keep_size_after >= maximum number of bytes possibly needed after +// the current position between calls to lzma_lz_read(). +extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz, + lzma_allocator *allocator, + bool (*process)(lzma_coder *coder, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size), + lzma_vli uncompressed_size, + size_t history_size, size_t additional_buffer_before, + size_t match_max_len, size_t additional_buffer_after, + lzma_match_finder match_finder, uint32_t match_finder_cycles, + const uint8_t *preset_dictionary, + size_t preset_dictionary_size); + +/// Frees memory allocated for in window and match finder buffers. +extern void lzma_lz_encoder_end( + lzma_lz_encoder *lz, lzma_allocator *allocator); + +extern lzma_ret lzma_lz_encode(lzma_coder *coder, + lzma_allocator *allocator lzma_attribute((unused)), + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + lzma_action action); + +/// This should not be called directly, but only via move_pos() macro. +extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz); + +#endif diff --git a/src/liblzma/lz/lz_encoder_private.h b/src/liblzma/lz/lz_encoder_private.h new file mode 100644 index 00000000..638fcb2d --- /dev/null +++ b/src/liblzma/lz/lz_encoder_private.h @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_private.h +/// \brief Private definitions for LZ encoder +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_PRIVATE_H +#define LZMA_LZ_ENCODER_PRIVATE_H + +#include "lz_encoder.h" + +/// Value used to indicate unused slot +#define EMPTY_HASH_VALUE 0 + +/// When the dictionary and hash variables need to be adjusted to prevent +/// integer overflows. Since we use uint32_t to store the offsets, half +/// of it is the biggest safe limit. +#define MAX_VAL_FOR_NORMALIZE (UINT32_MAX / 2) + + +struct lzma_coder_s { + lzma_next_coder next; + lzma_lz_encoder lz; +}; + +#endif diff --git a/src/liblzma/lz/match_c.h b/src/liblzma/lz/match_c.h new file mode 100644 index 00000000..68766385 --- /dev/null +++ b/src/liblzma/lz/match_c.h @@ -0,0 +1,401 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file match_c.h +/// \brief Template for different match finders +/// +/// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file +/// sets slighly different #defines, resulting the different match finders. +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +////////////// +// Includes // +////////////// + +#include "check.h" + + +/////////////// +// Constants // +/////////////// + +#define START_MAX_LEN 1 + +#ifdef HASH_ARRAY_2 +# define NUM_HASH_DIRECT_BYTES 0 +# define HASH_2_SIZE (1 << 10) +# ifdef HASH_ARRAY_3 +# define NUM_HASH_BYTES 4 +# define HASH_3_SIZE (1 << 16) +# define HASH_3_OFFSET HASH_2_SIZE +# define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) +# else +# define NUM_HASH_BYTES 3 +# define FIX_HASH_SIZE HASH_2_SIZE +# endif +# define HASH_SIZE 0 +# define MIN_MATCH_CHECK NUM_HASH_BYTES +#else +# define NUM_HASH_DIRECT_BYTES 2 +# define NUM_HASH_BYTES 2 +# define HASH_SIZE (1 << (8 * NUM_HASH_BYTES)) +# define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1) +# define FIX_HASH_SIZE 0 +#endif + + +//////////// +// Macros // +//////////// + +#ifdef HASH_ARRAY_2 +# ifdef HASH_ARRAY_3 +# define HASH_CALC() \ + do { \ + const uint32_t temp = lzma_crc32_table[0][ \ + cur[0]] ^ cur[1]; \ + hash_2_value = temp & (HASH_2_SIZE - 1); \ + hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ + & (HASH_3_SIZE - 1); \ + hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ + ^ (lzma_crc32_table[0][cur[3]] << 5)) \ + & lz->hash_mask; \ + } while (0) +# else +# define HASH_CALC() \ + do { \ + const uint32_t temp = lzma_crc32_table[0][ \ + cur[0]] ^ cur[1]; \ + hash_2_value = temp & (HASH_2_SIZE - 1); \ + hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ + & lz->hash_mask; \ + } while (0) +# endif +#else +# define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8) +#endif + + +// Moves the current read position forward by one byte. In LZMA SDK, +// CLZInWindow::MovePos() can read more input data if needed, because of +// the callback style API. In liblzma we must have ensured earlier, that +// there is enough data available in lz->buffer. +#define move_pos() \ +do { \ + if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \ + lz->cyclic_buffer_pos = 0; \ + ++lz->read_pos; \ + assert(lz->read_pos <= lz->write_pos); \ + if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \ + lzma_lz_encoder_normalize(lz); \ +} while (0) + + +////////////////////// +// Global constants // +////////////////////// + +LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE; +LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE; + + +/////////////////// +// API functions // +/////////////////// + +LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER) +{ + uint32_t len_limit; + if (lz->read_pos + lz->match_max_len <= lz->write_pos) { + len_limit = lz->match_max_len; + } else { + assert(lz->stream_end_was_reached); + len_limit = lz->write_pos - lz->read_pos; + if (len_limit < MIN_MATCH_CHECK) { + distances[0] = 0; + move_pos(); + return; + } + } + + int32_t offset = 1; + const uint32_t match_min_pos + = lz->read_pos + lz->offset > lz->cyclic_buffer_size + ? lz->read_pos + lz->offset - lz->cyclic_buffer_size + : 0; + const uint8_t *cur = lz->buffer + lz->read_pos; + uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size + +#ifdef HASH_ARRAY_2 + uint32_t hash_2_value; +# ifdef HASH_ARRAY_3 + uint32_t hash_3_value; +# endif +#endif + uint32_t hash_value; + HASH_CALC(); + + uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; +#ifdef HASH_ARRAY_2 + uint32_t cur_match2 = lz->hash[hash_2_value]; +# ifdef HASH_ARRAY_3 + uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value]; +# endif + lz->hash[hash_2_value] = lz->read_pos + lz->offset; + + if (cur_match2 > match_min_pos) { + if (lz->buffer[cur_match2 - lz->offset] == cur[0]) { + max_len = 2; + distances[offset++] = 2; + distances[offset++] = lz->read_pos + lz->offset + - cur_match2 - 1; + } + } + +# ifdef HASH_ARRAY_3 + lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset; + if (cur_match3 > match_min_pos) { + if (lz->buffer[cur_match3 - lz->offset] == cur[0]) { + if (cur_match3 == cur_match2) + offset -= 2; + + max_len = 3; + distances[offset++] = 3; + distances[offset++] = lz->read_pos + lz->offset + - cur_match3 - 1; + cur_match2 = cur_match3; + } + } +# endif + + if (offset != 1 && cur_match2 == cur_match) { + offset -= 2; + max_len = START_MAX_LEN; + } +#endif + + lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset; + +#ifdef IS_HASH_CHAIN + lz->son[lz->cyclic_buffer_pos] = cur_match; +#else + uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; + uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); + + uint32_t len0 = NUM_HASH_DIRECT_BYTES; + uint32_t len1 = NUM_HASH_DIRECT_BYTES; +#endif + +#if NUM_HASH_DIRECT_BYTES != 0 + if (cur_match > match_min_pos) { + if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset] + != cur[NUM_HASH_DIRECT_BYTES]) { + max_len = NUM_HASH_DIRECT_BYTES; + distances[offset++] = NUM_HASH_DIRECT_BYTES; + distances[offset++] = lz->read_pos + lz->offset + - cur_match - 1; + } + } +#endif + + uint32_t count = lz->cut_value; + + while (true) { + if (cur_match <= match_min_pos || count-- == 0) { +#ifndef IS_HASH_CHAIN + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; +#endif + break; + } + + const uint32_t delta = lz->read_pos + lz->offset - cur_match; + const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos + ? lz->cyclic_buffer_pos - delta + : lz->cyclic_buffer_pos - delta + + lz->cyclic_buffer_size; + uint32_t *pair = lz->son + +#ifdef IS_HASH_CHAIN + cyclic_pos; +#else + (cyclic_pos << 1); +#endif + + const uint8_t *pb = lz->buffer + cur_match - lz->offset; + uint32_t len = +#ifdef IS_HASH_CHAIN + NUM_HASH_DIRECT_BYTES; + if (pb[max_len] == cur[max_len]) +#else + MIN(len0, len1); +#endif + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (max_len < len) { + max_len = len; + distances[offset++] = len; + distances[offset++] = delta - 1; + if (len == len_limit) { +#ifndef IS_HASH_CHAIN + *ptr1 = pair[0]; + *ptr0 = pair[1]; +#endif + break; + } + } + } + +#ifdef IS_HASH_CHAIN + cur_match = *pair; +#else + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } +#endif + } + + distances[0] = offset - 1; + + move_pos(); + + return; +} + + +LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER) +{ + do { +#ifdef IS_HASH_CHAIN + if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) { + move_pos(); + continue; + } +#else + uint32_t len_limit; + if (lz->read_pos + lz->match_max_len <= lz->write_pos) { + len_limit = lz->match_max_len; + } else { + assert(lz->stream_end_was_reached == true); + len_limit = lz->write_pos - lz->read_pos; + if (len_limit < MIN_MATCH_CHECK) { + move_pos(); + continue; + } + } + const uint32_t match_min_pos + = lz->read_pos + lz->offset > lz->cyclic_buffer_size + ? lz->read_pos + lz->offset - lz->cyclic_buffer_size + : 0; +#endif + + const uint8_t *cur = lz->buffer + lz->read_pos; + +#ifdef HASH_ARRAY_2 + uint32_t hash_2_value; +# ifdef HASH_ARRAY_3 + uint32_t hash_3_value; + uint32_t hash_value; + HASH_CALC(); + lz->hash[HASH_3_OFFSET + hash_3_value] + = lz->read_pos + lz->offset; +# else + uint32_t hash_value; + HASH_CALC(); +# endif + lz->hash[hash_2_value] = lz->read_pos + lz->offset; +#else + uint32_t hash_value; + HASH_CALC(); +#endif + + uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; + lz->hash[FIX_HASH_SIZE + hash_value] + = lz->read_pos + lz->offset; + +#ifdef IS_HASH_CHAIN + lz->son[lz->cyclic_buffer_pos] = cur_match; +#else + uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; + uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); + + uint32_t len0 = NUM_HASH_DIRECT_BYTES; + uint32_t len1 = NUM_HASH_DIRECT_BYTES; + uint32_t count = lz->cut_value; + + while (true) { + if (cur_match <= match_min_pos || count-- == 0) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + break; + } + + const uint32_t delta = lz->read_pos + + lz->offset - cur_match; + const uint32_t cyclic_pos + = delta <= lz->cyclic_buffer_pos + ? lz->cyclic_buffer_pos - delta + : lz->cyclic_buffer_pos - delta + + lz->cyclic_buffer_size; + uint32_t *pair = lz->son + (cyclic_pos << 1); + + const uint8_t *pb = lz->buffer + cur_match + - lz->offset; + uint32_t len = MIN(len0, len1); + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + break; + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +#endif + + move_pos(); + + } while (--num != 0); + + return; +} diff --git a/src/liblzma/lz/match_h.h b/src/liblzma/lz/match_h.h new file mode 100644 index 00000000..2eae90ba --- /dev/null +++ b/src/liblzma/lz/match_h.h @@ -0,0 +1,69 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file match_h.h +/// \brief Header template for different match finders +// +// Copyright (C) 1999-2006 Igor Pavlov +// Copyright (C) 2007 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder_private.h" + + +////////////////////// +// Global constants // +////////////////////// + +#undef LZMA_HASH_SIZE +#undef LZMA_FIX_HASH_SIZE +#undef LZMA_HASH_SIZE_C +#undef LZMA_FIX_HASH_SIZE_C + +#define LZMA_HASH_SIZE(mf_name) LZMA_HASH_SIZE_C(mf_name) +#define LZMA_FIX_HASH_SIZE(mf_name) LZMA_FIX_HASH_SIZE_C(mf_name) + +#define LZMA_HASH_SIZE_C(mf_name) \ + const uint32_t LZMA_ ## mf_name ## _HASH_SIZE + +#define LZMA_FIX_HASH_SIZE_C(mf_name) \ + const uint32_t LZMA_ ## mf_name ## _FIX_HASH_SIZE + +extern LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER); +extern LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER); + + +/////////////// +// Functions // +/////////////// + +#undef LZMA_GET_MATCHES +#undef LZMA_SKIP +#undef LZMA_GET_MATCHES_C +#undef LZMA_SKIP_C + +#define LZMA_GET_MATCHES(mf_name) LZMA_GET_MATCHES_C(mf_name) +#define LZMA_SKIP(mf_name) LZMA_SKIP_C(mf_name) + +#define LZMA_GET_MATCHES_C(mf_name) \ + extern void lzma_ ## mf_name ## _get_matches( \ + lzma_lz_encoder *restrict lz, \ + uint32_t *restrict distances) + +#define LZMA_SKIP_C(mf_name) \ + extern void lzma_ ## mf_name ## _skip( \ + lzma_lz_encoder *lz, uint32_t num) + +LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER); + +LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER); |