diff options
Diffstat (limited to '')
41 files changed, 5292 insertions, 4167 deletions
diff --git a/src/liblzma/lz/lz_encoder_private.h b/debug/crc32.c index 638fcb2d..4052a863 100644 --- a/src/liblzma/lz/lz_encoder_private.h +++ b/debug/crc32.c @@ -1,10 +1,9 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file lz_encoder_private.h -/// \brief Private definitions for LZ encoder +/// \file crc32.c +/// \brief Primitive CRC32 calculation tool // -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -18,23 +17,29 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_LZ_ENCODER_PRIVATE_H -#define LZMA_LZ_ENCODER_PRIVATE_H +#include "sysdefs.h" +#include <stdio.h> -#include "lz_encoder.h" -/// Value used to indicate unused slot -#define EMPTY_HASH_VALUE 0 +int +main(void) +{ + uint32_t crc = 0; -/// When the dictionary and hash variables need to be adjusted to prevent -/// integer overflows. Since we use uint32_t to store the offsets, half -/// of it is the biggest safe limit. -#define MAX_VAL_FOR_NORMALIZE (UINT32_MAX / 2) + do { + uint8_t buf[BUFSIZ]; + const size_t size = fread(buf, 1, sizeof(buf), stdin); + crc = lzma_crc32(buf, size, crc); + } while (!ferror(stdin) && !feof(stdin)); + //printf("%08" PRIX32 "\n", crc); -struct lzma_coder_s { - lzma_next_coder next; - lzma_lz_encoder lz; -}; + // I want it little endian so it's easy to work with hex editor. + printf("%02" PRIX32 " ", crc & 0xFF); + printf("%02" PRIX32 " ", (crc >> 8) & 0xFF); + printf("%02" PRIX32 " ", (crc >> 16) & 0xFF); + printf("%02" PRIX32 " ", crc >> 24); + printf("\n"); -#endif + return 0; +} diff --git a/src/liblzma/lz/Makefile.am b/src/liblzma/lz/Makefile.am index 5c27e2f2..bf41d8e6 100644 --- a/src/liblzma/lz/Makefile.am +++ b/src/liblzma/lz/Makefile.am @@ -20,43 +20,16 @@ liblz_la_CPPFLAGS = \ liblz_la_SOURCES = -if COND_MAIN_ENCODER +if COND_ENCODER_LZ liblz_la_SOURCES += \ lz_encoder.c \ lz_encoder.h \ - lz_encoder_private.h \ - match_c.h \ - match_h.h - -if COND_MF_HC3 -liblz_la_SOURCES += hc3.c hc3.h -liblz_la_CPPFLAGS += -DHAVE_HC3 -endif - -if COND_MF_HC4 -liblz_la_SOURCES += hc4.c hc4.h -liblz_la_CPPFLAGS += -DHAVE_HC4 -endif - -if COND_MF_BT2 -liblz_la_SOURCES += bt2.c bt2.h -liblz_la_CPPFLAGS += -DHAVE_BT2 -endif - -if COND_MF_BT3 -liblz_la_SOURCES += bt3.c bt3.h -liblz_la_CPPFLAGS += -DHAVE_BT3 -endif - -if COND_MF_BT4 -liblz_la_SOURCES += bt4.c bt4.h -liblz_la_CPPFLAGS += -DHAVE_BT4 -endif - + lz_encoder_hash.h \ + lz_encoder_mf.c endif -if COND_MAIN_DECODER +if COND_DECODER_LZ liblz_la_SOURCES += \ lz_decoder.c \ lz_decoder.h diff --git a/src/liblzma/lz/bt2.c b/src/liblzma/lz/bt2.c deleted file mode 100644 index 7dc4cb80..00000000 --- a/src/liblzma/lz/bt2.c +++ /dev/null @@ -1,27 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt2.c -/// \brief Binary Tree 2 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "bt2.h" - -#undef IS_HASH_CHAIN -#undef HASH_ARRAY_2 -#undef HASH_ARRAY_3 - -#include "match_c.h" diff --git a/src/liblzma/lz/bt2.h b/src/liblzma/lz/bt2.h deleted file mode 100644 index 33cb52cd..00000000 --- a/src/liblzma/lz/bt2.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt2.h -/// \brief Binary Tree 2 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_BT2_H -#define LZMA_BT2_H - -#undef LZMA_MATCH_FINDER_NAME_LOWER -#undef LZMA_MATCH_FINDER_NAME_UPPER -#define LZMA_MATCH_FINDER_NAME_LOWER bt2 -#define LZMA_MATCH_FINDER_NAME_UPPER BT2 - -#include "match_h.h" - -#endif diff --git a/src/liblzma/lz/bt3.c b/src/liblzma/lz/bt3.c deleted file mode 100644 index d44310f3..00000000 --- a/src/liblzma/lz/bt3.c +++ /dev/null @@ -1,29 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt3.c -/// \brief Binary Tree 3 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "bt3.h" - -#undef IS_HASH_CHAIN -#undef HASH_ARRAY_2 -#undef HASH_ARRAY_3 - -#define HASH_ARRAY_2 - -#include "match_c.h" diff --git a/src/liblzma/lz/bt3.h b/src/liblzma/lz/bt3.h deleted file mode 100644 index 247c7e5f..00000000 --- a/src/liblzma/lz/bt3.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt3.h -/// \brief Binary Tree 3 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_BT3_H -#define LZMA_BT3_H - -#undef LZMA_MATCH_FINDER_NAME_LOWER -#undef LZMA_MATCH_FINDER_NAME_UPPER -#define LZMA_MATCH_FINDER_NAME_LOWER bt3 -#define LZMA_MATCH_FINDER_NAME_UPPER BT3 - -#include "match_h.h" - -#endif diff --git a/src/liblzma/lz/bt4.c b/src/liblzma/lz/bt4.c deleted file mode 100644 index 6e1042c9..00000000 --- a/src/liblzma/lz/bt4.c +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt4.c -/// \brief Binary Tree 4 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "bt4.h" - -#undef IS_HASH_CHAIN -#undef HASH_ARRAY_2 -#undef HASH_ARRAY_3 - -#define HASH_ARRAY_2 -#define HASH_ARRAY_3 - -#include "match_c.h" diff --git a/src/liblzma/lz/bt4.h b/src/liblzma/lz/bt4.h deleted file mode 100644 index e3fcf6ac..00000000 --- a/src/liblzma/lz/bt4.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file bt4.h -/// \brief Binary Tree 4 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_BT4_H -#define LZMA_BT4_H - -#undef LZMA_MATCH_FINDER_NAME_LOWER -#undef LZMA_MATCH_FINDER_NAME_UPPER -#define LZMA_MATCH_FINDER_NAME_LOWER bt4 -#define LZMA_MATCH_FINDER_NAME_UPPER BT4 - -#include "match_h.h" - -#endif diff --git a/src/liblzma/lz/hc3.c b/src/liblzma/lz/hc3.c deleted file mode 100644 index 22b5689b..00000000 --- a/src/liblzma/lz/hc3.c +++ /dev/null @@ -1,30 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file hc3.c -/// \brief Hash Chain 3 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "hc3.h" - -#undef IS_HASH_CHAIN -#undef HASH_ARRAY_2 -#undef HASH_ARRAY_3 - -#define IS_HASH_CHAIN -#define HASH_ARRAY_2 - -#include "match_c.h" diff --git a/src/liblzma/lz/hc3.h b/src/liblzma/lz/hc3.h deleted file mode 100644 index 97be0b1d..00000000 --- a/src/liblzma/lz/hc3.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file hc3.h -/// \brief Hash Chain 3 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_HC3_H -#define LZMA_HC3_H - -#undef LZMA_MATCH_FINDER_NAME_LOWER -#undef LZMA_MATCH_FINDER_NAME_UPPER -#define LZMA_MATCH_FINDER_NAME_LOWER hc3 -#define LZMA_MATCH_FINDER_NAME_UPPER HC3 - -#include "match_h.h" - -#endif diff --git a/src/liblzma/lz/hc4.c b/src/liblzma/lz/hc4.c deleted file mode 100644 index a55cfd09..00000000 --- a/src/liblzma/lz/hc4.c +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file hc4.c -/// \brief Hash Chain 4 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "hc4.h" - -#undef IS_HASH_CHAIN -#undef HASH_ARRAY_2 -#undef HASH_ARRAY_3 - -#define IS_HASH_CHAIN -#define HASH_ARRAY_2 -#define HASH_ARRAY_3 - -#include "match_c.h" diff --git a/src/liblzma/lz/hc4.h b/src/liblzma/lz/hc4.h deleted file mode 100644 index dc072e2f..00000000 --- a/src/liblzma/lz/hc4.h +++ /dev/null @@ -1,31 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file hc4.h -/// \brief Hash Chain 4 -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_HC4_H -#define LZMA_HC4_H - -#undef LZMA_MATCH_FINDER_NAME_LOWER -#undef LZMA_MATCH_FINDER_NAME_UPPER -#define LZMA_MATCH_FINDER_NAME_LOWER hc4 -#define LZMA_MATCH_FINDER_NAME_UPPER HC4 - -#include "match_h.h" - -#endif diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index ae969d62..5c3f1d18 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -18,351 +18,142 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "lz_decoder.h" +// liblzma supports multiple LZ77-based filters. The LZ part is shared +// between these filters. The LZ code takes care of dictionary handling +// and passing the data between filters in the chain. The filter-specific +// part decodes from the input buffer to the dictionary. -/// Minimum size of allocated dictionary -#define DICT_SIZE_MIN 8192 +#include "lz_decoder.h" -/// When there is less than this amount of data available for decoding, -/// it is moved to the temporary buffer which -/// - protects from reads past the end of the buffer; and -/// - stored the incomplete data between lzma_code() calls. -/// -/// \note TEMP_LIMIT must be at least as much as -/// REQUIRED_IN_BUFFER_SIZE defined in lzma_decoder.c. -#define TEMP_LIMIT 32 -// lzma_lz_decoder.dict[] must be three times the size of TEMP_LIMIT. -// 2 * TEMP_LIMIT is used for the actual data, and the third TEMP_LIMIT -// bytes is needed for safety to allow decode_dummy() in lzma_decoder.c -// to read past end of the buffer. This way it should be both fast and simple. -#if LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT -# error LZMA_BUFFER_SIZE < 3 * TEMP_LIMIT -#endif +struct lzma_coder_s { + /// Dictionary (history buffer) + lzma_dict dict; + /// The actual LZ-based decoder e.g. LZMA + lzma_lz_decoder lz; -struct lzma_coder_s { + /// Next filter in the chain, if any. Note that LZMA and LZMA2 are + /// only allowed as the last filter, but the long-range filter in + /// future can be in the middle of the chain. lzma_next_coder next; - lzma_lz_decoder lz; - // There are more members in this structure but they are not - // visible in LZ coder. + /// True if the next filter in the chain has returned LZMA_STREAM_END. + bool next_finished; + + /// True if the LZ decoder (e.g. LZMA) has detected end of payload + /// marker. This may become true before next_finished becomes true. + bool this_finished; + + /// Temporary buffer needed when the LZ-based filter is not the last + /// filter in the chain. The output of the next filter is first + /// decoded into buffer[], which is then used as input for the actual + /// LZ-based decoder. + struct { + size_t pos; + size_t size; + uint8_t buffer[LZMA_BUFFER_SIZE]; + } temp; }; -/// - Copy as much data as possible from lz->dict[] to out[]. -/// - Update *out_pos, lz->start, and lz->end accordingly. -/// - Wrap lz-pos to the beginning of lz->dict[] if there is a danger that -/// it may go past the end of the buffer (lz->pos >= lz->must_flush_pos). -static inline bool -flush(lzma_lz_decoder *restrict lz, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size) -{ - // Flush uncompressed data from the history buffer to - // the output buffer. This is done in two phases. - - assert(lz->start <= lz->end); - - // Flush if pos < start < end. - if (lz->pos < lz->start && lz->start < lz->end) { - bufcpy(lz->dict, &lz->start, lz->end, out, out_pos, out_size); - - // If we reached end of the data in history buffer, - // wrap to the beginning. - if (lz->start == lz->end) - lz->start = 0; - } - - // Flush if start start < pos <= end. This is not as `else' for - // previous `if' because the previous one may make this one true. - if (lz->start < lz->pos) { - bufcpy(lz->dict, &lz->start, - lz->pos, out, out_pos, out_size); - - if (lz->pos >= lz->must_flush_pos) { - // Wrap the flushing position if we have - // flushed the whole history buffer. - if (lz->pos == lz->start) - lz->start = 0; - - // Wrap the write position and store to lz.end - // how much there is new data available. - lz->end = lz->pos; - lz->pos = 0; - lz->is_full = true; - } - } - - assert(lz->pos < lz->must_flush_pos); - - return *out_pos == out_size; -} - - -/// Calculate safe value for lz->limit. If no safe value can be found, -/// set lz->limit to zero. When flushing, only as little data will be -/// decoded as is needed to fill the output buffer (lowers both latency -/// and throughput). -/// -/// \return true if there is no space for new uncompressed data. -/// -static inline bool -set_limit(lzma_lz_decoder *lz, size_t out_avail, bool flushing) -{ - // Set the limit so that writing to dict[limit + match_max_len - 1] - // doesn't overwrite any unflushed data and doesn't write past the - // end of the dict buffer. - if (lz->start <= lz->pos) { - // We can fill the buffer from pos till the end - // of the dict buffer. - lz->limit = lz->must_flush_pos; - } else if (lz->pos + lz->match_max_len < lz->start) { - // There's some unflushed data between pos and end of the - // buffer. Limit so that we don't overwrite the unflushed data. - lz->limit = lz->start - lz->match_max_len; - } else { - // Buffer is too full. - lz->limit = 0; - return true; - } - - // Finetune the limit a bit if it isn't zero. - - assert(lz->limit > lz->pos); - const size_t dict_avail = lz->limit - lz->pos; - - if (lz->uncompressed_size < dict_avail) { - // Finishing a stream that doesn't have - // an end of stream marker. - lz->limit = lz->pos + lz->uncompressed_size; - - } else if (flushing && out_avail < dict_avail) { - // Flushing enabled, decoding only as little as needed to - // fill the out buffer (if there's enough input, of course). - lz->limit = lz->pos + out_avail; - } - - return lz->limit == lz->pos; -} - - -/// Takes care of wrapping the data into temporary buffer when needed, -/// and calls the actual decoder. -/// -/// \return true if error occurred -/// -static inline bool -call_process(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size) -{ - // It would be nice and simple if we could just give in[] to the - // decoder, but the requirement of zlib-like API forces us to be - // able to make *in_pos == in_size whenever there is enough output - // space. If needed, we will append a few bytes from in[] to - // a temporary buffer and decode enough to reach the part that - // was copied from in[]. Then we can continue with the real in[]. - - bool error; - const size_t dict_old_pos = coder->lz.pos; - const size_t in_avail = in_size - *in_pos; - - if (coder->lz.temp_size + in_avail < 2 * TEMP_LIMIT) { - // Copy all the available input from in[] to temp[]. - memcpy(coder->lz.temp + coder->lz.temp_size, - in + *in_pos, in_avail); - coder->lz.temp_size += in_avail; - *in_pos += in_avail; - assert(*in_pos == in_size); - - // Decode as much as possible. - size_t temp_used = 0; - error = coder->lz.process(coder, coder->lz.temp, &temp_used, - coder->lz.temp_size, true); - assert(temp_used <= coder->lz.temp_size); - - // Move the remaining data to the beginning of temp[]. - coder->lz.temp_size -= temp_used; - memmove(coder->lz.temp, coder->lz.temp + temp_used, - coder->lz.temp_size); - - } else if (coder->lz.temp_size > 0) { - // Fill temp[] unless it is already full because we aren't - // the last filter in the chain. - size_t copy_size = 0; - if (coder->lz.temp_size < 2 * TEMP_LIMIT) { - assert(*in_pos < in_size); - copy_size = 2 * TEMP_LIMIT - coder->lz.temp_size; - memcpy(coder->lz.temp + coder->lz.temp_size, - in + *in_pos, copy_size); - // NOTE: We don't update lz.temp_size or *in_pos yet. - } - - size_t temp_used = 0; - error = coder->lz.process(coder, coder->lz.temp, &temp_used, - coder->lz.temp_size + copy_size, false); - - if (temp_used < coder->lz.temp_size) { - // Only very little input data was consumed. Move - // the unprocessed data to the beginning temp[]. - coder->lz.temp_size += copy_size - temp_used; - memmove(coder->lz.temp, coder->lz.temp + temp_used, - coder->lz.temp_size); - *in_pos += copy_size; - assert(*in_pos <= in_size); - - } else { - // We were able to decode so much data that next time - // we can decode directly from in[]. That is, we can - // consider temp[] to be empty now. - *in_pos += temp_used - coder->lz.temp_size; - coder->lz.temp_size = 0; - assert(*in_pos <= in_size); - } - - } else { - // Decode directly from in[]. - error = coder->lz.process(coder, in, in_pos, in_size, false); - assert(*in_pos <= in_size); - } - - assert(coder->lz.pos >= dict_old_pos); - if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { - // Update uncompressed size. - coder->lz.uncompressed_size -= coder->lz.pos - dict_old_pos; - - // Check that End of Payload Marker hasn't been detected - // since it must not be present because uncompressed size - // is known. - if (coder->lz.eopm_detected) - error = true; - } - - return error; -} - - static lzma_ret decode_buffer(lzma_coder *coder, const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - bool flushing) + size_t *restrict out_pos, size_t out_size) { - bool stop = false; - while (true) { - // Flush from coder->lz.dict to out[]. - flush(&coder->lz, out, out_pos, out_size); - - // All done? - if (*out_pos == out_size - || stop - || coder->lz.eopm_detected - || coder->lz.uncompressed_size == 0) - break; - - // Set write limit in the dictionary. - if (set_limit(&coder->lz, out_size - *out_pos, flushing)) - break; - - // Decode more data. - if (call_process(coder, in, in_pos, in_size)) - return LZMA_DATA_ERROR; - - // Set stop to true if we must not call call_process() again - // during this function call. - // FIXME: Can this make the loop exist too early? It wouldn't - // cause data corruption so not a critical problem. It can - // happen if dictionary gets full and lz.temp still contains - // a few bytes data that we could decode right now. - if (*in_pos == in_size && coder->lz.temp_size <= TEMP_LIMIT - && coder->lz.pos < coder->lz.limit) - stop = true; + // Wrap the dictionary if needed. + if (coder->dict.pos == coder->dict.size) + coder->dict.pos = 0; + + // Store the current dictionary position. It is needed to know + // where to start copying to the out[] buffer. + const size_t dict_start = coder->dict.pos; + + // Calculate how much we allow the process() function to + // decode. It must not decode past the end of the dictionary + // buffer, and we don't want it to decode more than is + // actually needed to fill the out[] buffer. + coder->dict.limit = coder->dict.pos + MIN(out_size - *out_pos, + coder->dict.size - coder->dict.pos); + + // Call the process() function to do the actual decoding. + const lzma_ret ret = coder->lz.code( + coder->lz.coder, &coder->dict, + in, in_pos, in_size); + + // Copy the decoded data from the dictionary to the out[] + // buffer. + const size_t copy_size = coder->dict.pos - dict_start; + assert(copy_size <= out_size - *out_pos); + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; + + // Return if everything got decoded or an error occurred, or + // if there's no more data to decode. + if (ret != LZMA_OK || *out_pos == out_size + || coder->dict.pos < coder->dict.size) + return ret; } - - // If we have decoded everything (EOPM detected or uncompressed_size - // bytes were processed) to the history buffer, and also flushed - // everything from the history buffer, our job is done. - if ((coder->lz.eopm_detected - || coder->lz.uncompressed_size == 0) - && coder->lz.start == coder->lz.pos) - return LZMA_STREAM_END; - - return LZMA_OK; } -extern lzma_ret -lzma_lz_decode(lzma_coder *coder, +static lzma_ret +lz_decode(lzma_coder *coder, lzma_allocator *allocator lzma_attribute((unused)), const uint8_t *restrict in, size_t *restrict in_pos, size_t in_size, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - if (coder->next.code == NULL) { - const lzma_ret ret = decode_buffer(coder, in, in_pos, in_size, - out, out_pos, out_size, - action == LZMA_SYNC_FLUSH); - - if (*out_pos == out_size || ret == LZMA_STREAM_END) { - // Unread to make coder->temp[] empty. This is easy, - // because we know that all the data currently in - // coder->temp[] has been copied form in[] during this - // call to the decoder. - // - // If we didn't do this, we could have data left in - // coder->temp[] when end of stream is reached. That - // data could be left there from *previous* call to - // the decoder; in that case we wouldn't know where - // to put that data. - assert(*in_pos >= coder->lz.temp_size); - *in_pos -= coder->lz.temp_size; - coder->lz.temp_size = 0; - } - - return ret; - } + if (coder->next.code == NULL) + return decode_buffer(coder, in, in_pos, in_size, + out, out_pos, out_size); // We aren't the last coder in the chain, we need to decode // our input to a temporary buffer. - const bool flushing = action == LZMA_SYNC_FLUSH; while (*out_pos < out_size) { - if (!coder->lz.next_finished - && coder->lz.temp_size < LZMA_BUFFER_SIZE) { + // Fill the temporary buffer if it is empty. + if (!coder->next_finished + && coder->temp.pos == coder->temp.size) { + coder->temp.pos = 0; + coder->temp.size = 0; + const lzma_ret ret = coder->next.code( coder->next.coder, allocator, in, in_pos, in_size, - coder->lz.temp, &coder->lz.temp_size, + coder->temp.buffer, &coder->temp.size, LZMA_BUFFER_SIZE, action); if (ret == LZMA_STREAM_END) - coder->lz.next_finished = true; - else if (coder->lz.temp_size < LZMA_BUFFER_SIZE - || ret != LZMA_OK) + coder->next_finished = true; + else if (ret != LZMA_OK || coder->temp.size == 0) return ret; } - if (coder->lz.this_finished) { - if (coder->lz.temp_size != 0) + if (coder->this_finished) { + if (coder->temp.size != 0) return LZMA_DATA_ERROR; - if (coder->lz.next_finished) + if (coder->next_finished) return LZMA_STREAM_END; return LZMA_OK; } - size_t dummy = 0; - const lzma_ret ret = decode_buffer(coder, NULL, &dummy, 0, - out, out_pos, out_size, flushing); + const lzma_ret ret = decode_buffer(coder, coder->temp.buffer, + &coder->temp.pos, coder->temp.size, + out, out_pos, out_size); if (ret == LZMA_STREAM_END) - coder->lz.this_finished = true; + coder->this_finished = true; else if (ret != LZMA_OK) return ret; - else if (coder->lz.next_finished && *out_pos < out_size) + else if (coder->next_finished && *out_pos < out_size) return LZMA_DATA_ERROR; } @@ -370,94 +161,104 @@ lzma_lz_decode(lzma_coder *coder, } -/// \brief Initializes LZ part of the LZMA decoder or Inflate -/// -/// \param history_size Number of bytes the LZ out window is -/// supposed keep available from the output -/// history. -/// \param match_max_len Number of bytes a single decoding loop -/// can advance the write position (lz->pos) -/// in the history buffer (lz->dict). -/// -/// \note This function is called by LZMA decoder and Inflate init()s. -/// It's up to those functions allocate *lz and initialize it -/// with LZMA_LZ_DECODER_INIT. +static void +lz_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_next_end(&coder->next, allocator); + lzma_free(coder->dict.buf, allocator); + + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + + lzma_free(coder, allocator); + return; +} + + extern lzma_ret -lzma_lz_decoder_reset(lzma_lz_decoder *lz, lzma_allocator *allocator, - bool (*process)(lzma_coder *restrict coder, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, bool has_safe_buffer), - size_t history_size, size_t match_max_len) +lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + size_t *dict_size)) { - // Known uncompressed size is used only with LZMA_Alone files so we - // set it always to unknown by default. - lz->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN; - - // Limit the history size to roughly sane values. This is primarily - // to prevent integer overflows. - if (history_size > UINT32_MAX / 2) - return LZMA_HEADER_ERROR; - - // Store the value actually requested. We use it for sanity checks - // when repeating data from the history buffer. - lz->requested_size = history_size; - - // Avoid tiny history buffer sizes for performance reasons. - // TODO: Test if this actually helps... - if (history_size < DICT_SIZE_MIN) - history_size = DICT_SIZE_MIN; - - // The real size of the history buffer is a bit bigger than - // requested by our caller. This allows us to do some optimizations, - // which help not only speed but simplicity of the code; specifically, - // we can make sure that there is always at least match_max_len - // bytes immediatelly available for writing without a need to wrap - // the history buffer. - const size_t dict_real_size = history_size + 2 * match_max_len + 1; - - // Reallocate memory if needed. - if (history_size != lz->size || match_max_len != lz->match_max_len) { - // Destroy the old buffer. - lzma_lz_decoder_end(lz, allocator); - - lz->size = history_size; - lz->match_max_len = match_max_len; - lz->must_flush_pos = history_size + match_max_len + 1; - - lz->dict = lzma_alloc(dict_real_size, allocator); - if (lz->dict == NULL) + // Allocate the base structure if it isn't already allocated. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) return LZMA_MEM_ERROR; + + next->code = &lz_decode; + next->end = &lz_decoder_end; + + next->coder->dict.buf = NULL; + next->coder->dict.size = 0; + next->coder->lz = LZMA_LZ_DECODER_INIT; + next->coder->next = LZMA_NEXT_CODER_INIT; } - // Reset the variables so that lz_get_byte(lz, 0) will return '\0'. - lz->pos = 0; - lz->start = 0; - lz->end = dict_real_size; - lz->dict[dict_real_size - 1] = 0; - lz->is_full = false; - lz->eopm_detected = false; - lz->next_finished = false; - lz->this_finished = false; - lz->temp_size = 0; - - // Clean up the temporary buffer to make it very sure that there are - // no information leaks when multiple steams are decoded with the - // same decoder structures. - memzero(lz->temp, LZMA_BUFFER_SIZE); - - // Set the process function pointer. - lz->process = process; + // Allocate and initialize the LZ-based decoder. It will also give + // us the dictionary size. + size_t dict_size; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &dict_size)); + + // If the dictionary size is very small, increase it to 4096 bytes. + // This is to prevent constant wrapping of the dictionary, which + // would slow things down. The downside is that since we don't check + // separately for the real dictionary size, we may happily accept + // corrupt files. + if (dict_size < 4096) + dict_size = 4096; + + // Make dictionary size a multipe of 16. Some LZ-based decoders like + // LZMA use the lowest bits lzma_dict.pos to know the alignment of the + // data. Aligned buffer is also good when memcpying from the + // dictionary to the output buffer, since applications are + // recommended to give aligned buffers to liblzma. + // + // Avoid integer overflow. FIXME Should the return value be + // LZMA_HEADER_ERROR or LZMA_MEM_ERROR? + if (dict_size > SIZE_MAX - 15) + return LZMA_MEM_ERROR; + + dict_size = (dict_size + 15) & (SIZE_MAX - 15); + + // Allocate and initialize the dictionary. + if (next->coder->dict.size != dict_size) { + lzma_free(next->coder->dict.buf, allocator); + next->coder->dict.buf = lzma_alloc(dict_size, allocator); + if (next->coder->dict.buf == NULL) + return LZMA_MEM_ERROR; - return LZMA_OK; + next->coder->dict.size = dict_size; + } + + dict_reset(&next->coder->dict); + + // Miscellaneous initializations + next->coder->next_finished = false; + next->coder->this_finished = false; + next->coder->temp.pos = 0; + next->coder->temp.size = 0; + + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); +} + + +extern uint64_t +lzma_lz_decoder_memusage(size_t dictionary_size) +{ + return sizeof(lzma_coder) + (uint64_t)(dictionary_size); } extern void -lzma_lz_decoder_end(lzma_lz_decoder *lz, lzma_allocator *allocator) +lzma_lz_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) { - lzma_free(lz->dict, allocator); - lz->dict = NULL; - lz->size = 0; - lz->match_max_len = 0; - return; + coder->lz.set_uncompressed(coder->lz.coder, uncompressed_size); } diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index 1acf9831..d2a77ba4 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -18,201 +18,215 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_LZ_OUT_H -#define LZMA_LZ_OUT_H +#ifndef LZMA_LZ_DECODER_H +#define LZMA_LZ_DECODER_H #include "common.h" -/// Get a byte from the history buffer. -#define lz_get_byte(lz, distance) \ - ((distance) < (lz).pos \ - ? (lz).dict[(lz).pos - (distance) - 1] \ - : (lz).dict[(lz).pos - (distance) - 1 + (lz).end]) - - -/// Test if dictionary is empty. -#define lz_is_empty(lz) \ - ((lz).pos == 0 && !(lz).is_full) - - -#define LZMA_LZ_DECODER_INIT \ - (lzma_lz_decoder){ .dict = NULL, .size = 0, .match_max_len = 0 } - - typedef struct { - /// Function to do the actual decoding (LZMA or Inflate) - bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t size_in, - bool has_safe_buffer); + /// Pointer to the dictionary buffer. It can be an allocated buffer + /// internal to liblzma, or it can a be a buffer given by the + /// application when in single-call mode (not implemented yet). + uint8_t *buf; - /// Pointer to dictionary (history) buffer. - /// \note Not 'restrict' because can alias next_out. - uint8_t *dict; - - /// Next write goes to dict[pos]. + /// Write position in dictionary. The next byte will be written to + /// buf[pos]. size_t pos; - /// Next byte to flush is buffer[start]. - size_t start; - - /// First byte to not flush is buffer[end]. - size_t end; + /// Indicates how full the dictionary is. This is used by + /// dict_is_distance_valid() to detect corrupt files that would + /// read beyond the beginning of the dictionary. + size_t full; - /// First position to which data must not be written. + /// Write limit size_t limit; - /// True if dictionary has needed wrapping. - bool is_full; - - /// True if process() has detected End of Payload Marker. - bool eopm_detected; + /// Size of the dictionary + size_t size; - /// True if the next coder in the chain has returned LZMA_STREAM_END. - bool next_finished; +} lzma_dict; - /// True if the LZ decoder (e.g. LZMA) has detected End of Payload - /// Marker. This may become true before next_finished becomes true. - bool this_finished; - /// When pos >= must_flush_pos, we must not call process(). - size_t must_flush_pos; +typedef struct { + /// Data specific to the LZ-based decoder + lzma_coder *coder; - /// Maximum number of bytes that a single decoding loop inside - /// process() can produce data into dict. This amount is kept - /// always available at dict + pos i.e. it is safe to write a byte - /// to dict[pos + match_max_len - 1]. - size_t match_max_len; + /// Function to decode from in[] to *dict + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size); - /// Number of bytes allocated to dict. - size_t size; + void (*reset)(lzma_coder *coder, const void *options); - /// Requested size of the dictionary. This is needed because we avoid - /// using extremely tiny history buffers. - size_t requested_size; + /// Set the uncompressed size + void (*set_uncompressed)(lzma_coder *coder, + lzma_vli uncompressed_size); - /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown. - lzma_vli uncompressed_size; + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); - /// Number of bytes currently in temp[]. - size_t temp_size; +} lzma_lz_decoder; - /// Temporary buffer needed when - /// 1) we cannot make the input buffer completely empty; or - /// 2) we are not the last filter in the chain. - uint8_t temp[LZMA_BUFFER_SIZE]; -} lzma_lz_decoder; +#define LZMA_LZ_DECODER_INIT \ + (lzma_lz_decoder){ \ + .coder = NULL, \ + .code = NULL, \ + .reset = NULL, \ + .set_uncompressed = NULL, \ + .end = NULL, \ + } -///////////////////////// -// Function prototypes // -///////////////////////// +extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_decoder *lz, + lzma_allocator *allocator, const void *options, + size_t *dict_size)); -extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz, - lzma_allocator *allocator, bool (*process)( - lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size, - bool has_safe_buffer), - size_t history_size, size_t match_max_len); +extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); -extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action); +extern void lzma_lz_decoder_uncompressed( + lzma_coder *coder, lzma_vli uncompressed_size); -/// Deallocates the history buffer if one exists. -extern void lzma_lz_decoder_end( - lzma_lz_decoder *lz, lzma_allocator *allocator); ////////////////////// // Inline functions // ////////////////////// -// Repeat a block of data from the history. Because memcpy() is faster -// than copying byte by byte in a loop, the copying process gets split -// into three cases: -// 1. distance < length -// Source and target areas overlap, thus we can't use memcpy() -// (nor memmove()) safely. -// TODO: If this is common enough, it might be worth optimizing this -// more e.g. by checking if distance > sizeof(uint8_t*) and using -// memcpy in small chunks. -// 2. distance < pos -// This is the easiest and the fastest case. The block being copied -// is a contiguous piece in the history buffer. The buffer offset -// doesn't need wrapping. -// 3. distance >= pos -// We need to wrap the position, because otherwise we would try copying -// behind the first byte of the allocated buffer. It is possible that -// the block is fragmeneted into two pieces, thus we might need to call -// memcpy() twice. -// NOTE: The function using this macro must ensure that length is positive -// and that distance is FIXME +/// Get a byte from the history buffer. +static inline uint8_t +dict_get(const lzma_dict *const dict, const uint32_t distance) +{ + return dict->buf[dict->pos - distance - 1 + + (distance < dict->pos ? 0 : dict->size)]; +} + + +/// Test if dictionary is empty. +static inline bool +dict_is_empty(const lzma_dict *const dict) +{ + return dict->full == 0; +} + + +/// Validate the match distance +static inline bool +dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) +{ + return dict->full >= distance; +} + + +/// Repeat *len bytes at distance. static inline bool -lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length) +dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) { - // Validate offset of the block to be repeated. It doesn't - // make sense to copy data behind the beginning of the stream. - // Leaving this check away would lead to a security problem, - // in which e.g. the data of the previously decoded file(s) - // would be leaked (or whatever happens to be in unused - // part of the dictionary buffer). - if (unlikely(distance >= lz->pos && !lz->is_full)) - return false; - - // It also doesn't make sense to copy data farer than - // the dictionary size. - if (unlikely(distance >= lz->requested_size)) - return false; - - // The caller must have checked these! - assert(distance <= lz->size); - assert(length > 0); - assert(length <= lz->match_max_len); - - // Copy the amount of data requested by the decoder. - if (distance < length) { + // Don't write past the end of the dictionary. + const size_t dict_avail = dict->limit - dict->pos; + uint32_t left = MIN(dict_avail, *len); + *len -= left; + + // Repeat a block of data from the history. Because memcpy() is faster + // than copying byte by byte in a loop, the copying process gets split + // into three cases. + if (distance < left) { // Source and target areas overlap, thus we can't use - // memcpy() nor even memmove() safely. :-( - // TODO: Copying byte by byte is slow. It might be - // worth optimizing this more if this case is common. + // memcpy() nor even memmove() safely. do { - lz->dict[lz->pos] = lz_get_byte(*lz, distance); - ++lz->pos; - } while (--length > 0); + dict->buf[dict->pos] = dict_get(dict, distance); + ++dict->pos; + } while (--left > 0); - } else if (distance < lz->pos) { + } else if (distance < dict->pos) { // The easiest and fastest case - memcpy(lz->dict + lz->pos, - lz->dict + lz->pos - distance - 1, - length); - lz->pos += length; + memcpy(dict->buf + dict->pos, + dict->buf + dict->pos - distance - 1, + left); + dict->pos += left; } else { // The bigger the dictionary, the more rare this // case occurs. We need to "wrap" the dict, thus // we might need two memcpy() to copy all the data. - assert(lz->is_full); - const uint32_t copy_pos = lz->pos - distance - 1 + lz->end; - uint32_t copy_size = lz->end - copy_pos; + assert(dict->full == dict->size); + const uint32_t copy_pos + = dict->pos - distance - 1 + dict->size; + uint32_t copy_size = dict->size - copy_pos; - if (copy_size < length) { - memcpy(lz->dict + lz->pos, lz->dict + copy_pos, + if (copy_size < left) { + memcpy(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); - lz->pos += copy_size; - copy_size = length - copy_size; - memcpy(lz->dict + lz->pos, lz->dict, copy_size); - lz->pos += copy_size; + dict->pos += copy_size; + copy_size = left - copy_size; + memcpy(dict->buf + dict->pos, dict->buf, copy_size); + dict->pos += copy_size; } else { - memcpy(lz->dict + lz->pos, lz->dict + copy_pos, - length); - lz->pos += length; + memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + left); + dict->pos += left; } } - return true; + // Update how full the dictionary is. + if (dict->full < dict->pos) + dict->full = dict->pos; + + return unlikely(*len != 0); +} + + +/// Puts one byte into the dictionary. Returns true if the dictionary was +/// already full and the byte couldn't be added. +static inline bool +dict_put(lzma_dict *dict, uint8_t byte) +{ + if (unlikely(dict->pos == dict->limit)) + return true; + + dict->buf[dict->pos++] = byte; + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return false; +} + + +/// Copies arbitrary amount of data into the dictionary. +static inline void +dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size, + size_t *restrict left) +{ + // NOTE: If we are being given more data than the size of the + // dictionary, it could be possible to optimize the LZ decoder + // so that not everything needs to go through the dictionary. + // This shouldn't be very common thing in practice though, and + // the slowdown of one extra memcpy() isn't bad compared to how + // much time it would have taken if the data were compressed. + + if (in_size - *in_pos > *left) + in_size = *in_pos + *left; + + *left -= lzma_bufcpy(in, in_pos, in_size, + dict->buf, &dict->pos, dict->limit); + + if (dict->pos > dict->full) + dict->full = dict->pos; + + return; +} + + +static inline void +dict_reset(lzma_dict *dict) +{ + dict->pos = 0; + dict->full = 0; + dict->buf[dict->size - 1] = '\0'; } #endif diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 82b9103f..d5f84826 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -3,8 +3,8 @@ /// \file lz_encoder.c /// \brief LZ in window // -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -18,496 +18,492 @@ // /////////////////////////////////////////////////////////////////////////////// -#include "lz_encoder_private.h" +#include "lz_encoder.h" +#include "lz_encoder_hash.h" -// Hash Chains -#ifdef HAVE_HC3 -# include "hc3.h" -#endif -#ifdef HAVE_HC4 -# include "hc4.h" -#endif -// Binary Trees -#ifdef HAVE_BT2 -# include "bt2.h" -#endif -#ifdef HAVE_BT3 -# include "bt3.h" -#endif -#ifdef HAVE_BT4 -# include "bt4.h" -#endif +struct lzma_coder_s { + /// LZ-based encoder e.g. LZMA + lzma_lz_encoder lz; + /// History buffer and match finder + lzma_mf mf; -/// This is needed in two places so provide a macro. -#define get_cyclic_buffer_size(history_size) ((history_size) + 1) + /// Next coder in the chain + lzma_next_coder next; +}; -/// Calculate certain match finder properties and validate the calculated -/// values. This is as its own function, because *num_items is needed to -/// calculate memory requirements in common/memory.c. -extern bool -lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, - uint32_t history_size, uint32_t *restrict hash_mask, - uint32_t *restrict hash_size_sum, uint32_t *restrict num_items) +/// \brief Moves the data in the input window to free space for new data +/// +/// mf->buffer is a sliding input window, which keeps mf->keep_size_before +/// bytes of input history available all the time. Now and then we need to +/// "slide" the buffer to make space for the new data to the end of the +/// buffer. At the same time, data older than keep_size_before is dropped. +/// +static void +move_window(lzma_mf *mf) { - uint32_t fix_hash_size; - uint32_t sons; + // Align the move to a multiple of 16 bytes. Some LZ-based encoders + // like LZMA use the lowest bits of mf->read_pos to know the + // alignment of the uncompressed data. We also get better speed + // for memmove() with aligned buffers. + assert(mf->read_pos > mf->keep_size_before); + const uint32_t move_offset + = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15); - switch (match_finder) { -#ifdef HAVE_HC3 - case LZMA_MF_HC3: - fix_hash_size = LZMA_HC3_FIX_HASH_SIZE; - sons = 1; - break; -#endif -#ifdef HAVE_HC4 - case LZMA_MF_HC4: - fix_hash_size = LZMA_HC4_FIX_HASH_SIZE; - sons = 1; - break; -#endif -#ifdef HAVE_BT2 - case LZMA_MF_BT2: - fix_hash_size = LZMA_BT2_FIX_HASH_SIZE; - sons = 2; - break; -#endif -#ifdef HAVE_BT3 - case LZMA_MF_BT3: - fix_hash_size = LZMA_BT3_FIX_HASH_SIZE; - sons = 2; - break; -#endif -#ifdef HAVE_BT4 - case LZMA_MF_BT4: - fix_hash_size = LZMA_BT4_FIX_HASH_SIZE; - sons = 2; - break; -#endif - default: - return true; - } + assert(mf->write_pos > move_offset); + const size_t move_size = mf->write_pos - move_offset; - uint32_t hs; + assert(move_offset + move_size <= mf->size); -#ifdef HAVE_LZMA_BT2 - if (match_finder == LZMA_BT2) { - // NOTE: hash_mask is not used by the BT2 match finder, - // but it is initialized just in case. - hs = LZMA_BT2_HASH_SIZE; - *hash_mask = 0; - } else -#endif - { - hs = history_size - 1; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - hs |= 0xFFFF; + memmove(mf->buffer, mf->buffer + move_offset, move_size); - if (hs > (UINT32_C(1) << 24)) { - if (match_finder == LZMA_MF_HC4 - || match_finder == LZMA_MF_BT4) - hs >>= 1; - else - hs = (1 << 24) - 1; - } + mf->offset += move_offset; + mf->read_pos -= move_offset; + mf->read_limit -= move_offset; + mf->write_pos -= move_offset; + + return; +} - *hash_mask = hs; - ++hs; - } - *hash_size_sum = hs + fix_hash_size; +/// \brief Tries to fill the input window (mf->buffer) +/// +/// If we are the last encoder in the chain, our input data is in in[]. +/// Otherwise we call the next filter in the chain to process in[] and +/// write its output to mf->buffer. +/// +/// This function must not be called once it has returned LZMA_STREAM_END. +/// +static lzma_ret +fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, + size_t *in_pos, size_t in_size, lzma_action action) +{ + assert(coder->mf.read_pos <= coder->mf.write_pos); - *num_items = *hash_size_sum - + get_cyclic_buffer_size(history_size) * sons; + // Move the sliding window if needed. + if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after) + move_window(&coder->mf); - return false; -} + size_t in_used; + lzma_ret ret; + if (coder->next.code == NULL) { + // Not using a filter, simply memcpy() as much as possible. + in_used = lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer, + &coder->mf.write_pos, coder->mf.size); + ret = action != LZMA_RUN && *in_pos == in_size + ? LZMA_STREAM_END : LZMA_OK; -extern lzma_ret -lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, - bool (*process)(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size), - size_t history_size, size_t additional_buffer_before, - size_t match_max_len, size_t additional_buffer_after, - lzma_match_finder match_finder, uint32_t match_finder_cycles, - const uint8_t *preset_dictionary, - size_t preset_dictionary_size) -{ - lz->sequence = SEQ_RUN; + } else { + const size_t in_start = *in_pos; + ret = coder->next.code(coder->next.coder, allocator, + in, in_pos, in_size, + coder->mf.buffer, &coder->mf.write_pos, + coder->mf.size, action); + in_used = *in_pos - in_start; + } - /////////////// - // In Window // - /////////////// + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. + if (ret == LZMA_STREAM_END) { + assert(*in_pos == in_size); + ret = LZMA_OK; + coder->mf.action = action; + coder->mf.read_limit = coder->mf.write_pos; - // Validate history size. - if (history_size < LZMA_DICTIONARY_SIZE_MIN - || history_size > LZMA_DICTIONARY_SIZE_MAX) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_HEADER_ERROR; + } else if (coder->mf.write_pos > coder->mf.keep_size_after) { + // This needs to be done conditionally, because if we got + // only little new input, there may be too little input + // to do any encoding yet. + coder->mf.read_limit = coder->mf.write_pos + - coder->mf.keep_size_after; } - assert(history_size <= MAX_VAL_FOR_NORMALIZE - 256); - assert(LZMA_DICTIONARY_SIZE_MAX <= MAX_VAL_FOR_NORMALIZE - 256); + // Restart the match finder after finished LZMA_SYNC_FLUSH. + if (coder->mf.pending > 0 + && coder->mf.read_pos < coder->mf.read_limit) { + // Match finder may update coder->pending and expects it to + // start from zero, so use a temporary variable. + const size_t pending = coder->mf.pending; + coder->mf.pending = 0; - // Calculate the size of the history buffer to allocate. - // TODO: Get a reason for magic constant of 256. - const size_t size_reserv = (history_size + additional_buffer_before - + match_max_len + additional_buffer_after) / 2 + 256; + // Rewind read_pos so that the match finder can hash + // the pending bytes. + assert(coder->mf.read_pos >= pending); + coder->mf.read_pos -= pending; - lz->keep_size_before = history_size + additional_buffer_before; - lz->keep_size_after = match_max_len + additional_buffer_after; + // Call the skip function directly instead of using + // lz_dict_skip(), since we don't want to touch + // mf->read_ahead. + coder->mf.skip(&coder->mf, pending); + } - const size_t buffer_size = lz->keep_size_before + lz->keep_size_after - + size_reserv; + return ret; +} - // Allocate history buffer if its size has changed. - if (buffer_size != lz->size) { - lzma_free(lz->buffer, allocator); - lz->buffer = lzma_alloc(buffer_size, allocator); - if (lz->buffer == NULL) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_MEM_ERROR; + +static lzma_ret +lz_encode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, lzma_action action) +{ + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->mf.buffer if needed. + if (coder->mf.action == LZMA_RUN && coder->mf.read_pos + >= coder->mf.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + + // Encode + const lzma_ret ret = coder->lz.code(coder->lz.coder, + &coder->mf, out, out_pos, out_size); + if (ret != LZMA_OK) { + // Setting this to LZMA_RUN for cases when we are + // flushing. It doesn't matter when finishing or if + // an error occurred. + coder->mf.action = LZMA_RUN; + return ret; } } - // Allocation successful. Store the new size. - lz->size = buffer_size; + return LZMA_OK; +} + + +static bool +lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) +{ + if (lz_options->dictionary_size < LZMA_DICTIONARY_SIZE_MIN + || lz_options->dictionary_size + > LZMA_DICTIONARY_SIZE_MAX + || lz_options->find_len_max + > lz_options->match_len_max) + return true; + + mf->keep_size_before = lz_options->before_size + + lz_options->dictionary_size; - // Reset in window variables. - lz->offset = 0; - lz->read_pos = 0; - lz->read_limit = 0; - lz->write_pos = 0; - lz->pending = 0; + mf->keep_size_after = lz_options->after_size + + lz_options->match_len_max; + // To avoid constant memmove()s, allocate some extra space. Since + // memmove()s become more expensive when the size of the buffer + // increases, we reserve more space when a large dictionary is + // used to make the memmove() calls rarer. + uint32_t reserve = lz_options->dictionary_size / 2; + if (reserve > (UINT32_C(1) << 30)) + reserve /= 2; - ////////////////// - // Match Finder // - ////////////////// + reserve += (lz_options->before_size + lz_options->match_len_max + + lz_options->after_size) / 2 + (UINT32_C(1) << 19); - // Validate match_finder, set function pointers and a few match - // finder specific variables. - switch (match_finder) { -#ifdef HAVE_HC3 + const uint32_t old_size = mf->size; + mf->size = mf->keep_size_before + reserve + mf->keep_size_after; + + // FIXME Integer overflows + + // Deallocate the old history buffer if it exists but has different + // size than what is needed now. + if (mf->buffer != NULL && old_size != mf->size) { + lzma_free(mf->buffer, allocator); + mf->buffer = NULL; + } + + // Match finder options + mf->match_len_max = lz_options->match_len_max; + mf->find_len_max = lz_options->find_len_max; + mf->cyclic_buffer_size = lz_options->dictionary_size + 1; + + // Validate the match finder ID and setup the function pointers. + switch (lz_options->match_finder) { +#ifdef HAVE_MF_HC3 case LZMA_MF_HC3: - lz->get_matches = &lzma_hc3_get_matches; - lz->skip = &lzma_hc3_skip; - lz->cut_value = 8 + (match_max_len >> 2); + mf->find = &lzma_mf_hc3_find; + mf->skip = &lzma_mf_hc3_skip; break; #endif -#ifdef HAVE_HC4 +#ifdef HAVE_MF_HC4 case LZMA_MF_HC4: - lz->get_matches = &lzma_hc4_get_matches; - lz->skip = &lzma_hc4_skip; - lz->cut_value = 8 + (match_max_len >> 2); + mf->find = &lzma_mf_hc4_find; + mf->skip = &lzma_mf_hc4_skip; break; #endif -#ifdef HAVE_BT2 +#ifdef HAVE_MF_BT2 case LZMA_MF_BT2: - lz->get_matches = &lzma_bt2_get_matches; - lz->skip = &lzma_bt2_skip; - lz->cut_value = 16 + (match_max_len >> 1); + mf->find = &lzma_mf_bt2_find; + mf->skip = &lzma_mf_bt2_skip; break; #endif -#ifdef HAVE_BT3 +#ifdef HAVE_MF_BT3 case LZMA_MF_BT3: - lz->get_matches = &lzma_bt3_get_matches; - lz->skip = &lzma_bt3_skip; - lz->cut_value = 16 + (match_max_len >> 1); + mf->find = &lzma_mf_bt3_find; + mf->skip = &lzma_mf_bt3_skip; break; #endif -#ifdef HAVE_BT4 +#ifdef HAVE_MF_BT4 case LZMA_MF_BT4: - lz->get_matches = &lzma_bt4_get_matches; - lz->skip = &lzma_bt4_skip; - lz->cut_value = 16 + (match_max_len >> 1); + mf->find = &lzma_mf_bt4_find; + mf->skip = &lzma_mf_bt4_skip; break; #endif + default: - lzma_lz_encoder_end(lz, allocator); - return LZMA_HEADER_ERROR; + return true; } - // Check if we have been requested to use a non-default cut_value. - if (match_finder_cycles > 0) - lz->cut_value = match_finder_cycles; - - lz->match_max_len = match_max_len; - lz->cyclic_buffer_size = get_cyclic_buffer_size(history_size); + // Calculate the sizes of mf->hash and mf->son. + const uint32_t hash_bytes = lz_options->match_finder & 0x0F; + const bool is_bt = (lz_options->match_finder & 0x10) != 0; + uint32_t hs; - uint32_t hash_size_sum; - uint32_t num_items; - if (lzma_lz_encoder_hash_properties(match_finder, history_size, - &lz->hash_mask, &hash_size_sum, &num_items)) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_HEADER_ERROR; - } + if (hash_bytes == 2) { + hs = 0xFFFF; + } else { + // Round dictionary size up to the next 2^n - 1 so it can + // be used as a hash mask. + hs = lz_options->dictionary_size - 1; + hs |= hs >> 1; + hs |= hs >> 2; + hs |= hs >> 4; + hs |= hs >> 8; + hs >>= 1; + hs |= 0xFFFF; - if (num_items != lz->num_items) { -#if UINT32_MAX >= SIZE_MAX / 4 - // Check for integer overflow. (Huge dictionaries are not - // possible on 32-bit CPU.) - if (num_items > SIZE_MAX / sizeof(uint32_t)) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_MEM_ERROR; + if (hs > (UINT32_C(1) << 24)) { + if (hash_bytes == 3) + hs = (UINT32_C(1) << 24) - 1; + else + hs >>= 1; } -#endif - - const size_t size_in_bytes - = (size_t)(num_items) * sizeof(uint32_t); + } - lzma_free(lz->hash, allocator); - lz->hash = lzma_alloc(size_in_bytes, allocator); - if (lz->hash == NULL) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_MEM_ERROR; - } + mf->hash_mask = hs; + + ++hs; + if (hash_bytes > 2) + hs += HASH_2_SIZE; + if (hash_bytes > 3) + hs += HASH_3_SIZE; +/* + No match finder uses this at the moment. + if (mf->hash_bytes > 4) + hs += HASH_4_SIZE; +*/ + + const uint32_t old_count = mf->hash_size_sum + mf->sons_count; + mf->hash_size_sum = hs; + mf->sons_count = mf->cyclic_buffer_size; + if (is_bt) + mf->sons_count *= 2; + + const uint32_t new_count = mf->hash_size_sum + mf->sons_count; + + // Deallocate the old hash array if it exists and has different size + // than what is needed now. + if (mf->hash != NULL && old_count != new_count) { + lzma_free(mf->hash, allocator); + mf->hash = NULL; + } - lz->num_items = num_items; + // Maximum number of match finder cycles + mf->loops = lz_options->match_finder_cycles; + if (mf->loops == 0) { + mf->loops = 16 + (lz_options->find_len_max / 2); + if (!is_bt) + mf->loops /= 2; } - lz->son = lz->hash + hash_size_sum; + return false; +} - // Reset the hash table to empty hash values. - { - uint32_t *restrict items = lz->hash; - for (uint32_t i = 0; i < hash_size_sum; ++i) - items[i] = EMPTY_HASH_VALUE; +static bool +lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator) +{ + // Allocate the history buffer. + if (mf->buffer == NULL) { + mf->buffer = lzma_alloc(mf->size, allocator); + if (mf->buffer == NULL) + return true; } - lz->cyclic_buffer_pos = 0; + // Use cyclic_buffer_size as initial mf->offset. This allows + // avoiding a few branches in the match finders. The downside is + // that match finder needs to be normalized more often, which may + // hurt performance with huge dictionaries. + mf->offset = mf->cyclic_buffer_size; + mf->read_pos = 0; + mf->read_ahead = 0; + mf->read_limit = 0; + mf->write_pos = 0; + mf->pending = 0; - // Because zero is used as empty hash value, make the first byte - // appear at buffer[1 - offset]. - ++lz->offset; + // Allocate match finder's hash array. + const size_t alloc_count = mf->hash_size_sum + mf->sons_count; - // If we are using a preset dictionary, read it now. - // TODO: This isn't implemented yet so return LZMA_HEADER_ERROR. - if (preset_dictionary != NULL && preset_dictionary_size > 0) { - lzma_lz_encoder_end(lz, allocator); - return LZMA_HEADER_ERROR; +#if UINT32_MAX >= SIZE_MAX / 4 + // Check for integer overflow. (Huge dictionaries are not + // possible on 32-bit CPU.) + if (alloc_count > SIZE_MAX / sizeof(uint32_t)) + return true; +#endif + + if (mf->hash == NULL) { + mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t), + allocator); + if (mf->hash == NULL) + return true; } - // Set the process function pointer. - lz->process = process; + mf->son = mf->hash + mf->hash_size_sum; + mf->cyclic_buffer_pos = 0; + + // Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we + // can use memset(). +/* + for (uint32_t i = 0; i < hash_size_sum; ++i) + mf->hash[i] = EMPTY_HASH_VALUE; +*/ + memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t)); + + // We don't need to initialize mf->son, but not doing that will + // make Valgrind complain in normalization (see normalize() in + // lz_encoder_mf.c). + // + // Skipping this initialization is *very* good when big dictionary is + // used but only small amount of data gets actually compressed: most + // of the mf->hash won't get actually allocated by the kernel, so + // we avoid wasting RAM and improve initialization speed a lot. + //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); + + mf->action = LZMA_RUN; - return LZMA_OK; + return false; } -extern void -lzma_lz_encoder_end(lzma_lz_encoder *lz, lzma_allocator *allocator) +extern uint64_t +lzma_lz_encoder_memusage(const lzma_lz_options *lz_options) { - lzma_free(lz->hash, allocator); - lz->hash = NULL; - lz->num_items = 0; - - lzma_free(lz->buffer, allocator); - lz->buffer = NULL; - lz->size = 0; - - return; + // Old buffers must not exist when calling lz_encoder_prepare(). + lzma_mf mf = { + .buffer = NULL, + .hash = NULL, + }; + + // Setup the size information into mf. + if (lz_encoder_prepare(&mf, NULL, lz_options)) + return UINT64_MAX; + + // Calculate the memory usage. + return (uint64_t)(mf.hash_size_sum + mf.sons_count) + * sizeof(uint32_t) + + (uint64_t)(mf.size) + sizeof(lzma_coder); } -/// \brief Moves the data in the input window to free space for new data -/// -/// lz->buffer is a sliding input window, which keeps lz->keep_size_before -/// bytes of input history available all the time. Now and then we need to -/// "slide" the buffer to make space for the new data to the end of the -/// buffer. At the same time, data older than keep_size_before is dropped. -/// static void -move_window(lzma_lz_encoder *lz) +lz_encoder_end(lzma_coder *coder, lzma_allocator *allocator) { - // buffer[move_offset] will become buffer[0]. - assert(lz->read_pos > lz->keep_size_after); - size_t move_offset = lz->read_pos - lz->keep_size_before; - - // We need one additional byte, since move_pos() moves on 1 byte. - // TODO: Clean up? At least document more. - if (move_offset > 0) - --move_offset; - - assert(lz->write_pos > move_offset); - const size_t move_size = lz->write_pos - move_offset; + lzma_next_end(&coder->next, allocator); - assert(move_offset + move_size <= lz->size); + lzma_free(coder->mf.hash, allocator); + lzma_free(coder->mf.buffer, allocator); - memmove(lz->buffer, lz->buffer + move_offset, move_size); - - lz->offset += move_offset; - lz->read_pos -= move_offset; - lz->read_limit -= move_offset; - lz->write_pos -= move_offset; + if (coder->lz.end != NULL) + coder->lz.end(coder->lz.coder, allocator); + else + lzma_free(coder->lz.coder, allocator); + lzma_free(coder, allocator); return; } -/// \brief Tries to fill the input window (lz->buffer) -/// -/// If we are the last encoder in the chain, our input data is in in[]. -/// Otherwise we call the next filter in the chain to process in[] and -/// write its output to lz->buffer. -/// -/// This function must not be called once it has returned LZMA_STREAM_END. -/// -static lzma_ret -fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, - size_t *in_pos, size_t in_size, lzma_action action) +extern lzma_ret +lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)) { - assert(coder->lz.read_pos <= coder->lz.write_pos); + // Allocate and initialize the base data structure. + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; - // Move the sliding window if needed. - if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after) - move_window(&coder->lz); + next->code = &lz_encode; + next->end = &lz_encoder_end; - size_t in_used; - lzma_ret ret; - if (coder->next.code == NULL) { - // Not using a filter, simply memcpy() as much as possible. - in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer, - &coder->lz.write_pos, coder->lz.size); + next->coder->lz.coder = NULL; + next->coder->lz.code = NULL; + next->coder->lz.end = NULL; - if (action != LZMA_RUN && *in_pos == in_size) - ret = LZMA_STREAM_END; - else - ret = LZMA_OK; + next->coder->mf.buffer = NULL; + next->coder->mf.hash = NULL; - } else { - const size_t in_start = *in_pos; - ret = coder->next.code(coder->next.coder, allocator, - in, in_pos, in_size, - coder->lz.buffer, &coder->lz.write_pos, - coder->lz.size, action); - in_used = *in_pos - in_start; + next->coder->next = LZMA_NEXT_CODER_INIT; } - // If end of stream has been reached or flushing completed, we allow - // the encoder to process all the input (that is, read_pos is allowed - // to reach write_pos). Otherwise we keep keep_size_after bytes - // available as prebuffer. - if (ret == LZMA_STREAM_END) { - assert(*in_pos == in_size); - coder->lz.read_limit = coder->lz.write_pos; - ret = LZMA_OK; + // Initialize the LZ-based encoder. + lzma_lz_options lz_options; + return_if_error(lz_init(&next->coder->lz, allocator, + filters[0].options, &lz_options)); - switch (action) { - case LZMA_SYNC_FLUSH: - coder->lz.sequence = SEQ_FLUSH; - break; - - case LZMA_FINISH: - coder->lz.sequence = SEQ_FINISH; - break; - - default: - assert(0); - ret = LZMA_PROG_ERROR; - break; - } - - } else if (coder->lz.write_pos > coder->lz.keep_size_after) { - // This needs to be done conditionally, because if we got - // only little new input, there may be too little input - // to do any encoding yet. - coder->lz.read_limit = coder->lz.write_pos - - coder->lz.keep_size_after; - } - - // Restart the match finder after finished LZMA_SYNC_FLUSH. - if (coder->lz.pending > 0 - && coder->lz.read_pos < coder->lz.read_limit) { - // Match finder may update coder->pending and expects it to - // start from zero, so use a temporary variable. - const size_t pending = coder->lz.pending; - coder->lz.pending = 0; + // Setup the size information into next->coder->mf and deallocate + // old buffers if they have wrong size. + if (lz_encoder_prepare(&next->coder->mf, allocator, &lz_options)) + return LZMA_HEADER_ERROR; - // Rewind read_pos so that the match finder can hash - // the pending bytes. - assert(coder->lz.read_pos >= pending); - coder->lz.read_pos -= pending; - coder->lz.skip(&coder->lz, pending); - } + // Allocate new buffers if needed, and do the rest of + // the initialization. + if (lz_encoder_init(&next->coder->mf, allocator)) + return LZMA_MEM_ERROR; - return ret; + // Initialize the next filter in the chain, if any. + return lzma_next_filter_init(&next->coder->next, allocator, + filters + 1); } -extern lzma_ret -lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator, - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, - uint8_t *restrict out, size_t *restrict out_pos, - size_t out_size, lzma_action action) +extern LZMA_API lzma_bool +lzma_mf_is_supported(lzma_match_finder mf) { - while (*out_pos < out_size - && (*in_pos < in_size || action != LZMA_RUN)) { - // Read more data to coder->lz.buffer if needed. - if (coder->lz.sequence == SEQ_RUN - && coder->lz.read_pos >= coder->lz.read_limit) - return_if_error(fill_window(coder, allocator, - in, in_pos, in_size, action)); + bool ret = false; - // Encode - if (coder->lz.process(coder, out, out_pos, out_size)) { - // Setting this to SEQ_RUN for cases when we are - // flushing. It doesn't matter when finishing. - coder->lz.sequence = SEQ_RUN; - return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK; - } - } +#ifdef HAVE_MF_HC3 + if (mf == LZMA_MF_HC3) + ret = true; +#endif - return LZMA_OK; -} +#ifdef HAVE_MF_HC4 + if (mf == LZMA_MF_HC4) + ret = true; +#endif +#ifdef HAVE_MF_BT2 + if (mf == LZMA_MF_BT2) + ret = true; +#endif -/// \brief Normalizes hash values -/// -/// lzma_lz_normalize is called when lz->pos hits MAX_VAL_FOR_NORMALIZE, -/// which currently happens once every 2 GiB of input data (to be exact, -/// after the first 2 GiB it happens once every 2 GiB minus dictionary_size -/// bytes). lz->pos is incremented by lzma_lz_move_pos(). -/// -/// lz->hash contains big amount of offsets relative to lz->buffer. -/// The offsets are stored as uint32_t, which is the only reasonable -/// datatype for these offsets; uint64_t would waste far too much RAM -/// and uint16_t would limit the dictionary to 64 KiB (far too small). -/// -/// When compressing files over 2 GiB, lz->buffer needs to be moved forward -/// to avoid integer overflows. We scan the lz->hash array and fix every -/// value to match the updated lz->buffer. -extern void -lzma_lz_encoder_normalize(lzma_lz_encoder *lz) -{ - const uint32_t subvalue = lz->read_pos - lz->cyclic_buffer_size; - assert(subvalue <= INT32_MAX); - - { - const uint32_t num_items = lz->num_items; - uint32_t *restrict items = lz->hash; - - for (uint32_t i = 0; i < num_items; ++i) { - // If the distance is greater than the dictionary - // size, we can simply mark the item as empty. - if (items[i] <= subvalue) - items[i] = EMPTY_HASH_VALUE; - else - items[i] -= subvalue; - } - } +#ifdef HAVE_MF_BT3 + if (mf == LZMA_MF_BT3) + ret = true; +#endif - // Update offset to match the new locations. - lz->offset -= subvalue; +#ifdef HAVE_MF_BT4 + if (mf == LZMA_MF_BT4) + ret = true; +#endif - return; + return ret; } diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h index da0e0804..45bb8462 100644 --- a/src/liblzma/lz/lz_encoder.h +++ b/src/liblzma/lz/lz_encoder.h @@ -3,8 +3,8 @@ /// \file lz_encoder.h /// \brief LZ in window and match finder API // -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -24,19 +24,16 @@ #include "common.h" -typedef struct lzma_lz_encoder_s lzma_lz_encoder; -struct lzma_lz_encoder_s { - enum { - SEQ_RUN, - SEQ_FLUSH, - SEQ_FINISH, - } sequence; +/// A table of these is used by the LZ-based encoder to hold +/// the length-distance pairs found by the match finder. +typedef struct { + uint32_t len; + uint32_t dist; +} lzma_match; - /// Function to do the actual encoding from the sliding input window - /// to the output stream. - bool (*process)(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size); +typedef struct lzma_mf_s lzma_mf; +struct lzma_mf_s { /////////////// // In Window // /////////////// @@ -46,17 +43,33 @@ struct lzma_lz_encoder_s { /// Total size of the allocated buffer (that is, including all /// the extra space) - size_t size; + uint32_t size; + + /// Number of bytes that must be kept available in our input history. + /// That is, once keep_size_before bytes have been processed, + /// buffer[read_pos - keep_size_before] is the oldest byte that + /// must be available for reading. + uint32_t keep_size_before; + + /// Number of bytes that must be kept in buffer after read_pos. + /// That is, read_pos <= write_pos - keep_size_after as long as + /// stream_end_was_reached is false (once it is true, read_pos + /// is allowed to reach write_pos). + uint32_t keep_size_after; /// Match finders store locations of matches using 32-bit integers. /// To avoid adjusting several megabytes of integers every time the /// input window is moved with move_window(), we only adjust the /// offset of the buffer. Thus, buffer[match_finder_pos - offset] /// is the byte pointed by match_finder_pos. - size_t offset; + uint32_t offset; /// buffer[read_pos] is the current byte. - size_t read_pos; + uint32_t read_pos; + + /// Number of bytes that have been ran through the match finder, but + /// which haven't been encoded by the LZ-based encoder yet. + uint32_t read_ahead; /// As long as read_pos is less than read_limit, there is enough /// input available in buffer for at least one encoding loop. @@ -64,92 +77,253 @@ struct lzma_lz_encoder_s { /// Because of the stateful API, read_limit may and will get greater /// than read_pos quite often. This is taken into account when /// calculating the value for keep_size_after. - size_t read_limit; + uint32_t read_limit; /// buffer[write_pos] is the first byte that doesn't contain valid /// uncompressed data; that is, the next input byte will be copied /// to buffer[write_pos]. - size_t write_pos; + uint32_t write_pos; /// Number of bytes not hashed before read_pos. This is needed to /// restart the match finder after LZMA_SYNC_FLUSH. - size_t pending; - - /// Number of bytes that must be kept available in our input history. - /// That is, once keep_size_before bytes have been processed, - /// buffer[read_pos - keep_size_before] is the oldest byte that - /// must be available for reading. - size_t keep_size_before; - - /// Number of bytes that must be kept in buffer after read_pos. - /// That is, read_pos <= write_pos - keep_size_after as long as - /// stream_end_was_reached is false (once it is true, read_pos - /// is allowed to reach write_pos). - size_t keep_size_after; + uint32_t pending; ////////////////// // Match Finder // ////////////////// - // Pointers to match finder functions - void (*get_matches)(lzma_lz_encoder *restrict lz, - uint32_t *restrict distances); - void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num); + /// Find matches. Returns the number of distance-length pairs written + /// to the matches array. This is called only via lzma_mf_find. + uint32_t (*find)(lzma_mf *mf, lzma_match *matches); + + /// Skips num bytes. This is like find() but doesn't make the + /// distance-length pairs available, thus being a little faster. + /// This is called only via mf_skip function. + void (*skip)(lzma_mf *mf, uint32_t num); - // Match finder data - uint32_t *hash; // TODO: Check if hash aliases son - uint32_t *son; // and add 'restrict' if possible. + uint32_t *hash; + uint32_t *son; uint32_t cyclic_buffer_pos; uint32_t cyclic_buffer_size; // Must be dictionary_size + 1. uint32_t hash_mask; - uint32_t cut_value; + + /// Maximum number of loops in the match finder + uint32_t loops; + + /// Maximum length of a match that the match finder will try to find. + uint32_t find_len_max; + + /// Maximum length of a match supported by the LZ-based encoder. + /// If the longest match found by the match finder is find_len_max, + /// lz_dict_find() tries to expand it up to match_len_max bytes. + uint32_t match_len_max; + + /// When running out of input, binary tree match finders need to know + /// if it is due to flushing or finishing. The action is used also + /// by the LZ-based encoders themselves. + lzma_action action; + + /// Number of elements in hash[] uint32_t hash_size_sum; - uint32_t num_items; - uint32_t match_max_len; + + /// Number of elements in son[] + uint32_t sons_count; }; -#define LZMA_LZ_ENCODER_INIT \ - (lzma_lz_encoder){ \ - .buffer = NULL, \ - .size = 0, \ - .hash = NULL, \ - .num_items = 0, \ +typedef struct { + /// Extra amount of data to keep available before the "actual" + /// dictionary. + size_t before_size; + + /// Size of the history buffer + size_t dictionary_size; + + /// Extra amount of data to keep available after the "actual" + /// dictionary. + size_t after_size; + + /// Maximum length of a match that the LZ-based encoder can accept. + /// This is used to extend matches of length find_len_max to the + /// maximum possible length. + size_t match_len_max; + + /// Match finder will search matches of at maximum of this length. + /// This must be less than or equal to match_len_max. + size_t find_len_max; + + /// Type of the match finder to use + lzma_match_finder match_finder; + + /// TODO: Comment + uint32_t match_finder_cycles; + + /// TODO: Comment + const uint8_t *preset_dictionary; + + uint32_t preset_dictionary_size; + +} lzma_lz_options; + + +// The total usable buffer space at any moment outside the match finder: +// before_size + dictionary_size + after_size + match_len_max +// +// In reality, there's some extra space allocated to prevent the number of +// memmove() calls reasonable. The bigger the dictionary_size is, the bigger +// this extra buffer will be since with bigger dictionaries memmove() would +// also take longer. +// +// A single encoder loop in the LZ-based encoder may call the match finder +// (lz_dict_find() or lz_dict_skip()) at maximum of after_size times. +// In other words, a single encoder loop may advance lz_dict.read_pos at +// maximum of after_size times. Since matches are looked up to +// lz_dict.buffer[lz_dict.read_pos + match_len_max - 1], the total +// amount of extra buffer needed after dictionary_size becomes +// after_size + match_len_max. +// +// before_size has two uses. The first one is to keep literals available +// in cases when the LZ-based encoder has made some read ahead. +// TODO: Maybe this could be changed by making the LZ-based encoders to +// store the actual literals as they do with length-distance pairs. +// +// Alrogithms such as LZMA2 first try to compress a chunk, and then check +// if the encoded result is smaller than the uncompressed one. If the chunk +// was uncompressible, it is better to store it in uncompressed form in +// the output stream. To do this, the whole uncompressed chunk has to be +// still available in the history buffer. before_size achieves that. + + +typedef struct { + /// Data specific to the LZ-based encoder + lzma_coder *coder; + + /// Function to encode from *dict to out[] + lzma_ret (*code)(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size); + + /// Free allocated resources + void (*end)(lzma_coder *coder, lzma_allocator *allocator); + +} lzma_lz_encoder; + + +// Basic steps: +// 1. Input gets copied into the dictionary. +// 2. Data in dictionary gets run through the match finder byte by byte. +// 3. The literals and matches are encoded using e.g. LZMA. +// +// The bytes that have been ran through the match finder, but not encoded yet, +// are called `read ahead'. + + +/// Get pointer to the first byte not ran through the match finder +static inline const uint8_t * +mf_ptr(const lzma_mf *mf) +{ + return mf->buffer + mf->read_pos; +} + + +/// Get the number of bytes that haven't been ran through the match finder yet. +static inline uint32_t +mf_avail(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos; +} + + +/// Get the number of bytes that haven't been encoded yet (some of these +/// bytes may have been ran through the match finder though). +static inline uint32_t +mf_unencoded(const lzma_mf *mf) +{ + return mf->write_pos - mf->read_pos - mf->read_ahead; +} + + +/// Calculate the absolute offset from the beginning of the most recent +/// dictionary reset. Only the lowest four bits are important, so there's no +/// problem that we don't know the 64-bit size of the data encoded so far. +/// +/// NOTE: When moving the input window, we need to do it so that the lowest +/// bits of dict->read_pos are not modified to keep this macro working +/// as intended. +static inline uint32_t +mf_position(const lzma_mf *mf) +{ + return mf->read_pos - mf->read_ahead; +} + + +/// Since everything else begins with mf_, use it also for lzma_mf_find(). +#define mf_find lzma_mf_find + + +/// Skip the given number of bytes. This is used when a good match was found. +/// For example, if mf_find() finds a match of 200 bytes long, the first byte +/// of that match was already consumed by mf_find(), and the rest 199 bytes +/// have to be skipped with mf_skip(mf, 199). +static inline void +mf_skip(lzma_mf *mf, uint32_t amount) +{ + if (amount != 0) { + mf->skip(mf, amount); + mf->read_ahead += amount; } +} + + +/// Copies at maximum of *left amount of bytes from the history buffer +/// to out[]. This is needed by LZMA2 to encode uncompressed chunks. +static inline void +mf_read(lzma_mf *mf, uint8_t *out, size_t *out_pos, size_t out_size, + size_t *left) +{ + const size_t out_avail = out_size - *out_pos; + const size_t copy_size = MIN(out_avail, *left); + + assert(mf->read_ahead == 0); + assert(mf->read_pos >= *left); + + memcpy(out + *out_pos, mf->buffer + mf->read_pos - *left, + copy_size); + + *out_pos += copy_size; + *left -= copy_size; + return; +} + + +extern lzma_ret lzma_lz_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters, + lzma_ret (*lz_init)(lzma_lz_encoder *lz, + lzma_allocator *allocator, const void *options, + lzma_lz_options *lz_options)); + + +extern uint64_t lzma_lz_encoder_memusage(const lzma_lz_options *lz_options); + + +// These are only for LZ encoder's internal use. +extern uint32_t lzma_mf_find( + lzma_mf *mf, uint32_t *count, lzma_match *matches); + +extern uint32_t lzma_mf_hc3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc3_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_hc4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_hc4_skip(lzma_mf *dict, uint32_t amount); + +extern uint32_t lzma_mf_bt2_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt2_skip(lzma_mf *dict, uint32_t amount); +extern uint32_t lzma_mf_bt3_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt3_skip(lzma_mf *dict, uint32_t amount); -/// Calculates -extern bool lzma_lz_encoder_hash_properties(lzma_match_finder match_finder, - uint32_t history_size, uint32_t *restrict hash_mask, - uint32_t *restrict hash_size_sum, - uint32_t *restrict num_items); - -// NOTE: liblzma doesn't use callback API like LZMA SDK does. The caller -// must make sure that keep_size_after is big enough for single encoding pass -// i.e. keep_size_after >= maximum number of bytes possibly needed after -// the current position between calls to lzma_lz_read(). -extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz, - lzma_allocator *allocator, - bool (*process)(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size), - size_t history_size, size_t additional_buffer_before, - size_t match_max_len, size_t additional_buffer_after, - lzma_match_finder match_finder, uint32_t match_finder_cycles, - const uint8_t *preset_dictionary, - size_t preset_dictionary_size); - -/// Frees memory allocated for in window and match finder buffers. -extern void lzma_lz_encoder_end( - lzma_lz_encoder *lz, lzma_allocator *allocator); - -extern lzma_ret lzma_lz_encode(lzma_coder *coder, - lzma_allocator *allocator lzma_attribute((unused)), - const uint8_t *restrict in, size_t *restrict in_pos, - size_t in_size, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size, - lzma_action action); - -/// This should not be called directly, but only via move_pos() macro. -extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz); +extern uint32_t lzma_mf_bt4_find(lzma_mf *dict, lzma_match *matches); +extern void lzma_mf_bt4_skip(lzma_mf *dict, uint32_t amount); #endif diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h new file mode 100644 index 00000000..0841c38f --- /dev/null +++ b/src/liblzma/lz/lz_encoder_hash.h @@ -0,0 +1,104 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_hash.h +/// \brief Hash macros for match finders +// +// Copyright (C) 1999-2008 Igor Pavlov +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZ_ENCODER_HASH_H +#define LZMA_LZ_ENCODER_HASH_H + +#define HASH_2_SIZE (UINT32_C(1) << 10) +#define HASH_3_SIZE (UINT32_C(1) << 16) +#define HASH_4_SIZE (UINT32_C(1) << 20) + +#define HASH_2_MASK (HASH_2_SIZE - 1) +#define HASH_3_MASK (HASH_3_SIZE - 1) +#define HASH_4_MASK (HASH_4_SIZE - 1) + +#define FIX_3_HASH_SIZE (HASH_2_SIZE) +#define FIX_4_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) +#define FIX_5_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE + HASH_4_SIZE) + +// TODO Benchmark, and probably doesn't need to be endian dependent. +#if !defined(WORDS_BIGENDIAN) && defined(HAVE_FAST_UNALIGNED_ACCESS) +# define hash_2_calc() \ + const uint32_t hash_value = *(const uint16_t *)(cur); +#else +# define hash_2_calc() \ + const uint32_t hash_value \ + = (uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8) +#endif + +#define hash_3_calc() \ + const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & mf->hash_mask + +#define hash_4_calc() \ + const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + const uint32_t hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ + ^ (lzma_crc32_table[0][cur[3]] << 5)) & mf->hash_mask + + +// The following are not currently used. + +#define hash_5_calc() \ + const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ + ^ lzma_crc32_table[0][cur[3]] << 5); \ + const uint32_t hash_value \ + = (hash_4_value ^ (lzma_crc32_table[0][cur[4]] << 3)) \ + & mf->hash_mask; \ + hash_4_value &= HASH_4_MASK + +/* +#define hash_zip_calc() \ + const uint32_t hash_value \ + = (((uint32_t)(cur[0]) | ((uint32_t)(cur[1]) << 8)) \ + ^ lzma_crc32_table[0][cur[2]]) & 0xFFFF +*/ + +#define hash_zip_calc() \ + const uint32_t hash_value \ + = (((uint32_t)(cur[2]) | ((uint32_t)(cur[0]) << 8)) \ + ^ lzma_crc32_table[0][cur[1]]) & 0xFFFF + +#define mt_hash_2_calc() \ + const uint32_t hash_2_value \ + = (lzma_crc32_table[0][cur[0]] ^ cur[1]) & HASH_2_MASK + +#define mt_hash_3_calc() \ + const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK + +#define mt_hash_4_calc() \ + const uint32_t temp = lzma_crc32_table[0][cur[0]] ^ cur[1]; \ + const uint32_t hash_2_value = temp & HASH_2_MASK; \ + const uint32_t hash_3_value \ + = (temp ^ ((uint32_t)(cur[2]) << 8)) & HASH_3_MASK; \ + const uint32_t hash_4_value = (temp ^ ((uint32_t)(cur[2]) << 8) ^ \ + (lzma_crc32_table[0][cur[3]] << 5)) & HASH_4_MASK + +#endif diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c new file mode 100644 index 00000000..b1c20f50 --- /dev/null +++ b/src/liblzma/lz/lz_encoder_mf.c @@ -0,0 +1,780 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lz_encoder_mf.c +/// \brief Match finders +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lz_encoder_hash.h" +#include "check.h" + + +/// \brief Find matches starting from the current byte +/// +/// \return The length of the longest match found +extern uint32_t +lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) +{ + // Call the match finder. It returns the number of length-distance + // pairs found. + // FIXME: Minimum count is zero, what _exactly_ is the maximum? + const uint32_t count = mf->find(mf, matches); + + // Length of the longest match; assume that no matches were found + // and thus the maximum length is zero. + uint32_t len_best = 0; + + if (count > 0) { +#ifndef NDEBUG + // Validate the matches. + for (uint32_t i = 0; i < count; ++i) { + assert(matches[i].len <= mf->find_len_max); + assert(matches[i].dist < mf->read_pos); + assert(memcmp(mf_ptr(mf) - 1, + mf_ptr(mf) - matches[i].dist - 2, + matches[i].len) == 0); + } +#endif + + // The last used element in the array contains + // the longest match. + len_best = matches[count - 1].len; + + // If a match of maximum search length was found, try to + // extend the match to maximum possible length. + if (len_best == mf->find_len_max) { + // The limit for the match length is either the + // maximum match length supported by the LZ-based + // encoder or the number of bytes left in the + // dictionary, whichever is smaller. + uint32_t limit = mf_avail(mf) + 1; + if (limit > mf->match_len_max) + limit = mf->match_len_max; + + // Pointer to the byte we just ran through + // the match finder. + const uint8_t *p1 = mf_ptr(mf) - 1; + + // Pointer to the beginning of the match. We need -1 + // here because the match distances are zero based. + const uint8_t *p2 = p1 - matches[count - 1].dist - 1; + + while (len_best < limit + && p1[len_best] == p2[len_best]) + ++len_best; + } + } + + *count_ptr = count; + + // Finally update the read position to indicate that match finder was + // run for this dictionary offset. + ++mf->read_ahead; + + return len_best; +} + + +/// Hash value to indicate unused element in the hash. Since we start the +/// positions from dictionary_size + 1, zero is always too far to qualify +/// as usable match position. +#define EMPTY_HASH_VALUE 0 + + +/// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos +/// reaches MUST_NORMALIZE_POS. +#define MUST_NORMALIZE_POS UINT32_MAX + + +/// \brief Normalizes hash values +/// +/// The hash arrays store positions of match candidates. The positions are +/// relative to an arbitrary offset that is not the same as the absolute +/// offset in the input stream. The relative position of the current byte +/// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are +/// the differences of the current read position and the position found from +/// the hash. +/// +/// To prevent integer overflows of the offsets stored in the hash arrays, +/// we need to "normalize" the stored values now and then. During the +/// normalization, we drop values that indicate distance greater than the +/// dictionary size, thus making space for new values. +static void +normalize(lzma_mf *mf) +{ + assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); + + // In future we may not want to touch the lowest bits, because there + // may be match finders that use larger resolution than one byte. + const uint32_t subvalue + = (MUST_NORMALIZE_POS - mf->cyclic_buffer_size); + // & (~(UINT32_C(1) << 10) - 1); + + const uint32_t count = mf->hash_size_sum + mf->sons_count; + uint32_t *hash = mf->hash; + + for (uint32_t i = 0; i < count; ++i) { + // If the distance is greater than the dictionary size, + // we can simply mark the hash element as empty. + // + // NOTE: Only the first mf->hash_size_sum elements are + // initialized for sure. There may be uninitialized elements + // in mf->son. Since we go through both mf->hash and + // mf->son here in normalization, Valgrind may complain + // that the "if" below depends on uninitialized value. In + // this case it is safe to ignore the warning. See also the + // comments in lz_encoder_init() in lz_encoder.c. + if (hash[i] <= subvalue) + hash[i] = EMPTY_HASH_VALUE; + else + hash[i] -= subvalue; + } + + // Update offset to match the new locations. + mf->offset -= subvalue; + + return; +} + + +/// Mark the current byte as processed from point of view of the match finder. +static void +move_pos(lzma_mf *mf) +{ + if (++mf->cyclic_buffer_pos == mf->cyclic_buffer_size) + mf->cyclic_buffer_pos = 0; + + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + + if (unlikely(mf->read_pos + mf->offset == UINT32_MAX)) + normalize(mf); +} + + +/// When flushing, we cannot run the match finder unless there is find_len_max +/// bytes available in the dictionary. Instead, we skip running the match +/// finder (indicating that no match was found), and count how many bytes we +/// have ignored this way. +/// +/// When new data is given after the flushing was completed, the match finder +/// is restarted by rewinding mf->read_pos backwards by mf->pending. Then +/// the missed bytes are added to the hash using the match finder's skip +/// function (with small amount of input, it may start using mf->pending +/// again if flushing). +/// +/// Due to this rewinding, we don't touch cyclic_buffer_pos or test for +/// normalization. It will be done when the match finder's skip function +/// catches up after a flush. +static void +move_pending(lzma_mf *mf) +{ + ++mf->read_pos; + assert(mf->read_pos <= mf->write_pos); + ++mf->pending; +} + + +/// Calculate len_limit and determine if there is enough input to run +/// the actual match finder code. Sets up "cur" and "pos". This macro +/// is used by all find functions and binary tree skip functions. Hash +/// chain skip function doesn't need len_limit so a simpler code is used +/// in them. +#define header(is_bt, len_min, ret_op) \ + uint32_t len_limit = mf_avail(mf); \ + if (mf->find_len_max <= len_limit) { \ + len_limit = mf->find_len_max; \ + } else if (len_limit < (len_min) \ + || (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \ + assert(mf->action != LZMA_RUN); \ + move_pending(mf); \ + ret_op; \ + } \ + const uint8_t *cur = mf_ptr(mf); \ + const uint32_t pos = mf->read_pos + mf->offset + + +/// Header for find functions. "return 0" indicates that zero matches +/// were found. +#define header_find(is_bt, len_min) \ + header(is_bt, len_min, return 0); \ + uint32_t matches_count = 0 + + +/// Header for a loop in a skip function. "continue" tells to skip the rest +/// of the code in the loop. +#define header_skip(is_bt, len_min) \ + header(is_bt, len_min, continue) + + +/// Calls hc_find_func() or bt_find_func() and calculates the total number +/// of matches found. Updates the dictionary position and returns the number +/// of matches found. +#define call_find(func, len_best) \ +do { \ + matches_count = func(len_limit, pos, cur, cur_match, mf->loops, \ + mf->son, mf->cyclic_buffer_pos, \ + mf->cyclic_buffer_size, \ + matches + matches_count, len_best) \ + - matches; \ + move_pos(mf); \ + return matches_count; \ +} while (0) + + +//////////////// +// Hash Chain // +//////////////// + +#if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4) +/// +/// +/// \param len_limit Don't look for matches longer than len_limit. +/// \param pos lzma_mf.read_pos + lzma_mf.offset +/// \param cur Pointer to current byte (lzma_dict_ptr(mf)) +/// \param cur_match Start position of the current match candidate +/// \param loops Maximum length of the hash chain +/// \param son lzma_mf.son (contains the hash chain) +/// \param cyclic_buffer_pos +/// \param cyclic_buffer_size +/// \param matches Array to hold the matches. +/// \param len_best The length of the longest match found so far. +static lzma_match * +hc_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t loops, + uint32_t *const son, + const uint32_t cyclic_buffer_pos, + const uint32_t cyclic_buffer_size, + lzma_match *matches, + uint32_t len_best) +{ + son[cyclic_buffer_pos] = cur_match; + + while (true) { + const uint32_t delta = pos - cur_match; + if (loops-- == 0 || delta >= cyclic_buffer_size) + return matches; + + const uint8_t *const pb = cur - delta; + cur_match = son[cyclic_buffer_pos - delta + + (delta > cyclic_buffer_pos + ? cyclic_buffer_size : 0)]; + + if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { + uint32_t len = 0; + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) + return matches; + } + } + } +} + +/* +#define hc_header_find(len_min, ret_op) \ + uint32_t len_limit = mf_avail(mf); \ + if (mf->find_len_max <= len_limit) { \ + len_limit = mf->find_len_max; \ + } else if (len_limit < (len_min)) { \ + move_pending(mf); \ + ret_op; \ + } \ +#define header_hc(len_min, ret_op) \ +do { \ + if (mf_avail(mf) < (len_min)) { \ + move_pending(mf); \ + ret_op; \ + } \ +} while (0) +*/ + +#define hc_find(len_best) \ + call_find(hc_find_func, len_best) + + +#define hc_skip() \ +do { \ + mf->son[mf->cyclic_buffer_pos] = cur_match; \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_HC3 +extern uint32_t +lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_buffer_size && *(cur - delta2) == *cur) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + hc_skip(); + return 1; // matches_count + } + } + + hc_find(len_best); +} + + +extern void +lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 3) { + move_pending(mf); + continue; + } + + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_HC4 +extern uint32_t +lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(false, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value ] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_buffer_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_buffer_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + hc_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + hc_find(len_best); +} + + +extern void +lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + if (mf_avail(mf) < 4) { + move_pending(mf); + continue; + } + + const uint8_t *cur = mf_ptr(mf); + const uint32_t pos = mf->read_pos + mf->offset; + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + hc_skip(); + + } while (--amount != 0); +} +#endif + + +///////////////// +// Binary Tree // +///////////////// + +#if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4) +static lzma_match * +bt_find_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t loops, + uint32_t *const son, + const uint32_t cyclic_buffer_pos, + const uint32_t cyclic_buffer_size, + lzma_match *matches, + uint32_t len_best) +{ + uint32_t *ptr0 = son + (cyclic_buffer_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_buffer_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (loops-- == 0 || delta >= cyclic_buffer_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return matches; + } + + uint32_t *const pair = son + ((cyclic_buffer_pos - delta + + (delta > cyclic_buffer_pos + ? cyclic_buffer_size : 0)) << 1); + + const uint8_t *const pb = cur - delta; + uint32_t len = MIN(len0, len1); + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len_best < len) { + len_best = len; + matches->len = len; + matches->dist = delta - 1; + ++matches; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return matches; + } + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +static void +bt_skip_func( + const uint32_t len_limit, + const uint32_t pos, + const uint8_t *const cur, + uint32_t cur_match, + uint32_t loops, + uint32_t *const son, + const uint32_t cyclic_buffer_pos, + const uint32_t cyclic_buffer_size) +{ + uint32_t *ptr0 = son + (cyclic_buffer_pos << 1) + 1; + uint32_t *ptr1 = son + (cyclic_buffer_pos << 1); + + uint32_t len0 = 0; + uint32_t len1 = 0; + + while (true) { + const uint32_t delta = pos - cur_match; + if (loops-- == 0 || delta >= cyclic_buffer_size) { + *ptr0 = EMPTY_HASH_VALUE; + *ptr1 = EMPTY_HASH_VALUE; + return; + } + + uint32_t *pair = son + ((cyclic_buffer_pos - delta + + (delta > cyclic_buffer_pos + ? cyclic_buffer_size : 0)) << 1); + const uint8_t *pb = cur - delta; + uint32_t len = MIN(len0, len1); + + if (pb[len] == cur[len]) { + while (++len != len_limit) + if (pb[len] != cur[len]) + break; + + if (len == len_limit) { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + + if (pb[len] < cur[len]) { + *ptr1 = cur_match; + ptr1 = pair + 1; + cur_match = *ptr1; + len1 = len; + } else { + *ptr0 = cur_match; + ptr0 = pair; + cur_match = *ptr0; + len0 = len; + } + } +} + + +#define bt_find(len_best) \ + call_find(bt_find_func, len_best) + +#define bt_skip() \ +do { \ + bt_skip_func(len_limit, pos, cur, cur_match, mf->loops, \ + mf->son, mf->cyclic_buffer_pos, \ + mf->cyclic_buffer_size); \ + move_pos(mf); \ +} while (0) + +#endif + + +#ifdef HAVE_MF_BT2 +extern uint32_t +lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_find(1); +} + + +extern void +lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 2); + + hash_2_calc(); + + const uint32_t cur_match = mf->hash[hash_value]; + mf->hash[hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT3 +extern uint32_t +lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 3); + + hash_3_calc(); + + const uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 2; + + if (delta2 < mf->cyclic_buffer_size && *(cur - delta2) == *cur) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[0].len = len_best; + matches[0].dist = delta2 - 1; + matches_count = 1; + + if (len_best == len_limit) { + bt_skip(); + return 1; // matches_count + } + } + + bt_find(len_best); +} + + +extern void +lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 3); + + hash_3_calc(); + + const uint32_t cur_match + = mf->hash[FIX_3_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif + + +#ifdef HAVE_MF_BT4 +extern uint32_t +lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) +{ + header_find(true, 4); + + hash_4_calc(); + + uint32_t delta2 = pos - mf->hash[hash_2_value]; + const uint32_t delta3 + = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; + const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + uint32_t len_best = 1; + + if (delta2 < mf->cyclic_buffer_size && *(cur - delta2) == *cur) { + len_best = 2; + matches[0].len = 2; + matches[0].dist = delta2 - 1; + matches_count = 1; + } + + if (delta2 != delta3 && delta3 < mf->cyclic_buffer_size + && *(cur - delta3) == *cur) { + len_best = 3; + matches[matches_count++].dist = delta3 - 1; + delta2 = delta3; + } + + if (matches_count != 0) { + for ( ; len_best != len_limit; ++len_best) + if (*(cur + len_best - delta2) != cur[len_best]) + break; + + matches[matches_count - 1].len = len_best; + + if (len_best == len_limit) { + bt_skip(); + return matches_count; + } + } + + if (len_best < 3) + len_best = 3; + + bt_find(len_best); +} + + +extern void +lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) +{ + do { + header_skip(true, 4); + + hash_4_calc(); + + const uint32_t cur_match + = mf->hash[FIX_4_HASH_SIZE + hash_value]; + + mf->hash[hash_2_value] = pos; + mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; + mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; + + bt_skip(); + + } while (--amount != 0); +} +#endif diff --git a/src/liblzma/lz/match_c.h b/src/liblzma/lz/match_c.h deleted file mode 100644 index 664db290..00000000 --- a/src/liblzma/lz/match_c.h +++ /dev/null @@ -1,412 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file match_c.h -/// \brief Template for different match finders -/// -/// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file -/// sets slighly different #defines, resulting the different match finders. -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -////////////// -// Includes // -////////////// - -#include "check.h" - - -/////////////// -// Constants // -/////////////// - -#define START_MAX_LEN 1 - -#ifdef HASH_ARRAY_2 -# define NUM_HASH_DIRECT_BYTES 0 -# define HASH_2_SIZE (1 << 10) -# ifdef HASH_ARRAY_3 -# define NUM_HASH_BYTES 4 -# define HASH_3_SIZE (1 << 16) -# define HASH_3_OFFSET HASH_2_SIZE -# define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) -# else -# define NUM_HASH_BYTES 3 -# define FIX_HASH_SIZE HASH_2_SIZE -# endif -# define HASH_SIZE 0 -# define MIN_MATCH_CHECK NUM_HASH_BYTES -#else -# define NUM_HASH_DIRECT_BYTES 2 -# define NUM_HASH_BYTES 2 -# define HASH_SIZE (1 << (8 * NUM_HASH_BYTES)) -# define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1) -# define FIX_HASH_SIZE 0 -#endif - - -//////////// -// Macros // -//////////// - -#ifdef HASH_ARRAY_2 -# ifdef HASH_ARRAY_3 -# define HASH_CALC() \ - do { \ - const uint32_t temp = lzma_crc32_table[0][ \ - cur[0]] ^ cur[1]; \ - hash_2_value = temp & (HASH_2_SIZE - 1); \ - hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ - & (HASH_3_SIZE - 1); \ - hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ - ^ (lzma_crc32_table[0][cur[3]] << 5)) \ - & lz->hash_mask; \ - } while (0) -# else -# define HASH_CALC() \ - do { \ - const uint32_t temp = lzma_crc32_table[0][ \ - cur[0]] ^ cur[1]; \ - hash_2_value = temp & (HASH_2_SIZE - 1); \ - hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ - & lz->hash_mask; \ - } while (0) -# endif -#else -# define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8) -#endif - - -// Moves the current read position forward by one byte. In LZMA SDK, -// CLZInWindow::MovePos() can read more input data if needed, because of -// the callback style API. In liblzma we must have ensured earlier, that -// there is enough data available in lz->buffer. -#define move_pos() \ -do { \ - if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \ - lz->cyclic_buffer_pos = 0; \ - ++lz->read_pos; \ - assert(lz->read_pos <= lz->write_pos); \ - if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \ - lzma_lz_encoder_normalize(lz); \ -} while (0) - - -#define move_pending() \ -do { \ - ++lz->read_pos; \ - assert(lz->read_pos <= lz->write_pos); \ - ++lz->pending; \ -} while (0) - - -////////////////////// -// Global constants // -////////////////////// - -LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE; -LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE; - - -/////////////////// -// API functions // -/////////////////// - -LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER) -{ - uint32_t len_limit; - if (lz->read_pos + lz->match_max_len <= lz->write_pos) { - len_limit = lz->match_max_len; - } else { - len_limit = lz->write_pos - lz->read_pos; - if (len_limit < MIN_MATCH_CHECK || lz->sequence == SEQ_FLUSH) { - distances[0] = 0; - move_pending(); - return; - } - } - - assert(lz->pending == 0); - - int32_t offset = 1; - const uint32_t match_min_pos - = lz->read_pos + lz->offset > lz->cyclic_buffer_size - ? lz->read_pos + lz->offset - lz->cyclic_buffer_size - : 0; - const uint8_t *cur = lz->buffer + lz->read_pos; - uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size - -#ifdef HASH_ARRAY_2 - uint32_t hash_2_value; -# ifdef HASH_ARRAY_3 - uint32_t hash_3_value; -# endif -#endif - uint32_t hash_value; - HASH_CALC(); - - uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; -#ifdef HASH_ARRAY_2 - uint32_t cur_match2 = lz->hash[hash_2_value]; -# ifdef HASH_ARRAY_3 - uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value]; -# endif - lz->hash[hash_2_value] = lz->read_pos + lz->offset; - - if (cur_match2 > match_min_pos) { - if (lz->buffer[cur_match2 - lz->offset] == cur[0]) { - max_len = 2; - distances[offset++] = 2; - distances[offset++] = lz->read_pos + lz->offset - - cur_match2 - 1; - } - } - -# ifdef HASH_ARRAY_3 - lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset; - if (cur_match3 > match_min_pos) { - if (lz->buffer[cur_match3 - lz->offset] == cur[0]) { - if (cur_match3 == cur_match2) - offset -= 2; - - max_len = 3; - distances[offset++] = 3; - distances[offset++] = lz->read_pos + lz->offset - - cur_match3 - 1; - cur_match2 = cur_match3; - } - } -# endif - - if (offset != 1 && cur_match2 == cur_match) { - offset -= 2; - max_len = START_MAX_LEN; - } -#endif - - lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset; - -#ifdef IS_HASH_CHAIN - lz->son[lz->cyclic_buffer_pos] = cur_match; -#else - uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; - uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); - - uint32_t len0 = NUM_HASH_DIRECT_BYTES; - uint32_t len1 = NUM_HASH_DIRECT_BYTES; -#endif - -#if NUM_HASH_DIRECT_BYTES != 0 - if (cur_match > match_min_pos) { - if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset] - != cur[NUM_HASH_DIRECT_BYTES]) { - max_len = NUM_HASH_DIRECT_BYTES; - distances[offset++] = NUM_HASH_DIRECT_BYTES; - distances[offset++] = lz->read_pos + lz->offset - - cur_match - 1; - } - } -#endif - - uint32_t count = lz->cut_value; - - while (true) { - if (cur_match <= match_min_pos || count-- == 0) { -#ifndef IS_HASH_CHAIN - *ptr0 = EMPTY_HASH_VALUE; - *ptr1 = EMPTY_HASH_VALUE; -#endif - break; - } - - const uint32_t delta = lz->read_pos + lz->offset - cur_match; - const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos - ? lz->cyclic_buffer_pos - delta - : lz->cyclic_buffer_pos - delta - + lz->cyclic_buffer_size; - uint32_t *pair = lz->son + -#ifdef IS_HASH_CHAIN - cyclic_pos; -#else - (cyclic_pos << 1); -#endif - - const uint8_t *pb = lz->buffer + cur_match - lz->offset; - uint32_t len = -#ifdef IS_HASH_CHAIN - NUM_HASH_DIRECT_BYTES; - if (pb[max_len] == cur[max_len]) -#else - MIN(len0, len1); -#endif - - if (pb[len] == cur[len]) { - while (++len != len_limit) - if (pb[len] != cur[len]) - break; - - if (max_len < len) { - max_len = len; - distances[offset++] = len; - distances[offset++] = delta - 1; - if (len == len_limit) { -#ifndef IS_HASH_CHAIN - *ptr1 = pair[0]; - *ptr0 = pair[1]; -#endif - break; - } - } - } - -#ifdef IS_HASH_CHAIN - cur_match = *pair; -#else - if (pb[len] < cur[len]) { - *ptr1 = cur_match; - ptr1 = pair + 1; - cur_match = *ptr1; - len1 = len; - } else { - *ptr0 = cur_match; - ptr0 = pair; - cur_match = *ptr0; - len0 = len; - } -#endif - } - - distances[0] = offset - 1; - - move_pos(); - - return; -} - - -LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER) -{ - do { -#ifdef IS_HASH_CHAIN - if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) { - move_pending(); - continue; - } -#else - uint32_t len_limit; - if (lz->read_pos + lz->match_max_len <= lz->write_pos) { - len_limit = lz->match_max_len; - } else { - len_limit = lz->write_pos - lz->read_pos; - if (len_limit < MIN_MATCH_CHECK - || lz->sequence == SEQ_FLUSH) { - move_pending(); - continue; - } - } - const uint32_t match_min_pos - = lz->read_pos + lz->offset > lz->cyclic_buffer_size - ? lz->read_pos + lz->offset - lz->cyclic_buffer_size - : 0; -#endif - - assert(lz->pending == 0); - - const uint8_t *cur = lz->buffer + lz->read_pos; - -#ifdef HASH_ARRAY_2 - uint32_t hash_2_value; -# ifdef HASH_ARRAY_3 - uint32_t hash_3_value; - uint32_t hash_value; - HASH_CALC(); - lz->hash[HASH_3_OFFSET + hash_3_value] - = lz->read_pos + lz->offset; -# else - uint32_t hash_value; - HASH_CALC(); -# endif - lz->hash[hash_2_value] = lz->read_pos + lz->offset; -#else - uint32_t hash_value; - HASH_CALC(); -#endif - - uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; - lz->hash[FIX_HASH_SIZE + hash_value] - = lz->read_pos + lz->offset; - -#ifdef IS_HASH_CHAIN - lz->son[lz->cyclic_buffer_pos] = cur_match; -#else - uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; - uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); - - uint32_t len0 = NUM_HASH_DIRECT_BYTES; - uint32_t len1 = NUM_HASH_DIRECT_BYTES; - uint32_t count = lz->cut_value; - - while (true) { - if (cur_match <= match_min_pos || count-- == 0) { - *ptr0 = EMPTY_HASH_VALUE; - *ptr1 = EMPTY_HASH_VALUE; - break; - } - - const uint32_t delta = lz->read_pos - + lz->offset - cur_match; - const uint32_t cyclic_pos - = delta <= lz->cyclic_buffer_pos - ? lz->cyclic_buffer_pos - delta - : lz->cyclic_buffer_pos - delta - + lz->cyclic_buffer_size; - uint32_t *pair = lz->son + (cyclic_pos << 1); - - const uint8_t *pb = lz->buffer + cur_match - - lz->offset; - uint32_t len = MIN(len0, len1); - - if (pb[len] == cur[len]) { - while (++len != len_limit) - if (pb[len] != cur[len]) - break; - - if (len == len_limit) { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - break; - } - } - - if (pb[len] < cur[len]) { - *ptr1 = cur_match; - ptr1 = pair + 1; - cur_match = *ptr1; - len1 = len; - } else { - *ptr0 = cur_match; - ptr0 = pair; - cur_match = *ptr0; - len0 = len; - } - } -#endif - - move_pos(); - - } while (--num != 0); - - return; -} diff --git a/src/liblzma/lz/match_h.h b/src/liblzma/lz/match_h.h deleted file mode 100644 index 2eae90ba..00000000 --- a/src/liblzma/lz/match_h.h +++ /dev/null @@ -1,69 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file match_h.h -/// \brief Header template for different match finders -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "lz_encoder_private.h" - - -////////////////////// -// Global constants // -////////////////////// - -#undef LZMA_HASH_SIZE -#undef LZMA_FIX_HASH_SIZE -#undef LZMA_HASH_SIZE_C -#undef LZMA_FIX_HASH_SIZE_C - -#define LZMA_HASH_SIZE(mf_name) LZMA_HASH_SIZE_C(mf_name) -#define LZMA_FIX_HASH_SIZE(mf_name) LZMA_FIX_HASH_SIZE_C(mf_name) - -#define LZMA_HASH_SIZE_C(mf_name) \ - const uint32_t LZMA_ ## mf_name ## _HASH_SIZE - -#define LZMA_FIX_HASH_SIZE_C(mf_name) \ - const uint32_t LZMA_ ## mf_name ## _FIX_HASH_SIZE - -extern LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER); -extern LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER); - - -/////////////// -// Functions // -/////////////// - -#undef LZMA_GET_MATCHES -#undef LZMA_SKIP -#undef LZMA_GET_MATCHES_C -#undef LZMA_SKIP_C - -#define LZMA_GET_MATCHES(mf_name) LZMA_GET_MATCHES_C(mf_name) -#define LZMA_SKIP(mf_name) LZMA_SKIP_C(mf_name) - -#define LZMA_GET_MATCHES_C(mf_name) \ - extern void lzma_ ## mf_name ## _get_matches( \ - lzma_lz_encoder *restrict lz, \ - uint32_t *restrict distances) - -#define LZMA_SKIP_C(mf_name) \ - extern void lzma_ ## mf_name ## _skip( \ - lzma_lz_encoder *lz, uint32_t num) - -LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER); - -LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER); diff --git a/src/liblzma/lzma/Makefile.am b/src/liblzma/lzma/Makefile.am index 59ded214..7aeceb63 100644 --- a/src/liblzma/lzma/Makefile.am +++ b/src/liblzma/lzma/Makefile.am @@ -14,37 +14,46 @@ EXTRA_DIST = fastpos_tablegen.c -noinst_LTLIBRARIES = liblzma4.la -liblzma4_la_CPPFLAGS = \ +## Using liblzma2 since liblzma is already used for the final library. +noinst_LTLIBRARIES = liblzma2.la +liblzma2_la_CPPFLAGS = \ -I@top_srcdir@/src/liblzma/api \ -I@top_srcdir@/src/liblzma/common \ -I@top_srcdir@/src/liblzma/lz \ -I@top_srcdir@/src/liblzma/rangecoder -liblzma4_la_SOURCES = \ - lzma_common.h \ - lzma_literal.c \ - lzma_literal.h +liblzma2_la_SOURCES = lzma_common.h -if COND_MAIN_ENCODER -liblzma4_la_SOURCES += \ +if COND_ENCODER_LZMA +liblzma2_la_SOURCES += \ fastpos.h \ lzma_encoder.h \ lzma_encoder.c \ lzma_encoder_presets.c \ lzma_encoder_private.h \ - lzma_encoder_init.c \ lzma_encoder_features.c \ - lzma_encoder_getoptimum.c \ - lzma_encoder_getoptimumfast.c + lzma_encoder_optimum_fast.c \ + lzma_encoder_optimum_normal.c if !COND_SMALL -liblzma4_la_SOURCES += fastpos_table.c +liblzma2_la_SOURCES += fastpos_table.c endif endif -if COND_MAIN_DECODER -liblzma4_la_SOURCES += \ +if COND_DECODER_LZMA +liblzma2_la_SOURCES += \ lzma_decoder.c \ lzma_decoder.h endif + +if COND_ENCODER_LZMA2 +liblzma2_la_SOURCES += \ + lzma2_encoder.c \ + lzma2_encoder.h +endif + +if COND_DECODER_LZMA2 +liblzma2_la_SOURCES += \ + lzma2_decoder.c \ + lzma2_decoder.h +endif diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h index 57a94556..503be275 100644 --- a/src/liblzma/lzma/fastpos.h +++ b/src/liblzma/lzma/fastpos.h @@ -81,8 +81,6 @@ // I'm making the table version the default, because that has good speed // on all systems I have tried. The size optimized version is sometimes // slightly faster, but sometimes it is a lot slower. -// -// Finally, this code isn't a major bottle neck in LZMA encoding anyway. #ifdef HAVE_SMALL # include "bsr.h" @@ -135,11 +133,7 @@ get_pos_slot(uint32_t pos) static inline uint32_t get_pos_slot_2(uint32_t pos) { - // FIXME: This assert() cannot be enabled at the moment, because - // lzma_getoptimum.c calls this function so that this assertion - // fails; however, it ignores the result of this function when - // this assert() would have failed. - // assert(pos >= FULL_DISTANCES); + assert(pos >= FULL_DISTANCES); if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0); diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c new file mode 100644 index 00000000..b16c40ce --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.c @@ -0,0 +1,318 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.c +/// \brief LZMA2 decoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma2_decoder.h" +#include "lz_decoder.h" +#include "lzma_decoder.h" + + +struct lzma_coder_s { + enum sequence { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA, + SEQ_COPY, + } sequence; + + /// Sequence after the size fields have been decoded. + enum sequence next_sequence; + + /// LZMA decoder + lzma_lz_decoder lzma; + + /// Uncompressed size of LZMA chunk + size_t uncompressed_size; + + /// Compressed size of the chunk (naturally equals to uncompressed + /// size of uncompressed chunk) + size_t compressed_size; + + /// True if properties are needed. This is false before the + /// first LZMA chunk. + bool need_properties; + + /// True if dictionary reset is needed. This is false before the + /// first chunk (LZMA or uncompressed). + bool need_dictionary_reset; + + lzma_options_lzma options; +}; + + +static lzma_ret +lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size) +{ + // With SEQ_LZMA it is possible that no new input is needed to do + // some progress. The rest of the sequences assume that there is + // at least one byte of input. + while (*in_pos < in_size || coder->sequence == SEQ_LZMA) + switch (coder->sequence) { + case SEQ_CONTROL: + if (in[*in_pos] & 0x80) { + // Get the highest five bits of uncompressed size. + coder->uncompressed_size + = (uint32_t)(in[*in_pos] & 0x1F) << 16; + coder->sequence = SEQ_UNCOMPRESSED_1; + + // See if we need to reset dictionary or state. + switch ((in[(*in_pos)++] >> 5) & 3) { + case 3: + dict_reset(dict); + coder->need_dictionary_reset = false; + + // Fall through + + case 2: + if (coder->need_dictionary_reset) + return LZMA_DATA_ERROR; + + coder->need_properties = false; + coder->next_sequence = SEQ_PROPERTIES; + break; + + case 1: + if (coder->need_properties) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, + &coder->options); + + coder->next_sequence = SEQ_LZMA; + break; + + case 0: + if (coder->need_properties) + return LZMA_DATA_ERROR; + + coder->next_sequence = SEQ_LZMA; + break; + } + + } else { + switch (in[(*in_pos)++]) { + case 0: + // End of payload marker + return LZMA_STREAM_END; + + case 1: + // Dictionary reset + dict_reset(dict); + coder->need_dictionary_reset = false; + + // Fall through + + case 2: + if (coder->need_dictionary_reset) + return LZMA_DATA_ERROR; + + // Uncompressed chunk; we need to read total + // size first. + coder->sequence = SEQ_COMPRESSED_0; + coder->next_sequence = SEQ_COPY; + break; + + default: + return LZMA_DATA_ERROR; + } + } + + break; + + case SEQ_UNCOMPRESSED_1: + coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + coder->uncompressed_size += in[(*in_pos)++] + 1; + coder->sequence = SEQ_COMPRESSED_0; + coder->lzma.set_uncompressed(coder->lzma.coder, + coder->uncompressed_size); + break; + + case SEQ_COMPRESSED_0: + coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8; + coder->sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + coder->compressed_size += in[(*in_pos)++] + 1; + coder->sequence = coder->next_sequence; + break; + + case SEQ_PROPERTIES: + if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++])) + return LZMA_DATA_ERROR; + + coder->lzma.reset(coder->lzma.coder, &coder->options); + + coder->sequence = SEQ_LZMA; + break; + + case SEQ_LZMA: { + // Store the start offset so that we can update + // coder->compressed_size later. + const size_t in_start = *in_pos; + + // Decode from in[] to *dict. + const lzma_ret ret = coder->lzma.code(coder->lzma.coder, + dict, in, in_pos, in_size); + + // Validate and update coder->compressed_size. + const size_t in_used = *in_pos - in_start; + if (in_used > coder->compressed_size) + return LZMA_DATA_ERROR; + + coder->compressed_size -= in_used; + + // Return if we didn't finish the chunk, or an error occurred. + if (ret != LZMA_STREAM_END) + return ret; + + // The LZMA decoder must have consumed the whole chunk now. + // We don't need to worry about uncompressed size since it + // is checked by the LZMA decoder. + if (coder->compressed_size != 0) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_CONTROL; + break; + } + + case SEQ_COPY: { + // Copy from input to the dictionary as is. + // FIXME Can copy too much? + dict_write(dict, in, in_pos, in_size, &coder->compressed_size); + if (coder->compressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_CONTROL; + break; + } + + default: + assert(0); + return LZMA_PROG_ERROR; + } + + return LZMA_OK; +} + + +static void +lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + assert(coder->lzma.end == NULL); + lzma_free(coder->lzma.coder, allocator); + + lzma_free(coder, allocator); + + return; +} + + +static lzma_ret +lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, size_t *dict_size) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_decode; + lz->end = &lzma2_decoder_end; + + lz->coder->lzma = LZMA_LZ_DECODER_INIT; + } + + lz->coder->sequence = SEQ_CONTROL; + lz->coder->need_properties = true; + lz->coder->need_dictionary_reset = true; + + return lzma_lzma_decoder_create(&lz->coder->lzma, + allocator, options, dict_size); +} + + +extern lzma_ret +lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA2 can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma2_decoder_init); +} + + +extern uint64_t +lzma_lzma2_decoder_memusage(const void *options) +{ + const uint64_t lzma_memusage = lzma_lzma_decoder_memusage(options); + if (lzma_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lzma_memusage; +} + + +extern lzma_ret +lzma_lzma2_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 1) + return LZMA_HEADER_ERROR; + + // Check that reserved bits are unset. + if (props[0] & 0xC0) + return LZMA_HEADER_ERROR; + + // Decode the dictionary size. + if (props[0] > 40) + return LZMA_HEADER_ERROR; + + lzma_options_lzma *opt = lzma_alloc( + sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (props[0] == 40) { + opt->dictionary_size = UINT32_MAX; + } else { + opt->dictionary_size = 2 | (props[0] & 1); + opt->dictionary_size <<= props[0] / 2 + 11; + } + + opt->preset_dictionary = NULL; + opt->preset_dictionary_size = 0; + + *options = opt; + + return LZMA_OK; +} diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h new file mode 100644 index 00000000..a7504863 --- /dev/null +++ b/src/liblzma/lzma/lzma2_decoder.h @@ -0,0 +1,35 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_decoder.h +/// \brief LZMA2 decoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_LZMA2_DECODER_H +#define LZMA_LZMA2_DECODER_H + +#include "common.h" + +extern lzma_ret lzma_lzma2_decoder_init(lzma_next_coder *next, + lzma_allocator *allocator, const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + +#endif diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c new file mode 100644 index 00000000..b2cd176b --- /dev/null +++ b/src/liblzma/lzma/lzma2_encoder.c @@ -0,0 +1,406 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma2_encoder.c +/// \brief LZMA2 encoder +// +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lz_encoder.h" +#include "lzma_encoder.h" +#include "fastpos.h" +#include "lzma2_encoder.h" + + +/// Maximum number of bytes of actual data per chunk (no headers) +#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16) + +/// Maximum uncompressed size of LZMA chunk (no headers) +#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21) + +/// Maximum size of LZMA2 headers +#define LZMA2_HEADER_MAX 6 + +/// Size of a header for uncompressed chunk +#define LZMA2_HEADER_UNCOMPRESSED 3 + + +struct lzma_coder_s { + enum { + SEQ_INIT, + SEQ_LZMA_ENCODE, + SEQ_LZMA_COPY, + SEQ_UNCOMPRESSED_HEADER, + SEQ_UNCOMPRESSED_COPY, + } sequence; + + /// LZMA encoder + lzma_coder *lzma; + + /// If this is not NULL, we will check new options from this + /// structure when starting a new chunk. + const lzma_options_lzma *opt_new; + + /// LZMA options currently in use. + lzma_options_lzma opt_cur; + + bool need_properties; + bool need_state_reset; + bool need_dictionary_reset; + + /// Uncompressed size of a chunk + size_t uncompressed_size; + + /// Compressed size of a chunk (excluding headers); this is also used + /// to indicate the end of buf[] in SEQ_LZMA_COPY. + size_t compressed_size; + + /// Read position in buf[] + size_t buf_pos; + + /// Buffer to hold the chunk header and LZMA compressed data + uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; +}; + + +static void +lzma2_header_lzma(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + assert(coder->compressed_size > 0); + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + + size_t pos; + + if (coder->need_properties) { + pos = 0; + + if (coder->need_dictionary_reset) + coder->buf[pos] = 0x80 + (3 << 5); + else + coder->buf[pos] = 0x80 + (2 << 5); + } else { + pos = 1; + + if (coder->need_state_reset) + coder->buf[pos] = 0x80 + (1 << 5); + else + coder->buf[pos] = 0x80; + } + + // Set the start position for copying. + coder->buf_pos = pos; + + // Uncompressed size + size_t size = coder->uncompressed_size - 1; + coder->buf[pos++] += size >> 16; + coder->buf[pos++] = (size >> 8) & 0xFF; + coder->buf[pos++] = size & 0xFF; + + // Compressed size + size = coder->compressed_size - 1; + coder->buf[pos++] = size >> 8; + coder->buf[pos++] = size & 0xFF; + + // Properties, if needed + if (coder->need_properties) + lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); + + coder->need_properties = false; + coder->need_state_reset = false; + coder->need_dictionary_reset = false; + + // The copying code uses coder->compressed_size to indicate the end + // of coder->buf[], so we need add the maximum size of the header here. + coder->compressed_size += LZMA2_HEADER_MAX; + + return; +} + + +static void +lzma2_header_uncompressed(lzma_coder *coder) +{ + assert(coder->uncompressed_size > 0); + assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); + + // If this is the first chunk, we need to include dictionary + // reset indicator. + if (coder->need_dictionary_reset) + coder->buf[0] = 1; + else + coder->buf[0] = 2; + + coder->need_dictionary_reset = false; + + // "Compressed" size + coder->buf[1] = (coder->uncompressed_size - 1) >> 8; + coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; + + // Set the start position for copying. + coder->buf_pos = 0; + return; +} + + +static lzma_ret +lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + while (*out_pos < out_size) + switch (coder->sequence) { + case SEQ_INIT: + // If there's no input left and we are flushing or finishing, + // don't start a new chunk. + if (mf_unencoded(mf) == 0) { + // Write end of payload marker if finishing. + if (mf->action == LZMA_FINISH) + out[(*out_pos)++] = 0; + + return mf->action == LZMA_RUN + ? LZMA_OK : LZMA_STREAM_END; + } + + // Look if there are new options. At least for now, + // only lc/lp/pb can be changed. + if (coder->opt_new != NULL + && (coder->opt_cur.literal_context_bits + != coder->opt_new->literal_context_bits + || coder->opt_cur.literal_pos_bits + != coder->opt_new->literal_pos_bits + || coder->opt_cur.pos_bits + != coder->opt_new->pos_bits)) { + // Options have been changed, copy them to opt_cur. + coder->opt_cur.literal_context_bits + = coder->opt_new->literal_context_bits; + coder->opt_cur.literal_pos_bits + = coder->opt_new->literal_pos_bits; + coder->opt_cur.pos_bits + = coder->opt_new->pos_bits; + + // We need to write the new options and reset + // the encoder state. + coder->need_properties = true; + coder->need_state_reset = true; + } + + if (coder->need_state_reset) + lzma_lzma_encoder_reset(coder->lzma, &coder->opt_cur); + + coder->uncompressed_size = 0; + coder->compressed_size = 0; + coder->sequence = SEQ_LZMA_ENCODE; + + // Fall through + + case SEQ_LZMA_ENCODE: { + // Calculate how much more uncompressed data this chunk + // could accept. + const uint32_t left = LZMA2_UNCOMPRESSED_MAX + - coder->uncompressed_size; + uint32_t limit; + + if (left < mf->match_len_max) { + // Must flush immediatelly since the next LZMA symbol + // could make the uncompressed size of the chunk too + // big. + limit = 0; + } else { + // Calculate maximum read_limit that is OK from point + // of view of LZMA2 chunk size. + limit = mf->read_pos - mf->read_ahead + + left - mf->match_len_max; + } + + // Save the start position so that we can update + // coder->uncompressed_size. + const uint32_t read_start = mf->read_pos - mf->read_ahead; + + // Call the LZMA encoder until the chunk is finished. + const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, + coder->buf + LZMA2_HEADER_MAX, + &coder->compressed_size, + LZMA2_CHUNK_MAX, limit); + + coder->uncompressed_size += mf->read_pos - mf->read_ahead + - read_start; + + assert(coder->compressed_size <= LZMA2_CHUNK_MAX); + assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); + + if (ret != LZMA_STREAM_END) + return LZMA_OK; + + // See if the chunk compressed. If it didn't, we encode it + // as uncompressed chunk. This saves a few bytes of space + // and makes decoding faster. + if (coder->compressed_size >= coder->uncompressed_size) { + coder->uncompressed_size += mf->read_ahead; + assert(coder->uncompressed_size + <= LZMA2_UNCOMPRESSED_MAX); + mf->read_ahead = 0; + lzma2_header_uncompressed(coder); + coder->need_state_reset = true; + coder->sequence = SEQ_UNCOMPRESSED_HEADER; + break; + } + + // The chunk did compress at least by one byte, so we store + // the chunk as LZMA. + lzma2_header_lzma(coder); + + coder->sequence = SEQ_LZMA_COPY; + } + + // Fall through + + case SEQ_LZMA_COPY: + // Copy the compressed chunk along its headers to the + // output buffer. + lzma_bufcpy(coder->buf, &coder->buf_pos, + coder->compressed_size, + out, out_pos, out_size); + if (coder->buf_pos != coder->compressed_size) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + + case SEQ_UNCOMPRESSED_HEADER: + // Copy the three-byte header to indicate uncompressed chunk. + lzma_bufcpy(coder->buf, &coder->buf_pos, + LZMA2_HEADER_UNCOMPRESSED, + out, out_pos, out_size); + if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) + return LZMA_OK; + + coder->sequence = SEQ_UNCOMPRESSED_COPY; + + // Fall through + + case SEQ_UNCOMPRESSED_COPY: + // Copy the uncompressed data as is from the dictionary + // to the output buffer. + mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); + if (coder->uncompressed_size != 0) + return LZMA_OK; + + coder->sequence = SEQ_INIT; + break; + } + + return LZMA_OK; +} + + +static void +lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_free(coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + if (options == NULL) + return LZMA_PROG_ERROR; + + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma2_encode; + lz->end = &lzma2_encoder_end; + + lz->coder->lzma = NULL; + } + + lz->coder->sequence = SEQ_INIT; + lz->coder->need_properties = true; + lz->coder->need_state_reset = false; + lz->coder->need_dictionary_reset = true; + + lz->coder->opt_cur = *(const lzma_options_lzma *)(options); + lz->coder->opt_new = lz->coder->opt_cur.persistent + ? options : NULL; + + // Initialize LZMA encoder + return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, + &lz->coder->opt_cur, lz_options)); + + // Make sure that we will always have enough history available in + // case we need to use uncompressed chunks. They are used when the + // compressed size of a chunk is not smaller than the uncompressed + // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes + // history available. + if (lz_options->before_size + lz_options->dictionary_size + < LZMA2_CHUNK_MAX) + lz_options->before_size = LZMA2_CHUNK_MAX + - lz_options->dictionary_size; + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + return lzma_lz_encoder_init( + next, allocator, filters, &lzma2_encoder_init); +} + + +extern uint64_t +lzma_lzma2_encoder_memusage(const void *options) +{ + const uint64_t lzma_memusage = lzma_lzma_encoder_memusage(options); + if (lzma_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lzma_memusage; +} + + +extern lzma_ret +lzma_lzma2_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + uint32_t d = MAX(opt->dictionary_size, LZMA_DICTIONARY_SIZE_MIN); + + // Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending + // on which one is the next: + --d; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + + // Get the highest two bits using the proper encoding: + if (d == UINT32_MAX) + out[0] = 40; + else + out[0] = get_pos_slot(d + 1) - 24; + + return LZMA_OK; +} diff --git a/src/liblzma/common/raw_common.h b/src/liblzma/lzma/lzma2_encoder.h index 0a27f3dc..3e27f680 100644 --- a/src/liblzma/common/raw_common.h +++ b/src/liblzma/lzma/lzma2_encoder.h @@ -1,9 +1,10 @@ /////////////////////////////////////////////////////////////////////////////// // -/// \file raw_common.h -/// \brief Stuff shared between raw encoder and raw decoder +/// \file lzma2_encoder.h +/// \brief LZMA2 encoder // -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 1999-2008 Igor Pavlov +// Copyright (C) 2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -17,14 +18,17 @@ // /////////////////////////////////////////////////////////////////////////////// -#ifndef LZMA_RAW_COMMON_H -#define LZMA_RAW_COMMON_H +#ifndef LZMA_LZMA2_ENCODER_H +#define LZMA_LZMA2_ENCODER_H #include "common.h" -extern lzma_ret lzma_raw_coder_init(lzma_next_coder *next, - lzma_allocator *allocator, const lzma_options_filter *options, - lzma_init_function (*get_function)(lzma_vli id), - bool is_encoder); +extern lzma_ret lzma_lzma2_encoder_init( + lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters); + +extern uint64_t lzma_lzma2_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma2_props_encode(const void *options, uint8_t *out); #endif diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h index f677fcce..6909969b 100644 --- a/src/liblzma/lzma/lzma_common.h +++ b/src/liblzma/lzma/lzma_common.h @@ -22,81 +22,31 @@ #define LZMA_LZMA_COMMON_H #include "common.h" -#include "lzma_literal.h" #include "range_common.h" -/////////////// -// Constants // -/////////////// - -#define REP_DISTANCES 4 - -#define POS_SLOT_BITS 6 -#define DICT_LOG_SIZE_MAX 30 -#define DIST_TABLE_SIZE_MAX (DICT_LOG_SIZE_MAX * 2) -#if (UINT32_C(1) << DICT_LOG_SIZE_MAX) != LZMA_DICTIONARY_SIZE_MAX -# error DICT_LOG_SIZE_MAX is inconsistent with LZMA_DICTIONARY_SIZE_MAX -#endif - -// 2 is for speed optimization -#define LEN_TO_POS_STATES_BITS 2 -#define LEN_TO_POS_STATES (1 << LEN_TO_POS_STATES_BITS) - -#define MATCH_MIN_LEN 2 - -#define ALIGN_BITS 4 -#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) -#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) - -#define START_POS_MODEL_INDEX 4 -#define END_POS_MODEL_INDEX 14 -#define POS_MODELS (END_POS_MODEL_INDEX - START_POS_MODEL_INDEX) - -#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) -#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) +/////////////////// +// Miscellaneous // +/////////////////// +/// Maximum number of position states. A position state is the lowest pos bits +/// number of bits of the current uncompressed offset. In some places there +/// are different sets of probabilities for different pos states. #define POS_STATES_MAX (1 << LZMA_POS_BITS_MAX) -// Length coder & Length price table encoder -#define LEN_LOW_BITS 3 -#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) -#define LEN_MID_BITS 3 -#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) -#define LEN_HIGH_BITS 8 -#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) -#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) -#define LEN_SPEC_SYMBOLS (LOW_LOW_SYMBOLS + LEN_MID_LEN_SYMBOLS) -#define MATCH_MAX_LEN (MATCH_MIN_LEN + LEN_SYMBOLS - 1) - -// Total number of probs in a Len Encoder -#define LEN_CODER_TOTAL_PROBS (LEN_HIGH_CODER + LEN_HIGH_SYMBOLS) - -// Price table size of Len Encoder -#define LEN_PRICES (LEN_SYMBOLS << LZMA_POS_BITS_MAX) - -// Special lengths used together with distance == UINT32_MAX -#define LEN_SPECIAL_EOPM MATCH_MIN_LEN -#define LEN_SPECIAL_FLUSH (LEN_SPECIAL_EOPM + 1) - - -// Optimal - Number of entries in the optimum array. -#define OPTS (1 << 12) - - -// Miscellaneous -#define INFINITY_PRICE 0x0FFFFFFF - - -//////////// -// Macros // -//////////// - -#define get_len_to_pos_state(len) \ - ((len) < LEN_TO_POS_STATES + MATCH_MIN_LEN \ - ? (len) - MATCH_MIN_LEN \ - : LEN_TO_POS_STATES - 1) +/// Validates literal_context_bits, literal_pos_bits, and pos_bits. +static inline bool +is_lclppb_valid(const lzma_options_lzma *options) +{ + return options->literal_context_bits <= LZMA_LITERAL_CONTEXT_BITS_MAX + && options->literal_pos_bits + <= LZMA_LITERAL_POS_BITS_MAX + && options->literal_context_bits + + options->literal_pos_bits + <= LZMA_LITERAL_BITS_MAX + && options->pos_bits <= LZMA_POS_BITS_MAX; +} /////////// @@ -161,4 +111,126 @@ typedef enum { #define is_literal_state(state) \ ((state) < LIT_STATES) + +///////////// +// Literal // +///////////// + +/// Each literal coder is divided in three sections: +/// - 0x001-0x0FF: Without match byte +/// - 0x101-0x1FF: With match byte; match bit is 0 +/// - 0x201-0x2FF: With match byte; match bit is 1 +/// +/// Match byte is used when the previous LZMA symbol was something else than +/// a literal (that is, it was some kind of match). +#define LITERAL_CODER_SIZE 0x300 + +/// Maximum number of literal coders +#define LITERAL_CODERS_MAX (1 << LZMA_LITERAL_BITS_MAX) + +/// Locate the literal coder for the next literal byte. The choice depends on +/// - the lowest literal_pos_bits bits of the position of the current +/// byte; and +/// - the highest literal_context_bits bits of the previous byte. +#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \ + ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))]) + + +static inline void +literal_init(probability (*probs)[LITERAL_CODER_SIZE], + uint32_t literal_context_bits, uint32_t literal_pos_bits) +{ + assert(literal_context_bits + literal_pos_bits + <= LZMA_LITERAL_BITS_MAX); + + const uint32_t coders + = 1U << (literal_context_bits + literal_pos_bits); + + for (uint32_t i = 0; i < coders; ++i) + for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j) + bit_reset(probs[i][j]); + + return; +} + + +////////////////// +// Match length // +////////////////// + +// Minimum length of a match is two bytes. +#define MATCH_LEN_MIN 2 + +// Match length is encoded with 4, 5, or 10 bits. +// +// Length Bits +// 2-9 4 = Choice=0 + 3 bits +// 10-17 5 = Choice=1 + Choice2=0 + 3 bits +// 18-273 10 = Choice=1 + Choice2=1 + 8 bits +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +// Maximum length of a match is 273 which is a result of the encoding +// described above. +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + + +//////////////////// +// Match distance // +//////////////////// + +// Different set of probabilities is used for match distances that have very +// short match length: Lengths of 2, 3, and 4 bytes have a separate set of +// probabilities for each length. The matches with longer length use a shared +// set of probabilities. +#define LEN_TO_POS_STATES 4 + +// Macro to get the index of the appropriate probability array. +#define get_len_to_pos_state(len) \ + ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \ + ? (len) - MATCH_LEN_MIN \ + : LEN_TO_POS_STATES - 1) + +// The highest two bits of a match distance (pos slot) are encoded using six +// bits. See fastpos.h for more explanation. +#define POS_SLOT_BITS 6 +#define POS_SLOTS (1 << POS_SLOT_BITS) + +// Match distances up to 127 are fully encoded using probabilities. Since +// the highest two bits (pos slot) are always encoded using six bits, the +// distances 0-3 don't need any additional bits to encode, since the pos +// slot itself is the same as the actual distance. START_POS_MODEL_INDEX +// indicates the first pos slot where at least one additional bit is needed. +#define START_POS_MODEL_INDEX 4 + +// Match distances greater than 127 are encoded in three pieces: +// - pos slot: the highest two bits +// - direct bits: 2-26 bits below the highest two bits +// - alignment bits: four lowest bits +// +// Direct bits don't use any probabilities. +// +// The pos slot value of 14 is for distances 128-191 (see the table in +// fastpos.h to understand why). +#define END_POS_MODEL_INDEX 14 + +// Seven-bit distances use the full FIXME +#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +// For match distances greater than 127, only the highest two bits and the +// lowest four bits (alignment) is encoded using probabilities. +#define ALIGN_BITS 4 +#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1) + +// LZMA remembers the four most recent match distances. Reusing these distances +// tends to take less space than re-encoding the actual distance value. +#define REP_DISTANCES 4 + #endif diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 68941021..e9d047d3 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -4,7 +4,7 @@ /// \brief LZMA decoder // // Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin +// Copyright (C) 2007-2008 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public @@ -18,74 +18,147 @@ // /////////////////////////////////////////////////////////////////////////////// -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - +#include "lz_decoder.h" #include "lzma_common.h" #include "lzma_decoder.h" -#include "lz_decoder.h" #include "range_decoder.h" -/// REQUIRED_IN_BUFFER_SIZE is the number of required input bytes -/// for the worst case: longest match with longest distance. -/// LZMA_IN_BUFFER_SIZE must be larger than REQUIRED_IN_BUFFER_SIZE. -/// 23 bits = 2 (match select) + 10 (len) + 6 (distance) + 4 (align) -/// + 1 (rc_normalize) -/// -/// \todo Is this correct for sure? -/// -#define REQUIRED_IN_BUFFER_SIZE \ - ((23 * (BIT_MODEL_TOTAL_BITS - MOVE_BITS + 1) + 26 + 9) / 8) +#ifdef HAVE_SMALL +// Macros for (somewhat) size-optimized code. +#define seq_4(seq) seq -// Length decoders are easiest to have as macros, because they use range -// decoder macros, which use local variables rc_range and rc_code. +#define seq_6(seq) seq -#define length_decode(target, len_decoder, pos_state) \ +#define seq_8(seq) seq + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _CHOICE2, \ + seq ## _BITTREE + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0(len_decoder.choice); \ - target = MATCH_MIN_LEN; \ - bittree_decode(target, len_decoder.low[pos_state], LEN_LOW_BITS); \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + probs = ld.low[pos_state];\ + limit = LEN_LOW_SYMBOLS; \ + target = MATCH_LEN_MIN; \ } else { \ - update_bit_1(len_decoder.choice); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode(target, len_decoder.mid[pos_state], LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + probs = ld.mid[pos_state]; \ + limit = LEN_MID_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1(len_decoder.choice2); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + probs = ld.high; \ + limit = LEN_HIGH_SYMBOLS; \ + target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \ + + LEN_MID_SYMBOLS; \ } \ } \ + symbol = 1; \ +case seq ## _BITTREE: \ + do { \ + rc_bit(probs[symbol], , , seq ## _BITTREE); \ + } while (symbol < limit); \ + target += symbol - limit; \ } while (0) - -#define length_decode_dummy(target, len_decoder, pos_state) \ +#else // HAVE_SMALL + +// Unrolled versions +#define seq_4(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3 + +#define seq_6(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5 + +#define seq_8(seq) \ + seq ## 0, \ + seq ## 1, \ + seq ## 2, \ + seq ## 3, \ + seq ## 4, \ + seq ## 5, \ + seq ## 6, \ + seq ## 7 + +#define seq_len(seq) \ + seq ## _CHOICE, \ + seq ## _LOW0, \ + seq ## _LOW1, \ + seq ## _LOW2, \ + seq ## _CHOICE2, \ + seq ## _MID0, \ + seq ## _MID1, \ + seq ## _MID2, \ + seq ## _HIGH0, \ + seq ## _HIGH1, \ + seq ## _HIGH2, \ + seq ## _HIGH3, \ + seq ## _HIGH4, \ + seq ## _HIGH5, \ + seq ## _HIGH6, \ + seq ## _HIGH7 + +#define len_decode(target, ld, pos_state, seq) \ do { \ - if_bit_0(len_decoder.choice) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN; \ - bittree_decode_dummy(target, \ - len_decoder.low[pos_state], LEN_LOW_BITS); \ + symbol = 1; \ +case seq ## _CHOICE: \ + rc_if_0(ld.choice, seq ## _CHOICE) { \ + rc_update_0(ld.choice); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \ + rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \ + target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \ } else { \ - update_bit_1_dummy(); \ - if_bit_0(len_decoder.choice2) { \ - update_bit_0_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.mid[pos_state], \ - LEN_MID_BITS); \ + rc_update_1(ld.choice); \ +case seq ## _CHOICE2: \ + rc_if_0(ld.choice2, seq ## _CHOICE2) { \ + rc_update_0(ld.choice2); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID0); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID1); \ + rc_bit_case(ld.mid[pos_state][symbol], , , \ + seq ## _MID2); \ + target = symbol - LEN_MID_SYMBOLS \ + + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \ } else { \ - update_bit_1_dummy(); \ - target = MATCH_MIN_LEN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ - bittree_decode_dummy(target, len_decoder.high, LEN_HIGH_BITS); \ + rc_update_1(ld.choice2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \ + rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \ + target = symbol - LEN_HIGH_SYMBOLS \ + + MATCH_LEN_MIN \ + + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \ } \ } \ } while (0) +#endif // HAVE_SMALL + +/// Length decoder probabilities; see comments in lzma_common.h. typedef struct { probability choice; probability choice2; @@ -96,26 +169,12 @@ typedef struct { struct lzma_coder_s { - /// Data of the next coder, if any. - lzma_next_coder next; - - /// Sliding output window a.k.a. dictionary a.k.a. history buffer. - lzma_lz_decoder lz; - - // Range coder - lzma_range_decoder rc; - - // State - lzma_lzma_state state; - uint32_t rep0; ///< Distance of the latest match - uint32_t rep1; ///< Distance of second latest match - uint32_t rep2; ///< Distance of third latest match - uint32_t rep3; ///< Distance of fourth latest match - uint32_t pos_bits; - uint32_t pos_mask; - uint32_t now_pos; // Lowest 32-bits are enough here. + /////////////////// + // Probabilities // + /////////////////// - lzma_literal_coder literal_coder; + /// Literals; see comments in lzma_common.h. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; /// If 1, it's a match. Otherwise it's a single 8-bit literal. probability is_match[STATES][POS_STATES_MAX]; @@ -138,178 +197,107 @@ struct lzma_coder_s { /// the length is decoded from rep_len_decoder. probability is_rep0_long[STATES][POS_STATES_MAX]; - probability pos_slot_decoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; - probability pos_decoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; - probability pos_align_decoder[1 << ALIGN_BITS]; - - /// Length of a match - lzma_length_decoder match_len_decoder; - - /// Length of a repeated match. - lzma_length_decoder rep_len_decoder; - - /// True when we have produced at least one byte of output since the - /// beginning of the stream or the latest flush marker. - bool has_produced_output; -}; - - -/// \brief Check if the next iteration of the decoder loop can be run. -/// -/// \note There must always be REQUIRED_IN_BUFFER_SIZE bytes -/// readable space! -/// -static bool lzma_attribute((pure)) -decode_dummy(const lzma_coder *restrict coder, - const uint8_t *restrict in, size_t in_pos_local, - const size_t in_size, lzma_range_decoder rc, - uint32_t state, uint32_t rep0, const uint32_t now_pos) -{ - uint32_t rc_bound; - - do { - const uint32_t pos_state = now_pos & coder->pos_mask; - - if_bit_0(coder->is_match[state][pos_state]) { - // It's a literal i.e. a single 8-bit byte. - - update_bit_0_dummy(); - - const probability *subcoder = literal_get_subcoder( - coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; - - if (is_literal_state(state)) { - // Decode literal without match byte. - do { - if_bit_0(subcoder[symbol]) { - update_bit_0_dummy(); - symbol <<= 1; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - - } else { - // Decode literal with match byte. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - - do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; - const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; - - if_bit_0(subcoder[subcoder_index]) { - update_bit_0_dummy(); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); - } - - break; - } - - update_bit_1_dummy(); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0_dummy(); - length_decode_dummy(len, coder->match_len_decoder, pos_state); - - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode_dummy(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 31); - rep0 = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - rep0 <<= direct_bits; - assert(rep0 <= 96); - // -1 is fine, because bittree_reverse_decode() - // starts from table index [1] (not [0]). - assert((int32_t)(rep0 - pos_slot - 1) >= -1); - assert((int32_t)(rep0 - pos_slot - 1) <= 82); - // We add the result to rep0, so rep0 - // must not be part of second argument - // of the macro. - const int32_t offset = rep0 - pos_slot - 1; - bittree_reverse_decode_dummy(coder->pos_decoders + offset, - direct_bits); - } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct_dummy(direct_bits); - - bittree_reverse_decode_dummy(coder->pos_align_decoder, - ALIGN_BITS); - } - } + /// Probability tree for the highest two bits of the match distance. + /// There is a separate probability tree for match lengths of + /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; - } else { - update_bit_1_dummy(); + /// Probility trees for additional bits for match distance when the + /// distance is in the range [4, 127]. + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; - if_bit_0(coder->is_rep0[state]) { - update_bit_0_dummy(); + /// Probability tree for the lowest four bits of a match distance + /// that is equal to or greater than 128. + probability pos_align[ALIGN_TABLE_SIZE]; - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0_dummy(); - break; - } else { - update_bit_1_dummy(); - } + /// Length of a normal match + lzma_length_decoder match_len_decoder; - } else { - update_bit_1_dummy(); + /// Length of a repeated match + lzma_length_decoder rep_len_decoder; - if_bit_0(coder->is_rep1[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); + /////////////////// + // Decoder state // + /////////////////// - if_bit_0(coder->is_rep2[state]) { - update_bit_0_dummy(); - } else { - update_bit_1_dummy(); - } - } - } + // Range coder + lzma_range_decoder rc; - length_decode_dummy(len, coder->rep_len_decoder, pos_state); - } - } while (0); + // Types of the most recently seen LZMA symbols + lzma_lzma_state state; - rc_normalize(); + uint32_t rep0; ///< Distance of the latest match + uint32_t rep1; ///< Distance of second latest match + uint32_t rep2; ///< Distance of third latest match + uint32_t rep3; ///< Distance of fourth latest match - return in_pos_local <= in_size; -} + uint32_t pos_mask; // (1U << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; + + /// Uncompressed size as bytes, or LZMA_VLI_VALUE_UNKNOWN if end of + /// payload marker is expected. + lzma_vli uncompressed_size; + + //////////////////////////////// + // State of incomplete symbol // + //////////////////////////////// + + /// Position where to continue the decoder loop + enum { + SEQ_NORMALIZE, + SEQ_IS_MATCH, + seq_8(SEQ_LITERAL), + seq_8(SEQ_LITERAL_MATCHED), + SEQ_LITERAL_WRITE, + SEQ_IS_REP, + seq_len(SEQ_MATCH_LEN), + seq_6(SEQ_POS_SLOT), + SEQ_POS_MODEL, + SEQ_DIRECT, + seq_4(SEQ_ALIGN), + SEQ_EOPM, + SEQ_IS_REP0, + SEQ_SHORTREP, + SEQ_IS_REP0_LONG, + SEQ_IS_REP1, + SEQ_IS_REP2, + seq_len(SEQ_REP_LEN), + SEQ_COPY, + } sequence; + + /// Base of the current probability tree + probability *probs; + + /// Symbol being decoded. This is also used as an index variable in + /// bittree decoders: probs[symbol] + uint32_t symbol; + + /// Used as a loop termination condition on bittree decoders and + /// direct bits decoder. + uint32_t limit; + + /// Matched literal decoder: 0x100 or 0 to help avoiding branches. + /// Bittree reverse decoders: Offset of the next bit: 1 << offset + uint32_t offset; + + /// If decoding a literal: match byte. + /// If decoding a match: length of the match. + uint32_t len; +}; -static bool -decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, - size_t *restrict in_pos, size_t in_size, bool has_safe_buffer) +static lzma_ret +lzma_decode(lzma_coder *restrict coder, lzma_dict *restrict dictptr, + const uint8_t *restrict in, + size_t *restrict in_pos, size_t in_size) { //////////////////// // Initialization // //////////////////// if (!rc_read_init(&coder->rc, in, in_pos, in_size)) - return false; + return LZMA_OK; /////////////// // Variables // @@ -318,8 +306,12 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // Making local copies of often-used variables improves both // speed and readability. + lzma_dict dict = *dictptr; + + const size_t dict_start = dict.pos; + // Range decoder - rc_to_local(coder->rc); + rc_to_local(coder->rc, *in_pos); // State uint32_t state = coder->state; @@ -328,87 +320,168 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, uint32_t rep2 = coder->rep2; uint32_t rep3 = coder->rep3; - // Misc - uint32_t now_pos = coder->now_pos; - bool has_produced_output = coder->has_produced_output; - - // Variables derived from decoder settings const uint32_t pos_mask = coder->pos_mask; - size_t in_pos_local = *in_pos; // Local copy - size_t in_limit; - if (in_size <= REQUIRED_IN_BUFFER_SIZE) - in_limit = 0; - else - in_limit = in_size - REQUIRED_IN_BUFFER_SIZE; - - - while (coder->lz.pos < coder->lz.limit - && (in_pos_local < in_limit || (has_safe_buffer - && decode_dummy(coder, in, in_pos_local, in_size, - rc, state, rep0, now_pos)))) { - - ///////////////////// - // Actual decoding // - ///////////////////// - - const uint32_t pos_state = now_pos & pos_mask; + // These variables are actually needed only if we last time ran + // out of input in the middle of the decoder loop. + probability *probs = coder->probs; + uint32_t symbol = coder->symbol; + uint32_t limit = coder->limit; + uint32_t offset = coder->offset; + uint32_t len = coder->len; + + const uint32_t literal_pos_mask = coder->literal_pos_mask; + const uint32_t literal_context_bits = coder->literal_context_bits; + + // Temporary variables + uint32_t pos_state = dict.pos & pos_mask; + + lzma_ret ret = LZMA_OK; + + // If uncompressed size is known, there must be no end of payload + // marker. + const bool no_eopm = coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN; + if (no_eopm && coder->uncompressed_size < dict.limit - dict.pos) + dict.limit = dict.pos + (size_t)(coder->uncompressed_size); + + // The main decoder loop. The "switch" is used to restart the decoder at + // correct location. Once restarted, the "switch" is no longer used. + switch (coder->sequence) + while (true) { + // Calculate new pos_state. This is skipped on the first loop + // since we already calculated it when setting up the local + // variables. + pos_state = dict.pos & pos_mask; + + case SEQ_NORMALIZE: + case SEQ_IS_MATCH: + if (unlikely(no_eopm && dict.pos == dict.limit)) + break; - if_bit_0(coder->is_match[state][pos_state]) { - update_bit_0(coder->is_match[state][pos_state]); + rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) { + rc_update_0(coder->is_match[state][pos_state]); // It's a literal i.e. a single 8-bit byte. - probability *subcoder = literal_get_subcoder(coder->literal_coder, - now_pos, lz_get_byte(coder->lz, 0)); - uint32_t symbol = 1; + probs = literal_subcoder(coder->literal, + literal_context_bits, literal_pos_mask, + dict.pos, dict_get(&dict, 0)); + symbol = 1; if (is_literal_state(state)) { // Decode literal without match byte. +#ifdef HAVE_SMALL + case SEQ_LITERAL: do { - if_bit_0(subcoder[symbol]) { - update_bit_0(subcoder[symbol]); - symbol <<= 1; - } else { - update_bit_1(subcoder[symbol]); - symbol = (symbol << 1) | 1; - } - } while (symbol < 0x100); - + rc_bit(probs[symbol], , , SEQ_LITERAL); + } while (symbol < (1 << 8)); +#else + rc_bit_case(probs[symbol], , , SEQ_LITERAL0); + rc_bit_case(probs[symbol], , , SEQ_LITERAL1); + rc_bit_case(probs[symbol], , , SEQ_LITERAL2); + rc_bit_case(probs[symbol], , , SEQ_LITERAL3); + rc_bit_case(probs[symbol], , , SEQ_LITERAL4); + rc_bit_case(probs[symbol], , , SEQ_LITERAL5); + rc_bit_case(probs[symbol], , , SEQ_LITERAL6); + rc_bit_case(probs[symbol], , , SEQ_LITERAL7); +#endif } else { // Decode literal with match byte. // - // The usage of subcoder_offset allows omitting some - // branches, which should give tiny speed improvement on - // some CPUs. subcoder_offset gets set to zero if match_bit - // didn't match. - uint32_t match_byte = lz_get_byte(coder->lz, rep0); - uint32_t subcoder_offset = 0x100; - + // We store the byte we compare against + // ("match byte") to "len" to minimize the + // number of variables we need to store + // between decoder calls. + len = dict_get(&dict, rep0) << 1; + + // The usage of "offset" allows omitting some + // branches, which should give tiny speed + // improvement on some CPUs. "offset" gets + // set to zero if match_bit didn't match. + offset = 0x100; + +#ifdef HAVE_SMALL + case SEQ_LITERAL_MATCHED: do { - match_byte <<= 1; - const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t match_bit + = len & offset; const uint32_t subcoder_index - = subcoder_offset + match_bit + symbol; + = offset + match_bit + + symbol; + + rc_bit(probs[subcoder_index], + offset &= ~match_bit, + offset &= match_bit, + SEQ_LITERAL_MATCHED); + + // It seems to be faster to do this + // here instead of putting it to the + // beginning of the loop and then + // putting the "case" in the middle + // of the loop. + len <<= 1; + + } while (symbol < (1 << 8)); +#else + // Unroll the loop. + uint32_t match_bit; + uint32_t subcoder_index; + +# define d(seq) \ + case seq: \ + match_bit = len & offset; \ + subcoder_index = offset + match_bit + symbol; \ + rc_bit(probs[subcoder_index], \ + offset &= ~match_bit, \ + offset &= match_bit, \ + seq) + + d(SEQ_LITERAL_MATCHED0); + len <<= 1; + d(SEQ_LITERAL_MATCHED1); + len <<= 1; + d(SEQ_LITERAL_MATCHED2); + len <<= 1; + d(SEQ_LITERAL_MATCHED3); + len <<= 1; + d(SEQ_LITERAL_MATCHED4); + len <<= 1; + d(SEQ_LITERAL_MATCHED5); + len <<= 1; + d(SEQ_LITERAL_MATCHED6); + len <<= 1; + d(SEQ_LITERAL_MATCHED7); +# undef d +#endif + } - if_bit_0(subcoder[subcoder_index]) { - update_bit_0(subcoder[subcoder_index]); - symbol <<= 1; - subcoder_offset &= ~match_bit; - } else { - update_bit_1(subcoder[subcoder_index]); - symbol = (symbol << 1) | 1; - subcoder_offset &= match_bit; - } - } while (symbol < 0x100); + //update_literal(state); + // Use a lookup table to update to literal state, + // since compared to other state updates, this would + // need two branches. + static const lzma_lzma_state next_state[] = { + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT + }; + state = next_state[state]; + + case SEQ_LITERAL_WRITE: + if (unlikely(dict_put(&dict, symbol))) { + coder->sequence = SEQ_LITERAL_WRITE; + goto out; } - // Put the decoded byte to the dictionary, update the - // decoder state, and start a new decoding loop. - coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol); - ++now_pos; - update_literal(state); - has_produced_output = true; continue; } @@ -416,115 +489,196 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // (distance and length) which will be repeated from our // output history. - update_bit_1(coder->is_match[state][pos_state]); - uint32_t len; - - if_bit_0(coder->is_rep[state]) { - update_bit_0(coder->is_rep[state]); + rc_update_1(coder->is_match[state][pos_state]); + case SEQ_IS_REP: + rc_if_0(coder->is_rep[state], SEQ_IS_REP) { // Not a repeated match - // - // We will decode a new distance and store - // the value to distance. - - // Decode the length of the match. - length_decode(len, coder->match_len_decoder, pos_state); - + rc_update_0(coder->is_rep[state]); update_match(state); - const uint32_t len_to_pos_state = get_len_to_pos_state(len); - uint32_t pos_slot = 0; - bittree_decode(pos_slot, - coder->pos_slot_decoder[len_to_pos_state], POS_SLOT_BITS); - assert(pos_slot <= 63); - - if (pos_slot >= START_POS_MODEL_INDEX) { - uint32_t direct_bits = (pos_slot >> 1) - 1; - assert(direct_bits >= 1 && direct_bits <= 30); - uint32_t distance = 2 | (pos_slot & 1); - - if (pos_slot < END_POS_MODEL_INDEX) { - assert(direct_bits <= 5); - distance <<= direct_bits; - assert(distance <= 96); - // -1 is fine, because - // bittree_reverse_decode() - // starts from table index [1] - // (not [0]). - assert((int32_t)(distance - pos_slot - 1) >= -1); - assert((int32_t)(distance - pos_slot - 1) <= 82); - // We add the result to distance, so distance - // must not be part of second argument - // of the macro. - const int32_t offset = distance - pos_slot - 1; - bittree_reverse_decode(distance, coder->pos_decoders + offset, - direct_bits); + // The latest three match distances are kept in + // memory in case there are repeated matches. + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + + // Decode the length of the match. + len_decode(len, coder->match_len_decoder, + pos_state, SEQ_MATCH_LEN); + + // Prepare to decode the highest two bits of the + // match distance. + probs = coder->pos_slot[get_len_to_pos_state(len)]; + symbol = 1; + +#ifdef HAVE_SMALL + case SEQ_POS_SLOT: + do { + rc_bit(probs[symbol], , , SEQ_POS_SLOT); + } while (symbol < POS_SLOTS); +#else + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT0); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT1); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT2); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT3); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT4); + rc_bit_case(probs[symbol], , , SEQ_POS_SLOT5); +#endif + // Get rid of the highest bit that was needed for + // indexing of the probability array. + symbol -= POS_SLOTS; + assert(symbol <= 63); + + if (symbol < START_POS_MODEL_INDEX) { + // Match distances [0, 3] have only two bits. + rep0 = symbol; + } else { + // Decode the lowest [1, 29] bits of + // the match distance. + limit = (symbol >> 1) - 1; + assert(limit >= 1 && limit <= 30); + rep0 = 2 + (symbol & 1); + + if (symbol < END_POS_MODEL_INDEX) { + // Prepare to decode the low bits for + // a distance of [4, 127]. + assert(limit <= 5); + rep0 <<= limit; + assert(rep0 <= 96); + // -1 is fine, because we start + // decoding at probs[1], not probs[0]. + // NOTE: This violates the C standard, + // since we are doing pointer + // arithmetic past the beginning of + // the array. + assert((int32_t)(rep0 - symbol - 1) + >= -1); + assert((int32_t)(rep0 - symbol - 1) + <= 82); + probs = coder->pos_special + rep0 + - symbol - 1; + symbol = 1; + offset = 0; + case SEQ_POS_MODEL: +#ifdef HAVE_SMALL + do { + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } while (++offset < limit); +#else + switch (limit) { + case 5: + assert(offset == 0); + rc_bit(probs[symbol], , + rep0 += 1, + SEQ_POS_MODEL); + ++offset; + --limit; + case 4: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 3: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 2: + rc_bit(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + ++offset; + --limit; + case 1: + // We need "symbol" only for + // indexing the probability + // array, thus we can use + // rc_bit_last() here to omit + // the unneeded updating of + // "symbol". + rc_bit_last(probs[symbol], , + rep0 += 1 << offset, + SEQ_POS_MODEL); + } +#endif } else { - assert(pos_slot >= 14); - assert(direct_bits >= 6); - direct_bits -= ALIGN_BITS; - assert(direct_bits >= 2); - rc_decode_direct(distance, direct_bits); - distance <<= ALIGN_BITS; - - bittree_reverse_decode(distance, coder->pos_align_decoder, - ALIGN_BITS); - - if (distance == UINT32_MAX) { - if (len == LEN_SPECIAL_EOPM) { - // End of Payload Marker found. - coder->lz.eopm_detected = true; - break; - - } else if (len == LEN_SPECIAL_FLUSH) { - // Flush marker detected. We must have produced - // at least one byte of output since the previous - // flush marker or the beginning of the stream. - // This is to prevent hanging the decoder with - // malicious input files. - if (!has_produced_output) - return true; - - has_produced_output = false; - - // We know that we have enough input to call - // this macro, because it is tested at the - // end of decode_dummy(). - rc_normalize(); - - rc_reset(rc); - - // If we don't have enough input here, we jump - // out of the loop. Note that while there is a - // useless call to rc_normalize(), it does nothing - // since we have just reset the range decoder. - if (!rc_read_init(&rc, in, &in_pos_local, in_size)) - break; - - continue; - - } else { - return true; + // The distace is >= 128. Decode the + // lower bits without probabilities + // except the lowest four bits. + assert(symbol >= 14); + assert(limit >= 6); + limit -= ALIGN_BITS; + assert(limit >= 2); + case SEQ_DIRECT: + // Not worth manual unrolling + do { + rc_direct(rep0, SEQ_DIRECT); + } while (--limit > 0); + + // Decode the lowest four bits using + // probabilities. + rep0 <<= ALIGN_BITS; + symbol = 1; +#ifdef HAVE_SMALL + offset = 0; + case SEQ_ALIGN: + do { + rc_bit(coder->pos_align[ + symbol], , + rep0 += 1 << offset, + SEQ_ALIGN); + } while (++offset < ALIGN_BITS); +#else + case SEQ_ALIGN0: + rc_bit(coder->pos_align[symbol], , + rep0 += 1, SEQ_ALIGN0); + case SEQ_ALIGN1: + rc_bit(coder->pos_align[symbol], , + rep0 += 2, SEQ_ALIGN1); + case SEQ_ALIGN2: + rc_bit(coder->pos_align[symbol], , + rep0 += 4, SEQ_ALIGN2); + case SEQ_ALIGN3: + // Like in SEQ_POS_MODEL, we don't + // need "symbol" for anything else + // than indexing the probability array. + rc_bit_last(coder->pos_align[symbol], , + rep0 += 8, SEQ_ALIGN3); +#endif + + if (rep0 == UINT32_MAX) { + // End of payload marker was + // found. It must not be + // present if uncompressed + // size is known. + if (coder->uncompressed_size + != LZMA_VLI_VALUE_UNKNOWN) { + ret = LZMA_DATA_ERROR; + goto out; } + + case SEQ_EOPM: + // TODO Comment + rc_normalize(SEQ_EOPM); + ret = LZMA_STREAM_END; + goto out; } } + } - // The latest three match distances are kept in - // memory in case there are repeated matches. - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance; - - } else { - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = pos_slot; + // Validate the distance we just decoded. + if (unlikely(!dict_is_distance_valid(&dict, rep0))) { + ret = LZMA_DATA_ERROR; + goto out; } } else { - update_bit_1(coder->is_rep[state]); + rc_update_1(coder->is_rep[state]); // Repeated match // @@ -532,242 +686,318 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, // earlier. The latest four match distances are // available as rep0, rep1, rep2 and rep3. We will // now decode which of them is the new distance. + // + // There cannot be a match if we haven't produced + // any output, so check that first. + if (unlikely(!dict_is_distance_valid(&dict, 0))) { + ret = LZMA_DATA_ERROR; + goto out; + } - if_bit_0(coder->is_rep0[state]) { - update_bit_0(coder->is_rep0[state]); - + case SEQ_IS_REP0: + rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) { + rc_update_0(coder->is_rep0[state]); // The distance is rep0. - if_bit_0(coder->is_rep0_long[state][pos_state]) { - update_bit_0(coder->is_rep0_long[state][pos_state]); + case SEQ_IS_REP0_LONG: + rc_if_0(coder->is_rep0_long[state][pos_state], + SEQ_IS_REP0_LONG) { + rc_update_0(coder->is_rep0_long[ + state][pos_state]); update_short_rep(state); - // Repeat exactly one byte and start a new decoding loop. - // Note that rep0 is known to have a safe value, thus we - // don't need to check if we are wrapping the dictionary - // when it isn't full yet. - if (unlikely(lz_is_empty(coder->lz))) - return true; - - coder->lz.dict[coder->lz.pos] - = lz_get_byte(coder->lz, rep0); - ++coder->lz.pos; - ++now_pos; - has_produced_output = true; - continue; - - } else { - update_bit_1(coder->is_rep0_long[state][pos_state]); + case SEQ_SHORTREP: + if (unlikely(dict_put(&dict, dict_get( + &dict, rep0)))) { + coder->sequence = SEQ_SHORTREP; + goto out; + } - // Repeating more than one byte at - // distance of rep0. + continue; } + // Repeating more than one byte at + // distance of rep0. + rc_update_1(coder->is_rep0_long[ + state][pos_state]); + } else { - update_bit_1(coder->is_rep0[state]); + rc_update_1(coder->is_rep0[state]); + case SEQ_IS_REP1: // The distance is rep1, rep2 or rep3. Once // we find out which one of these three, it // is stored to rep0 and rep1, rep2 and rep3 // are updated accordingly. + rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) { + rc_update_0(coder->is_rep1[state]); - uint32_t distance; + const uint32_t distance = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep1[state]) { - update_bit_0(coder->is_rep1[state]); - distance = rep1; } else { - update_bit_1(coder->is_rep1[state]); + rc_update_1(coder->is_rep1[state]); + case SEQ_IS_REP2: + rc_if_0(coder->is_rep2[state], + SEQ_IS_REP2) { + rc_update_0(coder->is_rep2[ + state]); + + const uint32_t distance = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; - if_bit_0(coder->is_rep2[state]) { - update_bit_0(coder->is_rep2[state]); - distance = rep2; } else { - update_bit_1(coder->is_rep2[state]); - distance = rep3; + rc_update_1(coder->is_rep2[ + state]); + + const uint32_t distance = rep3; rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance; } - - rep2 = rep1; } - - rep1 = rep0; - rep0 = distance; } update_long_rep(state); // Decode the length of the repeated match. - length_decode(len, coder->rep_len_decoder, pos_state); + len_decode(len, coder->rep_len_decoder, + pos_state, SEQ_REP_LEN); } - ///////////////////////////////// // Repeat from history buffer. // ///////////////////////////////// // The length is always between these limits. There is no way // to trigger the algorithm to set len outside this range. - assert(len >= MATCH_MIN_LEN); - assert(len <= MATCH_MAX_LEN); - - now_pos += len; - has_produced_output = true; + assert(len >= MATCH_LEN_MIN); + assert(len <= MATCH_LEN_MAX); + case SEQ_COPY: // Repeat len bytes from distance of rep0. - if (!lzma_lz_out_repeat(&coder->lz, rep0, len)) - return true; + if (unlikely(dict_repeat(&dict, rep0, &len))) { + coder->sequence = SEQ_COPY; + goto out; + } } - rc_normalize(); + rc_normalize(SEQ_NORMALIZE); + coder->sequence = SEQ_IS_MATCH; +out: + // Save state - ///////////////////////////////// - // Update the *data structure. // - ///////////////////////////////// + // NOTE: Must not copy dict.limit. + dictptr->pos = dict.pos; + dictptr->full = dict.full; - // Range decoder - rc_from_local(coder->rc); + rc_from_local(coder->rc, *in_pos); - // State coder->state = state; coder->rep0 = rep0; coder->rep1 = rep1; coder->rep2 = rep2; coder->rep3 = rep3; - // Misc - coder->now_pos = now_pos; - coder->has_produced_output = has_produced_output; - *in_pos = in_pos_local; + coder->probs = probs; + coder->symbol = symbol; + coder->limit = limit; + coder->offset = offset; + coder->len = len; + + // Update the remaining amount of uncompressed data if uncompressed + // size was known. + if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) { + coder->uncompressed_size -= dict.pos - dict_start; + + // Since there cannot be end of payload marker if the + // uncompressed size was known, we check here if we + // finished decoding. + if (coder->uncompressed_size == 0 && ret == LZMA_OK + && coder->sequence != SEQ_NORMALIZE) + ret = coder->sequence == SEQ_IS_MATCH + ? LZMA_STREAM_END : LZMA_DATA_ERROR; + } + + // We can do an additional check in the range decoder to catch some + // corrupted files. + if (ret == LZMA_STREAM_END) { + if (!rc_is_finished(coder->rc)) + ret = LZMA_DATA_ERROR; - return false; + // Reset the range decoder so that it is ready to reinitialize + // for a new LZMA2 chunk. + rc_reset(coder->rc); + } + + return ret; } + static void -lzma_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +lzma_decoder_uncompressed(lzma_coder *coder, lzma_vli uncompressed_size) { - lzma_next_coder_end(&coder->next, allocator); - lzma_lz_decoder_end(&coder->lz, allocator); - lzma_free(coder, allocator); - return; + coder->uncompressed_size = uncompressed_size; } - -extern lzma_ret -lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) +/* +extern void +lzma_lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size) { - // LZMA can only be the last filter in the chain. - assert(filters[1].init == NULL); - - // Validate pos_bits. Other options are validated by the - // respective initialization functions. - const lzma_options_lzma *options = filters[0].options; - if (options->pos_bits > LZMA_POS_BITS_MAX) - return LZMA_HEADER_ERROR; + // This is hack. + (*(lzma_coder **)(coder))->uncompressed_size = uncompressed_size; +} +*/ - // Allocate memory for the decoder if needed. - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; +static void +lzma_decoder_reset(lzma_coder *coder, const void *opt) +{ + const lzma_options_lzma *options = opt; - next->code = &lzma_lz_decode; - next->end = &lzma_decoder_end; - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->lz = LZMA_LZ_DECODER_INIT; - } + // NOTE: We assume that lc/lp/pb are valid since they were + // successfully decoded with lzma_lzma_decode_properties(). + // FIXME? - // Store the pos_bits and calculate pos_mask. - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1U << next->coder->pos_bits) - 1; + // Calculate pos_mask. We don't need pos_bits as is for anything. + coder->pos_mask = (1U << options->pos_bits) - 1; // Initialize the literal decoder. - return_if_error(lzma_literal_init(&next->coder->literal_coder, - options->literal_context_bits, - options->literal_pos_bits)); + literal_init(coder->literal, options->literal_context_bits, + options->literal_pos_bits); - // Allocate and initialize the LZ decoder. - return_if_error(lzma_lz_decoder_reset(&next->coder->lz, allocator, - &decode_real, options->dictionary_size, - MATCH_MAX_LEN)); + coder->literal_context_bits = options->literal_context_bits; + coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1; // State - next->coder->state = 0; - next->coder->rep0 = 0; - next->coder->rep1 = 0; - next->coder->rep2 = 0; - next->coder->rep3 = 0; - next->coder->pos_bits = options->pos_bits; - next->coder->pos_mask = (1 << next->coder->pos_bits) - 1; - next->coder->now_pos = 0; + coder->state = STATE_LIT_LIT; + coder->rep0 = 0; + coder->rep1 = 0; + coder->rep2 = 0; + coder->rep3 = 0; + coder->pos_mask = (1 << options->pos_bits) - 1; // Range decoder - rc_reset(next->coder->rc); + rc_reset(coder->rc); // Bit and bittree decoders for (uint32_t i = 0; i < STATES; ++i) { - for (uint32_t j = 0; j <= next->coder->pos_mask; ++j) { - bit_reset(next->coder->is_match[i][j]); - bit_reset(next->coder->is_rep0_long[i][j]); + for (uint32_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); } - bit_reset(next->coder->is_rep[i]); - bit_reset(next->coder->is_rep0[i]); - bit_reset(next->coder->is_rep1[i]); - bit_reset(next->coder->is_rep2[i]); + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); } for (uint32_t i = 0; i < LEN_TO_POS_STATES; ++i) - bittree_reset(next->coder->pos_slot_decoder[i], POS_SLOT_BITS); + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); for (uint32_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) - bit_reset(next->coder->pos_decoders[i]); + bit_reset(coder->pos_special[i]); - bittree_reset(next->coder->pos_align_decoder, ALIGN_BITS); + bittree_reset(coder->pos_align, ALIGN_BITS); // Len decoders (also bit/bittree) - const uint32_t num_pos_states = 1 << next->coder->pos_bits; - bit_reset(next->coder->match_len_decoder.choice); - bit_reset(next->coder->match_len_decoder.choice2); - bit_reset(next->coder->rep_len_decoder.choice); - bit_reset(next->coder->rep_len_decoder.choice2); + const uint32_t num_pos_states = 1 << options->pos_bits; + bit_reset(coder->match_len_decoder.choice); + bit_reset(coder->match_len_decoder.choice2); + bit_reset(coder->rep_len_decoder.choice); + bit_reset(coder->rep_len_decoder.choice2); for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { - bittree_reset(next->coder->match_len_decoder.low[pos_state], + bittree_reset(coder->match_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->match_len_decoder.mid[pos_state], + bittree_reset(coder->match_len_decoder.mid[pos_state], LEN_MID_BITS); - bittree_reset(next->coder->rep_len_decoder.low[pos_state], + bittree_reset(coder->rep_len_decoder.low[pos_state], LEN_LOW_BITS); - bittree_reset(next->coder->rep_len_decoder.mid[pos_state], + bittree_reset(coder->rep_len_decoder.mid[pos_state], LEN_MID_BITS); } - bittree_reset(next->coder->match_len_decoder.high, LEN_HIGH_BITS); - bittree_reset(next->coder->rep_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS); + bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS); + + coder->sequence = SEQ_IS_MATCH; + coder->probs = NULL; + coder->symbol = 0; + coder->limit = 0; + coder->offset = 0; + coder->len = 0; + + return; +} + + +extern lzma_ret +lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, size_t *dict_size) +{ + if (lz->coder == NULL) { + lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (lz->coder == NULL) + return LZMA_MEM_ERROR; + + lz->code = &lzma_decode; + lz->reset = &lzma_decoder_reset; + lz->set_uncompressed = &lzma_decoder_uncompressed; + } - next->coder->has_produced_output = false; + // All dictionary sizes are OK here. LZ decoder will take care of + // the special cases. + const lzma_options_lzma *options = opt; + *dict_size = options->dictionary_size; return LZMA_OK; } -extern void -lzma_lzma_decoder_uncompressed_size( - lzma_next_coder *next, lzma_vli uncompressed_size) +/// Allocate and initialize LZMA decoder. This is used only via LZ +/// initialization (lzma_lzma_decoder_init() passes function pointer to +/// the LZ initialization). +static lzma_ret +lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *options, size_t *dict_size) { - next->coder->lz.uncompressed_size = uncompressed_size; - return; + if (!is_lclppb_valid(options)) + return LZMA_PROG_ERROR; + + return_if_error(lzma_lzma_decoder_create( + lz, allocator, options, dict_size)); + + lzma_decoder_reset(lz->coder, options); + lzma_decoder_uncompressed(lz->coder, LZMA_VLI_VALUE_UNKNOWN); + + return LZMA_OK; +} + + +extern lzma_ret +lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // LZMA can only be the last filter in the chain. This is enforced + // by the raw_decoder initialization. + assert(filters[1].init == NULL); + + return lzma_lz_decoder_init(next, allocator, filters, + &lzma_decoder_init); } extern bool -lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) +lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) { if (byte > (4 * 5 + 4) * 9 + 8) return true; @@ -781,3 +1011,49 @@ lzma_lzma_decode_properties(lzma_options_lzma *options, uint8_t byte) return options->literal_context_bits + options->literal_pos_bits > LZMA_LITERAL_BITS_MAX; } + + +extern uint64_t +lzma_lzma_decoder_memusage(const void *options) +{ + const lzma_options_lzma *const opt = options; + const uint64_t lz_memusage + = lzma_lz_decoder_memusage(opt->dictionary_size); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return sizeof(lzma_coder) + lz_memusage; +} + + +extern lzma_ret +lzma_lzma_props_decode(void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size) +{ + if (props_size != 5) + return LZMA_HEADER_ERROR; + + lzma_options_lzma *opt + = lzma_alloc(sizeof(lzma_options_lzma), allocator); + if (opt == NULL) + return LZMA_MEM_ERROR; + + if (lzma_lzma_lclppb_decode(opt, props[0])) + goto error; + + // All dictionary sizes are accepted, including zero. LZ decoder + // will automatically use a dictionary at least a few KiB even if + // a smaller dictionary is requested. + opt->dictionary_size = integer_read_32(props + 1); + + opt->preset_dictionary = NULL; + opt->preset_dictionary_size = 0; + + *options = opt; + + return LZMA_OK; + +error: + lzma_free(opt, allocator); + return LZMA_HEADER_ERROR; +} diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h index 9d57c7e5..3792f452 100644 --- a/src/liblzma/lzma/lzma_decoder.h +++ b/src/liblzma/lzma/lzma_decoder.h @@ -28,16 +28,27 @@ extern lzma_ret lzma_lzma_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); -/// Set known uncompressed size. This is a hack needed to support -/// LZMA_Alone files that don't have EOPM. -extern void lzma_lzma_decoder_uncompressed_size( - lzma_next_coder *next, lzma_vli uncompressed_size); +extern uint64_t lzma_lzma_decoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_decode( + void **options, lzma_allocator *allocator, + const uint8_t *props, size_t props_size); + /// \brief Decodes the LZMA Properties byte (lc/lp/pb) /// /// \return true if error occorred, false on success /// -extern bool lzma_lzma_decode_properties( +extern bool lzma_lzma_lclppb_decode( lzma_options_lzma *options, uint8_t byte); + +#ifdef LZMA_LZ_DECODER_H +/// Allocate and setup function pointers only. This is used by LZMA1 and +/// LZMA2 decoders. +extern lzma_ret lzma_lzma_decoder_create( + lzma_lz_decoder *lz, lzma_allocator *allocator, + const void *opt, size_t *dict_size); +#endif + #endif diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c index afb1d5ed..a84801e7 100644 --- a/src/liblzma/lzma/lzma_encoder.c +++ b/src/liblzma/lzma/lzma_encoder.c @@ -30,40 +30,33 @@ static inline void literal_matched(lzma_range_encoder *rc, probability *subcoder, uint32_t match_byte, uint32_t symbol) { - uint32_t context = 1; - uint32_t bit_count = 8; + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; do { - uint32_t bit = (symbol >> --bit_count) & 1; - const uint32_t match_bit = (match_byte >> bit_count) & 1; - rc_bit(rc, &subcoder[(0x100 << match_bit) + context], bit); - context = (context << 1) | bit; - - if (match_bit != bit) { - // The bit from the literal being encoded and the bit - // from the previous match differ. Finish encoding - // as a normal literal. - while (bit_count != 0) { - bit = (symbol >> --bit_count) & 1; - rc_bit(rc, &subcoder[context], bit); - context = (context << 1) | bit; - } + match_byte <<= 1; + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + rc_bit(rc, &subcoder[subcoder_index], bit); - break; - } + symbol <<= 1; + offset &= ~(match_byte ^ symbol); - } while (bit_count != 0); + } while (symbol < (UINT32_C(1) << 16)); } static inline void -literal(lzma_coder *coder) +literal(lzma_coder *coder, lzma_mf *mf, uint32_t position) { // Locate the literal byte to be encoded and the subcoder. - const uint8_t cur_byte = coder->lz.buffer[ - coder->lz.read_pos - coder->additional_offset]; - probability *subcoder = literal_get_subcoder(coder->literal_coder, - coder->now_pos, coder->previous_byte); + const uint8_t cur_byte = mf->buffer[ + mf->read_pos - mf->read_ahead]; + probability *subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + position, mf->buffer[mf->read_pos - mf->read_ahead - 1]); if (is_literal_state(coder->state)) { // Previous LZMA-symbol was a literal. Encode a normal @@ -73,14 +66,13 @@ literal(lzma_coder *coder) // Previous LZMA-symbol was a match. Use the last byte of // the match as a "match byte". That is, compare the bits // of the current literal and the match byte. - const uint8_t match_byte = coder->lz.buffer[ - coder->lz.read_pos - coder->reps[0] - 1 - - coder->additional_offset]; + const uint8_t match_byte = mf->buffer[ + mf->read_pos - coder->reps[0] - 1 + - mf->read_ahead]; literal_matched(&coder->rc, subcoder, match_byte, cur_byte); } update_literal(coder->state); - coder->previous_byte = cur_byte; } @@ -88,12 +80,41 @@ literal(lzma_coder *coder) // Match length // ////////////////// +static void +length_update_prices(lzma_length_encoder *lc, const uint32_t pos_state) +{ + const uint32_t table_size = lc->table_size; + lc->counters[pos_state] = table_size; + + const uint32_t a0 = rc_bit_0_price(lc->choice); + const uint32_t a1 = rc_bit_1_price(lc->choice); + const uint32_t b0 = a1 + rc_bit_0_price(lc->choice2); + const uint32_t b1 = a1 + rc_bit_1_price(lc->choice2); + uint32_t *const prices = lc->prices[pos_state]; + + uint32_t i; + for (i = 0; i < table_size && i < LEN_LOW_SYMBOLS; ++i) + prices[i] = a0 + rc_bittree_price(lc->low[pos_state], + LEN_LOW_BITS, i); + + for (; i < table_size && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) + prices[i] = b0 + rc_bittree_price(lc->mid[pos_state], + LEN_MID_BITS, i - LEN_LOW_SYMBOLS); + + for (; i < table_size; ++i) + prices[i] = b1 + rc_bittree_price(lc->high, LEN_HIGH_BITS, + i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); + + return; +} + + static inline void length(lzma_range_encoder *rc, lzma_length_encoder *lc, - const uint32_t pos_state, uint32_t len) + const uint32_t pos_state, uint32_t len, const bool fast_mode) { - assert(len <= MATCH_MAX_LEN); - len -= MATCH_MIN_LEN; + assert(len <= MATCH_LEN_MAX); + len -= MATCH_LEN_MIN; if (len < LEN_LOW_SYMBOLS) { rc_bit(rc, &lc->choice, 0); @@ -111,6 +132,12 @@ length(lzma_range_encoder *rc, lzma_length_encoder *lc, rc_bittree(rc, lc->high, LEN_HIGH_BITS, len); } } + + // Only getoptimum uses the prices so don't update the table when + // in fast mode. + if (!fast_mode) + if (--lc->counters[pos_state] == 0) + length_update_prices(lc, pos_state); } @@ -124,12 +151,12 @@ match(lzma_coder *coder, const uint32_t pos_state, { update_match(coder->state); - length(&coder->rc, &coder->match_len_encoder, pos_state, len); - coder->prev_len_encoder = &coder->match_len_encoder; + length(&coder->rc, &coder->match_len_encoder, pos_state, len, + coder->fast_mode); const uint32_t pos_slot = get_pos_slot(distance); const uint32_t len_to_pos_state = get_len_to_pos_state(len); - rc_bittree(&coder->rc, coder->pos_slot_encoder[len_to_pos_state], + rc_bittree(&coder->rc, coder->pos_slot[len_to_pos_state], POS_SLOT_BITS, pos_slot); if (pos_slot >= START_POS_MODEL_INDEX) { @@ -139,13 +166,13 @@ match(lzma_coder *coder, const uint32_t pos_state, if (pos_slot < END_POS_MODEL_INDEX) { rc_bittree_reverse(&coder->rc, - &coder->pos_encoders[base - pos_slot - 1], + &coder->pos_special[base - pos_slot - 1], footer_bits, pos_reduced); } else { rc_direct(&coder->rc, pos_reduced >> ALIGN_BITS, footer_bits - ALIGN_BITS); rc_bittree_reverse( - &coder->rc, coder->pos_align_encoder, + &coder->rc, coder->pos_align, ALIGN_BITS, pos_reduced & ALIGN_MASK); ++coder->align_price_count; } @@ -196,8 +223,8 @@ rep_match(lzma_coder *coder, const uint32_t pos_state, if (len == 1) { update_short_rep(coder->state); } else { - length(&coder->rc, &coder->rep_len_encoder, pos_state, len); - coder->prev_len_encoder = &coder->rep_len_encoder; + length(&coder->rc, &coder->rep_len_encoder, pos_state, len, + coder->fast_mode); update_long_rep(coder->state); } } @@ -208,117 +235,123 @@ rep_match(lzma_coder *coder, const uint32_t pos_state, ////////// static void -encode_symbol(lzma_coder *coder, uint32_t pos, uint32_t len) +encode_symbol(lzma_coder *coder, lzma_mf *mf, + uint32_t back, uint32_t len, uint32_t position) { - const uint32_t pos_state = coder->now_pos & coder->pos_mask; + const uint32_t pos_state = position & coder->pos_mask; - if (len == 1 && pos == UINT32_MAX) { + if (back == UINT32_MAX) { // Literal i.e. eight-bit byte + assert(len == 1); rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 0); - literal(coder); + literal(coder, mf, position); } else { // Some type of match rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); - if (pos < REP_DISTANCES) { + if (back < REP_DISTANCES) { // It's a repeated match i.e. the same distance // has been used earlier. rc_bit(&coder->rc, &coder->is_rep[coder->state], 1); - rep_match(coder, pos_state, pos, len); + rep_match(coder, pos_state, back, len); } else { // Normal match rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); - match(coder, pos_state, pos - REP_DISTANCES, len); + match(coder, pos_state, back - REP_DISTANCES, len); } + } + + assert(mf->read_ahead >= len); + mf->read_ahead -= len; +} + + +static bool +encode_init(lzma_coder *coder, lzma_mf *mf) +{ + if (mf->read_pos == mf->read_limit) { + if (mf->action == LZMA_RUN) + return false; // We cannot do anything. - coder->previous_byte = coder->lz.buffer[ - coder->lz.read_pos + len - 1 - - coder->additional_offset]; + // We are finishing (we cannot get here when flushing). + assert(mf->write_pos == mf->read_pos); + assert(mf->action == LZMA_FINISH); + } else { + // Do the actual initialization. The first LZMA symbol must + // always be a literal. + mf_skip(mf, 1); + mf->read_ahead = 0; + rc_bit(&coder->rc, &coder->is_match[0][0], 0); + rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]); } - assert(coder->additional_offset >= len); - coder->additional_offset -= len; - coder->now_pos += len; + // Initialization is done (except if empty file). + coder->is_initialized = true; + + return true; } static void -encode_eopm(lzma_coder *coder) +encode_eopm(lzma_coder *coder, uint32_t position) { - const uint32_t pos_state = coder->now_pos & coder->pos_mask; + const uint32_t pos_state = position & coder->pos_mask; rc_bit(&coder->rc, &coder->is_match[coder->state][pos_state], 1); rc_bit(&coder->rc, &coder->is_rep[coder->state], 0); - match(coder, pos_state, UINT32_MAX, MATCH_MIN_LEN); + match(coder, pos_state, UINT32_MAX, MATCH_LEN_MIN); } -/** - * \brief LZMA encoder - * - * \return true if end of stream was reached, false otherwise. - */ -extern bool -lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size) +/// Number of bytes that a single encoding loop in lzma_lzma_encode() can +/// consume from the dictionary. This limit comes from lzma_lzma_optimum() +/// and may need to be updated if that function is significantly modified. +#define LOOP_INPUT_MAX (OPTS + 1) + + +extern lzma_ret +lzma_lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size, uint32_t limit) { // Initialize the stream if no data has been encoded yet. - if (!coder->is_initialized) { - if (coder->lz.read_pos == coder->lz.read_limit) { - if (coder->lz.sequence == SEQ_RUN) - return false; // We cannot do anything. - - // We are finishing (we cannot get here when flushing). - assert(coder->lz.write_pos == coder->lz.read_pos); - assert(coder->lz.sequence == SEQ_FINISH); - } else { - // Do the actual initialization. - uint32_t len; - uint32_t num_distance_pairs; - lzma_read_match_distances(coder, - &len, &num_distance_pairs); + if (!coder->is_initialized && !encode_init(coder, mf)) + return LZMA_OK; - encode_symbol(coder, UINT32_MAX, 1); + // Get the lowest bits of the uncompressed offset from the LZ layer. + uint32_t position = mf_position(mf); - assert(coder->additional_offset == 0); + while (true) { + // Encode pending bits, if any. Calling this before encoding + // the next symbol is needed only with plain LZMA, since + // LZMA2 always provides big enough buffer to flush + // everything out from the range encoder. For the same reason, + // rc_encode() never returns true when this function is used + // as part of LZMA2 encoder. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; } - // Initialization is done (except if empty file). - coder->is_initialized = true; - } - - // Encoding loop - while (true) { - // Encode pending bits, if any. - if (rc_encode(&coder->rc, out, out_pos, out_size)) - return false; + // With LZMA2 we need to take care that compressed size of + // a chunk doesn't get too big. + // TODO + if (limit != UINT32_MAX + && (mf->read_pos - mf->read_ahead >= limit + || *out_pos + rc_pending(&coder->rc) + >= (UINT32_C(1) << 16) + - LOOP_INPUT_MAX)) + break; // Check that there is some input to process. - if (coder->lz.read_pos >= coder->lz.read_limit) { - // If flushing or finishing, we must keep encoding - // until additional_offset becomes zero to make - // all the input available at output. - if (coder->lz.sequence == SEQ_RUN) - return false; - - if (coder->additional_offset == 0) - break; - } - - assert(coder->lz.read_pos <= coder->lz.write_pos); + if (mf->read_pos >= mf->read_limit) { + if (mf->action == LZMA_RUN) + return LZMA_OK; -#ifndef NDEBUG - if (coder->lz.sequence != SEQ_RUN) { - assert(coder->lz.read_limit == coder->lz.write_pos); - } else { - assert(coder->lz.read_limit + coder->lz.keep_size_after - == coder->lz.write_pos); + if (mf->read_ahead == 0) + break; } -#endif - - uint32_t pos; - uint32_t len; // Get optimal match (repeat position and length). // Value ranges for pos: @@ -327,33 +360,324 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // match at (pos - REP_DISTANCES) // - UINT32_MAX: not a match but a literal // Value ranges for len: - // - [MATCH_MIN_LEN, MATCH_MAX_LEN] - if (coder->best_compression) - lzma_get_optimum(coder, &pos, &len); + // - [MATCH_LEN_MIN, MATCH_LEN_MAX] + uint32_t len; + uint32_t back; + + if (coder->fast_mode) + lzma_lzma_optimum_fast(coder, mf, &back, &len); else - lzma_get_optimum_fast(coder, &pos, &len); + lzma_lzma_optimum_normal( + coder, mf, &back, &len, position); + + encode_symbol(coder, mf, back, len, position); + + position += len; + } + + if (!coder->is_flushed) { + coder->is_flushed = true; - encode_symbol(coder, pos, len); + // We don't support encoding plain LZMA streams without EOPM, + // and LZMA2 doesn't use EOPM at LZMA level. + if (limit == UINT32_MAX) + encode_eopm(coder, position); + + // Flush the remaining bytes from the range encoder. + rc_flush(&coder->rc); + + // Copy the remaining bytes to the output buffer. If there + // isn't enough output space, we will copy out the remaining + // bytes on the next call to this function by using + // the rc_encode() call in the encoding loop above. + if (rc_encode(&coder->rc, out, out_pos, out_size)) { + assert(limit == UINT32_MAX); + return LZMA_OK; + } } - assert(!coder->longest_match_was_found); + // Make it ready for the next LZMA2 chunk. + coder->is_flushed = false; + + return LZMA_STREAM_END; +} + + +static lzma_ret +lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint8_t *restrict out, size_t *restrict out_pos, + size_t out_size) +{ + // Plain LZMA has no support for sync-flushing. + if (unlikely(mf->action == LZMA_SYNC_FLUSH)) + return LZMA_HEADER_ERROR; + + return lzma_lzma_encode(coder, mf, out, out_pos, out_size, UINT32_MAX); +} + - if (coder->is_flushed) { - coder->is_flushed = false; +//////////////////// +// Initialization // +//////////////////// + +static bool +set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options) +{ + if (!is_lclppb_valid(options) + || options->fast_bytes < LZMA_FAST_BYTES_MIN + || options->fast_bytes > LZMA_FAST_BYTES_MAX) return true; + + // FIXME validation + + lz_options->before_size = OPTS; + lz_options->dictionary_size = options->dictionary_size; + lz_options->after_size = LOOP_INPUT_MAX; + lz_options->match_len_max = MATCH_LEN_MAX; + lz_options->find_len_max = options->fast_bytes; + lz_options->match_finder = options->match_finder; + lz_options->match_finder_cycles = options->match_finder_cycles; + lz_options->preset_dictionary = options->preset_dictionary; + lz_options->preset_dictionary_size = options->preset_dictionary_size; + + return false; +} + + +static void +length_encoder_reset(lzma_length_encoder *lencoder, + const uint32_t num_pos_states, const bool fast_mode) +{ + bit_reset(lencoder->choice); + bit_reset(lencoder->choice2); + + for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { + bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); + bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); } - // We don't support encoding old LZMA streams without EOPM, and LZMA2 - // doesn't use EOPM at LZMA level. - if (coder->write_eopm) - encode_eopm(coder); + bittree_reset(lencoder->high, LEN_HIGH_BITS); - rc_flush(&coder->rc); + if (!fast_mode) + for (size_t pos_state = 0; pos_state < num_pos_states; + ++pos_state) + length_update_prices(lencoder, pos_state); - if (rc_encode(&coder->rc, out, out_pos, out_size)) { - coder->is_flushed = true; - return false; + return; +} + + +extern void +lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options) +{ + assert(!coder->is_flushed); + + coder->pos_mask = (1U << options->pos_bits) - 1; + coder->literal_context_bits = options->literal_context_bits; + coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1; + + + // Range coder + rc_reset(&coder->rc); + + // State + coder->state = 0; + for (size_t i = 0; i < REP_DISTANCES; ++i) + coder->reps[i] = 0; + + literal_init(coder->literal, options->literal_context_bits, + options->literal_pos_bits); + + // Bit encoders + for (size_t i = 0; i < STATES; ++i) { + for (size_t j = 0; j <= coder->pos_mask; ++j) { + bit_reset(coder->is_match[i][j]); + bit_reset(coder->is_rep0_long[i][j]); + } + + bit_reset(coder->is_rep[i]); + bit_reset(coder->is_rep0[i]); + bit_reset(coder->is_rep1[i]); + bit_reset(coder->is_rep2[i]); } - return true; + for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) + bit_reset(coder->pos_special[i]); + + // Bit tree encoders + for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) + bittree_reset(coder->pos_slot[i], POS_SLOT_BITS); + + bittree_reset(coder->pos_align, ALIGN_BITS); + + // Length encoders + length_encoder_reset(&coder->match_len_encoder, + 1U << options->pos_bits, coder->fast_mode); + + length_encoder_reset(&coder->rep_len_encoder, + 1U << options->pos_bits, coder->fast_mode); + + // FIXME: Too big or too small won't work when resetting in the middle of LZMA2. + coder->match_price_count = UINT32_MAX / 2; + coder->align_price_count = UINT32_MAX / 2; + + coder->opts_end_index = 0; + coder->opts_current_index = 0; +} + + +extern lzma_ret +lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options) +{ + if (*coder_ptr == NULL) { + *coder_ptr = lzma_alloc(sizeof(lzma_coder), allocator); + if (*coder_ptr == NULL) + return LZMA_MEM_ERROR; + } + + lzma_coder *coder = *coder_ptr; + + // Validate options that aren't validated elsewhere. + if (!is_lclppb_valid(options) + || options->fast_bytes < LZMA_FAST_BYTES_MIN + || options->fast_bytes > LZMA_FAST_BYTES_MAX) + return LZMA_HEADER_ERROR; + + // Set compression mode. + switch (options->mode) { + case LZMA_MODE_FAST: + coder->fast_mode = true; + break; + + case LZMA_MODE_NORMAL: { + coder->fast_mode = false; + + // Set dist_table_size. + // Round the dictionary size up to next 2^n. + uint32_t log_size = 0; + while ((UINT32_C(1) << log_size) + < options->dictionary_size) + ++log_size; + + coder->dist_table_size = log_size * 2; + + // Length encoders' price table size + coder->match_len_encoder.table_size + = options->fast_bytes + 1 - MATCH_LEN_MIN; + coder->rep_len_encoder.table_size + = options->fast_bytes + 1 - MATCH_LEN_MIN; + break; + } + + default: + return LZMA_HEADER_ERROR; + } + + coder->is_initialized = false; + coder->is_flushed = false; + + lzma_lzma_encoder_reset(coder, options); + + // LZ encoder options FIXME validation + if (set_lz_options(lz_options, options)) + return LZMA_HEADER_ERROR; + + return LZMA_OK; +} + + +static lzma_ret +lzma_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, + const void *options, lzma_lz_options *lz_options) +{ + lz->code = &lzma_encode; + return lzma_lzma_encoder_create( + &lz->coder, allocator, options, lz_options); +} + + +extern lzma_ret +lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, + const lzma_filter_info *filters) +{ + // Initialization call chain: + // + // lzma_lzma_encoder_init() + // `-- lzma_lz_encoder_init() + // `-- lzma_encoder_init() + // `-- lzma_encoder_init2() + // + // The above complexity is to let LZ encoder store the pointer to + // the LZMA encoder structure. Encoding call tree: + // + // lz_encode() + // |-- fill_window() + // | `-- Next coder in the chain, if any + // `-- lzma_encode() + // |-- lzma_dict_find() + // `-- lzma_dict_skip() + // + // FIXME ^ + // + return lzma_lz_encoder_init( + next, allocator, filters, &lzma_encoder_init); +} + + +extern uint64_t +lzma_lzma_encoder_memusage(const void *options) +{ + lzma_lz_options lz_options; + if (set_lz_options(&lz_options, options)) + return UINT64_MAX; + + const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options); + if (lz_memusage == UINT64_MAX) + return UINT64_MAX; + + return (uint64_t)(sizeof(lzma_coder)) + lz_memusage; +} + + +extern bool +lzma_lzma_lclppb_encode(const lzma_options_lzma *options, uint8_t *byte) +{ + if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX + || options->literal_pos_bits + > LZMA_LITERAL_POS_BITS_MAX + || options->pos_bits > LZMA_POS_BITS_MAX + || options->literal_context_bits + + options->literal_pos_bits + > LZMA_LITERAL_BITS_MAX) + return true; + + *byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9 + + options->literal_context_bits; + assert(*byte <= (4 * 5 + 4) * 9 + 8); + + return false; +} + + +#ifdef HAVE_ENCODER_LZMA +extern lzma_ret +lzma_lzma_props_encode(const void *options, uint8_t *out) +{ + const lzma_options_lzma *const opt = options; + + if (lzma_lzma_lclppb_encode(opt, out)) + return LZMA_PROG_ERROR; + + integer_write_32(out + 1, opt->dictionary_size); + + return LZMA_OK; +} +#endif + + +extern LZMA_API lzma_bool +lzma_mode_is_available(lzma_mode mode) +{ + return mode == LZMA_MODE_FAST || mode == LZMA_MODE_NORMAL; } diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h index 1c57f80a..e270cc27 100644 --- a/src/liblzma/lzma/lzma_encoder.h +++ b/src/liblzma/lzma/lzma_encoder.h @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file lzma_encoder.h -/// \brief LZMA method handler API +/// \brief LZMA encoder API // // Copyright (C) 1999-2006 Igor Pavlov // Copyright (C) 2007 Lasse Collin @@ -23,13 +23,47 @@ #include "common.h" + extern lzma_ret lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters); -extern bool lzma_lzma_encode_properties( + +extern uint64_t lzma_lzma_encoder_memusage(const void *options); + +extern lzma_ret lzma_lzma_props_encode(const void *options, uint8_t *out); + + +/// Encodes lc/lp/pb into one byte. Returns false on success and true on error. +extern bool lzma_lzma_lclppb_encode( const lzma_options_lzma *options, uint8_t *byte); + +#ifdef HAVE_SMALL + /// Initializes the lzma_fastpos[] array. extern void lzma_fastpos_init(void); #endif + + +#ifdef LZMA_LZ_ENCODER_H + +/// Initializes raw LZMA encoder; this is used by LZMA2. +extern lzma_ret lzma_lzma_encoder_create( + lzma_coder **coder_ptr, lzma_allocator *allocator, + const lzma_options_lzma *options, lzma_lz_options *lz_options); + + +/// Resets an already initialized LZMA encoder; this is used by LZMA2. +extern void lzma_lzma_encoder_reset( + lzma_coder *coder, const lzma_options_lzma *options); + + +extern lzma_ret lzma_lzma_encode(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, + uint32_t read_limit); + +#endif + +#endif diff --git a/src/liblzma/lzma/lzma_encoder_features.c b/src/liblzma/lzma/lzma_encoder_features.c index 56e59c6a..9fecee48 100644 --- a/src/liblzma/lzma/lzma_encoder_features.c +++ b/src/liblzma/lzma/lzma_encoder_features.c @@ -22,7 +22,7 @@ static lzma_mode modes[] = { LZMA_MODE_FAST, - LZMA_MODE_BEST, + LZMA_MODE_NORMAL, LZMA_MODE_INVALID }; diff --git a/src/liblzma/lzma/lzma_encoder_getoptimum.c b/src/liblzma/lzma/lzma_encoder_getoptimum.c deleted file mode 100644 index b175e4cb..00000000 --- a/src/liblzma/lzma/lzma_encoder_getoptimum.c +++ /dev/null @@ -1,925 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_getoptimum.c -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - - -// "Would you love the monster code? -// Could you understand beauty of the beast?" -// --Adapted from Lordi's "Would you love a monster man". - - -#include "lzma_encoder_private.h" -#include "fastpos.h" - - -#define length_get_price(length_encoder, symbol, pos_state) \ - (length_encoder).prices[pos_state][symbol] - - -#define get_rep_len_1_price(state, pos_state) \ - bit_get_price_0(coder->is_rep0[state]) \ - + bit_get_price_0(coder->is_rep0_long[state][pos_state]) - - -// Adds to price_target. -#define get_pure_rep_price(price_target, rep_index, state, pos_state) \ -do { \ - if ((rep_index) == 0) { \ - price_target += bit_get_price_0(coder->is_rep0[state]); \ - price_target += bit_get_price_1( \ - coder->is_rep0_long[state][pos_state]); \ - } else { \ - price_target += bit_get_price_1(coder->is_rep0[state]); \ - if ((rep_index) == 1) { \ - price_target += bit_get_price_0(coder->is_rep1[state]); \ - } else { \ - price_target += bit_get_price_1(coder->is_rep1[state]); \ - price_target += bit_get_price( \ - coder->is_rep2[state], (rep_index) - 2); \ - } \ - } \ -} while (0) - - -// Adds to price_target. -#define get_rep_price(price_target, rep_index, len, state, pos_state) \ -do { \ - get_pure_rep_price(price_target, rep_index, state, pos_state); \ - price_target += length_get_price(coder->rep_len_encoder, \ - (len) - MATCH_MIN_LEN, pos_state); \ -} while (0) - - -// Adds to price_target. -#define get_pos_len_price(price_target, pos, len, pos_state) \ -do { \ - const uint32_t len_to_pos_state_tmp = get_len_to_pos_state(len); \ - if ((pos) < FULL_DISTANCES) { \ - price_target += distances_prices[len_to_pos_state_tmp][pos]; \ - } else { \ - price_target \ - += pos_slot_prices[len_to_pos_state_tmp][get_pos_slot_2(pos)] \ - + align_prices[(pos) & ALIGN_MASK]; \ - } \ - price_target += length_get_price( \ - coder->match_len_encoder, (len) - MATCH_MIN_LEN, pos_state); \ -} while (0) - - -// Three macros to manipulate lzma_optimal structures: -#define make_as_char(opt) \ -do { \ - (opt).back_prev = UINT32_MAX; \ - (opt).prev_1_is_char = false; \ -} while (0) - - -#define make_as_short_rep(opt) \ -do { \ - (opt).back_prev = 0; \ - (opt).prev_1_is_char = false; \ -} while (0) - - -#define is_short_rep(opt) \ - ((opt).back_prev == 0) - - -static void -fill_length_prices(lzma_length_encoder *lc, uint32_t pos_state) -{ - const uint32_t num_symbols = lc->table_size; - const uint32_t a0 = bit_get_price_0(lc->choice); - const uint32_t a1 = bit_get_price_1(lc->choice); - const uint32_t b0 = a1 + bit_get_price_0(lc->choice2); - const uint32_t b1 = a1 + bit_get_price_1(lc->choice2); - - uint32_t *prices = lc->prices[pos_state]; - uint32_t i = 0; - - for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) - prices[i] = a0 + bittree_get_price(lc->low[pos_state], - LEN_LOW_BITS, i); - - for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) - prices[i] = b0 + bittree_get_price(lc->mid[pos_state], - LEN_MID_BITS, i - LEN_LOW_SYMBOLS); - - for (; i < num_symbols; ++i) - prices[i] = b1 + bittree_get_price(lc->high, LEN_HIGH_BITS, - i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); - - lc->counters[pos_state] = num_symbols; - - return; -} - - -static void -fill_distances_prices(lzma_coder *coder) -{ - uint32_t temp_prices[FULL_DISTANCES]; - - for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { - const uint32_t pos_slot = get_pos_slot(i); - const uint32_t footer_bits = ((pos_slot >> 1) - 1); - const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; - temp_prices[i] = bittree_reverse_get_price( - coder->pos_encoders + base - pos_slot - 1, - footer_bits, i - base); - } - - const uint32_t dist_table_size = coder->dist_table_size; - - for (uint32_t len_to_pos_state = 0; - len_to_pos_state < LEN_TO_POS_STATES; - ++len_to_pos_state) { - - const probability *encoder = coder->pos_slot_encoder[len_to_pos_state]; - uint32_t *pos_slot_prices = coder->pos_slot_prices[len_to_pos_state]; - - for (uint32_t pos_slot = 0; - pos_slot < dist_table_size; - ++pos_slot) { - pos_slot_prices[pos_slot] = bittree_get_price(encoder, - POS_SLOT_BITS, pos_slot); - } - - for (uint32_t pos_slot = END_POS_MODEL_INDEX; - pos_slot < dist_table_size; - ++pos_slot) - pos_slot_prices[pos_slot] += (((pos_slot >> 1) - 1) - - ALIGN_BITS) << BIT_PRICE_SHIFT_BITS; - - - uint32_t *distances_prices - = coder->distances_prices[len_to_pos_state]; - - uint32_t i; - for (i = 0; i < START_POS_MODEL_INDEX; ++i) - distances_prices[i] = pos_slot_prices[i]; - - for (; i < FULL_DISTANCES; ++i) - distances_prices[i] = pos_slot_prices[get_pos_slot(i)] - + temp_prices[i]; - } - - coder->match_price_count = 0; - - return; -} - - -static void -fill_align_prices(lzma_coder *coder) -{ - for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) - coder->align_prices[i] = bittree_reverse_get_price( - coder->pos_align_encoder, ALIGN_BITS, i); - - coder->align_price_count = 0; - return; -} - - -// The first argument is a pointer returned by literal_get_subcoder(). -static uint32_t -literal_get_price(const probability *encoders, const bool match_mode, - const uint8_t match_byte, const uint8_t symbol) -{ - uint32_t price = 0; - uint32_t context = 1; - int i = 8; - - if (match_mode) { - do { - --i; - const uint32_t match_bit = (match_byte >> i) & 1; - const uint32_t bit = (symbol >> i) & 1; - const uint32_t subcoder_index - = 0x100 + (match_bit << 8) + context; - - price += bit_get_price(encoders[subcoder_index], bit); - context = (context << 1) | bit; - - if (match_bit != bit) - break; - - } while (i != 0); - } - - while (i != 0) { - --i; - const uint32_t bit = (symbol >> i) & 1; - price += bit_get_price(encoders[context], bit); - context = (context << 1) | bit; - } - - return price; -} - - -static void -backward(lzma_coder *restrict coder, uint32_t *restrict len_res, - uint32_t *restrict back_res, uint32_t cur) -{ - coder->optimum_end_index = cur; - - uint32_t pos_mem = coder->optimum[cur].pos_prev; - uint32_t back_mem = coder->optimum[cur].back_prev; - - do { - if (coder->optimum[cur].prev_1_is_char) { - make_as_char(coder->optimum[pos_mem]); - coder->optimum[pos_mem].pos_prev = pos_mem - 1; - - if (coder->optimum[cur].prev_2) { - coder->optimum[pos_mem - 1].prev_1_is_char = false; - coder->optimum[pos_mem - 1].pos_prev - = coder->optimum[cur].pos_prev_2; - coder->optimum[pos_mem - 1].back_prev - = coder->optimum[cur].back_prev_2; - } - } - - uint32_t pos_prev = pos_mem; - uint32_t back_cur = back_mem; - - back_mem = coder->optimum[pos_prev].back_prev; - pos_mem = coder->optimum[pos_prev].pos_prev; - - coder->optimum[pos_prev].back_prev = back_cur; - coder->optimum[pos_prev].pos_prev = cur; - cur = pos_prev; - - } while (cur != 0); - - coder->optimum_current_index = coder->optimum[0].pos_prev; - *len_res = coder->optimum[0].pos_prev; - *back_res = coder->optimum[0].back_prev; - - return; -} - - -extern void -lzma_get_optimum(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res) -{ - uint32_t position = coder->now_pos; - uint32_t pos_state = position & coder->pos_mask; - - // Update the price tables. In the C++ LZMA SDK 4.42 this was done in both - // initialization function and in the main loop. In liblzma they were - // moved into this single place. - if (coder->additional_offset == 0) { - if (coder->match_price_count >= (1 << 7)) - fill_distances_prices(coder); - - if (coder->align_price_count >= ALIGN_TABLE_SIZE) - fill_align_prices(coder); - } - - if (coder->prev_len_encoder != NULL) { - if (--coder->prev_len_encoder->counters[pos_state] == 0) - fill_length_prices(coder->prev_len_encoder, pos_state); - - coder->prev_len_encoder = NULL; - } - - - if (coder->optimum_end_index != coder->optimum_current_index) { - *len_res = coder->optimum[coder->optimum_current_index].pos_prev - - coder->optimum_current_index; - *back_res = coder->optimum[coder->optimum_current_index].back_prev; - coder->optimum_current_index = coder->optimum[ - coder->optimum_current_index].pos_prev; - return; - } - - coder->optimum_current_index = 0; - coder->optimum_end_index = 0; - - - const uint32_t fast_bytes = coder->fast_bytes; - uint32_t *match_distances = coder->match_distances; - - uint32_t len_main; - uint32_t num_distance_pairs; - - if (!coder->longest_match_was_found) { - lzma_read_match_distances(coder, &len_main, &num_distance_pairs); - } else { - len_main = coder->longest_match_length; - num_distance_pairs = coder->num_distance_pairs; - coder->longest_match_was_found = false; - } - - - const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1; - uint32_t num_available_bytes - = coder->lz.write_pos - coder->lz.read_pos + 1; - if (num_available_bytes < 2) { - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - if (num_available_bytes > MATCH_MAX_LEN) - num_available_bytes = MATCH_MAX_LEN; - - - uint32_t reps[REP_DISTANCES]; - uint32_t rep_lens[REP_DISTANCES]; - uint32_t rep_max_index = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - reps[i] = coder->reps[i]; - const uint32_t back_offset = reps[i] + 1; - - if (buf[0] != *(buf - back_offset) - || buf[1] != *(buf + 1 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - uint32_t len_test; - for (len_test = 2; len_test < num_available_bytes - && buf[len_test] == *(buf + len_test - back_offset); - ++len_test) ; - - rep_lens[i] = len_test; - if (len_test > rep_lens[rep_max_index]) - rep_max_index = i; - } - - if (rep_lens[rep_max_index] >= fast_bytes) { - *back_res = rep_max_index; - *len_res = rep_lens[rep_max_index]; - move_pos(*len_res - 1); - return; - } - - - if (len_main >= fast_bytes) { - *back_res = match_distances[num_distance_pairs] + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 1); - return; - } - - uint8_t current_byte = *buf; - uint8_t match_byte = *(buf - reps[0] - 1); - - if (len_main < 2 && current_byte != match_byte - && rep_lens[rep_max_index] < 2) { - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - coder->optimum[0].state = coder->state; - - coder->optimum[1].price = bit_get_price_0( - coder->is_match[coder->state][pos_state]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position, coder->previous_byte), - !is_literal_state(coder->state), match_byte, current_byte); - - make_as_char(coder->optimum[1]); - - uint32_t match_price - = bit_get_price_1(coder->is_match[coder->state][pos_state]); - uint32_t rep_match_price - = match_price + bit_get_price_1(coder->is_rep[coder->state]); - - - if (match_byte == current_byte) { - const uint32_t short_rep_price = rep_match_price - + get_rep_len_1_price(coder->state, pos_state); - - if (short_rep_price < coder->optimum[1].price) { - coder->optimum[1].price = short_rep_price; - make_as_short_rep(coder->optimum[1]); - } - } - - uint32_t len_end = (len_main >= rep_lens[rep_max_index]) - ? len_main - : rep_lens[rep_max_index]; - - if (len_end < 2) { - *back_res = coder->optimum[1].back_prev; - *len_res = 1; - return; - } - - coder->optimum[1].pos_prev = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) - coder->optimum[0].backs[i] = reps[i]; - - uint32_t len = len_end; - do { - coder->optimum[len].price = INFINITY_PRICE; - } while (--len >= 2); - - - uint32_t (*distances_prices)[FULL_DISTANCES] = coder->distances_prices; - uint32_t (*pos_slot_prices)[DIST_TABLE_SIZE_MAX] = coder->pos_slot_prices; - uint32_t *align_prices = coder->align_prices; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - uint32_t rep_len = rep_lens[i]; - if (rep_len < 2) - continue; - - uint32_t price = rep_match_price; - get_pure_rep_price(price, i, coder->state, pos_state); - - do { - const uint32_t cur_and_len_price = price - + length_get_price( - coder->rep_len_encoder, - rep_len - 2, pos_state); - - if (cur_and_len_price < coder->optimum[rep_len].price) { - coder->optimum[rep_len].price = cur_and_len_price; - coder->optimum[rep_len].pos_prev = 0; - coder->optimum[rep_len].back_prev = i; - coder->optimum[rep_len].prev_1_is_char = false; - } - } while (--rep_len >= 2); - } - - - uint32_t normal_match_price = match_price - + bit_get_price_0(coder->is_rep[coder->state]); - - len = (rep_lens[0] >= 2) ? rep_lens[0] + 1 : 2; - - if (len <= len_main) { - uint32_t offs = 0; - - while (len > match_distances[offs + 1]) - offs += 2; - - for(; ; ++len) { - const uint32_t distance = match_distances[offs + 2]; - uint32_t cur_and_len_price = normal_match_price; - get_pos_len_price(cur_and_len_price, distance, len, pos_state); - - if (cur_and_len_price < coder->optimum[len].price) { - coder->optimum[len].price = cur_and_len_price; - coder->optimum[len].pos_prev = 0; - coder->optimum[len].back_prev = distance + REP_DISTANCES; - coder->optimum[len].prev_1_is_char = false; - } - - if (len == match_distances[offs + 1]) { - offs += 2; - if (offs == num_distance_pairs) - break; - } - } - } - - - ////////////////// - // Big loop ;-) // - ////////////////// - - uint32_t cur = 0; - - // The rest of this function is a huge while-loop. To avoid extreme - // indentation, the indentation level is not increased here. - while (true) { - - ++cur; - - assert(cur < OPTS); - - if (cur == len_end) { - backward(coder, len_res, back_res, cur); - return; - } - - uint32_t new_len; - - lzma_read_match_distances(coder, &new_len, &num_distance_pairs); - - if (new_len >= fast_bytes) { - coder->num_distance_pairs = num_distance_pairs; - coder->longest_match_length = new_len; - coder->longest_match_was_found = true; - backward(coder, len_res, back_res, cur); - return; - } - - - ++position; - - uint32_t pos_prev = coder->optimum[cur].pos_prev; - uint32_t state; - - if (coder->optimum[cur].prev_1_is_char) { - --pos_prev; - - if (coder->optimum[cur].prev_2) { - state = coder->optimum[coder->optimum[cur].pos_prev_2].state; - - if (coder->optimum[cur].back_prev_2 < REP_DISTANCES) - update_long_rep(state); - else - update_match(state); - - } else { - state = coder->optimum[pos_prev].state; - } - - update_literal(state); - - } else { - state = coder->optimum[pos_prev].state; - } - - if (pos_prev == cur - 1) { - if (is_short_rep(coder->optimum[cur])) - update_short_rep(state); - else - update_literal(state); - } else { - uint32_t pos; - if (coder->optimum[cur].prev_1_is_char && coder->optimum[cur].prev_2) { - pos_prev = coder->optimum[cur].pos_prev_2; - pos = coder->optimum[cur].back_prev_2; - update_long_rep(state); - } else { - pos = coder->optimum[cur].back_prev; - if (pos < REP_DISTANCES) - update_long_rep(state); - else - update_match(state); - } - - if (pos < REP_DISTANCES) { - reps[0] = coder->optimum[pos_prev].backs[pos]; - - uint32_t i; - for (i = 1; i <= pos; ++i) - reps[i] = coder->optimum[pos_prev].backs[i - 1]; - - for (; i < REP_DISTANCES; ++i) - reps[i] = coder->optimum[pos_prev].backs[i]; - - } else { - reps[0] = pos - REP_DISTANCES; - - for (uint32_t i = 1; i < REP_DISTANCES; ++i) - reps[i] = coder->optimum[pos_prev].backs[i - 1]; - } - } - - coder->optimum[cur].state = state; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) - coder->optimum[cur].backs[i] = reps[i]; - - const uint32_t cur_price = coder->optimum[cur].price; - - buf = coder->lz.buffer + coder->lz.read_pos - 1; - current_byte = *buf; - match_byte = *(buf - reps[0] - 1); - - pos_state = position & coder->pos_mask; - - const uint32_t cur_and_1_price = cur_price - + bit_get_price_0(coder->is_match[state][pos_state]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position, buf[-1]), - !is_literal_state(state), match_byte, current_byte); - - bool next_is_char = false; - - if (cur_and_1_price < coder->optimum[cur + 1].price) { - coder->optimum[cur + 1].price = cur_and_1_price; - coder->optimum[cur + 1].pos_prev = cur; - make_as_char(coder->optimum[cur + 1]); - next_is_char = true; - } - - match_price = cur_price - + bit_get_price_1(coder->is_match[state][pos_state]); - rep_match_price = match_price - + bit_get_price_1(coder->is_rep[state]); - - if (match_byte == current_byte - && !(coder->optimum[cur + 1].pos_prev < cur - && coder->optimum[cur + 1].back_prev == 0)) { - - const uint32_t short_rep_price = rep_match_price - + get_rep_len_1_price(state, pos_state); - - if (short_rep_price <= coder->optimum[cur + 1].price) { - coder->optimum[cur + 1].price = short_rep_price; - coder->optimum[cur + 1].pos_prev = cur; - make_as_short_rep(coder->optimum[cur + 1]); - next_is_char = true; - } - } - - uint32_t num_available_bytes_full - = coder->lz.write_pos - coder->lz.read_pos + 1; - num_available_bytes_full = MIN(OPTS - 1 - cur, num_available_bytes_full); - num_available_bytes = num_available_bytes_full; - - if (num_available_bytes < 2) - continue; - - if (num_available_bytes > fast_bytes) - num_available_bytes = fast_bytes; - - if (!next_is_char && match_byte != current_byte) { // speed optimization - // try literal + rep0 - const uint32_t back_offset = reps[0] + 1; - const uint32_t limit = MIN(num_available_bytes_full, fast_bytes + 1); - - uint32_t temp; - for (temp = 1; temp < limit - && buf[temp] == *(buf + temp - back_offset); - ++temp) ; - - const uint32_t len_test_2 = temp - 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_literal(state_2); - - const uint32_t pos_state_next = (position + 1) & coder->pos_mask; - const uint32_t next_rep_match_price = cur_and_1_price - + bit_get_price_1(coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - - // for (; len_test_2 >= 2; --len_test_2) { - const uint32_t offset = cur + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = false; - } -// } - } - } - - - uint32_t start_len = 2; // speed optimization - - for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { - const uint32_t back_offset = reps[rep_index] + 1; - - if (buf[0] != *(buf - back_offset) || buf[1] != *(buf + 1 - back_offset)) - continue; - - uint32_t len_test; - for (len_test = 2; len_test < num_available_bytes - && buf[len_test] == *(buf + len_test - back_offset); - ++len_test) ; - - while (len_end < cur + len_test) - coder->optimum[++len_end].price = INFINITY_PRICE; - - const uint32_t len_test_temp = len_test; - uint32_t price = rep_match_price; - get_pure_rep_price(price, rep_index, state, pos_state); - - do { - const uint32_t cur_and_len_price = price - + length_get_price(coder->rep_len_encoder, - len_test - 2, pos_state); - - if (cur_and_len_price < coder->optimum[cur + len_test].price) { - coder->optimum[cur + len_test].price = cur_and_len_price; - coder->optimum[cur + len_test].pos_prev = cur; - coder->optimum[cur + len_test].back_prev = rep_index; - coder->optimum[cur + len_test].prev_1_is_char = false; - } - } while (--len_test >= 2); - - len_test = len_test_temp; - - if (rep_index == 0) - start_len = len_test + 1; - - - uint32_t len_test_2 = len_test + 1; - const uint32_t limit = MIN(num_available_bytes_full, - len_test_2 + fast_bytes); - for (; len_test_2 < limit - && buf[len_test_2] == *(buf + len_test_2 - back_offset); - ++len_test_2) ; - - len_test_2 -= len_test + 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_long_rep(state_2); - - uint32_t pos_state_next = (position + len_test) & coder->pos_mask; - - const uint32_t cur_and_len_char_price = price - + length_get_price(coder->rep_len_encoder, - len_test - 2, pos_state) - + bit_get_price_0(coder->is_match[state_2][pos_state_next]) - + literal_get_price( - literal_get_subcoder(coder->literal_coder, - position + len_test, buf[len_test - 1]), - true, *(buf + len_test - back_offset), buf[len_test]); - - update_literal(state_2); - - pos_state_next = (position + len_test + 1) & coder->pos_mask; - - const uint32_t next_rep_match_price = cur_and_len_char_price - + bit_get_price_1(coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - -// for(; len_test_2 >= 2; len_test_2--) { - const uint32_t offset = cur + len_test + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + len_test + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = true; - coder->optimum[offset].pos_prev_2 = cur; - coder->optimum[offset].back_prev_2 = rep_index; - } -// } - } - } - - -// for (uint32_t len_test = 2; len_test <= new_len; ++len_test) - if (new_len > num_available_bytes) { - new_len = num_available_bytes; - - for (num_distance_pairs = 0; - new_len > match_distances[num_distance_pairs + 1]; - num_distance_pairs += 2) ; - - match_distances[num_distance_pairs + 1] = new_len; - num_distance_pairs += 2; - } - - - if (new_len >= start_len) { - normal_match_price = match_price - + bit_get_price_0(coder->is_rep[state]); - - while (len_end < cur + new_len) - coder->optimum[++len_end].price = INFINITY_PRICE; - - uint32_t offs = 0; - while (start_len > match_distances[offs + 1]) - offs += 2; - - uint32_t cur_back = match_distances[offs + 2]; - uint32_t pos_slot = get_pos_slot_2(cur_back); - - for (uint32_t len_test = start_len; ; ++len_test) { - uint32_t cur_and_len_price = normal_match_price; - const uint32_t len_to_pos_state = get_len_to_pos_state(len_test); - - if (cur_back < FULL_DISTANCES) - cur_and_len_price += distances_prices[ - len_to_pos_state][cur_back]; - else - cur_and_len_price += pos_slot_prices[ - len_to_pos_state][pos_slot] - + align_prices[cur_back & ALIGN_MASK]; - - cur_and_len_price += length_get_price(coder->match_len_encoder, - len_test - MATCH_MIN_LEN, pos_state); - - if (cur_and_len_price < coder->optimum[cur + len_test].price) { - coder->optimum[cur + len_test].price = cur_and_len_price; - coder->optimum[cur + len_test].pos_prev = cur; - coder->optimum[cur + len_test].back_prev - = cur_back + REP_DISTANCES; - coder->optimum[cur + len_test].prev_1_is_char = false; - } - - if (len_test == match_distances[offs + 1]) { - // Try Match + Literal + Rep0 - const uint32_t back_offset = cur_back + 1; - uint32_t len_test_2 = len_test + 1; - const uint32_t limit = MIN(num_available_bytes_full, - len_test_2 + fast_bytes); - - for (; len_test_2 < limit && - buf[len_test_2] == *(buf + len_test_2 - back_offset); - ++len_test_2) ; - - len_test_2 -= len_test + 1; - - if (len_test_2 >= 2) { - uint32_t state_2 = state; - update_match(state_2); - uint32_t pos_state_next - = (position + len_test) & coder->pos_mask; - - const uint32_t cur_and_len_char_price = cur_and_len_price - + bit_get_price_0( - coder->is_match[state_2][pos_state_next]) - + literal_get_price( - literal_get_subcoder( - coder->literal_coder, - position + len_test, - buf[len_test - 1]), - true, - *(buf + len_test - back_offset), - buf[len_test]); - - update_literal(state_2); - pos_state_next = (pos_state_next + 1) & coder->pos_mask; - - const uint32_t next_rep_match_price - = cur_and_len_char_price - + bit_get_price_1( - coder->is_match[state_2][pos_state_next]) - + bit_get_price_1(coder->is_rep[state_2]); - - // for(; len_test_2 >= 2; --len_test_2) { - const uint32_t offset = cur + len_test + 1 + len_test_2; - - while (len_end < offset) - coder->optimum[++len_end].price = INFINITY_PRICE; - - cur_and_len_price = next_rep_match_price; - get_rep_price(cur_and_len_price, - 0, len_test_2, state_2, pos_state_next); - - if (cur_and_len_price < coder->optimum[offset].price) { - coder->optimum[offset].price = cur_and_len_price; - coder->optimum[offset].pos_prev = cur + len_test + 1; - coder->optimum[offset].back_prev = 0; - coder->optimum[offset].prev_1_is_char = true; - coder->optimum[offset].prev_2 = true; - coder->optimum[offset].pos_prev_2 = cur; - coder->optimum[offset].back_prev_2 - = cur_back + REP_DISTANCES; - } -// } - } - - offs += 2; - if (offs == num_distance_pairs) - break; - - cur_back = match_distances[offs + 2]; - if (cur_back >= FULL_DISTANCES) - pos_slot = get_pos_slot_2(cur_back); - } - } - } - - } // Closes: while (true) -} diff --git a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c b/src/liblzma/lzma/lzma_encoder_getoptimumfast.c deleted file mode 100644 index fa06be21..00000000 --- a/src/liblzma/lzma/lzma_encoder_getoptimumfast.c +++ /dev/null @@ -1,201 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_getoptimumfast.c -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -// NOTE: If you want to keep the line length in 80 characters, set -// tab width to 4 or less in your editor when editing this file. - - -#include "lzma_encoder_private.h" - - -#define change_pair(small_dist, big_dist) \ - (((big_dist) >> 7) > (small_dist)) - - -extern void -lzma_get_optimum_fast(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res) -{ - // Local copies - const uint32_t fast_bytes = coder->fast_bytes; - - uint32_t len_main; - uint32_t num_distance_pairs; - if (!coder->longest_match_was_found) { - lzma_read_match_distances(coder, &len_main, &num_distance_pairs); - } else { - len_main = coder->longest_match_length; - num_distance_pairs = coder->num_distance_pairs; - coder->longest_match_was_found = false; - } - - const uint8_t *buf = coder->lz.buffer + coder->lz.read_pos - 1; - uint32_t num_available_bytes - = coder->lz.write_pos - coder->lz.read_pos + 1; - - if (num_available_bytes < 2) { - // There's not enough input left to encode a match. - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - - if (num_available_bytes > MATCH_MAX_LEN) - num_available_bytes = MATCH_MAX_LEN; - - - // Look for repetitive matches; scan the previous four match distances - uint32_t rep_lens[REP_DISTANCES]; - uint32_t rep_max_index = 0; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - const uint32_t back_offset = coder->reps[i] + 1; - - // If the first two bytes (2 == MATCH_MIN_LEN) do not match, - // this rep_distance[i] is not useful. This is indicated - // using zero as the length of the repetitive match. - if (buf[0] != *(buf - back_offset) - || buf[1] != *(buf + 1 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - // The first two bytes matched. - // Calculate the length of the match. - uint32_t len; - for (len = 2; len < num_available_bytes - && buf[len] == *(buf + len - back_offset); - ++len) ; - - // If we have found a repetitive match that is at least - // as long as fast_bytes, return it immediatelly. - if (len >= fast_bytes) { - *back_res = i; - *len_res = len; - move_pos(len - 1); - return; - } - - rep_lens[i] = len; - - // After this loop, rep_lens[rep_max_index] is the biggest - // value of all values in rep_lens[]. - if (len > rep_lens[rep_max_index]) - rep_max_index = i; - } - - - if (len_main >= fast_bytes) { - *back_res = coder->match_distances[num_distance_pairs] - + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 1); - return; - } - - uint32_t back_main = 0; - if (len_main >= 2) { - back_main = coder->match_distances[num_distance_pairs]; - - while (num_distance_pairs > 2 && len_main == - coder->match_distances[num_distance_pairs - 3] + 1) { - if (!change_pair(coder->match_distances[ - num_distance_pairs - 2], back_main)) - break; - - num_distance_pairs -= 2; - len_main = coder->match_distances[num_distance_pairs - 1]; - back_main = coder->match_distances[num_distance_pairs]; - } - - if (len_main == 2 && back_main >= 0x80) - len_main = 1; - } - - if (rep_lens[rep_max_index] >= 2) { - if (rep_lens[rep_max_index] + 1 >= len_main - || (rep_lens[rep_max_index] + 2 >= len_main - && (back_main > (1 << 9))) - || (rep_lens[rep_max_index] + 3 >= len_main - && (back_main > (1 << 15)))) { - *back_res = rep_max_index; - *len_res = rep_lens[rep_max_index]; - move_pos(*len_res - 1); - return; - } - } - - if (len_main >= 2 && num_available_bytes > 2) { - lzma_read_match_distances(coder, &coder->longest_match_length, - &coder->num_distance_pairs); - - if (coder->longest_match_length >= 2) { - const uint32_t new_distance = coder->match_distances[ - coder->num_distance_pairs]; - - if ((coder->longest_match_length >= len_main - && new_distance < back_main) - || (coder->longest_match_length == len_main + 1 - && !change_pair(back_main, new_distance)) - || (coder->longest_match_length > len_main + 1) - || (coder->longest_match_length + 1 >= len_main - && len_main >= 3 - && change_pair(new_distance, back_main))) { - coder->longest_match_was_found = true; - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - } - - ++buf; - --num_available_bytes; - - for (uint32_t i = 0; i < REP_DISTANCES; ++i) { - const uint32_t back_offset = coder->reps[i] + 1; - - if (buf[1] != *(buf + 1 - back_offset) - || buf[2] != *(buf + 2 - back_offset)) { - rep_lens[i] = 0; - continue; - } - - uint32_t len; - for (len = 2; len < num_available_bytes - && buf[len] == *(buf + len - back_offset); - ++len) ; - - if (len + 1 >= len_main) { - coder->longest_match_was_found = true; - *back_res = UINT32_MAX; - *len_res = 1; - return; - } - } - - *back_res = back_main + REP_DISTANCES; - *len_res = len_main; - move_pos(len_main - 2); - return; - } - - *back_res = UINT32_MAX; - *len_res = 1; - return; -} diff --git a/src/liblzma/lzma/lzma_encoder_init.c b/src/liblzma/lzma/lzma_encoder_init.c deleted file mode 100644 index 21335f95..00000000 --- a/src/liblzma/lzma/lzma_encoder_init.c +++ /dev/null @@ -1,228 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_encoder_init.c -/// \brief Creating, resetting and destroying the LZMA encoder -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "lzma_encoder_private.h" - - -/// \brief Initializes the length encoder -static void -length_encoder_reset(lzma_length_encoder *lencoder, - const uint32_t num_pos_states, const uint32_t table_size) -{ - // NLength::CPriceTableEncoder::SetTableSize() - lencoder->table_size = table_size; - - // NLength::CEncoder::Init() - bit_reset(lencoder->choice); - bit_reset(lencoder->choice2); - - for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) { - bittree_reset(lencoder->low[pos_state], LEN_LOW_BITS); - bittree_reset(lencoder->mid[pos_state], LEN_MID_BITS); - } - - bittree_reset(lencoder->high, LEN_HIGH_BITS); - - // NLength::CPriceTableEncoder::UpdateTables() - for (size_t pos_state = 0; pos_state < num_pos_states; ++pos_state) - lencoder->counters[pos_state] = 1; - - return; -} - - -static void -lzma_lzma_encoder_end(lzma_coder *coder, lzma_allocator *allocator) -{ - lzma_lz_encoder_end(&coder->lz, allocator); - lzma_free(coder, allocator); - return; -} - - -extern lzma_ret -lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, - const lzma_filter_info *filters) -{ - if (next->coder == NULL) { - next->coder = lzma_alloc(sizeof(lzma_coder), allocator); - if (next->coder == NULL) - return LZMA_MEM_ERROR; - - next->coder->next = LZMA_NEXT_CODER_INIT; - next->coder->lz = LZMA_LZ_ENCODER_INIT; - } - - // Validate options that aren't validated elsewhere. - const lzma_options_lzma *options = filters[0].options; - if (options->pos_bits > LZMA_POS_BITS_MAX - || options->fast_bytes < LZMA_FAST_BYTES_MIN - || options->fast_bytes > LZMA_FAST_BYTES_MAX) { - lzma_lzma_encoder_end(next->coder, allocator); - return LZMA_HEADER_ERROR; - } - - // Set compression mode. - switch (options->mode) { - case LZMA_MODE_FAST: - next->coder->best_compression = false; - break; - - case LZMA_MODE_BEST: - next->coder->best_compression = true; - break; - - default: - lzma_lzma_encoder_end(next->coder, allocator); - return LZMA_HEADER_ERROR; - } - - // Initialize literal coder. - { - const lzma_ret ret = lzma_literal_init( - &next->coder->literal_coder, - options->literal_context_bits, - options->literal_pos_bits); - if (ret != LZMA_OK) - return ret; - } - - // Initialize LZ encoder. - { - const lzma_ret ret = lzma_lz_encoder_reset( - &next->coder->lz, allocator, &lzma_lzma_encode, - options->dictionary_size, OPTS, - options->fast_bytes, MATCH_MAX_LEN + 1 + OPTS, - options->match_finder, - options->match_finder_cycles, - options->preset_dictionary, - options->preset_dictionary_size); - if (ret != LZMA_OK) { - lzma_lzma_encoder_end(next->coder, allocator); - return ret; - } - } - - // Set dist_table_size. - { - // Round the dictionary size up to next 2^n. - uint32_t log_size; - for (log_size = 0; (UINT32_C(1) << log_size) - < options->dictionary_size; ++log_size) ; - - next->coder->dist_table_size = log_size * 2; - } - - // Misc FIXME desc - next->coder->align_price_count = UINT32_MAX; - next->coder->match_price_count = UINT32_MAX; - next->coder->dictionary_size = options->dictionary_size; - next->coder->pos_mask = (1U << options->pos_bits) - 1; - next->coder->fast_bytes = options->fast_bytes; - - // Range coder - rc_reset(&next->coder->rc); - - // State - next->coder->state = 0; - next->coder->previous_byte = 0; - for (size_t i = 0; i < REP_DISTANCES; ++i) - next->coder->reps[i] = 0; - - // Bit encoders - for (size_t i = 0; i < STATES; ++i) { - for (size_t j = 0; j <= next->coder->pos_mask; ++j) { - bit_reset(next->coder->is_match[i][j]); - bit_reset(next->coder->is_rep0_long[i][j]); - } - - bit_reset(next->coder->is_rep[i]); - bit_reset(next->coder->is_rep0[i]); - bit_reset(next->coder->is_rep1[i]); - bit_reset(next->coder->is_rep2[i]); - } - - for (size_t i = 0; i < FULL_DISTANCES - END_POS_MODEL_INDEX; ++i) - bit_reset(next->coder->pos_encoders[i]); - - // Bit tree encoders - for (size_t i = 0; i < LEN_TO_POS_STATES; ++i) - bittree_reset(next->coder->pos_slot_encoder[i], POS_SLOT_BITS); - - bittree_reset(next->coder->pos_align_encoder, ALIGN_BITS); - - // Length encoders - length_encoder_reset(&next->coder->match_len_encoder, - 1U << options->pos_bits, - options->fast_bytes + 1 - MATCH_MIN_LEN); - - length_encoder_reset(&next->coder->rep_len_encoder, - 1U << options->pos_bits, - next->coder->fast_bytes + 1 - MATCH_MIN_LEN); - - next->coder->prev_len_encoder = NULL; - - // Misc - next->coder->longest_match_was_found = false; - next->coder->optimum_end_index = 0; - next->coder->optimum_current_index = 0; - next->coder->additional_offset = 0; - - next->coder->now_pos = 0; - next->coder->is_initialized = false; - next->coder->is_flushed = false, - next->coder->write_eopm = true; - - // Initialize the next decoder in the chain, if any. - { - const lzma_ret ret = lzma_next_filter_init(&next->coder->next, - allocator, filters + 1); - if (ret != LZMA_OK) { - lzma_lzma_encoder_end(next->coder, allocator); - return ret; - } - } - - // Initialization successful. Set the function pointers. - next->code = &lzma_lz_encode; - next->end = &lzma_lzma_encoder_end; - - return LZMA_OK; -} - - -extern bool -lzma_lzma_encode_properties(const lzma_options_lzma *options, uint8_t *byte) -{ - if (options->literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX - || options->literal_pos_bits - > LZMA_LITERAL_POS_BITS_MAX - || options->pos_bits > LZMA_POS_BITS_MAX - || options->literal_context_bits - + options->literal_pos_bits - > LZMA_LITERAL_BITS_MAX) - return true; - - *byte = (options->pos_bits * 5 + options->literal_pos_bits) * 9 - + options->literal_context_bits; - assert(*byte <= (4 * 5 + 4) * 9 + 8); - - return false; -} diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c new file mode 100644 index 00000000..9da7e79e --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c @@ -0,0 +1,193 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_fast.c +// +// Copyright (C) 1999-2008 Igor Pavlov +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" + + +#define change_pair(small_dist, big_dist) \ + (((big_dist) >> 7) > (small_dist)) + + +static inline void +literal(const lzma_coder *restrict coder, const uint8_t *restrict buf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + // Try short rep0 instead of always coding it as a literal. + *back_res = *buf == *(buf - coder->reps[0] - 1) ? 0 : UINT32_MAX; + *len_res = 1; + return; +} + + +extern void +lzma_lzma_optimum_fast(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res) +{ + const uint32_t fast_bytes = mf->find_len_max; + + uint32_t len_main; + uint32_t matches_count; + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint8_t *buf = mf_ptr(mf) - 1; + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + + if (buf_avail < 2) { + // There's not enough input left to encode a match. + literal(coder, buf, back_res, len_res); + return; + } + + // Look for repeated matches; scan the previous four match distances + uint32_t rep_len = 0; + uint32_t rep_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + // Pointer to the beginning of the match candidate + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + // If the first two bytes (2 == MATCH_LEN_MIN) do not match, + // this rep is not useful. + if (not_equal_16(buf, buf_back)) + continue; + + // The first two bytes matched. + // Calculate the length of the match. + uint32_t len; + for (len = 2; len < buf_avail + && buf[len] == buf_back[len]; ++len) ; + + // If we have found a repeated match that is at least + // fast_bytes long, return it immediatelly. + if (len >= fast_bytes) { + *back_res = i; + *len_res = len; + mf_skip(mf, len - 1); + return; + } + + if (len > rep_len) { + rep_index = i; + rep_len = len; + } + } + + // We didn't find a long enough repeated match. Encode it as a normal + // match if the match length is at least fast_bytes. + if (len_main >= fast_bytes) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return; + } + + uint32_t back_main = 0; + if (len_main >= 2) { + back_main = coder->matches[matches_count - 1].dist; + + while (matches_count > 1 && len_main == + coder->matches[matches_count - 2].len + 1) { + if (!change_pair(coder->matches[ + matches_count - 2].dist, + back_main)) + break; + + --matches_count; + len_main = coder->matches[matches_count - 1].len; + back_main = coder->matches[matches_count - 1].dist; + } + + if (len_main == 2 && back_main >= 0x80) + len_main = 1; + } + + if (rep_len >= 2) { + if (rep_len + 1 >= len_main + || (rep_len + 2 >= len_main + && back_main > (UINT32_C(1) << 9)) + || (rep_len + 3 >= len_main + && back_main > (UINT32_C(1) << 15))) { + *back_res = rep_index; + *len_res = rep_len; + mf_skip(mf, rep_len - 1); + return; + } + } + + if (len_main < 2 || buf_avail <= 2) { + literal(coder, buf, back_res, len_res); + return; + } + + // Get the matches for the next byte. If we find a better match, + // the current byte is encoded as a literal. + coder->longest_match_length = mf_find(mf, + &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= 2) { + const uint32_t new_dist = coder->matches[ + coder->matches_count - 1].dist; + + if ((coder->longest_match_length >= len_main + && new_dist < back_main) + || (coder->longest_match_length == len_main + 1 + && !change_pair(back_main, new_dist)) + || (coder->longest_match_length > len_main + 1) + || (coder->longest_match_length + 1 >= len_main + && len_main >= 3 + && change_pair(new_dist, back_main))) { + literal(coder, buf, back_res, len_res); + return; + } + } + + // In contrast to LZMA SDK, dictionary could not have been moved + // between mf_find() calls, thus it is safe to just increment + // the old buf pointer instead of recalculating it with mf_ptr(). + ++buf; + + const uint32_t limit = len_main - 1; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len; + for (len = 2; len < limit + && buf[len] == buf_back[len]; ++len) ; + + if (len >= limit) { + literal(coder, buf - 1, back_res, len_res); + return; + } + } + + *back_res = back_main + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 2); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c new file mode 100644 index 00000000..f0dd92c9 --- /dev/null +++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c @@ -0,0 +1,875 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file lzma_encoder_optimum_normal.c +// +// Copyright (C) 1999-2008 Igor Pavlov +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_encoder_private.h" +#include "fastpos.h" + + +//////////// +// Prices // +//////////// + +static uint32_t +get_literal_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t prev_byte, const bool match_mode, + uint32_t match_byte, uint32_t symbol) +{ + const probability *const subcoder = literal_subcoder(coder->literal, + coder->literal_context_bits, coder->literal_pos_mask, + pos, prev_byte); + + uint32_t price = 0; + + if (!match_mode) { + price = rc_bittree_price(subcoder, 8, symbol); + } else { + uint32_t offset = 0x100; + symbol += UINT32_C(1) << 8; + + do { + match_byte <<= 1; + + const uint32_t match_bit = match_byte & offset; + const uint32_t subcoder_index + = offset + match_bit + (symbol >> 8); + const uint32_t bit = (symbol >> 7) & 1; + price += rc_bit_price(subcoder[subcoder_index], bit); + + symbol <<= 1; + offset &= ~(match_byte ^ symbol); + + } while (symbol < (UINT32_C(1) << 16)); + } + + return price; +} + + +static inline uint32_t +get_len_price(const lzma_length_encoder *const lencoder, + const uint32_t len, const uint32_t pos_state) +{ + // NOTE: Unlike the other price tables, length prices are updated + // in lzma_encoder.c + return lencoder->prices[pos_state][len - MATCH_LEN_MIN]; +} + + +static inline uint32_t +get_short_rep_price(const lzma_coder *const coder, + const lzma_lzma_state state, const uint32_t pos_state) +{ + return rc_bit_0_price(coder->is_rep0[state]) + + rc_bit_0_price(coder->is_rep0_long[state][pos_state]); +} + + +static inline uint32_t +get_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const lzma_lzma_state state, uint32_t pos_state) +{ + uint32_t price; + + if (rep_index == 0) { + price = rc_bit_0_price(coder->is_rep0[state]); + price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]); + } else { + price = rc_bit_1_price(coder->is_rep0[state]); + + if (rep_index == 1) { + price += rc_bit_0_price(coder->is_rep1[state]); + } else { + price += rc_bit_1_price(coder->is_rep1[state]); + price += rc_bit_price(coder->is_rep2[state], + rep_index - 2); + } + } + + return price; +} + + +static inline uint32_t +get_rep_price(const lzma_coder *const coder, const uint32_t rep_index, + const uint32_t len, const lzma_lzma_state state, + const uint32_t pos_state) +{ + return get_len_price(&coder->rep_len_encoder, len, pos_state) + + get_pure_rep_price(coder, rep_index, state, pos_state); +} + + +static inline uint32_t +get_pos_len_price(const lzma_coder *const coder, const uint32_t pos, + const uint32_t len, const uint32_t pos_state) +{ + const uint32_t len_to_pos_state = get_len_to_pos_state(len); + uint32_t price; + + if (pos < FULL_DISTANCES) { + price = coder->distances_prices[len_to_pos_state][pos]; + } else { + const uint32_t pos_slot = get_pos_slot_2(pos); + price = coder->pos_slot_prices[len_to_pos_state][pos_slot] + + coder->align_prices[pos & ALIGN_MASK]; + } + + price += get_len_price(&coder->match_len_encoder, len, pos_state); + + return price; +} + + +static void +fill_distances_prices(lzma_coder *coder) +{ + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) { + + uint32_t *const pos_slot_prices + = coder->pos_slot_prices[len_to_pos_state]; + + // Price to encode the pos_slot. + for (uint32_t pos_slot = 0; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] = rc_bittree_price( + coder->pos_slot[len_to_pos_state], + POS_SLOT_BITS, pos_slot); + + // For matches with distance >= FULL_DISTANCES, add the price + // of the direct bits part of the match distance. (Align bits + // are handled by fill_align_prices()). + for (uint32_t pos_slot = END_POS_MODEL_INDEX; + pos_slot < coder->dist_table_size; ++pos_slot) + pos_slot_prices[pos_slot] += rc_direct_price( + ((pos_slot >> 1) - 1) - ALIGN_BITS); + + // Distances in the range [0, 3] are fully encoded with + // pos_slot, so they are used for coder->distances_prices + // as is. + for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i) + coder->distances_prices[len_to_pos_state][i] + = pos_slot_prices[i]; + } + + // Distances in the range [4, 127] depend on pos_slot and pos_special. + // We do this in a loop separate from the above loop to avoid + // redundant calls to get_pos_slot(). + for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) { + const uint32_t pos_slot = get_pos_slot(i); + const uint32_t footer_bits = ((pos_slot >> 1) - 1); + const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; + const uint32_t price = rc_bittree_reverse_price( + coder->pos_special + base - pos_slot - 1, + footer_bits, i - base); + + for (uint32_t len_to_pos_state = 0; + len_to_pos_state < LEN_TO_POS_STATES; + ++len_to_pos_state) + coder->distances_prices[len_to_pos_state][i] + = price + coder->pos_slot_prices[ + len_to_pos_state][pos_slot]; + } + + coder->match_price_count = 0; + return; +} + + +static void +fill_align_prices(lzma_coder *coder) +{ + for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i) + coder->align_prices[i] = rc_bittree_reverse_price( + coder->pos_align, ALIGN_BITS, i); + + coder->align_price_count = 0; + return; +} + + +///////////// +// Optimal // +///////////// + +static inline void +make_literal(lzma_optimal *optimal) +{ + optimal->back_prev = UINT32_MAX; + optimal->prev_1_is_literal = false; +} + + +static inline void +make_short_rep(lzma_optimal *optimal) +{ + optimal->back_prev = 0; + optimal->prev_1_is_literal = false; +} + + +#define is_short_rep(optimal) \ + ((optimal).back_prev == 0) + + +static void +backward(lzma_coder *restrict coder, uint32_t *restrict len_res, + uint32_t *restrict back_res, uint32_t cur) +{ + coder->opts_end_index = cur; + + uint32_t pos_mem = coder->opts[cur].pos_prev; + uint32_t back_mem = coder->opts[cur].back_prev; + + do { + if (coder->opts[cur].prev_1_is_literal) { + make_literal(&coder->opts[pos_mem]); + coder->opts[pos_mem].pos_prev = pos_mem - 1; + + if (coder->opts[cur].prev_2) { + coder->opts[pos_mem - 1].prev_1_is_literal + = false; + coder->opts[pos_mem - 1].pos_prev + = coder->opts[cur].pos_prev_2; + coder->opts[pos_mem - 1].back_prev + = coder->opts[cur].back_prev_2; + } + } + + const uint32_t pos_prev = pos_mem; + const uint32_t back_cur = back_mem; + + back_mem = coder->opts[pos_prev].back_prev; + pos_mem = coder->opts[pos_prev].pos_prev; + + coder->opts[pos_prev].back_prev = back_cur; + coder->opts[pos_prev].pos_prev = cur; + cur = pos_prev; + + } while (cur != 0); + + coder->opts_current_index = coder->opts[0].pos_prev; + *len_res = coder->opts[0].pos_prev; + *back_res = coder->opts[0].back_prev; + + return; +} + + +////////// +// Main // +////////// + +static inline uint32_t +helper1(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + const uint32_t fast_bytes = mf->find_len_max; + + uint32_t len_main; + uint32_t matches_count; + + if (mf->read_ahead == 0) { + len_main = mf_find(mf, &matches_count, coder->matches); + } else { + assert(mf->read_ahead == 1); + len_main = coder->longest_match_length; + matches_count = coder->matches_count; + } + + const uint32_t buf_avail = MIN(mf_avail(mf) + 1, MATCH_LEN_MAX); + if (buf_avail < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + const uint8_t *const buf = mf_ptr(mf) - 1; + + uint32_t rep_lens[REP_DISTANCES]; + uint32_t rep_max_index = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + const uint8_t *const buf_back = buf - coder->reps[i] - 1; + + if (not_equal_16(buf, buf_back)) { + rep_lens[i] = 0; + continue; + } + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + rep_lens[i] = len_test; + if (len_test > rep_lens[rep_max_index]) + rep_max_index = i; + } + + if (rep_lens[rep_max_index] >= fast_bytes) { + *back_res = rep_max_index; + *len_res = rep_lens[rep_max_index]; + mf_skip(mf, *len_res - 1); + return UINT32_MAX; + } + + + if (len_main >= fast_bytes) { + *back_res = coder->matches[matches_count - 1].dist + + REP_DISTANCES; + *len_res = len_main; + mf_skip(mf, len_main - 1); + return UINT32_MAX; + } + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - coder->reps[0] - 1); + + if (len_main < 2 && current_byte != match_byte + && rep_lens[rep_max_index] < 2) { + *back_res = UINT32_MAX; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[0].state = coder->state; + + const uint32_t pos_state = position & coder->pos_mask; + + coder->opts[1].price = rc_bit_0_price( + coder->is_match[coder->state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(coder->state), + match_byte, current_byte); + + make_literal(&coder->opts[1]); + + const uint32_t match_price = rc_bit_1_price( + coder->is_match[coder->state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[coder->state]); + + if (match_byte == current_byte) { + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price( + coder, coder->state, pos_state); + + if (short_rep_price < coder->opts[1].price) { + coder->opts[1].price = short_rep_price; + make_short_rep(&coder->opts[1]); + } + } + + const uint32_t len_end = MAX(len_main, rep_lens[rep_max_index]); + + if (len_end < 2) { + *back_res = coder->opts[1].back_prev; + *len_res = 1; + return UINT32_MAX; + } + + coder->opts[1].pos_prev = 0; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[0].backs[i] = coder->reps[i]; + + uint32_t len = len_end; + do { + coder->opts[len].price = RC_INFINITY_PRICE; + } while (--len >= 2); + + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) { + uint32_t rep_len = rep_lens[i]; + if (rep_len < 2) + continue; + + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, i, coder->state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price( + &coder->rep_len_encoder, + rep_len, pos_state); + + if (cur_and_len_price < coder->opts[rep_len].price) { + coder->opts[rep_len].price = cur_and_len_price; + coder->opts[rep_len].pos_prev = 0; + coder->opts[rep_len].back_prev = i; + coder->opts[rep_len].prev_1_is_literal = false; + } + } while (--rep_len >= 2); + } + + + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[coder->state]); + + len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2; + if (len <= len_main) { + uint32_t i = 0; + while (len > coder->matches[i].len) + ++i; + + for(; ; ++len) { + const uint32_t dist = coder->matches[i].dist; + const uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + dist, len, pos_state); + + if (cur_and_len_price < coder->opts[len].price) { + coder->opts[len].price = cur_and_len_price; + coder->opts[len].pos_prev = 0; + coder->opts[len].back_prev + = dist + REP_DISTANCES; + coder->opts[len].prev_1_is_literal = false; + } + + if (len == coder->matches[i].len) + if (++i == matches_count) + break; + } + } + + return len_end; +} + + +static inline uint32_t +helper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf, + uint32_t len_end, uint32_t position, const uint32_t cur, + const uint32_t fast_bytes, const uint32_t buf_avail_full) +{ + uint32_t matches_count = coder->matches_count; + uint32_t new_len = coder->longest_match_length; + uint32_t pos_prev = coder->opts[cur].pos_prev; + uint32_t state; + + if (coder->opts[cur].prev_1_is_literal) { + --pos_prev; + + if (coder->opts[cur].prev_2) { + state = coder->opts[coder->opts[cur].pos_prev_2].state; + + if (coder->opts[cur].back_prev_2 < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + + } else { + state = coder->opts[pos_prev].state; + } + + update_literal(state); + + } else { + state = coder->opts[pos_prev].state; + } + + if (pos_prev == cur - 1) { + if (is_short_rep(coder->opts[cur])) + update_short_rep(state); + else + update_literal(state); + } else { + uint32_t pos; + if (coder->opts[cur].prev_1_is_literal + && coder->opts[cur].prev_2) { + pos_prev = coder->opts[cur].pos_prev_2; + pos = coder->opts[cur].back_prev_2; + update_long_rep(state); + } else { + pos = coder->opts[cur].back_prev; + if (pos < REP_DISTANCES) + update_long_rep(state); + else + update_match(state); + } + + if (pos < REP_DISTANCES) { + reps[0] = coder->opts[pos_prev].backs[pos]; + + uint32_t i; + for (i = 1; i <= pos; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + + for (; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i]; + + } else { + reps[0] = pos - REP_DISTANCES; + + for (uint32_t i = 1; i < REP_DISTANCES; ++i) + reps[i] = coder->opts[pos_prev].backs[i - 1]; + } + } + + coder->opts[cur].state = state; + + for (uint32_t i = 0; i < REP_DISTANCES; ++i) + coder->opts[cur].backs[i] = reps[i]; + + const uint32_t cur_price = coder->opts[cur].price; + + const uint8_t current_byte = *buf; + const uint8_t match_byte = *(buf - reps[0] - 1); + + const uint32_t pos_state = position & coder->pos_mask; + + const uint32_t cur_and_1_price = cur_price + + rc_bit_0_price(coder->is_match[state][pos_state]) + + get_literal_price(coder, position, buf[-1], + !is_literal_state(state), match_byte, current_byte); + + bool next_is_literal = false; + + if (cur_and_1_price < coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = cur_and_1_price; + coder->opts[cur + 1].pos_prev = cur; + make_literal(&coder->opts[cur + 1]); + next_is_literal = true; + } + + const uint32_t match_price = cur_price + + rc_bit_1_price(coder->is_match[state][pos_state]); + const uint32_t rep_match_price = match_price + + rc_bit_1_price(coder->is_rep[state]); + + if (match_byte == current_byte + && !(coder->opts[cur + 1].pos_prev < cur + && coder->opts[cur + 1].back_prev == 0)) { + + const uint32_t short_rep_price = rep_match_price + + get_short_rep_price(coder, state, pos_state); + + if (short_rep_price <= coder->opts[cur + 1].price) { + coder->opts[cur + 1].price = short_rep_price; + coder->opts[cur + 1].pos_prev = cur; + make_short_rep(&coder->opts[cur + 1]); + next_is_literal = true; + } + } + + if (buf_avail_full < 2) + return len_end; + + const uint32_t buf_avail = MIN(buf_avail_full, fast_bytes); + + if (!next_is_literal && match_byte != current_byte) { // speed optimization + // try literal + rep0 + const uint8_t *const buf_back = buf - reps[0] - 1; + const uint32_t limit = MIN(buf_avail_full, fast_bytes + 1); + + uint32_t len_test = 1; + while (len_test < limit && buf[len_test] == buf_back[len_test]) + ++len_test; + + --len_test; + + if (len_test >= 2) { + uint32_t state_2 = state; + update_literal(state_2); + + const uint32_t pos_state_next = (position + 1) & coder->pos_mask; + const uint32_t next_rep_match_price = cur_and_1_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for (; len_test >= 2; --len_test) { + const uint32_t offset = cur + 1 + len_test; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = false; + } + //} + } + } + + + uint32_t start_len = 2; // speed optimization + + for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) { + const uint8_t *const buf_back = buf - reps[rep_index] - 1; + if (not_equal_16(buf, buf_back)) + continue; + + uint32_t len_test; + for (len_test = 2; len_test < buf_avail + && buf[len_test] == buf_back[len_test]; + ++len_test) ; + + while (len_end < cur + len_test) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t len_test_temp = len_test; + const uint32_t price = rep_match_price + get_pure_rep_price( + coder, rep_index, state, pos_state); + + do { + const uint32_t cur_and_len_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev = rep_index; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + } while (--len_test >= 2); + + len_test = len_test_temp; + + if (rep_index == 0) + start_len = len_test + 1; + + + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + fast_bytes); + for (; len_test_2 < limit + && buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_long_rep(state_2); + + uint32_t pos_state_next = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = price + + get_len_price(&coder->rep_len_encoder, + len_test, pos_state) + + rc_bit_0_price(coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, position + len_test, + buf[len_test - 1], true, + buf_back[len_test], buf[len_test]); + + update_literal(state_2); + + pos_state_next = (position + len_test + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price = cur_and_len_literal_price + + rc_bit_1_price(coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + //for(; len_test_2 >= 2; len_test_2--) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + const uint32_t cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 = rep_index; + } + //} + } + } + + + //for (uint32_t len_test = 2; len_test <= new_len; ++len_test) + if (new_len > buf_avail) { + new_len = buf_avail; + + matches_count = 0; + while (new_len > coder->matches[matches_count].len) + ++matches_count; + + coder->matches[matches_count++].len = new_len; + } + + + if (new_len >= start_len) { + const uint32_t normal_match_price = match_price + + rc_bit_0_price(coder->is_rep[state]); + + while (len_end < cur + new_len) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + uint32_t i = 0; + while (start_len > coder->matches[i].len) + ++i; + + for (uint32_t len_test = start_len; ; ++len_test) { + const uint32_t cur_back = coder->matches[i].dist; + uint32_t cur_and_len_price = normal_match_price + + get_pos_len_price(coder, + cur_back, len_test, pos_state); + + if (cur_and_len_price < coder->opts[cur + len_test].price) { + coder->opts[cur + len_test].price = cur_and_len_price; + coder->opts[cur + len_test].pos_prev = cur; + coder->opts[cur + len_test].back_prev + = cur_back + REP_DISTANCES; + coder->opts[cur + len_test].prev_1_is_literal = false; + } + + if (len_test == coder->matches[i].len) { + // Try Match + Literal + Rep0 + const uint8_t *const buf_back = buf - cur_back - 1; + uint32_t len_test_2 = len_test + 1; + const uint32_t limit = MIN(buf_avail_full, + len_test_2 + fast_bytes); + + for (; len_test_2 < limit && + buf[len_test_2] == buf_back[len_test_2]; + ++len_test_2) ; + + len_test_2 -= len_test + 1; + + if (len_test_2 >= 2) { + uint32_t state_2 = state; + update_match(state_2); + uint32_t pos_state_next + = (position + len_test) & coder->pos_mask; + + const uint32_t cur_and_len_literal_price = cur_and_len_price + + rc_bit_0_price( + coder->is_match[state_2][pos_state_next]) + + get_literal_price(coder, + position + len_test, + buf[len_test - 1], + true, + buf_back[len_test], + buf[len_test]); + + update_literal(state_2); + pos_state_next = (pos_state_next + 1) & coder->pos_mask; + + const uint32_t next_rep_match_price + = cur_and_len_literal_price + + rc_bit_1_price( + coder->is_match[state_2][pos_state_next]) + + rc_bit_1_price(coder->is_rep[state_2]); + + // for(; len_test_2 >= 2; --len_test_2) { + const uint32_t offset = cur + len_test + 1 + len_test_2; + + while (len_end < offset) + coder->opts[++len_end].price = RC_INFINITY_PRICE; + + cur_and_len_price = next_rep_match_price + + get_rep_price(coder, 0, len_test_2, + state_2, pos_state_next); + + if (cur_and_len_price < coder->opts[offset].price) { + coder->opts[offset].price = cur_and_len_price; + coder->opts[offset].pos_prev = cur + len_test + 1; + coder->opts[offset].back_prev = 0; + coder->opts[offset].prev_1_is_literal = true; + coder->opts[offset].prev_2 = true; + coder->opts[offset].pos_prev_2 = cur; + coder->opts[offset].back_prev_2 + = cur_back + REP_DISTANCES; + } + //} + } + + if (++i == matches_count) + break; + } + } + } + + return len_end; +} + + +extern void +lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf, + uint32_t *restrict back_res, uint32_t *restrict len_res, + uint32_t position) +{ + // If we have symbols pending, return the next pending symbol. + if (coder->opts_end_index != coder->opts_current_index) { + assert(mf->read_ahead > 0); + *len_res = coder->opts[coder->opts_current_index].pos_prev + - coder->opts_current_index; + *back_res = coder->opts[coder->opts_current_index].back_prev; + coder->opts_current_index = coder->opts[ + coder->opts_current_index].pos_prev; + return; + } + + // Update the price tables. In LZMA SDK <= 4.60 (and possibly later) + // this was done in both initialization function and in the main loop. + // In liblzma they were moved into this single place. + if (mf->read_ahead == 0) { + if (coder->match_price_count >= (1 << 7)) + fill_distances_prices(coder); + + if (coder->align_price_count >= ALIGN_TABLE_SIZE) + fill_align_prices(coder); + } + + // TODO: This needs quite a bit of cleaning still. But splitting + // the oroginal function to two pieces makes it at least a little + // more readable, since those two parts don't share many variables. + + uint32_t len_end = helper1(coder, mf, back_res, len_res, position); + if (len_end == UINT32_MAX) + return; + + uint32_t reps[REP_DISTANCES]; + memcpy(reps, coder->reps, sizeof(reps)); + + uint32_t cur; + for (cur = 1; cur < len_end; ++cur) { + assert(cur < OPTS); + + coder->longest_match_length = mf_find( + mf, &coder->matches_count, coder->matches); + + if (coder->longest_match_length >= mf->find_len_max) + break; + + len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end, + position + cur, cur, mf->find_len_max, + MIN(mf_avail(mf) + 1, OPTS - 1 - cur)); + } + + backward(coder, len_res, back_res, cur); + return; +} diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c index 966c7c86..08f339e9 100644 --- a/src/liblzma/lzma/lzma_encoder_presets.c +++ b/src/liblzma/lzma/lzma_encoder_presets.c @@ -20,15 +20,47 @@ #include "common.h" +#define pow2(e) (UINT32_C(1) << (e)) + + LZMA_API const lzma_options_lzma lzma_preset_lzma[9] = { -// dictionary_size lc lp pb mode fb mf mfc -{ UINT32_C(1) << 16, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC3, 0 }, -{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_FAST, 64, LZMA_MF_HC4, 0 }, -{ UINT32_C(1) << 19, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 20, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 64, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 21, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 22, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 23, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 128, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 24, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 }, -{ UINT32_C(1) << 25, 3, 0, 2, NULL, 0, LZMA_MODE_BEST, 273, LZMA_MF_BT4, 0 }, +// dict lc lp pb mode fb mf mfc +{ pow2(16), NULL, 0, 3, 0, 2, false, LZMA_MODE_FAST, 64, LZMA_MF_HC3, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_FAST, 64, LZMA_MF_HC4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(19), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(20), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 64, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(21), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(22), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(23), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 128, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(24), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, +{ pow2(25), NULL, 0, 3, 0, 0, false, LZMA_MODE_NORMAL, 273, LZMA_MF_BT4, 0, 0, 0, 0, 0, NULL, NULL }, }; + + +/* +extern LZMA_API lzma_bool +lzma_preset_lzma(lzma_options_lzma *options, uint32_t level) +{ + *options = (lzma_options_lzma){ + + }; + + options->literal_context_bits = LZMA_LITERAL_CONTEXT_BITS_DEFAULT + options->literal_pos_bits = LZMA_LITERAL_POS_BITS_DEFAULT; + options->pos_bits = LZMA_POS_BITS_DEFAULT; + options->preset_dictionary = NULL; + options->preset_dictionary_size = 0; + options->persistent = false; + + options->mode = level <= 2 ? LZMA_MODE_FAST : LZMA_MODE_NORMAL; + options->fast_bytes = level <= + + options->match_finder = level == 1 ? LZMA_MF_HC3 + : (level == 2 ? LZMA_MF_HC4 : LZMA_MF_BT4); + options->match_finder_cycles = 0; + + + + options->dictionary_size = +} +*/ diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h index a16051f8..7533bc79 100644 --- a/src/liblzma/lzma/lzma_encoder_private.h +++ b/src/liblzma/lzma/lzma_encoder_private.h @@ -21,20 +21,27 @@ #ifndef LZMA_LZMA_ENCODER_PRIVATE_H #define LZMA_LZMA_ENCODER_PRIVATE_H -#include "lzma_encoder.h" -#include "lzma_common.h" #include "lz_encoder.h" #include "range_encoder.h" +#include "lzma_common.h" +#include "lzma_encoder.h" + +// Macro to compare if the first two bytes in two buffers differ. This is +// needed in lzma_lzma_optimum_*() to test if the match is at least +// MATCH_LEN_MIN bytes. Unaligned access gives tiny gain so there's no +// reason to not use it when it is supported. +#ifdef HAVE_FAST_UNALIGNED_ACCESS +# define not_equal_16(a, b) \ + (*(const uint16_t *)(a) != *(const uint16_t *)(b)) +#else +# define not_equal_16(a, b) \ + ((a)[0] != (b)[0] || (a)[1] != (b)[1]) +#endif -#define move_pos(num) \ -do { \ - assert((int32_t)(num) >= 0); \ - if ((num) != 0) { \ - coder->additional_offset += num; \ - coder->lz.skip(&coder->lz, num); \ - } \ -} while (0) + +// Optimal - Number of entries in the optimum array. +#define OPTS (1 << 12) typedef struct { @@ -54,7 +61,7 @@ typedef struct { typedef struct { lzma_lzma_state state; - bool prev_1_is_char; + bool prev_1_is_literal; bool prev_2; uint32_t pos_prev_2; @@ -70,132 +77,79 @@ typedef struct { struct lzma_coder_s { - // Next coder in the chain - lzma_next_coder next; - - // In window and match finder - lzma_lz_encoder lz; - - // Range encoder + /// Range encoder lzma_range_encoder rc; - // State + /// State lzma_lzma_state state; - uint8_t previous_byte; + + /// The four most recent match distances uint32_t reps[REP_DISTANCES]; - // Misc - uint32_t match_distances[MATCH_MAX_LEN * 2 + 2 + 1]; - uint32_t num_distance_pairs; - uint32_t additional_offset; - uint32_t now_pos; // Lowest 32 bits are enough here. - bool best_compression; ///< True when LZMA_MODE_BEST is used + /// Array of match candidates + lzma_match matches[MATCH_LEN_MAX + 1]; + + /// Number of match candidates in matches[] + uint32_t matches_count; + + /// Varibale to hold the length of the longest match between calls + /// to lzma_lzma_optimum_*(). + uint32_t longest_match_length; + + /// True if using getoptimumfast + bool fast_mode; + + /// True if the encoder has been initialized by encoding the first + /// byte as a literal. bool is_initialized; + + /// True if the range encoder has been flushed, but not all bytes + /// have been written to the output buffer yet. bool is_flushed; - bool write_eopm; - // Literal encoder - lzma_literal_coder literal_coder; + uint32_t pos_mask; ///< (1 << pos_bits) - 1 + uint32_t literal_context_bits; + uint32_t literal_pos_mask; - // Bit encoders + // These are the same as in lzma_decoder.c. See comments there. + probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; probability is_match[STATES][POS_STATES_MAX]; probability is_rep[STATES]; probability is_rep0[STATES]; probability is_rep1[STATES]; probability is_rep2[STATES]; probability is_rep0_long[STATES][POS_STATES_MAX]; - probability pos_encoders[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_slot[LEN_TO_POS_STATES][POS_SLOTS]; + probability pos_special[FULL_DISTANCES - END_POS_MODEL_INDEX]; + probability pos_align[ALIGN_TABLE_SIZE]; - // Bit Tree Encoders - probability pos_slot_encoder[LEN_TO_POS_STATES][1 << POS_SLOT_BITS]; - probability pos_align_encoder[1 << ALIGN_BITS]; - - // Length encoders + // These are the same as in lzma_decoder.c except that the encoders + // include also price tables. lzma_length_encoder match_len_encoder; lzma_length_encoder rep_len_encoder; - lzma_length_encoder *prev_len_encoder; - // Optimal - lzma_optimal optimum[OPTS]; - uint32_t optimum_end_index; - uint32_t optimum_current_index; - uint32_t longest_match_length; - bool longest_match_was_found; - - // Prices - uint32_t pos_slot_prices[LEN_TO_POS_STATES][DIST_TABLE_SIZE_MAX]; + // Price tables + uint32_t pos_slot_prices[LEN_TO_POS_STATES][POS_SLOTS]; uint32_t distances_prices[LEN_TO_POS_STATES][FULL_DISTANCES]; - uint32_t align_prices[ALIGN_TABLE_SIZE]; - uint32_t align_price_count; uint32_t dist_table_size; uint32_t match_price_count; - // LZMA specific settings - uint32_t dictionary_size; ///< Size in bytes - uint32_t fast_bytes; - uint32_t pos_state_bits; - uint32_t pos_mask; ///< (1 << pos_state_bits) - 1 -}; - - -extern void lzma_length_encoder_update_table(lzma_length_encoder *lencoder, - const uint32_t pos_state); + uint32_t align_prices[ALIGN_TABLE_SIZE]; + uint32_t align_price_count; -extern bool lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, - size_t *restrict out_pos, size_t out_size); + // Optimal + uint32_t opts_end_index; + uint32_t opts_current_index; + lzma_optimal opts[OPTS]; +}; -extern void lzma_get_optimum(lzma_coder *restrict coder, - uint32_t *restrict back_res, uint32_t *restrict len_res); -extern void lzma_get_optimum_fast(lzma_coder *restrict coder, +extern void lzma_lzma_optimum_fast( + lzma_coder *restrict coder, lzma_mf *restrict mf, uint32_t *restrict back_res, uint32_t *restrict len_res); - -// NOTE: Don't add 'restrict'. -static inline void -lzma_read_match_distances(lzma_coder *coder, - uint32_t *len_res, uint32_t *num_distance_pairs) -{ - *len_res = 0; - - coder->lz.get_matches(&coder->lz, coder->match_distances); - - *num_distance_pairs = coder->match_distances[0]; - - if (*num_distance_pairs > 0) { - *len_res = coder->match_distances[*num_distance_pairs - 1]; - assert(*len_res <= MATCH_MAX_LEN); - - if (*len_res == coder->fast_bytes) { - uint32_t offset = *len_res - 1; - const uint32_t distance = coder->match_distances[ - *num_distance_pairs] + 1; - uint32_t limit = MATCH_MAX_LEN - *len_res; - - assert(offset + limit < coder->lz.keep_size_after); - assert(coder->lz.read_pos <= coder->lz.write_pos); - - // If we are close to end of the stream, we may need - // to limit the length of the match. - if (coder->lz.write_pos - coder->lz.read_pos - < offset + limit) - limit = coder->lz.write_pos - - (coder->lz.read_pos + offset); - - offset += coder->lz.read_pos; - uint32_t i = 0; - while (i < limit && coder->lz.buffer[offset + i] - == coder->lz.buffer[ - offset + i - distance]) - ++i; - - *len_res += i; - } - } - - ++coder->additional_offset; - - return; -} +extern void lzma_lzma_optimum_normal(lzma_coder *restrict coder, + lzma_mf *restrict mf, uint32_t *restrict back_res, + uint32_t *restrict len_res, uint32_t position); #endif diff --git a/src/liblzma/lzma/lzma_literal.c b/src/liblzma/lzma/lzma_literal.c deleted file mode 100644 index 3611a1f7..00000000 --- a/src/liblzma/lzma/lzma_literal.c +++ /dev/null @@ -1,51 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_literal.c -/// \brief Literal Coder -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "lzma_literal.h" - - -extern lzma_ret -lzma_literal_init(lzma_literal_coder *coder, - uint32_t literal_context_bits, uint32_t literal_pos_bits) -{ - // Verify that arguments are sane. - if (literal_context_bits > LZMA_LITERAL_CONTEXT_BITS_MAX - || literal_pos_bits > LZMA_LITERAL_POS_BITS_MAX) - return LZMA_HEADER_ERROR; - - // Calculate the number of states the literal coder must store. - const uint32_t states = literal_states( - literal_pos_bits, literal_context_bits); - - // Store the new settings. - coder->literal_context_bits = literal_context_bits; - coder->literal_pos_bits = literal_pos_bits; - - // Calculate also the literal_pos_mask. It's not changed - // anywhere else than here. - coder->literal_pos_mask = (1 << literal_pos_bits) - 1; - - // Reset the literal coder. - for (uint32_t i = 0; i < states; ++i) - for (uint32_t j = 0; j < LIT_SIZE; ++j) - bit_reset(coder->coders[i][j]); - - return LZMA_OK; -} diff --git a/src/liblzma/lzma/lzma_literal.h b/src/liblzma/lzma/lzma_literal.h deleted file mode 100644 index 208abd99..00000000 --- a/src/liblzma/lzma/lzma_literal.h +++ /dev/null @@ -1,71 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file lzma_literal.h -/// \brief Literal Coder -/// -/// This is used as is by both LZMA encoder and decoder. -// -// Copyright (C) 1999-2006 Igor Pavlov -// Copyright (C) 2007 Lasse Collin -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef LZMA_LITERAL_H -#define LZMA_LITERAL_H - -#include "common.h" - -// We need typedef of `probability'. -#include "range_common.h" - - -/// Each literal coder is divided in three sections: -/// - 0x001-0x0FF: Without match byte -/// - 0x101-0x1FF: With match byte; match bit is 0 -/// - 0x201-0x2FF: With match byte; match bit is 1 -#define LIT_SIZE 0x300 - -/// Calculate how many states are needed. Each state has -/// LIT_SIZE `probability' variables. -#define literal_states(literal_context_bits, literal_pos_bits) \ - (1U << ((literal_context_bits) + (literal_pos_bits))) - -/// Locate the literal coder for the next literal byte. The choice depends on -/// - the lowest literal_pos_bits bits of the position of the current -/// byte; and -/// - the highest literal_context_bits bits of the previous byte. -#define literal_get_subcoder(literal_coder, pos, prev_byte) \ - (literal_coder).coders[(((pos) & (literal_coder).literal_pos_mask) \ - << (literal_coder).literal_context_bits) \ - + ((prev_byte) >> (8 - (literal_coder).literal_context_bits))] - - -typedef struct { - uint32_t literal_context_bits; - uint32_t literal_pos_bits; - - /// literal_pos_mask is always (1 << literal_pos_bits) - 1. - uint32_t literal_pos_mask; - - /// There are (1 << (literal_pos_bits + literal_context_bits)) - /// literal coders. - probability coders[1 << LZMA_LITERAL_BITS_MAX][LIT_SIZE]; - -} lzma_literal_coder; - - -extern lzma_ret lzma_literal_init( - lzma_literal_coder *coder, - uint32_t literal_context_bits, uint32_t literal_pos_bits); - -#endif |