aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma/lzma_common.h
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-08-28 22:53:15 +0300
committerLasse Collin <lasse.collin@tukaani.org>2008-08-28 22:53:15 +0300
commit3b34851de1eaf358cf9268922fa0eeed8278d680 (patch)
tree7bab212af647541df64227a8d350d17a2e789f6b /src/liblzma/lzma/lzma_common.h
parentFix test_filter_flags to match the new restriction of lc+lp. (diff)
downloadxz-3b34851de1eaf358cf9268922fa0eeed8278d680.tar.xz
Sort of garbage collection commit. :-| Many things are still
broken. API has changed a lot and it will still change a little more here and there. The command line tool doesn't have all the required changes to reflect the API changes, so it's easy to get "internal error" or trigger assertions.
Diffstat (limited to 'src/liblzma/lzma/lzma_common.h')
-rw-r--r--src/liblzma/lzma/lzma_common.h208
1 files changed, 140 insertions, 68 deletions
diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h
index f677fcce..6909969b 100644
--- a/src/liblzma/lzma/lzma_common.h
+++ b/src/liblzma/lzma/lzma_common.h
@@ -22,81 +22,31 @@
#define LZMA_LZMA_COMMON_H
#include "common.h"
-#include "lzma_literal.h"
#include "range_common.h"
-///////////////
-// Constants //
-///////////////
-
-#define REP_DISTANCES 4
-
-#define POS_SLOT_BITS 6
-#define DICT_LOG_SIZE_MAX 30
-#define DIST_TABLE_SIZE_MAX (DICT_LOG_SIZE_MAX * 2)
-#if (UINT32_C(1) << DICT_LOG_SIZE_MAX) != LZMA_DICTIONARY_SIZE_MAX
-# error DICT_LOG_SIZE_MAX is inconsistent with LZMA_DICTIONARY_SIZE_MAX
-#endif
-
-// 2 is for speed optimization
-#define LEN_TO_POS_STATES_BITS 2
-#define LEN_TO_POS_STATES (1 << LEN_TO_POS_STATES_BITS)
-
-#define MATCH_MIN_LEN 2
-
-#define ALIGN_BITS 4
-#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
-#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
-
-#define START_POS_MODEL_INDEX 4
-#define END_POS_MODEL_INDEX 14
-#define POS_MODELS (END_POS_MODEL_INDEX - START_POS_MODEL_INDEX)
-
-#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2)
-#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
+///////////////////
+// Miscellaneous //
+///////////////////
+/// Maximum number of position states. A position state is the lowest pos bits
+/// number of bits of the current uncompressed offset. In some places there
+/// are different sets of probabilities for different pos states.
#define POS_STATES_MAX (1 << LZMA_POS_BITS_MAX)
-// Length coder & Length price table encoder
-#define LEN_LOW_BITS 3
-#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
-#define LEN_MID_BITS 3
-#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
-#define LEN_HIGH_BITS 8
-#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
-#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
-#define LEN_SPEC_SYMBOLS (LOW_LOW_SYMBOLS + LEN_MID_LEN_SYMBOLS)
-#define MATCH_MAX_LEN (MATCH_MIN_LEN + LEN_SYMBOLS - 1)
-
-// Total number of probs in a Len Encoder
-#define LEN_CODER_TOTAL_PROBS (LEN_HIGH_CODER + LEN_HIGH_SYMBOLS)
-
-// Price table size of Len Encoder
-#define LEN_PRICES (LEN_SYMBOLS << LZMA_POS_BITS_MAX)
-
-// Special lengths used together with distance == UINT32_MAX
-#define LEN_SPECIAL_EOPM MATCH_MIN_LEN
-#define LEN_SPECIAL_FLUSH (LEN_SPECIAL_EOPM + 1)
-
-
-// Optimal - Number of entries in the optimum array.
-#define OPTS (1 << 12)
-
-
-// Miscellaneous
-#define INFINITY_PRICE 0x0FFFFFFF
-
-
-////////////
-// Macros //
-////////////
-
-#define get_len_to_pos_state(len) \
- ((len) < LEN_TO_POS_STATES + MATCH_MIN_LEN \
- ? (len) - MATCH_MIN_LEN \
- : LEN_TO_POS_STATES - 1)
+/// Validates literal_context_bits, literal_pos_bits, and pos_bits.
+static inline bool
+is_lclppb_valid(const lzma_options_lzma *options)
+{
+ return options->literal_context_bits <= LZMA_LITERAL_CONTEXT_BITS_MAX
+ && options->literal_pos_bits
+ <= LZMA_LITERAL_POS_BITS_MAX
+ && options->literal_context_bits
+ + options->literal_pos_bits
+ <= LZMA_LITERAL_BITS_MAX
+ && options->pos_bits <= LZMA_POS_BITS_MAX;
+}
///////////
@@ -161,4 +111,126 @@ typedef enum {
#define is_literal_state(state) \
((state) < LIT_STATES)
+
+/////////////
+// Literal //
+/////////////
+
+/// Each literal coder is divided in three sections:
+/// - 0x001-0x0FF: Without match byte
+/// - 0x101-0x1FF: With match byte; match bit is 0
+/// - 0x201-0x2FF: With match byte; match bit is 1
+///
+/// Match byte is used when the previous LZMA symbol was something else than
+/// a literal (that is, it was some kind of match).
+#define LITERAL_CODER_SIZE 0x300
+
+/// Maximum number of literal coders
+#define LITERAL_CODERS_MAX (1 << LZMA_LITERAL_BITS_MAX)
+
+/// Locate the literal coder for the next literal byte. The choice depends on
+/// - the lowest literal_pos_bits bits of the position of the current
+/// byte; and
+/// - the highest literal_context_bits bits of the previous byte.
+#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \
+ ((probs)[(((pos) & lp_mask) << lc) + ((prev_byte) >> (8 - lc))])
+
+
+static inline void
+literal_init(probability (*probs)[LITERAL_CODER_SIZE],
+ uint32_t literal_context_bits, uint32_t literal_pos_bits)
+{
+ assert(literal_context_bits + literal_pos_bits
+ <= LZMA_LITERAL_BITS_MAX);
+
+ const uint32_t coders
+ = 1U << (literal_context_bits + literal_pos_bits);
+
+ for (uint32_t i = 0; i < coders; ++i)
+ for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j)
+ bit_reset(probs[i][j]);
+
+ return;
+}
+
+
+//////////////////
+// Match length //
+//////////////////
+
+// Minimum length of a match is two bytes.
+#define MATCH_LEN_MIN 2
+
+// Match length is encoded with 4, 5, or 10 bits.
+//
+// Length Bits
+// 2-9 4 = Choice=0 + 3 bits
+// 10-17 5 = Choice=1 + Choice2=0 + 3 bits
+// 18-273 10 = Choice=1 + Choice2=1 + 8 bits
+#define LEN_LOW_BITS 3
+#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
+#define LEN_MID_BITS 3
+#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
+#define LEN_HIGH_BITS 8
+#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
+
+// Maximum length of a match is 273 which is a result of the encoding
+// described above.
+#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
+
+
+////////////////////
+// Match distance //
+////////////////////
+
+// Different set of probabilities is used for match distances that have very
+// short match length: Lengths of 2, 3, and 4 bytes have a separate set of
+// probabilities for each length. The matches with longer length use a shared
+// set of probabilities.
+#define LEN_TO_POS_STATES 4
+
+// Macro to get the index of the appropriate probability array.
+#define get_len_to_pos_state(len) \
+ ((len) < LEN_TO_POS_STATES + MATCH_LEN_MIN \
+ ? (len) - MATCH_LEN_MIN \
+ : LEN_TO_POS_STATES - 1)
+
+// The highest two bits of a match distance (pos slot) are encoded using six
+// bits. See fastpos.h for more explanation.
+#define POS_SLOT_BITS 6
+#define POS_SLOTS (1 << POS_SLOT_BITS)
+
+// Match distances up to 127 are fully encoded using probabilities. Since
+// the highest two bits (pos slot) are always encoded using six bits, the
+// distances 0-3 don't need any additional bits to encode, since the pos
+// slot itself is the same as the actual distance. START_POS_MODEL_INDEX
+// indicates the first pos slot where at least one additional bit is needed.
+#define START_POS_MODEL_INDEX 4
+
+// Match distances greater than 127 are encoded in three pieces:
+// - pos slot: the highest two bits
+// - direct bits: 2-26 bits below the highest two bits
+// - alignment bits: four lowest bits
+//
+// Direct bits don't use any probabilities.
+//
+// The pos slot value of 14 is for distances 128-191 (see the table in
+// fastpos.h to understand why).
+#define END_POS_MODEL_INDEX 14
+
+// Seven-bit distances use the full FIXME
+#define FULL_DISTANCES_BITS (END_POS_MODEL_INDEX / 2)
+#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
+
+// For match distances greater than 127, only the highest two bits and the
+// lowest four bits (alignment) is encoded using probabilities.
+#define ALIGN_BITS 4
+#define ALIGN_TABLE_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_TABLE_SIZE - 1)
+
+// LZMA remembers the four most recent match distances. Reusing these distances
+// tends to take less space than re-encoding the actual distance value.
+#define REP_DISTANCES 4
+
#endif