1 files changed, 86 insertions, 123 deletions
diff --git a/src/liblzma/rangecoder/range_decoder.h b/src/liblzma/rangecoder/range_decoder.h
index 62162448..ca2d392e 100644
--- a/src/liblzma/rangecoder/range_decoder.h
+++ b/src/liblzma/rangecoder/range_decoder.h
@@ -31,6 +31,7 @@ typedef struct {
 } lzma_range_decoder;
 
 
+/// Reads the first five bytes to initialize the range decoder.
 static inline bool
 rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in,
 		size_t *restrict in_pos, size_t in_size)
@@ -48,14 +49,22 @@ rc_read_init(lzma_range_decoder *rc, const uint8_t *restrict in,
 }
 
 
-/// Makes local copies of range decoder variables.
-#define rc_to_local(range_decoder) \
+/// Makes local copies of range decoder and *in_pos variables. Doing this
+/// improves speed significantly. The range decoder macros expect also
+/// variables `in' and `in_size' to be defined.
+#define rc_to_local(range_decoder, in_pos) \
 	lzma_range_decoder rc = range_decoder; \
+	size_t rc_in_pos = (in_pos); \
 	uint32_t rc_bound
 
+
 /// Stores the local copes back to the range decoder structure.
-#define rc_from_local(range_decoder) \
-	range_decoder = rc
+#define rc_from_local(range_decoder, in_pos) \
+do { \
+	range_decoder = rc; \
+	in_pos = rc_in_pos; \
+} while (0)
+
 
 /// Resets the range decoder structure.
 #define rc_reset(range_decoder) \
@@ -66,158 +75,112 @@ do { \
 } while (0)
 
 
-// All of the macros in this file expect the following variables being defined:
-//  - lzma_range_decoder range_decoder;
-//  - uint32_t rc_bound;   // Temporary variable
-//  - uint8_t *in;
-//  - size_t in_pos_local; // Local alias for *in_pos
-
+/// When decoding has been properly finished, rc.code is always zero unless
+/// the input stream is corrupt. So checking this can catch some corrupt
+/// files especially if they don't have any other integrity check.
+#define rc_is_finished(range_decoder) \
+	((range_decoder).code == 0)
 
-//////////////////
-// Buffer "I/O" //
-//////////////////
 
-// Read the next byte of compressed data from buffer_in, if needed.
-#define rc_normalize() \
+/// Read the next input byte if needed. If more input is needed but there is
+/// no more input available, "goto out" is used to jump out of the main
+/// decoder loop.
+#define rc_normalize(seq) \
 do { \
-	if (rc.range < TOP_VALUE) { \
-		rc.range <<= SHIFT_BITS; \
-		rc.code = (rc.code << SHIFT_BITS) | in[in_pos_local++]; \
+	if (rc.range < RC_TOP_VALUE) { \
+		if (unlikely(rc_in_pos == in_size)) { \
+			coder->sequence = seq; \
+			goto out; \
+		} \
+		rc.range <<= RC_SHIFT_BITS; \
+		rc.code = (rc.code << RC_SHIFT_BITS) | in[rc_in_pos++]; \
 	} \
 } while (0)
 
 
-//////////////////
-// Bit decoding //
-//////////////////
-
-// Range decoder's DecodeBit() is splitted into three macros:
-//    if_bit_0(prob) {
-//        update_bit_0(prob)
-//        ...
-//    } else {
-//        update_bit_1(prob)
-//        ...
-//    }
-
-#define if_bit_0(prob) \
-	rc_normalize(); \
-	rc_bound = (rc.range >> BIT_MODEL_TOTAL_BITS) * (prob); \
+/// Start decoding a bit. This must be used together with rc_update_0()
+/// and rc_update_1():
+///
+///     rc_if_0(prob, seq) {
+///         rc_update_0(prob);
+///         // Do something
+///     } else {
+///         rc_update_1(prob);
+///         // Do something else
+///     }
+///
+#define rc_if_0(prob, seq) \
+	rc_normalize(seq); \
+	rc_bound = (rc.range >> RC_BIT_MODEL_TOTAL_BITS) * (prob); \
 	if (rc.code < rc_bound)
 
 
-#define update_bit_0(prob) \
+/// Update the range decoder state and the used probability variable to
+/// match a decoded bit of 0.
+#define rc_update_0(prob) \
 do { \
 	rc.range = rc_bound; \
-	prob += (BIT_MODEL_TOTAL - (prob)) >> MOVE_BITS; \
+	prob += (RC_BIT_MODEL_TOTAL - (prob)) >> RC_MOVE_BITS; \
 } while (0)
 
 
-#define update_bit_1(prob) \
+/// Update the range decoder state and the used probability variable to
+/// match a decoded bit of 1.
+#define rc_update_1(prob) \
 do { \
 	rc.range -= rc_bound; \
 	rc.code -= rc_bound; \
-	prob -= (prob) >> MOVE_BITS; \
+	prob -= (prob) >> RC_MOVE_BITS; \
 } while (0)
 
 
-#define rc_decode_direct(dest, count) \
+/// Decodes one bit and runs action0 or action1 depending on the decoded bit.
+/// This macro is used as the last step in bittree reverse decoders since
+/// those don't use "symbol" for anything else than indexing the probability
+/// arrays.
+#define rc_bit_last(prob, action0, action1, seq) \
 do { \
-	rc_normalize(); \
-	rc.range >>= 1; \
-	rc.code -= rc.range; \
-	rc_bound = UINT32_C(0) - (rc.code >> 31); \
-	rc.code += rc.range & rc_bound; \
-	dest = (dest << 1) + (rc_bound + 1); \
-} while (--count > 0)
+	rc_if_0(prob, seq) { \
+		rc_update_0(prob); \
+		action0; \
+	} else { \
+		rc_update_1(prob); \
+		action1; \
+	} \
+} while (0)
 
 
-// Dummy versions don't update prob or dest.
-#define update_bit_0_dummy() \
-	rc.range = rc_bound
+/// Decodes one bit, updates "symbol", and runs action0 or action1 depending
+/// on the decoded bit.
+#define rc_bit(prob, action0, action1, seq) \
+	rc_bit_last(prob, \
+		symbol <<= 1; action0, \
+		symbol = (symbol << 1) + 1; action1, \
+		seq);
 
 
-#define update_bit_1_dummy() \
-do { \
-	rc.range -= rc_bound; \
-	rc.code -= rc_bound; \
-} while (0)
+/// Like rc_bit() but add "case seq:" as a prefix. This makes the unrolled
+/// loops more readable because the code isn't littered with "case"
+/// statements. On the other hand this also makes it less readable, since
+/// spotting the places where the decoder loop may be restarted is less
+/// obvious.
+#define rc_bit_case(prob, action0, action1, seq) \
+	case seq: rc_bit(prob, action0, action1, seq)
 
 
-#define rc_decode_direct_dummy(count) \
+/// Decode a bit without using a probability.
+#define rc_direct(dest, seq) \
 do { \
-	rc_normalize(); \
+	rc_normalize(seq); \
 	rc.range >>= 1; \
 	rc.code -= rc.range; \
-	rc.code += rc.range & (UINT32_C(0) - (rc.code >> 31)); \
-} while (--count > 0)
-
-
-///////////////////////
-// Bit tree decoding //
-///////////////////////
-
-#define bittree_decode(target, probs, bit_levels) \
-do { \
-	uint32_t model_index = 1; \
-	for (uint32_t bit_index = (bit_levels); bit_index != 0; --bit_index) { \
-		if_bit_0((probs)[model_index]) { \
-			update_bit_0((probs)[model_index]); \
-			model_index <<= 1; \
-		} else { \
-			update_bit_1((probs)[model_index]); \
-			model_index = (model_index << 1) | 1; \
-		} \
-	} \
-	target += model_index - (1 << bit_levels); \
-} while (0)
-
-
-#define bittree_reverse_decode(target, probs, bit_levels) \
-do { \
-	uint32_t model_index = 1; \
-	for (uint32_t bit_index = 0; bit_index < bit_levels; ++bit_index) { \
-		if_bit_0((probs)[model_index]) { \
-			update_bit_0((probs)[model_index]); \
-			model_index <<= 1; \
-		} else { \
-			update_bit_1((probs)[model_index]); \
-			model_index = (model_index << 1) | 1; \
-			target += 1 << bit_index; \
-		} \
-	} \
-} while (0)
-
-
-// Dummy versions don't update prob.
-#define bittree_decode_dummy(target, probs, bit_levels) \
-do { \
-	uint32_t model_index = 1; \
-	for (uint32_t bit_index = (bit_levels); bit_index != 0; --bit_index) { \
-		if_bit_0((probs)[model_index]) { \
-			update_bit_0_dummy(); \
-			model_index <<= 1; \
-		} else { \
-			update_bit_1_dummy(); \
-			model_index = (model_index << 1) | 1; \
-		} \
-	} \
-	target += model_index - (1 << bit_levels); \
+	rc_bound = UINT32_C(0) - (rc.code >> 31); \
+	rc.code += rc.range & rc_bound; \
+	dest = (dest << 1) + (rc_bound + 1); \
 } while (0)
 
 
-#define bittree_reverse_decode_dummy(probs, bit_levels) \
-do { \
-	uint32_t model_index = 1; \
-	for (uint32_t bit_index = 0; bit_index < bit_levels; ++bit_index) { \
-		if_bit_0((probs)[model_index]) { \
-			update_bit_0_dummy(); \
-			model_index <<= 1; \
-		} else { \
-			update_bit_1_dummy(); \
-			model_index = (model_index << 1) | 1; \
-		} \
-	} \
-} while (0)
+// NOTE: No macros are provided for bittree decoding. It seems to be simpler
+// to just write them open in the code.
 
 #endif