aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-01-17 18:56:53 +0200
committerLasse Collin <lasse.collin@tukaani.org>2008-01-17 18:56:53 +0200
commit33be3c0e24d8f43376ccf71cc77d53671e792f07 (patch)
treebfa4e4ce2a9046c9b53de19605ab34a3556d1f07
parentFix wrong too small size of argument unfiltered_max (diff)
downloadxz-33be3c0e24d8f43376ccf71cc77d53671e792f07.tar.xz
Subblock decoder: Don't exit the main loop in decode_buffer()
too early if we hit End of Input while decoding a Subblock of type Repeating Data. To keep the loop termination condition elegant, the order of enumerations in coder->sequence were changed. To keep the case-labels in roughly the same order as the enumerations in coder->sequence, large chunks of code was moved around. This made the diff big and ugly compared to the amount of the actual changes made.
-rw-r--r--src/liblzma/subblock/subblock_decoder.c272
1 files changed, 139 insertions, 133 deletions
diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c
index 4eb9e55f..6f38caff 100644
--- a/src/liblzma/subblock/subblock_decoder.c
+++ b/src/liblzma/subblock/subblock_decoder.c
@@ -30,20 +30,24 @@ struct lzma_coder_s {
lzma_next_coder next;
enum {
+ // These require that there is at least one input
+ // byte available.
SEQ_FLAGS,
- SEQ_SIZE_1,
- SEQ_SIZE_2,
- SEQ_SIZE_3,
- SEQ_DATA,
+ SEQ_FILTER_FLAGS,
+ SEQ_FILTER_END,
SEQ_REPEAT_COUNT_1,
SEQ_REPEAT_COUNT_2,
SEQ_REPEAT_COUNT_3,
SEQ_REPEAT_SIZE,
SEQ_REPEAT_READ_DATA,
+ SEQ_SIZE_1,
+ SEQ_SIZE_2,
+ SEQ_SIZE_3, // This must be right before SEQ_DATA.
+
+ // These don't require any input to be available.
+ SEQ_DATA,
SEQ_REPEAT_FAST,
SEQ_REPEAT_NORMAL,
- SEQ_FILTER_FLAGS,
- SEQ_FILTER_END,
} sequence;
/// Number of bytes left in the current Subblock Data field.
@@ -167,7 +171,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
size_t *restrict out_pos, size_t out_size, lzma_action action)
{
while (*out_pos < out_size && (*in_pos < in_size
- || coder->sequence == SEQ_DATA))
+ || coder->sequence >= SEQ_DATA))
switch (coder->sequence) {
case SEQ_FLAGS: {
if ((in[*in_pos] >> 4) != FLAG_PADDING)
@@ -284,8 +288,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break;
}
- case SEQ_SIZE_1:
+ case SEQ_FILTER_FLAGS: {
+ const lzma_ret ret = coder->filter_flags_decoder.code(
+ coder->filter_flags_decoder.coder, allocator,
+ in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
+ if (ret != LZMA_STREAM_END)
+ return ret == LZMA_HEADER_ERROR
+ ? LZMA_DATA_ERROR : ret;
+
+ // Don't free the filter_flags_decoder. It doesn't take much
+ // memory and we may need it again.
+
+ // Initialize the Subfilter. Subblock and Copy filters are
+ // not allowed.
+ if (coder->filter_flags.id == LZMA_FILTER_COPY
+ || coder->filter_flags.id
+ == LZMA_FILTER_SUBBLOCK)
+ return LZMA_DATA_ERROR;
+
+ coder->helper.end_was_reached = false;
+
+ lzma_options_filter filters[3] = {
+ {
+ .id = coder->filter_flags.id,
+ .options = coder->filter_flags.options,
+ }, {
+ .id = LZMA_FILTER_SUBBLOCK_HELPER,
+ .options = &coder->helper,
+ }, {
+ .id = LZMA_VLI_VALUE_UNKNOWN,
+ .options = NULL,
+ }
+ };
+
+ // Optimization: We know that LZMA uses End of Payload Marker
+ // (not End of Input), so we can omit the helper filter.
+ if (filters[0].id == LZMA_FILTER_LZMA)
+ filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
+
+ return_if_error(lzma_raw_decoder_init(
+ &coder->subfilter, allocator,
+ filters, LZMA_VLI_VALUE_UNKNOWN, false));
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
+ case SEQ_FILTER_END:
+ // We are in the beginning of a Subblock. The next Subblock
+ // whose type is not Padding, must indicate end of Subfilter.
+ if (in[*in_pos] == (FLAG_PADDING << 4)) {
+ ++*in_pos;
+ break;
+ }
+
+ if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+
case SEQ_REPEAT_COUNT_1:
+ case SEQ_SIZE_1:
// We use the same code to parse
// - the Size (28 bits) in Subblocks of type Data; and
// - the Repeat count (28 bits) in Subblocks of type
@@ -295,21 +359,23 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
++coder->sequence;
break;
- case SEQ_SIZE_2:
case SEQ_REPEAT_COUNT_2:
+ case SEQ_SIZE_2:
coder->size |= (size_t)(in[*in_pos]) << 12;
++*in_pos;
++coder->sequence;
break;
- case SEQ_SIZE_3:
case SEQ_REPEAT_COUNT_3:
+ case SEQ_SIZE_3:
coder->size |= (size_t)(in[*in_pos]) << 20;
+ ++*in_pos;
// The real value is the stored value plus one.
++coder->size;
- ++*in_pos;
+ // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
+ // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
++coder->sequence;
break;
@@ -348,6 +414,68 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break;
}
+ case SEQ_DATA: {
+ // Limit the amount of input to match the available
+ // Subblock Data size.
+ size_t in_limit;
+ if (in_size - *in_pos > coder->size)
+ in_limit = *in_pos + coder->size;
+ else
+ in_limit = in_size;
+
+ if (coder->subfilter.code == NULL) {
+ const size_t copy_size = bufcpy(
+ in, in_pos, in_limit,
+ out, out_pos, out_size);
+
+ coder->size -= copy_size;
+
+ if (update_uncompressed_size(coder, copy_size))
+ return LZMA_DATA_ERROR;
+
+ } else {
+ const size_t in_start = *in_pos;
+ const lzma_ret ret = subfilter_decode(
+ coder, allocator,
+ in, in_pos, in_limit,
+ out, out_pos, out_size,
+ action);
+
+ // Update the number of unprocessed bytes left in
+ // this Subblock. This assert() is true because
+ // in_limit prevents *in_pos getting too big.
+ assert(*in_pos - in_start <= coder->size);
+ coder->size -= *in_pos - in_start;
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Subfilter can occur only at
+ // a Subblock boundary.
+ if (coder->size != 0)
+ return LZMA_DATA_ERROR;
+
+ // We need a Subblock with Unset
+ // Subfilter before more data.
+ coder->sequence = SEQ_FILTER_END;
+ break;
+ }
+
+ if (ret != LZMA_OK)
+ return ret;
+ }
+
+ // If we couldn't process the whole Subblock Data yet, return.
+ if (coder->size > 0)
+ return LZMA_OK;
+
+ // Check if we have decoded all the data.
+ if (coder->uncompressed_size == 0
+ && coder->subfilter.code == NULL)
+ return LZMA_STREAM_END;
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
case SEQ_REPEAT_FAST: {
// Optimization for cases when there is only one byte to
// repeat and no Subfilter.
@@ -432,128 +560,6 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
break;
- case SEQ_DATA: {
- // Limit the amount of input to match the available
- // Subblock Data size.
- size_t in_limit;
- if (in_size - *in_pos > coder->size)
- in_limit = *in_pos + coder->size;
- else
- in_limit = in_size;
-
- if (coder->subfilter.code == NULL) {
- const size_t copy_size = bufcpy(
- in, in_pos, in_limit,
- out, out_pos, out_size);
-
- coder->size -= copy_size;
-
- if (update_uncompressed_size(coder, copy_size))
- return LZMA_DATA_ERROR;
-
- } else {
- const size_t in_start = *in_pos;
- const lzma_ret ret = subfilter_decode(
- coder, allocator,
- in, in_pos, in_limit,
- out, out_pos, out_size,
- action);
-
- // Update the number of unprocessed bytes left in
- // this Subblock. This assert() is true because
- // in_limit prevents *in_pos getting too big.
- assert(*in_pos - in_start <= coder->size);
- coder->size -= *in_pos - in_start;
-
- if (ret == LZMA_STREAM_END) {
- // End of Subfilter can occur only at
- // a Subblock boundary.
- if (coder->size != 0)
- return LZMA_DATA_ERROR;
-
- // We need a Subblock with Unset
- // Subfilter before more data.
- coder->sequence = SEQ_FILTER_END;
- break;
- }
-
- if (ret != LZMA_OK)
- return ret;
- }
-
- // If we couldn't process the whole Subblock Data yet, return.
- if (coder->size > 0)
- return LZMA_OK;
-
- // Check if we have decoded all the data.
- if (coder->uncompressed_size == 0
- && coder->subfilter.code == NULL)
- return LZMA_STREAM_END;
-
- coder->sequence = SEQ_FLAGS;
- break;
- }
-
- case SEQ_FILTER_FLAGS: {
- const lzma_ret ret = coder->filter_flags_decoder.code(
- coder->filter_flags_decoder.coder, allocator,
- in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
- if (ret != LZMA_STREAM_END)
- return ret == LZMA_HEADER_ERROR
- ? LZMA_DATA_ERROR : ret;
-
- // Don't free the filter_flags_decoder. It doesn't take much
- // memory and we may need it again.
-
- // Initialize the Subfilter. Subblock and Copy filters are
- // not allowed.
- if (coder->filter_flags.id == LZMA_FILTER_COPY
- || coder->filter_flags.id
- == LZMA_FILTER_SUBBLOCK)
- return LZMA_DATA_ERROR;
-
- coder->helper.end_was_reached = false;
-
- lzma_options_filter filters[3] = {
- {
- .id = coder->filter_flags.id,
- .options = coder->filter_flags.options,
- }, {
- .id = LZMA_FILTER_SUBBLOCK_HELPER,
- .options = &coder->helper,
- }, {
- .id = LZMA_VLI_VALUE_UNKNOWN,
- .options = NULL,
- }
- };
-
- // Optimization: We know that LZMA uses End of Payload Marker
- // (not End of Input), so we can omit the helper filter.
- if (filters[0].id == LZMA_FILTER_LZMA)
- filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
-
- return_if_error(lzma_raw_decoder_init(
- &coder->subfilter, allocator,
- filters, LZMA_VLI_VALUE_UNKNOWN, false));
-
- coder->sequence = SEQ_FLAGS;
- break;
- }
-
- case SEQ_FILTER_END:
- // We are in the beginning of a Subblock. The next Subblock
- // whose type is not Padding, must indicate end of Subfilter.
- if (in[*in_pos] == (FLAG_PADDING << 4)) {
- ++*in_pos;
- break;
- }
-
- if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
- return LZMA_DATA_ERROR;
-
- coder->sequence = SEQ_FLAGS;
- break;
-
default:
return LZMA_PROG_ERROR;
}