aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/subblock/subblock_decoder.c
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2007-12-09 00:42:33 +0200
committerLasse Collin <lasse.collin@tukaani.org>2007-12-09 00:42:33 +0200
commit5d018dc03549c1ee4958364712fb0c94e1bf2741 (patch)
tree1b211911fb33fddb3f04b77f99e81df23623ffc4 /src/liblzma/subblock/subblock_decoder.c
downloadxz-5d018dc03549c1ee4958364712fb0c94e1bf2741.tar.xz
Imported to git.
Diffstat (limited to '')
-rw-r--r--src/liblzma/subblock/subblock_decoder.c681
1 files changed, 681 insertions, 0 deletions
diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c
new file mode 100644
index 00000000..ef3c289f
--- /dev/null
+++ b/src/liblzma/subblock/subblock_decoder.c
@@ -0,0 +1,681 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file subblock_decoder.c
+/// \brief Decoder of the Subblock filter
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "subblock_decoder.h"
+#include "subblock_decoder_helper.h"
+#include "raw_decoder.h"
+
+
+/// Maximum number of consecutive Subblocks with Subblock Type Padding
+#define PADDING_MAX 31
+
+
+struct lzma_coder_s {
+ lzma_next_coder next;
+
+ enum {
+ SEQ_FLAGS,
+ SEQ_SIZE_1,
+ SEQ_SIZE_2,
+ SEQ_SIZE_3,
+ SEQ_DATA,
+ SEQ_REPEAT_COUNT_1,
+ SEQ_REPEAT_COUNT_2,
+ SEQ_REPEAT_COUNT_3,
+ SEQ_REPEAT_SIZE,
+ SEQ_REPEAT_READ_DATA,
+ SEQ_REPEAT_FAST,
+ SEQ_REPEAT_NORMAL,
+ SEQ_FILTER_FLAGS,
+ SEQ_FILTER_END,
+ } sequence;
+
+ /// Number of bytes left in the current Subblock Data field.
+ size_t size;
+
+ /// Uncompressed Size, or LZMA_VLI_VALUE_UNKNOWN if unknown.
+ lzma_vli uncompressed_size;
+
+ /// Number of consecutive Subblocks with Subblock Type Padding
+ uint32_t padding;
+
+ /// True when .next.code() has returned LZMA_STREAM_END.
+ bool next_finished;
+
+ /// True when the Subblock decoder has detected End of Payload Marker.
+ /// This may become true before next_finished becomes true.
+ bool this_finished;
+
+ /// True if Subfilters are allowed.
+ bool allow_subfilters;
+
+ /// Indicates if at least one byte of decoded output has been
+ /// produced after enabling Subfilter.
+ bool got_output_with_subfilter;
+
+ /// Possible subfilter
+ lzma_next_coder subfilter;
+
+ /// Filter Flags decoder is needed to parse the ID and Properties
+ /// of the subfilter.
+ lzma_next_coder filter_flags_decoder;
+
+ /// The filter_flags_decoder stores its results here.
+ lzma_options_filter filter_flags;
+
+ /// Options for the Subblock decoder helper. This is used to tell
+ /// the helper when it should return LZMA_STREAM_END to the subfilter.
+ lzma_options_subblock_helper helper;
+
+ struct {
+ /// How many times buffer should be repeated
+ size_t count;
+
+ /// Size of the buffer
+ size_t size;
+
+ /// Position in the buffer
+ size_t pos;
+
+ /// Buffer to hold the data to be repeated
+ uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
+ } repeat;
+
+ /// Temporary buffer needed when the Subblock filter is not the last
+ /// filter in the chain. The output of the next filter is first
+ /// decoded into buffer[], which is then used as input for the actual
+ /// Subblock decoder.
+ struct {
+ size_t pos;
+ size_t size;
+ uint8_t buffer[LZMA_BUFFER_SIZE];
+ } temp;
+};
+
+
+/// Values of valid Subblock Flags
+enum {
+ FLAG_PADDING,
+ FLAG_EOPM,
+ FLAG_DATA,
+ FLAG_REPEAT,
+ FLAG_SET_SUBFILTER,
+ FLAG_END_SUBFILTER,
+};
+
+
+/// Substracts size from coder->uncompressed_size uncompressed size is known
+/// and size isn't bigger than coder->uncompressed_size.
+static inline bool
+update_uncompressed_size(lzma_coder *coder, size_t size)
+{
+ if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) {
+ if ((lzma_vli)(size) > coder->uncompressed_size)
+ return true;
+
+ coder->uncompressed_size -= size;
+ }
+
+ return false;
+}
+
+
+/// Calls the subfilter and updates coder->uncompressed_size.
+static lzma_ret
+subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *in, size_t *in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ assert(coder->subfilter.code != NULL);
+
+ const size_t out_start = *out_pos;
+
+ // Call the subfilter.
+ const lzma_ret ret = coder->subfilter.code(
+ coder->subfilter.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size, action);
+
+ // Update uncompressed_size.
+ if (update_uncompressed_size(coder, *out_pos - out_start))
+ return LZMA_DATA_ERROR;
+
+ return ret;
+}
+
+
+static lzma_ret
+decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *in, size_t *in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ while (*out_pos < out_size && (*in_pos < in_size
+ || coder->sequence == SEQ_DATA))
+ switch (coder->sequence) {
+ case SEQ_FLAGS: {
+ if ((in[*in_pos] >> 4) != FLAG_PADDING)
+ coder->padding = 0;
+
+ // Do the correct action depending on the Subblock Type.
+ switch (in[*in_pos] >> 4) {
+ case FLAG_PADDING:
+ // Only check that reserved bits are zero.
+// if (++coder->padding > PADDING_MAX
+// || in[*in_pos] & 0x0F)
+// return LZMA_DATA_ERROR;
+ ++*in_pos;
+ break;
+
+ case FLAG_EOPM:
+ // Check that reserved bits are zero.
+ if (in[*in_pos] & 0x0F)
+ return LZMA_DATA_ERROR;
+
+ // There must be no Subfilter enabled.
+ if (coder->subfilter.code != NULL)
+ return LZMA_DATA_ERROR;
+
+ // End of Payload Marker must not be used if
+ // uncompressed size is known.
+ if (coder->uncompressed_size != LZMA_VLI_VALUE_UNKNOWN)
+ return LZMA_DATA_ERROR;
+
+ ++*in_pos;
+ return LZMA_STREAM_END;
+
+ case FLAG_DATA:
+ // First four bits of the Subblock Data size.
+ coder->size = in[*in_pos] & 0x0F;
+ ++*in_pos;
+ coder->got_output_with_subfilter = true;
+ coder->sequence = SEQ_SIZE_1;
+ break;
+
+ case FLAG_REPEAT:
+ // First four bits of the Repeat Count. We use
+ // coder->size as a temporary place for it.
+ coder->size = in[*in_pos] & 0x0F;
+ ++*in_pos;
+ coder->got_output_with_subfilter = true;
+ coder->sequence = SEQ_REPEAT_COUNT_1;
+ break;
+
+ case FLAG_SET_SUBFILTER: {
+ if ((in[*in_pos] & 0x0F)
+ || coder->subfilter.code != NULL
+ || !coder->allow_subfilters)
+ return LZMA_DATA_ERROR;
+
+ assert(coder->filter_flags.options == NULL);
+ const lzma_ret ret = lzma_filter_flags_decoder_init(
+ &coder->filter_flags_decoder,
+ allocator, &coder->filter_flags);
+ if (ret != LZMA_OK)
+ return ret;
+
+ coder->got_output_with_subfilter = false;
+
+ ++*in_pos;
+ coder->sequence = SEQ_FILTER_FLAGS;
+ break;
+ }
+
+ case FLAG_END_SUBFILTER:
+ if (coder->subfilter.code == NULL
+ || !coder->got_output_with_subfilter)
+ return LZMA_DATA_ERROR;
+
+ // Tell the helper filter to indicate End of Input
+ // to our subfilter.
+ coder->helper.end_was_reached = true;
+
+ size_t dummy = 0;
+ const lzma_ret ret = subfilter_decode(coder, allocator,
+ NULL, &dummy, 0, out, out_pos,out_size,
+ action);
+
+ // If we didn't reach the end of the subfilter's output
+ // yet, return to the application. On the next call we
+ // will get to this same switch-case again, because we
+ // haven't updated *in_pos yet.
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Free Subfilter's memory. This is a bit debatable,
+ // since we could avoid some malloc()/free() calls
+ // if the same Subfilter gets used soon again. But
+ // if Subfilter isn't used again, we could leave
+ // a memory-hogging filter dangling until someone
+ // frees Subblock filter itself.
+ lzma_next_coder_end(&coder->subfilter, allocator);
+
+ // Free memory used for subfilter options. This is
+ // safe, because we don't support any Subfilter that
+ // would allow pointers in the options structure.
+ lzma_free(coder->filter_flags.options, allocator);
+ coder->filter_flags.options = NULL;
+
+ ++*in_pos;
+
+ if (coder->uncompressed_size == 0)
+ return LZMA_STREAM_END;
+
+ break;
+
+ default:
+ return LZMA_DATA_ERROR;
+ }
+
+ break;
+ }
+
+ case SEQ_SIZE_1:
+ case SEQ_REPEAT_COUNT_1:
+ // We use the same code to parse
+ // - the Size (28 bits) in Subblocks of type Data; and
+ // - the Repeat count (28 bits) in Subblocks of type
+ // Repeating Data.
+ coder->size |= (size_t)(in[*in_pos]) << 4;
+ ++*in_pos;
+ ++coder->sequence;
+ break;
+
+ case SEQ_SIZE_2:
+ case SEQ_REPEAT_COUNT_2:
+ coder->size |= (size_t)(in[*in_pos]) << 12;
+ ++*in_pos;
+ ++coder->sequence;
+ break;
+
+ case SEQ_SIZE_3:
+ case SEQ_REPEAT_COUNT_3:
+ coder->size |= (size_t)(in[*in_pos]) << 20;
+
+ // The real value is the stored value plus one.
+ ++coder->size;
+
+ ++*in_pos;
+ ++coder->sequence;
+ break;
+
+ case SEQ_REPEAT_SIZE:
+ // Move the Repeat Count to the correct variable and parse
+ // the Size of the Data to be repeated.
+ coder->repeat.count = coder->size;
+ coder->repeat.size = (size_t)(in[*in_pos]) + 1;
+ coder->repeat.pos = 0;
+ ++*in_pos;
+ coder->sequence = SEQ_REPEAT_READ_DATA;
+ break;
+
+ case SEQ_REPEAT_READ_DATA: {
+ // Fill coder->repeat.buffer[].
+ const size_t in_avail = in_size - *in_pos;
+ const size_t out_avail
+ = coder->repeat.size - coder->repeat.pos;
+ const size_t copy_size = MIN(in_avail, out_avail);
+
+ memcpy(coder->repeat.buffer + coder->repeat.pos,
+ in + *in_pos, copy_size);
+ *in_pos += copy_size;
+ coder->repeat.pos += copy_size;
+
+ if (coder->repeat.pos == coder->repeat.size) {
+ coder->repeat.pos = 0;
+
+ if (coder->repeat.size == 1
+ && coder->subfilter.code == NULL)
+ coder->sequence = SEQ_REPEAT_FAST;
+ else
+ coder->sequence = SEQ_REPEAT_NORMAL;
+ }
+
+ break;
+ }
+
+ case SEQ_REPEAT_FAST: {
+ // Optimization for cases when there is only one byte to
+ // repeat and no Subfilter.
+ const size_t out_avail = out_size - *out_pos;
+ const size_t copy_size = MIN(coder->repeat.count, out_avail);
+
+ memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
+
+ *out_pos += copy_size;
+ coder->repeat.count -= copy_size;
+
+ if (update_uncompressed_size(coder, copy_size))
+ return LZMA_DATA_ERROR;
+
+ if (coder->repeat.count == 0) {
+ if (coder->uncompressed_size == 0)
+ return LZMA_STREAM_END;
+ } else {
+ return LZMA_OK;
+ }
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
+ case SEQ_REPEAT_NORMAL:
+ do {
+ // Cycle the repeat buffer if needed.
+ if (coder->repeat.pos == coder->repeat.size) {
+ if (--coder->repeat.count == 0) {
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
+ coder->repeat.pos = 0;
+ }
+
+ if (coder->subfilter.code == NULL) {
+ const size_t copy_size = bufcpy(
+ coder->repeat.buffer,
+ &coder->repeat.pos,
+ coder->repeat.size,
+ out, out_pos, out_size);
+
+ if (update_uncompressed_size(coder, copy_size))
+ return LZMA_DATA_ERROR;
+
+ } else {
+ const lzma_ret ret = subfilter_decode(
+ coder, allocator,
+ coder->repeat.buffer,
+ &coder->repeat.pos,
+ coder->repeat.size,
+ out, out_pos, out_size,
+ action);
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Subfilter can occur only at
+ // a Subblock boundary.
+ if (coder->repeat.pos
+ != coder->repeat.size
+ || --coder->repeat
+ .count != 0)
+ return LZMA_DATA_ERROR;
+
+ // We need a Subblock with Unset
+ // Subfilter before more data.
+ coder->sequence = SEQ_FILTER_END;
+ break;
+
+ } else if (ret != LZMA_OK) {
+ return ret;
+ }
+ }
+ } while (*out_pos < out_size);
+
+ break;
+
+ case SEQ_DATA: {
+ // Limit the amount of input to match the available
+ // Subblock Data size.
+ size_t in_limit;
+ if (in_size - *in_pos > coder->size)
+ in_limit = *in_pos + coder->size;
+ else
+ in_limit = in_size;
+
+ if (coder->subfilter.code == NULL) {
+ const size_t copy_size = bufcpy(
+ in, in_pos, in_limit,
+ out, out_pos, out_size);
+
+ coder->size -= copy_size;
+
+ if (update_uncompressed_size(coder, copy_size))
+ return LZMA_DATA_ERROR;
+
+ } else {
+ const size_t in_start = *in_pos;
+ const lzma_ret ret = subfilter_decode(
+ coder, allocator,
+ in, in_pos, in_limit,
+ out, out_pos, out_size,
+ action);
+
+ // Update the number of unprocessed bytes left in
+ // this Subblock. This assert() is true because
+ // in_limit prevents *in_pos getting too big.
+ assert(*in_pos - in_start <= coder->size);
+ coder->size -= *in_pos - in_start;
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Subfilter can occur only at
+ // a Subblock boundary.
+ if (coder->size != 0)
+ return LZMA_DATA_ERROR;
+
+ // We need a Subblock with Unset
+ // Subfilter before more data.
+ coder->sequence = SEQ_FILTER_END;
+ break;
+ }
+
+ if (ret != LZMA_OK)
+ return ret;
+ }
+
+ // If we couldn't process the whole Subblock Data yet, return.
+ if (coder->size > 0)
+ return LZMA_OK;
+
+ // Check if we have decoded all the data.
+ if (coder->uncompressed_size == 0
+ && coder->subfilter.code == NULL)
+ return LZMA_STREAM_END;
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
+ case SEQ_FILTER_FLAGS: {
+ lzma_ret ret = coder->filter_flags_decoder.code(
+ coder->filter_flags_decoder.coder, allocator,
+ in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
+ if (ret != LZMA_STREAM_END)
+ return ret == LZMA_HEADER_ERROR
+ ? LZMA_DATA_ERROR : ret;
+
+ // Don't free the filter_flags_decoder. It doesn't take much
+ // memory and we may need it again.
+
+ // Initialize the Subfilter. Subblock and Copy filters are
+ // not allowed.
+ if (coder->filter_flags.id == LZMA_FILTER_COPY
+ || coder->filter_flags.id
+ == LZMA_FILTER_SUBBLOCK)
+ return LZMA_DATA_ERROR;
+
+ coder->helper.end_was_reached = false;
+
+ lzma_options_filter filters[3] = {
+ {
+ .id = coder->filter_flags.id,
+ .options = coder->filter_flags.options,
+ }, {
+ .id = LZMA_FILTER_SUBBLOCK_HELPER,
+ .options = &coder->helper,
+ }, {
+ .id = LZMA_VLI_VALUE_UNKNOWN,
+ .options = NULL,
+ }
+ };
+
+ // Optimization: We know that LZMA uses End of Payload Marker
+ // (not End of Input), so we can omit the helper filter.
+ if (filters[0].id == LZMA_FILTER_LZMA)
+ filters[1].id = LZMA_VLI_VALUE_UNKNOWN;
+
+ ret = lzma_raw_decoder_init(&coder->subfilter, allocator,
+ filters, LZMA_VLI_VALUE_UNKNOWN, false);
+ if (ret != LZMA_OK)
+ return ret;
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+ }
+
+ case SEQ_FILTER_END:
+ // We are in the beginning of a Subblock. The next Subblock
+ // whose type is not Padding, must indicate end of Subfilter.
+ if (in[*in_pos] == (FLAG_PADDING << 4)) {
+ ++*in_pos;
+ break;
+ }
+
+ if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_FLAGS;
+ break;
+
+ default:
+ return LZMA_PROG_ERROR;
+ }
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ if (coder->next.code == NULL)
+ return decode_buffer(coder, allocator, in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ while (*out_pos < out_size) {
+ if (!coder->next_finished
+ && coder->temp.pos == coder->temp.size) {
+ coder->temp.pos = 0;
+ coder->temp.size = 0;
+
+ const lzma_ret ret = coder->next.code(
+ coder->next.coder,
+ allocator, in, in_pos, in_size,
+ coder->temp.buffer, &coder->temp.size,
+ LZMA_BUFFER_SIZE, action);
+
+ if (ret == LZMA_STREAM_END)
+ coder->next_finished = true;
+ else if (coder->temp.size == 0 || ret != LZMA_OK)
+ return ret;
+ }
+
+ if (coder->this_finished) {
+ if (coder->temp.pos != coder->temp.size)
+ return LZMA_DATA_ERROR;
+
+ if (coder->next_finished)
+ return LZMA_STREAM_END;
+
+ return LZMA_OK;
+ }
+
+ const lzma_ret ret = decode_buffer(coder, allocator,
+ coder->temp.buffer, &coder->temp.pos,
+ coder->temp.size,
+ out, out_pos, out_size, action);
+
+ if (ret == LZMA_STREAM_END)
+ // The next coder in the chain hasn't finished
+ // yet. If the input data is valid, there
+ // must be no more output coming, but the
+ // next coder may still need a litle more
+ // input to detect End of Payload Marker.
+ coder->this_finished = true;
+ else if (ret != LZMA_OK)
+ return ret;
+ else if (coder->next_finished && *out_pos < out_size)
+ return LZMA_DATA_ERROR;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+ lzma_next_coder_end(&coder->next, allocator);
+ lzma_next_coder_end(&coder->subfilter, allocator);
+ lzma_next_coder_end(&coder->filter_flags_decoder, allocator);
+ lzma_free(coder->filter_flags.options, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+extern lzma_ret
+lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ if (next->coder == NULL) {
+ next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+ if (next->coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder->next = LZMA_NEXT_CODER_INIT;
+ next->coder->subfilter = LZMA_NEXT_CODER_INIT;
+ next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
+
+ } else {
+ lzma_next_coder_end(&next->coder->subfilter, allocator);
+ lzma_free(next->coder->filter_flags.options, allocator);
+ }
+
+ next->coder->filter_flags.options = NULL;
+
+ next->coder->sequence = SEQ_FLAGS;
+ next->coder->uncompressed_size = filters[0].uncompressed_size;
+ next->coder->padding = 0;
+ next->coder->next_finished = false;
+ next->coder->this_finished = false;
+ next->coder->temp.pos = 0;
+ next->coder->temp.size = 0;
+
+ if (filters[0].options != NULL)
+ next->coder->allow_subfilters = ((lzma_options_subblock *)(
+ filters[0].options))->allow_subfilters;
+ else
+ next->coder->allow_subfilters = false;
+
+ {
+ const lzma_ret ret = lzma_next_filter_init(&next->coder->next,
+ allocator, filters + 1);
+ if (ret != LZMA_OK) {
+ subblock_decoder_end(next->coder, allocator);
+ return ret;
+ }
+ }
+
+ next->code = &subblock_decode;
+ next->end = &subblock_decoder_end;
+
+ return LZMA_OK;
+}