aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common/metadata_decoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblzma/common/metadata_decoder.c')
-rw-r--r--src/liblzma/common/metadata_decoder.c555
1 files changed, 555 insertions, 0 deletions
diff --git a/src/liblzma/common/metadata_decoder.c b/src/liblzma/common/metadata_decoder.c
new file mode 100644
index 00000000..f2ac6c1d
--- /dev/null
+++ b/src/liblzma/common/metadata_decoder.c
@@ -0,0 +1,555 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file metadata_decoder.c
+/// \brief Decodes metadata stored in Metadata Blocks
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "metadata_decoder.h"
+#include "block_decoder.h"
+
+
+/// Maximum size of a single Extra Record. This limit exists mostly to make
+/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should
+/// be smaller.
+#define EXTRA_SIZE_MAX (SIZE_MAX / 4)
+
+
+/// State of the Metadata decoder. It wraps a Block decoder, whose output is
+/// collected into a temporary buffer and then parsed by process().
+struct lzma_coder_s {
+ /// Current position of the parser state machine in process().
+ ///
+ /// NOTE: The order of the SEQ_EXTRA_* members matters. process()
+ /// moves between them with ++coder->sequence and --coder->sequence,
+ /// so for both the normal and the DUMMY variant the members must
+ /// stay in the order ALLOC, ID, SIZE, (DATA_ALLOC,) COPY.
+ enum {
+ SEQ_FLAGS,
+ SEQ_HEADER_METADATA_SIZE,
+ SEQ_TOTAL_SIZE,
+ SEQ_UNCOMPRESSED_SIZE,
+ SEQ_INDEX_COUNT,
+ SEQ_INDEX_ALLOC,
+ SEQ_INDEX_TOTAL_SIZE,
+ SEQ_INDEX_UNCOMPRESSED_SIZE,
+ SEQ_EXTRA_PREPARE,
+ SEQ_EXTRA_ALLOC,
+ SEQ_EXTRA_ID,
+ SEQ_EXTRA_SIZE,
+ SEQ_EXTRA_DATA_ALLOC,
+ SEQ_EXTRA_DATA_COPY,
+ SEQ_EXTRA_DUMMY_ALLOC,
+ SEQ_EXTRA_DUMMY_ID,
+ SEQ_EXTRA_DUMMY_SIZE,
+ SEQ_EXTRA_DUMMY_COPY,
+ } sequence;
+
+ /// Number of "things" left to be parsed. If we hit end of input
+ /// when this isn't zero, we have corrupt Metadata Block.
+ size_t todo_count;
+
+ /// Position in variable-length integers. process() also uses this
+ /// as the write position when copying the data of an Extra Record.
+ size_t pos;
+
+ /// Temporary variable needed to decode variables whose type
+ /// is size_t instead of lzma_vli. It holds the Index Record count
+ /// and the size of the current Extra Record while they are parsed.
+ lzma_vli tmp;
+
+ /// Pointer to target structure to hold the parsed results.
+ lzma_metadata *metadata;
+
+ /// The Index Record we currently are parsing
+ lzma_index *index_current;
+
+ /// Number of Records in Index. While parsing Metadata Flags this is
+ /// set to one merely to indicate that Index is present; later it
+ /// counts the Records whose Total Size is still to be read.
+ size_t index_count;
+
+ /// Sum of Total Size fields in the Index
+ lzma_vli index_total_size;
+
+ /// Sum of Uncompressed Size fields in the Index
+ lzma_vli index_uncompressed_size;
+
+ /// True if Extra is present.
+ bool has_extra;
+
+ /// True if we have been requested to store the Extra to *metadata.
+ bool want_extra;
+
+ /// Pointer to the end of the Extra Record list. When Extra is only
+ /// being verified, this points to extra_dummy instead.
+ lzma_extra *extra_tail;
+
+ /// Dummy Extra Record used when only verifying integrity of Extra
+ /// (not storing it to RAM).
+ lzma_extra extra_dummy;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// buffer[buffer_pos] is the next byte to process.
+ size_t buffer_pos;
+
+ /// Number of valid bytes in buffer[]; buffer[buffer_size] is the
+ /// first byte that must not be processed.
+ size_t buffer_size;
+
+ /// Temporary buffer to which encoded Metadata is read before
+ /// it is parsed.
+ uint8_t buffer[LZMA_BUFFER_SIZE];
+};
+
+
+/// Decodes a variable-length integer from coder->buffer into `num`, which
+/// must be an lvalue of type lzma_vli (for example coder->tmp).
+///
+/// The macro expects a variable named `coder` to be in scope. coder->pos
+/// carries the decoding position from one invocation to the next and is
+/// reset to zero once the integer has been completely decoded.
+///
+/// NOTE: This macro hides control flow. Unless lzma_vli_decode() returns
+/// LZMA_STREAM_END, the *enclosing function* returns immediately with the
+/// value that lzma_vli_decode() returned.
+#define read_vli(num) \
+do { \
+	const lzma_ret ret = lzma_vli_decode( \
+			&(num), &coder->pos, \
+			coder->buffer, &coder->buffer_pos, \
+			coder->buffer_size); \
+	if (ret != LZMA_STREAM_END) \
+		return ret; \
+	\
+	coder->pos = 0; \
+} while (0)
+
+
+/// \brief      Parses the decoded Metadata currently held in coder->buffer
+///
+/// Runs the coder->sequence state machine for as long as there are
+/// unprocessed bytes between coder->buffer_pos and coder->buffer_size.
+/// The parsed fields are stored to coder->metadata; Index Records and (if
+/// coder->want_extra is set) Extra Records are allocated with lzma_alloc()
+/// and linked to coder->metadata->index and coder->metadata->extra.
+///
+/// \param      coder       Decoder state
+/// \param      allocator   Allocator passed on to lzma_alloc()
+///
+/// \return     - LZMA_OK: The loop ended because the buffer had no more
+///               unprocessed bytes. coder->todo_count tells the caller
+///               whether the parser stopped in the middle of something.
+///             - LZMA_HEADER_ERROR: Reserved bits were set in the flags.
+///             - LZMA_DATA_ERROR: The Metadata is inconsistent.
+///             - LZMA_MEM_ERROR: lzma_alloc() failed.
+///             - LZMA_PROG_ERROR: coder->sequence has an unknown value.
+///             - Anything else than LZMA_STREAM_END that lzma_vli_decode()
+///               returns is passed through as is by read_vli().
+static lzma_ret
+process(lzma_coder *coder, lzma_allocator *allocator)
+{
+	while (coder->buffer_pos < coder->buffer_size)
+	switch (coder->sequence) {
+	case SEQ_FLAGS:
+		// Reserved bits must be unset.
+		if (coder->buffer[coder->buffer_pos] & 0x70)
+			return LZMA_HEADER_ERROR;
+
+		// If Size of Header Metadata is present, prepare the
+		// variable for variable-length integer decoding. Otherwise
+		// it is left as LZMA_VLI_VALUE_UNKNOWN to indicate that the
+		// field isn't present. The same is done for Total Size and
+		// Uncompressed Size below.
+		if (coder->buffer[coder->buffer_pos] & 0x01) {
+			coder->metadata->header_metadata_size = 0;
+			++coder->todo_count;
+		}
+
+		if (coder->buffer[coder->buffer_pos] & 0x02) {
+			coder->metadata->total_size = 0;
+			++coder->todo_count;
+		}
+
+		if (coder->buffer[coder->buffer_pos] & 0x04) {
+			coder->metadata->uncompressed_size = 0;
+			++coder->todo_count;
+		}
+
+		if (coder->buffer[coder->buffer_pos] & 0x08) {
+			// Setting index_count to 1 is just to indicate that
+			// Index is present. The real size is parsed later.
+			coder->index_count = 1;
+			++coder->todo_count;
+		}
+
+		coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
+				!= 0;
+
+		++coder->buffer_pos;
+		coder->sequence = SEQ_HEADER_METADATA_SIZE;
+		break;
+
+	case SEQ_HEADER_METADATA_SIZE:
+		if (coder->metadata->header_metadata_size
+				!= LZMA_VLI_VALUE_UNKNOWN) {
+			read_vli(coder->metadata->header_metadata_size);
+
+			if (coder->metadata->header_metadata_size == 0)
+				return LZMA_DATA_ERROR;
+
+			--coder->todo_count;
+		}
+
+		coder->sequence = SEQ_TOTAL_SIZE;
+		break;
+
+	case SEQ_TOTAL_SIZE:
+		if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
+			read_vli(coder->metadata->total_size);
+
+			if (coder->metadata->total_size == 0)
+				return LZMA_DATA_ERROR;
+
+			--coder->todo_count;
+		}
+
+		coder->sequence = SEQ_UNCOMPRESSED_SIZE;
+		break;
+
+	case SEQ_UNCOMPRESSED_SIZE:
+		if (coder->metadata->uncompressed_size
+				!= LZMA_VLI_VALUE_UNKNOWN) {
+			read_vli(coder->metadata->uncompressed_size);
+			--coder->todo_count;
+		}
+
+		coder->sequence = SEQ_INDEX_COUNT;
+		break;
+
+	case SEQ_INDEX_COUNT:
+		if (coder->index_count == 0) {
+			coder->sequence = SEQ_EXTRA_PREPARE;
+			break;
+		}
+
+		read_vli(coder->tmp);
+
+		// Index must not be empty nor far too big (wouldn't fit
+		// in RAM).
+		if (coder->tmp == 0 || coder->tmp
+				>= SIZE_MAX / sizeof(lzma_index))
+			return LZMA_DATA_ERROR;
+
+		coder->index_count = (size_t)(coder->tmp);
+		coder->tmp = 0;
+
+		coder->sequence = SEQ_INDEX_ALLOC;
+		break;
+
+	case SEQ_INDEX_ALLOC: {
+		lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
+		if (i == NULL)
+			return LZMA_MEM_ERROR;
+
+		i->total_size = 0;
+		i->uncompressed_size = 0;
+		i->next = NULL;
+
+		// Append the new Record to the list in *metadata.
+		if (coder->metadata->index == NULL)
+			coder->metadata->index = i;
+		else
+			coder->index_current->next = i;
+
+		coder->index_current = i;
+
+		coder->sequence = SEQ_INDEX_TOTAL_SIZE;
+	}
+
+	// Fall through
+
+	case SEQ_INDEX_TOTAL_SIZE: {
+		read_vli(coder->index_current->total_size);
+
+		coder->index_total_size += coder->index_current->total_size;
+		if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
+			return LZMA_DATA_ERROR;
+
+		// No Block can have Total Size of zero bytes.
+		if (coder->index_current->total_size == 0)
+			return LZMA_DATA_ERROR;
+
+		if (--coder->index_count == 0) {
+			// If Total Size is present, it must match the sum
+			// of Total Sizes in Index.
+			if (coder->metadata->total_size
+					!= LZMA_VLI_VALUE_UNKNOWN
+					&& coder->metadata->total_size
+					!= coder->index_total_size)
+				return LZMA_DATA_ERROR;
+
+			// All Total Sizes have been read. Rewind to the
+			// first Record to read the Uncompressed Sizes.
+			coder->index_current = coder->metadata->index;
+			coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
+		} else {
+			coder->sequence = SEQ_INDEX_ALLOC;
+		}
+
+		break;
+	}
+
+	case SEQ_INDEX_UNCOMPRESSED_SIZE: {
+		read_vli(coder->index_current->uncompressed_size);
+
+		coder->index_uncompressed_size
+				+= coder->index_current->uncompressed_size;
+		if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
+			return LZMA_DATA_ERROR;
+
+		coder->index_current = coder->index_current->next;
+		if (coder->index_current == NULL) {
+			// If Uncompressed Size is present, it must match
+			// the sum of Uncompressed Sizes in Index.
+			if (coder->metadata->uncompressed_size
+					!= LZMA_VLI_VALUE_UNKNOWN
+					&& coder->metadata->uncompressed_size
+					!= coder->index_uncompressed_size)
+				return LZMA_DATA_ERROR;
+
+			--coder->todo_count;
+			coder->sequence = SEQ_EXTRA_PREPARE;
+		}
+
+		break;
+	}
+
+	case SEQ_EXTRA_PREPARE:
+		assert(coder->todo_count == 0);
+
+		// If we get here, we have at least one byte of input left.
+		// If "Extra is present" flag is unset in Metadata Flags,
+		// it means that there is some garbage and we return an error.
+		if (!coder->has_extra)
+			return LZMA_DATA_ERROR;
+
+		if (!coder->want_extra) {
+			coder->extra_tail = &coder->extra_dummy;
+			coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
+			break;
+		}
+
+		coder->sequence = SEQ_EXTRA_ALLOC;
+
+	// Fall through
+
+	case SEQ_EXTRA_ALLOC: {
+		lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
+		if (e == NULL)
+			return LZMA_MEM_ERROR;
+
+		e->next = NULL;
+		e->id = 0;
+		e->size = 0;
+		e->data = NULL;
+
+		// Append the new Record to the list in *metadata.
+		if (coder->metadata->extra == NULL)
+			coder->metadata->extra = e;
+		else
+			coder->extra_tail->next = e;
+
+		coder->extra_tail = e;
+
+		coder->todo_count = 1;
+		coder->sequence = SEQ_EXTRA_ID;
+	}
+
+	// Fall through
+
+	case SEQ_EXTRA_ID:
+	case SEQ_EXTRA_DUMMY_ID:
+		read_vli(coder->extra_tail->id);
+
+		if (coder->extra_tail->id == 0) {
+			// A Record with ID zero has no Size or Data; it is
+			// complete already. Step back to the matching
+			// *_ALLOC state to start the next Record.
+			coder->extra_tail->size = 0;
+			coder->extra_tail->data = NULL;
+			coder->todo_count = 0;
+			--coder->sequence;
+		} else {
+			// Step forward to the matching *_SIZE state.
+			++coder->sequence;
+		}
+
+		break;
+
+	case SEQ_EXTRA_SIZE:
+	case SEQ_EXTRA_DUMMY_SIZE:
+		read_vli(coder->tmp);
+
+		// Step forward to SEQ_EXTRA_DATA_ALLOC or
+		// SEQ_EXTRA_DUMMY_COPY respectively.
+		++coder->sequence;
+
+		// A non-empty Record needs more input before it can be
+		// completed, so go back to check the loop condition.
+		if (coder->tmp != 0)
+			break;
+
+		// The Record is empty, so it must be completed right away
+		// instead of waiting for the next input byte. Otherwise an
+		// empty Record at the very end of the Metadata would leave
+		// todo_count non-zero and be reported as corrupt, although
+		// the same Record is accepted when more data follows it.
+		if (coder->sequence == SEQ_EXTRA_DUMMY_COPY) {
+			coder->todo_count = 0;
+			coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
+			break;
+		}
+
+	// Fall through (empty Record that is stored to *metadata)
+
+	case SEQ_EXTRA_DATA_ALLOC: {
+		if (coder->tmp > EXTRA_SIZE_MAX)
+			return LZMA_DATA_ERROR;
+
+		coder->extra_tail->size = (size_t)(coder->tmp);
+		coder->tmp = 0;
+
+		uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size),
+				allocator);
+		if (d == NULL)
+			return LZMA_MEM_ERROR;
+
+		coder->extra_tail->data = d;
+		coder->sequence = SEQ_EXTRA_DATA_COPY;
+	}
+
+	// Fall through
+
+	case SEQ_EXTRA_DATA_COPY:
+		// coder->pos is used as the write position in the data.
+		bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
+				coder->extra_tail->data, &coder->pos,
+				(size_t)(coder->extra_tail->size));
+
+		if ((size_t)(coder->extra_tail->size) == coder->pos) {
+			coder->pos = 0;
+			coder->todo_count = 0;
+			coder->sequence = SEQ_EXTRA_ALLOC;
+		}
+
+		break;
+
+	case SEQ_EXTRA_DUMMY_ALLOC:
+		// Not really alloc, just initialize the dummy entry.
+		coder->extra_dummy = (lzma_extra){
+			.next = NULL,
+			.id = 0,
+			.size = 0,
+			.data = NULL,
+		};
+
+		coder->todo_count = 1;
+		coder->sequence = SEQ_EXTRA_DUMMY_ID;
+		break;
+
+	case SEQ_EXTRA_DUMMY_COPY: {
+		// Simply skip as many bytes as indicated by Extra Record Size.
+		// We don't check EXTRA_SIZE_MAX because we don't
+		// allocate any memory to hold the data.
+		const size_t in_avail = coder->buffer_size - coder->buffer_pos;
+		const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
+		coder->buffer_pos += skip;
+		coder->tmp -= skip;
+
+		if (coder->tmp == 0) {
+			coder->todo_count = 0;
+			coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
+		}
+
+		break;
+	}
+
+	default:
+		return LZMA_PROG_ERROR;
+	}
+
+	return LZMA_OK;
+}
+
+
+/// \brief      Decodes a Metadata Block
+///
+/// Input is run through the Block decoder into coder->buffer, and the
+/// buffered bytes are then parsed with process(). The output buffer
+/// arguments and the action are unused, because the results are stored
+/// to the lzma_metadata structure given at initialization.
+///
+/// \return     - LZMA_OK: The Block decoder produced no new data, so more
+///               input is needed.
+///             - LZMA_STREAM_END: The Block decoder finished and the
+///               parser was not in the middle of anything.
+///             - LZMA_DATA_ERROR: The Block decoder finished while
+///               something was still left to be parsed.
+///             - Any other value returned by the Block decoder or by
+///               process() is passed through unchanged.
+static lzma_ret
+metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
+		size_t *restrict out_pos lzma_attribute((unused)),
+		size_t out_size lzma_attribute((unused)),
+		lzma_action action lzma_attribute((unused)))
+{
+	// Set once the Block decoder has reported the end of the Block.
+	bool input_finished = false;
+
+	for (;;) {
+		// Refill the temporary buffer once it has been drained.
+		if (coder->buffer_pos == coder->buffer_size) {
+			coder->buffer_pos = 0;
+			coder->buffer_size = 0;
+
+			const lzma_ret fill_ret = coder->block_decoder.code(
+					coder->block_decoder.coder, allocator,
+					in, in_pos, in_size, coder->buffer,
+					&coder->buffer_size, LZMA_BUFFER_SIZE,
+					LZMA_RUN);
+
+			if (fill_ret == LZMA_STREAM_END) {
+				input_finished = true;
+			} else if (fill_ret != LZMA_OK) {
+				// Errors are passed to the caller as is.
+				return fill_ret;
+			} else if (coder->buffer_size == 0) {
+				// Nothing new to parse; return immediately.
+				return LZMA_OK;
+			}
+		}
+
+		// Parse whatever is in coder->buffer now.
+		const lzma_ret parse_ret = process(coder, allocator);
+		if (parse_ret != LZMA_OK)
+			return parse_ret;
+
+		// A successful process() call consumes the whole buffer.
+		assert(coder->buffer_pos == coder->buffer_size);
+
+		// At the end of the Block the parser must not be in the
+		// middle of anything.
+		if (input_finished)
+			return coder->todo_count != 0
+					? LZMA_DATA_ERROR : LZMA_STREAM_END;
+	}
+}
+
+
+/// \brief      Frees the Metadata decoder
+///
+/// Releases the nested Block decoder first and then the Metadata decoder
+/// itself. The Index and Extra Records that were linked to the lzma_metadata
+/// structure during decoding are not touched here.
+///
+/// \param      coder       Decoder state to free
+/// \param      allocator   Allocator passed on to the free functions
+static void
+metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+	// The Block decoder is initialized by lzma_block_decoder_init() in
+	// metadata_decoder_init() and keeps its own state. Freeing only
+	// *coder would leak that state, so end the nested coder first.
+	if (coder->block_decoder.end != NULL)
+		coder->block_decoder.end(
+				coder->block_decoder.coder, allocator);
+	else
+		lzma_free(coder->block_decoder.coder, allocator);
+
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+/// \brief      Initializes or reinitializes the Metadata decoder
+///
+/// Allocates the coder if next->coder is NULL, resets *metadata and the
+/// parser state, and finally (re)initializes the nested Block decoder.
+///
+/// \param      next        Coder slot to initialize
+/// \param      allocator   Allocator passed on to lzma_alloc()
+/// \param      options     Block options passed to the Block decoder
+/// \param      metadata    Structure to which the parsed results are stored
+/// \param      want_extra  True if the Extra Records should be stored to
+///                         *metadata instead of being only verified
+///
+/// \return     LZMA_PROG_ERROR if options or metadata is NULL,
+///             LZMA_MEM_ERROR if allocating the coder fails, otherwise
+///             the return value of lzma_block_decoder_init().
+static lzma_ret
+metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+		lzma_options_block *options, lzma_metadata *metadata,
+		bool want_extra)
+{
+	if (!(options != NULL && metadata != NULL))
+		return LZMA_PROG_ERROR;
+
+	lzma_coder *coder = next->coder;
+	if (coder == NULL) {
+		// First use: allocate the coder and hook up the callbacks.
+		coder = lzma_alloc(sizeof(lzma_coder), allocator);
+		if (coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder = coder;
+		next->code = &metadata_decode;
+		next->end = &metadata_decoder_end;
+		coder->block_decoder = LZMA_NEXT_CODER_INIT;
+	}
+
+	// Nothing has been parsed yet: all sizes are unknown and both
+	// lists are empty.
+	metadata->index = NULL;
+	metadata->extra = NULL;
+	metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
+	metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
+	metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
+
+	// Rewind the parser to the beginning of the Metadata.
+	coder->sequence = SEQ_FLAGS;
+	coder->todo_count = 0;
+	coder->pos = 0;
+	coder->tmp = 0;
+	coder->metadata = metadata;
+
+	// Index bookkeeping
+	coder->index_current = NULL;
+	coder->index_count = 0;
+	coder->index_total_size = 0;
+	coder->index_uncompressed_size = 0;
+
+	// Extra bookkeeping
+	coder->want_extra = want_extra;
+	coder->extra_tail = NULL;
+
+	// The temporary buffer starts out empty.
+	coder->buffer_pos = 0;
+	coder->buffer_size = 0;
+
+	return lzma_block_decoder_init(
+			&coder->block_decoder, allocator, options);
+}
+
+
+/// \brief      Initializes the Metadata decoder into a lzma_next_coder
+///
+/// The arguments are handed to metadata_decoder_init() via
+/// lzma_next_coder_init.
+// NOTE(review): There is no return statement in this non-void function, so
+// lzma_next_coder_init is presumably a macro that returns the result of
+// metadata_decoder_init() - confirm against its definition.
+extern lzma_ret
+lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ lzma_options_block *options, lzma_metadata *metadata,
+ bool want_extra)
+{
+ lzma_next_coder_init(metadata_decoder_init, next, allocator,
+ options, metadata, want_extra);
+}
+
+
+/// \brief      Initializes a lzma_stream as a Metadata decoder
+///
+/// \param      strm        Stream to initialize
+/// \param      options     Block options for the Metadata Block
+/// \param      metadata    Structure to which the parsed results are stored
+/// \param      want_extra  True if the Extra Records should be stored to
+///                         *metadata
+///
+/// \return     LZMA_OK once the stream has been set up.
+// NOTE(review): lzma_next_strm_init presumably returns early with an error
+// code if the initialization fails - confirm against its definition.
+extern LZMA_API lzma_ret
+lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
+ lzma_metadata *metadata, lzma_bool want_extra)
+{
+ lzma_next_strm_init(strm, lzma_metadata_decoder_init,
+ options, metadata, want_extra);
+
+ // LZMA_RUN is the only action enabled for this decoder here.
+ strm->internal->supported_actions[LZMA_RUN] = true;
+
+ return LZMA_OK;
+}