///////////////////////////////////////////////////////////////////////////////
//
/// \file metadata_decoder.c
/// \brief Decodes metadata stored in Metadata Blocks
//
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
#include "metadata_decoder.h"
#include "block_decoder.h"
/// Maximum size of a single Extra Record. Again, this is mostly to make
/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should
/// be smaller.
#define EXTRA_SIZE_MAX (SIZE_MAX / 4)
struct lzma_coder_s {
enum {
SEQ_FLAGS,
SEQ_HEADER_METADATA_SIZE,
SEQ_TOTAL_SIZE,
SEQ_UNCOMPRESSED_SIZE,
SEQ_INDEX_COUNT,
SEQ_INDEX_ALLOC,
SEQ_INDEX_TOTAL_SIZE,
SEQ_INDEX_UNCOMPRESSED_SIZE,
SEQ_EXTRA_PREPARE,
SEQ_EXTRA_ALLOC,
SEQ_EXTRA_ID,
SEQ_EXTRA_SIZE,
SEQ_EXTRA_DATA_ALLOC,
SEQ_EXTRA_DATA_COPY,
SEQ_EXTRA_DUMMY_ALLOC,
SEQ_EXTRA_DUMMY_ID,
SEQ_EXTRA_DUMMY_SIZE,
SEQ_EXTRA_DUMMY_COPY,
} sequence;
/// Number of "things" left to be parsed. If we hit end of input
/// when this isn't zero, we have corrupt Metadata Block.
size_t todo_count;
/// Position in variable-length integers
size_t pos;
/// Temporary variable needed to decode variables whose type
/// is size_t instead of lzma_vli.
lzma_vli tmp;
/// Pointer to target structure to hold the parsed results.
lzma_metadata *metadata;
/// The Index Record we currently are parsing
lzma_index *index_current;
/// Number of Records in Index
size_t index_count;
/// Sum of Total Size fields in the Index
lzma_vli index_total_size;
/// Sum of Uncompressed Size fields in the Index
lzma_vli index_uncompressed_size;
/// True if Extra is present.
bool has_extra;
/// True if we have been requested to store the Extra to *metadata.
bool want_extra;
/// Pointer to the end of the Extra Record list.
lzma_extra *extra_tail;
/// Dummy Extra Record used when only verifying integrity of Extra
/// (not storing it to RAM).
lzma_extra extra_dummy;
/// Block decoder
lzma_next_coder block_decoder;
/// buffer[buffer_pos] is the next byte to process.
size_t buffer_pos;
/// buffer[buffer_size] is the first byte to not process.
size_t buffer_size;
/// Temporary buffer to which encoded Metadata is read before
/// it is parsed.
uint8_t buffer[LZMA_BUFFER_SIZE];
};
/// Reads a variable-length integer to coder->num.
#define read_vli(num) \
do { \
const lzma_ret ret = lzma_vli_decode( \
&num, &coder->pos, \
coder->buffer, &coder->buffer_pos, \
coder->buffer_size); \
if (ret != LZMA_STREAM_END) \
return ret; \
\
coder->pos = 0; \
} while (0)
static lzma_ret
process(lzma_coder *coder, lzma_allocator *allocator)
{
while (coder->buffer_pos < coder->buffer_size)
switch (coder->sequence) {
case SEQ_FLAGS:
// Reserved bits must be unset.
if (coder->buffer[coder->buffer_pos] & 0x70)
return LZMA_HEADER_ERROR;
coder->todo_count = 0;
// If Size of Header Metadata is present, prepare the
// variable for variable-length integer decoding. Otherwise
// set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
// field isn't present.
if (coder->buffer[coder->buffer_pos] & 0x01) {
coder->metadata->header_metadata_size = 0;
++coder->todo_count;
}
if (coder->buffer[coder->buffer_pos] & 0x02) {
coder->metadata->total_size = 0;
++coder->todo_count;
}
if (coder->buffer[coder->buffer_pos] & 0x04) {
coder->metadata->uncompressed_size = 0;
++coder->todo_count;
}
if (coder->buffer[coder->buffer_pos] & 0x08) {
// Setting index_count to 1 is just to indicate that
// Index is present. The real size is parsed later.
coder->index_count = 1;
++coder->todo_count;
}
coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
!= 0;
++coder->buffer_pos;
coder->sequence = SEQ_HEADER_METADATA_SIZE;
break;
case SEQ_HEADER_METADATA_SIZE:
if (coder->metadata->header_metadata_size
!= LZMA_VLI_VALUE_UNKNOWN) {
read_vli(coder->metadata->header_metadata_size);
if (coder->metadata->header_metadata_size == 0)
return LZMA_DATA_ERROR;
--coder->todo_count;
}
coder->sequence = SEQ_TOTAL_SIZE;
break;
case SEQ_TOTAL_SIZE:
if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
read_vli(coder->metadata->total_size);
if (coder->metadata->total_size == 0)
return LZMA_DATA_ERROR;
--coder->todo_count;
}
coder->sequence = SEQ_UNCOMPRESSED_SIZE;
break;
case SEQ_UNCOMPRESSED_SIZE:
if (coder->metadata->uncompressed_size
!= LZMA_VLI_VALUE_UNKNOWN) {
read_vli(coder->metadata->uncompressed_size);
--coder->todo_count;
}
coder->sequence = SEQ_INDEX_COUNT;
break;
case SEQ_INDEX_COUNT:
if (coder->index_count == 0) {
coder->sequence = SEQ_EXTRA_PREPARE;
break;
}
read_vli(coder->tmp);
// Index must not be empty nor far too big (wouldn't fit
// in RAM).
if (coder->tmp == 0 || coder->tmp
>= SIZE_MAX / sizeof(lzma_index))
return LZMA_DATA_ERROR;
coder->index_count = (size_t)(coder->tmp);
coder->tmp = 0;
coder->sequence = SEQ_INDEX_ALLOC;
break;
case SEQ_INDEX_ALLOC: {
lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
if (i == NULL)
return LZMA_MEM_ERROR;
i->total_size = 0;
i->uncompressed_size = 0;
i->next = NULL;
if (coder->metadata->index == NULL)
coder->metadata->index = i;
else
coder->index_current->next = i;
coder->index_current = i;
coder->sequence = SEQ_INDEX_TOTAL_SIZE;
}
// Fall through
case SEQ_INDEX_TOTAL_SIZE: {
read_vli(coder->index_current->total_size);
coder->index_total_size += coder->index_current->total_size;
if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
return LZMA_DATA_ERROR;
// No Block can have Total Size of zero bytes.
if (coder->index_current->total_size == 0)
return LZMA_DATA_ERROR;
if (--coder->index_count == 0) {
// If Total Size is present, it must match the sum
// of Total Sizes in Index.
if (coder->metadata->total_size
!= LZMA_VLI_VALUE_UNKNOWN
&& coder->metadata->total_size
!= coder->index_total_size)
return LZMA_DATA_ERROR;
coder->index_current = coder->metadata->index;
coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
} else {
coder->sequence = SEQ_INDEX_ALLOC;
}
break;
}
case SEQ_INDEX_UNCOMPRESSED_SIZE: {
read_vli(coder->index_current->uncompressed_size);
coder->index_uncompressed_size
+= coder->index_current->uncompressed_size;
if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
return LZMA_DATA_ERROR;
coder->index_current = coder->index_current->next;
if (coder->index_current == NULL) {
if (coder->metadata->uncompressed_size
!= LZMA_VLI_VALUE_UNKNOWN
&& coder->metadata->uncompressed_size
!= coder->index_uncompressed_size)
return LZMA_DATA_ERROR;
--coder->todo_count;
coder->sequence = SEQ_EXTRA_PREPARE;
}
break;
}
case SEQ_EXTRA_PREPARE:
assert(coder->todo_count == 0);
// If we get here, we have at least one byte of input left.
// If "Extra is present" flag is unset in Metadata Flags,
// it means that there is some garbage and we return an error.
if (!coder->has_extra)
return LZMA_DATA_ERROR;
if (!coder->want_extra) {
coder->extra_tail = &coder->extra_dummy;
coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
break;
}
coder->sequence = SEQ_EXTRA_ALLOC;
// Fall through
case SEQ_EXTRA_ALLOC: {
lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
if (e == NULL)
return LZMA_MEM_ERROR;
e->next = NULL;
e->id = 0;
e->size = 0;
e->data = NULL;
if (coder->metadata->extra == NULL)
coder->metadata->extra = e;
else
coder->extra_tail->next = e;
coder->extra_tail = e;
coder->todo_count = 1;
coder->sequence = SEQ_EXTRA_ID;
}
// Fall through
case SEQ_EXTRA_ID:
case SEQ_EXTRA_DUMMY_ID:
read_vli(coder->extra_tail->id);
if (coder->extra_tail->id == 0) {
coder->extra_tail->size = 0;
coder->extra_tail->data = NULL;
coder->todo_count = 0;
--coder->sequence;
} else {
++coder->sequence;
}
break;
case SEQ_EXTRA_SIZE:
case SEQ_EXTRA_DUMMY_SIZE:
read_vli(coder->tmp);
if (coder->tmp == 0) {
// We have no Data in the Extra Record. Don't
// allocate any memory for it. Go back to
// SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
coder->tmp = 0;
coder->sequence -= 2;
coder->todo_count = 0;
} else {
++coder->sequence;
}
break;
case SEQ_EXTRA_DATA_ALLOC: {
if (coder->tmp > EXTRA_SIZE_MAX)
return LZMA_DATA_ERROR;
coder->extra_tail->size = (size_t)(coder->tmp);
coder->tmp = 0;
// We reserve space for the trailing '\0' too.
uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
allocator);
if (d == NULL)
return LZMA_MEM_ERROR;
coder->extra_tail->data = d;
coder->sequence = SEQ_EXTRA_DATA_COPY;
}
// Fall through
case SEQ_EXTRA_DATA_COPY:
bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
coder->extra_tail->data, &coder->pos,
(size_t)(coder->extra_tail->size));
if ((size_t)(coder->extra_tail->size) == coder->pos) {
coder->extra_tail->data[coder->pos] = '\0';
coder->pos = 0;
coder->todo_count = 0;
coder->sequence = SEQ_EXTRA_ALLOC;
}
break;
case SEQ_EXTRA_DUMMY_ALLOC:
// Not really alloc, just initialize the dummy entry.
coder->extra_dummy = (lzma_extra){
.next = NULL,
.id = 0,
.size = 0,
.data = NULL,
};
coder->todo_count = 1;
coder->sequence = SEQ_EXTRA_DUMMY_ID;
break;
case SEQ_EXTRA_DUMMY_COPY: {
// Simply skip as many bytes as indicated by Extra Record Size.
// We don't check lzma_extra_size_max because we don't
// allocate any memory to hold the data.
const size_t in_avail = coder->buffer_size - coder->buffer_pos;
const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
coder->buffer_pos += skip;
coder->tmp -= skip;
if (coder->tmp == 0) {
coder->todo_count = 0;
coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
}
break;
}
default:
return LZMA_PROG_ERROR;
}
return LZMA_OK;
}
static lzma_ret
metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
const uint8_t *restrict in, size_t *restrict in_pos,
size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
size_t *restrict out_pos lzma_attribute((unused)),
size_t out_size lzma_attribute((unused)),
lzma_action action lzma_attribute((unused)))
{
bool end_was_reached = false;
while (true) {
// Fill the buffer if it is empty.
if (coder->buffer_pos == coder->buffer_size) {
coder->buffer_pos = 0;
coder->buffer_size = 0;
const lzma_ret ret = coder->block_decoder.code(
coder->block_decoder.coder, allocator,
in, in_pos, in_size, coder->buffer,
&coder->buffer_size, LZMA_BUFFER_SIZE,
LZMA_RUN);
switch (ret) {
case LZMA_OK:
// Return immediatelly if we got no new data.
if (coder->buffer_size == 0)
return LZMA_OK;
break;
case LZMA_STREAM_END:
end_was_reached = true;
break;
default:
return ret;
}
}
// Process coder->buffer.
const lzma_ret ret = process(coder, allocator);
if (ret != LZMA_OK)
return ret;
// On success, process() eats all the input.
assert(coder->buffer_pos == coder->buffer_size);
if (end_was_reached) {
// Check that the sequence is not in the
// middle of anything.
if (coder->todo_count != 0)
return LZMA_DATA_ERROR;
// If Size of Header Metadata Block was not
// present, we use zero as its size instead
// of LZMA_VLI_VALUE_UNKNOWN.
if (coder->metadata->header_metadata_size
== LZMA_VLI_VALUE_UNKNOWN)
coder->metadata->header_metadata_size = 0;
return LZMA_STREAM_END;
}
}
}
static void
metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
lzma_next_coder_end(&coder->block_decoder, allocator);
lzma_free(coder, allocator);
return;
}
static lzma_ret
metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
lzma_options_block *options, lzma_metadata *metadata,
bool want_extra)
{
if (options == NULL || metadata == NULL)
return LZMA_PROG_ERROR;
if (next->coder == NULL) {
next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
if (next->coder == NULL)
return LZMA_MEM_ERROR;
next->code = &metadata_decode;
next->end = &metadata_decoder_end;
next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
}
metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
metadata->index = NULL;
metadata->extra = NULL;
next->coder->sequence = SEQ_FLAGS;
next->coder->todo_count = 1;
next->coder->pos = 0;
next->coder->tmp = 0;
next->coder->metadata = metadata;
next->coder->index_current = NULL;
next->coder->index_count = 0;
next->coder->index_total_size = 0;
next->coder->index_uncompressed_size = 0;
next->coder->want_extra = want_extra;
next->coder->extra_tail = NULL;
next->coder->buffer_pos = 0;
next->coder->buffer_size = 0;
return lzma_block_decoder_init(
&next->coder->block_decoder, allocator, options);
}
extern lzma_ret
lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
lzma_options_block *options, lzma_metadata *metadata,
bool want_extra)
{
lzma_next_coder_init(metadata_decoder_init, next, allocator,
options, metadata, want_extra);
}
extern LZMA_API lzma_ret
lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
lzma_metadata *metadata, lzma_bool want_extra)
{
lzma_next_strm_init(strm, lzma_metadata_decoder_init,
options, metadata, want_extra);
strm->internal->supported_actions[LZMA_RUN] = true;
return LZMA_OK;
}