aboutsummaryrefslogblamecommitdiff
path: root/src/liblzma/common/metadata_decoder.c
blob: 97045750b0d6cd25e7f9ec615e2524919e430d9f (plain) (tree)










































































































































































                                                                               





                                                                             




















































































































































































                                                                              











                                                                     








                                                               

                                                                              















                                                                             
                                                                   










































































































                                                                               
                                                              




































































                                                                            
///////////////////////////////////////////////////////////////////////////////
//
/// \file       metadata_decoder.c
/// \brief      Decodes metadata stored in Metadata Blocks
//
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#include "metadata_decoder.h"
#include "block_decoder.h"


/// Maximum size of a single Extra Record. Again, this is mostly to make
/// sure that the parsed lzma_vli fits into size_t. Still, maybe this should
/// be smaller.
#define EXTRA_SIZE_MAX (SIZE_MAX / 4)


struct lzma_coder_s {
	enum {
		SEQ_FLAGS,
		SEQ_HEADER_METADATA_SIZE,
		SEQ_TOTAL_SIZE,
		SEQ_UNCOMPRESSED_SIZE,
		SEQ_INDEX_COUNT,
		SEQ_INDEX_ALLOC,
		SEQ_INDEX_TOTAL_SIZE,
		SEQ_INDEX_UNCOMPRESSED_SIZE,
		SEQ_EXTRA_PREPARE,
		SEQ_EXTRA_ALLOC,
		SEQ_EXTRA_ID,
		SEQ_EXTRA_SIZE,
		SEQ_EXTRA_DATA_ALLOC,
		SEQ_EXTRA_DATA_COPY,
		SEQ_EXTRA_DUMMY_ALLOC,
		SEQ_EXTRA_DUMMY_ID,
		SEQ_EXTRA_DUMMY_SIZE,
		SEQ_EXTRA_DUMMY_COPY,
	} sequence;

	/// Number of "things" left to be parsed. If we hit end of input
	/// when this isn't zero, we have corrupt Metadata Block.
	size_t todo_count;

	/// Position in variable-length integers
	size_t pos;

	/// Temporary variable needed to decode variables whose type
	/// is size_t instead of lzma_vli.
	lzma_vli tmp;

	/// Pointer to target structure to hold the parsed results.
	lzma_metadata *metadata;

	/// The Index Record we currently are parsing
	lzma_index *index_current;

	/// Number of Records in Index
	size_t index_count;

	/// Sum of Total Size fields in the Index
	lzma_vli index_total_size;

	/// Sum of Uncompressed Size fields in the Index
	lzma_vli index_uncompressed_size;

	/// True if Extra is present.
	bool has_extra;

	/// True if we have been requested to store the Extra to *metadata.
	bool want_extra;

	/// Pointer to the end of the Extra Record list.
	lzma_extra *extra_tail;

	/// Dummy Extra Record used when only verifying integrity of Extra
	/// (not storing it to RAM).
	lzma_extra extra_dummy;

	/// Block decoder
	lzma_next_coder block_decoder;

	/// buffer[buffer_pos] is the next byte to process.
	size_t buffer_pos;

	/// buffer[buffer_size] is the first byte to not process.
	size_t buffer_size;

	/// Temporary buffer to which encoded Metadata is read before
	/// it is parsed.
	uint8_t buffer[LZMA_BUFFER_SIZE];
};


/// Reads a variable-length integer to coder->num.
#define read_vli(num) \
do { \
	const lzma_ret ret = lzma_vli_decode( \
			&num, &coder->pos, \
			coder->buffer, &coder->buffer_pos, \
			coder->buffer_size); \
	if (ret != LZMA_STREAM_END) \
		return ret; \
	\
	coder->pos = 0; \
} while (0)


static lzma_ret
process(lzma_coder *coder, lzma_allocator *allocator)
{
	while (coder->buffer_pos < coder->buffer_size)
	switch (coder->sequence) {
	case SEQ_FLAGS:
		// Reserved bits must be unset.
		if (coder->buffer[coder->buffer_pos] & 0x70)
			return LZMA_HEADER_ERROR;

		// If Size of Header Metadata is present, prepare the
		// variable for variable-length integer decoding. Otherwise
		// set it to LZMA_VLI_VALUE_UNKNOWN to indicate that the
		// field isn't present.
		if (coder->buffer[coder->buffer_pos] & 0x01) {
			coder->metadata->header_metadata_size = 0;
			++coder->todo_count;
		}

		if (coder->buffer[coder->buffer_pos] & 0x02) {
			coder->metadata->total_size = 0;
			++coder->todo_count;
		}

		if (coder->buffer[coder->buffer_pos] & 0x04) {
			coder->metadata->uncompressed_size = 0;
			++coder->todo_count;
		}

		if (coder->buffer[coder->buffer_pos] & 0x08) {
			// Setting index_count to 1 is just to indicate that
			// Index is present. The real size is parsed later.
			coder->index_count = 1;
			++coder->todo_count;
		}

		coder->has_extra = (coder->buffer[coder->buffer_pos] & 0x80)
				!= 0;

		++coder->buffer_pos;
		coder->sequence = SEQ_HEADER_METADATA_SIZE;
		break;

	case SEQ_HEADER_METADATA_SIZE:
		if (coder->metadata->header_metadata_size
				!= LZMA_VLI_VALUE_UNKNOWN) {
			read_vli(coder->metadata->header_metadata_size);

			if (coder->metadata->header_metadata_size == 0)
				return LZMA_DATA_ERROR;

			--coder->todo_count;
		} else {
			// Zero indicates that Size of Header Metadata Block
			// is not present. That is, after successful Metadata
			// decoding, metadata->header_metadata_size is
			// never LZMA_VLI_VALUE_UNKNOWN.
			coder->metadata->header_metadata_size = 0;
		}

		coder->sequence = SEQ_TOTAL_SIZE;
		break;

	case SEQ_TOTAL_SIZE:
		if (coder->metadata->total_size != LZMA_VLI_VALUE_UNKNOWN) {
			read_vli(coder->metadata->total_size);

			if (coder->metadata->total_size == 0)
				return LZMA_DATA_ERROR;

			--coder->todo_count;
		}

		coder->sequence = SEQ_UNCOMPRESSED_SIZE;
		break;

	case SEQ_UNCOMPRESSED_SIZE:
		if (coder->metadata->uncompressed_size
				!= LZMA_VLI_VALUE_UNKNOWN) {
			read_vli(coder->metadata->uncompressed_size);
			--coder->todo_count;
		}

		coder->sequence = SEQ_INDEX_COUNT;
		break;

	case SEQ_INDEX_COUNT:
		if (coder->index_count == 0) {
			coder->sequence = SEQ_EXTRA_PREPARE;
			break;
		}

		read_vli(coder->tmp);

		// Index must not be empty nor far too big (wouldn't fit
		// in RAM).
		if (coder->tmp == 0 || coder->tmp
				>= SIZE_MAX / sizeof(lzma_index))
			return LZMA_DATA_ERROR;

		coder->index_count = (size_t)(coder->tmp);
		coder->tmp = 0;

		coder->sequence = SEQ_INDEX_ALLOC;
		break;

	case SEQ_INDEX_ALLOC: {
		lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
		if (i == NULL)
			return LZMA_MEM_ERROR;

		i->total_size = 0;
		i->uncompressed_size = 0;
		i->next = NULL;

		if (coder->metadata->index == NULL)
			coder->metadata->index = i;
		else
			coder->index_current->next = i;

		coder->index_current = i;

		coder->sequence = SEQ_INDEX_TOTAL_SIZE;
	}

	// Fall through

	case SEQ_INDEX_TOTAL_SIZE: {
		read_vli(coder->index_current->total_size);

		coder->index_total_size += coder->index_current->total_size;
		if (coder->index_total_size > LZMA_VLI_VALUE_MAX)
			return LZMA_DATA_ERROR;

		// No Block can have Total Size of zero bytes.
		if (coder->index_current->total_size == 0)
			return LZMA_DATA_ERROR;

		if (--coder->index_count == 0) {
			// If Total Size is present, it must match the sum
			// of Total Sizes in Index.
			if (coder->metadata->total_size
						!= LZMA_VLI_VALUE_UNKNOWN
					&& coder->metadata->total_size
						!= coder->index_total_size)
				return LZMA_DATA_ERROR;

			coder->index_current = coder->metadata->index;
			coder->sequence = SEQ_INDEX_UNCOMPRESSED_SIZE;
		} else {
			coder->sequence = SEQ_INDEX_ALLOC;
		}

		break;
	}

	case SEQ_INDEX_UNCOMPRESSED_SIZE: {
		read_vli(coder->index_current->uncompressed_size);

		coder->index_uncompressed_size
				+= coder->index_current->uncompressed_size;
		if (coder->index_uncompressed_size > LZMA_VLI_VALUE_MAX)
			return LZMA_DATA_ERROR;

		coder->index_current = coder->index_current->next;
		if (coder->index_current == NULL) {
			if (coder->metadata->uncompressed_size
						!= LZMA_VLI_VALUE_UNKNOWN
					&& coder->metadata->uncompressed_size
					!= coder->index_uncompressed_size)
				return LZMA_DATA_ERROR;

			--coder->todo_count;
			coder->sequence = SEQ_EXTRA_PREPARE;
		}

		break;
	}

	case SEQ_EXTRA_PREPARE:
		assert(coder->todo_count == 0);

		// If we get here, we have at least one byte of input left.
		// If "Extra is present" flag is unset in Metadata Flags,
		// it means that there is some garbage and we return an error.
		if (!coder->has_extra)
			return LZMA_DATA_ERROR;

		if (!coder->want_extra) {
			coder->extra_tail = &coder->extra_dummy;
			coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
			break;
		}

		coder->sequence = SEQ_EXTRA_ALLOC;

	// Fall through

	case SEQ_EXTRA_ALLOC: {
		lzma_extra *e = lzma_alloc(sizeof(lzma_extra), allocator);
		if (e == NULL)
			return LZMA_MEM_ERROR;

		e->next = NULL;
		e->id = 0;
		e->size = 0;
		e->data = NULL;

		if (coder->metadata->extra == NULL)
			coder->metadata->extra = e;
		else
			coder->extra_tail->next = e;

		coder->extra_tail = e;

		coder->todo_count = 1;
		coder->sequence = SEQ_EXTRA_ID;
	}

	// Fall through

	case SEQ_EXTRA_ID:
	case SEQ_EXTRA_DUMMY_ID:
		read_vli(coder->extra_tail->id);

		if (coder->extra_tail->id == 0) {
			coder->extra_tail->size = 0;
			coder->extra_tail->data = NULL;
			coder->todo_count = 0;
			--coder->sequence;
		} else {
			++coder->sequence;
		}

		break;

	case SEQ_EXTRA_SIZE:
	case SEQ_EXTRA_DUMMY_SIZE:
		read_vli(coder->tmp);

		if (coder->tmp == 0) {
			// We have no Data in the Extra Record. Don't
			// allocate any memory for it. Go back to
			// SEQ_EXTRA_ALLOC or SEQ_EXTRA_DUMMY_ALLOC.
			coder->tmp = 0;
			coder->sequence -= 2;
			coder->todo_count = 0;
		} else {
			++coder->sequence;
		}

		break;

	case SEQ_EXTRA_DATA_ALLOC: {
		if (coder->tmp > EXTRA_SIZE_MAX)
			return LZMA_DATA_ERROR;

		coder->extra_tail->size = (size_t)(coder->tmp);
		coder->tmp = 0;

		// We reserve space for the trailing '\0' too.
		uint8_t *d = lzma_alloc((size_t)(coder->extra_tail->size) + 1,
				allocator);
		if (d == NULL)
			return LZMA_MEM_ERROR;

		coder->extra_tail->data = d;
		coder->sequence = SEQ_EXTRA_DATA_COPY;
	}

	// Fall through

	case SEQ_EXTRA_DATA_COPY:
		bufcpy(coder->buffer, &coder->buffer_pos, coder->buffer_size,
				coder->extra_tail->data, &coder->pos,
				(size_t)(coder->extra_tail->size));

		if ((size_t)(coder->extra_tail->size) == coder->pos) {
			coder->extra_tail->data[coder->pos] = '\0';
			coder->pos = 0;
			coder->todo_count = 0;
			coder->sequence = SEQ_EXTRA_ALLOC;
		}

		break;

	case SEQ_EXTRA_DUMMY_ALLOC:
		// Not really alloc, just initialize the dummy entry.
		coder->extra_dummy = (lzma_extra){
			.next = NULL,
			.id = 0,
			.size = 0,
			.data = NULL,
		};

		coder->todo_count = 1;
		coder->sequence = SEQ_EXTRA_DUMMY_ID;
		break;

	case SEQ_EXTRA_DUMMY_COPY: {
		// Simply skip as many bytes as indicated by Extra Record Size.
		// We don't check lzma_extra_size_max because we don't
		// allocate any memory to hold the data.
		const size_t in_avail = coder->buffer_size - coder->buffer_pos;
		const size_t skip = MIN((lzma_vli)(in_avail), coder->tmp);
		coder->buffer_pos += skip;
		coder->tmp -= skip;

		if (coder->tmp == 0) {
			coder->todo_count = 0;
			coder->sequence = SEQ_EXTRA_DUMMY_ALLOC;
		}

		break;
	}

	default:
		return LZMA_PROG_ERROR;
	}

	return LZMA_OK;
}


static lzma_ret
metadata_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out lzma_attribute((unused)),
		size_t *restrict out_pos lzma_attribute((unused)),
		size_t out_size lzma_attribute((unused)),
		lzma_action action lzma_attribute((unused)))
{
	bool end_was_reached = false;

	while (true) {
		// Fill the buffer if it is empty.
		if (coder->buffer_pos == coder->buffer_size) {
			coder->buffer_pos = 0;
			coder->buffer_size = 0;

			const lzma_ret ret = coder->block_decoder.code(
					coder->block_decoder.coder, allocator,
					in, in_pos, in_size, coder->buffer,
					&coder->buffer_size, LZMA_BUFFER_SIZE,
					LZMA_RUN);

			switch (ret) {
			case LZMA_OK:
				// Return immediatelly if we got no new data.
				if (coder->buffer_size == 0)
					return LZMA_OK;

				break;

			case LZMA_STREAM_END:
				end_was_reached = true;
				break;

			default:
				return ret;
			}
		}

		// Process coder->buffer.
		const lzma_ret ret = process(coder, allocator);
		if (ret != LZMA_OK)
			return ret;

		// On success, process() eats all the input.
		assert(coder->buffer_pos == coder->buffer_size);

		if (end_was_reached) {
			// Check that the sequence is not in the
			// middle of anything.
			if (coder->todo_count != 0)
				return LZMA_DATA_ERROR;

			return LZMA_STREAM_END;
		}
	}
}


static void
metadata_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	lzma_next_coder_end(&coder->block_decoder, allocator);
	lzma_free(coder, allocator);
	return;
}


static lzma_ret
metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		lzma_options_block *options, lzma_metadata *metadata,
		bool want_extra)
{
	if (options == NULL || metadata == NULL)
		return LZMA_PROG_ERROR;

	if (next->coder == NULL) {
		next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (next->coder == NULL)
			return LZMA_MEM_ERROR;

		next->code = &metadata_decode;
		next->end = &metadata_decoder_end;
		next->coder->block_decoder = LZMA_NEXT_CODER_INIT;
	}

	metadata->header_metadata_size = LZMA_VLI_VALUE_UNKNOWN;
	metadata->total_size = LZMA_VLI_VALUE_UNKNOWN;
	metadata->uncompressed_size = LZMA_VLI_VALUE_UNKNOWN;
	metadata->index = NULL;
	metadata->extra = NULL;

	next->coder->sequence = SEQ_FLAGS;
	next->coder->todo_count = 0;
	next->coder->pos = 0;
	next->coder->tmp = 0;
	next->coder->metadata = metadata;
	next->coder->index_current = NULL;
	next->coder->index_count = 0;
	next->coder->index_total_size = 0;
	next->coder->index_uncompressed_size = 0;
	next->coder->want_extra = want_extra;
	next->coder->extra_tail = NULL;
	next->coder->buffer_pos = 0;
	next->coder->buffer_size = 0;

	return lzma_block_decoder_init(
			&next->coder->block_decoder, allocator, options);
}


extern lzma_ret
lzma_metadata_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		lzma_options_block *options, lzma_metadata *metadata,
		bool want_extra)
{
	lzma_next_coder_init(metadata_decoder_init, next, allocator,
			options, metadata, want_extra);
}


extern LZMA_API lzma_ret
lzma_metadata_decoder(lzma_stream *strm, lzma_options_block *options,
		lzma_metadata *metadata, lzma_bool want_extra)
{
	lzma_next_strm_init(strm, lzma_metadata_decoder_init,
			options, metadata, want_extra);

	strm->internal->supported_actions[LZMA_RUN] = true;

	return LZMA_OK;
}