diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-06-18 18:02:10 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-06-18 18:02:10 +0300 |
commit | 7d17818cec8597f847b0a2537fde991bbc3d9e96 (patch) | |
tree | 9c41502e3eb96f103fe98e13456b382fbba7a292 /src/liblzma/common/index_decoder.c | |
parent | Update the file format specification draft. The new one is (diff) | |
download | xz-7d17818cec8597f847b0a2537fde991bbc3d9e96.tar.xz |
Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the
support for known uncompressed size in most places.
There are some miscellaneous changes here and there too.
The API of liblzma has got many changes and still some
more will be done soon. While most of the code has been
updated, some things are not fixed (the command line tool
will choke with invalid filter chain, if nothing else).
Subblock filter is somewhat broken for now. It will be
updated once the encoded format of the Subblock filter
has been decided.
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/common/index_decoder.c | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c new file mode 100644 index 00000000..1635948c --- /dev/null +++ b/src/liblzma/common/index_decoder.c @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file index_decoder.c +/// \brief Decodes the Index field +// +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "index.h" +#include "check.h" + + +struct lzma_coder_s { + enum { + SEQ_INDICATOR, + SEQ_COUNT, + SEQ_TOTAL, + SEQ_UNCOMPRESSED, + SEQ_PADDING_INIT, + SEQ_PADDING, + SEQ_CRC32, + } sequence; + + /// Target Index + lzma_index *index; + + /// Number of Records left to decode. + lzma_vli count; + + /// The most recent Total Size field + lzma_vli total_size; + + /// The most recent Uncompressed Size field + lzma_vli uncompressed_size; + + /// Position in integers + size_t pos; + + /// CRC32 of the List of Records field + uint32_t crc32; +}; + + +static lzma_ret +index_decode(lzma_coder *coder, lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out lzma_attribute((unused)), + size_t *restrict out_pos lzma_attribute((unused)), + size_t out_size lzma_attribute((unused)), + lzma_action action lzma_attribute((unused))) +{ + // Similar optimization as in index_encoder.c + const size_t in_start = *in_pos; + lzma_ret ret = LZMA_OK; + + while (*in_pos < in_size) + switch (coder->sequence) { + case SEQ_INDICATOR: + // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or + // LZMA_FORMAT_ERROR, because a typical usage case for Index + // decoder is when parsing the Stream backwards. If seeking + // backward from the Stream Footer gives us something that + // doesn't begin with Index Indicator, the file is considered + // corrupt, not "programming error" or "unrecognized file + // format". One could argue that the application should + // verify the Index Indicator before trying to decode the + // Index, but well, I suppose it is simpler this way. + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + coder->sequence = SEQ_COUNT; + break; + + case SEQ_COUNT: { + ret = lzma_vli_decode(&coder->count, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + coder->sequence = coder->count == 0 + ? SEQ_PADDING_INIT : SEQ_TOTAL; + break; + } + + case SEQ_TOTAL: + case SEQ_UNCOMPRESSED: { + lzma_vli *size = coder->sequence == SEQ_TOTAL + ? &coder->total_size + : &coder->uncompressed_size; + + ret = lzma_vli_decode(size, &coder->pos, + in, in_pos, in_size); + if (ret != LZMA_STREAM_END) + goto out; + + ret = LZMA_OK; + coder->pos = 0; + + if (coder->sequence == SEQ_TOTAL) { + // Validate that encoded Total Size isn't too big. + if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + return LZMA_DATA_ERROR; + + // Convert the encoded Total Size to the real + // Total Size. + coder->total_size = total_size_decode( + coder->total_size); + coder->sequence = SEQ_UNCOMPRESSED; + } else { + // Add the decoded Record to the Index. + return_if_error(lzma_index_append( + coder->index, allocator, + coder->total_size, + coder->uncompressed_size)); + + // Check if this was the last Record. + coder->sequence = --coder->count == 0 + ? SEQ_PADDING_INIT + : SEQ_TOTAL; + } + + break; + } + + case SEQ_PADDING_INIT: + coder->pos = lzma_index_padding_size(coder->index); + coder->sequence = SEQ_PADDING; + + // Fall through + + case SEQ_PADDING: + if (coder->pos > 0) { + --coder->pos; + if (in[(*in_pos)++] != 0x00) + return LZMA_DATA_ERROR; + + break; + } + + // Finish the CRC32 calculation. + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + coder->sequence = SEQ_CRC32; + + // Fall through + + case SEQ_CRC32: + do { + if (*in_pos == in_size) + return LZMA_OK; + + if (((coder->crc32 >> (coder->pos * 8)) & 0xFF) + != in[(*in_pos)++]) + return LZMA_DATA_ERROR; + + } while (++coder->pos < 4); + + // Make index NULL so we don't free it unintentionally. + coder->index = NULL; + + return LZMA_STREAM_END; + + default: + assert(0); + return LZMA_PROG_ERROR; + } + +out: + // Update the CRC32, + coder->crc32 = lzma_crc32(in + in_start, + *in_pos - in_start, coder->crc32); + + return ret; +} + + +static void +index_decoder_end(lzma_coder *coder, lzma_allocator *allocator) +{ + lzma_index_end(coder->index, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + if (i == NULL) + return LZMA_PROG_ERROR; + + if (next->coder == NULL) { + next->coder = lzma_alloc(sizeof(lzma_coder), allocator); + if (next->coder == NULL) + return LZMA_MEM_ERROR; + + next->code = &index_decode; + next->end = &index_decoder_end; + next->coder->index = NULL; + } else { + lzma_index_end(next->coder->index, allocator); + } + + // We always allocate a new lzma_index. + *i = lzma_index_init(NULL, allocator); + if (*i == NULL) + return LZMA_MEM_ERROR; + + // Initialize the rest. + next->coder->sequence = SEQ_INDICATOR; + next->coder->index = *i; + next->coder->pos = 0; + next->coder->crc32 = 0; + + return LZMA_OK; +} + + +/* +extern lzma_ret +lzma_index_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, + lzma_index **i) +{ + lzma_next_coder_init(index_decoder_init, next, allocator, i); +} +*/ + + +extern LZMA_API lzma_ret +lzma_index_decoder(lzma_stream *strm, lzma_index **i) +{ + lzma_next_strm_init(strm, index_decoder_init, i); + + strm->internal->supported_actions[LZMA_RUN] = true; + + return LZMA_OK; +} |