aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common/index_hash.c
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-06-18 18:02:10 +0300
committerLasse Collin <lasse.collin@tukaani.org>2008-06-18 18:02:10 +0300
commit7d17818cec8597f847b0a2537fde991bbc3d9e96 (patch)
tree9c41502e3eb96f103fe98e13456b382fbba7a292 /src/liblzma/common/index_hash.c
parentUpdate the file format specification draft. The new one is (diff)
downloadxz-7d17818cec8597f847b0a2537fde991bbc3d9e96.tar.xz
Update the code to mostly match the new simpler file format
specification. Simplify things by removing most of the support for known uncompressed size in most places. There are some miscellaneous changes here and there too. The API of liblzma has got many changes and still some more will be done soon. While most of the code has been updated, some things are not fixed (the command line tool will choke with invalid filter chain, if nothing else). Subblock filter is somewhat broken for now. It will be updated once the encoded format of the Subblock filter has been decided.
Diffstat (limited to 'src/liblzma/common/index_hash.c')
-rw-r--r--src/liblzma/common/index_hash.c340
1 files changed, 340 insertions, 0 deletions
diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c
new file mode 100644
index 00000000..35dea41f
--- /dev/null
+++ b/src/liblzma/common/index_hash.c
@@ -0,0 +1,340 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_hash.c
+/// \brief Validates Index by using a hash function
+//
+// Copyright (C) 2008 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+#include "check.h"
+
+
+typedef struct {
+ /// Sum of the Total Size fields
+ lzma_vli total_size;
+
+ /// Sum of the Uncompressed Size fields
+ lzma_vli uncompressed_size;
+
+ /// Number of Records
+ lzma_vli count;
+
+ /// Size of the List of Index Records as bytes
+ lzma_vli index_list_size;
+
+ /// Check calculated from Total Sizes and Uncompressed Sizes.
+ lzma_check check;
+
+} lzma_index_hash_info;
+
+
+struct lzma_index_hash_s {
+ enum {
+ SEQ_BLOCK,
+ SEQ_COUNT,
+ SEQ_TOTAL,
+ SEQ_UNCOMPRESSED,
+ SEQ_PADDING_INIT,
+ SEQ_PADDING,
+ SEQ_CRC32,
+ } sequence;
+
+ /// Information collected while decoding the actual Blocks.
+ lzma_index_hash_info blocks;
+
+ /// Information collected from the Index field.
+ lzma_index_hash_info records;
+
+ /// Number of Records not fully decoded
+ lzma_vli remaining;
+
+ /// Total Size currently being read from an Index Record.
+ lzma_vli total_size;
+
+ /// Uncompressed Size currently being read from an Index Record.
+ lzma_vli uncompressed_size;
+
+ /// Position in variable-length integers when decoding them from
+ /// the List of Records.
+ size_t pos;
+
+ /// CRC32 of the Index
+ uint32_t crc32;
+};
+
+
+extern LZMA_API lzma_index_hash *
+lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator)
+{
+ if (index_hash == NULL) {
+ index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
+ if (index_hash == NULL)
+ return NULL;
+ }
+
+ index_hash->sequence = SEQ_BLOCK;
+ index_hash->blocks.total_size = 0;
+ index_hash->blocks.uncompressed_size = 0;
+ index_hash->blocks.count = 0;
+ index_hash->blocks.index_list_size = 0;
+ index_hash->records.total_size = 0;
+ index_hash->records.uncompressed_size = 0;
+ index_hash->records.count = 0;
+ index_hash->records.index_list_size = 0;
+ index_hash->total_size = 0;
+ index_hash->uncompressed_size = 0;
+ index_hash->pos = 0;
+ index_hash->crc32 = 0;
+
+ // These cannot fail because LZMA_CHECK_BEST is known to be supported.
+ (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
+ (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
+
+ return index_hash;
+}
+
+
+extern LZMA_API void
+lzma_index_hash_end(lzma_index_hash *index_hash, lzma_allocator *allocator)
+{
+ lzma_free(index_hash, allocator);
+ return;
+}
+
+
+extern LZMA_API lzma_vli
+lzma_index_hash_size(const lzma_index_hash *index_hash)
+{
+ // Get the size of the Index from ->blocks instead of ->records for
+ // cases where application wants to know the Index Size before
+ // decoding the Index.
+ return index_size(index_hash->blocks.count,
+ index_hash->blocks.index_list_size);
+}
+
+
+/// Updates the sizes and the hash without any validation.
+static lzma_ret
+hash_append(lzma_index_hash_info *info, lzma_vli total_size,
+ lzma_vli uncompressed_size)
+{
+ info->total_size += total_size;
+ info->uncompressed_size += uncompressed_size;
+ info->index_list_size += lzma_vli_size(total_size_encode(total_size))
+ + lzma_vli_size(uncompressed_size);
+ ++info->count;
+
+ const lzma_vli sizes[2] = { total_size, uncompressed_size };
+ lzma_check_update(&info->check, LZMA_CHECK_BEST,
+ (const uint8_t *)(sizes), sizeof(sizes));
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API lzma_ret
+lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size,
+ lzma_vli uncompressed_size)
+{
+ // Validate the arguments.
+ if (index_hash->sequence != SEQ_BLOCK || total_size == 0 ||
+ total_size > LZMA_VLI_VALUE_MAX || (total_size & 3)
+ || uncompressed_size > LZMA_VLI_VALUE_MAX)
+ return LZMA_PROG_ERROR;
+
+ // Update the hash.
+ return_if_error(hash_append(&index_hash->blocks,
+ total_size, uncompressed_size));
+
+ // Validate the properties of *info are still in allowed limits.
+ if (index_hash->blocks.total_size > LZMA_VLI_VALUE_MAX
+ || index_hash->blocks.uncompressed_size
+ > LZMA_VLI_VALUE_MAX
+ || index_size(index_hash->blocks.count,
+ index_hash->blocks.index_list_size)
+ > LZMA_BACKWARD_SIZE_MAX
+ || index_stream_size(index_hash->blocks.total_size,
+ index_hash->blocks.count,
+ index_hash->blocks.index_list_size)
+ > LZMA_VLI_VALUE_MAX)
+ return LZMA_DATA_ERROR;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API lzma_ret
+lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
+ size_t *in_pos, size_t in_size)
+{
+ // Catch zero input buffer here, because in contrast to Index encoder
+ // and decoder functions, applications call this function directly
+ // instead of via lzma_code(), which does the buffer checking.
+ if (*in_pos >= in_size)
+ return LZMA_BUF_ERROR;
+
+ // NOTE: This function has many similarities to index_encode() and
+ // index_decode() functions found from index_encoder.c and
+ // index_decoder.c. See the comments especially in index_encoder.c.
+ const size_t in_start = *in_pos;
+ lzma_ret ret = LZMA_OK;
+
+ while (*in_pos < in_size)
+ switch (index_hash->sequence) {
+ case SEQ_BLOCK:
+ // Check the Index Indicator is present.
+ if (in[(*in_pos)++] != 0x00)
+ return LZMA_DATA_ERROR;
+
+ index_hash->sequence = SEQ_COUNT;
+ break;
+
+ case SEQ_COUNT: {
+ ret = lzma_vli_decode(&index_hash->remaining,
+ &index_hash->pos, in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ // The count must match the count of the Blocks decoded.
+ if (index_hash->remaining != index_hash->blocks.count)
+ return LZMA_DATA_ERROR;
+
+ ret = LZMA_OK;
+ index_hash->pos = 0;
+
+ // Handle the special case when there are no Blocks.
+ index_hash->sequence = index_hash->remaining == 0
+ ? SEQ_PADDING_INIT : SEQ_TOTAL;
+ break;
+ }
+
+ case SEQ_TOTAL:
+ case SEQ_UNCOMPRESSED: {
+ lzma_vli *size = index_hash->sequence == SEQ_TOTAL
+ ? &index_hash->total_size
+ : &index_hash->uncompressed_size;
+
+ ret = lzma_vli_decode(size, &index_hash->pos,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ ret = LZMA_OK;
+ index_hash->pos = 0;
+
+ if (index_hash->sequence == SEQ_TOTAL) {
+ if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX)
+ return LZMA_DATA_ERROR;
+
+ index_hash->total_size = total_size_decode(
+ index_hash->total_size);
+
+ index_hash->sequence = SEQ_UNCOMPRESSED;
+ } else {
+ // Update the hash.
+ return_if_error(hash_append(&index_hash->records,
+ index_hash->total_size,
+ index_hash->uncompressed_size));
+
+ // Verify that we don't go over the known sizes. Note
+ // that this validation is simpler than the one used
+ // in lzma_index_hash_append(), because here we know
+ // that values in index_hash->blocks are already
+ // validated and we are fine as long as we don't
+ // exceed them in index_hash->records.
+ if (index_hash->blocks.total_size
+ < index_hash->records.total_size
+ || index_hash->blocks.uncompressed_size
+ < index_hash->records.uncompressed_size
+ || index_hash->blocks.index_list_size
+ < index_hash->records.index_list_size)
+ return LZMA_DATA_ERROR;
+
+ // Check if this was the last Record.
+ index_hash->sequence = --index_hash->remaining == 0
+ ? SEQ_PADDING_INIT : SEQ_TOTAL;
+ }
+
+ break;
+ }
+
+ case SEQ_PADDING_INIT:
+ index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
+ index_hash->records.count,
+ index_hash->records.index_list_size)) & 3;
+ index_hash->sequence = SEQ_PADDING;
+
+ // Fall through
+
+ case SEQ_PADDING:
+ if (index_hash->pos > 0) {
+ --index_hash->pos;
+ if (in[(*in_pos)++] != 0x00)
+ return LZMA_DATA_ERROR;
+
+ break;
+ }
+
+ // Compare the sizes.
+ if (index_hash->blocks.total_size
+ != index_hash->records.total_size
+ || index_hash->blocks.uncompressed_size
+ != index_hash->records.uncompressed_size
+ || index_hash->blocks.index_list_size
+ != index_hash->records.index_list_size)
+ return LZMA_DATA_ERROR;
+
+ // Finish the hashes and compare them.
+ lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
+ lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
+ if (memcmp(index_hash->blocks.check.buffer,
+ index_hash->records.check.buffer,
+ lzma_check_sizes[LZMA_CHECK_BEST]) != 0)
+ return LZMA_DATA_ERROR;
+
+ // Finish the CRC32 calculation.
+ index_hash->crc32 = lzma_crc32(in + in_start,
+ *in_pos - in_start, index_hash->crc32);
+
+ index_hash->sequence = SEQ_CRC32;
+
+ // Fall through
+
+ case SEQ_CRC32:
+ do {
+ if (*in_pos == in_size)
+ return LZMA_OK;
+
+ if (((index_hash->crc32 >> (index_hash->pos * 8))
+ & 0xFF) != in[(*in_pos)++])
+ return LZMA_DATA_ERROR;
+
+ } while (++index_hash->pos < 4);
+
+ return LZMA_STREAM_END;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+out:
+ // Update the CRC32,
+ index_hash->crc32 = lzma_crc32(in + in_start,
+ *in_pos - in_start, index_hash->crc32);
+
+ return ret;
+}