aboutsummaryrefslogblamecommitdiff
path: root/src/liblzma/lz/match_c.h
blob: e1ee6a078eaf90dd7a4e1a7c65d14bb59e968a4b (plain) (tree)



























































































































                                                                               
















































































































































































                                                                               

































































































                                                                            
///////////////////////////////////////////////////////////////////////////////
//
/// \file       match_c.h
/// \brief      Template for different match finders
///
/// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
/// sets slighly different #defines, resulting the different match finders.
//
//  Copyright (C) 1999-2006 Igor Pavlov
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

//////////////
// Includes //
//////////////

#include "check.h"


///////////////
// Constants //
///////////////

#define START_MAX_LEN 1

#ifdef HASH_ARRAY_2
#	define NUM_HASH_DIRECT_BYTES 0
#	define HASH_2_SIZE (1 << 10)
#	ifdef HASH_ARRAY_3
#		define NUM_HASH_BYTES 4
#		define HASH_3_SIZE (1 << 16)
#		define HASH_3_OFFSET HASH_2_SIZE
#		define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
#	else
#		define NUM_HASH_BYTES 3
#		define FIX_HASH_SIZE HASH_2_SIZE
#	endif
#	define HASH_SIZE 0
#	define MIN_MATCH_CHECK NUM_HASH_BYTES
#else
#	define NUM_HASH_DIRECT_BYTES 2
#	define NUM_HASH_BYTES 2
#	define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
#	define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
#	define FIX_HASH_SIZE 0
#endif


////////////
// Macros //
////////////

#ifdef HASH_ARRAY_2
#	ifdef HASH_ARRAY_3
#		define HASH_CALC() \
		do { \
			const uint32_t temp = lzma_crc32_table[0][ \
					cur[0]] ^ cur[1]; \
			hash_2_value = temp & (HASH_2_SIZE - 1); \
			hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
					& (HASH_3_SIZE - 1); \
			hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
					^ (lzma_crc32_table[0][cur[3]] << 5)) \
					& lz->hash_mask; \
		} while (0)
#	else
#		define HASH_CALC() \
		do { \
			const uint32_t temp = lzma_crc32_table[0][ \
					cur[0]] ^ cur[1]; \
			hash_2_value = temp & (HASH_2_SIZE - 1); \
			hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
					& lz->hash_mask; \
		} while (0)
#	endif
#else
#	define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
#endif


// Moves the current read position forward by one byte. In LZMA SDK,
// CLZInWindow::MovePos() can read more input data if needed, because of
// the callback style API. In liblzma we must have ensured earlier, that
// there is enough data available in lz->buffer.
#define move_pos() \
do { \
	if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
		lz->cyclic_buffer_pos = 0; \
	++lz->read_pos; \
	assert(lz->read_pos <= lz->write_pos); \
	if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
		lzma_lz_encoder_normalize(lz); \
} while (0)


//////////////////////
// Global constants //
//////////////////////

LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;


///////////////////
// API functions //
///////////////////

LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
{
	uint32_t len_limit;
	if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
		len_limit = lz->match_max_len;
	} else {
		len_limit = lz->write_pos - lz->read_pos;
		if (len_limit < MIN_MATCH_CHECK) {
			distances[0] = 0;
			move_pos();
			return;
		}
	}

	int32_t offset = 1;
	const uint32_t match_min_pos
			= lz->read_pos + lz->offset > lz->cyclic_buffer_size
			? lz->read_pos + lz->offset - lz->cyclic_buffer_size
			: 0;
	const uint8_t *cur = lz->buffer + lz->read_pos;
	uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size

#ifdef HASH_ARRAY_2
	uint32_t hash_2_value;
#	ifdef HASH_ARRAY_3
	uint32_t hash_3_value;
#	endif
#endif
	uint32_t hash_value;
	HASH_CALC();

	uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
#ifdef HASH_ARRAY_2
	uint32_t cur_match2 = lz->hash[hash_2_value];
#	ifdef HASH_ARRAY_3
	uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
#	endif
	lz->hash[hash_2_value] = lz->read_pos + lz->offset;

	if (cur_match2 > match_min_pos) {
		if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
			max_len = 2;
			distances[offset++] = 2;
			distances[offset++] = lz->read_pos + lz->offset
					- cur_match2 - 1;
		}
	}

#	ifdef HASH_ARRAY_3
	lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
	if (cur_match3 > match_min_pos) {
		if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
			if (cur_match3 == cur_match2)
				offset -= 2;

			max_len = 3;
			distances[offset++] = 3;
			distances[offset++] = lz->read_pos + lz->offset
					- cur_match3 - 1;
			cur_match2 = cur_match3;
		}
	}
#	endif

	if (offset != 1 && cur_match2 == cur_match) {
		offset -= 2;
		max_len = START_MAX_LEN;
	}
#endif

	lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;

#ifdef IS_HASH_CHAIN
	lz->son[lz->cyclic_buffer_pos] = cur_match;
#else
	uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
	uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);

	uint32_t len0 = NUM_HASH_DIRECT_BYTES;
	uint32_t len1 = NUM_HASH_DIRECT_BYTES;
#endif

#if NUM_HASH_DIRECT_BYTES != 0
	if (cur_match > match_min_pos) {
		if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
				!= cur[NUM_HASH_DIRECT_BYTES]) {
			max_len = NUM_HASH_DIRECT_BYTES;
			distances[offset++] = NUM_HASH_DIRECT_BYTES;
			distances[offset++] = lz->read_pos + lz->offset
					- cur_match - 1;
		}
	}
#endif

	uint32_t count = lz->cut_value;

	while (true) {
		if (cur_match <= match_min_pos || count-- == 0) {
#ifndef IS_HASH_CHAIN
			*ptr0 = EMPTY_HASH_VALUE;
			*ptr1 = EMPTY_HASH_VALUE;
#endif
			break;
		}

		const uint32_t delta = lz->read_pos + lz->offset - cur_match;
		const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
				? lz->cyclic_buffer_pos - delta
				: lz->cyclic_buffer_pos - delta
					+ lz->cyclic_buffer_size;
		uint32_t *pair = lz->son +
#ifdef IS_HASH_CHAIN
				cyclic_pos;
#else
				(cyclic_pos << 1);
#endif

		const uint8_t *pb = lz->buffer + cur_match - lz->offset;
		uint32_t len =
#ifdef IS_HASH_CHAIN
				NUM_HASH_DIRECT_BYTES;
		if (pb[max_len] == cur[max_len])
#else
				MIN(len0, len1);
#endif

		if (pb[len] == cur[len]) {
			while (++len != len_limit)
				if (pb[len] != cur[len])
					break;

			if (max_len < len) {
				max_len = len;
				distances[offset++] = len;
				distances[offset++] = delta - 1;
				if (len == len_limit) {
#ifndef IS_HASH_CHAIN
					*ptr1 = pair[0];
					*ptr0 = pair[1];
#endif
					break;
				}
			}
		}

#ifdef IS_HASH_CHAIN
		cur_match = *pair;
#else
		if (pb[len] < cur[len]) {
			*ptr1 = cur_match;
			ptr1 = pair + 1;
			cur_match = *ptr1;
			len1 = len;
		} else {
			*ptr0 = cur_match;
			ptr0 = pair;
			cur_match = *ptr0;
			len0 = len;
		}
#endif
	}

	distances[0] = offset - 1;

	move_pos();

	return;
}


LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
{
	do {
#ifdef IS_HASH_CHAIN
		if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
			move_pos();
			continue;
		}
#else
		uint32_t len_limit;
		if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
			len_limit = lz->match_max_len;
		} else {
			len_limit = lz->write_pos - lz->read_pos;
			if (len_limit < MIN_MATCH_CHECK) {
				move_pos();
				continue;
			}
		}
		const uint32_t match_min_pos
			= lz->read_pos + lz->offset > lz->cyclic_buffer_size
			? lz->read_pos + lz->offset - lz->cyclic_buffer_size
			: 0;
#endif

		const uint8_t *cur = lz->buffer + lz->read_pos;

#ifdef HASH_ARRAY_2
		uint32_t hash_2_value;
#	ifdef HASH_ARRAY_3
		uint32_t hash_3_value;
		uint32_t hash_value;
		HASH_CALC();
		lz->hash[HASH_3_OFFSET + hash_3_value]
				= lz->read_pos + lz->offset;
#	else
		uint32_t hash_value;
		HASH_CALC();
#	endif
		lz->hash[hash_2_value] = lz->read_pos + lz->offset;
#else
		uint32_t hash_value;
		HASH_CALC();
#endif

		uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
		lz->hash[FIX_HASH_SIZE + hash_value]
				= lz->read_pos + lz->offset;

#ifdef IS_HASH_CHAIN
		lz->son[lz->cyclic_buffer_pos] = cur_match;
#else
		uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
		uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);

		uint32_t len0 = NUM_HASH_DIRECT_BYTES;
		uint32_t len1 = NUM_HASH_DIRECT_BYTES;
		uint32_t count = lz->cut_value;

		while (true) {
			if (cur_match <= match_min_pos || count-- == 0) {
				*ptr0 = EMPTY_HASH_VALUE;
				*ptr1 = EMPTY_HASH_VALUE;
				break;
			}

			const uint32_t delta = lz->read_pos
					+ lz->offset - cur_match;
			const uint32_t cyclic_pos
					= delta <= lz->cyclic_buffer_pos
					? lz->cyclic_buffer_pos - delta
					: lz->cyclic_buffer_pos - delta
						+ lz->cyclic_buffer_size;
			uint32_t *pair = lz->son + (cyclic_pos << 1);

			const uint8_t *pb = lz->buffer + cur_match
					- lz->offset;
			uint32_t len = MIN(len0, len1);

			if (pb[len] == cur[len]) {
				while (++len != len_limit)
					if (pb[len] != cur[len])
						break;

				if (len == len_limit) {
					*ptr1 = pair[0];
					*ptr0 = pair[1];
					break;
				}
			}

			if (pb[len] < cur[len]) {
				*ptr1 = cur_match;
				ptr1 = pair + 1;
				cur_match = *ptr1;
				len1 = len;
			} else {
				*ptr0 = cur_match;
				ptr0 = pair;
				cur_match = *ptr0;
				len0 = len;
			}
		}
#endif

		move_pos();

	} while (--num != 0);

	return;
}