/////////////////////////////////////////////////////////////////////////////// // /// \file match_c.h /// \brief Template for different match finders /// /// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file /// sets slighly different #defines, resulting the different match finders. // // Copyright (C) 1999-2006 Igor Pavlov // Copyright (C) 2007 Lasse Collin // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // /////////////////////////////////////////////////////////////////////////////// ////////////// // Includes // ////////////// #include "check.h" /////////////// // Constants // /////////////// #define START_MAX_LEN 1 #ifdef HASH_ARRAY_2 # define NUM_HASH_DIRECT_BYTES 0 # define HASH_2_SIZE (1 << 10) # ifdef HASH_ARRAY_3 # define NUM_HASH_BYTES 4 # define HASH_3_SIZE (1 << 16) # define HASH_3_OFFSET HASH_2_SIZE # define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE) # else # define NUM_HASH_BYTES 3 # define FIX_HASH_SIZE HASH_2_SIZE # endif # define HASH_SIZE 0 # define MIN_MATCH_CHECK NUM_HASH_BYTES #else # define NUM_HASH_DIRECT_BYTES 2 # define NUM_HASH_BYTES 2 # define HASH_SIZE (1 << (8 * NUM_HASH_BYTES)) # define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1) # define FIX_HASH_SIZE 0 #endif //////////// // Macros // //////////// #ifdef HASH_ARRAY_2 # ifdef HASH_ARRAY_3 # define HASH_CALC() \ do { \ const uint32_t temp = lzma_crc32_table[0][ \ cur[0]] ^ cur[1]; \ hash_2_value = temp & (HASH_2_SIZE - 1); \ hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ & (HASH_3_SIZE - 1); \ hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \ ^ (lzma_crc32_table[0][cur[3]] << 5)) \ & lz->hash_mask; \ } while (0) # else # define HASH_CALC() \ do { \ const uint32_t temp = lzma_crc32_table[0][ \ cur[0]] ^ cur[1]; \ hash_2_value = temp & (HASH_2_SIZE - 1); \ hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \ & lz->hash_mask; \ } while (0) # endif #else # define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8) #endif // Moves the current read position forward by one byte. In LZMA SDK, // CLZInWindow::MovePos() can read more input data if needed, because of // the callback style API. In liblzma we must have ensured earlier, that // there is enough data available in lz->buffer. #define move_pos() \ do { \ if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \ lz->cyclic_buffer_pos = 0; \ ++lz->read_pos; \ assert(lz->read_pos <= lz->write_pos); \ if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \ lzma_lz_encoder_normalize(lz); \ } while (0) ////////////////////// // Global constants // ////////////////////// LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE; LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE; /////////////////// // API functions // /////////////////// LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER) { uint32_t len_limit; if (lz->read_pos + lz->match_max_len <= lz->write_pos) { len_limit = lz->match_max_len; } else { len_limit = lz->write_pos - lz->read_pos; if (len_limit < MIN_MATCH_CHECK) { distances[0] = 0; move_pos(); return; } } int32_t offset = 1; const uint32_t match_min_pos = lz->read_pos + lz->offset > lz->cyclic_buffer_size ? lz->read_pos + lz->offset - lz->cyclic_buffer_size : 0; const uint8_t *cur = lz->buffer + lz->read_pos; uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size #ifdef HASH_ARRAY_2 uint32_t hash_2_value; # ifdef HASH_ARRAY_3 uint32_t hash_3_value; # endif #endif uint32_t hash_value; HASH_CALC(); uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; #ifdef HASH_ARRAY_2 uint32_t cur_match2 = lz->hash[hash_2_value]; # ifdef HASH_ARRAY_3 uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value]; # endif lz->hash[hash_2_value] = lz->read_pos + lz->offset; if (cur_match2 > match_min_pos) { if (lz->buffer[cur_match2 - lz->offset] == cur[0]) { max_len = 2; distances[offset++] = 2; distances[offset++] = lz->read_pos + lz->offset - cur_match2 - 1; } } # ifdef HASH_ARRAY_3 lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset; if (cur_match3 > match_min_pos) { if (lz->buffer[cur_match3 - lz->offset] == cur[0]) { if (cur_match3 == cur_match2) offset -= 2; max_len = 3; distances[offset++] = 3; distances[offset++] = lz->read_pos + lz->offset - cur_match3 - 1; cur_match2 = cur_match3; } } # endif if (offset != 1 && cur_match2 == cur_match) { offset -= 2; max_len = START_MAX_LEN; } #endif lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset; #ifdef IS_HASH_CHAIN lz->son[lz->cyclic_buffer_pos] = cur_match; #else uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); uint32_t len0 = NUM_HASH_DIRECT_BYTES; uint32_t len1 = NUM_HASH_DIRECT_BYTES; #endif #if NUM_HASH_DIRECT_BYTES != 0 if (cur_match > match_min_pos) { if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset] != cur[NUM_HASH_DIRECT_BYTES]) { max_len = NUM_HASH_DIRECT_BYTES; distances[offset++] = NUM_HASH_DIRECT_BYTES; distances[offset++] = lz->read_pos + lz->offset - cur_match - 1; } } #endif uint32_t count = lz->cut_value; while (true) { if (cur_match <= match_min_pos || count-- == 0) { #ifndef IS_HASH_CHAIN *ptr0 = EMPTY_HASH_VALUE; *ptr1 = EMPTY_HASH_VALUE; #endif break; } const uint32_t delta = lz->read_pos + lz->offset - cur_match; const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos ? lz->cyclic_buffer_pos - delta : lz->cyclic_buffer_pos - delta + lz->cyclic_buffer_size; uint32_t *pair = lz->son + #ifdef IS_HASH_CHAIN cyclic_pos; #else (cyclic_pos << 1); #endif const uint8_t *pb = lz->buffer + cur_match - lz->offset; uint32_t len = #ifdef IS_HASH_CHAIN NUM_HASH_DIRECT_BYTES; if (pb[max_len] == cur[max_len]) #else MIN(len0, len1); #endif if (pb[len] == cur[len]) { while (++len != len_limit) if (pb[len] != cur[len]) break; if (max_len < len) { max_len = len; distances[offset++] = len; distances[offset++] = delta - 1; if (len == len_limit) { #ifndef IS_HASH_CHAIN *ptr1 = pair[0]; *ptr0 = pair[1]; #endif break; } } } #ifdef IS_HASH_CHAIN cur_match = *pair; #else if (pb[len] < cur[len]) { *ptr1 = cur_match; ptr1 = pair + 1; cur_match = *ptr1; len1 = len; } else { *ptr0 = cur_match; ptr0 = pair; cur_match = *ptr0; len0 = len; } #endif } distances[0] = offset - 1; move_pos(); return; } LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER) { do { #ifdef IS_HASH_CHAIN if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) { move_pos(); continue; } #else uint32_t len_limit; if (lz->read_pos + lz->match_max_len <= lz->write_pos) { len_limit = lz->match_max_len; } else { len_limit = lz->write_pos - lz->read_pos; if (len_limit < MIN_MATCH_CHECK) { move_pos(); continue; } } const uint32_t match_min_pos = lz->read_pos + lz->offset > lz->cyclic_buffer_size ? lz->read_pos + lz->offset - lz->cyclic_buffer_size : 0; #endif const uint8_t *cur = lz->buffer + lz->read_pos; #ifdef HASH_ARRAY_2 uint32_t hash_2_value; # ifdef HASH_ARRAY_3 uint32_t hash_3_value; uint32_t hash_value; HASH_CALC(); lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset; # else uint32_t hash_value; HASH_CALC(); # endif lz->hash[hash_2_value] = lz->read_pos + lz->offset; #else uint32_t hash_value; HASH_CALC(); #endif uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value]; lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset; #ifdef IS_HASH_CHAIN lz->son[lz->cyclic_buffer_pos] = cur_match; #else uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1; uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1); uint32_t len0 = NUM_HASH_DIRECT_BYTES; uint32_t len1 = NUM_HASH_DIRECT_BYTES; uint32_t count = lz->cut_value; while (true) { if (cur_match <= match_min_pos || count-- == 0) { *ptr0 = EMPTY_HASH_VALUE; *ptr1 = EMPTY_HASH_VALUE; break; } const uint32_t delta = lz->read_pos + lz->offset - cur_match; const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos ? lz->cyclic_buffer_pos - delta : lz->cyclic_buffer_pos - delta + lz->cyclic_buffer_size; uint32_t *pair = lz->son + (cyclic_pos << 1); const uint8_t *pb = lz->buffer + cur_match - lz->offset; uint32_t len = MIN(len0, len1); if (pb[len] == cur[len]) { while (++len != len_limit) if (pb[len] != cur[len]) break; if (len == len_limit) { *ptr1 = pair[0]; *ptr0 = pair[1]; break; } } if (pb[len] < cur[len]) { *ptr1 = cur_match; ptr1 = pair + 1; cur_match = *ptr1; len1 = len; } else { *ptr0 = cur_match; ptr0 = pair; cur_match = *ptr0; len0 = len; } } #endif move_pos(); } while (--num != 0); return; }