blob: 687663850d17292a4cddd807ab0fb86f9c47b35c [file] [log] [blame]
///////////////////////////////////////////////////////////////////////////////
//
/// \file match_c.h
/// \brief Template for different match finders
///
/// This file is included by hc3.c, hc4, bt2.c, bt3.c and bt4.c. Each file
/// sets slighly different #defines, resulting the different match finders.
//
// Copyright (C) 1999-2006 Igor Pavlov
// Copyright (C) 2007 Lasse Collin
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////
//////////////
// Includes //
//////////////
#include "check.h"
///////////////
// Constants //
///////////////
#define START_MAX_LEN 1
#ifdef HASH_ARRAY_2
# define NUM_HASH_DIRECT_BYTES 0
# define HASH_2_SIZE (1 << 10)
# ifdef HASH_ARRAY_3
# define NUM_HASH_BYTES 4
# define HASH_3_SIZE (1 << 16)
# define HASH_3_OFFSET HASH_2_SIZE
# define FIX_HASH_SIZE (HASH_2_SIZE + HASH_3_SIZE)
# else
# define NUM_HASH_BYTES 3
# define FIX_HASH_SIZE HASH_2_SIZE
# endif
# define HASH_SIZE 0
# define MIN_MATCH_CHECK NUM_HASH_BYTES
#else
# define NUM_HASH_DIRECT_BYTES 2
# define NUM_HASH_BYTES 2
# define HASH_SIZE (1 << (8 * NUM_HASH_BYTES))
# define MIN_MATCH_CHECK (NUM_HASH_BYTES + 1)
# define FIX_HASH_SIZE 0
#endif
////////////
// Macros //
////////////
#ifdef HASH_ARRAY_2
# ifdef HASH_ARRAY_3
# define HASH_CALC() \
do { \
const uint32_t temp = lzma_crc32_table[0][ \
cur[0]] ^ cur[1]; \
hash_2_value = temp & (HASH_2_SIZE - 1); \
hash_3_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
& (HASH_3_SIZE - 1); \
hash_value = (temp ^ ((uint32_t)(cur[2]) << 8) \
^ (lzma_crc32_table[0][cur[3]] << 5)) \
& lz->hash_mask; \
} while (0)
# else
# define HASH_CALC() \
do { \
const uint32_t temp = lzma_crc32_table[0][ \
cur[0]] ^ cur[1]; \
hash_2_value = temp & (HASH_2_SIZE - 1); \
hash_value = (temp ^ ((uint32_t)(cur[2]) << 8)) \
& lz->hash_mask; \
} while (0)
# endif
#else
# define HASH_CALC() hash_value = cur[0] ^ ((uint32_t)(cur[1]) << 8)
#endif
// Moves the current read position forward by one byte. In LZMA SDK,
// CLZInWindow::MovePos() can read more input data if needed, because of
// the callback style API. In liblzma we must have ensured earlier, that
// there is enough data available in lz->buffer.
#define move_pos() \
do { \
if (++lz->cyclic_buffer_pos == lz->cyclic_buffer_size) \
lz->cyclic_buffer_pos = 0; \
++lz->read_pos; \
assert(lz->read_pos <= lz->write_pos); \
if (lz->read_pos == MAX_VAL_FOR_NORMALIZE) \
lzma_lz_encoder_normalize(lz); \
} while (0)
//////////////////////
// Global constants //
//////////////////////
LZMA_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = HASH_SIZE;
LZMA_FIX_HASH_SIZE(LZMA_MATCH_FINDER_NAME_UPPER) = FIX_HASH_SIZE;
///////////////////
// API functions //
///////////////////
LZMA_GET_MATCHES(LZMA_MATCH_FINDER_NAME_LOWER)
{
uint32_t len_limit;
if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
len_limit = lz->match_max_len;
} else {
assert(lz->stream_end_was_reached);
len_limit = lz->write_pos - lz->read_pos;
if (len_limit < MIN_MATCH_CHECK) {
distances[0] = 0;
move_pos();
return;
}
}
int32_t offset = 1;
const uint32_t match_min_pos
= lz->read_pos + lz->offset > lz->cyclic_buffer_size
? lz->read_pos + lz->offset - lz->cyclic_buffer_size
: 0;
const uint8_t *cur = lz->buffer + lz->read_pos;
uint32_t max_len = START_MAX_LEN; // to avoid items for len < hash_size
#ifdef HASH_ARRAY_2
uint32_t hash_2_value;
# ifdef HASH_ARRAY_3
uint32_t hash_3_value;
# endif
#endif
uint32_t hash_value;
HASH_CALC();
uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
#ifdef HASH_ARRAY_2
uint32_t cur_match2 = lz->hash[hash_2_value];
# ifdef HASH_ARRAY_3
uint32_t cur_match3 = lz->hash[HASH_3_OFFSET + hash_3_value];
# endif
lz->hash[hash_2_value] = lz->read_pos + lz->offset;
if (cur_match2 > match_min_pos) {
if (lz->buffer[cur_match2 - lz->offset] == cur[0]) {
max_len = 2;
distances[offset++] = 2;
distances[offset++] = lz->read_pos + lz->offset
- cur_match2 - 1;
}
}
# ifdef HASH_ARRAY_3
lz->hash[HASH_3_OFFSET + hash_3_value] = lz->read_pos + lz->offset;
if (cur_match3 > match_min_pos) {
if (lz->buffer[cur_match3 - lz->offset] == cur[0]) {
if (cur_match3 == cur_match2)
offset -= 2;
max_len = 3;
distances[offset++] = 3;
distances[offset++] = lz->read_pos + lz->offset
- cur_match3 - 1;
cur_match2 = cur_match3;
}
}
# endif
if (offset != 1 && cur_match2 == cur_match) {
offset -= 2;
max_len = START_MAX_LEN;
}
#endif
lz->hash[FIX_HASH_SIZE + hash_value] = lz->read_pos + lz->offset;
#ifdef IS_HASH_CHAIN
lz->son[lz->cyclic_buffer_pos] = cur_match;
#else
uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
uint32_t len0 = NUM_HASH_DIRECT_BYTES;
uint32_t len1 = NUM_HASH_DIRECT_BYTES;
#endif
#if NUM_HASH_DIRECT_BYTES != 0
if (cur_match > match_min_pos) {
if (lz->buffer[cur_match + NUM_HASH_DIRECT_BYTES - lz->offset]
!= cur[NUM_HASH_DIRECT_BYTES]) {
max_len = NUM_HASH_DIRECT_BYTES;
distances[offset++] = NUM_HASH_DIRECT_BYTES;
distances[offset++] = lz->read_pos + lz->offset
- cur_match - 1;
}
}
#endif
uint32_t count = lz->cut_value;
while (true) {
if (cur_match <= match_min_pos || count-- == 0) {
#ifndef IS_HASH_CHAIN
*ptr0 = EMPTY_HASH_VALUE;
*ptr1 = EMPTY_HASH_VALUE;
#endif
break;
}
const uint32_t delta = lz->read_pos + lz->offset - cur_match;
const uint32_t cyclic_pos = delta <= lz->cyclic_buffer_pos
? lz->cyclic_buffer_pos - delta
: lz->cyclic_buffer_pos - delta
+ lz->cyclic_buffer_size;
uint32_t *pair = lz->son +
#ifdef IS_HASH_CHAIN
cyclic_pos;
#else
(cyclic_pos << 1);
#endif
const uint8_t *pb = lz->buffer + cur_match - lz->offset;
uint32_t len =
#ifdef IS_HASH_CHAIN
NUM_HASH_DIRECT_BYTES;
if (pb[max_len] == cur[max_len])
#else
MIN(len0, len1);
#endif
if (pb[len] == cur[len]) {
while (++len != len_limit)
if (pb[len] != cur[len])
break;
if (max_len < len) {
max_len = len;
distances[offset++] = len;
distances[offset++] = delta - 1;
if (len == len_limit) {
#ifndef IS_HASH_CHAIN
*ptr1 = pair[0];
*ptr0 = pair[1];
#endif
break;
}
}
}
#ifdef IS_HASH_CHAIN
cur_match = *pair;
#else
if (pb[len] < cur[len]) {
*ptr1 = cur_match;
ptr1 = pair + 1;
cur_match = *ptr1;
len1 = len;
} else {
*ptr0 = cur_match;
ptr0 = pair;
cur_match = *ptr0;
len0 = len;
}
#endif
}
distances[0] = offset - 1;
move_pos();
return;
}
LZMA_SKIP(LZMA_MATCH_FINDER_NAME_LOWER)
{
do {
#ifdef IS_HASH_CHAIN
if (lz->write_pos - lz->read_pos < NUM_HASH_BYTES) {
move_pos();
continue;
}
#else
uint32_t len_limit;
if (lz->read_pos + lz->match_max_len <= lz->write_pos) {
len_limit = lz->match_max_len;
} else {
assert(lz->stream_end_was_reached == true);
len_limit = lz->write_pos - lz->read_pos;
if (len_limit < MIN_MATCH_CHECK) {
move_pos();
continue;
}
}
const uint32_t match_min_pos
= lz->read_pos + lz->offset > lz->cyclic_buffer_size
? lz->read_pos + lz->offset - lz->cyclic_buffer_size
: 0;
#endif
const uint8_t *cur = lz->buffer + lz->read_pos;
#ifdef HASH_ARRAY_2
uint32_t hash_2_value;
# ifdef HASH_ARRAY_3
uint32_t hash_3_value;
uint32_t hash_value;
HASH_CALC();
lz->hash[HASH_3_OFFSET + hash_3_value]
= lz->read_pos + lz->offset;
# else
uint32_t hash_value;
HASH_CALC();
# endif
lz->hash[hash_2_value] = lz->read_pos + lz->offset;
#else
uint32_t hash_value;
HASH_CALC();
#endif
uint32_t cur_match = lz->hash[FIX_HASH_SIZE + hash_value];
lz->hash[FIX_HASH_SIZE + hash_value]
= lz->read_pos + lz->offset;
#ifdef IS_HASH_CHAIN
lz->son[lz->cyclic_buffer_pos] = cur_match;
#else
uint32_t *ptr0 = lz->son + (lz->cyclic_buffer_pos << 1) + 1;
uint32_t *ptr1 = lz->son + (lz->cyclic_buffer_pos << 1);
uint32_t len0 = NUM_HASH_DIRECT_BYTES;
uint32_t len1 = NUM_HASH_DIRECT_BYTES;
uint32_t count = lz->cut_value;
while (true) {
if (cur_match <= match_min_pos || count-- == 0) {
*ptr0 = EMPTY_HASH_VALUE;
*ptr1 = EMPTY_HASH_VALUE;
break;
}
const uint32_t delta = lz->read_pos
+ lz->offset - cur_match;
const uint32_t cyclic_pos
= delta <= lz->cyclic_buffer_pos
? lz->cyclic_buffer_pos - delta
: lz->cyclic_buffer_pos - delta
+ lz->cyclic_buffer_size;
uint32_t *pair = lz->son + (cyclic_pos << 1);
const uint8_t *pb = lz->buffer + cur_match
- lz->offset;
uint32_t len = MIN(len0, len1);
if (pb[len] == cur[len]) {
while (++len != len_limit)
if (pb[len] != cur[len])
break;
if (len == len_limit) {
*ptr1 = pair[0];
*ptr0 = pair[1];
break;
}
}
if (pb[len] < cur[len]) {
*ptr1 = cur_match;
ptr1 = pair + 1;
cur_match = *ptr1;
len1 = len;
} else {
*ptr0 = cur_match;
ptr0 = pair;
cur_match = *ptr0;
len0 = len;
}
}
#endif
move_pos();
} while (--num != 0);
return;
}