| /////////////////////////////////////////////////////////////////////////////// |
| // |
| /// \file lzma_encoder.c |
| /// \brief LZMA encoder |
| // |
| // Copyright (C) 1999-2006 Igor Pavlov |
| // Copyright (C) 2007 Lasse Collin |
| // |
| // This library is free software; you can redistribute it and/or |
| // modify it under the terms of the GNU Lesser General Public |
| // License as published by the Free Software Foundation; either |
| // version 2.1 of the License, or (at your option) any later version. |
| // |
| // This library is distributed in the hope that it will be useful, |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| // Lesser General Public License for more details. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| // NOTE: If you want to keep the line length in 80 characters, set |
| // tab width to 4 or less in your editor when editing this file. |
| |
| |
| #include "lzma_encoder_private.h" |
| |
| |
| //////////// |
| // Macros // |
| //////////// |
| |
| // These are as macros mostly because they use local range encoder variables. |
| |
| #define literal_encode(subcoder, symbol) \ |
| do { \ |
| uint32_t context = 1; \ |
| int i = 8; \ |
| do { \ |
| --i; \ |
| const uint32_t bit = ((symbol) >> i) & 1; \ |
| bit_encode(subcoder[context], bit); \ |
| context = (context << 1) | bit; \ |
| } while (i != 0); \ |
| } while (0) |
| |
| |
| #define literal_encode_matched(subcoder, match_byte, symbol) \ |
| do { \ |
| uint32_t context = 1; \ |
| int i = 8; \ |
| do { \ |
| --i; \ |
| uint32_t bit = ((symbol) >> i) & 1; \ |
| const uint32_t match_bit = ((match_byte) >> i) & 1; \ |
| const uint32_t subcoder_index = 0x100 + (match_bit << 8) + context; \ |
| bit_encode(subcoder[subcoder_index], bit); \ |
| context = (context << 1) | bit; \ |
| if (match_bit != bit) { \ |
| while (i != 0) { \ |
| --i; \ |
| bit = ((symbol) >> i) & 1; \ |
| bit_encode(subcoder[context], bit); \ |
| context = (context << 1) | bit; \ |
| } \ |
| break; \ |
| } \ |
| } while (i != 0); \ |
| } while (0) |
| |
| |
| #define length_encode(length_encoder, symbol, pos_state, update_price) \ |
| do { \ |
| \ |
| if ((symbol) < LEN_LOW_SYMBOLS) { \ |
| bit_encode_0((length_encoder).choice); \ |
| bittree_encode((length_encoder).low[pos_state], \ |
| LEN_LOW_BITS, symbol); \ |
| } else { \ |
| bit_encode_1((length_encoder).choice); \ |
| if ((symbol) < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS) { \ |
| bit_encode_0((length_encoder).choice2); \ |
| bittree_encode((length_encoder).mid[pos_state], \ |
| LEN_MID_BITS, \ |
| (symbol) - LEN_LOW_SYMBOLS); \ |
| } else { \ |
| bit_encode_1((length_encoder).choice2); \ |
| bittree_encode((length_encoder).high, LEN_HIGH_BITS, \ |
| (symbol) - LEN_LOW_SYMBOLS \ |
| - LEN_MID_SYMBOLS); \ |
| } \ |
| } \ |
| if (update_price) \ |
| if (--(length_encoder).counters[pos_state] == 0) \ |
| lzma_length_encoder_update_table(&(length_encoder), pos_state); \ |
| } while (0) |
| |
| |
| /////////////// |
| // Functions // |
| /////////////// |
| |
| /// \brief Updates price table of the length encoder |
| /// |
| /// All all the other prices in LZMA, these are used by lzma_get_optimum(). |
| /// |
| extern void |
| lzma_length_encoder_update_table(lzma_length_encoder *lencoder, |
| const uint32_t pos_state) |
| { |
| const uint32_t num_symbols = lencoder->table_size; |
| const uint32_t a0 = bit_get_price_0(lencoder->choice); |
| const uint32_t a1 = bit_get_price_1(lencoder->choice); |
| const uint32_t b0 = a1 + bit_get_price_0(lencoder->choice2); |
| const uint32_t b1 = a1 + bit_get_price_1(lencoder->choice2); |
| |
| uint32_t *prices = lencoder->prices[pos_state]; |
| uint32_t i = 0; |
| |
| for (i = 0; i < num_symbols && i < LEN_LOW_SYMBOLS; ++i) { |
| prices[i] = a0; |
| bittree_get_price(prices[i], lencoder->low[pos_state], |
| LEN_LOW_BITS, i); |
| } |
| |
| for (; i < num_symbols && i < LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; ++i) { |
| prices[i] = b0; |
| bittree_get_price(prices[i], lencoder->mid[pos_state], |
| LEN_MID_BITS, i - LEN_LOW_SYMBOLS); |
| } |
| |
| for (; i < num_symbols; ++i) { |
| prices[i] = b1; |
| bittree_get_price(prices[i], lencoder->high, LEN_HIGH_BITS, |
| i - LEN_LOW_SYMBOLS - LEN_MID_SYMBOLS); |
| } |
| |
| lencoder->counters[pos_state] = num_symbols; |
| |
| return; |
| } |
| |
| |
| /** |
| * \brief LZMA encoder |
| * |
| * \return true if end of stream was reached, false otherwise. |
| */ |
| extern bool |
| lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, |
| size_t *restrict out_pos, size_t out_size) |
| { |
| // Flush the range encoder's temporary buffer to out[]. |
| // Return immediatelly if not everything could be flushed. |
| if (rc_flush_buffer(&coder->rc, out, out_pos, out_size)) |
| return false; |
| |
| // Return immediatelly if we have already finished our work. |
| if (coder->lz.stream_end_was_reached |
| && coder->is_initialized |
| && coder->lz.read_pos == coder->lz.write_pos |
| && coder->additional_offset == 0) |
| return true; |
| |
| // Local copies |
| rc_to_local(coder->rc); |
| size_t out_pos_local = *out_pos; |
| const uint32_t pos_mask = coder->pos_mask; |
| const bool best_compression = coder->best_compression; |
| |
| // Initialize the stream if no data has been encoded yet. |
| if (!coder->is_initialized) { |
| if (coder->lz.read_pos == coder->lz.read_limit) { |
| // Cannot initialize, because there is no input data. |
| if (!coder->lz.stream_end_was_reached) |
| return false; |
| |
| // If we get here, we are encoding an empty file. |
| // Initialization is skipped completely. |
| assert(coder->lz.write_pos == coder->lz.read_pos); |
| |
| } else { |
| // Do the actual initialization. |
| uint32_t len; |
| uint32_t num_distance_pairs; |
| lzma_read_match_distances(coder, &len, &num_distance_pairs); |
| |
| bit_encode_0(coder->is_match[coder->state][0]); |
| update_char(coder->state); |
| |
| const uint8_t cur_byte = coder->lz.buffer[ |
| coder->lz.read_pos - coder->additional_offset]; |
| probability *subcoder = literal_get_subcoder(coder->literal_coder, |
| coder->now_pos, coder->previous_byte); |
| literal_encode(subcoder, cur_byte); |
| |
| coder->previous_byte = cur_byte; |
| --coder->additional_offset; |
| ++coder->now_pos; |
| |
| assert(coder->additional_offset == 0); |
| } |
| |
| // Initialization is done (except if empty file). |
| coder->is_initialized = true; |
| } |
| |
| // Encoding loop |
| while (true) { |
| // Check that there is free output space. |
| if (out_pos_local == out_size) |
| break; |
| |
| assert(rc_buffer_size == 0); |
| |
| // Check that there is some input to process. |
| if (coder->lz.read_pos >= coder->lz.read_limit) { |
| // If end of input has been reached, we must keep |
| // encoding until additional_offset becomes zero. |
| if (!coder->lz.stream_end_was_reached |
| || coder->additional_offset == 0) |
| break; |
| } |
| |
| assert(coder->lz.read_pos <= coder->lz.write_pos); |
| |
| #ifndef NDEBUG |
| if (coder->lz.stream_end_was_reached) { |
| assert(coder->lz.read_limit == coder->lz.write_pos); |
| } else { |
| assert(coder->lz.read_limit + coder->lz.keep_size_after |
| == coder->lz.write_pos); |
| } |
| #endif |
| |
| const uint32_t pos_state = coder->now_pos & pos_mask; |
| |
| uint32_t pos; |
| uint32_t len; |
| |
| // Get optimal match (repeat position and length). |
| // Value ranges for pos: |
| // - [0, REP_DISTANCES): repeated match |
| // - [REP_DISTANCES, UINT32_MAX): match at (pos - REP_DISTANCES) |
| // - UINT32_MAX: not a match but a literal |
| // Value ranges for len: |
| // - [MATCH_MIN_LEN, MATCH_MAX_LEN] |
| if (best_compression) |
| lzma_get_optimum(coder, &pos, &len); |
| else |
| lzma_get_optimum_fast(coder, &pos, &len); |
| |
| if (len == 1 && pos == UINT32_MAX) { |
| // It's a literal. |
| bit_encode_0(coder->is_match[coder->state][pos_state]); |
| |
| const uint8_t cur_byte = coder->lz.buffer[ |
| coder->lz.read_pos - coder->additional_offset]; |
| probability *subcoder = literal_get_subcoder(coder->literal_coder, |
| coder->now_pos, coder->previous_byte); |
| |
| if (is_char_state(coder->state)) { |
| literal_encode(subcoder, cur_byte); |
| } else { |
| const uint8_t match_byte = coder->lz.buffer[ |
| coder->lz.read_pos |
| - coder->rep_distances[0] - 1 |
| - coder->additional_offset]; |
| literal_encode_matched(subcoder, match_byte, cur_byte); |
| } |
| |
| update_char(coder->state); |
| coder->previous_byte = cur_byte; |
| |
| } else { |
| // It's a match. |
| bit_encode_1(coder->is_match[coder->state][pos_state]); |
| |
| if (pos < REP_DISTANCES) { |
| // It's a repeated match i.e. the same distance |
| // has been used earlier. |
| bit_encode_1(coder->is_rep[coder->state]); |
| |
| if (pos == 0) { |
| bit_encode_0(coder->is_rep0[coder->state]); |
| const uint32_t symbol = (len == 1) ? 0 : 1; |
| bit_encode(coder->is_rep0_long[coder->state][pos_state], |
| symbol); |
| } else { |
| const uint32_t distance = coder->rep_distances[pos]; |
| bit_encode_1(coder->is_rep0[coder->state]); |
| |
| if (pos == 1) { |
| bit_encode_0(coder->is_rep1[coder->state]); |
| } else { |
| bit_encode_1(coder->is_rep1[coder->state]); |
| bit_encode(coder->is_rep2[coder->state], pos - 2); |
| |
| if (pos == 3) |
| coder->rep_distances[3] = coder->rep_distances[2]; |
| |
| coder->rep_distances[2] = coder->rep_distances[1]; |
| } |
| |
| coder->rep_distances[1] = coder->rep_distances[0]; |
| coder->rep_distances[0] = distance; |
| } |
| |
| if (len == 1) { |
| update_short_rep(coder->state); |
| } else { |
| length_encode(coder->rep_match_len_encoder, |
| len - MATCH_MIN_LEN, pos_state, |
| best_compression); |
| update_rep(coder->state); |
| } |
| |
| } else { |
| bit_encode_0(coder->is_rep[coder->state]); |
| update_match(coder->state); |
| length_encode(coder->len_encoder, len - MATCH_MIN_LEN, |
| pos_state, best_compression); |
| pos -= REP_DISTANCES; |
| |
| const uint32_t pos_slot = get_pos_slot(pos); |
| const uint32_t len_to_pos_state = get_len_to_pos_state(len); |
| bittree_encode(coder->pos_slot_encoder[len_to_pos_state], |
| POS_SLOT_BITS, pos_slot); |
| |
| if (pos_slot >= START_POS_MODEL_INDEX) { |
| const uint32_t footer_bits = (pos_slot >> 1) - 1; |
| const uint32_t base = (2 | (pos_slot & 1)) << footer_bits; |
| const uint32_t pos_reduced = pos - base; |
| |
| if (pos_slot < END_POS_MODEL_INDEX) { |
| bittree_reverse_encode( |
| coder->pos_encoders + base - pos_slot - 1, |
| footer_bits, pos_reduced); |
| } else { |
| rc_encode_direct_bits(pos_reduced >> ALIGN_BITS, |
| footer_bits - ALIGN_BITS); |
| bittree_reverse_encode(coder->pos_align_encoder, |
| ALIGN_BITS, pos_reduced & ALIGN_MASK); |
| ++coder->align_price_count; |
| } |
| } |
| |
| coder->rep_distances[3] = coder->rep_distances[2]; |
| coder->rep_distances[2] = coder->rep_distances[1]; |
| coder->rep_distances[1] = coder->rep_distances[0]; |
| coder->rep_distances[0] = pos; |
| ++coder->match_price_count; |
| } |
| |
| coder->previous_byte = coder->lz.buffer[ |
| coder->lz.read_pos + len - 1 |
| - coder->additional_offset]; |
| } |
| |
| assert(coder->additional_offset >= len); |
| coder->additional_offset -= len; |
| coder->now_pos += len; |
| } |
| |
| // Check if everything is done. |
| bool all_done = false; |
| if (coder->lz.stream_end_was_reached |
| && coder->lz.read_pos == coder->lz.write_pos |
| && coder->additional_offset == 0) { |
| // Write end of stream marker. It is encoded as a match with |
| // distance of UINT32_MAX. Match length is needed but it is |
| // ignored by the decoder. |
| if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { |
| const uint32_t pos_state = coder->now_pos & pos_mask; |
| bit_encode_1(coder->is_match[coder->state][pos_state]); |
| bit_encode_0(coder->is_rep[coder->state]); |
| update_match(coder->state); |
| |
| const uint32_t len = MATCH_MIN_LEN; // MATCH_MAX_LEN; |
| length_encode(coder->len_encoder, len - MATCH_MIN_LEN, |
| pos_state, best_compression); |
| |
| const uint32_t pos_slot = (1 << POS_SLOT_BITS) - 1; |
| const uint32_t len_to_pos_state = get_len_to_pos_state(len); |
| bittree_encode(coder->pos_slot_encoder[len_to_pos_state], |
| POS_SLOT_BITS, pos_slot); |
| |
| const uint32_t footer_bits = 30; |
| const uint32_t pos_reduced |
| = (UINT32_C(1) << footer_bits) - 1; |
| rc_encode_direct_bits(pos_reduced >> ALIGN_BITS, |
| footer_bits - ALIGN_BITS); |
| |
| bittree_reverse_encode(coder->pos_align_encoder, ALIGN_BITS, |
| pos_reduced & ALIGN_MASK); |
| } |
| |
| // Flush the last bytes of compressed data from |
| // the range coder to the output buffer. |
| rc_flush(); |
| |
| // All done. Note that some output bytes might be |
| // pending in coder->buffer. lzma_encode() will |
| // take care of those bytes. |
| if (rc_buffer_size == 0) |
| all_done = true; |
| } |
| |
| // Store local variables back to *coder. |
| rc_from_local(coder->rc); |
| *out_pos = out_pos_local; |
| |
| return all_done; |
| } |