| /////////////////////////////////////////////////////////////////////////////// |
| // |
| /// \file lzma2_encoder.c |
| /// \brief LZMA2 encoder |
| /// |
| // Authors: Igor Pavlov |
| // Lasse Collin |
| // |
| // This file has been put into the public domain. |
| // You can do whatever you want with this file. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include "lz_encoder.h" |
| #include "lzma_encoder.h" |
| #include "fastpos.h" |
| #include "lzma2_encoder.h" |
| |
| |
| struct lzma_coder_s { |
| enum { |
| SEQ_INIT, |
| SEQ_LZMA_ENCODE, |
| SEQ_LZMA_COPY, |
| SEQ_UNCOMPRESSED_HEADER, |
| SEQ_UNCOMPRESSED_COPY, |
| } sequence; |
| |
| /// LZMA encoder |
| lzma_coder *lzma; |
| |
| /// LZMA options currently in use. |
| lzma_options_lzma opt_cur; |
| |
| bool need_properties; |
| bool need_state_reset; |
| bool need_dictionary_reset; |
| |
| /// Uncompressed size of a chunk |
| size_t uncompressed_size; |
| |
| /// Compressed size of a chunk (excluding headers); this is also used |
| /// to indicate the end of buf[] in SEQ_LZMA_COPY. |
| size_t compressed_size; |
| |
| /// Read position in buf[] |
| size_t buf_pos; |
| |
| /// Buffer to hold the chunk header and LZMA compressed data |
| uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; |
| }; |
| |
| |
| static void |
| lzma2_header_lzma(lzma_coder *coder) |
| { |
| assert(coder->uncompressed_size > 0); |
| assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
| assert(coder->compressed_size > 0); |
| assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
| |
| size_t pos; |
| |
| if (coder->need_properties) { |
| pos = 0; |
| |
| if (coder->need_dictionary_reset) |
| coder->buf[pos] = 0x80 + (3 << 5); |
| else |
| coder->buf[pos] = 0x80 + (2 << 5); |
| } else { |
| pos = 1; |
| |
| if (coder->need_state_reset) |
| coder->buf[pos] = 0x80 + (1 << 5); |
| else |
| coder->buf[pos] = 0x80; |
| } |
| |
| // Set the start position for copying. |
| coder->buf_pos = pos; |
| |
| // Uncompressed size |
| size_t size = coder->uncompressed_size - 1; |
| coder->buf[pos++] += size >> 16; |
| coder->buf[pos++] = (size >> 8) & 0xFF; |
| coder->buf[pos++] = size & 0xFF; |
| |
| // Compressed size |
| size = coder->compressed_size - 1; |
| coder->buf[pos++] = size >> 8; |
| coder->buf[pos++] = size & 0xFF; |
| |
| // Properties, if needed |
| if (coder->need_properties) |
| lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); |
| |
| coder->need_properties = false; |
| coder->need_state_reset = false; |
| coder->need_dictionary_reset = false; |
| |
| // The copying code uses coder->compressed_size to indicate the end |
| // of coder->buf[], so we need add the maximum size of the header here. |
| coder->compressed_size += LZMA2_HEADER_MAX; |
| |
| return; |
| } |
| |
| |
| static void |
| lzma2_header_uncompressed(lzma_coder *coder) |
| { |
| assert(coder->uncompressed_size > 0); |
| assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); |
| |
| // If this is the first chunk, we need to include dictionary |
| // reset indicator. |
| if (coder->need_dictionary_reset) |
| coder->buf[0] = 1; |
| else |
| coder->buf[0] = 2; |
| |
| coder->need_dictionary_reset = false; |
| |
| // "Compressed" size |
| coder->buf[1] = (coder->uncompressed_size - 1) >> 8; |
| coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; |
| |
| // Set the start position for copying. |
| coder->buf_pos = 0; |
| return; |
| } |
| |
| |
| static lzma_ret |
| lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf, |
| uint8_t *restrict out, size_t *restrict out_pos, |
| size_t out_size) |
| { |
| while (*out_pos < out_size) |
| switch (coder->sequence) { |
| case SEQ_INIT: |
| // If there's no input left and we are flushing or finishing, |
| // don't start a new chunk. |
| if (mf_unencoded(mf) == 0) { |
| // Write end of payload marker if finishing. |
| if (mf->action == LZMA_FINISH) |
| out[(*out_pos)++] = 0; |
| |
| return mf->action == LZMA_RUN |
| ? LZMA_OK : LZMA_STREAM_END; |
| } |
| |
| if (coder->need_state_reset) |
| return_if_error(lzma_lzma_encoder_reset( |
| coder->lzma, &coder->opt_cur)); |
| |
| coder->uncompressed_size = 0; |
| coder->compressed_size = 0; |
| coder->sequence = SEQ_LZMA_ENCODE; |
| |
| // Fall through |
| |
| case SEQ_LZMA_ENCODE: { |
| // Calculate how much more uncompressed data this chunk |
| // could accept. |
| const uint32_t left = LZMA2_UNCOMPRESSED_MAX |
| - coder->uncompressed_size; |
| uint32_t limit; |
| |
| if (left < mf->match_len_max) { |
| // Must flush immediately since the next LZMA symbol |
| // could make the uncompressed size of the chunk too |
| // big. |
| limit = 0; |
| } else { |
| // Calculate maximum read_limit that is OK from point |
| // of view of LZMA2 chunk size. |
| limit = mf->read_pos - mf->read_ahead |
| + left - mf->match_len_max; |
| } |
| |
| // Save the start position so that we can update |
| // coder->uncompressed_size. |
| const uint32_t read_start = mf->read_pos - mf->read_ahead; |
| |
| // Call the LZMA encoder until the chunk is finished. |
| const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, |
| coder->buf + LZMA2_HEADER_MAX, |
| &coder->compressed_size, |
| LZMA2_CHUNK_MAX, limit); |
| |
| coder->uncompressed_size += mf->read_pos - mf->read_ahead |
| - read_start; |
| |
| assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
| assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
| |
| if (ret != LZMA_STREAM_END) |
| return LZMA_OK; |
| |
| // See if the chunk compressed. If it didn't, we encode it |
| // as uncompressed chunk. This saves a few bytes of space |
| // and makes decoding faster. |
| if (coder->compressed_size >= coder->uncompressed_size) { |
| coder->uncompressed_size += mf->read_ahead; |
| assert(coder->uncompressed_size |
| <= LZMA2_UNCOMPRESSED_MAX); |
| mf->read_ahead = 0; |
| lzma2_header_uncompressed(coder); |
| coder->need_state_reset = true; |
| coder->sequence = SEQ_UNCOMPRESSED_HEADER; |
| break; |
| } |
| |
| // The chunk did compress at least by one byte, so we store |
| // the chunk as LZMA. |
| lzma2_header_lzma(coder); |
| |
| coder->sequence = SEQ_LZMA_COPY; |
| } |
| |
| // Fall through |
| |
| case SEQ_LZMA_COPY: |
| // Copy the compressed chunk along its headers to the |
| // output buffer. |
| lzma_bufcpy(coder->buf, &coder->buf_pos, |
| coder->compressed_size, |
| out, out_pos, out_size); |
| if (coder->buf_pos != coder->compressed_size) |
| return LZMA_OK; |
| |
| coder->sequence = SEQ_INIT; |
| break; |
| |
| case SEQ_UNCOMPRESSED_HEADER: |
| // Copy the three-byte header to indicate uncompressed chunk. |
| lzma_bufcpy(coder->buf, &coder->buf_pos, |
| LZMA2_HEADER_UNCOMPRESSED, |
| out, out_pos, out_size); |
| if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) |
| return LZMA_OK; |
| |
| coder->sequence = SEQ_UNCOMPRESSED_COPY; |
| |
| // Fall through |
| |
| case SEQ_UNCOMPRESSED_COPY: |
| // Copy the uncompressed data as is from the dictionary |
| // to the output buffer. |
| mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); |
| if (coder->uncompressed_size != 0) |
| return LZMA_OK; |
| |
| coder->sequence = SEQ_INIT; |
| break; |
| } |
| |
| return LZMA_OK; |
| } |
| |
| |
| static void |
| lzma2_encoder_end(lzma_coder *coder, const lzma_allocator *allocator) |
| { |
| lzma_free(coder->lzma, allocator); |
| lzma_free(coder, allocator); |
| return; |
| } |
| |
| |
| static lzma_ret |
| lzma2_encoder_options_update(lzma_coder *coder, const lzma_filter *filter) |
| { |
| // New options can be set only when there is no incomplete chunk. |
| // This is the case at the beginning of the raw stream and right |
| // after LZMA_SYNC_FLUSH. |
| if (filter->options == NULL || coder->sequence != SEQ_INIT) |
| return LZMA_PROG_ERROR; |
| |
| // Look if there are new options. At least for now, |
| // only lc/lp/pb can be changed. |
| const lzma_options_lzma *opt = filter->options; |
| if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp |
| || coder->opt_cur.pb != opt->pb) { |
| // Validate the options. |
| if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX |
| || opt->lc + opt->lp > LZMA_LCLP_MAX |
| || opt->pb > LZMA_PB_MAX) |
| return LZMA_OPTIONS_ERROR; |
| |
| // The new options will be used when the encoder starts |
| // a new LZMA2 chunk. |
| coder->opt_cur.lc = opt->lc; |
| coder->opt_cur.lp = opt->lp; |
| coder->opt_cur.pb = opt->pb; |
| coder->need_properties = true; |
| coder->need_state_reset = true; |
| } |
| |
| return LZMA_OK; |
| } |
| |
| |
| static lzma_ret |
| lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, |
| const void *options, lzma_lz_options *lz_options) |
| { |
| if (options == NULL) |
| return LZMA_PROG_ERROR; |
| |
| if (lz->coder == NULL) { |
| lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); |
| if (lz->coder == NULL) |
| return LZMA_MEM_ERROR; |
| |
| lz->code = &lzma2_encode; |
| lz->end = &lzma2_encoder_end; |
| lz->options_update = &lzma2_encoder_options_update; |
| |
| lz->coder->lzma = NULL; |
| } |
| |
| lz->coder->opt_cur = *(const lzma_options_lzma *)(options); |
| |
| lz->coder->sequence = SEQ_INIT; |
| lz->coder->need_properties = true; |
| lz->coder->need_state_reset = false; |
| lz->coder->need_dictionary_reset |
| = lz->coder->opt_cur.preset_dict == NULL |
| || lz->coder->opt_cur.preset_dict_size == 0; |
| |
| // Initialize LZMA encoder |
| return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, |
| &lz->coder->opt_cur, lz_options)); |
| |
| // Make sure that we will always have enough history available in |
| // case we need to use uncompressed chunks. They are used when the |
| // compressed size of a chunk is not smaller than the uncompressed |
| // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes |
| // history available. |
| if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) |
| lz_options->before_size |
| = LZMA2_CHUNK_MAX - lz_options->dict_size; |
| |
| return LZMA_OK; |
| } |
| |
| |
| extern lzma_ret |
| lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, |
| const lzma_filter_info *filters) |
| { |
| return lzma_lz_encoder_init( |
| next, allocator, filters, &lzma2_encoder_init); |
| } |
| |
| |
| extern uint64_t |
| lzma_lzma2_encoder_memusage(const void *options) |
| { |
| const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); |
| if (lzma_mem == UINT64_MAX) |
| return UINT64_MAX; |
| |
| return sizeof(lzma_coder) + lzma_mem; |
| } |
| |
| |
| extern lzma_ret |
| lzma_lzma2_props_encode(const void *options, uint8_t *out) |
| { |
| const lzma_options_lzma *const opt = options; |
| uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN); |
| |
| // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending |
| // on which one is the next: |
| --d; |
| d |= d >> 2; |
| d |= d >> 3; |
| d |= d >> 4; |
| d |= d >> 8; |
| d |= d >> 16; |
| |
| // Get the highest two bits using the proper encoding: |
| if (d == UINT32_MAX) |
| out[0] = 40; |
| else |
| out[0] = get_dist_slot(d + 1) - 24; |
| |
| return LZMA_OK; |
| } |
| |
| |
| extern uint64_t |
| lzma_lzma2_block_size(const void *options) |
| { |
| const lzma_options_lzma *const opt = options; |
| |
| // Use at least 1 MiB to keep compression ratio better. |
| return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20); |
| } |