| // SPDX-License-Identifier: 0BSD |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| // |
| /// \file test_microlzma.c |
| /// \brief Tests MicroLZMA encoding and decoding |
| // |
| // Author: Jia Tan |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include "tests.h" |
| |
// Capacity, in bytes, of the stack output buffers the tests hand to
// lzma_code().
enum { BUFFER_SIZE = 1024 };
| |
| |
| #ifdef HAVE_ENCODER_LZMA1 |
| |
// Number of bytes the MicroLZMA encoder produces for "Hello\nWorld\n".
enum { ENCODED_OUTPUT_SIZE = 17 };
| |
// The bytes of "Hello\nWorld\n" (no terminating NUL). This is the input
// for the encoder tests.
static const uint8_t hello_world[] = {
	0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, // "Hello\n"
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A, // "World\n"
};
| |
// Expected CRC32 of the MicroLZMA stream produced for "Hello\nWorld\n".
// The encoder settings were based on LZMA_PRESET_DEFAULT as of
// liblzma 5.6.0. That release is trusted as the reference because the
// stream it produces for this input can be decoded successfully. The
// value exists to catch regressions that change the MicroLZMA output.
static const uint32_t hello_world_encoded_crc = UINT32_C(0x3CDE40A8);
| |
| |
| // Function implementation borrowed from lzma_decoder.c. It is needed to |
| // ensure the first byte of a MicroLZMA stream is set correctly with the |
| // negation of the LZMA properties. |
| static bool |
| lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) |
| { |
| if (byte > (4 * 5 + 4) * 9 + 8) |
| return true; |
| |
| // See the file format specification to understand this. |
| options->pb = byte / (9 * 5); |
| byte -= options->pb * 9 * 5; |
| options->lp = byte / 9; |
| options->lc = byte - options->lp * 9; |
| |
| return options->lc + options->lp > LZMA_LCLP_MAX; |
| } |
| |
| |
| /////////////////// |
| // Encoder tests // |
| /////////////////// |
| |
| // This tests a few of the basic options. These options are not unique to |
| // MicroLZMA in any way, its mostly ensuring that the options are actually |
| // being checked before initializing the decoder internals. |
| static void |
| test_encode_options(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| lzma_options_lzma opt_lzma; |
| |
| // Initialize with default options. |
| assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); |
| |
| // NULL stream |
| assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma), |
| LZMA_PROG_ERROR); |
| |
| // lc/lp/pb = 5/0/2 (lc invalid) |
| opt_lzma.lc = 5; |
| opt_lzma.lp = 0; |
| opt_lzma.pb = 2; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| // lc/lp/pb = 0/5/2 (lp invalid) |
| opt_lzma.lc = 0; |
| opt_lzma.lp = 5; |
| opt_lzma.pb = 2; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| // lc/lp/pb = 3/2/2 (lc + lp invalid) |
| opt_lzma.lc = 3; |
| opt_lzma.lp = 2; |
| opt_lzma.pb = 2; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| // lc/lp/pb = 3/0/5 (pb invalid) |
| opt_lzma.lc = 3; |
| opt_lzma.lp = 0; |
| opt_lzma.pb = 5; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| // Zero out lp, pb, lc options to not interfere with later tests. |
| opt_lzma.lp = 0; |
| opt_lzma.pb = 0; |
| opt_lzma.lc = 0; |
| |
| // Set invalid dictionary size. |
| opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| // Maximum dictionary size for the encoder, as described in lzma12.h |
| // is 1.5 GiB. |
| opt_lzma.dict_size = (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + 1; |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OPTIONS_ERROR); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| static void |
| test_encode_basic(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| lzma_options_lzma opt_lzma; |
| |
| // The lzma_lzma_preset return value is inverse of what it perhaps |
| // should be, that is, it returns false on success. |
| assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); |
| |
| // Initialize the encoder using the default options. |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = hello_world; |
| strm.avail_in = sizeof(hello_world); |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // Everything must be encoded in one lzma_code() call. |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); |
| |
| // Check that the entire input was consumed. |
| assert_uint_eq(strm.total_in, sizeof(hello_world)); |
| |
| // Check that the first byte in the output stream is not 0x00. |
| // In a regular raw LZMA stream the first byte is always 0x00. |
| // In MicroLZMA the first byte replaced by the bitwise-negation |
| // of the LZMA properties. |
| assert_uint(output[0], !=, 0x00); |
| |
| const uint8_t props = ~output[0]; |
| |
| lzma_options_lzma test_options; |
| assert_false(lzma_lzma_lclppb_decode(&test_options, props)); |
| |
| assert_uint_eq(opt_lzma.lc, test_options.lc); |
| assert_uint_eq(opt_lzma.lp, test_options.lp); |
| assert_uint_eq(opt_lzma.pb, test_options.pb); |
| |
| // Compute the check over the output data. This is compared to |
| // the expected check value. |
| const uint32_t check_val = lzma_crc32(output, strm.total_out, 0); |
| |
| assert_uint_eq(check_val, hello_world_encoded_crc); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| // This tests the behavior when strm.avail_out is so small it cannot hold |
| // the header plus 1 encoded byte (< 6). |
| static void |
| test_encode_small_out(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| lzma_options_lzma opt_lzma; |
| |
| assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); |
| |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = hello_world; |
| strm.avail_in = sizeof(hello_world); |
| strm.next_out = output; |
| strm.avail_out = 5; |
| |
| // LZMA_PROG_ERROR is expected when strm.avail_out < 6 |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR); |
| |
| // The encoder must be reset because coders cannot be used again |
| // after returning LZMA_PROG_ERROR. |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); |
| |
| // Reset strm.avail_out to be > 6, but not enough to hold all of the |
| // compressed data. |
| strm.avail_out = ENCODED_OUTPUT_SIZE - 1; |
| |
| // Encoding should not return an error now. |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); |
| assert_uint(strm.total_in, <, sizeof(hello_world)); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| // LZMA_FINISH is the only supported action. All others must |
| // return LZMA_PROG_ERROR. |
| static void |
| test_encode_actions(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| lzma_options_lzma opt_lzma; |
| |
| assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); |
| |
| const lzma_action actions[] = { |
| LZMA_RUN, |
| LZMA_SYNC_FLUSH, |
| LZMA_FULL_FLUSH, |
| LZMA_FULL_BARRIER, |
| }; |
| |
| for (size_t i = 0; i < ARRAY_SIZE(actions); ++i) { |
| assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), |
| LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = hello_world; |
| strm.avail_in = sizeof(hello_world); |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| assert_lzma_ret(lzma_code(&strm, actions[i]), |
| LZMA_PROG_ERROR); |
| } |
| |
| lzma_end(&strm); |
| } |
| #endif // HAVE_ENCODER_LZMA1 |
| |
| |
| /////////////////// |
| // Decoder tests // |
| /////////////////// |
| |
| #if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1) |
| |
// The bytes of "Goodbye World!" (no terminating NUL). This is the input
// for the decoder tests.
static const uint8_t goodbye_world[] = {
	0x47, 0x6F, 0x6F, 0x64, 0x62, 0x79, 0x65, // "Goodbye"
	0x20,                                     // " "
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21,       // "World!"
};

// MicroLZMA encoding of goodbye_world and its size in bytes. Both are
// filled in by main() before the decoder tests run.
static uint8_t *goodbye_world_encoded = NULL;
static size_t goodbye_world_encoded_size = 0;
| |
| |
| // Helper function to encode data and return the compressed size. |
| static size_t |
| basic_microlzma_encode(const uint8_t *input, size_t in_size, |
| uint8_t **compressed) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| lzma_options_lzma opt_lzma; |
| |
| // Lazy way to set the output size since the input should never |
| // inflate by much in these simple test cases. This is tested to |
| // be large enough after encoding to fit the entire input, so if |
| // this assumption does not hold then this will fail. |
| const size_t out_size = in_size << 1; |
| |
| *compressed = tuktest_malloc(out_size); |
| |
| // Always encode with the default options for simplicity. |
| if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)) |
| goto decoder_setup_error; |
| |
| if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK) |
| goto decoder_setup_error; |
| |
| strm.next_in = input; |
| strm.avail_in = in_size; |
| strm.next_out = *compressed; |
| strm.avail_out = out_size; |
| |
| if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END) |
| goto decoder_setup_error; |
| |
| // Check that the entire input was consumed and that it fit into |
| // the output buffer. |
| if (strm.total_in != in_size) |
| goto decoder_setup_error; |
| |
| lzma_end(&strm); |
| |
| // lzma_end() doesn't touch other members of lzma_stream than |
| // lzma_stream.internal so using strm.total_out here is fine. |
| return strm.total_out; |
| |
| decoder_setup_error: |
| tuktest_error("Failed to initialize decoder tests"); |
| return 0; |
| } |
| |
| |
| static void |
| test_decode_options(void) |
| { |
| // NULL stream |
| assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE, |
| sizeof(hello_world), true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR); |
| |
| // Uncompressed size larger than max |
| lzma_stream strm = LZMA_STREAM_INIT; |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE, |
| LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT), |
| LZMA_OPTIONS_ERROR); |
| } |
| |
| |
| // Test that decoding succeeds when uncomp_size is correct regardless of |
| // the value of uncomp_size_is_exact. |
| static void |
| test_decode_uncomp_size_is_exact(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| sizeof(goodbye_world), true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); |
| assert_uint_eq(strm.total_in, goodbye_world_encoded_size); |
| |
| assert_uint_eq(strm.total_out, sizeof(goodbye_world)); |
| assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); |
| |
| // Reset decoder with uncomp_size_is_exact set to false and |
| // uncomp_size set to correct value. Also test using the |
| // uncompressed size as the dictionary size. |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| sizeof(goodbye_world), false, |
| sizeof(goodbye_world)), LZMA_OK); |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); |
| assert_uint_eq(strm.total_in, goodbye_world_encoded_size); |
| |
| assert_uint_eq(strm.total_out, sizeof(goodbye_world)); |
| assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| // This tests decoding when MicroLZMA decoder is called with |
| // an incorrect uncompressed size. |
| static void |
| test_decode_uncomp_size_wrong(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| sizeof(goodbye_world) + 1, false, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // LZMA_OK should be returned because the input size given was |
| // larger than the actual encoded size. The decoder is expecting |
| // more input to possibly fill the uncompressed size that was set. |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); |
| |
| assert_uint_eq(strm.total_out, sizeof(goodbye_world)); |
| |
| assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); |
| |
| // Next, test with uncomp_size_is_exact set. |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| sizeof(goodbye_world) + 1, true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // No error detected, even though all input was consumed and there |
| // is more room in the output buffer. |
| // |
| // FIXME? LZMA_FINISH tells that no more input is coming and |
| // the MicroLZMA decoder knows the exact compressed size from |
| // the initialization as well. So should it return LZMA_DATA_ERROR |
| // on the first call instead of relying on the generic lzma_code() |
| // logic to eventually get LZMA_BUF_ERROR? |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR); |
| |
| assert_uint_eq(strm.total_out, sizeof(goodbye_world)); |
| assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); |
| |
| // Reset stream with uncomp_size smaller than the real |
| // uncompressed size. |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| ARRAY_SIZE(hello_world) - 1, true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // This case actually results in an error since it decodes the full |
| // uncompressed size but the range coder is not in the proper state |
| // for the stream to end. |
| assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| static void |
| test_decode_comp_size_wrong(void) |
| { |
| lzma_stream strm = LZMA_STREAM_INIT; |
| |
| // goodbye_world_encoded_size + 1 is safe because extra space was |
| // allocated for goodbye_world_encoded. The extra space isn't |
| // initialized but it shouldn't be read either, thus Valgrind |
| // has to remain happy with this code. |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size + 1, |
| sizeof(goodbye_world), true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // When uncomp_size_is_exact is set, the compressed size must be |
| // correct or else LZMA_DATA_ERROR is returned. |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR); |
| |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size + 1, |
| sizeof(goodbye_world), false, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| strm.next_in = goodbye_world_encoded; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| // When uncomp_size_is_exact is not set, the decoder does not |
| // detect when the compressed size is wrong as long as all of the |
| // expected output has been decoded. This is because the decoder |
| // assumes that the real uncompressed size might be bigger than |
| // the specified value and in that case more input might be needed |
| // as well. |
| assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); |
| |
| lzma_end(&strm); |
| } |
| |
| |
| static void |
| test_decode_bad_lzma_properties(void) |
| { |
| // Alter first byte to encode invalid LZMA properties. |
| uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size); |
| memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size); |
| |
| // lc=3, lp=2, pb=2 |
| compressed[0] = (uint8_t)~0x6FU; |
| |
| lzma_stream strm = LZMA_STREAM_INIT; |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| sizeof(goodbye_world), false, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| uint8_t output[BUFFER_SIZE]; |
| |
| strm.next_in = compressed; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR); |
| |
| // Use valid, but incorrect LZMA properties. |
| // lc=3, lp=1, pb=2 |
| compressed[0] = (uint8_t)~0x66; |
| |
| assert_lzma_ret(lzma_microlzma_decoder(&strm, |
| goodbye_world_encoded_size, |
| ARRAY_SIZE(goodbye_world), true, |
| LZMA_DICT_SIZE_DEFAULT), LZMA_OK); |
| |
| strm.next_in = compressed; |
| strm.avail_in = goodbye_world_encoded_size; |
| strm.next_out = output; |
| strm.avail_out = sizeof(output); |
| |
| assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); |
| |
| lzma_end(&strm); |
| } |
| #endif |
| |
| |
// Test program entry point. Runs the encoder tests and, when the LZMA1
// decoder is available too, the decoder tests. Everything is skipped if
// the LZMA1 encoder is disabled.
extern int
main(int argc, char **argv)
{
	tuktest_start(argc, argv);

#ifdef HAVE_ENCODER_LZMA1
	tuktest_run(test_encode_options);
	tuktest_run(test_encode_basic);
	tuktest_run(test_encode_small_out);
	tuktest_run(test_encode_actions);

	// MicroLZMA decoder tests require the basic encoder functionality
	// because the encoded test data is created here with the encoder.
#	ifdef HAVE_DECODER_LZMA1
	goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world,
			sizeof(goodbye_world), &goodbye_world_encoded);

	tuktest_run(test_decode_options);
	tuktest_run(test_decode_uncomp_size_is_exact);
	tuktest_run(test_decode_uncomp_size_wrong);
	tuktest_run(test_decode_comp_size_wrong);
	tuktest_run(test_decode_bad_lzma_properties);
#	endif

	return tuktest_end();
#else
	tuktest_early_skip("LZMA1 encoder disabled");
#endif
}