| /////////////////////////////////////////////////////////////////////////////// |
| // |
| /// \file string_conversion.c |
| /// \brief Conversion of strings to filter chain and vice versa |
| // |
| // Author: Lasse Collin |
| // |
| // This file has been put into the public domain. |
| // You can do whatever you want with this file. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include "filter_common.h" |
| |
| |
| ///////////////////// |
| // String building // |
| ///////////////////// |
| |
| /// How much memory to allocate for strings. For now, no realloc is used |
| /// so this needs to be big enough even though there of course is |
| /// an overflow check still. |
| /// |
| /// FIXME? Using a fixed size is wasteful if the application doesn't free |
| /// the string fairly quickly but this can be improved later if needed. |
| #define STR_ALLOC_SIZE 800 |
| |
| |
| typedef struct { |
| char *buf; |
| size_t pos; |
| } lzma_str; |
| |
| |
| static lzma_ret |
| str_init(lzma_str *str, const lzma_allocator *allocator) |
| { |
| str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator); |
| if (str->buf == NULL) |
| return LZMA_MEM_ERROR; |
| |
| str->pos = 0; |
| return LZMA_OK; |
| } |
| |
| |
| static void |
| str_free(lzma_str *str, const lzma_allocator *allocator) |
| { |
| lzma_free(str->buf, allocator); |
| return; |
| } |
| |
| |
| static bool |
| str_is_full(const lzma_str *str) |
| { |
| return str->pos == STR_ALLOC_SIZE - 1; |
| } |
| |
| |
| static lzma_ret |
| str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator) |
| { |
| if (str_is_full(str)) { |
| // The preallocated buffer was too small. |
| // This shouldn't happen as STR_ALLOC_SIZE should |
| // be adjusted if new filters are added. |
| lzma_free(str->buf, allocator); |
| *dest = NULL; |
| assert(0); |
| return LZMA_PROG_ERROR; |
| } |
| |
| str->buf[str->pos] = '\0'; |
| *dest = str->buf; |
| return LZMA_OK; |
| } |
| |
| |
| static void |
| str_append_str(lzma_str *str, const char *s) |
| { |
| const size_t len = strlen(s); |
| const size_t limit = STR_ALLOC_SIZE - 1 - str->pos; |
| const size_t copy_size = my_min(len, limit); |
| |
| memcpy(str->buf + str->pos, s, copy_size); |
| str->pos += copy_size; |
| return; |
| } |
| |
| |
| static void |
| str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix) |
| { |
| if (v == 0) { |
| str_append_str(str, "0"); |
| } else { |
| // NOTE: Don't use plain "B" because xz and the parser in this |
| // file don't support it and at glance it may look like 8 |
| // (there cannot be a space before the suffix). |
| static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" }; |
| |
| size_t suf = 0; |
| if (use_byte_suffix) { |
| while ((v & 1023) == 0 |
| && suf < ARRAY_SIZE(suffixes) - 1) { |
| v >>= 10; |
| ++suf; |
| } |
| } |
| |
| // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember |
| // that initializing to "" initializes all elements to |
| // zero so '\0'-termination gets handled by this. |
| char buf[16] = ""; |
| size_t pos = sizeof(buf) - 1; |
| |
| do { |
| buf[--pos] = '0' + (v % 10); |
| v /= 10; |
| } while (v != 0); |
| |
| str_append_str(str, buf + pos); |
| str_append_str(str, suffixes[suf]); |
| } |
| |
| return; |
| } |
| |
| |
| ////////////////////////////////////////////// |
| // Parsing and stringification declarations // |
| ////////////////////////////////////////////// |
| |
| /// Maximum length for filter and option names. |
| /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes |
| #define NAME_LEN_MAX 11 |
| |
| |
| /// For option_map.flags: Use .u.map to do convert the input value |
| /// to an integer. Without this flag, .u.range.{min,max} are used |
| /// as the allowed range for the integer. |
| #define OPTMAP_USE_NAME_VALUE_MAP 0x01 |
| |
| /// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in |
| /// the stringified output if the value is an exact multiple of these. |
| /// This is used e.g. for LZMA1/2 dictionary size. |
| #define OPTMAP_USE_BYTE_SUFFIX 0x02 |
| |
| /// For option_map.flags: If the integer value is zero then this option |
| /// won't be included in the stringified output. It's used e.g. for |
| /// BCJ filter start offset which usually is zero. |
| #define OPTMAP_NO_STRFY_ZERO 0x04 |
| |
| /// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0, |
| /// it doesn't need to be specified in the initializers as it is |
| /// the implicit value. |
| enum { |
| OPTMAP_TYPE_UINT32, |
| OPTMAP_TYPE_LZMA_MODE, |
| OPTMAP_TYPE_LZMA_MATCH_FINDER, |
| OPTMAP_TYPE_LZMA_PRESET, |
| }; |
| |
| |
| /// This is for mapping string values in options to integers. |
| /// The last element of an array must have "" as the name. |
| /// It's used e.g. for match finder names in LZMA1/2. |
| typedef struct { |
| const char name[NAME_LEN_MAX + 1]; |
| const uint32_t value; |
| } name_value_map; |
| |
| |
| /// Each filter that has options needs an array of option_map structures. |
| /// The array doesn't need to be terminated as the functions take the |
| /// length of the array as an argument. |
| /// |
| /// When converting a string to filter options structure, option values |
| /// will be handled in a few different ways: |
| /// |
| /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string |
| /// is handled specially. |
| /// |
| /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is |
| /// converted to an integer using the name_value_map pointed by .u.map. |
| /// The last element in .u.map must have .name = "" as the terminator. |
| /// |
| /// (3) Otherwise the string is treated as a non-negative unsigned decimal |
| /// integer which must be in the range set in .u.range. If .flags has |
| /// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed. |
| /// |
| /// The integer value from (2) or (3) is then stored to filter_options |
| /// at the offset specified in .offset using the type specified in .type |
| /// (default is uint32_t). |
| /// |
| /// Stringifying a filter is done by processing a given number of options |
| /// in order from the beginning of an option_map array. The integer is |
| /// read from filter_options at .offset using the type from .type. |
| /// |
| /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the |
| /// option is skipped. |
| /// |
| /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used |
| /// to convert the option to a string. If the map doesn't contain a string |
| /// for the integer value then "UNKNOWN" is used. |
| /// |
| /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is |
| /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB, |
| /// MiB, or GiB suffix is used if the value is an exact multiple of these. |
| /// Plain "B" suffix is never used. |
| typedef struct { |
| char name[NAME_LEN_MAX + 1]; |
| uint8_t type; |
| uint8_t flags; |
| uint16_t offset; |
| |
| union { |
| struct { |
| uint32_t min; |
| uint32_t max; |
| } range; |
| |
| const name_value_map *map; |
| } u; |
| } option_map; |
| |
| |
| static const char *parse_options(const char **const str, const char *str_end, |
| void *filter_options, |
| const option_map *const optmap, const size_t optmap_size); |
| |
| |
| ///////// |
| // BCJ // |
| ///////// |
| |
| #if defined(HAVE_ENCODER_X86) \ |
| || defined(HAVE_DECODER_X86) \ |
| || defined(HAVE_ENCODER_ARM) \ |
| || defined(HAVE_DECODER_ARM) \ |
| || defined(HAVE_ENCODER_ARMTHUMB) \ |
| || defined(HAVE_DECODER_ARMTHUMB) \ |
| || defined(HAVE_ENCODER_ARM64) \ |
| || defined(HAVE_DECODER_ARM64) \ |
| || defined(HAVE_ENCODER_POWERPC) \ |
| || defined(HAVE_DECODER_POWERPC) \ |
| || defined(HAVE_ENCODER_IA64) \ |
| || defined(HAVE_DECODER_IA64) \ |
| || defined(HAVE_ENCODER_SPARC) \ |
| || defined(HAVE_DECODER_SPARC) |
| static const option_map bcj_optmap[] = { |
| { |
| .name = "start", |
| .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX, |
| .offset = offsetof(lzma_options_bcj, start_offset), |
| .u.range.min = 0, |
| .u.range.max = UINT32_MAX, |
| } |
| }; |
| |
| |
| static const char * |
| parse_bcj(const char **const str, const char *str_end, void *filter_options) |
| { |
| // filter_options was zeroed on allocation and that is enough |
| // for the default value. |
| return parse_options(str, str_end, filter_options, |
| bcj_optmap, ARRAY_SIZE(bcj_optmap)); |
| } |
| #endif |
| |
| |
| /////////// |
| // Delta // |
| /////////// |
| |
| #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) |
| static const option_map delta_optmap[] = { |
| { |
| .name = "dist", |
| .offset = offsetof(lzma_options_delta, dist), |
| .u.range.min = LZMA_DELTA_DIST_MIN, |
| .u.range.max = LZMA_DELTA_DIST_MAX, |
| } |
| }; |
| |
| |
| static const char * |
| parse_delta(const char **const str, const char *str_end, void *filter_options) |
| { |
| lzma_options_delta *opts = filter_options; |
| opts->type = LZMA_DELTA_TYPE_BYTE; |
| opts->dist = LZMA_DELTA_DIST_MIN; |
| |
| return parse_options(str, str_end, filter_options, |
| delta_optmap, ARRAY_SIZE(delta_optmap)); |
| } |
| #endif |
| |
| |
| /////////////////// |
| // LZMA1 & LZMA2 // |
| /////////////////// |
| |
| /// Help string for presets |
| #define LZMA12_PRESET_STR "0-9[e]" |
| |
| |
| static const char * |
| parse_lzma12_preset(const char **const str, const char *str_end, |
| uint32_t *preset) |
| { |
| assert(*str < str_end); |
| *preset = (uint32_t)(**str - '0'); |
| |
| // NOTE: Remember to update LZMA12_PRESET_STR if this is modified! |
| while (++*str < str_end) { |
| switch (**str) { |
| case 'e': |
| *preset |= LZMA_PRESET_EXTREME; |
| break; |
| |
| default: |
| return "Unsupported preset flag"; |
| } |
| } |
| |
| return NULL; |
| } |
| |
| |
| static const char * |
| set_lzma12_preset(const char **const str, const char *str_end, |
| void *filter_options) |
| { |
| uint32_t preset; |
| const char *errmsg = parse_lzma12_preset(str, str_end, &preset); |
| if (errmsg != NULL) |
| return errmsg; |
| |
| lzma_options_lzma *opts = filter_options; |
| if (lzma_lzma_preset(opts, preset)) |
| return "Unsupported preset"; |
| |
| return NULL; |
| } |
| |
| |
| static const name_value_map lzma12_mode_map[] = { |
| { "fast", LZMA_MODE_FAST }, |
| { "normal", LZMA_MODE_NORMAL }, |
| { "", 0 } |
| }; |
| |
| |
| static const name_value_map lzma12_mf_map[] = { |
| { "hc3", LZMA_MF_HC3 }, |
| { "hc4", LZMA_MF_HC4 }, |
| { "bt2", LZMA_MF_BT2 }, |
| { "bt3", LZMA_MF_BT3 }, |
| { "bt4", LZMA_MF_BT4 }, |
| { "", 0 } |
| }; |
| |
| |
| static const option_map lzma12_optmap[] = { |
| { |
| .name = "preset", |
| .type = OPTMAP_TYPE_LZMA_PRESET, |
| }, { |
| .name = "dict", |
| .flags = OPTMAP_USE_BYTE_SUFFIX, |
| .offset = offsetof(lzma_options_lzma, dict_size), |
| .u.range.min = LZMA_DICT_SIZE_MIN, |
| // FIXME? The max is really max for encoding but decoding |
| // would allow 4 GiB - 1 B. |
| .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29), |
| }, { |
| .name = "lc", |
| .offset = offsetof(lzma_options_lzma, lc), |
| .u.range.min = LZMA_LCLP_MIN, |
| .u.range.max = LZMA_LCLP_MAX, |
| }, { |
| .name = "lp", |
| .offset = offsetof(lzma_options_lzma, lp), |
| .u.range.min = LZMA_LCLP_MIN, |
| .u.range.max = LZMA_LCLP_MAX, |
| }, { |
| .name = "pb", |
| .offset = offsetof(lzma_options_lzma, pb), |
| .u.range.min = LZMA_PB_MIN, |
| .u.range.max = LZMA_PB_MAX, |
| }, { |
| .name = "mode", |
| .type = OPTMAP_TYPE_LZMA_MODE, |
| .flags = OPTMAP_USE_NAME_VALUE_MAP, |
| .offset = offsetof(lzma_options_lzma, mode), |
| .u.map = lzma12_mode_map, |
| }, { |
| .name = "nice", |
| .offset = offsetof(lzma_options_lzma, nice_len), |
| .u.range.min = 2, |
| .u.range.max = 273, |
| }, { |
| .name = "mf", |
| .type = OPTMAP_TYPE_LZMA_MATCH_FINDER, |
| .flags = OPTMAP_USE_NAME_VALUE_MAP, |
| .offset = offsetof(lzma_options_lzma, mf), |
| .u.map = lzma12_mf_map, |
| }, { |
| .name = "depth", |
| .offset = offsetof(lzma_options_lzma, depth), |
| .u.range.min = 0, |
| .u.range.max = UINT32_MAX, |
| } |
| }; |
| |
| |
| static const char * |
| parse_lzma12(const char **const str, const char *str_end, void *filter_options) |
| { |
| lzma_options_lzma *opts = filter_options; |
| |
| // It cannot fail. |
| const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT); |
| assert(!preset_ret); |
| (void)preset_ret; |
| |
| const char *errmsg = parse_options(str, str_end, filter_options, |
| lzma12_optmap, ARRAY_SIZE(lzma12_optmap)); |
| if (errmsg != NULL) |
| return errmsg; |
| |
| if (opts->lc + opts->lp > LZMA_LCLP_MAX) |
| return "The sum of lc and lp must not exceed 4"; |
| |
| return NULL; |
| } |
| |
| |
| ///////////////////////////////////////// |
| // Generic parsing and stringification // |
| ///////////////////////////////////////// |
| |
| static const struct { |
| /// Name of the filter |
| char name[NAME_LEN_MAX + 1]; |
| |
| /// For lzma_str_to_filters: |
| /// Size of the filter-specific options structure. |
| uint32_t opts_size; |
| |
| /// Filter ID |
| lzma_vli id; |
| |
| /// For lzma_str_to_filters: |
| /// Function to parse the filter-specific options. The filter_options |
| /// will already have been allocated using lzma_alloc_zero(). |
| const char *(*parse)(const char **str, const char *str_end, |
| void *filter_options); |
| |
| /// For lzma_str_from_filters: |
| /// If the flag LZMA_STR_ENCODER is used then the first |
| /// strfy_encoder elements of optmap are stringified. |
| /// With LZMA_STR_DECODER strfy_decoder is used. |
| /// Currently encoders use all options that decoders do but if |
| /// that changes then this needs to be changed too, for example, |
| /// add a new OPTMAP flag to skip printing some decoder-only options. |
| const option_map *optmap; |
| uint8_t strfy_encoder; |
| uint8_t strfy_decoder; |
| |
| /// For lzma_str_from_filters: |
| /// If true, lzma_filter.options is allowed to be NULL. In that case, |
| /// only the filter name is printed without any options. |
| bool allow_null; |
| |
| } filter_name_map[] = { |
| #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) |
| { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1, |
| &parse_lzma12, lzma12_optmap, 9, 5, false }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) |
| { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2, |
| &parse_lzma12, lzma12_optmap, 9, 2, false }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86) |
| { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM) |
| { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB) |
| { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64) |
| { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC) |
| { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64) |
| { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC) |
| { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC, |
| &parse_bcj, bcj_optmap, 1, 1, true }, |
| #endif |
| |
| #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) |
| { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA, |
| &parse_delta, delta_optmap, 1, 1, false }, |
| #endif |
| }; |
| |
| |
| /// Decodes options from a string for one filter (name1=value1,name2=value2). |
| /// Caller must have allocated memory for filter_options already and set |
| /// the initial default values. This is called from the filter-specific |
| /// parse_* functions. |
| /// |
| /// The input string starts at *str and the address in str_end is the first |
| /// char that is not part of the string anymore. So no '\0' terminator is |
| /// used. *str is advanced every time something has been decoded successfully. |
| static const char * |
| parse_options(const char **const str, const char *str_end, |
| void *filter_options, |
| const option_map *const optmap, const size_t optmap_size) |
| { |
| while (*str < str_end && **str != '\0') { |
| // Each option is of the form name=value. |
| // Commas (',') separate options. Extra commas are ignored. |
| // Ignoring extra commas makes it simpler if an optional |
| // option stored in a shell variable which can be empty. |
| if (**str == ',') { |
| ++*str; |
| continue; |
| } |
| |
| // Find where the next name=value ends. |
| const size_t str_len = (size_t)(str_end - *str); |
| const char *name_eq_value_end = memchr(*str, ',', str_len); |
| if (name_eq_value_end == NULL) |
| name_eq_value_end = str_end; |
| |
| const char *equals_sign = memchr(*str, '=', |
| (size_t)(name_eq_value_end - *str)); |
| |
| // Fail if the '=' wasn't found or the option name is missing |
| // (the first char is '='). |
| if (equals_sign == NULL || **str == '=') |
| return "Options must be 'name=value' pairs separated " |
| "with commas"; |
| |
| // Reject a too long option name so that the memcmp() |
| // in the loop below won't read past the end of the |
| // string in optmap[i].name. |
| const size_t name_len = (size_t)(equals_sign - *str); |
| if (name_len > NAME_LEN_MAX) |
| return "Unknown option name"; |
| |
| // Find the option name from optmap[]. |
| size_t i = 0; |
| while (true) { |
| if (i == optmap_size) |
| return "Unknown option name"; |
| |
| if (memcmp(*str, optmap[i].name, name_len) == 0 |
| && optmap[i].name[name_len] == '\0') |
| break; |
| |
| ++i; |
| } |
| |
| // The input string is good at least until the start of |
| // the option value. |
| *str = equals_sign + 1; |
| |
| // The code assumes that the option value isn't an empty |
| // string so check it here. |
| const size_t value_len = (size_t)(name_eq_value_end - *str); |
| if (value_len == 0) |
| return "Option value cannot be empty"; |
| |
| // LZMA1/2 preset has its own parsing function. |
| if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) { |
| const char *errmsg = set_lzma12_preset(str, |
| name_eq_value_end, filter_options); |
| if (errmsg != NULL) |
| return errmsg; |
| |
| continue; |
| } |
| |
| // It's an integer value. |
| uint32_t v; |
| if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { |
| // The integer is picked from a string-to-integer map. |
| // |
| // Reject a too long value string so that the memcmp() |
| // in the loop below won't read past the end of the |
| // string in optmap[i].u.map[j].name. |
| if (value_len > NAME_LEN_MAX) |
| return "Invalid option value"; |
| |
| const name_value_map *map = optmap[i].u.map; |
| size_t j = 0; |
| while (true) { |
| // The array is terminated with an empty name. |
| if (map[j].name[0] == '\0') |
| return "Invalid option value"; |
| |
| if (memcmp(*str, map[j].name, value_len) == 0 |
| && map[j].name[value_len] |
| == '\0') { |
| v = map[j].value; |
| break; |
| } |
| |
| ++j; |
| } |
| } else if (**str < '0' || **str > '9') { |
| // Note that "max" isn't supported while it is |
| // supported in xz. It's not useful here. |
| return "Value is not a non-negative decimal integer"; |
| } else { |
| // strtoul() has locale-specific behavior so it cannot |
| // be relied on to get reproducible results since we |
| // cannot change the locate in a thread-safe library. |
| // It also needs '\0'-termination. |
| // |
| // Use a temporary pointer so that *str will point |
| // to the beginning of the value string in case |
| // an error occurs. |
| const char *p = *str; |
| v = 0; |
| do { |
| if (v > UINT32_MAX / 10) |
| return "Value out of range"; |
| |
| v *= 10; |
| |
| const uint32_t add = (uint32_t)(*p - '0'); |
| if (UINT32_MAX - add < v) |
| return "Value out of range"; |
| |
| v += add; |
| ++p; |
| } while (p < name_eq_value_end |
| && *p >= '0' && *p <= '9'); |
| |
| if (p < name_eq_value_end) { |
| // Remember this position so that it can be |
| // used for error messages that are |
| // specifically about the suffix. (Out of |
| // range values are about the whole value |
| // and those error messages point to the |
| // beginning of the number part, |
| // not to the suffix.) |
| const char *multiplier_start = p; |
| |
| // If multiplier suffix shouldn't be used |
| // then don't allow them even if the value |
| // would stay within limits. This is a somewhat |
| // unnecessary check but it rejects silly |
| // things like lzma2:pb=0MiB which xz allows. |
| if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX) |
| == 0) { |
| *str = multiplier_start; |
| return "This option does not support " |
| "any integer suffixes"; |
| } |
| |
| uint32_t shift; |
| |
| switch (*p) { |
| case 'k': |
| case 'K': |
| shift = 10; |
| break; |
| |
| case 'm': |
| case 'M': |
| shift = 20; |
| break; |
| |
| case 'g': |
| case 'G': |
| shift = 30; |
| break; |
| |
| default: |
| *str = multiplier_start; |
| return "Invalid multiplier suffix " |
| "(KiB, MiB, or GiB)"; |
| } |
| |
| ++p; |
| |
| // Allow "M", "Mi", "MB", "MiB" and the same |
| // for the other five characters from the |
| // switch-statement above. All are handled |
| // as base-2 (perhaps a mistake, perhaps not). |
| // Note that 'i' and 'B' are case sensitive. |
| if (p < name_eq_value_end && *p == 'i') |
| ++p; |
| |
| if (p < name_eq_value_end && *p == 'B') |
| ++p; |
| |
| // Now we must have no chars remaining. |
| if (p < name_eq_value_end) { |
| *str = multiplier_start; |
| return "Invalid multiplier suffix " |
| "(KiB, MiB, or GiB)"; |
| } |
| |
| if (v > (UINT32_MAX >> shift)) |
| return "Value out of range"; |
| |
| v <<= shift; |
| } |
| |
| if (v < optmap[i].u.range.min |
| || v > optmap[i].u.range.max) |
| return "Value out of range"; |
| } |
| |
| // Set the value in filter_options. Enums are handled |
| // specially since the underlying type isn't the same |
| // as uint32_t on all systems. |
| void *ptr = (char *)filter_options + optmap[i].offset; |
| switch (optmap[i].type) { |
| case OPTMAP_TYPE_LZMA_MODE: |
| *(lzma_mode *)ptr = (lzma_mode)v; |
| break; |
| |
| case OPTMAP_TYPE_LZMA_MATCH_FINDER: |
| *(lzma_match_finder *)ptr = (lzma_match_finder)v; |
| break; |
| |
| default: |
| *(uint32_t *)ptr = v; |
| break; |
| } |
| |
| // This option has been successfully handled. |
| *str = name_eq_value_end; |
| } |
| |
| // No errors. |
| return NULL; |
| } |
| |
| |
| /// Finds the name of the filter at the beginning of the string and |
| /// calls filter_name_map[i].parse() to decode the filter-specific options. |
| /// The caller must have set str_end so that exactly one filter and its |
| /// options are present without any trailing characters. |
| static const char * |
| parse_filter(const char **const str, const char *str_end, lzma_filter *filter, |
| const lzma_allocator *allocator, bool only_xz) |
| { |
| // Search for a colon or equals sign that would separate the filter |
| // name from filter options. If neither is found, then the input |
| // string only contains a filter name and there are no options. |
| // |
| // First assume that a colon or equals sign won't be found: |
| const char *name_end = str_end; |
| const char *opts_start = str_end; |
| |
| for (const char *p = *str; p < str_end; ++p) { |
| if (*p == ':' || *p == '=') { |
| name_end = p; |
| |
| // Filter options (name1=value1,name2=value2,...) |
| // begin after the colon or equals sign. |
| opts_start = p + 1; |
| break; |
| } |
| } |
| |
| // Reject a too long filter name so that the memcmp() |
| // in the loop below won't read past the end of the |
| // string in filter_name_map[i].name. |
| const size_t name_len = (size_t)(name_end - *str); |
| if (name_len > NAME_LEN_MAX) |
| return "Unknown filter name"; |
| |
| for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { |
| if (memcmp(*str, filter_name_map[i].name, name_len) == 0 |
| && filter_name_map[i].name[name_len] == '\0') { |
| if (only_xz && filter_name_map[i].id |
| >= LZMA_FILTER_RESERVED_START) |
| return "This filter cannot be used in " |
| "the .xz format"; |
| |
| // Allocate the filter-specific options and |
| // initialize the memory with zeros. |
| void *options = lzma_alloc_zero( |
| filter_name_map[i].opts_size, |
| allocator); |
| if (options == NULL) |
| return "Memory allocation failed"; |
| |
| // Filter name was found so the input string is good |
| // at least this far. |
| *str = opts_start; |
| |
| const char *errmsg = filter_name_map[i].parse( |
| str, str_end, options); |
| if (errmsg != NULL) { |
| lzma_free(options, allocator); |
| return errmsg; |
| } |
| |
| // *filter is modified only when parsing is successful. |
| filter->id = filter_name_map[i].id; |
| filter->options = options; |
| return NULL; |
| } |
| } |
| |
| return "Unknown filter name"; |
| } |
| |
| |
| /// Converts the string to a filter chain (array of lzma_filter structures). |
| /// |
| /// *str is advanced every time something has been decoded successfully. |
| /// This way the caller knows where in the string a possible error occurred. |
| static const char * |
| str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags, |
| const lzma_allocator *allocator) |
| { |
| const char *errmsg; |
| |
| // Skip leading spaces. |
| while (**str == ' ') |
| ++*str; |
| |
| if (**str == '\0') |
| return "Empty string is not allowed, " |
| "try \"6\" if a default value is needed"; |
| |
| // Detect the type of the string. |
| // |
| // A string beginning with a digit or a string beginning with |
| // one dash and a digit are treated as presets. Trailing spaces |
| // will be ignored too (leading spaces were already ignored above). |
| // |
| // For example, "6", "7 ", "-9e", or " -3 " are treated as presets. |
| // Strings like "-" or "- " aren't preset. |
| #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9') |
| if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) { |
| if (**str == '-') |
| ++*str; |
| |
| // Ignore trailing spaces. |
| const size_t str_len = strlen(*str); |
| const char *str_end = memchr(*str, ' ', str_len); |
| if (str_end != NULL) { |
| // There is at least one trailing space. Check that |
| // there are no chars other than spaces. |
| for (size_t i = 1; str_end[i] != '\0'; ++i) |
| if (str_end[i] != ' ') |
| return "Unsupported preset"; |
| } else { |
| // There are no trailing spaces. Use the whole string. |
| str_end = *str + str_len; |
| } |
| |
| uint32_t preset; |
| errmsg = parse_lzma12_preset(str, str_end, &preset); |
| if (errmsg != NULL) |
| return errmsg; |
| |
| lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator); |
| if (opts == NULL) |
| return "Memory allocation failed"; |
| |
| if (lzma_lzma_preset(opts, preset)) { |
| lzma_free(opts, allocator); |
| return "Unsupported preset"; |
| } |
| |
| filters[0].id = LZMA_FILTER_LZMA2; |
| filters[0].options = opts; |
| filters[1].id = LZMA_VLI_UNKNOWN; |
| filters[1].options = NULL; |
| |
| return NULL; |
| } |
| |
| // Not a preset so it must be a filter chain. |
| // |
| // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that |
| // can be used in .xz. |
| const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0; |
| |
| // Use a temporary array so that we don't modify the caller-supplied |
| // one until we know that no errors occurred. |
| lzma_filter temp_filters[LZMA_FILTERS_MAX + 1]; |
| |
| size_t i = 0; |
| do { |
| if (i == LZMA_FILTERS_MAX) { |
| errmsg = "The maximum number of filters is four"; |
| goto error; |
| } |
| |
| // Skip "--" if present. |
| if ((*str)[0] == '-' && (*str)[1] == '-') |
| *str += 2; |
| |
| // Locate the end of "filter:name1=value1,name2=value2", |
| // stopping at the first "--" or a single space. |
| const char *filter_end = *str; |
| while (filter_end[0] != '\0') { |
| if ((filter_end[0] == '-' && filter_end[1] == '-') |
| || filter_end[0] == ' ') |
| break; |
| |
| ++filter_end; |
| } |
| |
| // Inputs that have "--" at the end or "-- " in the middle |
| // will result in an empty filter name. |
| if (filter_end == *str) { |
| errmsg = "Filter name is missing"; |
| goto error; |
| } |
| |
| errmsg = parse_filter(str, filter_end, &temp_filters[i], |
| allocator, only_xz); |
| if (errmsg != NULL) |
| goto error; |
| |
| // Skip trailing spaces. |
| while (**str == ' ') |
| ++*str; |
| |
| ++i; |
| } while (**str != '\0'); |
| |
| // Seems to be good, terminate the array so that |
| // basic validation can be done. |
| temp_filters[i].id = LZMA_VLI_UNKNOWN; |
| temp_filters[i].options = NULL; |
| |
| // Do basic validation if the application didn't prohibit it. |
| if ((flags & LZMA_STR_NO_VALIDATION) == 0) { |
| size_t dummy; |
| const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy); |
| assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR); |
| if (ret != LZMA_OK) { |
| errmsg = "Invalid filter chain " |
| "('lzma2' missing at the end?)"; |
| goto error; |
| } |
| } |
| |
| // All good. Copy the filters to the application supplied array. |
| memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter)); |
| return NULL; |
| |
| error: |
| // Free the filter options that were successfully decoded. |
| while (i-- > 0) |
| lzma_free(temp_filters[i].options, allocator); |
| |
| return errmsg; |
| } |
| |
| |
| extern LZMA_API(const char *) |
| lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters, |
| uint32_t flags, const lzma_allocator *allocator) |
| { |
| if (str == NULL || filters == NULL) |
| return "Unexpected NULL pointer argument(s) " |
| "to lzma_str_to_filters()"; |
| |
| // Validate the flags. |
| const uint32_t supported_flags |
| = LZMA_STR_ALL_FILTERS |
| | LZMA_STR_NO_VALIDATION; |
| |
| if (flags & ~supported_flags) |
| return "Unsupported flags to lzma_str_to_filters()"; |
| |
| const char *used = str; |
| const char *errmsg = str_to_filters(&used, filters, flags, allocator); |
| |
| if (error_pos != NULL) { |
| const size_t n = (size_t)(used - str); |
| *error_pos = n > INT_MAX ? INT_MAX : (int)n; |
| } |
| |
| return errmsg; |
| } |
| |
| |
| /// Converts options of one filter to a string. |
| /// |
| /// The caller must have already put the filter name in the destination |
| /// string. Since it is possible that no options will be needed, the caller |
| /// won't have put a delimiter character (':' or '=') in the string yet. |
| /// We will add it if at least one option will be added to the string. |
| static void |
| strfy_filter(lzma_str *dest, const char *delimiter, |
| const option_map *optmap, size_t optmap_count, |
| const void *filter_options) |
| { |
| for (size_t i = 0; i < optmap_count; ++i) { |
| // No attempt is made to reverse LZMA1/2 preset. |
| if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) |
| continue; |
| |
| // All options have integer values, some just are mapped |
| // to a string with a name_value_map. LZMA1/2 preset |
| // isn't reversed back to preset=PRESET form. |
| uint32_t v; |
| const void *ptr |
| = (const char *)filter_options + optmap[i].offset; |
| switch (optmap[i].type) { |
| case OPTMAP_TYPE_LZMA_MODE: |
| v = *(const lzma_mode *)ptr; |
| break; |
| |
| case OPTMAP_TYPE_LZMA_MATCH_FINDER: |
| v = *(const lzma_match_finder *)ptr; |
| break; |
| |
| default: |
| v = *(const uint32_t *)ptr; |
| break; |
| } |
| |
| // Skip this if this option should be omitted from |
| // the string when the value is zero. |
| if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO)) |
| continue; |
| |
| // Before the first option we add whatever delimiter |
| // the caller gave us. For later options a comma is used. |
| str_append_str(dest, delimiter); |
| delimiter = ","; |
| |
| // Add the option name and equals sign. |
| str_append_str(dest, optmap[i].name); |
| str_append_str(dest, "="); |
| |
| if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) { |
| const name_value_map *map = optmap[i].u.map; |
| size_t j = 0; |
| while (true) { |
| if (map[j].name[0] == '\0') { |
| str_append_str(dest, "UNKNOWN"); |
| break; |
| } |
| |
| if (map[j].value == v) { |
| str_append_str(dest, map[j].name); |
| break; |
| } |
| |
| ++j; |
| } |
| } else { |
| str_append_u32(dest, v, |
| optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX); |
| } |
| } |
| |
| return; |
| } |
| |
| |
| extern LZMA_API(lzma_ret) |
| lzma_str_from_filters(char **output_str, const lzma_filter *filters, |
| uint32_t flags, const lzma_allocator *allocator) |
| { |
| // On error *output_str is always set to NULL. |
| // Do it as the very first step. |
| if (output_str == NULL) |
| return LZMA_PROG_ERROR; |
| |
| *output_str = NULL; |
| |
| if (filters == NULL) |
| return LZMA_PROG_ERROR; |
| |
| // Validate the flags. |
| const uint32_t supported_flags |
| = LZMA_STR_ENCODER |
| | LZMA_STR_DECODER |
| | LZMA_STR_GETOPT_LONG |
| | LZMA_STR_NO_SPACES; |
| |
| if (flags & ~supported_flags) |
| return LZMA_OPTIONS_ERROR; |
| |
| // There must be at least one filter. |
| if (filters[0].id == LZMA_VLI_UNKNOWN) |
| return LZMA_OPTIONS_ERROR; |
| |
| // Allocate memory for the output string. |
| lzma_str dest; |
| return_if_error(str_init(&dest, allocator)); |
| |
| const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); |
| |
| const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; |
| |
| for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { |
| // If we reach LZMA_FILTERS_MAX, then the filters array |
| // is too large since the ID cannot be LZMA_VLI_UNKNOWN here. |
| if (i == LZMA_FILTERS_MAX) { |
| str_free(&dest, allocator); |
| return LZMA_OPTIONS_ERROR; |
| } |
| |
| // Don't add a space between filters if the caller |
| // doesn't want them. |
| if (i > 0 && !(flags & LZMA_STR_NO_SPACES)) |
| str_append_str(&dest, " "); |
| |
| // Use dashes for xz getopt_long() compatible syntax but also |
| // use dashes to separate filters when spaces weren't wanted. |
| if ((flags & LZMA_STR_GETOPT_LONG) |
| || (i > 0 && (flags & LZMA_STR_NO_SPACES))) |
| str_append_str(&dest, "--"); |
| |
| size_t j = 0; |
| while (true) { |
| if (j == ARRAY_SIZE(filter_name_map)) { |
| // Filter ID in filters[i].id isn't supported. |
| str_free(&dest, allocator); |
| return LZMA_OPTIONS_ERROR; |
| } |
| |
| if (filter_name_map[j].id == filters[i].id) { |
| // Add the filter name. |
| str_append_str(&dest, filter_name_map[j].name); |
| |
| // If only the filter names were wanted then |
| // skip to the next filter. In this case |
| // .options is ignored and may be NULL even |
| // when the filter doesn't allow NULL options. |
| if (!show_opts) |
| break; |
| |
| if (filters[i].options == NULL) { |
| if (!filter_name_map[j].allow_null) { |
| // Filter-specific options |
| // are missing but with |
| // this filter the options |
| // structure is mandatory. |
| str_free(&dest, allocator); |
| return LZMA_OPTIONS_ERROR; |
| } |
| |
| // .options is allowed to be NULL. |
| // There is no need to add any |
| // options to the string. |
| break; |
| } |
| |
| // Options structure is available. Add |
| // the filter options to the string. |
| const size_t optmap_count |
| = (flags & LZMA_STR_ENCODER) |
| ? filter_name_map[j].strfy_encoder |
| : filter_name_map[j].strfy_decoder; |
| strfy_filter(&dest, opt_delim, |
| filter_name_map[j].optmap, |
| optmap_count, |
| filters[i].options); |
| break; |
| } |
| |
| ++j; |
| } |
| } |
| |
| return str_finish(output_str, &dest, allocator); |
| } |
| |
| |
| extern LZMA_API(lzma_ret) |
| lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags, |
| const lzma_allocator *allocator) |
| { |
| // On error *output_str is always set to NULL. |
| // Do it as the very first step. |
| if (output_str == NULL) |
| return LZMA_PROG_ERROR; |
| |
| *output_str = NULL; |
| |
| // Validate the flags. |
| const uint32_t supported_flags |
| = LZMA_STR_ALL_FILTERS |
| | LZMA_STR_ENCODER |
| | LZMA_STR_DECODER |
| | LZMA_STR_GETOPT_LONG; |
| |
| if (flags & ~supported_flags) |
| return LZMA_OPTIONS_ERROR; |
| |
| // Allocate memory for the output string. |
| lzma_str dest; |
| return_if_error(str_init(&dest, allocator)); |
| |
| // If only listing the filter names then separate them with spaces. |
| // Otherwise use newlines. |
| const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)); |
| const char *filter_delim = show_opts ? "\n" : " "; |
| |
| const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":"; |
| bool first_filter_printed = false; |
| |
| for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) { |
| // If we are printing only one filter then skip others. |
| if (filter_id != LZMA_VLI_UNKNOWN |
| && filter_id != filter_name_map[i].id) |
| continue; |
| |
| // If we are printing only .xz filters then skip the others. |
| if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START |
| && (flags & LZMA_STR_ALL_FILTERS) == 0 |
| && filter_id == LZMA_VLI_UNKNOWN) |
| continue; |
| |
| // Add a new line if this isn't the first filter being |
| // written to the string. |
| if (first_filter_printed) |
| str_append_str(&dest, filter_delim); |
| |
| first_filter_printed = true; |
| |
| if (flags & LZMA_STR_GETOPT_LONG) |
| str_append_str(&dest, "--"); |
| |
| str_append_str(&dest, filter_name_map[i].name); |
| |
| // If only the filter names were wanted then continue |
| // to the next filter. |
| if (!show_opts) |
| continue; |
| |
| const option_map *optmap = filter_name_map[i].optmap; |
| const char *d = opt_delim; |
| |
| const size_t end = (flags & LZMA_STR_ENCODER) |
| ? filter_name_map[i].strfy_encoder |
| : filter_name_map[i].strfy_decoder; |
| |
| for (size_t j = 0; j < end; ++j) { |
| // The first option is delimited from the filter |
| // name using "=" or ":" and the rest of the options |
| // are separated with ",". |
| str_append_str(&dest, d); |
| d = ","; |
| |
| // optname=<possible_values> |
| str_append_str(&dest, optmap[j].name); |
| str_append_str(&dest, "=<"); |
| |
| if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) { |
| // LZMA1/2 preset has its custom help string. |
| str_append_str(&dest, LZMA12_PRESET_STR); |
| } else if (optmap[j].flags |
| & OPTMAP_USE_NAME_VALUE_MAP) { |
| // Separate the possible option values by "|". |
| const name_value_map *m = optmap[j].u.map; |
| for (size_t k = 0; m[k].name[0] != '\0'; ++k) { |
| if (k > 0) |
| str_append_str(&dest, "|"); |
| |
| str_append_str(&dest, m[k].name); |
| } |
| } else { |
| // Integer range is shown as min-max. |
| const bool use_byte_suffix = optmap[j].flags |
| & OPTMAP_USE_BYTE_SUFFIX; |
| str_append_u32(&dest, optmap[j].u.range.min, |
| use_byte_suffix); |
| str_append_str(&dest, "-"); |
| str_append_u32(&dest, optmap[j].u.range.max, |
| use_byte_suffix); |
| } |
| |
| str_append_str(&dest, ">"); |
| } |
| } |
| |
| // If no filters were added to the string then it must be because |
| // the caller provided an unsupported Filter ID. |
| if (!first_filter_printed) { |
| str_free(&dest, allocator); |
| return LZMA_OPTIONS_ERROR; |
| } |
| |
| return str_finish(output_str, &dest, allocator); |
| } |