// src/liblzma/common/string_conversion.c - jrn/xz - Git at Google

 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       string_conversion.c
 /// \brief      Conversion of strings to filter chain and vice versa
 //
 //  Author:     Lasse Collin
 //
 //  This file has been put into the public domain.
 //  You can do whatever you want with this file.
 //
 ///////////////////////////////////////////////////////////////////////////////

 #include "filter_common.h"


 /////////////////////
 // String building //
 /////////////////////

 /// How much memory to allocate for strings. For now, no realloc is used,
 /// so this needs to be big enough for any string that can be produced.
 /// There is still an overflow check in case it is not.
 ///
 /// FIXME? Using a fixed size is wasteful if the application doesn't free
 /// the string fairly quickly but this can be improved later if needed.
 #define STR_ALLOC_SIZE 800


 /// Fixed-capacity string builder used by the str_*() helpers.
 typedef struct {
 	/// Destination buffer. str_init() allocates STR_ALLOC_SIZE bytes for it.
 	char *buf;
 	/// Number of bytes written to buf so far. str_append_str() never lets
 	/// this exceed STR_ALLOC_SIZE - 1 so that str_finish() can terminate.
 	size_t pos;
 } lzma_str;


 /// Allocates the STR_ALLOC_SIZE-byte buffer of a string builder and
 /// resets its write position to the beginning.
 ///
 /// \return     LZMA_OK on success. LZMA_MEM_ERROR if the allocation
 ///             failed; in that case str->buf is NULL and str->pos is
 ///             left untouched.
 static lzma_ret
 str_init(lzma_str *str, const lzma_allocator *allocator)
 {
 	str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);

 	if (str->buf != NULL) {
 		// Nothing has been written yet.
 		str->pos = 0;
 		return LZMA_OK;
 	}

 	return LZMA_MEM_ERROR;
 }


 /// Releases the buffer owned by the string builder. The structure itself
 /// is not modified, so str->buf must not be used after this call.
 static void
 str_free(lzma_str *str, const lzma_allocator *allocator)
 {
 	char *const owned = str->buf;
 	lzma_free(owned, allocator);
 }


 /// Tells whether the builder has reached its maximum content length,
 /// which is STR_ALLOC_SIZE - 1 (one byte is reserved for the '\0').
 /// Since appends are silently truncated at that point, a full buffer
 /// is treated by str_finish() as "output did not fit".
 static bool
 str_is_full(const lzma_str *str)
 {
 	const size_t max_content = STR_ALLOC_SIZE - 1;
 	return max_content == str->pos;
 }


 /// Terminates the built string and hands ownership of the buffer to
 /// the caller via *dest.
 ///
 /// \return     LZMA_OK and *dest pointing to the '\0'-terminated string
 ///             on success. If the buffer is full, the buffer is freed,
 ///             *dest is set to NULL, and LZMA_PROG_ERROR is returned.
 static lzma_ret
 str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
 {
 	if (!str_is_full(str)) {
 		// Normal case: there is room for the terminator.
 		str->buf[str->pos] = '\0';
 		*dest = str->buf;
 		return LZMA_OK;
 	}

 	// The preallocated buffer turned out to be too small. That is
 	// a programming error: STR_ALLOC_SIZE should be increased when
 	// new filters are added.
 	lzma_free(str->buf, allocator);
 	*dest = NULL;
 	assert(0);
 	return LZMA_PROG_ERROR;
 }


 /// Appends the '\0'-terminated string s to the builder. If there is not
 /// enough room, only as many bytes as fit are copied; the content length
 /// never exceeds STR_ALLOC_SIZE - 1. No terminator is written here.
 static void
 str_append_str(lzma_str *str, const char *s)
 {
 	const size_t room = STR_ALLOC_SIZE - 1 - str->pos;
 	const size_t wanted = strlen(s);

 	// Truncate silently; str_finish() detects the full buffer later.
 	const size_t n = wanted < room ? wanted : room;

 	memcpy(str->buf + str->pos, s, n);
 	str->pos += n;
 }


 /// Appends v as an unsigned decimal integer to the builder.
 ///
 /// If use_byte_suffix is true and v is a non-zero exact multiple of
 /// 1024, 1024^2, or 1024^3, the largest such unit is divided out and
 /// "KiB", "MiB", or "GiB" is appended after the number. Zero is always
 /// printed as plain "0".
 static void
 str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
 {
 	if (v == 0) {
 		str_append_str(str, "0");
 		return;
 	}

 	// NOTE: Plain "B" is deliberately not used because xz and the
 	// parser in this file don't support it and at a glance it may
 	// look like 8 (there cannot be a space before the suffix).
 	static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };

 	// Divide out 1024 as long as it divides evenly and a bigger
 	// suffix is still available.
 	size_t suf = 0;
 	for (; use_byte_suffix
 			&& suf < ARRAY_SIZE(suffixes) - 1
 			&& (v & 1023) == 0; ++suf)
 		v >>= 10;

 	// UINT32_MAX in base 10 needs 10 + 1 bytes. Initializing with ""
 	// zeroes every element so the last byte is the terminator.
 	// Digits are produced from the least significant end backwards.
 	char digits[16] = "";
 	char *first = digits + sizeof(digits) - 1;

 	// v is known to be non-zero here so at least one digit is emitted.
 	while (v != 0) {
 		*--first = (char)('0' + (v % 10));
 		v /= 10;
 	}

 	str_append_str(str, first);
 	str_append_str(str, suffixes[suf]);
 }


 //////////////////////////////////////////////
 // Parsing and stringification declarations //
 //////////////////////////////////////////////

 /// Maximum length for filter and option names.
 /// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
 #define NAME_LEN_MAX 11


 /// For option_map.flags: Use .u.map to convert the input value
 /// to an integer. Without this flag, .u.range.{min,max} are used
 /// as the allowed range for the integer.
 #define OPTMAP_USE_NAME_VALUE_MAP 0x01

 /// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
 /// the stringified output if the value is an exact multiple of these.
 /// This is used e.g. for LZMA1/2 dictionary size.
 #define OPTMAP_USE_BYTE_SUFFIX 0x02

 /// For option_map.flags: If the integer value is zero then this option
 /// won't be included in the stringified output. It's used e.g. for
 /// BCJ filter start offset which usually is zero.
 #define OPTMAP_NO_STRFY_ZERO 0x04

 /// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
 /// it doesn't need to be specified in the initializers as it is
 /// the implicit value.
 enum {
 	// Value is read and written as uint32_t (the default case).
 	OPTMAP_TYPE_UINT32,
 	// Value is read and written through a lzma_mode pointer.
 	OPTMAP_TYPE_LZMA_MODE,
 	// Value is read and written through a lzma_match_finder pointer.
 	OPTMAP_TYPE_LZMA_MATCH_FINDER,
 	// Not stored at .offset: parsing goes through set_lzma12_preset()
 	// and stringification skips the option.
 	OPTMAP_TYPE_LZMA_PRESET,
 };


 /// This is for mapping string values in options to integers.
 /// The last element of an array must have "" as the name.
 /// It's used e.g. for match finder names in LZMA1/2.
 typedef struct {
 	// Textual form of the value; "" marks the end of an array.
 	const char name[NAME_LEN_MAX + 1];
 	// Integer that the name maps to (and is mapped back from).
 	const uint32_t value;
 } name_value_map;


 /// Each filter that has options needs an array of option_map structures.
 /// The array doesn't need to be terminated as the functions take the
 /// length of the array as an argument.
 ///
 /// When converting a string to filter options structure, option values
 /// will be handled in a few different ways:
 ///
 /// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
 ///     is handled specially.
 ///
 /// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
 ///     converted to an integer using the name_value_map pointed by .u.map.
 ///     The last element in .u.map must have .name = "" as the terminator.
 ///
 /// (3) Otherwise the string is treated as a non-negative unsigned decimal
 ///     integer which must be in the range set in .u.range. If .flags has
 ///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
 ///
 /// The integer value from (2) or (3) is then stored to filter_options
 /// at the offset specified in .offset using the type specified in .type
 /// (default is uint32_t).
 ///
 /// Stringifying a filter is done by processing a given number of options
 /// in order from the beginning of an option_map array. The integer is
 /// read from filter_options at .offset using the type from .type.
 ///
 /// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
 /// option is skipped.
 ///
 /// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
 /// to convert the option to a string. If the map doesn't contain a string
 /// for the integer value then "UNKNOWN" is used.
 ///
 /// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
 /// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
 /// MiB, or GiB suffix is used if the value is an exact multiple of these.
 /// Plain "B" suffix is never used.
 typedef struct {
 	// Option name as it appears before '=' in the string.
 	char name[NAME_LEN_MAX + 1];
 	// One of the OPTMAP_TYPE_* values; selects how the value is stored.
 	uint8_t type;
 	// Bitwise-or of OPTMAP_* flags.
 	uint8_t flags;
 	// Byte offset of the value inside the filter-specific options struct.
 	uint16_t offset;

 	// Which member is valid depends on OPTMAP_USE_NAME_VALUE_MAP in .flags.
 	union {
 		// Inclusive range of accepted integers (flag not set).
 		struct {
 			uint32_t min;
 			uint32_t max;
 		} range;

 		// Name-to-integer table terminated by an empty name (flag set).
 		const name_value_map *map;
 	} u;
 } option_map;


 static const char *parse_options(const char **const str, const char *str_end,
 		void *filter_options,
 		const option_map *const optmap, const size_t optmap_size);


 /////////
 // BCJ //
 /////////

 #if defined(HAVE_ENCODER_X86) \
 		|| defined(HAVE_DECODER_X86) \
 		|| defined(HAVE_ENCODER_ARM) \
 		|| defined(HAVE_DECODER_ARM) \
 		|| defined(HAVE_ENCODER_ARMTHUMB) \
 		|| defined(HAVE_DECODER_ARMTHUMB) \
 		|| defined(HAVE_ENCODER_ARM64) \
 		|| defined(HAVE_DECODER_ARM64) \
 		|| defined(HAVE_ENCODER_POWERPC) \
 		|| defined(HAVE_DECODER_POWERPC) \
 		|| defined(HAVE_ENCODER_IA64) \
 		|| defined(HAVE_DECODER_IA64) \
 		|| defined(HAVE_ENCODER_SPARC) \
 		|| defined(HAVE_DECODER_SPARC)
 /// Option table used by the BCJ filter entries in filter_name_map:
 /// a single "start" option stored in lzma_options_bcj.start_offset.
 static const option_map bcj_optmap[] = {
 	{
 		.name = "start",
 		// Zero is omitted when stringifying; KiB/MiB/GiB suffixes
 		// are accepted and produced.
 		.flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
 		.offset = offsetof(lzma_options_bcj, start_offset),
 		.u.range.min = 0,
 		.u.range.max = UINT32_MAX,
 	}
 };


 /// Parses the options of a BCJ filter into filter_options, which is
 /// treated as lzma_options_bcj by bcj_optmap.
 ///
 /// \return     NULL on success or an error message from parse_options().
 static const char *
 parse_bcj(const char **const str, const char *str_end, void *filter_options)
 {
 	// The caller zeroed filter_options on allocation, which already
 	// is the default (start offset 0), so no initialization is needed.
 	const size_t optmap_count = ARRAY_SIZE(bcj_optmap);

 	return parse_options(str, str_end, filter_options,
 			bcj_optmap, optmap_count);
 }
 #endif


 ///////////
 // Delta //
 ///////////

 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
 /// Option table for the Delta filter: a single "dist" option stored
 /// in lzma_options_delta.dist as a plain integer without suffixes.
 static const option_map delta_optmap[] = {
 	{
 		.name = "dist",
 		.offset = offsetof(lzma_options_delta, dist),
 		.u.range.min = LZMA_DELTA_DIST_MIN,
 		.u.range.max = LZMA_DELTA_DIST_MAX,
 	}
 };


 /// Parses the options of the Delta filter into filter_options
 /// (a lzma_options_delta). Defaults are set first so that options
 /// missing from the string keep sane values.
 ///
 /// \return     NULL on success or an error message from parse_options().
 static const char *
 parse_delta(const char **const str, const char *str_end, void *filter_options)
 {
 	lzma_options_delta *const delta = filter_options;

 	// Defaults: byte-wise delta with the smallest distance.
 	delta->dist = LZMA_DELTA_DIST_MIN;
 	delta->type = LZMA_DELTA_TYPE_BYTE;

 	const char *const errmsg = parse_options(str, str_end,
 			filter_options,
 			delta_optmap, ARRAY_SIZE(delta_optmap));
 	return errmsg;
 }
 #endif


 ///////////////////
 // LZMA1 & LZMA2 //
 ///////////////////

 /// Help string for presets
 #define LZMA12_PRESET_STR "0-9[e]"


 /// Decodes a preset string of the form described by LZMA12_PRESET_STR.
 ///
 /// The first character is converted to a number without checking that
 /// it is a digit; callers pass the result to lzma_lzma_preset(), whose
 /// return value they check. Every following character must be a known
 /// flag.
 ///
 /// *str is advanced past each accepted character. On error it points
 /// to the offending flag character.
 ///
 /// \return     NULL on success, otherwise a static error message.
 static const char *
 parse_lzma12_preset(const char **const str, const char *str_end,
 		uint32_t *preset)
 {
 	// The caller guarantees that the string isn't empty.
 	assert(*str < str_end);
 	*preset = (uint32_t)(**str - '0');

 	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
 	for (++*str; *str < str_end; ++*str) {
 		// 'e' is the only flag supported for now.
 		if (**str != 'e')
 			return "Unsupported preset flag";

 		*preset |= LZMA_PRESET_EXTREME;
 	}

 	return NULL;
 }


 /// Handles the "preset" option of LZMA1/2: decodes the preset string
 /// and initializes filter_options (a lzma_options_lzma) from it.
 ///
 /// \return     NULL on success, otherwise a static error message.
 static const char *
 set_lzma12_preset(const char **const str, const char *str_end,
 		void *filter_options)
 {
 	uint32_t preset;
 	const char *const errmsg = parse_lzma12_preset(str, str_end, &preset);
 	if (errmsg != NULL)
 		return errmsg;

 	// lzma_lzma_preset() returns true if the preset isn't supported.
 	lzma_options_lzma *const lzma_opts = filter_options;
 	return lzma_lzma_preset(lzma_opts, preset)
 			? "Unsupported preset" : NULL;
 }


 /// Names accepted for the LZMA1/2 "mode" option and the lzma_mode
 /// values they map to. The empty name terminates the array.
 static const name_value_map lzma12_mode_map[] = {
 	{ "fast",   LZMA_MODE_FAST },
 	{ "normal", LZMA_MODE_NORMAL },
 	{ "",       0 }
 };


 /// Names accepted for the LZMA1/2 "mf" (match finder) option and the
 /// lzma_match_finder values they map to. The empty name terminates
 /// the array.
 static const name_value_map lzma12_mf_map[] = {
 	{ "hc3", LZMA_MF_HC3 },
 	{ "hc4", LZMA_MF_HC4 },
 	{ "bt2", LZMA_MF_BT2 },
 	{ "bt3", LZMA_MF_BT3 },
 	{ "bt4", LZMA_MF_BT4 },
 	{ "",    0 }
 };


 /// Option table for LZMA1 and LZMA2. The order matters for
 /// stringification: filter_name_map tells how many entries from
 /// the beginning are printed for encoders and decoders.
 static const option_map lzma12_optmap[] = {
 	{
 		// Parsed by set_lzma12_preset(); never stringified.
 		.name = "preset",
 		.type = OPTMAP_TYPE_LZMA_PRESET,
 	}, {
 		.name = "dict",
 		.flags = OPTMAP_USE_BYTE_SUFFIX,
 		.offset = offsetof(lzma_options_lzma, dict_size),
 		.u.range.min = LZMA_DICT_SIZE_MIN,
 		// FIXME? The max is really max for encoding but decoding
 		// would allow 4 GiB - 1 B.
 		.u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
 	}, {
 		.name = "lc",
 		.offset = offsetof(lzma_options_lzma, lc),
 		.u.range.min = LZMA_LCLP_MIN,
 		.u.range.max = LZMA_LCLP_MAX,
 	}, {
 		.name = "lp",
 		.offset = offsetof(lzma_options_lzma, lp),
 		.u.range.min = LZMA_LCLP_MIN,
 		.u.range.max = LZMA_LCLP_MAX,
 	}, {
 		.name = "pb",
 		.offset = offsetof(lzma_options_lzma, pb),
 		.u.range.min = LZMA_PB_MIN,
 		.u.range.max = LZMA_PB_MAX,
 	}, {
 		// Stored as lzma_mode; names come from lzma12_mode_map.
 		.name = "mode",
 		.type = OPTMAP_TYPE_LZMA_MODE,
 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
 		.offset = offsetof(lzma_options_lzma, mode),
 		.u.map = lzma12_mode_map,
 	}, {
 		.name = "nice",
 		.offset = offsetof(lzma_options_lzma, nice_len),
 		.u.range.min = 2,
 		.u.range.max = 273,
 	}, {
 		// Stored as lzma_match_finder; names come from lzma12_mf_map.
 		.name = "mf",
 		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
 		.flags = OPTMAP_USE_NAME_VALUE_MAP,
 		.offset = offsetof(lzma_options_lzma, mf),
 		.u.map = lzma12_mf_map,
 	}, {
 		.name = "depth",
 		.offset = offsetof(lzma_options_lzma, depth),
 		.u.range.min = 0,
 		.u.range.max = UINT32_MAX,
 	}
 };


 static const char *
 parse_lzma12(const char **const str, const char *str_end, void *filter_options)
 {
 	lzma_options_lzma *opts = filter_options;

 	// It cannot fail.
 	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
 	assert(!preset_ret);
 	(void)preset_ret;

 	const char *errmsg = parse_options(str, str_end, filter_options,
 			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
 	if (errmsg != NULL)
 		return errmsg;

 	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
 		return "The sum of lc and lp must not exceed 4";

 	return NULL;
 }


 /////////////////////////////////////////
 // Generic parsing and stringification //
 /////////////////////////////////////////

 /// Table of all filters known to the string conversion functions.
 /// Entries exist only for filters whose encoder or decoder was enabled
 /// at build time. parse_filter() searches this by name and
 /// lzma_str_from_filters() searches it by Filter ID.
 static const struct {
 	/// Name of the filter
 	char name[NAME_LEN_MAX + 1];

 	/// For lzma_str_to_filters:
 	/// Size of the filter-specific options structure.
 	uint32_t opts_size;

 	/// Filter ID
 	lzma_vli id;

 	/// For lzma_str_to_filters:
 	/// Function to parse the filter-specific options. The filter_options
 	/// will already have been allocated using lzma_alloc_zero().
 	const char *(*parse)(const char **str, const char *str_end,
 			void *filter_options);

 	/// For lzma_str_from_filters:
 	/// If the flag LZMA_STR_ENCODER is used then the first
 	/// strfy_encoder elements of optmap are stringified.
 	/// With LZMA_STR_DECODER strfy_decoder is used.
 	/// Currently encoders use all options that decoders do but if
 	/// that changes then this needs to be changed too, for example,
 	/// add a new OPTMAP flag to skip printing some decoder-only options.
 	const option_map *optmap;
 	uint8_t strfy_encoder;
 	uint8_t strfy_decoder;

 	/// For lzma_str_from_filters:
 	/// If true, lzma_filter.options is allowed to be NULL. In that case,
 	/// only the filter name is printed without any options.
 	bool allow_null;

 } filter_name_map[] = {
 	// Initializer order: name, opts_size, id, parse, optmap,
 	// strfy_encoder, strfy_decoder, allow_null.
 #if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
 	{ "lzma1",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA1,
 	  &parse_lzma12,  lzma12_optmap, 9, 5, false },
 #endif

 #if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
 	{ "lzma2",        sizeof(lzma_options_lzma),  LZMA_FILTER_LZMA2,
 	  &parse_lzma12,  lzma12_optmap, 9, 2, false },
 #endif

 #if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
 	{ "x86",          sizeof(lzma_options_bcj),   LZMA_FILTER_X86,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
 	{ "arm",          sizeof(lzma_options_bcj),   LZMA_FILTER_ARM,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
 	{ "armthumb",     sizeof(lzma_options_bcj),   LZMA_FILTER_ARMTHUMB,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
 	{ "arm64",        sizeof(lzma_options_bcj),   LZMA_FILTER_ARM64,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
 	{ "powerpc",      sizeof(lzma_options_bcj),   LZMA_FILTER_POWERPC,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
 	{ "ia64",         sizeof(lzma_options_bcj),   LZMA_FILTER_IA64,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
 	{ "sparc",        sizeof(lzma_options_bcj),   LZMA_FILTER_SPARC,
 	  &parse_bcj,     bcj_optmap, 1, 1, true },
 #endif

 #if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
 	{ "delta",        sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
 	  &parse_delta,   delta_optmap, 1, 1, false },
 #endif
 };


 /// Decodes options from a string for one filter (name1=value1,name2=value2).
 /// Caller must have allocated memory for filter_options already and set
 /// the initial default values. This is called from the filter-specific
 /// parse_* functions.
 ///
 /// The input string starts at *str and the address in str_end is the first
 /// char that is not part of the string anymore. So no '\0' terminator is
 /// used. *str is advanced every time something has been decoded successfully.
 ///
 /// \param      str             Pointer to the current position in the input
 /// \param      str_end         First char after the end of the input
 /// \param      filter_options  Filter-specific options structure to modify
 /// \param      optmap          Options supported by the filter
 /// \param      optmap_size     Number of elements in optmap
 ///
 /// \return     NULL on success. On error, a static error message is
 ///             returned and *str points to the part of the input that
 ///             the message is about.
 static const char *
 parse_options(const char **const str, const char *str_end,
 		void *filter_options,
 		const option_map *const optmap, const size_t optmap_size)
 {
 	while (*str < str_end && **str != '\0') {
 		// Each option is of the form name=value.
 		// Commas (',') separate options. Extra commas are ignored.
 		// Ignoring extra commas makes it simpler if an optional
 		// option is stored in a shell variable which can be empty.
 		if (**str == ',') {
 			++*str;
 			continue;
 		}

 		// Find where the next name=value ends.
 		const size_t str_len = (size_t)(str_end - *str);
 		const char *name_eq_value_end = memchr(*str, ',', str_len);
 		if (name_eq_value_end == NULL)
 			name_eq_value_end = str_end;

 		const char *equals_sign = memchr(*str, '=',
 				(size_t)(name_eq_value_end - *str));

 		// Fail if the '=' wasn't found or the option name is missing
 		// (the first char is '=').
 		if (equals_sign == NULL || **str == '=')
 			return "Options must be 'name=value' pairs separated "
 					"with commas";

 		// Reject a too long option name so that the memcmp()
 		// in the loop below won't read past the end of the
 		// string in optmap[i].name.
 		const size_t name_len = (size_t)(equals_sign - *str);
 		if (name_len > NAME_LEN_MAX)
 			return "Unknown option name";

 		// Find the option name from optmap[].
 		size_t i = 0;
 		while (true) {
 			if (i == optmap_size)
 				return "Unknown option name";

 			// The second condition ensures that the input isn't
 			// merely a prefix of the name in optmap[i].
 			if (memcmp(*str, optmap[i].name, name_len) == 0
 					&& optmap[i].name[name_len] == '\0')
 				break;

 			++i;
 		}

 		// The input string is good at least until the start of
 		// the option value.
 		*str = equals_sign + 1;

 		// The code assumes that the option value isn't an empty
 		// string so check it here.
 		const size_t value_len = (size_t)(name_eq_value_end - *str);
 		if (value_len == 0)
 			return "Option value cannot be empty";

 		// LZMA1/2 preset has its own parsing function. It advances
 		// *str by itself so nothing else needs to be done here.
 		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
 			const char *errmsg = set_lzma12_preset(str,
 					name_eq_value_end, filter_options);
 			if (errmsg != NULL)
 				return errmsg;

 			continue;
 		}

 		// It's an integer value.
 		uint32_t v;
 		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
 			// The integer is picked from a string-to-integer map.
 			//
 			// Reject a too long value string so that the memcmp()
 			// in the loop below won't read past the end of the
 			// string in optmap[i].u.map[j].name.
 			if (value_len > NAME_LEN_MAX)
 				return "Invalid option value";

 			const name_value_map *map = optmap[i].u.map;
 			size_t j = 0;
 			while (true) {
 				// The array is terminated with an empty name.
 				if (map[j].name[0] == '\0')
 					return "Invalid option value";

 				if (memcmp(*str, map[j].name, value_len) == 0
 						&& map[j].name[value_len]
 							== '\0') {
 					v = map[j].value;
 					break;
 				}

 				++j;
 			}
 		} else if (**str < '0' || **str > '9') {
 			// Note that "max" isn't supported while it is
 			// supported in xz. It's not useful here.
 			return "Value is not a non-negative decimal integer";
 		} else {
 			// strtoul() has locale-specific behavior so it cannot
 			// be relied on to get reproducible results since we
 			// cannot change the locale in a thread-safe library.
 			// It also needs '\0'-termination.
 			//
 			// Use a temporary pointer so that *str will point
 			// to the beginning of the value string in case
 			// an error occurs.
 			const char *p = *str;
 			v = 0;
 			do {
 				// Check before multiplying that v * 10
 				// cannot overflow.
 				if (v > UINT32_MAX / 10)
 					return "Value out of range";

 				v *= 10;

 				// Check before adding that v + add
 				// cannot overflow.
 				const uint32_t add = (uint32_t)(*p - '0');
 				if (UINT32_MAX - add < v)
 					return "Value out of range";

 				v += add;
 				++p;
 			} while (p < name_eq_value_end
 					&& *p >= '0' && *p <= '9');

 			if (p < name_eq_value_end) {
 				// Remember this position so that it can be
 				// used for error messages that are
 				// specifically about the suffix. (Out of
 				// range values are about the whole value
 				// and those error messages point to the
 				// beginning of the number part,
 				// not to the suffix.)
 				const char *multiplier_start = p;

 				// If multiplier suffix shouldn't be used
 				// then don't allow them even if the value
 				// would stay within limits. This is a somewhat
 				// unnecessary check but it rejects silly
 				// things like lzma2:pb=0MiB which xz allows.
 				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
 						== 0) {
 					*str = multiplier_start;
 					return "This option does not support "
 						"any integer suffixes";
 				}

 				uint32_t shift;

 				switch (*p) {
 				case 'k':
 				case 'K':
 					shift = 10;
 					break;

 				case 'm':
 				case 'M':
 					shift = 20;
 					break;

 				case 'g':
 				case 'G':
 					shift = 30;
 					break;

 				default:
 					*str = multiplier_start;
 					return "Invalid multiplier suffix "
 							"(KiB, MiB, or GiB)";
 				}

 				++p;

 				// Allow "M", "Mi", "MB", "MiB" and the same
 				// for the other five characters from the
 				// switch-statement above. All are handled
 				// as base-2 (perhaps a mistake, perhaps not).
 				// Note that 'i' and 'B' are case sensitive.
 				if (p < name_eq_value_end && *p == 'i')
 					++p;

 				if (p < name_eq_value_end && *p == 'B')
 					++p;

 				// Now we must have no chars remaining.
 				if (p < name_eq_value_end) {
 					*str = multiplier_start;
 					return "Invalid multiplier suffix "
 							"(KiB, MiB, or GiB)";
 				}

 				// Ensure that the shift below won't
 				// discard any set bits.
 				if (v > (UINT32_MAX >> shift))
 					return "Value out of range";

 				v <<= shift;
 			}

 			if (v < optmap[i].u.range.min
 					|| v > optmap[i].u.range.max)
 				return "Value out of range";
 		}

 		// Set the value in filter_options. Enums are handled
 		// specially since the underlying type isn't the same
 		// as uint32_t on all systems.
 		void *ptr = (char *)filter_options + optmap[i].offset;
 		switch (optmap[i].type) {
 		case OPTMAP_TYPE_LZMA_MODE:
 			*(lzma_mode *)ptr = (lzma_mode)v;
 			break;

 		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
 			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
 			break;

 		default:
 			*(uint32_t *)ptr = v;
 			break;
 		}

 		// This option has been successfully handled.
 		*str = name_eq_value_end;
 	}

 	// No errors.
 	return NULL;
 }


 /// Decodes a single filter ("name" or "name:opt1=val1,opt2=val2", where
 /// '=' may be used in place of ':') from the range [*str, str_end).
 ///
 /// The filter name is looked up from filter_name_map, a zeroed options
 /// structure is allocated, and the filter-specific parse() callback
 /// decodes the options into it. The caller must have set str_end so
 /// that exactly one filter and its options are present without any
 /// trailing characters.
 ///
 /// *filter is modified only on success; the options structure then
 /// belongs to the caller. On failure nothing allocated here is leaked.
 ///
 /// \return     NULL on success, otherwise a static error message.
 static const char *
 parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
 		const lzma_allocator *allocator, bool only_xz)
 {
 	// Look for the first colon or equals sign. It separates the
 	// filter name from the filter options.
 	const char *p = *str;
 	while (p < str_end && *p != ':' && *p != '=')
 		++p;

 	// Without a delimiter the whole input is the filter name and
 	// the options string is empty. With a delimiter the options
 	// (name1=value1,name2=value2,...) begin right after it.
 	const bool has_delim = p < str_end;
 	const char *const name_end = has_delim ? p : str_end;
 	const char *const opts_start = has_delim ? p + 1 : str_end;

 	// A too long name cannot match anything. Rejecting it here also
 	// keeps the comparisons below within filter_name_map[i].name.
 	const size_t name_len = (size_t)(name_end - *str);
 	if (name_len > NAME_LEN_MAX)
 		return "Unknown filter name";

 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
 		// Skip entries whose name has a different length or
 		// different contents.
 		if (filter_name_map[i].name[name_len] != '\0'
 				|| memcmp(*str, filter_name_map[i].name,
 					name_len) != 0)
 			continue;

 		const lzma_vli id = filter_name_map[i].id;
 		if (only_xz && id >= LZMA_FILTER_RESERVED_START)
 			return "This filter cannot be used in the .xz format";

 		// The filter-specific options start out as all zeros.
 		void *options = lzma_alloc_zero(
 				filter_name_map[i].opts_size, allocator);
 		if (options == NULL)
 			return "Memory allocation failed";

 		// The filter name was recognized so the input string is
 		// good at least up to the start of the options.
 		*str = opts_start;

 		const char *errmsg = filter_name_map[i].parse(
 				str, str_end, options);
 		if (errmsg != NULL) {
 			lzma_free(options, allocator);
 			return errmsg;
 		}

 		// Everything was decoded; only now touch *filter.
 		filter->id = id;
 		filter->options = options;
 		return NULL;
 	}

 	return "Unknown filter name";
 }


 /// Converts the string to a filter chain (array of lzma_filter structures).
 ///
 /// Two kinds of strings are accepted: a preset (for example "6" or "-9e"),
 /// which produces a single LZMA2 filter, and a list of filters separated
 /// by spaces or "--".
 ///
 /// *str is advanced every time something has been decoded successfully.
 /// This way the caller knows where in the string a possible error occurred.
 ///
 /// \param      str         Pointer to the '\0'-terminated input string
 /// \param      filters     Destination array; written only on success
 /// \param      flags       LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION
 ///                         are examined here
 /// \param      allocator   Allocator for the filter-specific options
 ///
 /// \return     NULL on success, otherwise a static error message.
 ///             On error all options allocated here have been freed.
 static const char *
 str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
 		const lzma_allocator *allocator)
 {
 	const char *errmsg;

 	// Skip leading spaces.
 	while (**str == ' ')
 		++*str;

 	if (**str == '\0')
 		return "Empty string is not allowed, "
 				"try \"6\" if a default value is needed";

 	// Detect the type of the string.
 	//
 	// A string beginning with a digit or a string beginning with
 	// one dash and a digit are treated as presets. Trailing spaces
 	// will be ignored too (leading spaces were already ignored above).
 	//
 	// For example, "6", "7  ", "-9e", or "  -3  " are treated as presets.
 	// Strings like "-" or "- " aren't preset.
 #define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
 	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
 		if (**str == '-')
 			++*str;

 		// Ignore trailing spaces.
 		const size_t str_len = strlen(*str);
 		const char *str_end = memchr(*str, ' ', str_len);
 		if (str_end != NULL) {
 			// There is at least one trailing space. Check that
 			// there are no chars other than spaces.
 			for (size_t i = 1; str_end[i] != '\0'; ++i)
 				if (str_end[i] != ' ')
 					return "Unsupported preset";
 		} else {
 			// There are no trailing spaces. Use the whole string.
 			str_end = *str + str_len;
 		}

 		uint32_t preset;
 		errmsg = parse_lzma12_preset(str, str_end, &preset);
 		if (errmsg != NULL)
 			return errmsg;

 		// No zeroing is requested here; lzma_lzma_preset() is
 		// relied on to fill in the structure.
 		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
 		if (opts == NULL)
 			return "Memory allocation failed";

 		if (lzma_lzma_preset(opts, preset)) {
 			lzma_free(opts, allocator);
 			return "Unsupported preset";
 		}

 		// A preset always means a chain with LZMA2 as the only filter.
 		filters[0].id = LZMA_FILTER_LZMA2;
 		filters[0].options = opts;
 		filters[1].id = LZMA_VLI_UNKNOWN;
 		filters[1].options = NULL;

 		return NULL;
 	}

 	// Not a preset so it must be a filter chain.
 	//
 	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
 	// can be used in .xz.
 	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;

 	// Use a temporary array so that we don't modify the caller-supplied
 	// one until we know that no errors occurred.
 	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];

 	// Number of filters successfully decoded into temp_filters so far.
 	// The error path relies on this to know which options to free.
 	size_t i = 0;
 	do {
 		if (i == LZMA_FILTERS_MAX) {
 			errmsg = "The maximum number of filters is four";
 			goto error;
 		}

 		// Skip "--" if present.
 		if ((*str)[0] == '-' && (*str)[1] == '-')
 			*str += 2;

 		// Locate the end of "filter:name1=value1,name2=value2",
 		// stopping at the first "--" or a single space.
 		const char *filter_end = *str;
 		while (filter_end[0] != '\0') {
 			if ((filter_end[0] == '-' && filter_end[1] == '-')
 					|| filter_end[0] == ' ')
 				break;

 			++filter_end;
 		}

 		// Inputs that have "--" at the end or "-- " in the middle
 		// will result in an empty filter name.
 		if (filter_end == *str) {
 			errmsg = "Filter name is missing";
 			goto error;
 		}

 		errmsg = parse_filter(str, filter_end, &temp_filters[i],
 				allocator, only_xz);
 		if (errmsg != NULL)
 			goto error;

 		// Skip trailing spaces.
 		while (**str == ' ')
 			++*str;

 		++i;
 	} while (**str != '\0');

 	// Seems to be good, terminate the array so that
 	// basic validation can be done.
 	temp_filters[i].id = LZMA_VLI_UNKNOWN;
 	temp_filters[i].options = NULL;

 	// Do basic validation if the application didn't prohibit it.
 	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
 		size_t dummy;
 		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
 		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
 		if (ret != LZMA_OK) {
 			errmsg = "Invalid filter chain "
 					"('lzma2' missing at the end?)";
 			goto error;
 		}
 	}

 	// All good. Copy the filters to the application supplied array.
 	// The + 1 is for the terminating LZMA_VLI_UNKNOWN entry.
 	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
 	return NULL;

 error:
 	// Free the filter options that were successfully decoded.
 	while (i-- > 0)
 		lzma_free(temp_filters[i].options, allocator);

 	return errmsg;
 }


 /// Public entry point: converts a preset or filter chain string into
 /// an array of lzma_filter structures.
 ///
 /// \param      str         '\0'-terminated input string
 /// \param      error_pos   If not NULL, *error_pos is always set: to
 ///                         the offset in str reached by the parser,
 ///                         clamped to INT_MAX, or to 0 when the
 ///                         arguments are rejected before any parsing.
 /// \param      filters     Destination array; written only on success
 /// \param      flags       Bitwise-or of LZMA_STR_ALL_FILTERS and
 ///                         LZMA_STR_NO_VALIDATION, or zero
 /// \param      allocator   Allocator for the filter-specific options
 ///
 /// \return     NULL on success, otherwise a static error message.
 extern LZMA_API(const char *)
 lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
 		uint32_t flags, const lzma_allocator *allocator)
 {
 	// *error_pos must be set on every return path. Without this the
 	// argument checks below would leave it uninitialized and a caller
 	// that uses it (for example as a printf() field width) after
 	// an error would read garbage.
 	if (error_pos != NULL)
 		*error_pos = 0;

 	if (str == NULL || filters == NULL)
 		return "Unexpected NULL pointer argument(s) "
 				"to lzma_str_to_filters()";

 	// Validate the flags.
 	const uint32_t supported_flags
 			= LZMA_STR_ALL_FILTERS
 			| LZMA_STR_NO_VALIDATION;

 	if (flags & ~supported_flags)
 		return "Unsupported flags to lzma_str_to_filters()";

 	// str_to_filters() advances "used" as it decodes so the distance
 	// from str tells how far the parsing got.
 	const char *used = str;
 	const char *errmsg = str_to_filters(&used, filters, flags, allocator);

 	if (error_pos != NULL) {
 		// Clamp to keep the value in the range [0, INT_MAX].
 		const size_t n = (size_t)(used - str);
 		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
 	}

 	return errmsg;
 }


 /// Appends the options of one filter to the destination string.
 ///
 /// The caller has already appended the filter name but not the delimiter
 /// (':' or '=') that separates the name from the options, because it is
 /// possible that no option gets printed at all. The delimiter is added
 /// here right before the first printed option; later options are
 /// separated with commas.
 ///
 /// \param      dest            String builder to append to
 /// \param      delimiter       String to put before the first option
 /// \param      optmap          Options supported by the filter
 /// \param      optmap_count    How many elements of optmap to process
 /// \param      filter_options  Filter-specific options structure to read
 static void
 strfy_filter(lzma_str *dest, const char *delimiter,
 		const option_map *optmap, size_t optmap_count,
 		const void *filter_options)
 {
 	for (size_t i = 0; i < optmap_count; ++i) {
 		const option_map *const opt = &optmap[i];
 		const uint8_t type = opt->type;

 		// LZMA1/2 preset isn't reversed back to preset=PRESET form.
 		if (type == OPTMAP_TYPE_LZMA_PRESET)
 			continue;

 		// Every other option is an integer in the options structure.
 		// Enums are read using their own types since those aren't
 		// necessarily the same size as uint32_t.
 		const void *const field
 			= (const char *)filter_options + opt->offset;
 		uint32_t v;
 		if (type == OPTMAP_TYPE_LZMA_MODE)
 			v = *(const lzma_mode *)field;
 		else if (type == OPTMAP_TYPE_LZMA_MATCH_FINDER)
 			v = *(const lzma_match_finder *)field;
 		else
 			v = *(const uint32_t *)field;

 		// Some options are left out when their value is zero.
 		if (v == 0 && (opt->flags & OPTMAP_NO_STRFY_ZERO))
 			continue;

 		// The caller's delimiter goes before the first option
 		// and a comma before all the later ones.
 		str_append_str(dest, delimiter);
 		delimiter = ",";

 		// "name="
 		str_append_str(dest, opt->name);
 		str_append_str(dest, "=");

 		if (!(opt->flags & OPTMAP_USE_NAME_VALUE_MAP)) {
 			// Plain decimal integer, possibly with a suffix.
 			str_append_u32(dest, v,
 				opt->flags & OPTMAP_USE_BYTE_SUFFIX);
 			continue;
 		}

 		// Find the name of the value. The search stops at the
 		// first match or at the terminator with the empty name.
 		const name_value_map *entry = opt->u.map;
 		while (entry->name[0] != '\0' && entry->value != v)
 			++entry;

 		str_append_str(dest, entry->name[0] != '\0'
 				? entry->name : "UNKNOWN");
 	}
 }


 // Convert the filter chain in filters[] into a string that is stored
 // into *output_str.
 //
 // Return values as implemented below:
 //   - LZMA_PROG_ERROR if output_str or filters is NULL
 //   - LZMA_OPTIONS_ERROR if flags has unsupported bits, the chain is
 //     empty or has more than LZMA_FILTERS_MAX filters, a Filter ID isn't
 //     found from filter_name_map, or a filter needs an options structure
 //     but .options is NULL
 //   - the error from str_init() if the buffer cannot be set up
 //   - otherwise the return value of str_finish()
 //
 // Whenever output_str itself isn't NULL, *output_str is NULL after
 // any error.
 extern LZMA_API(lzma_ret)
 lzma_str_from_filters(char **output_str, const lzma_filter *filters,
 		uint32_t flags, const lzma_allocator *allocator)
 {
 	// Without a place to store the result nothing can be done.
 	if (output_str == NULL)
 		return LZMA_PROG_ERROR;

 	// Clear the result first so that every error return below
 	// leaves *output_str as NULL.
 	*output_str = NULL;

 	if (filters == NULL)
 		return LZMA_PROG_ERROR;

 	// Reject flags that this function doesn't know about.
 	const uint32_t known_flags = LZMA_STR_ENCODER | LZMA_STR_DECODER
 			| LZMA_STR_GETOPT_LONG | LZMA_STR_NO_SPACES;
 	if ((flags & ~known_flags) != 0)
 		return LZMA_OPTIONS_ERROR;

 	// An empty filter chain is not accepted.
 	if (filters[0].id == LZMA_VLI_UNKNOWN)
 		return LZMA_OPTIONS_ERROR;

 	// Set up the buffer into which the string is built.
 	lzma_str out;
 	const lzma_ret init_ret = str_init(&out, allocator);
 	if (init_ret != LZMA_OK)
 		return init_ret;

 	// Filter options are shown only if the caller asked for
 	// the encoder or the decoder view.
 	const bool with_options
 		= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;
 	const bool getopt_syntax = (flags & LZMA_STR_GETOPT_LONG) != 0;
 	const bool no_spaces = (flags & LZMA_STR_NO_SPACES) != 0;

 	// Delimiter between the filter name and its first option.
 	const char *first_opt_delim = ":";
 	if (getopt_syntax)
 		first_opt_delim = "=";

 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
 		// The ID at this index isn't LZMA_VLI_UNKNOWN so getting
 		// this far means that the chain is too long.
 		if (i == LZMA_FILTERS_MAX) {
 			str_free(&out, allocator);
 			return LZMA_OPTIONS_ERROR;
 		}

 		// Filters after the first one are separated with a space
 		// unless the caller asked not to use spaces.
 		if (i > 0 && !no_spaces)
 			str_append_str(&out, " ");

 		// The "--" prefix is used on every filter in getopt_long()
 		// compatible syntax. It also works as the separator
 		// between filters when spaces aren't allowed.
 		if (getopt_syntax || (i > 0 && no_spaces))
 			str_append_str(&out, "--");

 		// Find the entry that matches the Filter ID.
 		size_t n = 0;
 		while (n < ARRAY_SIZE(filter_name_map)
 				&& filter_name_map[n].id != filters[i].id)
 			++n;

 		if (n == ARRAY_SIZE(filter_name_map)) {
 			// The Filter ID isn't supported.
 			str_free(&out, allocator);
 			return LZMA_OPTIONS_ERROR;
 		}

 		str_append_str(&out, filter_name_map[n].name);

 		// When only the names are wanted, .options isn't looked at
 		// at all. Thus it may be NULL even with filters that
 		// otherwise require the options structure.
 		if (!with_options)
 			continue;

 		if (filters[i].options == NULL) {
 			// It is an error if this filter cannot work
 			// without its options structure.
 			if (!filter_name_map[n].allow_null) {
 				str_free(&out, allocator);
 				return LZMA_OPTIONS_ERROR;
 			}

 			// NULL is fine for this filter and then
 			// there are no options to add to the string.
 			continue;
 		}

 		// The number of options to stringify depends on whether
 		// the encoder or the decoder view was requested. The encoder
 		// view is used if both flags are set.
 		size_t opt_count;
 		if (flags & LZMA_STR_ENCODER)
 			opt_count = filter_name_map[n].strfy_encoder;
 		else
 			opt_count = filter_name_map[n].strfy_decoder;

 		strfy_filter(&out, first_opt_delim,
 				filter_name_map[n].optmap, opt_count,
 				filters[i].options);
 	}

 	return str_finish(output_str, &out, allocator);
 }


 // Build a string that lists filter names from filter_name_map and,
 // if LZMA_STR_ENCODER or LZMA_STR_DECODER is set, also their options
 // and the values accepted for each option. The result is stored
 // into *output_str.
 //
 // If filter_id is LZMA_VLI_UNKNOWN then multiple filters are listed;
 // otherwise only the filter with the matching ID is listed.
 //
 // Return values as implemented below:
 //   - LZMA_PROG_ERROR if output_str is NULL
 //   - LZMA_OPTIONS_ERROR if flags has unsupported bits or if
 //     no filter matched
 //   - the error from str_init() if the buffer cannot be set up
 //   - otherwise the return value of str_finish()
 //
 // Whenever output_str itself isn't NULL, *output_str is NULL after
 // any error.
 extern LZMA_API(lzma_ret)
 lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
 		const lzma_allocator *allocator)
 {
 	// Without a place to store the result nothing can be done.
 	if (output_str == NULL)
 		return LZMA_PROG_ERROR;

 	// Clear the result first so that every error return below
 	// leaves *output_str as NULL.
 	*output_str = NULL;

 	// Reject flags that this function doesn't know about.
 	const uint32_t known_flags = LZMA_STR_ALL_FILTERS
 			| LZMA_STR_ENCODER | LZMA_STR_DECODER
 			| LZMA_STR_GETOPT_LONG;
 	if ((flags & ~known_flags) != 0)
 		return LZMA_OPTIONS_ERROR;

 	// Set up the buffer into which the string is built.
 	lzma_str out;
 	const lzma_ret init_ret = str_init(&out, allocator);
 	if (init_ret != LZMA_OK)
 		return init_ret;

 	const bool with_options
 		= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;
 	const bool getopt_syntax = (flags & LZMA_STR_GETOPT_LONG) != 0;

 	// Plain filter names are put on a single line separated by spaces.
 	// With options included, each filter goes on its own line.
 	const char *between_filters = with_options ? "\n" : " ";

 	// Delimiter between the filter name and its first option.
 	const char *first_opt_delim = getopt_syntax ? "=" : ":";

 	bool any_listed = false;

 	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
 		if (filter_id == LZMA_VLI_UNKNOWN) {
 			// Listing multiple filters: filters whose ID is in
 			// the reserved range are included only if
 			// LZMA_STR_ALL_FILTERS was specified.
 			if ((flags & LZMA_STR_ALL_FILTERS) == 0
 					&& filter_name_map[i].id
 					>= LZMA_FILTER_RESERVED_START)
 				continue;
 		} else if (filter_name_map[i].id != filter_id) {
 			// Only one specific filter was requested
 			// and this isn't it.
 			continue;
 		}

 		// Separate this filter from the previous one, if any.
 		if (any_listed)
 			str_append_str(&out, between_filters);

 		any_listed = true;

 		if (getopt_syntax)
 			str_append_str(&out, "--");

 		str_append_str(&out, filter_name_map[i].name);

 		// Nothing more to do for this filter if only
 		// the names were requested.
 		if (!with_options)
 			continue;

 		const option_map *opts = filter_name_map[i].optmap;

 		// The number of options to list depends on whether
 		// the encoder or the decoder view was requested. The encoder
 		// view is used if both flags are set.
 		size_t opt_count;
 		if (flags & LZMA_STR_ENCODER)
 			opt_count = filter_name_map[i].strfy_encoder;
 		else
 			opt_count = filter_name_map[i].strfy_decoder;

 		for (size_t n = 0; n < opt_count; ++n) {
 			// "=" or ":" comes between the filter name and
 			// the first option. A comma is put between
 			// the options.
 			str_append_str(&out, n == 0 ? first_opt_delim : ",");

 			// Each option is shown as optname=<possible_values>.
 			str_append_str(&out, opts[n].name);
 			str_append_str(&out, "=<");

 			if (opts[n].type == OPTMAP_TYPE_LZMA_PRESET) {
 				// The LZMA1/2 preset has a dedicated
 				// help string.
 				str_append_str(&out, LZMA12_PRESET_STR);

 			} else if (opts[n].flags & OPTMAP_USE_NAME_VALUE_MAP) {
 				// Show the allowed names separated by "|".
 				// The map ends at an entry whose name is
 				// an empty string.
 				const name_value_map *const first
 						= opts[n].u.map;
 				const name_value_map *cur = first;
 				while (cur->name[0] != '\0') {
 					if (cur != first)
 						str_append_str(&out, "|");

 					str_append_str(&out, cur->name);
 					++cur;
 				}

 			} else {
 				// Integers are shown as a range: min-max
 				const bool byte_suffix = (opts[n].flags
 						& OPTMAP_USE_BYTE_SUFFIX) != 0;
 				str_append_u32(&out, opts[n].u.range.min,
 						byte_suffix);
 				str_append_str(&out, "-");
 				str_append_u32(&out, opts[n].u.range.max,
 						byte_suffix);
 			}

 			str_append_str(&out, ">");
 		}
 	}

 	// If nothing was listed then the Filter ID given by the caller
 	// didn't match any supported filter.
 	if (!any_listed) {
 		str_free(&out, allocator);
 		return LZMA_OPTIONS_ERROR;
 	}

 	return str_finish(output_str, &out, allocator);
 }