aboutsummaryrefslogblamecommitdiff
path: root/src/liblzma/common/string_conversion.c
blob: d2c1e80936b24477b99fd6af17d3347eed8605b2 (plain) (tree)






































































































































































































                                                                               
                                                                      






































                                                                             













                                                   


















                                                                            
      












































                                                                              
                                                                          




















































































































































                                                                               
                                                                      
                                                                        
                                                                             




































































                                                                             
                                                                              































































































































                                                                              
                                                                           















































































































































































                                                                               
                                                                        





























































































































































































































































































                                                                               






                                                                             























































































































































































                                                                               
///////////////////////////////////////////////////////////////////////////////
//
/// \file       string_conversion.c
/// \brief      Conversion of strings to filter chain and vice versa
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "filter_common.h"


/////////////////////
// String building //
/////////////////////

/// How much memory to allocate for strings. For now, no realloc is used
/// so this needs to be big enough even though there of course is
/// an overflow check still.
///
/// The last byte is reserved for the terminating '\0': the append
/// functions stop writing at STR_ALLOC_SIZE - 1 bytes and a string that
/// reaches that length is treated as an overflow by str_finish().
///
/// FIXME? Using a fixed size is wasteful if the application doesn't free
/// the string fairly quickly but this can be improved later if needed.
#define STR_ALLOC_SIZE 800


/// Fixed-capacity string builder used when stringifying filter chains.
typedef struct {
	/// Buffer of STR_ALLOC_SIZE bytes allocated by str_init().
	/// It is not '\0'-terminated until str_finish() succeeds.
	char *buf;

	/// Number of bytes written to buf so far. The append functions
	/// never let this exceed STR_ALLOC_SIZE - 1.
	size_t pos;
} lzma_str;


/// Allocates the STR_ALLOC_SIZE-byte buffer for *str and resets the write
/// position to the beginning of the buffer.
///
/// \return     LZMA_OK on success. LZMA_MEM_ERROR if the allocation failed;
///             in that case str->buf is NULL and str->pos is left untouched.
static lzma_ret
str_init(lzma_str *str, const lzma_allocator *allocator)
{
	str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);

	if (str->buf != NULL) {
		// Start with an empty string.
		str->pos = 0;
		return LZMA_OK;
	}

	return LZMA_MEM_ERROR;
}


/// Frees the buffer owned by *str using the same allocator that was
/// given to str_init(). The structure itself isn't modified.
static void
str_free(lzma_str *str, const lzma_allocator *allocator)
{
	char *const owned_buf = str->buf;
	lzma_free(owned_buf, allocator);
}


/// Tells if the buffer has been filled up to the last usable byte
/// (one byte is always kept in reserve for the terminating '\0').
static bool
str_is_full(const lzma_str *str)
{
	const size_t usable_size = STR_ALLOC_SIZE - 1;
	return str->pos == usable_size;
}


/// Terminates the string and hands the buffer over to the caller.
///
/// \param      dest        On success *dest is set to the '\0'-terminated
///                         string; on failure it is set to NULL.
/// \param      str         String to finish
/// \param      allocator   Used to free the buffer if finishing fails
///
/// \return     LZMA_OK on success. LZMA_PROG_ERROR if the buffer is full,
///             which is taken to mean that the output was truncated.
static lzma_ret
str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
{
	if (!str_is_full(str)) {
		// There is room for the terminator. The ownership of
		// the buffer moves to the caller.
		str->buf[str->pos] = '\0';
		*dest = str->buf;
		return LZMA_OK;
	}

	// The preallocated buffer was too small. This shouldn't happen
	// as STR_ALLOC_SIZE should be adjusted if new filters are added.
	lzma_free(str->buf, allocator);
	*dest = NULL;
	assert(0);
	return LZMA_PROG_ERROR;
}


/// Appends the '\0'-terminated string s to *str. If there isn't enough
/// space, as much as fits is copied and the rest is silently dropped;
/// the truncation is detected later because the buffer becomes full.
static void
str_append_str(lzma_str *str, const char *s)
{
	// Space left when one byte is reserved for the terminating '\0'.
	const size_t space_left = STR_ALLOC_SIZE - 1 - str->pos;
	const size_t wanted = strlen(s);
	const size_t n = my_min(wanted, space_left);

	memcpy(&str->buf[str->pos], s, n);
	str->pos += n;
}


/// Appends v as a decimal integer to *str.
///
/// \param      str                 Destination string
/// \param      v                   Value to stringify
/// \param      use_byte_suffix     If true and v is a non-zero exact
///                                 multiple of 1024, the value is divided
///                                 by 1024 as many times as possible (at
///                                 most three) and KiB, MiB, or GiB is
///                                 appended accordingly.
static void
str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
{
	// Zero never gets a suffix.
	if (v == 0) {
		str_append_str(str, "0");
		return;
	}

	// NOTE: Don't use plain "B" because xz and the parser in this
	// file don't support it and at glance it may look like 8
	// (there cannot be a space before the suffix).
	static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
	size_t suffix_index = 0;

	if (use_byte_suffix) {
		// Pick the biggest suffix that represents v exactly.
		for (; suffix_index < ARRAY_SIZE(suffixes) - 1
				&& (v & 1023) == 0; ++suffix_index)
			v >>= 10;
	}

	// UINT32_MAX in base 10 would need 10 + 1 bytes. Initializing
	// with "" zeroes every element so the '\0' at the end of
	// the array is already in place.
	char digits[16] = "";
	char *first = digits + sizeof(digits) - 1;

	// Generate the digits from the least significant to the most
	// significant, filling the array backwards.
	do {
		*--first = (char)('0' + (v % 10));
		v /= 10;
	} while (v != 0);

	str_append_str(str, first);
	str_append_str(str, suffixes[suffix_index]);
}


//////////////////////////////////////////////
// Parsing and stringification declarations //
//////////////////////////////////////////////

/// Maximum length for filter and option names. The name arrays in this file
/// are NAME_LEN_MAX + 1 bytes, and the parsers reject longer input names
/// before comparing so that memcmp() never reads past those arrays.
///
/// Size of name_value_map:
/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
#define NAME_LEN_MAX 11


/// For option_map.flags: Use .u.map to convert the input value
/// to an integer. Without this flag, .u.range.{min,max} are used
/// as the allowed range for the integer.
#define OPTMAP_USE_NAME_VALUE_MAP 0x01

/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
/// the stringified output if the value is an exact multiple of these.
/// This is used e.g. for LZMA1/2 dictionary size.
#define OPTMAP_USE_BYTE_SUFFIX 0x02

/// For option_map.flags: If the integer value is zero then this option
/// won't be included in the stringified output. It's used e.g. for
/// BCJ filter start offset which usually is zero.
#define OPTMAP_NO_STRFY_ZERO 0x04

/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
/// it doesn't need to be specified in the initializers as it is
/// the implicit value.
enum {
	/// The option is stored as uint32_t. This is also what
	/// the parser and stringifier use for any unlisted type value.
	OPTMAP_TYPE_UINT32,

	/// The option is stored as lzma_mode.
	OPTMAP_TYPE_LZMA_MODE,

	/// The option is stored as lzma_match_finder.
	OPTMAP_TYPE_LZMA_MATCH_FINDER,

	/// The option is a LZMA1/2 preset. It has its own parsing function
	/// and it is skipped when stringifying.
	OPTMAP_TYPE_LZMA_PRESET,
};


/// This is for mapping string values in options to integers.
/// The last element of an array must have "" as the name.
/// It's used e.g. for match finder names in LZMA1/2.
typedef struct {
	/// String form of the value. An empty string marks the end of
	/// the array.
	const char name[NAME_LEN_MAX + 1];

	/// Integer that the name maps to (and vice versa when stringifying).
	const uint32_t value;
} name_value_map;


/// Each filter that has options needs an array of option_map structures.
/// The array doesn't need to be terminated as the functions take the
/// length of the array as an argument.
///
/// When converting a string to filter options structure, option values
/// will be handled in a few different ways:
///
/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
///     is handled specially.
///
/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
///     converted to an integer using the name_value_map pointed by .u.map.
///     The last element in .u.map must have .name = "" as the terminator.
///
/// (3) Otherwise the string is treated as a non-negative unsigned decimal
///     integer which must be in the range set in .u.range. If .flags has
///     OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
///
/// The integer value from (2) or (3) is then stored to filter_options
/// at the offset specified in .offset using the type specified in .type
/// (default is uint32_t).
///
/// Stringifying a filter is done by processing a given number of options
/// in order from the beginning of an option_map array. The integer is
/// read from filter_options at .offset using the type from .type.
///
/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
/// option is skipped.
///
/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
/// to convert the option to a string. If the map doesn't contain a string
/// for the integer value then "UNKNOWN" is used.
///
/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
/// MiB, or GiB suffix is used if the value is an exact multiple of these.
/// Plain "B" suffix is never used.
typedef struct {
	/// Name of the option as it appears in the string ("name=value").
	char name[NAME_LEN_MAX + 1];

	/// One of the OPTMAP_TYPE_* values. It selects the C type used to
	/// access the field in the filter options structure.
	uint8_t type;

	/// Bitwise-or of the OPTMAP_* flags (zero if none are needed).
	uint8_t flags;

	/// Byte offset of the field from the beginning of the filter
	/// options structure.
	uint16_t offset;

	/// Which member is valid depends on OPTMAP_USE_NAME_VALUE_MAP
	/// in .flags.
	union {
		/// Without OPTMAP_USE_NAME_VALUE_MAP: the allowed range of
		/// the integer. Both min and max are inclusive.
		struct {
			uint32_t min;
			uint32_t max;
		} range;

		/// With OPTMAP_USE_NAME_VALUE_MAP: array that is terminated
		/// with an element whose name is an empty string.
		const name_value_map *map;
	} u;
} option_map;


// Forward declaration: the filter-specific parse_* functions below call
// parse_options() but it is defined later in this file.
// It returns NULL on success and an error message string on failure.
static const char *parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size);


/////////
// BCJ //
/////////

#if defined(HAVE_ENCODER_X86) \
		|| defined(HAVE_DECODER_X86) \
		|| defined(HAVE_ENCODER_ARM) \
		|| defined(HAVE_DECODER_ARM) \
		|| defined(HAVE_ENCODER_ARMTHUMB) \
		|| defined(HAVE_DECODER_ARMTHUMB) \
		|| defined(HAVE_ENCODER_ARM64) \
		|| defined(HAVE_DECODER_ARM64) \
		|| defined(HAVE_ENCODER_POWERPC) \
		|| defined(HAVE_DECODER_POWERPC) \
		|| defined(HAVE_ENCODER_IA64) \
		|| defined(HAVE_DECODER_IA64) \
		|| defined(HAVE_ENCODER_SPARC) \
		|| defined(HAVE_DECODER_SPARC)
/// Options of the BCJ filters. There is only the start offset which is
/// omitted from the stringified output when it is zero.
static const option_map bcj_optmap[] = {
	{
		.name = "start",
		.flags = OPTMAP_USE_BYTE_SUFFIX | OPTMAP_NO_STRFY_ZERO,
		.offset = offsetof(lzma_options_bcj, start_offset),
		.u = { .range = { .min = 0, .max = UINT32_MAX } },
	},
};


/// Parses the options of a BCJ filter into filter_options which
/// must point to a zeroed lzma_options_bcj.
///
/// \return     NULL on success, an error message string otherwise.
static const char *
parse_bcj(const char **const str, const char *str_end, void *filter_options)
{
	// No defaults need to be set here: the caller zeroed filter_options
	// when allocating it and zero is the default start offset.
	const size_t optmap_count = ARRAY_SIZE(bcj_optmap);

	return parse_options(str, str_end, filter_options,
			bcj_optmap, optmap_count);
}
#endif


///////////
// Delta //
///////////

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
/// Options of the Delta filter. Only the distance can be set.
static const option_map delta_optmap[] = {
	{
		.name = "dist",
		.offset = offsetof(lzma_options_delta, dist),
		.u = { .range = {
			.min = LZMA_DELTA_DIST_MIN,
			.max = LZMA_DELTA_DIST_MAX,
		} },
	},
};


/// Sets the default values of the Delta filter and then parses the options
/// from the string into filter_options (a lzma_options_delta).
///
/// \return     NULL on success, an error message string otherwise.
static const char *
parse_delta(const char **const str, const char *str_end, void *filter_options)
{
	// Defaults that are used for the fields that the string doesn't set.
	lzma_options_delta *const delta = filter_options;
	delta->dist = LZMA_DELTA_DIST_MIN;
	delta->type = LZMA_DELTA_TYPE_BYTE;

	const size_t optmap_count = ARRAY_SIZE(delta_optmap);
	return parse_options(str, str_end, filter_options,
			delta_optmap, optmap_count);
}
#endif


///////////////////
// LZMA1 & LZMA2 //
///////////////////

/// Help string for presets. It describes the syntax handled by
/// parse_lzma12_preset() (a digit followed by optional flag characters)
/// so the two must be kept in sync.
#define LZMA12_PRESET_STR "0-9[e]"


/// Converts a preset string (a level character followed by flag characters)
/// to a preset number.
///
/// The first character is converted by subtracting '0' without checking
/// that it is a digit; the result is validated by whoever consumes *preset.
///
/// \param      str         *str must point to the first char of the preset
///                         and there must be at least one char before
///                         str_end. *str is advanced past every character
///                         that was accepted.
/// \param      str_end     First char that isn't part of the preset string
/// \param      preset      Destination for the preset number
///
/// \return     NULL on success. On an unknown flag character an error
///             message is returned and *str points to that character.
static const char *
parse_lzma12_preset(const char **const str, const char *str_end,
		uint32_t *preset)
{
	assert(*str < str_end);
	*preset = (uint32_t)(**str - '0');

	// The rest of the characters are flags.
	//
	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
	for (++*str; *str < str_end; ++*str) {
		if (**str != 'e')
			return "Unsupported preset flag";

		*preset |= LZMA_PRESET_EXTREME;
	}

	return NULL;
}


/// Parses a preset string and, if that succeeds, initializes
/// filter_options (a lzma_options_lzma) with lzma_lzma_preset().
///
/// \return     NULL on success, an error message string otherwise.
static const char *
set_lzma12_preset(const char **const str, const char *str_end,
		void *filter_options)
{
	uint32_t preset;
	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);

	if (errmsg == NULL) {
		// The string was syntactically fine. lzma_lzma_preset()
		// returns true if it doesn't accept the preset number.
		lzma_options_lzma *lzma_opts = filter_options;
		if (lzma_lzma_preset(lzma_opts, preset))
			errmsg = "Unsupported preset";
	}

	return errmsg;
}


static const name_value_map lzma12_mode_map[] = {
	{ "fast",   LZMA_MODE_FAST },
	{ "normal", LZMA_MODE_NORMAL },
	{ "",       0 }
};


static const name_value_map lzma12_mf_map[] = {
	{ "hc3", LZMA_MF_HC3 },
	{ "hc4", LZMA_MF_HC4 },
	{ "bt2", LZMA_MF_BT2 },
	{ "bt3", LZMA_MF_BT3 },
	{ "bt4", LZMA_MF_BT4 },
	{ "",    0 }
};


/// Options of LZMA1 and LZMA2. The order matters when stringifying because
/// only a given number of elements from the beginning of the array are
/// included in the output.
static const option_map lzma12_optmap[] = {
	{
		.name = "preset",
		.type = OPTMAP_TYPE_LZMA_PRESET,
	},
	{
		.name = "dict",
		.flags = OPTMAP_USE_BYTE_SUFFIX,
		.offset = offsetof(lzma_options_lzma, dict_size),
		.u = { .range = {
			.min = LZMA_DICT_SIZE_MIN,
			// FIXME? The max is really max for encoding but
			// decoding would allow 4 GiB - 1 B.
			.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
		} },
	},
	{
		.name = "lc",
		.offset = offsetof(lzma_options_lzma, lc),
		.u = { .range = {
			.min = LZMA_LCLP_MIN,
			.max = LZMA_LCLP_MAX,
		} },
	},
	{
		.name = "lp",
		.offset = offsetof(lzma_options_lzma, lp),
		.u = { .range = {
			.min = LZMA_LCLP_MIN,
			.max = LZMA_LCLP_MAX,
		} },
	},
	{
		.name = "pb",
		.offset = offsetof(lzma_options_lzma, pb),
		.u = { .range = {
			.min = LZMA_PB_MIN,
			.max = LZMA_PB_MAX,
		} },
	},
	{
		.name = "mode",
		.type = OPTMAP_TYPE_LZMA_MODE,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mode),
		.u = { .map = lzma12_mode_map },
	},
	{
		.name = "nice",
		.offset = offsetof(lzma_options_lzma, nice_len),
		.u = { .range = { .min = 2, .max = 273 } },
	},
	{
		.name = "mf",
		.type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
		.flags = OPTMAP_USE_NAME_VALUE_MAP,
		.offset = offsetof(lzma_options_lzma, mf),
		.u = { .map = lzma12_mf_map },
	},
	{
		.name = "depth",
		.offset = offsetof(lzma_options_lzma, depth),
		.u = { .range = { .min = 0, .max = UINT32_MAX } },
	},
};


static const char *
parse_lzma12(const char **const str, const char *str_end, void *filter_options)
{
	lzma_options_lzma *opts = filter_options;

	// It cannot fail.
	const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
	assert(!preset_ret);
	(void)preset_ret;

	const char *errmsg = parse_options(str, str_end, filter_options,
			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
	if (errmsg != NULL)
		return errmsg;

	if (opts->lc + opts->lp > LZMA_LCLP_MAX)
		return "The sum of lc and lp must not exceed 4";

	return NULL;
}


/////////////////////////////////////////
// Generic parsing and stringification //
/////////////////////////////////////////

/// Table of the filters that can be converted from and to strings.
/// Filters that are disabled at build time are left out.
static const struct {
	/// Name of the filter
	char name[NAME_LEN_MAX + 1];

	/// For lzma_str_to_filters:
	/// Size of the filter-specific options structure.
	uint32_t opts_size;

	/// Filter ID
	lzma_vli id;

	/// For lzma_str_to_filters:
	/// Function to parse the filter-specific options. The filter_options
	/// will already have been allocated using lzma_alloc_zero().
	const char *(*parse)(const char **str, const char *str_end,
			void *filter_options);

	/// For lzma_str_from_filters:
	/// If the flag LZMA_STR_ENCODER is used then the first
	/// strfy_encoder elements of optmap are stringified.
	/// With LZMA_STR_DECODER strfy_decoder is used.
	/// Currently encoders use all options that decoders do but if
	/// that changes then this needs to be changed too, for example,
	/// add a new OPTMAP flag to skip printing some decoder-only options.
	const option_map *optmap;
	uint8_t strfy_encoder;
	uint8_t strfy_decoder;

	/// For lzma_str_from_filters:
	/// If true, lzma_filter.options is allowed to be NULL. In that case,
	/// only the filter name is printed without any options.
	bool allow_null;

} filter_name_map[] = {
#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
	{
		.name = "lzma1",
		.opts_size = sizeof(lzma_options_lzma),
		.id = LZMA_FILTER_LZMA1,
		.parse = &parse_lzma12,
		.optmap = lzma12_optmap,
		.strfy_encoder = 9,
		.strfy_decoder = 5,
		.allow_null = false,
	},
#endif

#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
	{
		.name = "lzma2",
		.opts_size = sizeof(lzma_options_lzma),
		.id = LZMA_FILTER_LZMA2,
		.parse = &parse_lzma12,
		.optmap = lzma12_optmap,
		.strfy_encoder = 9,
		.strfy_decoder = 2,
		.allow_null = false,
	},
#endif

#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
	{
		.name = "x86",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_X86,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
	{
		.name = "arm",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_ARM,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
	{
		.name = "armthumb",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_ARMTHUMB,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
	{
		.name = "arm64",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_ARM64,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
	{
		.name = "powerpc",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_POWERPC,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
	{
		.name = "ia64",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_IA64,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
	{
		.name = "sparc",
		.opts_size = sizeof(lzma_options_bcj),
		.id = LZMA_FILTER_SPARC,
		.parse = &parse_bcj,
		.optmap = bcj_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = true,
	},
#endif

#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
	{
		.name = "delta",
		.opts_size = sizeof(lzma_options_delta),
		.id = LZMA_FILTER_DELTA,
		.parse = &parse_delta,
		.optmap = delta_optmap,
		.strfy_encoder = 1,
		.strfy_decoder = 1,
		.allow_null = false,
	},
#endif
};


/// Decodes options from a string for one filter (name1=value1,name2=value2).
/// Caller must have allocated memory for filter_options already and set
/// the initial default values. This is called from the filter-specific
/// parse_* functions.
///
/// The input string starts at *str and the address in str_end is the first
/// char that is not part of the string anymore. So no '\0' terminator is
/// used. *str is advanced every time something has been decoded successfully.
///
/// \param      str             Pointer to the current position in the input.
///                             On error it is left pointing to the part of
///                             the input that the error message is about.
/// \param      str_end         First char that isn't part of the input
/// \param      filter_options  Filter-specific options structure to which
///                             the decoded values are stored
/// \param      optmap          Array describing the supported options
/// \param      optmap_size     Number of elements in optmap
///
/// \return     NULL on success. On error a pointer to a constant
///             error message string is returned.
static const char *
parse_options(const char **const str, const char *str_end,
		void *filter_options,
		const option_map *const optmap, const size_t optmap_size)
{
	while (*str < str_end && **str != '\0') {
		// Each option is of the form name=value.
		// Commas (',') separate options. Extra commas are ignored.
		// Ignoring extra commas makes it simpler if an optional
		// option is stored in a shell variable which can be empty.
		if (**str == ',') {
			++*str;
			continue;
		}

		// Find where the next name=value ends.
		const size_t str_len = (size_t)(str_end - *str);
		const char *name_eq_value_end = memchr(*str, ',', str_len);
		if (name_eq_value_end == NULL)
			name_eq_value_end = str_end;

		// Look for the '=' only within this name=value pair.
		const char *equals_sign = memchr(*str, '=',
				(size_t)(name_eq_value_end - *str));

		// Fail if the '=' wasn't found or the option name is missing
		// (the first char is '=').
		if (equals_sign == NULL || **str == '=')
			return "Options must be 'name=value' pairs separated "
					"with commas";

		// Reject a too long option name so that the memcmp()
		// in the loop below won't read past the end of the
		// string in optmap[i].name.
		const size_t name_len = (size_t)(equals_sign - *str);
		if (name_len > NAME_LEN_MAX)
			return "Unknown option name";

		// Find the option name from optmap[].
		size_t i = 0;
		while (true) {
			if (i == optmap_size)
				return "Unknown option name";

			// The name must match as a whole: the first name_len
			// bytes are equal and optmap[i].name ends there.
			if (memcmp(*str, optmap[i].name, name_len) == 0
					&& optmap[i].name[name_len] == '\0')
				break;

			++i;
		}

		// The input string is good at least until the start of
		// the option value.
		*str = equals_sign + 1;

		// The code assumes that the option value isn't an empty
		// string so check it here.
		const size_t value_len = (size_t)(name_eq_value_end - *str);
		if (value_len == 0)
			return "Option value cannot be empty";

		// LZMA1/2 preset has its own parsing function.
		// On success it has advanced *str to name_eq_value_end.
		if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
			const char *errmsg = set_lzma12_preset(str,
					name_eq_value_end, filter_options);
			if (errmsg != NULL)
				return errmsg;

			continue;
		}

		// It's an integer value.
		uint32_t v;
		if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
			// The integer is picked from a string-to-integer map.
			//
			// Reject a too long value string so that the memcmp()
			// in the loop below won't read past the end of the
			// string in optmap[i].u.map[j].name.
			if (value_len > NAME_LEN_MAX)
				return "Invalid option value";

			const name_value_map *map = optmap[i].u.map;
			size_t j = 0;
			while (true) {
				// The array is terminated with an empty name.
				if (map[j].name[0] == '\0')
					return "Invalid option value";

				if (memcmp(*str, map[j].name, value_len) == 0
						&& map[j].name[value_len]
							== '\0') {
					v = map[j].value;
					break;
				}

				++j;
			}
		} else if (**str < '0' || **str > '9') {
			// Note that "max" isn't supported while it is
			// supported in xz. It's not useful here.
			return "Value is not a non-negative decimal integer";
		} else {
			// strtoul() has locale-specific behavior so it cannot
			// be relied on to get reproducible results since we
			// cannot change the locale in a thread-safe library.
			// It also needs '\0'-termination.
			//
			// Use a temporary pointer so that *str will point
			// to the beginning of the value string in case
			// an error occurs.
			const char *p = *str;
			v = 0;
			do {
				// Both the multiplication and the addition
				// are checked for overflow before doing them.
				if (v > UINT32_MAX / 10)
					return "Value out of range";

				v *= 10;

				const uint32_t add = (uint32_t)(*p - '0');
				if (UINT32_MAX - add < v)
					return "Value out of range";

				v += add;
				++p;
			} while (p < name_eq_value_end
					&& *p >= '0' && *p <= '9');

			if (p < name_eq_value_end) {
				// Remember this position so that it can be
				// used for error messages that are
				// specifically about the suffix. (Out of
				// range values are about the whole value
				// and those error messages point to the
				// beginning of the number part,
				// not to the suffix.)
				const char *multiplier_start = p;

				// If multiplier suffix shouldn't be used
				// then don't allow them even if the value
				// would stay within limits. This is a somewhat
				// unnecessary check but it rejects silly
				// things like lzma2:pb=0MiB which xz allows.
				if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
						== 0) {
					*str = multiplier_start;
					return "This option does not support "
						"any integer suffixes";
				}

				// Number of bits to shift v to the left to
				// apply the multiplier.
				uint32_t shift;

				switch (*p) {
				case 'k':
				case 'K':
					shift = 10;
					break;

				case 'm':
				case 'M':
					shift = 20;
					break;

				case 'g':
				case 'G':
					shift = 30;
					break;

				default:
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				++p;

				// Allow "M", "Mi", "MB", "MiB" and the same
				// for the other five characters from the
				// switch-statement above. All are handled
				// as base-2 (perhaps a mistake, perhaps not).
				// Note that 'i' and 'B' are case sensitive.
				if (p < name_eq_value_end && *p == 'i')
					++p;

				if (p < name_eq_value_end && *p == 'B')
					++p;

				// Now we must have no chars remaining.
				if (p < name_eq_value_end) {
					*str = multiplier_start;
					return "Invalid multiplier suffix "
							"(KiB, MiB, or GiB)";
				}

				// Check that the shift won't lose any bits.
				if (v > (UINT32_MAX >> shift))
					return "Value out of range";

				v <<= shift;
			}

			// Both ends of the range are inclusive.
			if (v < optmap[i].u.range.min
					|| v > optmap[i].u.range.max)
				return "Value out of range";
		}

		// Set the value in filter_options. Enums are handled
		// specially since the underlying type isn't the same
		// as uint32_t on all systems.
		void *ptr = (char *)filter_options + optmap[i].offset;
		switch (optmap[i].type) {
		case OPTMAP_TYPE_LZMA_MODE:
			*(lzma_mode *)ptr = (lzma_mode)v;
			break;

		case OPTMAP_TYPE_LZMA_MATCH_FINDER:
			*(lzma_match_finder *)ptr = (lzma_match_finder)v;
			break;

		default:
			*(uint32_t *)ptr = v;
			break;
		}

		// This option has been successfully handled.
		*str = name_eq_value_end;
	}

	// No errors.
	return NULL;
}


/// Decodes one filter: looks up the filter name that is at the beginning
/// of the string from filter_name_map[] and lets the filter-specific
/// parse function decode the options that follow the name.
///
/// The caller must have set str_end so that exactly one filter and its
/// options are present without any trailing characters.
///
/// \param      str         Pointer to the current position in the input.
///                         It is advanced past the filter name once the
///                         name has been recognized and the options
///                         structure has been allocated.
/// \param      str_end     First char that isn't part of this filter
/// \param      filter      Destination for the filter ID and the allocated
///                         options. It is modified only on success.
/// \param      allocator   Allocator for the filter options
/// \param      only_xz     If true, filters whose ID is at least
///                         LZMA_FILTER_RESERVED_START are rejected.
///
/// \return     NULL on success, an error message string otherwise.
static const char *
parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
		const lzma_allocator *allocator, bool only_xz)
{
	// A colon or an equals sign separates the filter name from
	// the filter options. Find the first one if there is any.
	const char *delim = NULL;
	for (const char *p = *str; p < str_end && delim == NULL; ++p)
		if (*p == ':' || *p == '=')
			delim = p;

	// Without a delimiter the whole string is the filter name and
	// there are no options. Otherwise the options
	// (name1=value1,name2=value2,...) begin right after the delimiter.
	const char *const name_end = delim != NULL ? delim : str_end;
	const char *const opts_start = delim != NULL ? delim + 1 : str_end;

	// Reject a too long filter name so that the memcmp()
	// in the loop below won't read past the end of the
	// string in filter_name_map[i].name.
	const size_t name_len = (size_t)(name_end - *str);
	if (name_len > NAME_LEN_MAX)
		return "Unknown filter name";

	// Find the index of the filter. The name has to match as a whole.
	size_t idx = 0;
	while (true) {
		if (idx == ARRAY_SIZE(filter_name_map))
			return "Unknown filter name";

		if (filter_name_map[idx].name[name_len] == '\0'
				&& memcmp(*str, filter_name_map[idx].name,
					name_len) == 0)
			break;

		++idx;
	}

	if (only_xz && filter_name_map[idx].id >= LZMA_FILTER_RESERVED_START)
		return "This filter cannot be used in "
				"the .xz format";

	// The filter-specific options are allocated as zero-initialized
	// memory. The parse functions rely on that.
	void *options = lzma_alloc_zero(filter_name_map[idx].opts_size,
			allocator);
	if (options == NULL)
		return "Memory allocation failed";

	// Filter name was found so the input string is good
	// at least this far.
	*str = opts_start;

	const char *errmsg = filter_name_map[idx].parse(
			str, str_end, options);
	if (errmsg != NULL) {
		lzma_free(options, allocator);
		return errmsg;
	}

	// *filter is modified only when parsing is successful.
	filter->id = filter_name_map[idx].id;
	filter->options = options;
	return NULL;
}


/// Converts the string to a filter chain (array of lzma_filter structures).
///
/// *str is advanced every time something has been decoded successfully.
/// This way the caller knows where in the string a possible error occurred.
///
/// \param      str         Pointer to the '\0'-terminated input string
/// \param      filters     Destination array. It is modified only on
///                         success. It gets terminated with
///                         LZMA_VLI_UNKNOWN, so with a filter chain up to
///                         LZMA_FILTERS_MAX + 1 elements can be written.
/// \param      flags       LZMA_STR_ALL_FILTERS and LZMA_STR_NO_VALIDATION
///                         are looked at here.
/// \param      allocator   Allocator for the filter options
///
/// \return     NULL on success, an error message string otherwise.
static const char *
str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
		const lzma_allocator *allocator)
{
	const char *errmsg;

	// Skip leading spaces.
	while (**str == ' ')
		++*str;

	if (**str == '\0')
		return "Empty string is not allowed, "
				"try \"6\" if a default value is needed";

	// Detect the type of the string.
	//
	// A string beginning with a digit or a string beginning with
	// one dash and a digit are treated as presets. Trailing spaces
	// will be ignored too (leading spaces were already ignored above).
	//
	// For example, "6", "7  ", "-9e", or "  -3  " are treated as presets.
	// Strings like "-" or "- " aren't preset.
	//
	// NOTE: Within the code shown here this macro isn't #undef'ed after
	// the function.
#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
	if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
		if (**str == '-')
			++*str;

		// Ignore trailing spaces.
		const size_t str_len = strlen(*str);
		const char *str_end = memchr(*str, ' ', str_len);
		if (str_end != NULL) {
			// There is at least one trailing space. Check that
			// there are no chars other than spaces.
			for (size_t i = 1; str_end[i] != '\0'; ++i)
				if (str_end[i] != ' ')
					return "Unsupported preset";
		} else {
			// There are no trailing spaces. Use the whole string.
			str_end = *str + str_len;
		}

		uint32_t preset;
		errmsg = parse_lzma12_preset(str, str_end, &preset);
		if (errmsg != NULL)
			return errmsg;

		lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
		if (opts == NULL)
			return "Memory allocation failed";

		if (lzma_lzma_preset(opts, preset)) {
			lzma_free(opts, allocator);
			return "Unsupported preset";
		}

		// A preset results in a chain that has only LZMA2 in it.
		filters[0].id = LZMA_FILTER_LZMA2;
		filters[0].options = opts;
		filters[1].id = LZMA_VLI_UNKNOWN;
		filters[1].options = NULL;

		return NULL;
	}

	// Not a preset so it must be a filter chain.
	//
	// If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
	// can be used in .xz.
	const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;

	// Use a temporary array so that we don't modify the caller-supplied
	// one until we know that no errors occurred.
	lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];

	// Number of filters successfully decoded into temp_filters so far.
	size_t i = 0;
	do {
		if (i == LZMA_FILTERS_MAX) {
			errmsg = "The maximum number of filters is four";
			goto error;
		}

		// Skip "--" if present.
		if ((*str)[0] == '-' && (*str)[1] == '-')
			*str += 2;

		// Locate the end of "filter:name1=value1,name2=value2",
		// stopping at the first "--" or a single space.
		const char *filter_end = *str;
		while (filter_end[0] != '\0') {
			if ((filter_end[0] == '-' && filter_end[1] == '-')
					|| filter_end[0] == ' ')
				break;

			++filter_end;
		}

		// Inputs that have "--" at the end or "-- " in the middle
		// will result in an empty filter name.
		if (filter_end == *str) {
			errmsg = "Filter name is missing";
			goto error;
		}

		errmsg = parse_filter(str, filter_end, &temp_filters[i],
				allocator, only_xz);
		if (errmsg != NULL)
			goto error;

		// Skip trailing spaces.
		while (**str == ' ')
			++*str;

		++i;
	} while (**str != '\0');

	// Seems to be good, terminate the array so that
	// basic validation can be done.
	temp_filters[i].id = LZMA_VLI_UNKNOWN;
	temp_filters[i].options = NULL;

	// Do basic validation if the application didn't prohibit it.
	if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
		size_t dummy;
		const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
		assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
		if (ret != LZMA_OK) {
			errmsg = "Invalid filter chain "
					"('lzma2' missing at the end?)";
			goto error;
		}
	}

	// All good. Copy the filters to the application supplied array.
	// The terminating element is copied too, thus i + 1.
	memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
	return NULL;

error:
	// Free the filter options that were successfully decoded.
	// Only the first i elements of temp_filters have been set.
	while (i-- > 0)
		lzma_free(temp_filters[i].options, allocator);

	return errmsg;
}


/// Public entry point for converting a string to a filter chain.
///
/// \param      str         '\0'-terminated string to convert
/// \param      error_pos   If not NULL, *error_pos is set on every return
///                         path: to 0 if the arguments themselves are
///                         rejected, otherwise to the offset in str up to
///                         which the input was consumed (limited to
///                         INT_MAX). On error this tells where the problem
///                         was detected.
/// \param      filters     Destination array for the filter chain.
///                         It is modified only on success.
/// \param      flags       Bitwise-or of LZMA_STR_ALL_FILTERS and
///                         LZMA_STR_NO_VALIDATION, or zero
/// \param      allocator   Allocator for the filter options
///
/// \return     NULL on success, an error message string otherwise.
extern LZMA_API(const char *)
lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	// If error_pos isn't NULL, *error_pos must always be set. Do it
	// before the argument checks below; otherwise an application that
	// reads *error_pos after those errors would get an indeterminate
	// value.
	if (error_pos != NULL)
		*error_pos = 0;

	if (str == NULL || filters == NULL)
		return "Unexpected NULL pointer argument(s) "
				"to lzma_str_to_filters()";

	// Validate the flags.
	const uint32_t supported_flags
			= LZMA_STR_ALL_FILTERS
			| LZMA_STR_NO_VALIDATION;

	if (flags & ~supported_flags)
		return "Unsupported flags to lzma_str_to_filters()";

	// str_to_filters() advances "used" as it decodes the string so
	// afterwards the difference tells how far it got.
	const char *used = str;
	const char *errmsg = str_to_filters(&used, filters, flags, allocator);

	if (error_pos != NULL) {
		const size_t n = (size_t)(used - str);
		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
	}

	return errmsg;
}


/// Stringifies the options of one filter.
///
/// The caller must have already put the filter name in the destination
/// string. Since it is possible that no options will be needed, the caller
/// won't have put a delimiter character (':' or '=') in the string yet.
/// It is added here if at least one option gets added to the string.
///
/// \param      dest            Destination string
/// \param      delimiter       String to put before the first option
/// \param      optmap          Array describing the options of the filter
/// \param      optmap_count    How many elements from the beginning of
///                             optmap to process
/// \param      filter_options  Filter-specific options structure from
///                             which the values are read
static void
strfy_filter(lzma_str *dest, const char *delimiter,
		const option_map *optmap, size_t optmap_count,
		const void *filter_options)
{
	// Before the first option we add whatever delimiter the caller
	// gave us. For later options a comma is used.
	const char *separator = delimiter;

	for (size_t i = 0; i < optmap_count; ++i) {
		const option_map *const opt = &optmap[i];

		// No attempt is made to reverse LZMA1/2 preset back to
		// preset=PRESET form.
		if (opt->type == OPTMAP_TYPE_LZMA_PRESET)
			continue;

		// All other options have integer values; some of them are
		// just mapped to a string with a name_value_map. Read the
		// field using the type that it really has.
		const void *field
			= (const char *)filter_options + opt->offset;
		uint32_t value;

		if (opt->type == OPTMAP_TYPE_LZMA_MODE)
			value = *(const lzma_mode *)field;
		else if (opt->type == OPTMAP_TYPE_LZMA_MATCH_FINDER)
			value = *(const lzma_match_finder *)field;
		else
			value = *(const uint32_t *)field;

		// Skip this if this option should be omitted from
		// the string when the value is zero.
		if (value == 0 && (opt->flags & OPTMAP_NO_STRFY_ZERO))
			continue;

		// Delimiter or comma, then "name=".
		str_append_str(dest, separator);
		separator = ",";

		str_append_str(dest, opt->name);
		str_append_str(dest, "=");

		if (!(opt->flags & OPTMAP_USE_NAME_VALUE_MAP)) {
			// Plain decimal integer, possibly with a suffix.
			str_append_u32(dest, value,
				opt->flags & OPTMAP_USE_BYTE_SUFFIX);
			continue;
		}

		// Find the name of the value. The search stops at the first
		// match or at the terminating element whose name is empty.
		const name_value_map *entry = opt->u.map;
		while (entry->name[0] != '\0' && entry->value != value)
			++entry;

		if (entry->name[0] != '\0')
			str_append_str(dest, entry->name);
		else
			str_append_str(dest, "UNKNOWN");
	}
}


/// Converts a filter chain into a string.
///
/// *output_str is set to NULL before anything else is done, so it stays
/// NULL on every error return. On success the finished string is handed
/// over via str_finish().
///
/// Returns LZMA_PROG_ERROR if output_str or filters is NULL.
/// Returns LZMA_OPTIONS_ERROR if flags has unsupported bits, the chain is
/// empty, the chain doesn't end within LZMA_FILTERS_MAX filters, a Filter ID
/// isn't found in filter_name_map, or options are requested for a filter
/// whose mandatory options structure is NULL. Otherwise the return value
/// comes from str_init() or str_finish().
extern LZMA_API(lzma_ret)
lzma_str_from_filters(char **output_str, const lzma_filter *filters,
		uint32_t flags, const lzma_allocator *allocator)
{
	if (output_str == NULL)
		return LZMA_PROG_ERROR;

	// From now on every error return leaves *output_str as NULL.
	*output_str = NULL;

	if (filters == NULL)
		return LZMA_PROG_ERROR;

	// Reject flags this function doesn't know about.
	const uint32_t supported_flags
			= LZMA_STR_ENCODER
			| LZMA_STR_DECODER
			| LZMA_STR_GETOPT_LONG
			| LZMA_STR_NO_SPACES;

	if ((flags & ~supported_flags) != 0)
		return LZMA_OPTIONS_ERROR;

	// An empty filter chain cannot be converted.
	if (filters[0].id == LZMA_VLI_UNKNOWN)
		return LZMA_OPTIONS_ERROR;

	// Set up the output string.
	lzma_str dest;
	return_if_error(str_init(&dest, allocator));

	const bool want_options
			= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;
	const bool getopt_style = (flags & LZMA_STR_GETOPT_LONG) != 0;
	const bool no_spaces = (flags & LZMA_STR_NO_SPACES) != 0;

	const char *opt_delim = getopt_style ? "=" : ":";

	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
		// The terminator wasn't found within LZMA_FILTERS_MAX
		// filters, so the array is too long.
		if (i == LZMA_FILTERS_MAX)
			goto error;

		// Filters are separated with a space unless the caller
		// asked for a string without spaces.
		if (i > 0 && !no_spaces)
			str_append_str(&dest, " ");

		// Dashes are used for the xz getopt_long() compatible
		// syntax. They also act as the separator between filters
		// when spaces aren't allowed.
		if (getopt_style || (i > 0 && no_spaces))
			str_append_str(&dest, "--");

		// Look for the Filter ID in the table of known filters.
		size_t j = 0;
		while (j < ARRAY_SIZE(filter_name_map)
				&& filter_name_map[j].id != filters[i].id)
			++j;

		// The Filter ID isn't supported.
		if (j == ARRAY_SIZE(filter_name_map))
			goto error;

		str_append_str(&dest, filter_name_map[j].name);

		// When only the names are wanted, .options is ignored
		// and it may be NULL even with filters that otherwise
		// require an options structure.
		if (!want_options)
			continue;

		if (filters[i].options == NULL) {
			// Missing options are an error if the filter
			// requires the options structure.
			if (!filter_name_map[j].allow_null)
				goto error;

			// NULL is allowed here and then there
			// is nothing to add after the name.
			continue;
		}

		// The number of options to show depends on whether
		// the encoder or the decoder view was requested.
		const size_t optmap_count = (flags & LZMA_STR_ENCODER)
				? filter_name_map[j].strfy_encoder
				: filter_name_map[j].strfy_decoder;

		strfy_filter(&dest, opt_delim, filter_name_map[j].optmap,
				optmap_count, filters[i].options);
	}

	return str_finish(output_str, &dest, allocator);

error:
	// All failures inside the loop are reported the same way.
	str_free(&dest, allocator);
	return LZMA_OPTIONS_ERROR;
}


/// Creates a string that lists the supported filters and, optionally,
/// the options of each filter along with their allowed values.
///
/// If filter_id is LZMA_VLI_UNKNOWN, all filters in filter_name_map are
/// candidates; otherwise only the filter with the matching ID is listed.
///
/// *output_str is set to NULL before anything else is done, so it stays
/// NULL on every error return. On success the finished string is handed
/// over via str_finish().
///
/// Returns LZMA_PROG_ERROR if output_str is NULL. Returns LZMA_OPTIONS_ERROR
/// if flags has unsupported bits or if no filter ended up in the list.
/// Otherwise the return value comes from str_init() or str_finish().
extern LZMA_API(lzma_ret)
lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
		const lzma_allocator *allocator)
{
	if (output_str == NULL)
		return LZMA_PROG_ERROR;

	// From now on every error return leaves *output_str as NULL.
	*output_str = NULL;

	// Reject flags this function doesn't know about.
	const uint32_t supported_flags
			= LZMA_STR_ALL_FILTERS
			| LZMA_STR_ENCODER
			| LZMA_STR_DECODER
			| LZMA_STR_GETOPT_LONG;

	if ((flags & ~supported_flags) != 0)
		return LZMA_OPTIONS_ERROR;

	// Set up the output string.
	lzma_str dest;
	return_if_error(str_init(&dest, allocator));

	const bool list_all = filter_id == LZMA_VLI_UNKNOWN;
	const bool all_filters = (flags & LZMA_STR_ALL_FILTERS) != 0;
	const bool getopt_style = (flags & LZMA_STR_GETOPT_LONG) != 0;
	const bool with_options
			= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;

	// Plain filter names are separated with spaces. When the options
	// are shown too, each filter goes on its own line.
	const char *filter_delim = with_options ? "\n" : " ";
	const char *opt_delim = getopt_style ? "=" : ":";

	// Number of filters written to the string so far.
	size_t printed = 0;

	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
		if (list_all) {
			// When listing everything, only .xz filters are
			// included unless LZMA_STR_ALL_FILTERS was given.
			if (filter_name_map[i].id
						>= LZMA_FILTER_RESERVED_START
					&& !all_filters)
				continue;
		} else if (filter_id != filter_name_map[i].id) {
			// Only one specific filter was requested
			// and this isn't it.
			continue;
		}

		// Separate this filter from the previous one.
		if (printed > 0)
			str_append_str(&dest, filter_delim);

		++printed;

		if (getopt_style)
			str_append_str(&dest, "--");

		str_append_str(&dest, filter_name_map[i].name);

		// Nothing more to add if only the names were wanted.
		if (!with_options)
			continue;

		const option_map *optmap = filter_name_map[i].optmap;
		const size_t opt_count = (flags & LZMA_STR_ENCODER)
				? filter_name_map[i].strfy_encoder
				: filter_name_map[i].strfy_decoder;

		for (size_t j = 0; j < opt_count; ++j) {
			// "=" or ":" separates the filter name from the
			// first option. The remaining options are
			// separated with ",".
			str_append_str(&dest, j == 0 ? opt_delim : ",");

			// optname=<possible_values>
			str_append_str(&dest, optmap[j].name);
			str_append_str(&dest, "=<");

			if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
				// The LZMA1/2 preset uses a dedicated
				// help string.
				str_append_str(&dest, LZMA12_PRESET_STR);
			} else if (optmap[j].flags
					& OPTMAP_USE_NAME_VALUE_MAP) {
				// List the names of the possible values
				// with "|" in between. The list ends at
				// the entry that has an empty name.
				const name_value_map *names = optmap[j].u.map;
				for (size_t k = 0;
						names[k].name[0] != '\0';
						++k) {
					if (k > 0)
						str_append_str(&dest, "|");

					str_append_str(&dest, names[k].name);
				}
			} else {
				// Integers are described as min-max.
				const bool byte_suffix = (optmap[j].flags
						& OPTMAP_USE_BYTE_SUFFIX) != 0;
				str_append_u32(&dest, optmap[j].u.range.min,
						byte_suffix);
				str_append_str(&dest, "-");
				str_append_u32(&dest, optmap[j].u.range.max,
						byte_suffix);
			}

			str_append_str(&dest, ">");
		}
	}

	// An empty list means that the caller asked for
	// a Filter ID that isn't supported.
	if (printed == 0) {
		str_free(&dest, allocator);
		return LZMA_OPTIONS_ERROR;
	}

	return str_finish(output_str, &dest, allocator);
}