aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/liblzma/common/Makefile.inc1
-rw-r--r--src/liblzma/common/string_conversion.c1302
2 files changed, 1303 insertions, 0 deletions
diff --git a/src/liblzma/common/Makefile.inc b/src/liblzma/common/Makefile.inc
index 44ab32f3..f0276a20 100644
--- a/src/liblzma/common/Makefile.inc
+++ b/src/liblzma/common/Makefile.inc
@@ -19,6 +19,7 @@ liblzma_la_SOURCES += \
common/index.h \
common/stream_flags_common.c \
common/stream_flags_common.h \
+ common/string_conversion.c \
common/vli_size.c
if COND_THREADS
diff --git a/src/liblzma/common/string_conversion.c b/src/liblzma/common/string_conversion.c
new file mode 100644
index 00000000..04a70167
--- /dev/null
+++ b/src/liblzma/common/string_conversion.c
@@ -0,0 +1,1302 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file string_conversion.c
+/// \brief Conversion of strings to filter chain and vice versa
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_common.h"
+
+
+/////////////////////
+// String building //
+/////////////////////
+
+/// How much memory to allocate for strings. For now, no realloc is used
+/// so this needs to be big enough even though there of course is
+/// an overflow check still.
+///
+/// FIXME? Using a fixed size is wasteful if the application doesn't free
+/// the string fairly quickly but this can be improved later if needed.
+#define STR_ALLOC_SIZE 800
+
+
+typedef struct {
+ char *buf;
+ size_t pos;
+} lzma_str;
+
+
+static lzma_ret
+str_init(lzma_str *str, const lzma_allocator *allocator)
+{
+ str->buf = lzma_alloc(STR_ALLOC_SIZE, allocator);
+ if (str->buf == NULL)
+ return LZMA_MEM_ERROR;
+
+ str->pos = 0;
+ return LZMA_OK;
+}
+
+
+static void
+str_free(lzma_str *str, const lzma_allocator *allocator)
+{
+ lzma_free(str->buf, allocator);
+ return;
+}
+
+
+static bool
+str_is_full(const lzma_str *str)
+{
+ return str->pos == STR_ALLOC_SIZE - 1;
+}
+
+
+static lzma_ret
+str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
+{
+ if (str_is_full(str)) {
+ // The preallocated buffer was too small.
+ // This shouldn't happen as STR_ALLOC_SIZE should
+ // be adjusted if new filters are added.
+ lzma_free(str->buf, allocator);
+ *dest = NULL;
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ str->buf[str->pos] = '\0';
+ *dest = str->buf;
+ return LZMA_OK;
+}
+
+
+static void
+str_append_str(lzma_str *str, const char *s)
+{
+ const size_t len = strlen(s);
+ const size_t limit = STR_ALLOC_SIZE - 1 - str->pos;
+ const size_t copy_size = my_min(len, limit);
+
+ memcpy(str->buf + str->pos, s, copy_size);
+ str->pos += copy_size;
+ return;
+}
+
+
+static void
+str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
+{
+ if (v == 0) {
+ str_append_str(str, "0");
+ } else {
+ // NOTE: Don't use plain "B" because xz and the parser in this
+ // file don't support it and at glance it may look like 8
+ // (there cannot be a space before the suffix).
+ static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
+
+ size_t suf = 0;
+ if (use_byte_suffix) {
+ while ((v & 1023) == 0
+ && suf < ARRAY_SIZE(suffixes) - 1) {
+ v >>= 10;
+ ++suf;
+ }
+ }
+
+ // UINT32_MAX in base 10 would need 10 + 1 bytes. Remember
+ // that initializing to "" initializes all elements to
+ // zero so '\0'-termination gets handled by this.
+ char buf[16] = "";
+ size_t pos = sizeof(buf) - 1;
+
+ do {
+ buf[--pos] = '0' + (v % 10);
+ v /= 10;
+ } while (v != 0);
+
+ str_append_str(str, buf + pos);
+ str_append_str(str, suffixes[suf]);
+ }
+
+ return;
+}
+
+
+//////////////////////////////////////////////
+// Parsing and stringification declarations //
+//////////////////////////////////////////////
+
+/// Maximum length for filter and option names.
+/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
+#define NAME_LEN_MAX 11
+
+
+/// For option_map.flags: Use .u.map to do convert the input value
+/// to an integer. Without this flag, .u.range.{min,max} are used
+/// as the allowed range for the integer.
+#define OPTMAP_USE_NAME_VALUE_MAP 0x01
+
+/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
+/// the stringified output if the value is an exact multiple of these.
+/// This is used e.g. for LZMA1/2 dictionary size.
+#define OPTMAP_USE_BYTE_SUFFIX 0x02
+
+/// For option_map.flags: If the integer value is zero then this option
+/// won't be included in the stringified output. It's used e.g. for
+/// BCJ filter start offset which usually is zero.
+#define OPTMAP_NO_STRFY_ZERO 0x04
+
+/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
+/// it doesn't need to be specified in the initializers as it is
+/// the implicit value.
+enum {
+ OPTMAP_TYPE_UINT32,
+ OPTMAP_TYPE_LZMA_MODE,
+ OPTMAP_TYPE_LZMA_MATCH_FINDER,
+ OPTMAP_TYPE_LZMA_PRESET,
+};
+
+
+/// This is for mapping string values in options to integers.
+/// The last element of an array must have "" as the name.
+/// It's used e.g. for match finder names in LZMA1/2.
+typedef struct {
+ const char name[NAME_LEN_MAX + 1];
+ const uint32_t value;
+} name_value_map;
+
+
+/// Each filter that has options needs an array of option_map structures.
+/// The array doesn't need to be terminated as the functions take the
+/// length of the array as an argument.
+///
+/// When converting a string to filter options structure, option values
+/// will be handled in a few different ways:
+///
+/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
+/// is handled specially.
+///
+/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
+/// converted to an integer using the name_value_map pointed by .u.map.
+/// The last element in .u.map must have .name = "" as the terminator.
+///
+/// (3) Otherwise the string is treated as a non-negative unsigned decimal
+/// integer which must be in the range set in .u.range. If .flags has
+/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
+///
+/// The integer value from (2) or (3) is then stored to filter_options
+/// at the offset specified in .offset using the type specified in .type
+/// (default is uint32_t).
+///
+/// Stringifying a filter is done by processing a given number of options
+/// in oder from the beginning of an option_map array. The integer is
+/// read from filter_options at .offset using the type from .type.
+///
+/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
+/// option is skipped.
+///
+/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
+/// to convert the option to a string. If the map doesn't contain a string
+/// for the integer value then "UNKNOWN" is used.
+///
+/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
+/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
+/// MiB, or GiB suffix is used if the value is an exact multiple of these.
+/// Plain "B" suffix is never used.
+typedef struct {
+ char name[NAME_LEN_MAX + 1];
+ uint8_t type;
+ uint8_t flags;
+ uint16_t offset;
+
+ union {
+ struct {
+ uint32_t min;
+ uint32_t max;
+ } range;
+
+ const name_value_map *map;
+ } u;
+} option_map;
+
+
+static const char *parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size);
+
+
+/////////
+// BCJ //
+/////////
+
+static const option_map bcj_optmap[] = {
+ {
+ .name = "start",
+ .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_bcj, start_offset),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+static const char *
+parse_bcj(const char **const str, const char *str_end, void *filter_options)
+{
+ // filter_options was zeroed on allocation and that is enough
+ // for the default value.
+ return parse_options(str, str_end, filter_options,
+ bcj_optmap, ARRAY_SIZE(bcj_optmap));
+}
+
+
+///////////
+// Delta //
+///////////
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+static const option_map delta_optmap[] = {
+ {
+ .name = "dist",
+ .offset = offsetof(lzma_options_delta, dist),
+ .u.range.min = LZMA_DELTA_DIST_MIN,
+ .u.range.max = LZMA_DELTA_DIST_MAX,
+ }
+};
+
+
+static const char *
+parse_delta(const char **const str, const char *str_end, void *filter_options)
+{
+ lzma_options_delta *opts = filter_options;
+ opts->type = LZMA_DELTA_TYPE_BYTE;
+ opts->dist = LZMA_DELTA_DIST_MIN;
+
+ return parse_options(str, str_end, filter_options,
+ delta_optmap, ARRAY_SIZE(delta_optmap));
+}
+#endif
+
+
+///////////////////
+// LZMA1 & LZMA2 //
+///////////////////
+
+/// Help string for presets
+#define LZMA12_PRESET_STR "0-9[e]"
+
+
+static const char *
+parse_lzma12_preset(const char **const str, const char *str_end,
+ uint32_t *preset)
+{
+ assert(*str < str_end);
+ *preset = (uint32_t)(**str - '0');
+
+ // NOTE: Remember to update LZMA_PRESET_STR if this is modified!
+ while (++*str < str_end) {
+ switch (**str) {
+ case 'e':
+ *preset |= LZMA_PRESET_EXTREME;
+ break;
+
+ default:
+ return "Unsupported preset flag";
+ }
+ }
+
+ return NULL;
+}
+
+
+static const char *
+set_lzma12_preset(const char **const str, const char *str_end,
+ void *filter_options)
+{
+ uint32_t preset;
+ const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
+ if (errmsg != NULL)
+ return errmsg;
+
+ lzma_options_lzma *opts = filter_options;
+ if (lzma_lzma_preset(opts, preset))
+ return "Unsupported preset";
+
+ return NULL;
+}
+
+
+static const name_value_map lzma12_mode_map[] = {
+ { "fast", LZMA_MODE_FAST },
+ { "normal", LZMA_MODE_NORMAL },
+ { "", 0 }
+};
+
+
+static const name_value_map lzma12_mf_map[] = {
+ { "hc3", LZMA_MF_HC3 },
+ { "hc4", LZMA_MF_HC4 },
+ { "bt2", LZMA_MF_BT2 },
+ { "bt3", LZMA_MF_BT3 },
+ { "bt4", LZMA_MF_BT4 },
+ { "", 0 }
+};
+
+
+static const option_map lzma12_optmap[] = {
+ {
+ .name = "preset",
+ .type = OPTMAP_TYPE_LZMA_PRESET,
+ }, {
+ .name = "dict",
+ .flags = OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_lzma, dict_size),
+ .u.range.min = LZMA_DICT_SIZE_MIN,
+ // FIXME? The max is really max for encoding but decoding
+ // would allow 4 GiB - 1 B.
+ .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
+ }, {
+ .name = "lc",
+ .offset = offsetof(lzma_options_lzma, lc),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "lp",
+ .offset = offsetof(lzma_options_lzma, lp),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "pb",
+ .offset = offsetof(lzma_options_lzma, pb),
+ .u.range.min = LZMA_PB_MIN,
+ .u.range.max = LZMA_PB_MAX,
+ }, {
+ .name = "mode",
+ .type = OPTMAP_TYPE_LZMA_MODE,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mode),
+ .u.map = lzma12_mode_map,
+ }, {
+ .name = "nice",
+ .offset = offsetof(lzma_options_lzma, nice_len),
+ .u.range.min = 2,
+ .u.range.max = 273,
+ }, {
+ .name = "mf",
+ .type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mf),
+ .u.map = lzma12_mf_map,
+ }, {
+ .name = "depth",
+ .offset = offsetof(lzma_options_lzma, depth),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+static const char *
+parse_lzma12(const char **const str, const char *str_end, void *filter_options)
+{
+ lzma_options_lzma *opts = filter_options;
+
+ // It cannot fail.
+ const bool preset_ret = lzma_lzma_preset(opts, LZMA_PRESET_DEFAULT);
+ assert(!preset_ret);
+ (void)preset_ret;
+
+ const char *errmsg = parse_options(str, str_end, filter_options,
+ lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
+ if (errmsg != NULL)
+ return errmsg;
+
+ if (opts->lc + opts->lp > LZMA_LCLP_MAX)
+ return "The sum of lc and lp must not exceed 4";
+
+ return NULL;
+}
+
+
+/////////////////////////////////////////
+// Generic parsing and stringification //
+/////////////////////////////////////////
+
+static const struct {
+ /// Name of the filter
+ char name[NAME_LEN_MAX + 1];
+
+ /// For lzma_str_to_filters:
+ /// Size of the filter-specific options structure.
+ uint32_t opts_size;
+
+ /// Filter ID
+ lzma_vli id;
+
+ /// For lzma_str_to_filters:
+ /// Function to parse the filter-specific options. The filter_options
+ /// will already have been allocated using lzma_alloc_zero().
+ const char *(*parse)(const char **str, const char *str_end,
+ void *filter_options);
+
+ /// For lzma_str_from_filters:
+ /// If the flag LZMA_STR_ENCODER is used then the first
+ /// strfy_encoder elements of optmap are stringified.
+ /// With LZMA_STR_DECODER strfy_decoder is used.
+ /// Currently encoders use all flags that decoders do but if
+ /// that changes then this needs to be changed too, for example,
+ /// add a new OPTMAP flag to skip printing some decoder-only flags.
+ const option_map *optmap;
+ uint8_t strfy_encoder;
+ uint8_t strfy_decoder;
+
+ /// For lzma_str_from_filters:
+ /// If true, lzma_filter.options is allowed to be NULL. In that case,
+ /// only the filter name is printed without any options.
+ bool allow_null;
+
+} filter_name_map[] = {
+#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
+ { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
+ &parse_lzma12, lzma12_optmap, 9, 5, false },
+#endif
+
+#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
+ &parse_lzma12, lzma12_optmap, 9, 2, false },
+#endif
+
+#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
+ { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
+ { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
+ { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
+ { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
+ { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
+ { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
+ { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
+ &parse_delta, delta_optmap, 1, 1, false },
+#endif
+};
+
+
+/// Decodes options from a string for one filter (name1=value1,name2=value2).
+/// Caller must have allocated memory for filter_options already and set
+/// the initial default values. This is called from the filter-specific
+/// parse_* functions.
+///
+/// The input string starts at *str and the address in str_end is the first
+/// char that is not part of the string anymore. So no '\0' terminator is
+/// used. *str is advanced everytime something has been decoded successfully.
+static const char *
+parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size)
+{
+ while (*str < str_end && **str != '\0') {
+ // Each option is of the form name=value.
+ // Commas (',') separate options. Extra commas are ignored.
+ // Ignoring extra commas makes it simpler if an optional
+ // option stored in a shell variable which can be empty.
+ if (**str == ',') {
+ ++*str;
+ continue;
+ }
+
+ // Find where the next name=value ends.
+ const size_t str_len = (size_t)(str_end - *str);
+ const char *name_eq_value_end = memchr(*str, ',', str_len);
+ if (name_eq_value_end == NULL)
+ name_eq_value_end = str_end;
+
+ const char *equals_sign = memchr(*str, '=',
+ (size_t)(name_eq_value_end - *str));
+
+ // Fail if the '=' wasn't found or the option name is missing
+ // (the first char is '=').
+ if (equals_sign == NULL || **str == '=')
+ return "Options must be 'name=value' pairs separated "
+ "with commas";
+
+ // Reject a too long option name so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].name.
+ const size_t name_len = (size_t)(equals_sign - *str);
+ if (name_len > NAME_LEN_MAX)
+ return "Unknown option name";
+
+ // Find the option name from optmap[].
+ size_t i = 0;
+ while (true) {
+ if (i == optmap_size)
+ return "Unknown option name";
+
+ if (memcmp(*str, optmap[i].name, name_len) == 0
+ && optmap[i].name[name_len] == '\0')
+ break;
+
+ ++i;
+ }
+
+ // The input string is good at least until the start of
+ // the option value.
+ *str = equals_sign + 1;
+
+ // The code assumes that the option value isn't an empty
+ // string so check it here.
+ const size_t value_len = (size_t)(name_eq_value_end - *str);
+ if (value_len == 0)
+ return "Option value cannot be empty";
+
+ // LZMA1/2 preset has its own parsing function.
+ if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
+ const char *errmsg = set_lzma12_preset(str,
+ name_eq_value_end, filter_options);
+ if (errmsg != NULL)
+ return errmsg;
+
+ continue;
+ }
+
+ // It's an integer value.
+ uint32_t v;
+ if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
+ // The integer is picked from a string-to-integer map.
+ //
+ // Reject a too long value string so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].u.map[j].name.
+ if (value_len > NAME_LEN_MAX)
+ return "Invalid option value";
+
+ const name_value_map *map = optmap[i].u.map;
+ size_t j = 0;
+ while (true) {
+ // The array is terminated with an empty name.
+ if (map[j].name[0] == '\0')
+ return "Invalid option value";
+
+ if (memcmp(*str, map[j].name, value_len) == 0
+ && map[j].name[value_len]
+ == '\0') {
+ v = map[j].value;
+ break;
+ }
+
+ ++j;
+ }
+ } else if (**str < '0' || **str > '9') {
+ // Note that "max" isn't supported while it is
+ // supported in xz. It's not useful here.
+ return "Value is not a non-negative decimal integer";
+ } else {
+ // strtoul() has locale-specific behavior so it cannot
+ // be relied on to get reproducible results since we
+ // cannot change the locate in a thread-safe library.
+ // It also needs '\0'-termination.
+ //
+ // Use a temporary pointer so that *str will point
+ // to the beginning of the value string in case
+ // an error occurs.
+ const char *p = *str;
+ v = 0;
+ do {
+ if (v > UINT32_MAX / 10)
+ return "Value out of range";
+
+ v *= 10;
+
+ const uint32_t add = (uint32_t)(*p - '0');
+ if (UINT32_MAX - add < v)
+ return "Value out of range";
+
+ v += add;
+ ++p;
+ } while (p < name_eq_value_end
+ && *p >= '0' && *p <= '9');
+
+ if (p < name_eq_value_end) {
+ // Remember this position so that it an be
+ // used for error messages that are
+ // specifically about the suffix. (Out of
+ // range values are about the whole value
+ // and those error messages point to the
+ // beginning of the number part,
+ // not to the suffix.)
+ const char *multiplier_start = p;
+
+ // If multiplier suffix shouldn't be used
+ // then don't allow them even if the value
+ // would stay within limits. This is a somewhat
+ // unnecessary check but it rejects silly
+ // things like lzma2:pb=0MiB which xz allows.
+ if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
+ == 0) {
+ *str = multiplier_start;
+ return "This option does not support "
+ "any integer suffixes";
+ }
+
+ uint32_t shift;
+
+ switch (*p) {
+ case 'k':
+ case 'K':
+ shift = 10;
+ break;
+
+ case 'm':
+ case 'M':
+ shift = 20;
+ break;
+
+ case 'g':
+ case 'G':
+ shift = 30;
+ break;
+
+ default:
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ ++p;
+
+ // Allow "M", "Mi", "MB", "MiB" and the same
+ // for the other five characters from the
+ // switch-statement above. All are handled
+ // as base-2 (perhaps a mistake, perhaps not).
+ // Note that 'i' and 'B' are case sensitive.
+ if (p < name_eq_value_end && *p == 'i')
+ ++p;
+
+ if (p < name_eq_value_end && *p == 'B')
+ ++p;
+
+ // Now we must have no chars remaining.
+ if (p < name_eq_value_end) {
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ if (v > (UINT32_MAX >> shift))
+ return "Value out of range";
+
+ v <<= shift;
+ }
+
+ if (v < optmap[i].u.range.min
+ || v > optmap[i].u.range.max)
+ return "Value out of range";
+ }
+
+ // Set the value in filter_options. Enums are handled
+ // specially since the underlying type isn't the same
+ // as uint32_t on all systems.
+ void *ptr = (char *)filter_options + optmap[i].offset;
+ switch (optmap[i].type) {
+ case OPTMAP_TYPE_LZMA_MODE:
+ *(lzma_mode *)ptr = (lzma_mode)v;
+ break;
+
+ case OPTMAP_TYPE_LZMA_MATCH_FINDER:
+ *(lzma_match_finder *)ptr = (lzma_match_finder)v;
+ break;
+
+ default:
+ *(uint32_t *)ptr = v;
+ break;
+ }
+
+ // This option has been successfully handled.
+ *str = name_eq_value_end;
+ }
+
+ // No errors.
+ return NULL;
+}
+
+
+/// Finds the name of the filter at the beginning of the string and
+/// calls filter_name_map[i].parse() to decode the filter-specific options.
+/// The caller must have set str_end so that exactly one filter and its
+/// options are present without any trailing characters.
+static const char *
+parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
+ const lzma_allocator *allocator, bool only_xz)
+{
+ // Search for a colon or equals sign that would separate the filter
+ // name from filter options. If neither is found, then the input
+ // string only contains a filter name and there are no options.
+ //
+ // First assume that a colon or equals sign won't be found:
+ const char *name_end = str_end;
+ const char *opts_start = str_end;
+
+ for (const char *p = *str; p < str_end; ++p) {
+ if (*p == ':' || *p == '=') {
+ name_end = p;
+
+ // Filter options (name1=value1,name2=value2,...)
+ // begin after the colon or equals sign.
+ opts_start = p + 1;
+ break;
+ }
+ }
+
+ // Reject a too long filter name so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in filter_name_map[i].name.
+ const size_t name_len = (size_t)(name_end - *str);
+ if (name_len > NAME_LEN_MAX)
+ return "Unknown filter name";
+
+ for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+ if (memcmp(*str, filter_name_map[i].name, name_len) == 0
+ && filter_name_map[i].name[name_len] == '\0') {
+ if (only_xz && filter_name_map[i].id
+ >= LZMA_FILTER_RESERVED_START)
+ return "This filter cannot be used in "
+ "the .xz format";
+
+ // Allocate the filter-specific options and
+ // initialize the memory with zeros.
+ void *options = lzma_alloc_zero(
+ filter_name_map[i].opts_size,
+ allocator);
+ if (options == NULL)
+ return "Memory allocation failed";
+
+ // Filter name was found so the input string is good
+ // at least this far.
+ *str = opts_start;
+
+ const char *errmsg = filter_name_map[i].parse(
+ str, str_end, options);
+ if (errmsg != NULL) {
+ lzma_free(options, allocator);
+ return errmsg;
+ }
+
+ // *filter is modified only when parsing is successful.
+ filter->id = filter_name_map[i].id;
+ filter->options = options;
+ return NULL;
+ }
+ }
+
+ return "Unknown filter name";
+}
+
+
+/// Converts the string to a filter chain (array of lzma_filter structures).
+///
+/// *str is advanced everytime something has been decoded successfully.
+/// This way the caller knows where in the string a possible error occurred.
+static const char *
+str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
+ const lzma_allocator *allocator)
+{
+ const char *errmsg;
+
+ // Skip leading spaces.
+ while (**str == ' ')
+ ++*str;
+
+ if (**str == '\0')
+ return "Empty string is not allowed, "
+ "try \"6\" if a default value is needed";
+
+ // Detect the type of the string.
+ //
+ // A string beginning with a digit or a string beginning with
+ // one dash and a digit are treated as presets. Trailing spaces
+ // will be ignored too (leading spaces were already ignored above).
+ //
+ // For example, "6", "7 ", "-9e", or " -3 " are treated as presets.
+ // Strings like "-" or "- " aren't preset.
+#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
+ if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
+ if (**str == '-')
+ ++*str;
+
+ // Ignore trailing spaces.
+ const size_t str_len = strlen(*str);
+ const char *str_end = memchr(*str, ' ', str_len);
+ if (str_end != NULL) {
+ // There is at least one trailing space. Check that
+ // there are no chars other than spaces.
+ for (size_t i = 1; str_end[i] != '\0'; ++i)
+ if (str_end[i] != ' ')
+ return "Unsupported preset";
+ } else {
+ // There are no trailing spaces. Use the whole string.
+ str_end = *str + str_len;
+ }
+
+ uint32_t preset;
+ errmsg = parse_lzma12_preset(str, str_end, &preset);
+ if (errmsg != NULL)
+ return errmsg;
+
+ lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
+ if (opts == NULL)
+ return "Memory allocation failed";
+
+ if (lzma_lzma_preset(opts, preset)) {
+ lzma_free(opts, allocator);
+ return "Unsupported preset";
+ }
+
+ filters[0].id = LZMA_FILTER_LZMA2;
+ filters[0].options = opts;
+ filters[1].id = LZMA_VLI_UNKNOWN;
+ filters[1].options = NULL;
+
+ return NULL;
+ }
+
+ // Not a preset so it must be a filter chain.
+ //
+ // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
+ // can be used in .xz.
+ const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
+
+ // Use a temporary array so that we don't modify the caller-supplied
+ // one until we know that no errors occurred.
+ lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
+
+ size_t i = 0;
+ do {
+ if (i == LZMA_FILTERS_MAX) {
+ errmsg = "The maximum number of filters is four";
+ goto error;
+ }
+
+ // Skip "--" if present.
+ if ((*str)[0] == '-' && (*str)[1] == '-')
+ *str += 2;
+
+ // Locate the end of "filter:name1=value1,name2=value2",
+ // stopping at the first "--" or a single space.
+ const char *filter_end = *str;
+ while (filter_end[0] != '\0') {
+ if ((filter_end[0] == '-' && filter_end[1] == '-')
+ || filter_end[0] == ' ')
+ break;
+
+ ++filter_end;
+ }
+
+ // Inputs that have "--" at the end or "-- " in the middle
+ // will result in an empty filter name.
+ if (filter_end == *str) {
+ errmsg = "Filter name is missing";
+ goto error;
+ }
+
+ errmsg = parse_filter(str, filter_end, &temp_filters[i],
+ allocator, only_xz);
+ if (errmsg != NULL)
+ goto error;
+
+ // Skip trailing spaces.
+ while (**str == ' ')
+ ++*str;
+
+ ++i;
+ } while (**str != '\0');
+
+ // Seems to be good, terminate the array so that
+ // basic validation can be done.
+ temp_filters[i].id = LZMA_VLI_UNKNOWN;
+ temp_filters[i].options = NULL;
+
+ // Do basic validation if the application didn't prohibit it.
+ if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
+ size_t dummy;
+ const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
+ assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
+ if (ret != LZMA_OK) {
+ errmsg = "Invalid filter chain "
+ "('lzma2' missing at the end?)";
+ goto error;
+ }
+ }
+
+ // All good. Copy the filters to the application supplied array.
+ memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
+ return NULL;
+
+error:
+ // Free the filter options that were successfully decoded.
+ while (i-- > 0)
+ lzma_free(temp_filters[i].options, allocator);
+
+ return errmsg;
+}
+
+
+extern LZMA_API(const char *)
+lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
+ uint32_t flags, const lzma_allocator *allocator)
+{
+ if (str == NULL || filters == NULL)
+ return "Unexpected NULL pointer argument(s) "
+ "to lzma_str_to_filters()";
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ALL_FILTERS
+ | LZMA_STR_NO_VALIDATION;
+
+ if (flags & ~supported_flags)
+ return "Unsupported flags to lzma_str_to_filters()";
+
+ const char *used = str;
+ const char *errmsg = str_to_filters(&used, filters, flags, allocator);
+
+ if (error_pos != NULL) {
+ const size_t n = (size_t)(used - str);
+ *error_pos = n > INT_MAX ? INT_MAX : (int)n;
+ }
+
+ return errmsg;
+}
+
+
+/// Converts options of one filter to a string.
+///
+/// The caller must have already put the filter name in the destination
+/// string. Since it is possible that no options will be needed, the caller
+/// won't have put a delimiter character (':' or '=') in the string yet.
+/// We will add it if at least one option will be added to the string.
+static void
+strfy_filter(lzma_str *dest, const char *delimiter,
+ const option_map *optmap, size_t optmap_count,
+ const void *filter_options)
+{
+ for (size_t i = 0; i < optmap_count; ++i) {
+ // No attempt is made to reverse LZMA1/2 preset.
+ if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET)
+ continue;
+
+ // All options have integer values, some just are mapped
+ // to a string with a name_value_map. LZMA1/2 preset
+ // isn't reversed back to preset=PRESET form.
+ uint32_t v;
+ const void *ptr
+ = (const char *)filter_options + optmap[i].offset;
+ switch (optmap[i].type) {
+ case OPTMAP_TYPE_LZMA_MODE:
+ v = *(const lzma_mode *)ptr;
+ break;
+
+ case OPTMAP_TYPE_LZMA_MATCH_FINDER:
+ v = *(const lzma_match_finder *)ptr;
+ break;
+
+ default:
+ v = *(const uint32_t *)ptr;
+ break;
+ }
+
+ // Skip this if this option should be omitted from
+ // the string when the value is zero.
+ if (v == 0 && (optmap[i].flags & OPTMAP_NO_STRFY_ZERO))
+ continue;
+
+ // Before the first option we add whatever delimiter
+ // the caller gave us. For later options a comma is used.
+ str_append_str(dest, delimiter);
+ delimiter = ",";
+
+ // Add the option name and equals sign.
+ str_append_str(dest, optmap[i].name);
+ str_append_str(dest, "=");
+
+ if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
+ const name_value_map *map = optmap[i].u.map;
+ size_t j = 0;
+ while (true) {
+ if (map[j].name[0] == '\0') {
+ str_append_str(dest, "UNKNOWN");
+ break;
+ }
+
+ if (map[j].value == v) {
+ str_append_str(dest, map[j].name);
+ break;
+ }
+
+ ++j;
+ }
+ } else {
+ str_append_u32(dest, v,
+ optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX);
+ }
+ }
+
+ return;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_str_from_filters(char **output_str, const lzma_filter *filters,
+ uint32_t flags, const lzma_allocator *allocator)
+{
+ // On error *output_str is always set to NULL.
+ // Do it as the very first step.
+ if (output_str == NULL)
+ return LZMA_PROG_ERROR;
+
+ *output_str = NULL;
+
+ if (filters == NULL)
+ return LZMA_PROG_ERROR;
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ENCODER
+ | LZMA_STR_DECODER
+ | LZMA_STR_GETOPT_LONG
+ | LZMA_STR_NO_SPACES;
+
+ if (flags & ~supported_flags)
+ return LZMA_OPTIONS_ERROR;
+
+ // There must be at least one filter.
+ if (filters[0].id == LZMA_VLI_UNKNOWN)
+ return LZMA_OPTIONS_ERROR;
+
+ // Allocate memory for the output string.
+ lzma_str dest;
+ return_if_error(str_init(&dest, allocator));
+
+ const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
+
+ const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
+
+ for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+ // Don't add a space between filters if the caller
+ // doesn't want them.
+ if (i > 0 && !(flags & LZMA_STR_NO_SPACES))
+ str_append_str(&dest, " ");
+
+ // Use dashes for xz getopt_long() compatible syntax but also
+ // use dashes to separate filters when spaces weren't wanted.
+ if ((flags & LZMA_STR_GETOPT_LONG)
+ || (i > 0 && (flags & LZMA_STR_NO_SPACES)))
+ str_append_str(&dest, "--");
+
+ size_t j = 0;
+ while (true) {
+ if (j == ARRAY_SIZE(filter_name_map)) {
+ // Filter ID in filters[i].id isn't supported.
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ if (filter_name_map[j].id == filters[i].id) {
+ // Add the filter name.
+ str_append_str(&dest, filter_name_map[j].name);
+
+ // If only the filter names were wanted then
+ // skip to the next filter. In this case
+ // .options is ignored and may be NULL even
+ // when the filter doesn't allow NULL options.
+ if (!show_opts)
+ break;
+
+ if (filters[i].options == NULL) {
+ if (!filter_name_map[j].allow_null) {
+ // Filter-specific options
+ // are missing but with
+ // this filter the options
+ // structure is mandatory.
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ // .options is allowed to be NULL.
+ // There is no need to add any
+ // options to the string.
+ break;
+ }
+
+ // Options structure is available. Add
+ // the filter options to the string.
+ const size_t optmap_count
+ = (flags & LZMA_STR_ENCODER)
+ ? filter_name_map[j].strfy_encoder
+ : filter_name_map[j].strfy_decoder;
+ strfy_filter(&dest, opt_delim,
+ filter_name_map[j].optmap,
+ optmap_count,
+ filters[i].options);
+ break;
+ }
+
+ ++j;
+ }
+ }
+
+ return str_finish(output_str, &dest, allocator);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
+ const lzma_allocator *allocator)
+{
+ // On error *output_str is always set to NULL.
+ // Do it as the very first step.
+ if (output_str == NULL)
+ return LZMA_PROG_ERROR;
+
+ *output_str = NULL;
+
+ // Validate the flags.
+ const uint32_t supported_flags
+ = LZMA_STR_ALL_FILTERS
+ | LZMA_STR_ENCODER
+ | LZMA_STR_DECODER
+ | LZMA_STR_GETOPT_LONG;
+
+ if (flags & ~supported_flags)
+ return LZMA_OPTIONS_ERROR;
+
+ // Allocate memory for the output string.
+ lzma_str dest;
+ return_if_error(str_init(&dest, allocator));
+
+ // If only listing the filter names then separate them with spaces.
+ // Otherwise use newlines.
+ const bool show_opts = (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER));
+ const char *filter_delim = show_opts ? "\n" : " ";
+
+ const char *opt_delim = (flags & LZMA_STR_GETOPT_LONG) ? "=" : ":";
+ bool first_filter_printed = false;
+
+ for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+ // If we are printing only one filter then skip others.
+ if (filter_id != LZMA_VLI_UNKNOWN
+ && filter_id != filter_name_map[i].id)
+ continue;
+
+ // If we are printing only .xz filters then skip the others.
+ if (filter_name_map[i].id >= LZMA_FILTER_RESERVED_START
+ && (flags & LZMA_STR_ALL_FILTERS) == 0
+ && filter_id == LZMA_VLI_UNKNOWN)
+ continue;
+
+ // Add a new line if this isn't the first filter being
+ // written to the string.
+ if (first_filter_printed)
+ str_append_str(&dest, filter_delim);
+
+ first_filter_printed = true;
+
+ if (flags & LZMA_STR_GETOPT_LONG)
+ str_append_str(&dest, "--");
+
+ str_append_str(&dest, filter_name_map[i].name);
+
+ // If only the filter names were wanted then continue
+ // to the next filter.
+ if (!show_opts)
+ continue;
+
+ const option_map *optmap = filter_name_map[i].optmap;
+ const char *d = opt_delim;
+
+ const size_t end = (flags & LZMA_STR_ENCODER)
+ ? filter_name_map[i].strfy_encoder
+ : filter_name_map[i].strfy_decoder;
+
+ for (size_t j = 0; j < end; ++j) {
+ // The first option is delimited from the filter
+ // name using "=" or ":" and the rest of the options
+ // are separated with ",".
+ str_append_str(&dest, d);
+ d = ",";
+
+ // optname=<possible_values>
+ str_append_str(&dest, optmap[j].name);
+ str_append_str(&dest, "=<");
+
+ if (optmap[j].type == OPTMAP_TYPE_LZMA_PRESET) {
+ // LZMA1/2 preset has its custom help string.
+ str_append_str(&dest, LZMA12_PRESET_STR);
+ } else if (optmap[j].flags
+ & OPTMAP_USE_NAME_VALUE_MAP) {
+ // Separate the possible option values by "|".
+ const name_value_map *m = optmap[j].u.map;
+ for (size_t k = 0; m[k].name[0] != '\0'; ++k) {
+ if (k > 0)
+ str_append_str(&dest, "|");
+
+ str_append_str(&dest, m[k].name);
+ }
+ } else {
+ // Integer range is shown as min-max.
+ const bool use_byte_suffix = optmap[j].flags
+ & OPTMAP_USE_BYTE_SUFFIX;
+ str_append_u32(&dest, optmap[j].u.range.min,
+ use_byte_suffix);
+ str_append_str(&dest, "-");
+ str_append_u32(&dest, optmap[j].u.range.max,
+ use_byte_suffix);
+ }
+
+ str_append_str(&dest, ">");
+ }
+ }
+
+ // If no filters were added to the string then it must be because
+ // the caller provided an unsupported Filter ID.
+ if (!first_filter_printed) {
+ str_free(&dest, allocator);
+ return LZMA_OPTIONS_ERROR;
+ }
+
+ return str_finish(output_str, &dest, allocator);
+}