Initial changes to change the suffix of the new format to .xz.

This also fixes a bug related to --suffix option. Some issues with suffixes with --format=raw were not fixed.
author: Lasse Collin <lasse.collin@tukaani.org> 2008-10-02 22:51:46 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-10-02 22:51:46 +0300
commit: bd137524f2f50e30ba054f42f1f6536cd3cee920 (patch)
tree: e28ca9e83380e9d7089d5dae4499df1967b5c064
parent: Renamed the test files from .lzma suffix to .xz suffix. (diff)
download: xz-bd137524f2f50e30ba054f42f1f6536cd3cee920.tar.xz
6 files changed, 133 insertions, 52 deletions
diff --git a/src/lzma/args.c b/src/lzma/args.c
index ddaa0f91..47ae766a 100644
--- a/src/lzma/args.c
+++ b/src/lzma/args.c
@@ -26,7 +26,7 @@
 
 
 enum tool_mode opt_mode = MODE_COMPRESS;
-enum header_type opt_header = HEADER_AUTO;
+enum format_type opt_format = FORMAT_AUTO;
 
 char *opt_suffix = NULL;
 
@@ -50,6 +50,12 @@ static size_t preset_number = 7 - 1;
 static bool preset_default = true;
 static size_t filter_count = 0;
 
+/// When compressing, which file format to use if --format=auto or no --format
+/// at all has been specified. We need a variable because this depends on
+/// with which name we are called. All names with "lz" in them makes us to
+/// use the legacy .lzma format.
+static enum format_type format_compress_auto = FORMAT_XZ;
+
 
 enum {
 	OPT_SUBBLOCK = INT_MIN,
@@ -312,18 +318,25 @@ parse_real(int argc, char **argv)
 
 		// --format
 		case 'F': {
-			static const char *types[] = {
-				"auto",
-				"native",
-				"alone",
-				// "gzip",
-				"raw",
-				NULL
+			// Just in case, support both "lzma" and "alone" since
+			// the latter was used for forward compatibility in
+			// LZMA Utils 4.32.x.
+			static const struct {
+				char str[8];
+				enum format_type format;
+			} types[] = {
+				{ "auto",   FORMAT_AUTO },
+				{ "xz",     FORMAT_XZ },
+				{ "lzma",   FORMAT_LZMA },
+				{ "alone",  FORMAT_LZMA },
+				// { "gzip",   FORMAT_GZIP },
+				// { "gz",     FORMAT_GZIP },
+				{ "raw",    FORMAT_RAW },
 			};
 
-			opt_header = 0;
-			while (strcmp(types[opt_header], optarg) != 0) {
-				if (types[++opt_header] == NULL) {
+			size_t i = 0;
+			while (strcmp(types[i].str, optarg) != 0) {
+				if (++i == ARRAY_SIZE(types)) {
 					errmsg(V_ERROR, _("%s: Unknown file "
 							"format type"),
 							optarg);
@@ -331,25 +344,25 @@ parse_real(int argc, char **argv)
 				}
 			}
 
+			opt_format = types[i].format;
 			break;
 		}
 
 		// --check
 		case 'C': {
 			static const struct {
-				const char *str;
-				unsigned int value;
+				char str[8];
+				lzma_check check;
 			} types[] = {
 				{ "none",   LZMA_CHECK_NONE },
 				{ "crc32",  LZMA_CHECK_CRC32 },
 				{ "crc64",  LZMA_CHECK_CRC64 },
 				{ "sha256", LZMA_CHECK_SHA256 },
-				{ NULL,     0 }
 			};
 
 			size_t i = 0;
 			while (strcmp(types[i].str, optarg) != 0) {
-				if (types[++i].str == NULL) {
+				if (++i == ARRAY_SIZE(types)) {
 					errmsg(V_ERROR, _("%s: Unknown "
 							"integrity check "
 							"type"), optarg);
@@ -357,7 +370,7 @@ parse_real(int argc, char **argv)
 				}
 			}
 
-			opt_check = types[i].value;
+			opt_check = types[i].check;
 			break;
 		}
 
@@ -460,7 +473,7 @@ set_compression_settings(void)
 			my_exit(ERROR);
 		}
 
-		opt_filters[0].id = opt_header == HEADER_ALONE
+		opt_filters[0].id = opt_format == FORMAT_LZMA
 				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
 		opt_filters[0].options = &opt_lzma;
 		filter_count = 1;
@@ -471,9 +484,9 @@ set_compression_settings(void)
 
 	// If we are using the LZMA_Alone format, allow exactly one filter
 	// which has to be LZMA.
-	if (opt_header == HEADER_ALONE && (filter_count != 1
+	if (opt_format == FORMAT_LZMA && (filter_count != 1
 			|| opt_filters[0].id != LZMA_FILTER_LZMA1)) {
-		errmsg(V_ERROR, _("With --format=alone only the LZMA1 filter "
+		errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter "
 				"is supported"));
 		my_exit(ERROR);
 	}
@@ -503,6 +516,12 @@ set_compression_settings(void)
 
 			memory_usage = lzma_memusage_encoder(opt_filters);
 		}
+
+		// TODO: With --format=raw, we should print a warning since
+		// the presets may change and thus the next version may not
+		// be able to uncompress the raw stream with the same preset
+		// number.
+
 	} else {
 		if (memory_usage > opt_memory) {
 			errmsg(V_ERROR, _("Memory usage limit is too small "
@@ -532,6 +551,11 @@ parse_args(int argc, char **argv)
 	{
 		const char *name = str_filename(argv[0]);
 		if (name != NULL) {
+			// Default file format
+			if (strstr(name, "lz") != NULL)
+				format_compress_auto = FORMAT_LZMA;
+
+			// Operation mode
 			if (strstr(name, "cat") != NULL) {
 				opt_mode = MODE_DECOMPRESS;
 				opt_stdout = true;
@@ -556,7 +580,10 @@ parse_args(int argc, char **argv)
 		opt_stdout = true;
 	}
 
-	if (opt_mode == MODE_COMPRESS || opt_header == HEADER_RAW)
+	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
+		opt_format = format_compress_auto;
+
+	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
 		set_compression_settings();
 
 	// If no filenames are given, use stdin.
diff --git a/src/lzma/args.h b/src/lzma/args.h
index abc810cb..587b280f 100644
--- a/src/lzma/args.h
+++ b/src/lzma/args.h
@@ -30,12 +30,13 @@ enum tool_mode {
 	MODE_LIST,
 };
 
-enum header_type {
-	HEADER_AUTO,
-	HEADER_NATIVE,
-	HEADER_ALONE,
+// NOTE: The order of these is significant in suffix.c.
+enum format_type {
+	FORMAT_AUTO,
+	FORMAT_XZ,
+	FORMAT_LZMA,
 	// HEADER_GZIP,
-	HEADER_RAW,
+	FORMAT_RAW,
 };
 
 
@@ -51,7 +52,7 @@ extern bool opt_keep_original;
 extern bool opt_preserve_name;
 // extern bool opt_recursive;
 extern enum tool_mode opt_mode;
-extern enum header_type opt_header;
+extern enum format_type opt_format;
 
 extern lzma_check opt_check;
 extern lzma_filter opt_filters[8];
diff --git a/src/lzma/help.c b/src/lzma/help.c
index 0b530ff5..3b9e3989 100644
--- a/src/lzma/help.c
+++ b/src/lzma/help.c
@@ -46,7 +46,7 @@ show_help(void)
 "  -z, --compress      force compression\n"
 "  -d, --decompress    force decompression\n"
 "  -t, --test          test compressed file integrity\n"
-"  -l, --list          list block sizes, total sizes, and possible metadata\n"
+"  -l, --list          list information about files\n"
 ));
 
 	puts(_(
@@ -57,7 +57,7 @@ show_help(void)
 "  -c, --stdout        write to standard output and don't delete input files\n"
 "  -S, --suffix=.SUF   use suffix `.SUF' on compressed files instead of `.lzma'\n"
 "  -F, --format=FMT    file format to encode or decode; possible values are\n"
-"                      `auto' (default), `native', `alone', and `raw'\n"
+"                      `auto' (default), `xz', `lzma', and `raw'\n"
 "      --files=[FILE]  read filenames to process from FILE; if FILE is\n"
 "                      omitted, filenames are read from the standard input;\n"
 "                      filenames must be terminated with the newline character\n"
diff --git a/src/lzma/process.c b/src/lzma/process.c
index 46c27df6..fc4ef96a 100644
--- a/src/lzma/process.c
+++ b/src/lzma/process.c
@@ -155,19 +155,23 @@ single_init(thread_data *t)
 	lzma_ret ret = LZMA_PROG_ERROR;
 
 	if (opt_mode == MODE_COMPRESS) {
-		switch (opt_header) {
-		case HEADER_AUTO:
-		case HEADER_NATIVE:
+		switch (opt_format) {
+		case FORMAT_AUTO:
+			// args.c ensures this.
+			assert(0);
+			break;
+
+		case FORMAT_XZ:
 			ret = lzma_stream_encoder(&t->strm,
 					opt_filters, opt_check);
 			break;
 
-		case HEADER_ALONE:
+		case FORMAT_LZMA:
 			ret = lzma_alone_encoder(&t->strm,
 					opt_filters[0].options);
 			break;
 
-		case HEADER_RAW:
+		case FORMAT_RAW:
 			ret = lzma_raw_encoder(&t->strm, opt_filters);
 			break;
 		}
@@ -175,20 +179,20 @@ single_init(thread_data *t)
 		const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK
 				| LZMA_CONCATENATED;
 
-		switch (opt_header) {
-		case HEADER_AUTO:
+		switch (opt_format) {
+		case FORMAT_AUTO:
 			ret = lzma_auto_decoder(&t->strm, opt_memory, flags);
 			break;
 
-		case HEADER_NATIVE:
+		case FORMAT_XZ:
 			ret = lzma_stream_decoder(&t->strm, opt_memory, flags);
 			break;
 
-		case HEADER_ALONE:
+		case FORMAT_LZMA:
 			ret = lzma_alone_decoder(&t->strm, opt_memory);
 			break;
 
-		case HEADER_RAW:
+		case FORMAT_RAW:
 			// Memory usage has already been checked in args.c.
 			ret = lzma_raw_decoder(&t->strm, opt_filters);
 			break;
diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c
index 57afce82..41b4c352 100644
--- a/src/lzma/suffix.c
+++ b/src/lzma/suffix.c
@@ -20,14 +20,9 @@
 #include "private.h"
 
 
-static const struct {
+struct suffix_pair {
 	const char *compressed;
 	const char *uncompressed;
-} suffixes[] = {
-	{ ".lzma",  "" },
-	{ ".tlz",   ".tar" },
-	{ ".ylz",   ".yar" },
-	{ NULL,     NULL }
 };
 
 
@@ -63,13 +58,21 @@ test_suffix(const char *suffix, const char *src_name, size_t src_len)
 /// \return     Name of the uncompressed file, or NULL if file has unknown
 ///             suffix.
 static char *
-uncompressed_name(const char *src_name)
+uncompressed_name(const char *src_name, const size_t src_len)
 {
+	static const struct suffix_pair suffixes[] = {
+		{ ".xz",    "" },
+		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
+		{ ".lzma",  "" },
+		{ ".tlz",   ".tar" },
+		// { ".gz",    "" },
+		// { ".tgz",   ".tar" },
+	};
+
 	const char *new_suffix = "";
-	const size_t src_len = strlen(src_name);
 	size_t new_len = 0;
 
-	for (size_t i = 0; suffixes[i].compressed != NULL; ++i) {
+	for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
 		new_len = test_suffix(suffixes[i].compressed,
 				src_name, src_len);
 		if (new_len != 0) {
@@ -103,10 +106,40 @@ uncompressed_name(const char *src_name)
 
 
 /// \brief      Appends suffix to src_name
+///
+/// In contrast to uncompressed_name(), we check only suffixes that are valid
+/// for the specified file format.
 static char *
-compressed_name(const char *src_name)
+compressed_name(const char *src_name, const size_t src_len)
 {
-	const size_t src_len = strlen(src_name);
+	// The order of these must match the order in args.h.
+	static const struct suffix_pair all_suffixes[][3] = {
+		{
+			{ ".xz",    "" },
+			{ ".txz",   ".tar" },
+			{ NULL, NULL }
+		}, {
+			{ ".lzma",  "" },
+			{ ".tlz",   ".tar" },
+			{ NULL,     NULL }
+/*
+		}, {
+			{ ".gz",    "" },
+			{ ".tgz",   ".tar" },
+			{ NULL,     NULL }
+*/
+		}, {
+			// --format=raw requires specifying the suffix
+			// manually or using stdout.
+			{ NULL,     NULL }
+		}
+	};
+
+	// args.c ensures this.
+	assert(opt_format != FORMAT_AUTO);
+
+	const size_t format = opt_format - 1;
+	const struct suffix_pair *const suffixes = all_suffixes[format];
 
 	for (size_t i = 0; suffixes[i].compressed != NULL; ++i) {
 		if (test_suffix(suffixes[i].compressed, src_name, src_len)
@@ -118,6 +151,15 @@ compressed_name(const char *src_name)
 		}
 	}
 
+	// TODO: Hmm, maybe it would be better to validate this in args.c,
+	// since the suffix handling when decoding is weird now.
+	if (opt_format == FORMAT_RAW && opt_suffix == NULL) {
+		errmsg(V_ERROR, _("%s: With --format=raw, --suffix=.SUF is "
+				"required unless writing to stdout"),
+				src_name);
+		return NULL;
+	}
+
 	const char *suffix = opt_suffix != NULL
 			? opt_suffix : suffixes[0].compressed;
 	const size_t suffix_len = strlen(suffix);
@@ -139,7 +181,13 @@ compressed_name(const char *src_name)
 extern char *
 get_dest_name(const char *src_name)
 {
+	assert(src_name != NULL);
+
+	// Length of the name is needed in all cases to locate the end of
+	// the string to compare the suffix, so calculate the length here.
+	const size_t src_len = strlen(src_name);
+
 	return opt_mode == MODE_COMPRESS
-			? compressed_name(src_name)
-			: uncompressed_name(src_name);
+			? compressed_name(src_name, src_len)
+			: uncompressed_name(src_name, src_len);
 }
diff --git a/tests/test_compress.sh b/tests/test_compress.sh
index d0511e44..667d8fda 100755
--- a/tests/test_compress.sh
+++ b/tests/test_compress.sh
@@ -69,7 +69,8 @@ test_lzma() {
 	echo . | tr -d '\n\r'
 }
 
-LZMA="../src/lzma/lzma --memory=15Mi --threads=1"
+# TODO: Remove --format=xz once the command name has been changed.
+LZMA="../src/lzma/lzma --memory=15Mi --threads=1 --format=xz"
 LZMADEC="../src/lzmadec/lzmadec --memory=4Mi"
 unset LZMA_OPT
author	Lasse Collin <lasse.collin@tukaani.org>	2008-10-02 22:51:46 +0300
committer	Lasse Collin <lasse.collin@tukaani.org>	2008-10-02 22:51:46 +0300
commit	bd137524f2f50e30ba054f42f1f6536cd3cee920 (patch)
tree	e28ca9e83380e9d7089d5dae4499df1967b5c064
parent	Renamed the test files from .lzma suffix to .xz suffix. (diff)
download	xz-bd137524f2f50e30ba054f42f1f6536cd3cee920.tar.xz