From bb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5 Mon Sep 17 00:00:00 2001
From: Lasse Collin <lasse.collin@tukaani.org>
Date: Fri, 10 Sep 2010 10:30:33 +0300
Subject: xz: Multiple fixes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code assumed that printing numbers with thousand separators
and decimal points would always produce only US-ASCII characters.
This was used for buffer sizes (with snprintf(), no overflows)
and aligning columns of the progress indicator and --list. That
assumption was wrong (e.g. LC_ALL=fi_FI.UTF-8 with glibc), so
multibyte character support was added in this commit. The old
way is used if the operating system doesn't have enough multibyte
support (e.g. lacks wcwidth()).

The sizes of buffers were increased to accomodate multibyte
characters. I don't know how big they should be exactly, but
they aren't used for anything critical, so it's not too bad.
If they still aren't big enough, I hopefully get a bug report.
snprintf() takes care of avoiding buffer overflows.

Some static buffers were replaced with buffers allocated on
stack. double_to_str() was removed. uint64_to_str() and
uint64_to_nicestr() now share the static buffer and test
for thousand separator support.

Integrity check names "None" and "Unknown-N" (2 <= N <= 15)
were marked to be translated. I had forgot these, plus they
wouldn't have worked correctly anyway before this commit,
because printing tables with multibyte strings didn't work.

Thanks to Marek Černocký for reporting the bug about
misaligned table columns in --list output.
---
 src/xz/Makefile.am |   4 +-
 src/xz/list.c      | 205 ++++++++++++++++++++++++++++++++++++-----------------
 src/xz/message.c   |  56 +++++++++------
 src/xz/message.h   |  10 ++-
 src/xz/private.h   |   1 +
 src/xz/util.c      | 136 ++++++++++++-----------------------
 src/xz/util.h      |   7 --
 7 files changed, 232 insertions(+), 187 deletions(-)

(limited to 'src/xz')

diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am
index 4dbe0f26..da716dca 100644
--- a/src/xz/Makefile.am
+++ b/src/xz/Makefile.am
@@ -34,7 +34,9 @@ xz_SOURCES = \
 	$(top_srcdir)/src/common/tuklib_open_stdxxx.c \
 	$(top_srcdir)/src/common/tuklib_progname.c \
 	$(top_srcdir)/src/common/tuklib_exit.c \
-	$(top_srcdir)/src/common/tuklib_cpucores.c
+	$(top_srcdir)/src/common/tuklib_cpucores.c \
+	$(top_srcdir)/src/common/tuklib_mbstr_width.c \
+	$(top_srcdir)/src/common/tuklib_mbstr_fw.c
 
 if COND_W32
 xz_SOURCES += xz_w32res.rc
diff --git a/src/xz/list.c b/src/xz/list.c
index e136cc2e..bd4aee4e 100644
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -49,31 +49,43 @@ typedef struct {
 	uint64_t memusage;
 
 	/// The filter chain of this Block in human-readable form
-	const char *filter_chain;
+	char filter_chain[FILTERS_STR_SIZE];
 
 } block_header_info;
 
 
 /// Check ID to string mapping
 static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
-	"None",
+	// TRANSLATORS: Indicates that there is no integrity check.
+	// This string is used in tables, so the width must not
+	// exceed ten columns with a fixed-width font.
+	N_("None"),
 	"CRC32",
-	"Unknown-2",
-	"Unknown-3",
+	// TRANSLATORS: Indicates that integrity check name is not known,
+	// but the Check ID is known (here 2). This and other "Unknown-N"
+	// strings are used in tables, so the width must not exceed ten
+	// columns with a fixed-width font. It's OK to omit the dash if
+	// you need space for one extra letter.
+	N_("Unknown-2"),
+	N_("Unknown-3"),
 	"CRC64",
-	"Unknown-5",
-	"Unknown-6",
-	"Unknown-7",
-	"Unknown-8",
-	"Unknown-9",
+	N_("Unknown-5"),
+	N_("Unknown-6"),
+	N_("Unknown-7"),
+	N_("Unknown-8"),
+	N_("Unknown-9"),
 	"SHA-256",
-	"Unknown-11",
-	"Unknown-12",
-	"Unknown-13",
-	"Unknown-14",
-	"Unknown-15",
+	N_("Unknown-11"),
+	N_("Unknown-12"),
+	N_("Unknown-13"),
+	N_("Unknown-14"),
+	N_("Unknown-15"),
 };
 
+/// Buffer size for get_check_names(). This may be a bit ridiculous,
+/// but at least it's enough if some language needs many multibyte chars.
+#define CHECKS_STR_SIZE 1024
+
 
 /// Value of the Check field as hexadecimal string.
 /// This is set by parse_check_value().
@@ -442,7 +454,7 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
 		xfi->memusage_max = bhi->memusage;
 
 	// Convert the filter chain to human readable form.
-	bhi->filter_chain = message_filters_to_str(filters, false);
+	message_filters_to_str(bhi->filter_chain, filters, false);
 
 	// Free the memory allocated by lzma_block_header_decode().
 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
@@ -533,7 +545,7 @@ parse_details(file_pair *pair, const lzma_index_iter *iter,
 
 /// \brief      Get the compression ratio
 ///
-/// This has slightly different format than that is used by in message.c.
+/// This has slightly different format than that is used in message.c.
 static const char *
 get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
 {
@@ -545,7 +557,7 @@ get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
 	if (ratio > 9.999)
 		return "---";
 
-	static char buf[6];
+	static char buf[16];
 	snprintf(buf, sizeof(buf), "%.3f", ratio);
 	return buf;
 }
@@ -553,19 +565,22 @@ get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
 
 /// \brief      Get a comma-separated list of Check names
 ///
+/// The check names are translated with gettext except when in robot mode.
+///
+/// \param      buf     Buffer to hold the resulting string
 /// \param      checks  Bit mask of Checks to print
 /// \param      space_after_comma
 ///                     It's better to not use spaces in table-like listings,
 ///                     but in more verbose formats a space after a comma
 ///                     is good for readability.
-static const char *
-get_check_names(uint32_t checks, bool space_after_comma)
+static void
+get_check_names(char buf[CHECKS_STR_SIZE],
+		uint32_t checks, bool space_after_comma)
 {
 	assert(checks != 0);
 
-	static char buf[sizeof(check_names)];
 	char *pos = buf;
-	size_t left = sizeof(buf);
+	size_t left = CHECKS_STR_SIZE;
 
 	const char *sep = space_after_comma ? ", " : ",";
 	bool comma = false;
@@ -573,12 +588,14 @@ get_check_names(uint32_t checks, bool space_after_comma)
 	for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
 		if (checks & (UINT32_C(1) << i)) {
 			my_snprintf(&pos, &left, "%s%s",
-					comma ? sep : "", check_names[i]);
+					comma ? sep : "",
+					opt_robot ? check_names[i]
+						: _(check_names[i]));
 			comma = true;
 		}
 	}
 
-	return buf;
+	return;
 }
 
 
@@ -596,18 +613,29 @@ print_info_basic(const xz_file_info *xfi, file_pair *pair)
 				"Check   Filename"));
 	}
 
-	printf("%5s %7s  %11s  %11s  %5s  %-7s %s\n",
-			uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
-			uint64_to_str(lzma_index_block_count(xfi->idx), 1),
-			uint64_to_nicestr(lzma_index_file_size(xfi->idx),
-				NICESTR_B, NICESTR_TIB, false, 2),
-			uint64_to_nicestr(
-				lzma_index_uncompressed_size(xfi->idx),
-				NICESTR_B, NICESTR_TIB, false, 3),
-			get_ratio(lzma_index_file_size(xfi->idx),
-				lzma_index_uncompressed_size(xfi->idx)),
-			get_check_names(lzma_index_checks(xfi->idx), false),
-			pair->src_name);
+	char checks[CHECKS_STR_SIZE];
+	get_check_names(checks, lzma_index_checks(xfi->idx), false);
+
+	const char *cols[7] = {
+		uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
+		uint64_to_str(lzma_index_block_count(xfi->idx), 1),
+		uint64_to_nicestr(lzma_index_file_size(xfi->idx),
+			NICESTR_B, NICESTR_TIB, false, 2),
+		uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx),
+			NICESTR_B, NICESTR_TIB, false, 3),
+		get_ratio(lzma_index_file_size(xfi->idx),
+			lzma_index_uncompressed_size(xfi->idx)),
+		checks,
+		pair->src_name,
+	};
+	printf("%*s %*s  %*s  %*s  %*s  %-*s %s\n",
+			tuklib_mbstr_fw(cols[0], 5), cols[0],
+			tuklib_mbstr_fw(cols[1], 7), cols[1],
+			tuklib_mbstr_fw(cols[2], 11), cols[2],
+			tuklib_mbstr_fw(cols[3], 11), cols[3],
+			tuklib_mbstr_fw(cols[4], 5), cols[4],
+			tuklib_mbstr_fw(cols[5], 7), cols[5],
+			cols[6]);
 
 	return false;
 }
@@ -618,6 +646,9 @@ print_adv_helper(uint64_t stream_count, uint64_t block_count,
 		uint64_t compressed_size, uint64_t uncompressed_size,
 		uint32_t checks, uint64_t stream_padding)
 {
+	char checks_str[CHECKS_STR_SIZE];
+	get_check_names(checks_str, checks, true);
+
 	printf(_("  Streams:            %s\n"),
 			uint64_to_str(stream_count, 0));
 	printf(_("  Blocks:             %s\n"),
@@ -630,8 +661,7 @@ print_adv_helper(uint64_t stream_count, uint64_t block_count,
 				NICESTR_B, NICESTR_TIB, true, 0));
 	printf(_("  Ratio:              %s\n"),
 			get_ratio(compressed_size, uncompressed_size));
-	printf(_("  Check:              %s\n"),
-			get_check_names(checks, true));
+	printf(_("  Check:              %s\n"), checks_str);
 	printf(_("  Stream padding:     %s\n"),
 			uint64_to_nicestr(stream_padding,
 				NICESTR_B, NICESTR_TIB, true, 0));
@@ -669,21 +699,32 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
 	lzma_index_iter_init(&iter, xfi->idx);
 
 	while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
-		printf("    %6s %9s %15s %15s ",
-				uint64_to_str(iter.stream.number, 0),
-				uint64_to_str(iter.stream.block_count, 1),
-				uint64_to_str(
-					iter.stream.compressed_offset, 2),
-				uint64_to_str(
-					iter.stream.uncompressed_offset, 3));
-		printf("%15s %15s  %5s  %-10s %7s\n",
-				uint64_to_str(iter.stream.compressed_size, 0),
-				uint64_to_str(
-					iter.stream.uncompressed_size, 1),
-				get_ratio(iter.stream.compressed_size,
-					iter.stream.uncompressed_size),
-				check_names[iter.stream.flags->check],
-				uint64_to_str(iter.stream.padding, 2));
+		const char *cols1[4] = {
+			uint64_to_str(iter.stream.number, 0),
+			uint64_to_str(iter.stream.block_count, 1),
+			uint64_to_str(iter.stream.compressed_offset, 2),
+			uint64_to_str(iter.stream.uncompressed_offset, 3),
+		};
+		printf("    %*s %*s %*s %*s ",
+				tuklib_mbstr_fw(cols1[0], 6), cols1[0],
+				tuklib_mbstr_fw(cols1[1], 9), cols1[1],
+				tuklib_mbstr_fw(cols1[2], 15), cols1[2],
+				tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
+
+		const char *cols2[5] = {
+			uint64_to_str(iter.stream.compressed_size, 0),
+			uint64_to_str(iter.stream.uncompressed_size, 1),
+			get_ratio(iter.stream.compressed_size,
+				iter.stream.uncompressed_size),
+			_(check_names[iter.stream.flags->check]),
+			uint64_to_str(iter.stream.padding, 2),
+		};
+		printf("%*s %*s  %*s  %-*s %*s\n",
+				tuklib_mbstr_fw(cols2[0], 15), cols2[0],
+				tuklib_mbstr_fw(cols2[1], 15), cols2[1],
+				tuklib_mbstr_fw(cols2[2], 5), cols2[2],
+				tuklib_mbstr_fw(cols2[3], 10), cols2[3],
+				tuklib_mbstr_fw(cols2[4], 7), cols2[4]);
 
 		// Update the maximum Check size.
 		if (lzma_check_size(iter.stream.flags->check) > check_max)
@@ -730,41 +771,63 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
 			if (detailed && parse_details(pair, &iter, &bhi, xfi))
 					return true;
 
-			printf("    %6s %9s %15s %15s ",
+			const char *cols1[4] = {
 				uint64_to_str(iter.stream.number, 0),
 				uint64_to_str(
 					iter.block.number_in_stream, 1),
 				uint64_to_str(
 					iter.block.compressed_file_offset, 2),
 				uint64_to_str(
-					iter.block.uncompressed_file_offset,
-					3));
-			printf("%15s %15s  %5s  %-*s",
+					iter.block.uncompressed_file_offset, 3)
+			};
+			printf("    %*s %*s %*s %*s ",
+				tuklib_mbstr_fw(cols1[0], 6), cols1[0],
+				tuklib_mbstr_fw(cols1[1], 9), cols1[1],
+				tuklib_mbstr_fw(cols1[2], 15), cols1[2],
+				tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
+
+			const char *cols2[4] = {
 				uint64_to_str(iter.block.total_size, 0),
 				uint64_to_str(iter.block.uncompressed_size,
 						1),
 				get_ratio(iter.block.total_size,
 					iter.block.uncompressed_size),
-				detailed ? 11 : 1,
-				check_names[iter.stream.flags->check]);
+				_(check_names[iter.stream.flags->check])
+			};
+			printf("%*s %*s  %*s  %-*s",
+				tuklib_mbstr_fw(cols2[0], 15), cols2[0],
+				tuklib_mbstr_fw(cols2[1], 15), cols2[1],
+				tuklib_mbstr_fw(cols2[2], 5), cols2[2],
+				tuklib_mbstr_fw(cols2[3], detailed ? 11 : 1),
+					cols2[3]);
 
 			if (detailed) {
-				// Show MiB for memory usage, because it
-				// is the only size which is not in bytes.
 				const lzma_vli compressed_size
 						= iter.block.unpadded_size
 						- bhi.header_size
 						- lzma_check_size(
 						iter.stream.flags->check);
-				printf("%-*s  %6s  %-5s %15s %7s MiB  %s",
-					checkval_width, check_value,
+
+				const char *cols3[6] = {
+					check_value,
 					uint64_to_str(bhi.header_size, 0),
 					bhi.flags,
 					uint64_to_str(compressed_size, 1),
 					uint64_to_str(
 						round_up_to_mib(bhi.memusage),
 						2),
-					bhi.filter_chain);
+					bhi.filter_chain
+				};
+				// Show MiB for memory usage, because it
+				// is the only size which is not in bytes.
+				printf("%-*s  %*s  %-5s %*s %*s MiB  %s",
+					checkval_width, cols3[0],
+					tuklib_mbstr_fw(cols3[1], 6), cols3[1],
+					cols3[2],
+					tuklib_mbstr_fw(cols3[3], 15),
+						cols3[3],
+					tuklib_mbstr_fw(cols3[4], 7), cols3[4],
+					cols3[5]);
 			}
 
 			putchar('\n');
@@ -785,6 +848,9 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
 static bool
 print_info_robot(xz_file_info *xfi, file_pair *pair)
 {
+	char checks[CHECKS_STR_SIZE];
+	get_check_names(checks, lzma_index_checks(xfi->idx), false);
+
 	printf("name\t%s\n", pair->src_name);
 
 	printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
@@ -795,7 +861,7 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
 			lzma_index_uncompressed_size(xfi->idx),
 			get_ratio(lzma_index_file_size(xfi->idx),
 				lzma_index_uncompressed_size(xfi->idx)),
-			get_check_names(lzma_index_checks(xfi->idx), false),
+			checks,
 			xfi->stream_padding);
 
 	if (message_verbosity_get() >= V_VERBOSE) {
@@ -893,6 +959,10 @@ print_totals_basic(void)
 	line[sizeof(line) - 1] = '\0';
 	puts(line);
 
+	// Get the check names.
+	char checks[CHECKS_STR_SIZE];
+	get_check_names(checks, totals.checks, false);
+
 	// Print the totals except the file count, which needs
 	// special handling.
 	printf("%5s %7s  %11s  %11s  %5s  %-7s ",
@@ -904,7 +974,7 @@ print_totals_basic(void)
 				NICESTR_B, NICESTR_TIB, false, 3),
 			get_ratio(totals.compressed_size,
 				totals.uncompressed_size),
-			get_check_names(totals.checks, false));
+			checks);
 
 	// Since we print totals only when there are at least two files,
 	// the English message will always use "%s files". But some other
@@ -947,6 +1017,9 @@ print_totals_adv(void)
 static void
 print_totals_robot(void)
 {
+	char checks[CHECKS_STR_SIZE];
+	get_check_names(checks, totals.checks, false);
+
 	printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
 			"\t%s\t%s\t%" PRIu64 "\t%" PRIu64,
 			totals.streams,
@@ -955,7 +1028,7 @@ print_totals_robot(void)
 			totals.uncompressed_size,
 			get_ratio(totals.compressed_size,
 				totals.uncompressed_size),
-			get_check_names(totals.checks, false),
+			checks,
 			totals.stream_padding,
 			totals.files);
 
diff --git a/src/xz/message.c b/src/xz/message.c
index 6dfa4aac..9c2dfbd8 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -321,7 +321,8 @@ progress_percentage(uint64_t in_pos)
 	double percentage = (double)(in_pos) / (double)(expected_in_size)
 			* 99.9;
 
-	static char buf[sizeof("99.9 %")];
+	// Use big enough buffer to hold e.g. a multibyte decimal point.
+	static char buf[16];
 	snprintf(buf, sizeof(buf), "%.1f %%", percentage);
 
 	return buf;
@@ -333,12 +334,8 @@ progress_percentage(uint64_t in_pos)
 static const char *
 progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final)
 {
-	// This is enough to hold sizes up to about 99 TiB if thousand
-	// separator is used, or about 1 PiB without thousand separator.
-	// After that the progress indicator will look a bit silly, since
-	// the compression ratio no longer fits with three decimal places.
-	static char buf[36];
-
+	// Use big enough buffer to hold e.g. a multibyte thousand separators.
+	static char buf[128];
 	char *pos = buf;
 	size_t left = sizeof(buf);
 
@@ -402,7 +399,8 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
 	//  - 9.9 KiB/s
 	//  - 99 KiB/s
 	//  - 999 KiB/s
-	static char buf[sizeof("999 GiB/s")];
+	// Use big enough buffer to hold e.g. a multibyte decimal point.
+	static char buf[16];
 	snprintf(buf, sizeof(buf), "%.*f %s",
 			speed > 9.9 ? 0 : 1, speed, unit[unit_index]);
 	return buf;
@@ -588,12 +586,19 @@ message_progress_update(void)
 	// Print the actual progress message. The idea is that there is at
 	// least three spaces between the fields in typical situations, but
 	// even in rare situations there is at least one space.
-	fprintf(stderr, "\r %6s %35s   %9s %10s   %10s\r",
+	const char *cols[5] = {
 		progress_percentage(in_pos),
 		progress_sizes(compressed_pos, uncompressed_pos, false),
 		progress_speed(uncompressed_pos, elapsed),
 		progress_time(elapsed),
-		progress_remaining(in_pos, elapsed));
+		progress_remaining(in_pos, elapsed),
+	};
+	fprintf(stderr, "\r %*s %*s   %*s %10s   %10s\r",
+			tuklib_mbstr_fw(cols[0], 6), cols[0],
+			tuklib_mbstr_fw(cols[1], 35), cols[1],
+			tuklib_mbstr_fw(cols[2], 9), cols[2],
+			cols[3],
+			cols[4]);
 
 #ifdef SIGALRM
 	// Updating the progress info was finished. Reset
@@ -663,12 +668,19 @@ progress_flush(bool finished)
 	// statistics are printed in the same format as the progress
 	// indicator itself.
 	if (progress_automatic) {
-		fprintf(stderr, "\r %6s %35s   %9s %10s   %10s\n",
+		const char *cols[5] = {
 			finished ? "100 %" : progress_percentage(in_pos),
 			progress_sizes(compressed_pos, uncompressed_pos, true),
 			progress_speed(uncompressed_pos, elapsed),
 			progress_time(elapsed),
-			finished ? "" : progress_remaining(in_pos, elapsed));
+			finished ? "" : progress_remaining(in_pos, elapsed),
+		};
+		fprintf(stderr, "\r %*s %*s   %*s %10s   %10s\n",
+				tuklib_mbstr_fw(cols[0], 6), cols[0],
+				tuklib_mbstr_fw(cols[1], 35), cols[1],
+				tuklib_mbstr_fw(cols[2], 9), cols[2],
+				cols[3],
+				cols[4]);
 	} else {
 		// The filename is always printed.
 		fprintf(stderr, "%s: ", filename);
@@ -848,8 +860,10 @@ message_mem_needed(enum message_verbosity v, uint64_t memusage)
 	// the user might need to +1 MiB to get high enough limit.)
 	memusage = round_up_to_mib(memusage);
 
+	// With US-ASCII:
 	// 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1
-	char memlimitstr[32];
+	// But there may be multibyte chars so reserve enough space.
+	char memlimitstr[128];
 
 	// Show the memory usage limit as MiB unless it is less than 1 MiB.
 	// This way it's easy to notice errors where one has typed
@@ -895,13 +909,12 @@ uint32_to_optstr(uint32_t num)
 }
 
 
-extern const char *
-message_filters_to_str(const lzma_filter *filters, bool all_known)
+extern void
+message_filters_to_str(char buf[FILTERS_STR_SIZE],
+		const lzma_filter *filters, bool all_known)
 {
-	static char buf[512];
-
 	char *pos = buf;
-	size_t left = sizeof(buf);
+	size_t left = FILTERS_STR_SIZE;
 
 	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
 		// Add the dashes for the filter option. A space is
@@ -1025,7 +1038,7 @@ message_filters_to_str(const lzma_filter *filters, bool all_known)
 		}
 	}
 
-	return buf;
+	return;
 }
 
 
@@ -1035,8 +1048,9 @@ message_filters_show(enum message_verbosity v, const lzma_filter *filters)
 	if (v > verbosity)
 		return;
 
-	fprintf(stderr, _("%s: Filter chain: %s\n"), progname,
-			message_filters_to_str(filters, true));
+	char buf[FILTERS_STR_SIZE];
+	message_filters_to_str(buf, filters, true);
+	fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf);
 	return;
 }
 
diff --git a/src/xz/message.h b/src/xz/message.h
index dd5fa4d4..37e60821 100644
--- a/src/xz/message.h
+++ b/src/xz/message.h
@@ -86,15 +86,19 @@ extern const char *message_strm(lzma_ret code);
 extern void message_mem_needed(enum message_verbosity v, uint64_t memusage);
 
 
+/// Buffer size for message_filters_to_str()
+#define FILTERS_STR_SIZE 512
+
+
 /// \brief      Get the filter chain as a string
 ///
+/// \param      buf         Pointer to caller allocated buffer to hold
+///                         the filter chain string
 /// \param      filters     Pointer to the filter chain
 /// \param      all_known   If true, all filter options are printed.
 ///                         If false, only the options that get stored
 ///                         into .xz headers are printed.
-///
-/// \return     Pointer to a statically allocated buffer.
-extern const char *message_filters_to_str(
+extern void message_filters_to_str(char buf[FILTERS_STR_SIZE],
 		const lzma_filter *filters, bool all_known);
 
 
diff --git a/src/xz/private.h b/src/xz/private.h
index 15136bfe..6b01e513 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -25,6 +25,7 @@
 #include "tuklib_gettext.h"
 #include "tuklib_progname.h"
 #include "tuklib_exit.h"
+#include "tuklib_mbstr.h"
 
 #if defined(_WIN32) && !defined(__CYGWIN__)
 #	define WIN32_LEAN_AND_MEAN
diff --git a/src/xz/util.c b/src/xz/util.c
index 19f5eee3..987b4430 100644
--- a/src/xz/util.c
+++ b/src/xz/util.c
@@ -14,6 +14,13 @@
 #include <stdarg.h>
 
 
+/// Buffers for uint64_to_str() and uint64_to_nicestr()
+static char bufs[4][128];
+
+/// Thousand separator support in uint64_to_str() and uint64_to_nicestr()
+static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
+
+
 extern void *
 xrealloc(void *ptr, size_t size)
 {
@@ -125,22 +132,28 @@ round_up_to_mib(uint64_t n)
 }
 
 
-extern const char *
-uint64_to_str(uint64_t value, uint32_t slot)
+/// Check if thousand separator is supported. Run-time checking is easiest,
+/// because it seems to be sometimes lacking even on POSIXish system.
+static void
+check_thousand_sep(uint32_t slot)
 {
-	// 2^64 with thousand separators is 26 bytes plus trailing '\0'.
-	static char bufs[4][32];
-
-	assert(slot < ARRAY_SIZE(bufs));
-
-	static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
 	if (thousand == UNKNOWN) {
 		bufs[slot][0] = '\0';
-		snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64,
-				UINT64_C(1));
+		snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U);
 		thousand = bufs[slot][0] == '1' ? WORKS : BROKEN;
 	}
 
+	return;
+}
+
+
+extern const char *
+uint64_to_str(uint64_t value, uint32_t slot)
+{
+	assert(slot < ARRAY_SIZE(bufs));
+
+	check_thousand_sep(slot);
+
 	if (thousand == WORKS)
 		snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value);
 	else
@@ -157,14 +170,21 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min,
 {
 	assert(unit_min <= unit_max);
 	assert(unit_max <= NICESTR_TIB);
+	assert(slot < ARRAY_SIZE(bufs));
+
+	check_thousand_sep(slot);
 
 	enum nicestr_unit unit = NICESTR_B;
-	const char *str;
+	char *pos = bufs[slot];
+	size_t left = sizeof(bufs[slot]);
 
 	if ((unit_min == NICESTR_B && value < 10000)
 			|| unit_max == NICESTR_B) {
 		// The value is shown as bytes.
-		str = uint64_to_str(value, slot);
+		if (thousand == WORKS)
+			my_snprintf(&pos, &left, "%'u", (unsigned int)value);
+		else
+			my_snprintf(&pos, &left, "%u", (unsigned int)value);
 	} else {
 		// Scale the value to a nicer unit. Unless unit_min and
 		// unit_max limit us, we will show at most five significant
@@ -175,49 +195,23 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min,
 			++unit;
 		} while (unit < unit_min || (d > 9999.9 && unit < unit_max));
 
-		str = double_to_str(d);
+		if (thousand == WORKS)
+			my_snprintf(&pos, &left, "%'.1f", d);
+		else
+			my_snprintf(&pos, &left, "%.1f", d);
 	}
 
 	static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" };
+	my_snprintf(&pos, &left, " %s", suffix[unit]);
 
-	// Minimum buffer size:
-	// 26   2^64 with thousand separators
-	//  4   " KiB"
-	//  2   " ("
-	// 26   2^64 with thousand separators
-	//  3   " B)"
-	//  1   '\0'
-	// 62   Total
-	static char buf[4][64];
-	char *pos = buf[slot];
-	size_t left = sizeof(buf[slot]);
-	my_snprintf(&pos, &left, "%s %s", str, suffix[unit]);
-
-	if (always_also_bytes && value >= 10000)
-		snprintf(pos, left, " (%s B)", uint64_to_str(value, slot));
-
-	return buf[slot];
-}
-
-
-extern const char *
-double_to_str(double value)
-{
-	static char buf[64];
-
-	static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
-	if (thousand == UNKNOWN) {
-		buf[0] = '\0';
-		snprintf(buf, sizeof(buf), "%'.1f", 2.0);
-		thousand = buf[0] == '2' ? WORKS : BROKEN;
+	if (always_also_bytes && value >= 10000) {
+		if (thousand == WORKS)
+			snprintf(pos, left, " (%'" PRIu64 " B)", value);
+		else
+			snprintf(pos, left, " (%" PRIu64 " B)", value);
 	}
 
-	if (thousand == WORKS)
-		snprintf(buf, sizeof(buf), "%'.1f", value);
-	else
-		snprintf(buf, sizeof(buf), "%.1f", value);
-
-	return buf;
+	return bufs[slot];
 }
 
 
@@ -231,7 +225,10 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...)
 
 	// If an error occurred, we want the caller to think that the whole
 	// buffer was used. This way no more data will be written to the
-	// buffer. We don't need better error handling here.
+	// buffer. We don't need better error handling here, although it
+	// is possible that the result looks garbage on the terminal if
+	// e.g. an UTF-8 character gets split. That shouldn't (easily)
+	// happen though, because the buffers used have some extra room.
 	if (len < 0 || (size_t)(len) >= *left) {
 		*left = 0;
 	} else {
@@ -243,45 +240,6 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...)
 }
 
 
-/*
-/// \brief      Simple quoting to get rid of ASCII control characters
-///
-/// This is not so cool and locale-dependent, but should be good enough
-/// At least we don't print any control characters on the terminal.
-///
-extern char *
-str_quote(const char *str)
-{
-	size_t dest_len = 0;
-	bool has_ctrl = false;
-
-	while (str[dest_len] != '\0')
-		if (*(unsigned char *)(str + dest_len++) < 0x20)
-			has_ctrl = true;
-
-	char *dest = malloc(dest_len + 1);
-	if (dest != NULL) {
-		if (has_ctrl) {
-			for (size_t i = 0; i < dest_len; ++i)
-				if (*(unsigned char *)(str + i) < 0x20)
-					dest[i] = '?';
-				else
-					dest[i] = str[i];
-
-			dest[dest_len] = '\0';
-
-		} else {
-			// Usually there are no control characters,
-			// so we can optimize.
-			memcpy(dest, str, dest_len + 1);
-		}
-	}
-
-	return dest;
-}
-*/
-
-
 extern bool
 is_empty_filename(const char *filename)
 {
diff --git a/src/xz/util.h b/src/xz/util.h
index 2e08b4a8..fea8cc66 100644
--- a/src/xz/util.h
+++ b/src/xz/util.h
@@ -96,13 +96,6 @@ extern const char *uint64_to_nicestr(uint64_t value,
 		bool always_also_bytes, uint32_t slot);
 
 
-/// \brief      Convert double to a string with one decimal place
-///
-/// This is like uint64_to_str() except that this converts a double and
-/// uses exactly one decimal place.
-extern const char *double_to_str(double value);
-
-
 /// \brief      Wrapper for snprintf() to help constructing a string in pieces
 ///
 /// A maximum of *left bytes is written starting from *pos. *pos and *left
-- 
cgit v1.2.3