aboutsummaryrefslogtreecommitdiff
path: root/src/common/tuklib_mbstr.h
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2010-09-10 10:30:33 +0300
committerLasse Collin <lasse.collin@tukaani.org>2010-09-10 10:30:33 +0300
commitbb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5 (patch)
treed21a6ca975dbd1ed4d6181281be92eb53c82423c /src/common/tuklib_mbstr.h
parentUpdate the Czech translation. (diff)
downloadxz-bb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5.tar.xz
xz: Multiple fixes.
The code assumed that printing numbers with thousand separators and decimal points would always produce only US-ASCII characters. This was used for buffer sizes (with snprintf(), no overflows) and aligning columns of the progress indicator and --list. That assumption was wrong (e.g. LC_ALL=fi_FI.UTF-8 with glibc), so multibyte character support was added in this commit. The old way is used if the operating system doesn't have enough multibyte support (e.g. lacks wcwidth()). The sizes of buffers were increased to accomodate multibyte characters. I don't know how big they should be exactly, but they aren't used for anything critical, so it's not too bad. If they still aren't big enough, I hopefully get a bug report. snprintf() takes care of avoiding buffer overflows. Some static buffers were replaced with buffers allocated on stack. double_to_str() was removed. uint64_to_str() and uint64_to_nicestr() now share the static buffer and test for thousand separator support. Integrity check names "None" and "Unknown-N" (2 <= N <= 15) were marked to be translated. I had forgot these, plus they wouldn't have worked correctly anyway before this commit, because printing tables with multibyte strings didn't work. Thanks to Marek Černocký for reporting the bug about misaligned table columns in --list output.
Diffstat (limited to 'src/common/tuklib_mbstr.h')
-rw-r--r--src/common/tuklib_mbstr.h66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/common/tuklib_mbstr.h b/src/common/tuklib_mbstr.h
new file mode 100644
index 00000000..9f358355
--- /dev/null
+++ b/src/common/tuklib_mbstr.h
@@ -0,0 +1,66 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file tuklib_mstr.h
+/// \brief Utility functions for handling multibyte strings
+///
+/// If not enough multibyte string support is available in the C library,
+/// these functions keep working with the assumption that all strings
+/// are in a single-byte character set without combining characters, e.g.
+/// US-ASCII or ISO-8859-*.
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_MBSTR_H
+#define TUKLIB_MBSTR_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_mbstr_width TUKLIB_SYMBOL(tuklib_mbstr_width)
+extern size_t tuklib_mbstr_width(const char *str, size_t *bytes);
+///<
+/// \brief Get the number of columns needed for the multibyte string
+///
+/// This is somewhat similar to wcswidth() but works on multibyte strings.
+///
+/// \param str String whose width is to be calculated. If the
+/// current locale uses a multibyte character set
+/// that has shift states, the string must begin
+/// and end in the initial shift state.
+/// \param bytes If this is not NULL, *bytes is set to the
+/// value returned by strlen(str) (even if an
+/// error occurs when calculating the width).
+///
+/// \return On success, the number of columns needed to display the
+/// string e.g. in a terminal emulator is returned. On error,
+/// (size_t)-1 is returned. Possible errors include invalid,
+/// partial, or non-printable multibyte character in str, or
+/// that str doesn't end in the initial shift state.
+
+#define tuklib_mbstr_fw TUKLIB_SYMBOL(tuklib_mbstr_fw)
+extern int tuklib_mbstr_fw(const char *str, int columns_min);
+///<
+/// \brief Get the field width for printf() e.g. to align table columns
+///
+/// Printing simple tables to a terminal can be done using the field field
+/// feature in the printf() format string, but it works only with single-byte
+/// character sets. To do the same with multibyte strings, tuklib_mbstr_fw()
+/// can be used to calculate appropriate field width.
+///
+/// The behavior of this function is undefined, if
+/// - str is NULL or not terminated with '\0';
+/// - columns_min <= 0; or
+/// - the calculated field width exceeds INT_MAX.
+///
+/// \return If tuklib_mbstr_width(str, NULL) fails, -1 is returned.
+/// If str needs more columns than columns_min, zero is returned.
+/// Otherwise a positive integer is returned, which can be
+/// used as the field width, e.g. printf("%*s", fw, str).
+
+TUKLIB_DECLS_END
+#endif