aboutsummaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/tuklib_mbstr.h66
-rw-r--r--src/common/tuklib_mbstr_fw.c31
-rw-r--r--src/common/tuklib_mbstr_width.c64
3 files changed, 161 insertions, 0 deletions
diff --git a/src/common/tuklib_mbstr.h b/src/common/tuklib_mbstr.h
new file mode 100644
index 00000000..9f358355
--- /dev/null
+++ b/src/common/tuklib_mbstr.h
@@ -0,0 +1,66 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file tuklib_mstr.h
+/// \brief Utility functions for handling multibyte strings
+///
+/// If not enough multibyte string support is available in the C library,
+/// these functions keep working with the assumption that all strings
+/// are in a single-byte character set without combining characters, e.g.
+/// US-ASCII or ISO-8859-*.
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_MBSTR_H
+#define TUKLIB_MBSTR_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_mbstr_width TUKLIB_SYMBOL(tuklib_mbstr_width)
+extern size_t tuklib_mbstr_width(const char *str, size_t *bytes);
+///<
+/// \brief Get the number of columns needed for the multibyte string
+///
+/// This is somewhat similar to wcswidth() but works on multibyte strings.
+///
+/// \param str String whose width is to be calculated. If the
+/// current locale uses a multibyte character set
+/// that has shift states, the string must begin
+/// and end in the initial shift state.
+/// \param bytes If this is not NULL, *bytes is set to the
+/// value returned by strlen(str) (even if an
+/// error occurs when calculating the width).
+///
+/// \return On success, the number of columns needed to display the
+/// string e.g. in a terminal emulator is returned. On error,
+/// (size_t)-1 is returned. Possible errors include invalid,
+/// partial, or non-printable multibyte character in str, or
+/// that str doesn't end in the initial shift state.
+
+#define tuklib_mbstr_fw TUKLIB_SYMBOL(tuklib_mbstr_fw)
+extern int tuklib_mbstr_fw(const char *str, int columns_min);
+///<
+/// \brief Get the field width for printf() e.g. to align table columns
+///
+/// Printing simple tables to a terminal can be done using the field field
+/// feature in the printf() format string, but it works only with single-byte
+/// character sets. To do the same with multibyte strings, tuklib_mbstr_fw()
+/// can be used to calculate appropriate field width.
+///
+/// The behavior of this function is undefined, if
+/// - str is NULL or not terminated with '\0';
+/// - columns_min <= 0; or
+/// - the calculated field width exceeds INT_MAX.
+///
+/// \return If tuklib_mbstr_width(str, NULL) fails, -1 is returned.
+/// If str needs more columns than columns_min, zero is returned.
+/// Otherwise a positive integer is returned, which can be
+/// used as the field width, e.g. printf("%*s", fw, str).
+
+TUKLIB_DECLS_END
+#endif
diff --git a/src/common/tuklib_mbstr_fw.c b/src/common/tuklib_mbstr_fw.c
new file mode 100644
index 00000000..978a3fe1
--- /dev/null
+++ b/src/common/tuklib_mbstr_fw.c
@@ -0,0 +1,31 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file tuklib_mstr_fw.c
+/// \brief Get the field width for printf() e.g. to align table columns
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_mbstr.h"
+
+
+extern int
+tuklib_mbstr_fw(const char *str, int columns_min)
+{
+ size_t len;
+ const size_t width = tuklib_mbstr_width(str, &len);
+ if (width == (size_t)-1)
+ return -1;
+
+ if (width > (size_t)columns_min)
+ return 0;
+
+ if (width < (size_t)columns_min)
+ len += (size_t)columns_min - width;
+
+ return len;
+}
diff --git a/src/common/tuklib_mbstr_width.c b/src/common/tuklib_mbstr_width.c
new file mode 100644
index 00000000..3c38990f
--- /dev/null
+++ b/src/common/tuklib_mbstr_width.c
@@ -0,0 +1,64 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file tuklib_mstr_width.c
+/// \brief Calculate width of a multibyte string
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_mbstr.h"
+
+#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+# include <wchar.h>
+#endif
+
+
+extern size_t
+tuklib_mbstr_width(const char *str, size_t *bytes)
+{
+ const size_t len = strlen(str);
+ if (bytes != NULL)
+ *bytes = len;
+
+#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
+ // In single-byte mode, the width of the string is the same
+ // as its length.
+ return len;
+
+#else
+ mbstate_t state;
+ memset(&state, 0, sizeof(state));
+
+ size_t width = 0;
+ size_t i = 0;
+
+ // Convert one multibyte character at a time to wchar_t
+ // and get its width using wcwidth().
+ while (i < len) {
+ wchar_t wc;
+ const size_t ret = mbrtowc(&wc, str + i, len - i, &state);
+ if (ret < 1 || ret > len)
+ return (size_t)-1;
+
+ i += ret;
+
+ const int wc_width = wcwidth(wc);
+ if (wc_width < 0)
+ return (size_t)-1;
+
+ width += wc_width;
+ }
+
+ // Require that the string ends in the initial shift state.
+ // This way the caller can be combine the string with other
+ // strings without needing to worry about the shift states.
+ if (!mbsinit(&state))
+ return (size_t)-1;
+
+ return width;
+#endif
+}