diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2010-09-10 10:30:33 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2010-09-10 10:30:33 +0300 |
commit | bb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5 (patch) | |
tree | d21a6ca975dbd1ed4d6181281be92eb53c82423c /src/xz/util.c | |
parent | Update the Czech translation. (diff) | |
download | xz-bb0b1004f83cdc4d309e1471c2ecaf9f95ce60c5.tar.xz |
xz: Multiple fixes.
The code assumed that printing numbers with thousand separators
and decimal points would always produce only US-ASCII characters.
This was used for buffer sizes (with snprintf(), no overflows)
and aligning columns of the progress indicator and --list. That
assumption was wrong (e.g. LC_ALL=fi_FI.UTF-8 with glibc), so
multibyte character support was added in this commit. The old
way is used if the operating system doesn't have enough multibyte
support (e.g. lacks wcwidth()).
The sizes of buffers were increased to accommodate multibyte
characters. I don't know how big they should be exactly, but
they aren't used for anything critical, so it's not too bad.
If they still aren't big enough, I will hopefully get a bug report.
snprintf() takes care of avoiding buffer overflows.
Some static buffers were replaced with buffers allocated on
stack. double_to_str() was removed. uint64_to_str() and
uint64_to_nicestr() now share the static buffer and test
for thousand separator support.
Integrity check names "None" and "Unknown-N" (2 <= N <= 15)
were marked to be translated. I had forgotten these, plus they
wouldn't have worked correctly anyway before this commit,
because printing tables with multibyte strings didn't work.
Thanks to Marek Černocký for reporting the bug about
misaligned table columns in --list output.
Diffstat (limited to '')
-rw-r--r-- | src/xz/util.c | 136 |
1 files changed, 47 insertions, 89 deletions
diff --git a/src/xz/util.c b/src/xz/util.c index 19f5eee3..987b4430 100644 --- a/src/xz/util.c +++ b/src/xz/util.c @@ -14,6 +14,13 @@ #include <stdarg.h> +/// Buffers for uint64_to_str() and uint64_to_nicestr() +static char bufs[4][128]; + +/// Thousand separator support in uint64_to_str() and uint64_to_nicestr() +static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + + extern void * xrealloc(void *ptr, size_t size) { @@ -125,22 +132,28 @@ round_up_to_mib(uint64_t n) } -extern const char * -uint64_to_str(uint64_t value, uint32_t slot) +/// Check if thousand separator is supported. Run-time checking is easiest, +/// because it seems to be sometimes lacking even on POSIXish system. +static void +check_thousand_sep(uint32_t slot) { - // 2^64 with thousand separators is 26 bytes plus trailing '\0'. - static char bufs[4][32]; - - assert(slot < ARRAY_SIZE(bufs)); - - static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; if (thousand == UNKNOWN) { bufs[slot][0] = '\0'; - snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, - UINT64_C(1)); + snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U); thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; } + return; +} + + +extern const char * +uint64_to_str(uint64_t value, uint32_t slot) +{ + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); + if (thousand == WORKS) snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value); else @@ -157,14 +170,21 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, { assert(unit_min <= unit_max); assert(unit_max <= NICESTR_TIB); + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); enum nicestr_unit unit = NICESTR_B; - const char *str; + char *pos = bufs[slot]; + size_t left = sizeof(bufs[slot]); if ((unit_min == NICESTR_B && value < 10000) || unit_max == NICESTR_B) { // The value is shown as bytes. 
- str = uint64_to_str(value, slot); + if (thousand == WORKS) + my_snprintf(&pos, &left, "%'u", (unsigned int)value); + else + my_snprintf(&pos, &left, "%u", (unsigned int)value); } else { // Scale the value to a nicer unit. Unless unit_min and // unit_max limit us, we will show at most five significant @@ -175,49 +195,23 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, ++unit; } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); - str = double_to_str(d); + if (thousand == WORKS) + my_snprintf(&pos, &left, "%'.1f", d); + else + my_snprintf(&pos, &left, "%.1f", d); } static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; + my_snprintf(&pos, &left, " %s", suffix[unit]); - // Minimum buffer size: - // 26 2^64 with thousand separators - // 4 " KiB" - // 2 " (" - // 26 2^64 with thousand separators - // 3 " B)" - // 1 '\0' - // 62 Total - static char buf[4][64]; - char *pos = buf[slot]; - size_t left = sizeof(buf[slot]); - my_snprintf(&pos, &left, "%s %s", str, suffix[unit]); - - if (always_also_bytes && value >= 10000) - snprintf(pos, left, " (%s B)", uint64_to_str(value, slot)); - - return buf[slot]; -} - - -extern const char * -double_to_str(double value) -{ - static char buf[64]; - - static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; - if (thousand == UNKNOWN) { - buf[0] = '\0'; - snprintf(buf, sizeof(buf), "%'.1f", 2.0); - thousand = buf[0] == '2' ? WORKS : BROKEN; + if (always_also_bytes && value >= 10000) { + if (thousand == WORKS) + snprintf(pos, left, " (%'" PRIu64 " B)", value); + else + snprintf(pos, left, " (%" PRIu64 " B)", value); } - if (thousand == WORKS) - snprintf(buf, sizeof(buf), "%'.1f", value); - else - snprintf(buf, sizeof(buf), "%.1f", value); - - return buf; + return bufs[slot]; } @@ -231,7 +225,10 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...) // If an error occurred, we want the caller to think that the whole // buffer was used. 
This way no more data will be written to the - // buffer. We don't need better error handling here. + // buffer. We don't need better error handling here, although it + // is possible that the result looks garbage on the terminal if + // e.g. an UTF-8 character gets split. That shouldn't (easily) + // happen though, because the buffers used have some extra room. if (len < 0 || (size_t)(len) >= *left) { *left = 0; } else { @@ -243,45 +240,6 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...) } -/* -/// \brief Simple quoting to get rid of ASCII control characters -/// -/// This is not so cool and locale-dependent, but should be good enough -/// At least we don't print any control characters on the terminal. -/// -extern char * -str_quote(const char *str) -{ - size_t dest_len = 0; - bool has_ctrl = false; - - while (str[dest_len] != '\0') - if (*(unsigned char *)(str + dest_len++) < 0x20) - has_ctrl = true; - - char *dest = malloc(dest_len + 1); - if (dest != NULL) { - if (has_ctrl) { - for (size_t i = 0; i < dest_len; ++i) - if (*(unsigned char *)(str + i) < 0x20) - dest[i] = '?'; - else - dest[i] = str[i]; - - dest[dest_len] = '\0'; - - } else { - // Usually there are no control characters, - // so we can optimize. - memcpy(dest, str, dest_len + 1); - } - } - - return dest; -} -*/ - - extern bool is_empty_filename(const char *filename) { |