diff options
Diffstat (limited to '')
-rw-r--r-- | src/common/i18n.cpp | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/src/common/i18n.cpp b/src/common/i18n.cpp new file mode 100644 index 000000000..9add06cb9 --- /dev/null +++ b/src/common/i18n.cpp @@ -0,0 +1,297 @@ +// Copyright (c) 2014-2015, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <string> +#include <map> +#include "include_base_utils.h" +#include "file_io_utils.h" +#include "common/util.h" +#include "common/i18n.h" + +static const unsigned char qm_magic[16] = {0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd}; + +static std::map<std::string,std::string> i18n_entries; + +/* Logging isn't initialized yet when this is run */ +/* add std::flush, because std::endl doesn't seem to flush, contrary to expected */ +// #define i18n_log(x) do { std::cout << __FILE__ << ":" << __LINE__ << ": " << x << std::endl; std::cout << std::flush; } while(0) +#define i18n_log(x) ((void)0) + +static std::string get_language() +{ + const char *e; + + e = getenv("LANG"); + i18n_log("LANG=" << e); + if (!e || !*e) { + e = getenv("LC_ALL"); + i18n_log("LC_ALL=" << e); + } + if (!e || !*e) + e = "en"; + + std::string language = e; + std::transform(language.begin(), language.end(), language.begin(), tolower); + return language; +} + +static uint32_t be32(const unsigned char *data) +{ + return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; +} + +static std::string utf16(const unsigned char *data, uint32_t len) +{ + std::string s; + while (len >= 2) { + uint32_t code = (data[0] << 8) | data[1]; + data += 2; + len -= 2; + if (code >= 0xd800 && code <= 0xdbfff && len >= 2) { + uint32_t next = (data[0] << 8) | data[1]; + if (next >= 0xdc00 && next <= 0xdfff) { + code = (code << 10) + next - 0x35dfc00; + data += 2; + len -= 2; + } + } + if (code <= 0x7f) { + s += (char)code; + } + else if (code <= 0x7ff) { + s += 0xc0 | (code >> 6); + s += 0x80 | (code & 0x3f); + } + else if (code <= 0xffff) { + s += 0xe0 | (code >> 12); + s += 0x80 | ((code >> 6) & 0x3f); + s += 0x80 | (code & 0x3f); + } + else { + s += 0xf0 | (code >> 18); + s += 0x80 | ((code >> 12) & 0x3f); + s += 0x80 | ((code >> 6) & 0x3f); + s += 0x80 | (code & 0x3f); + } + } + return s; +} + +static std::string utf8(const unsigned char *data, uint32_t len) +{ + /* assume well formedness */ + return std::string((const char *)data,len); +} + +int i18n_set_language(const char *directory, const char *base) +{ + std::string language, filename, contents; + const unsigned char *data; + size_t datalen; + size_t idx; + unsigned char chunk_type; + uint32_t chunk_size; + uint32_t num_messages = (uint32_t)-1; + uint32_t messages_idx = (uint32_t)-1; + uint32_t offsets_idx = (uint32_t)-1; + std::string translation, source, context; + + i18n_log("i18n_set_language(" << directory << "," << base << ")"); + if (!directory || !base) + return -1; + + language = get_language(); + filename = std::string(directory) + "/" + base + "_" + language + ".qm"; + i18n_log("Loading translations for language " << language); + + boost::system::error_code ignored_ec; + if (!boost::filesystem::exists(filename, ignored_ec)) { + i18n_log("Translations file not found: " << filename); + const char *underscore = strchr(language.c_str(), '_'); + if (underscore) { + std::string fallback_language = std::string(language, 0, underscore - language.c_str()); + filename = std::string(directory) + "/" + base + "_" + fallback_language + ".qm"; + i18n_log("Not found, loading translations for language " << fallback_language); + if (!boost::filesystem::exists(filename, ignored_ec)) { + i18n_log("Translations file not found: " << filename); + return -1; + } + } + } + + if (!epee::file_io_utils::load_file_to_string(filename, contents)) { + i18n_log("Failed to load translations file: " << filename); + return -1; + } + + data = (const unsigned char*)contents.c_str(); + datalen = contents.size(); + idx = 0; + i18n_log("Translations file size: " << datalen); + + /* Format of the QM file (AFAICT): + * 16 bytes magic + * chunk list: N instances of chunks: + * 1 byte: chunk type (0x42: offsets, 0x69: messages) + * 4 bytes: chunk length, big endian + * D bytes: "chunk length" bytes of data + * + * 0x42 chunk: N instances of subchunks: + * 1 byte: subchunk type + * 0x01: end, no data + * 0x02: unsupported + * 0x03: translation + * 4 bytes: string length, big endian + * N bytes: string data, UTF-16 (or UCS2-BE ?) + * 0x04: unsupported + * 0x05: obsolete, unsupported + * 0x06: source text + * 0x07: context + * 0x08: obsolete, unsupported + * other: unsupported + * 4 bytes: subchunk length, big endian - except for 0x01, which has none + * S bytes: "chunk length" bytes of data + * 0x69 chunk: + * string data indexed by the 0x42 chunk data + */ + if (datalen < sizeof(qm_magic) || memcmp(data, qm_magic, sizeof(qm_magic))) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + idx += sizeof(qm_magic); + + while (idx < datalen) { + if (idx + 5 > datalen) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + chunk_type = data[idx++]; + chunk_size = be32(data+idx); + idx += 4; + + i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes"); + if (chunk_size >= datalen || idx > datalen - chunk_size) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + + switch (chunk_type) { + case 0x42: + i18n_log("Found offsets at " << idx); + /* two 32 bit integers, and possible padding */ + offsets_idx = idx; + num_messages = chunk_size / 8; + break; + case 0x69: + i18n_log("Found messages at " << idx); + messages_idx = idx; + break; + default: + i18n_log("Found unsupported chunk type: " << chunk_type); + break; + } + + idx += chunk_size; + } + + if (offsets_idx == (uint32_t)-1) { + i18n_log("No offsets chunk found"); + return -1; + } + if (messages_idx == (uint32_t)-1) { + i18n_log("No messages chunk found"); + return -1; + } + + for (uint32_t m = 0; m < num_messages; ++m) { + be32(data+offsets_idx+m*8); // unused + idx = be32(data+offsets_idx+m*8+4); + idx += messages_idx; + + if (idx > datalen || idx + 1 > datalen) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + + while (1) { + if (idx + 5 > datalen) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + chunk_type = data[idx++]; + chunk_size = 0; + if (chunk_type == 0x01) { + i18n_entries[context + std::string("",1) + source] = translation; + context = std::string(); + source = std::string(); + translation = std::string(); + break; + } + + chunk_size = be32(data+idx); + idx += 4; + i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes"); + if (chunk_size >= datalen || idx > datalen - chunk_size) { + i18n_log("Bad translations file format: " << filename); + return -1; + } + switch (chunk_type) { + case 0x03: // translation, UTF-16 + translation = utf16(data+idx, chunk_size); + i18n_log("Found translation: " << translation); + break; + case 0x06: // source, UTF-8 + source = utf8(data+idx, chunk_size); + i18n_log("Found source: " << source); + break; + case 0x07: // context, UTF-8 + context = utf8(data+idx, chunk_size); + i18n_log("Found context: " << context); + break; + } + idx += chunk_size; + } + } + + return 0; +} + +/* The entries is constant by that time */ +const char *i18n_translate(const char *s, const std::string &context) +{ + const std::string key = context + std::string("", 1) + s; + std::map<std::string,std::string>::const_iterator i = i18n_entries.find(key); + if (i == i18n_entries.end()) + return s; + return (*i).second.c_str(); +} + + |