From f52697729dc5d17293db4239d73ce072afe83d2b Mon Sep 17 00:00:00 2001 From: moneromooo-monero <moneromooo-monero@users.noreply.github.com> Date: Tue, 28 Apr 2020 13:28:55 +0000 Subject: easylogging++: sanitize log payload Some of it might be coming from untrusted sources Reported by itsunixiknowthis --- external/easylogging++/easylogging++.cc | 96 +++++++++++++++++++++++++++ src/common/CMakeLists.txt | 3 +- src/common/utf8.h | 114 ++++++++++++++++++++++++++++++++ src/mnemonics/language_base.h | 74 ++------------------- 4 files changed, 216 insertions(+), 71 deletions(-) create mode 100644 src/common/utf8.h diff --git a/external/easylogging++/easylogging++.cc b/external/easylogging++/easylogging++.cc index 8439bec0b..0d748c225 100644 --- a/external/easylogging++/easylogging++.cc +++ b/external/easylogging++/easylogging++.cc @@ -2475,6 +2475,100 @@ void DefaultLogDispatchCallback::handle(const LogDispatchData* data) { } } + +template<typename Transform> +static inline std::string utf8canonical(const std::string &s, Transform t = [](wint_t c)->wint_t { return c; }) +{ + std::string sc = ""; + size_t avail = s.size(); + const char *ptr = s.data(); + wint_t cp = 0; + int bytes = 1; + char wbuf[8], *wptr; + while (avail--) + { + if ((*ptr & 0x80) == 0) + { + cp = *ptr++; + bytes = 1; + } + else if ((*ptr & 0xe0) == 0xc0) + { + if (avail < 1) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0x1f) << 6; + cp |= *ptr++ & 0x3f; + --avail; + bytes = 2; + } + else if ((*ptr & 0xf0) == 0xe0) + { + if (avail < 2) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0xf) << 12; + cp |= (*ptr++ & 0x3f) << 6; + cp |= *ptr++ & 0x3f; + avail -= 2; + bytes = 3; + } + else if ((*ptr & 0xf8) == 0xf0) + { + if (avail < 3) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0x7) << 18; + cp |= (*ptr++ & 0x3f) << 12; + cp |= (*ptr++ & 0x3f) << 6; + cp |= *ptr++ & 0x3f; + avail -= 3; + bytes = 4; + } + else + throw std::runtime_error("Invalid UTF-8"); + + cp = t(cp); + if (cp <= 0x7f) + bytes = 1; + else if (cp <= 0x7ff) + bytes = 2; + else if (cp <= 0xffff) + bytes = 3; + else if (cp <= 0x10ffff) + bytes = 4; + else + throw std::runtime_error("Invalid code point UTF-8 transformation"); + + wptr = wbuf; + switch (bytes) + { + case 1: *wptr++ = cp; break; + case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break; + case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; + case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; + default: throw std::runtime_error("Invalid UTF-8"); + } + *wptr = 0; + sc.append(wbuf, bytes); + cp = 0; + bytes = 1; + } + return sc; +} + +void sanitize(std::string &s) +{ + s = utf8canonical(s, [](wint_t c)->wint_t { + if (c == 9 || c == 10 || c == 13) + return c; + if (c < 0x20) + return '?'; + if (c == 0x7f) + return '?'; + if (c >= 0x80 && c <= 0x9f) + return '?'; + return c; + }); +} + void DefaultLogDispatchCallback::dispatch(base::type::string_t&& rawLinePrefix, base::type::string_t&& rawLinePayload, base::type::string_t&& logLine) { if (m_data->dispatchAction() == base::DispatchAction::NormalLog || m_data->dispatchAction() == base::DispatchAction::FileOnlyLog) { if (m_data->logMessage()->logger()->m_typedConfigurations->toFile(m_data->logMessage()->level())) { @@ -2506,6 +2600,8 @@ void DefaultLogDispatchCallback::dispatch(base::type::string_t&& rawLinePrefix, m_data->logMessage()->logger()->logBuilder()->setColor(el::base::utils::colorFromLevel(level), false); ELPP_COUT << rawLinePrefix; m_data->logMessage()->logger()->logBuilder()->setColor(color == el::Color::Default ? el::base::utils::colorFromLevel(level): color, color != el::Color::Default); + try { sanitize(rawLinePayload); } + catch (const std::exception &e) { rawLinePayload = "<Invalid UTF-8 in log>"; } ELPP_COUT << rawLinePayload; m_data->logMessage()->logger()->logBuilder()->setColor(el::Color::Default, false); ELPP_COUT << std::flush; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index f06737b31..35b3555a2 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -86,7 +86,8 @@ set(common_private_headers updates.h aligned.h timings.h - combinator.h) + combinator.h + utf8.h) monero_private_headers(common ${common_private_headers}) diff --git a/src/common/utf8.h b/src/common/utf8.h new file mode 100644 index 000000000..60247f1b2 --- /dev/null +++ b/src/common/utf8.h @@ -0,0 +1,114 @@ +// Copyright (c) 2019, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include <cctype> +#include <cwchar> +#include <stdexcept> + +namespace tools +{ + template<typename T, typename Transform> + inline T utf8canonical(const T &s, Transform t = [](wint_t c)->wint_t { return c; }) + { + T sc = ""; + size_t avail = s.size(); + const char *ptr = s.data(); + wint_t cp = 0; + int bytes = 1; + char wbuf[8], *wptr; + while (avail--) + { + if ((*ptr & 0x80) == 0) + { + cp = *ptr++; + bytes = 1; + } + else if ((*ptr & 0xe0) == 0xc0) + { + if (avail < 1) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0x1f) << 6; + cp |= *ptr++ & 0x3f; + --avail; + bytes = 2; + } + else if ((*ptr & 0xf0) == 0xe0) + { + if (avail < 2) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0xf) << 12; + cp |= (*ptr++ & 0x3f) << 6; + cp |= *ptr++ & 0x3f; + avail -= 2; + bytes = 3; + } + else if ((*ptr & 0xf8) == 0xf0) + { + if (avail < 3) + throw std::runtime_error("Invalid UTF-8"); + cp = (*ptr++ & 0x7) << 18; + cp |= (*ptr++ & 0x3f) << 12; + cp |= (*ptr++ & 0x3f) << 6; + cp |= *ptr++ & 0x3f; + avail -= 3; + bytes = 4; + } + else + throw std::runtime_error("Invalid UTF-8"); + + cp = t(cp); + if (cp <= 0x7f) + bytes = 1; + else if (cp <= 0x7ff) + bytes = 2; + else if (cp <= 0xffff) + bytes = 3; + else if (cp <= 0x10ffff) + bytes = 4; + else + throw std::runtime_error("Invalid code point UTF-8 transformation"); + + wptr = wbuf; + switch (bytes) + { + case 1: *wptr++ = cp; break; + case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break; + case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; + case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; + default: throw std::runtime_error("Invalid UTF-8"); + } + *wptr = 0; + sc.append(wbuf, bytes); + cp = 0; + bytes = 1; + } + return sc; + } +} diff --git a/src/mnemonics/language_base.h b/src/mnemonics/language_base.h index 7d2599e9a..ad09dc5fa 100644 --- a/src/mnemonics/language_base.h +++ b/src/mnemonics/language_base.h @@ -41,6 +41,7 @@ #include <boost/algorithm/string.hpp> #include "misc_log_ex.h" #include "fnv1.h" +#include "common/utf8.h" /*! * \namespace Language @@ -73,78 +74,11 @@ namespace Language return prefix; } - template<typename T> - inline T utf8canonical(const T &s) - { - T sc = ""; - size_t avail = s.size(); - const char *ptr = s.data(); - wint_t cp = 0; - int bytes = 1; - char wbuf[8], *wptr; - while (avail--) - { - if ((*ptr & 0x80) == 0) - { - cp = *ptr++; - bytes = 1; - } - else if ((*ptr & 0xe0) == 0xc0) - { - if (avail < 1) - throw std::runtime_error("Invalid UTF-8"); - cp = (*ptr++ & 0x1f) << 6; - cp |= *ptr++ & 0x3f; - --avail; - bytes = 2; - } - else if ((*ptr & 0xf0) == 0xe0) - { - if (avail < 2) - throw std::runtime_error("Invalid UTF-8"); - cp = (*ptr++ & 0xf) << 12; - cp |= (*ptr++ & 0x3f) << 6; - cp |= *ptr++ & 0x3f; - avail -= 2; - bytes = 3; - } - else if ((*ptr & 0xf8) == 0xf0) - { - if (avail < 3) - throw std::runtime_error("Invalid UTF-8"); - cp = (*ptr++ & 0x7) << 18; - cp |= (*ptr++ & 0x3f) << 12; - cp |= (*ptr++ & 0x3f) << 6; - cp |= *ptr++ & 0x3f; - avail -= 3; - bytes = 4; - } - else - throw std::runtime_error("Invalid UTF-8"); - - cp = std::towlower(cp); - wptr = wbuf; - switch (bytes) - { - case 1: *wptr++ = cp; break; - case 2: *wptr++ = 0xc0 | (cp >> 6); *wptr++ = 0x80 | (cp & 0x3f); break; - case 3: *wptr++ = 0xe0 | (cp >> 12); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; - case 4: *wptr++ = 0xf0 | (cp >> 18); *wptr++ = 0x80 | ((cp >> 12) & 0x3f); *wptr++ = 0x80 | ((cp >> 6) & 0x3f); *wptr++ = 0x80 | (cp & 0x3f); break; - default: throw std::runtime_error("Invalid UTF-8"); - } - *wptr = 0; - sc += T(wbuf, bytes); - cp = 0; - bytes = 1; - } - return sc; - } - struct WordHash { std::size_t operator()(const epee::wipeable_string &s) const { - const epee::wipeable_string sc = utf8canonical(s); + const epee::wipeable_string sc = tools::utf8canonical(s, [](wint_t c) -> wint_t { return std::towlower(c); }); return epee::fnv::FNV1a(sc.data(), sc.size()); } }; @@ -153,8 +87,8 @@ namespace Language { bool operator()(const epee::wipeable_string &s0, const epee::wipeable_string &s1) const { - const epee::wipeable_string s0c = utf8canonical(s0); - const epee::wipeable_string s1c = utf8canonical(s1); + const epee::wipeable_string s0c = tools::utf8canonical(s0, [](wint_t c) -> wint_t { return std::towlower(c); }); + const epee::wipeable_string s1c = tools::utf8canonical(s1, [](wint_t c) -> wint_t { return std::towlower(c); }); return s0c == s1c; } }; -- cgit v1.2.3