diff options
author | moneromooo-monero <moneromooo-monero@users.noreply.github.com> | 2019-04-30 19:16:11 +0000 |
---|---|---|
committer | moneromooo-monero <moneromooo-monero@users.noreply.github.com> | 2019-08-16 17:06:03 +0000 |
commit | eeca5ca0c8d7275069d45adf40546ea83edf46cc (patch) | |
tree | 71024c1a6dc4747e83f29bd4e1eeeb80094a19bd /contrib/epee/include/storages/parserse_base_utils.h | |
parent | Merge pull request #5756 (diff) | |
download | monero-eeca5ca0c8d7275069d45adf40546ea83edf46cc.tar.xz |
epee: support unicode in parsed strings
Diffstat (limited to 'contrib/epee/include/storages/parserse_base_utils.h')
-rw-r--r-- | contrib/epee/include/storages/parserse_base_utils.h | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h index b5c4138c5..fe53628a5 100644 --- a/contrib/epee/include/storages/parserse_base_utils.h +++ b/contrib/epee/include/storages/parserse_base_utils.h @@ -31,6 +31,9 @@ #include <algorithm> #include <boost/utility/string_ref.hpp> +#undef MONERO_DEFAULT_LOG_CATEGORY +#define MONERO_DEFAULT_LOG_CATEGORY "serialization" + namespace epee { namespace misc_utils @@ -62,6 +65,26 @@ namespace misc_utils 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; + static const constexpr unsigned char isx[256] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + inline bool isspace(char c) { return lut[(uint8_t)c] & 8; @@ -162,6 +185,42 @@ namespace misc_utils val.push_back('\\');break; case '/': //Slash character val.push_back('/');break; + case 'u': //Unicode code point + if (buf_end - it < 4) + { + ASSERT_MES_AND_THROW("Invalid Unicode escape sequence"); + } + else + { + uint32_t dst = 0; + for (int i = 0; i < 4; ++i) + { + const unsigned char tmp = isx[(int)*++it]; + CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding"); + dst = dst << 4 | tmp; + } + // encode as UTF-8 + if (dst <= 0x7f) + { + val.push_back(dst); + } + else if (dst <= 0x7ff) + { + val.push_back(0xc0 | (dst >> 6)); + val.push_back(0x80 | (dst & 0x3f)); + } + else if (dst <= 0xffff) + { + val.push_back(0xe0 | (dst >> 12)); + val.push_back(0x80 | ((dst >> 6) & 0x3f)); + val.push_back(0x80 | (dst & 0x3f)); + } + else + { + ASSERT_MES_AND_THROW("Unicode code point is out or range"); + } + } + break; default: val.push_back(*it); LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\""); |