aboutsummaryrefslogtreecommitdiff
path: root/contrib/epee/include/storages/parserse_base_utils.h
diff options
context:
space:
mode:
authormoneromooo-monero <moneromooo-monero@users.noreply.github.com>2019-04-30 19:16:11 +0000
committermoneromooo-monero <moneromooo-monero@users.noreply.github.com>2019-08-16 17:06:03 +0000
commiteeca5ca0c8d7275069d45adf40546ea83edf46cc (patch)
tree71024c1a6dc4747e83f29bd4e1eeeb80094a19bd /contrib/epee/include/storages/parserse_base_utils.h
parentMerge pull request #5756 (diff)
downloadmonero-eeca5ca0c8d7275069d45adf40546ea83edf46cc.tar.xz
epee: support unicode in parsed strings
Diffstat (limited to 'contrib/epee/include/storages/parserse_base_utils.h')
-rw-r--r--contrib/epee/include/storages/parserse_base_utils.h59
1 files changed, 59 insertions, 0 deletions
diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h
index b5c4138c5..fe53628a5 100644
--- a/contrib/epee/include/storages/parserse_base_utils.h
+++ b/contrib/epee/include/storages/parserse_base_utils.h
@@ -31,6 +31,9 @@
#include <algorithm>
#include <boost/utility/string_ref.hpp>
+#undef MONERO_DEFAULT_LOG_CATEGORY
+#define MONERO_DEFAULT_LOG_CATEGORY "serialization"
+
namespace epee
{
namespace misc_utils
@@ -62,6 +65,26 @@ namespace misc_utils
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
+ static const constexpr unsigned char isx[256] =
+ {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+
inline bool isspace(char c)
{
return lut[(uint8_t)c] & 8;
@@ -162,6 +185,42 @@ namespace misc_utils
val.push_back('\\');break;
case '/': //Slash character
val.push_back('/');break;
+ case 'u': //Unicode code point
+ if (buf_end - it < 4)
+ {
+ ASSERT_MES_AND_THROW("Invalid Unicode escape sequence");
+ }
+ else
+ {
+ uint32_t dst = 0;
+ for (int i = 0; i < 4; ++i)
+ {
+ const unsigned char tmp = isx[(int)*++it];
+ CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding");
+ dst = dst << 4 | tmp;
+ }
+ // encode as UTF-8
+ if (dst <= 0x7f)
+ {
+ val.push_back(dst);
+ }
+ else if (dst <= 0x7ff)
+ {
+ val.push_back(0xc0 | (dst >> 6));
+ val.push_back(0x80 | (dst & 0x3f));
+ }
+ else if (dst <= 0xffff)
+ {
+ val.push_back(0xe0 | (dst >> 12));
+ val.push_back(0x80 | ((dst >> 6) & 0x3f));
+ val.push_back(0x80 | (dst & 0x3f));
+ }
+ else
+ {
+ ASSERT_MES_AND_THROW("Unicode code point is out or range");
+ }
+ }
+ break;
default:
val.push_back(*it);
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");