aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/epee/include/storages/parserse_base_utils.h59
-rw-r--r--contrib/epee/include/string_tools.h24
-rw-r--r--tests/unit_tests/epee_utils.cpp17
3 files changed, 78 insertions, 22 deletions
diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h
index b5c4138c5..fe53628a5 100644
--- a/contrib/epee/include/storages/parserse_base_utils.h
+++ b/contrib/epee/include/storages/parserse_base_utils.h
@@ -31,6 +31,9 @@
#include <algorithm>
#include <boost/utility/string_ref.hpp>
+#undef MONERO_DEFAULT_LOG_CATEGORY
+#define MONERO_DEFAULT_LOG_CATEGORY "serialization"
+
namespace epee
{
namespace misc_utils
@@ -62,6 +65,26 @@ namespace misc_utils
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
+ static const constexpr unsigned char isx[256] =
+ {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+
inline bool isspace(char c)
{
return lut[(uint8_t)c] & 8;
@@ -162,6 +185,42 @@ namespace misc_utils
val.push_back('\\');break;
case '/': //Slash character
val.push_back('/');break;
+ case 'u': //Unicode code point
+ if (buf_end - it < 4)
+ {
+ ASSERT_MES_AND_THROW("Invalid Unicode escape sequence");
+ }
+ else
+ {
+ uint32_t dst = 0;
+ for (int i = 0; i < 4; ++i)
+ {
+ const unsigned char tmp = isx[(int)*++it];
+ CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding");
+ dst = dst << 4 | tmp;
+ }
+ // encode as UTF-8
+ if (dst <= 0x7f)
+ {
+ val.push_back(dst);
+ }
+ else if (dst <= 0x7ff)
+ {
+ val.push_back(0xc0 | (dst >> 6));
+ val.push_back(0x80 | (dst & 0x3f));
+ }
+ else if (dst <= 0xffff)
+ {
+ val.push_back(0xe0 | (dst >> 12));
+ val.push_back(0x80 | ((dst >> 6) & 0x3f));
+ val.push_back(0x80 | (dst & 0x3f));
+ }
+ else
+ {
+ ASSERT_MES_AND_THROW("Unicode code point is out or range");
+ }
+ }
+ break;
default:
val.push_back(*it);
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");
diff --git a/contrib/epee/include/string_tools.h b/contrib/epee/include/string_tools.h
index da47b7d55..1be5eb5e1 100644
--- a/contrib/epee/include/string_tools.h
+++ b/contrib/epee/include/string_tools.h
@@ -59,26 +59,6 @@
#pragma comment (lib, "Rpcrt4.lib")
#endif
-static const constexpr unsigned char isx[256] =
-{
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-};
-
namespace epee
{
namespace string_tools
@@ -99,10 +79,10 @@ namespace string_tools
for(size_t i = 0; i < s.size(); i += 2)
{
int tmp = *src++;
- tmp = isx[tmp];
+ tmp = epee::misc_utils::parse::isx[tmp];
if (tmp == 0xff) return false;
int t2 = *src++;
- t2 = isx[t2];
+ t2 = epee::misc_utils::parse::isx[t2];
if (t2 == 0xff) return false;
*dst++ = (tmp << 4) | t2;
}
diff --git a/tests/unit_tests/epee_utils.cpp b/tests/unit_tests/epee_utils.cpp
index 946731826..c5f9a42d0 100644
--- a/tests/unit_tests/epee_utils.cpp
+++ b/tests/unit_tests/epee_utils.cpp
@@ -946,3 +946,20 @@ TEST(parsing, number)
epee::misc_utils::parse::match_number(i, s.end(), val);
ASSERT_EQ(val, "+9.34e+03");
}
+
+TEST(parsing, unicode)
+{
+ std::string bs;
+ std::string s;
+ std::string::const_iterator si;
+
+ s = "\"\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "");
+ s = "\"\\u0000\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, std::string(1, '\0'));
+ s = "\"\\u0020\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, " ");
+ s = "\"\\u1\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
+ s = "\"\\u12\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
+ s = "\"\\u123\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
+ s = "\"\\u1234\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "ሴ");
+ s = "\"foo\\u1234bar\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "fooሴbar");
+ s = "\"\\u3042\\u307e\\u3084\\u304b\\u3059\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "あまやかす");
+}