diff options
author | Riccardo Spagni <ric@spagni.net> | 2019-01-18 09:24:41 +0200 |
---|---|---|
committer | Riccardo Spagni <ric@spagni.net> | 2019-01-18 09:24:41 +0200 |
commit | b65106ce9390d615d13fd6a3d6bb265a17332cc3 (patch) | |
tree | 08c52d58cdfdc816d6fcfd00208a0f38df793cc4 | |
parent | Merge pull request #5014 (diff) | |
parent | epee: speedup word/number matching (diff) | |
download | monero-b65106ce9390d615d13fd6a3d6bb265a17332cc3.tar.xz |
Merge pull request #5017
21777daf epee: speedup word/number matching (moneromooo-monero)
-rw-r--r-- | contrib/epee/include/storages/parserse_base_utils.h | 72 | ||||
-rw-r--r-- | contrib/epee/include/storages/portable_storage_from_json.h | 62 | ||||
-rw-r--r-- | tests/unit_tests/epee_utils.cpp | 84 |
3 files changed, 173 insertions, 45 deletions
diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h index d73fbde3a..69b650cd4 100644 --- a/contrib/epee/include/storages/parserse_base_utils.h +++ b/contrib/epee/include/storages/parserse_base_utils.h @@ -29,6 +29,7 @@ #pragma once #include <algorithm> +#include <boost/utility/string_ref.hpp> namespace epee { @@ -36,6 +37,40 @@ namespace misc_utils { namespace parse { + // 1: digit + // 2: .eE (floating point) + // 4: alpha + // 8: whitespace + // 16: allowed in float but doesn't necessarily mean it's a float + static const constexpr uint8_t lut[256]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, // 16 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 + 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 16, 18, 0, // 48 + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, // 64 + 0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 96 + 0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 112 + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 128 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + inline bool isspace(char c) + { + return lut[(uint8_t)c] & 8; + } + + inline bool isdigit(char c) + { + return lut[(uint8_t)c] & 1; + } + inline std::string transform_to_escape_sequence(const std::string& src) { static const char escaped[] = "\b\f\n\r\t\v\"\\/"; @@ -159,25 +194,34 @@ namespace misc_utils return false; } } - inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val, bool& is_float_val, bool& is_signed_val) + inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val, bool& is_float_val, bool& is_signed_val) { val.clear(); - is_float_val = false; - for(std::string::const_iterator it = star_end_string;it != buf_end;it++) + uint8_t float_flag = 0; + is_signed_val = false; + size_t chars = 0; + std::string::const_iterator it = star_end_string; + if (it != buf_end && *it == '-') + { + is_signed_val = true; + ++chars; + ++it; + } + for(;it != buf_end;it++) { - if(isdigit(*it) || (it == star_end_string && *it == '-') || (val.size() && *it == '.' ) || (is_float_val && (*it == 'e' || *it == 'E' || *it == '-' || *it == '+' )) ) + const uint8_t flags = lut[(uint8_t)*it]; + if (flags & 16) { - if(!val.size() && *it == '-') - is_signed_val = true; - if(*it == '.' ) - is_float_val = true; - val.push_back(*it); + float_flag |= flags; + ++chars; } else { + val = boost::string_ref(&*star_end_string, chars); if(val.size()) { star_end_string = --it; + is_float_val = !!(float_flag & 2); return; } else @@ -186,7 +230,7 @@ namespace misc_utils } ASSERT_MES_AND_THROW("wrong number in json entry: " << std::string(star_end_string, buf_end)); } - inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) + inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val) { try { @@ -199,15 +243,15 @@ namespace misc_utils return false; } } - inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) + inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val) { val.clear(); for(std::string::const_iterator it = star_end_string;it != buf_end;it++) { - if(!isalpha(*it)) + if (!(lut[(uint8_t)*it] & 4)) { - val.assign(star_end_string, it); + val = boost::string_ref(&*star_end_string, std::distance(star_end_string, it)); if(val.size()) { star_end_string = --it; @@ -218,7 +262,7 @@ namespace misc_utils } ASSERT_MES_AND_THROW("failed to match word number in json entry: " << std::string(star_end_string, buf_end)); } - inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val) + inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val) { try { diff --git a/contrib/epee/include/storages/portable_storage_from_json.h b/contrib/epee/include/storages/portable_storage_from_json.h index 0307b732c..3e3052541 100644 --- a/contrib/epee/include/storages/portable_storage_from_json.h +++ b/contrib/epee/include/storages/portable_storage_from_json.h @@ -39,7 +39,7 @@ namespace epee { namespace json { -#define CHECK_ISSPACE() if(!isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));} +#define CHECK_ISSPACE() if(!epee::misc_utils::parse::isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));} /*inline void parse_error() { @@ -114,11 +114,11 @@ namespace epee std::string val; match_string2(it, buf_end, val); //insert text value - stg.set_value(name, val, current_section); + stg.set_value(name, std::move(val), current_section); state = match_state_wonder_after_value; - }else if (isdigit(*it) || *it == '-') + }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-') {//just a named number value started - std::string val; + boost::string_ref val; bool is_v_float = false;bool is_signed = false; match_number2(it, buf_end, val, is_v_float, is_signed); if(!is_v_float) @@ -126,27 +126,27 @@ namespace epee if(is_signed) { errno = 0; - int64_t nval = strtoll(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + int64_t nval = strtoll(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); stg.set_value(name, nval, current_section); }else { errno = 0; - uint64_t nval = strtoull(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + uint64_t nval = strtoull(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); stg.set_value(name, nval, current_section); } }else { errno = 0; - double nval = strtod(val.c_str(), NULL); - if (errno) throw std::runtime_error("Invalid number: " + val); + double nval = strtod(val.data(), NULL); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); stg.set_value(name, nval, current_section); } state = match_state_wonder_after_value; }else if(isalpha(*it) ) {// could be null, true or false - std::string word; + boost::string_ref word; match_word2(it, buf_end, word); if(boost::iequals(word, "null")) { @@ -203,13 +203,13 @@ namespace epee //mean array of strings std::string val; match_string2(it, buf_end, val); - h_array = stg.insert_first_value(name, val, current_section); + h_array = stg.insert_first_value(name, std::move(val), current_section); CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values entry"); state = match_state_array_after_value; array_md = array_mode_string; - }else if (isdigit(*it) || *it == '-') + }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-') {//array of numbers value started - std::string val; + boost::string_ref val; bool is_v_float = false;bool is_signed_val = false; match_number2(it, buf_end, val, is_v_float, is_signed_val); if(!is_v_float) @@ -217,22 +217,22 @@ namespace epee if (is_signed_val) { errno = 0; - int64_t nval = strtoll(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + int64_t nval = strtoll(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); h_array = stg.insert_first_value(name, nval, current_section); }else { errno = 0; - uint64_t nval = strtoull(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + uint64_t nval = strtoull(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); h_array = stg.insert_first_value(name, nval, current_section); } CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry"); }else { errno = 0; - double nval = strtod(val.c_str(), NULL); - if (errno) throw std::runtime_error("Invalid number: " + val); + double nval = strtod(val.data(), NULL); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); h_array = stg.insert_first_value(name, nval, current_section); CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry"); } @@ -245,7 +245,7 @@ namespace epee state = match_state_wonder_after_value; }else if(isalpha(*it) ) {// array of booleans - std::string word; + boost::string_ref word; match_word2(it, buf_end, word); if(boost::iequals(word, "true")) { @@ -291,15 +291,15 @@ namespace epee { std::string val; match_string2(it, buf_end, val); - bool res = stg.insert_next_value(h_array, val); + bool res = stg.insert_next_value(h_array, std::move(val)); CHECK_AND_ASSERT_THROW_MES(res, "failed to insert values"); state = match_state_array_after_value; }else CHECK_ISSPACE(); break; case array_mode_numbers: - if (isdigit(*it) || *it == '-') + if (epee::misc_utils::parse::isdigit(*it) || *it == '-') {//array of numbers value started - std::string val; + boost::string_ref val; bool is_v_float = false;bool is_signed_val = false; match_number2(it, buf_end, val, is_v_float, is_signed_val); bool insert_res = false; @@ -308,21 +308,21 @@ namespace epee if (is_signed_val) { errno = 0; - int64_t nval = strtoll(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + int64_t nval = strtoll(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); insert_res = stg.insert_next_value(h_array, nval); }else { errno = 0; - uint64_t nval = strtoull(val.c_str(), NULL, 10); - if (errno) throw std::runtime_error("Invalid number: " + val); + uint64_t nval = strtoull(val.data(), NULL, 10); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); insert_res = stg.insert_next_value(h_array, nval); } }else { errno = 0; - double nval = strtod(val.c_str(), NULL); - if (errno) throw std::runtime_error("Invalid number: " + val); + double nval = strtod(val.data(), NULL); + if (errno) throw std::runtime_error("Invalid number: " + std::string(val)); insert_res = stg.insert_next_value(h_array, nval); } CHECK_AND_ASSERT_THROW_MES(insert_res, "Failed to insert next value"); @@ -333,7 +333,7 @@ namespace epee case array_mode_booleans: if(isalpha(*it) ) {// array of booleans - std::string word; + boost::string_ref word; match_word2(it, buf_end, word); if(boost::iequals(word, "true")) { diff --git a/tests/unit_tests/epee_utils.cpp b/tests/unit_tests/epee_utils.cpp index 75cf2fdd4..3d5882d7d 100644 --- a/tests/unit_tests/epee_utils.cpp +++ b/tests/unit_tests/epee_utils.cpp @@ -50,6 +50,7 @@ #include "p2p/net_peerlist_boost_serialization.h" #include "span.h" #include "string_tools.h" +#include "storages/parserse_base_utils.h" namespace { @@ -833,3 +834,86 @@ TEST(net_buffer, move) ASSERT_TRUE(!memcmp(span.data() + 1, std::string(4000, '0').c_str(), 4000)); } +TEST(parsing, isspace) +{ + ASSERT_FALSE(epee::misc_utils::parse::isspace(0)); + for (int c = 1; c < 256; ++c) + { + ASSERT_EQ(epee::misc_utils::parse::isspace(c), strchr("\r\n\t\f\v ", c) != NULL); + } +} + +TEST(parsing, isdigit) +{ + ASSERT_FALSE(epee::misc_utils::parse::isdigit(0)); + for (int c = 1; c < 256; ++c) + { + ASSERT_EQ(epee::misc_utils::parse::isdigit(c), strchr("0123456789", c) != NULL); + } +} + +TEST(parsing, number) +{ + boost::string_ref val; + std::string s; + std::string::const_iterator i; + + // the parser expects another character to end the number, and accepts things + // that aren't numbers, as it's meant as a pre-filter for strto* functions, + // so we just check that numbers get accepted, but don't test non numbers + + s = "0 "; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "0"); + + s = "000 "; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "000"); + + s = "10x"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "10"); + + s = "10.09/"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "10.09"); + + s = "-1.r"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "-1."); + + s = "-49.;"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "-49."); + + s = "0.78/"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "0.78"); + + s = "33E9$"; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "33E9"); + + s = ".34e2="; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, ".34e2"); + + s = "-9.34e-2="; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "-9.34e-2"); + + s = "+9.34e+03="; + i = s.begin(); + epee::misc_utils::parse::match_number(i, s.end(), val); + ASSERT_EQ(val, "+9.34e+03"); +} |