aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Riccardo Spagni <ric@spagni.net> 2019-01-18 09:24:41 +0200
committer Riccardo Spagni <ric@spagni.net> 2019-01-18 09:24:41 +0200
commit b65106ce9390d615d13fd6a3d6bb265a17332cc3 (patch)
tree 08c52d58cdfdc816d6fcfd00208a0f38df793cc4
parent Merge pull request #5014 (diff)
parent epee: speedup word/number matching (diff)
download monero-b65106ce9390d615d13fd6a3d6bb265a17332cc3.tar.xz
Merge pull request #5017
21777daf epee: speedup word/number matching (moneromooo-monero)
-rw-r--r-- contrib/epee/include/storages/parserse_base_utils.h 72
-rw-r--r-- contrib/epee/include/storages/portable_storage_from_json.h 62
-rw-r--r-- tests/unit_tests/epee_utils.cpp 84
3 files changed, 173 insertions, 45 deletions
diff --git a/contrib/epee/include/storages/parserse_base_utils.h b/contrib/epee/include/storages/parserse_base_utils.h
index d73fbde3a..69b650cd4 100644
--- a/contrib/epee/include/storages/parserse_base_utils.h
+++ b/contrib/epee/include/storages/parserse_base_utils.h
@@ -29,6 +29,7 @@
#pragma once
#include <algorithm>
+#include <boost/utility/string_ref.hpp>
namespace epee
{
@@ -36,6 +37,40 @@ namespace misc_utils
{
namespace parse
{
+ // 1: decimal digit ('0'-'9')
+ // 2: '.', 'e' or 'E' (its presence marks a number as floating point)
+ // 4: ASCII letter ('A'-'Z', 'a'-'z')
+ // 8: whitespace (tab, LF, VT, FF, CR, space)
+ // 16: may appear in a number (digits, '+', '-', '.', 'e', 'E'); does not by itself mean it's a float
+ static const constexpr uint8_t lut[256]={ // per-byte bitmask of the flags listed above; a row's trailing "// N" is the index just past that row
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, // 16
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32
+ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 16, 18, 0, // 48
+ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, // 64
+ 0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 96
+ 0, 4, 4, 4, 4, 22, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 112
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 128
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // from here on (bytes 128..255) no flags are set
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ inline bool isspace(char c) // true iff lut flags c as whitespace (bit 8): tab, LF, VT, FF, CR or space; pure table lookup
+ {
+ return lut[(uint8_t)c] & 8; // cast to uint8_t so a negative char value indexes 128..255 instead of going out of bounds
+ }
+
+ inline bool isdigit(char c) // true iff lut flags c as a decimal digit (bit 1): '0'..'9'; pure table lookup
+ {
+ return lut[(uint8_t)c] & 1; // cast to uint8_t so a negative char value indexes 128..255 instead of going out of bounds
+ }
+
inline std::string transform_to_escape_sequence(const std::string& src)
{
static const char escaped[] = "\b\f\n\r\t\v\"\\/";
@@ -159,25 +194,34 @@ namespace misc_utils
return false;
}
}
- inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val, bool& is_float_val, bool& is_signed_val)
+ inline void match_number2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val, bool& is_float_val, bool& is_signed_val)
{
val.clear();
- is_float_val = false;
- for(std::string::const_iterator it = star_end_string;it != buf_end;it++)
+ uint8_t float_flag = 0;
+ is_signed_val = false;
+ size_t chars = 0;
+ std::string::const_iterator it = star_end_string;
+ if (it != buf_end && *it == '-')
+ {
+ is_signed_val = true;
+ ++chars;
+ ++it;
+ }
+ for(;it != buf_end;it++)
{
- if(isdigit(*it) || (it == star_end_string && *it == '-') || (val.size() && *it == '.' ) || (is_float_val && (*it == 'e' || *it == 'E' || *it == '-' || *it == '+' )) )
+ const uint8_t flags = lut[(uint8_t)*it];
+ if (flags & 16)
{
- if(!val.size() && *it == '-')
- is_signed_val = true;
- if(*it == '.' )
- is_float_val = true;
- val.push_back(*it);
+ float_flag |= flags;
+ ++chars;
}
else
{
+ val = boost::string_ref(&*star_end_string, chars);
if(val.size())
{
star_end_string = --it;
+ is_float_val = !!(float_flag & 2);
return;
}
else
@@ -186,7 +230,7 @@ namespace misc_utils
}
ASSERT_MES_AND_THROW("wrong number in json entry: " << std::string(star_end_string, buf_end));
}
- inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
+ inline bool match_number(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{
try
{
@@ -199,15 +243,15 @@ namespace misc_utils
return false;
}
}
- inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
+ inline void match_word2(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{
val.clear();
for(std::string::const_iterator it = star_end_string;it != buf_end;it++)
{
- if(!isalpha(*it))
+ if (!(lut[(uint8_t)*it] & 4))
{
- val.assign(star_end_string, it);
+ val = boost::string_ref(&*star_end_string, std::distance(star_end_string, it));
if(val.size())
{
star_end_string = --it;
@@ -218,7 +262,7 @@ namespace misc_utils
}
ASSERT_MES_AND_THROW("failed to match word number in json entry: " << std::string(star_end_string, buf_end));
}
- inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, std::string& val)
+ inline bool match_word(std::string::const_iterator& star_end_string, std::string::const_iterator buf_end, boost::string_ref& val)
{
try
{
diff --git a/contrib/epee/include/storages/portable_storage_from_json.h b/contrib/epee/include/storages/portable_storage_from_json.h
index 0307b732c..3e3052541 100644
--- a/contrib/epee/include/storages/portable_storage_from_json.h
+++ b/contrib/epee/include/storages/portable_storage_from_json.h
@@ -39,7 +39,7 @@ namespace epee
{
namespace json
{
-#define CHECK_ISSPACE() if(!isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));}
+#define CHECK_ISSPACE() if(!epee::misc_utils::parse::isspace(*it)){ ASSERT_MES_AND_THROW("Wrong JSON character at: " << std::string(it, buf_end));}
/*inline void parse_error()
{
@@ -114,11 +114,11 @@ namespace epee
std::string val;
match_string2(it, buf_end, val);
//insert text value
- stg.set_value(name, val, current_section);
+ stg.set_value(name, std::move(val), current_section);
state = match_state_wonder_after_value;
- }else if (isdigit(*it) || *it == '-')
+ }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//just a named number value started
- std::string val;
+ boost::string_ref val;
bool is_v_float = false;bool is_signed = false;
match_number2(it, buf_end, val, is_v_float, is_signed);
if(!is_v_float)
@@ -126,27 +126,27 @@ namespace epee
if(is_signed)
{
errno = 0;
- int64_t nval = strtoll(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ int64_t nval = strtoll(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section);
}else
{
errno = 0;
- uint64_t nval = strtoull(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ uint64_t nval = strtoull(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section);
}
}else
{
errno = 0;
- double nval = strtod(val.c_str(), NULL);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ double nval = strtod(val.data(), NULL);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
stg.set_value(name, nval, current_section);
}
state = match_state_wonder_after_value;
}else if(isalpha(*it) )
{// could be null, true or false
- std::string word;
+ boost::string_ref word;
match_word2(it, buf_end, word);
if(boost::iequals(word, "null"))
{
@@ -203,13 +203,13 @@ namespace epee
//mean array of strings
std::string val;
match_string2(it, buf_end, val);
- h_array = stg.insert_first_value(name, val, current_section);
+ h_array = stg.insert_first_value(name, std::move(val), current_section);
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values entry");
state = match_state_array_after_value;
array_md = array_mode_string;
- }else if (isdigit(*it) || *it == '-')
+ }else if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//array of numbers value started
- std::string val;
+ boost::string_ref val;
bool is_v_float = false;bool is_signed_val = false;
match_number2(it, buf_end, val, is_v_float, is_signed_val);
if(!is_v_float)
@@ -217,22 +217,22 @@ namespace epee
if (is_signed_val)
{
errno = 0;
- int64_t nval = strtoll(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ int64_t nval = strtoll(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section);
}else
{
errno = 0;
- uint64_t nval = strtoull(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ uint64_t nval = strtoull(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section);
}
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
}else
{
errno = 0;
- double nval = strtod(val.c_str(), NULL);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ double nval = strtod(val.data(), NULL);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
h_array = stg.insert_first_value(name, nval, current_section);
CHECK_AND_ASSERT_THROW_MES(h_array, " failed to insert values section entry");
}
@@ -245,7 +245,7 @@ namespace epee
state = match_state_wonder_after_value;
}else if(isalpha(*it) )
{// array of booleans
- std::string word;
+ boost::string_ref word;
match_word2(it, buf_end, word);
if(boost::iequals(word, "true"))
{
@@ -291,15 +291,15 @@ namespace epee
{
std::string val;
match_string2(it, buf_end, val);
- bool res = stg.insert_next_value(h_array, val);
+ bool res = stg.insert_next_value(h_array, std::move(val));
CHECK_AND_ASSERT_THROW_MES(res, "failed to insert values");
state = match_state_array_after_value;
}else CHECK_ISSPACE();
break;
case array_mode_numbers:
- if (isdigit(*it) || *it == '-')
+ if (epee::misc_utils::parse::isdigit(*it) || *it == '-')
{//array of numbers value started
- std::string val;
+ boost::string_ref val;
bool is_v_float = false;bool is_signed_val = false;
match_number2(it, buf_end, val, is_v_float, is_signed_val);
bool insert_res = false;
@@ -308,21 +308,21 @@ namespace epee
if (is_signed_val)
{
errno = 0;
- int64_t nval = strtoll(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ int64_t nval = strtoll(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval);
}else
{
errno = 0;
- uint64_t nval = strtoull(val.c_str(), NULL, 10);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ uint64_t nval = strtoull(val.data(), NULL, 10);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval);
}
}else
{
errno = 0;
- double nval = strtod(val.c_str(), NULL);
- if (errno) throw std::runtime_error("Invalid number: " + val);
+ double nval = strtod(val.data(), NULL);
+ if (errno) throw std::runtime_error("Invalid number: " + std::string(val));
insert_res = stg.insert_next_value(h_array, nval);
}
CHECK_AND_ASSERT_THROW_MES(insert_res, "Failed to insert next value");
@@ -333,7 +333,7 @@ namespace epee
case array_mode_booleans:
if(isalpha(*it) )
{// array of booleans
- std::string word;
+ boost::string_ref word;
match_word2(it, buf_end, word);
if(boost::iequals(word, "true"))
{
diff --git a/tests/unit_tests/epee_utils.cpp b/tests/unit_tests/epee_utils.cpp
index 75cf2fdd4..3d5882d7d 100644
--- a/tests/unit_tests/epee_utils.cpp
+++ b/tests/unit_tests/epee_utils.cpp
@@ -50,6 +50,7 @@
#include "p2p/net_peerlist_boost_serialization.h"
#include "span.h"
#include "string_tools.h"
+#include "storages/parserse_base_utils.h"
namespace
{
@@ -833,3 +834,86 @@ TEST(net_buffer, move)
ASSERT_TRUE(!memcmp(span.data() + 1, std::string(4000, '0').c_str(), 4000));
}
+TEST(parsing, isspace) // parse::isspace must accept exactly the bytes in "\r\n\t\f\v " and reject every other byte value
+{
+ ASSERT_FALSE(epee::misc_utils::parse::isspace(0)); // 0 is checked apart: strchr(s, 0) matches the terminating NUL, so the loop's oracle would wrongly report "found"
+ for (int c = 1; c < 256; ++c)
+ {
+ ASSERT_EQ(epee::misc_utils::parse::isspace(c), strchr("\r\n\t\f\v ", c) != NULL);
+ }
+}
+
+TEST(parsing, isdigit) // parse::isdigit must accept exactly '0'..'9' and reject every other byte value
+{
+ ASSERT_FALSE(epee::misc_utils::parse::isdigit(0)); // 0 is checked apart: strchr(s, 0) matches the terminating NUL, so the loop's oracle would wrongly report "found"
+ for (int c = 1; c < 256; ++c)
+ {
+ ASSERT_EQ(epee::misc_utils::parse::isdigit(c), strchr("0123456789", c) != NULL);
+ }
+}
+
+TEST(parsing, number) // each case feeds s to match_number and checks the text reported back in val
+{
+ boost::string_ref val;
+ std::string s;
+ std::string::const_iterator i;
+
+ // the parser expects another character to end the number, and accepts things
+ // that aren't numbers, as it's meant as a pre-filter for strto* functions,
+ // so we just check that numbers get accepted, but don't test non numbers
+
+ s = "0 ";
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val); // NOTE(review): the bool result is not asserted here or in any case below; only val is compared
+ ASSERT_EQ(val, "0");
+
+ s = "000 "; // leading zeros are kept verbatim
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "000");
+
+ s = "10x"; // the terminating character need not be whitespace
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "10");
+
+ s = "10.09/";
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "10.09");
+
+ s = "-1.r"; // a trailing '.' is part of the match
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "-1.");
+
+ s = "-49.;";
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "-49.");
+
+ s = "0.78/";
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "0.78");
+
+ s = "33E9$"; // exponent without a fractional part
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "33E9");
+
+ s = ".34e2="; // a leading '.' is accepted
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, ".34e2");
+
+ s = "-9.34e-2="; // sign inside the exponent is part of the match
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "-9.34e-2");
+
+ s = "+9.34e+03="; // a leading '+' is accepted
+ i = s.begin();
+ epee::misc_utils::parse::match_number(i, s.end(), val);
+ ASSERT_EQ(val, "+9.34e+03");
+}