about summary refs log tree commit diff
diff options
context:
space:
mode:
author moneromooo-monero <moneromooo-monero@users.noreply.github.com> 2017-12-12 13:44:11 +0000
committer moneromooo-monero <moneromooo-monero@users.noreply.github.com> 2017-12-23 11:29:58 +0000
commit bd1f6029a367e411b66d4ff49f2537947e9d300f (patch)
tree 00498c925258c39d2e1addba7dc8a30bf75ba5b5
parent tests: add levin fuzz test (diff)
download monero-bd1f6029a367e411b66d4ff49f2537947e9d300f.tar.xz
http_client: rewrite header parsing manually for speed
boost::regex is stupendously atrocious at parsing malformed data
-rw-r--r-- contrib/epee/include/net/http_client.h 169
1 files changed, 95 insertions, 74 deletions
diff --git a/contrib/epee/include/net/http_client.h b/contrib/epee/include/net/http_client.h
index 5c448fcb3..1edf65928 100644
--- a/contrib/epee/include/net/http_client.h
+++ b/contrib/epee/include/net/http_client.h
@@ -27,6 +27,7 @@
#pragma once
+#include <ctype.h>
#include <boost/shared_ptr.hpp>
#include <boost/regex.hpp>
#include <boost/lexical_cast.hpp>
@@ -752,87 +753,107 @@ namespace net_utils
return true;
}
//---------------------------------------------------------------------------
- inline
- bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process)
- {
+ inline bool parse_header(http_header_info& body_info, const std::string& m_cache_to_process)
+ {
MTRACE("http_stream_filter::parse_cached_header(*)");
-
- STATIC_REGEXP_EXPR_1(rexp_mach_field,
- "\n?((Connection)|(Referer)|(Content-Length)|(Content-Type)|(Transfer-Encoding)|(Content-Encoding)|(Host)|(Cookie)|(User-Agent)|(Origin)"
- // 12 3 4 5 6 7 8 9 10 11
- "|([\\w-]+?)) ?: ?((.*?)(\r?\n))[^\t ]",
- //12 13 14 15
- boost::regex::icase | boost::regex::normal);
-
- boost::smatch result;
- std::string::const_iterator it_current_bound = m_cache_to_process.begin();
- std::string::const_iterator it_end_bound = m_cache_to_process.end();
-
-
- //lookup all fields and fill well-known fields
- while( boost::regex_search( it_current_bound, it_end_bound, result, rexp_mach_field, boost::match_default) && result[0].matched)
+ const char *ptr = m_cache_to_process.c_str();
+ while (ptr[0] != '\r' || ptr[1] != '\n')
{
- const size_t field_val = 14;
- //const size_t field_etc_name = 11;
-
- int i = 2; //start position = 2
- if(result[i++].matched)//"Connection"
- body_info.m_connection = result[field_val];
- else if(result[i++].matched)//"Referrer"
- body_info.m_referer = result[field_val];
- else if(result[i++].matched)//"Content-Length"
- body_info.m_content_length = result[field_val];
- else if(result[i++].matched)//"Content-Type"
- body_info.m_content_type = result[field_val];
- else if(result[i++].matched)//"Transfer-Encoding"
- body_info.m_transfer_encoding = result[field_val];
- else if(result[i++].matched)//"Content-Encoding"
- body_info.m_content_encoding = result[field_val];
- else if(result[i++].matched)//"Host"
- { body_info.m_host = result[field_val];
- string_tools::trim(body_info.m_host);
+ // optional \n
+ if (*ptr == '\n')
+ ++ptr;
+ // an identifier composed of letters or -
+ const char *key_pos = ptr;
+ while (isalnum(*ptr) || *ptr == '_' || *ptr == '-')
+ ++ptr;
+ const char *key_end = ptr;
+ // optional space (not in RFC, but in previous code)
+ if (*ptr == ' ')
+ ++ptr;
+ CHECK_AND_ASSERT_MES(*ptr == ':', true, "http_stream_filter::parse_cached_header() invalid header in: " << m_cache_to_process);
+ ++ptr;
+ // optional whitespace, but not newlines - line folding is obsolete, let's ignore it
+ while (isblank(*ptr))
+ ++ptr;
+ const char *value_pos = ptr;
+ while (*ptr != '\r' && *ptr != '\n')
+ ++ptr;
+ const char *value_end = ptr;
+ // optional trailing whitespace
+ while (value_end > value_pos && isblank(*(value_end-1)))
+ --value_end;
+ if (*ptr == '\r')
+ ++ptr;
+ CHECK_AND_ASSERT_MES(*ptr == '\n', true, "http_stream_filter::parse_cached_header() invalid header in: " << m_cache_to_process);
+ ++ptr;
+
+ const std::string key = std::string(key_pos, key_end - key_pos);
+ const std::string value = std::string(value_pos, value_end - value_pos);
+ if (!key.empty())
+ {
+ if (!string_tools::compare_no_case(key, "Connection"))
+ body_info.m_connection = value;
+ else if(!string_tools::compare_no_case(key, "Referrer"))
+ body_info.m_referer = value;
+ else if(!string_tools::compare_no_case(key, "Content-Length"))
+ body_info.m_content_length = value;
+ else if(!string_tools::compare_no_case(key, "Content-Type"))
+ body_info.m_content_type = value;
+ else if(!string_tools::compare_no_case(key, "Transfer-Encoding"))
+ body_info.m_transfer_encoding = value;
+ else if(!string_tools::compare_no_case(key, "Content-Encoding"))
+ body_info.m_content_encoding = value;
+ else if(!string_tools::compare_no_case(key, "Host"))
+ body_info.m_host = value;
+ else if(!string_tools::compare_no_case(key, "Cookie"))
+ body_info.m_cookie = value;
+ else if(!string_tools::compare_no_case(key, "User-Agent"))
+ body_info.m_user_agent = value;
+ else if(!string_tools::compare_no_case(key, "Origin"))
+ body_info.m_origin = value;
+ else
+ body_info.m_etc_fields.emplace_back(key, value);
}
- else if(result[i++].matched)//"Cookie"
- body_info.m_cookie = result[field_val];
- else if(result[i++].matched)//"User-Agent"
- body_info.m_user_agent = result[field_val];
- else if(result[i++].matched)//"Origin"
- body_info.m_origin = result[field_val];
- else if(result[i++].matched)//e.t.c (HAVE TO BE MATCHED!)
- body_info.m_etc_fields.emplace_back(result[12], result[field_val]);
- else
- {CHECK_AND_ASSERT_MES(false, false, "http_stream_filter::parse_cached_header() not matched last entry in:"<<m_cache_to_process);}
-
- it_current_bound = result[(int)result.size()-1]. first;
}
- return true;
-
+ return true;
}
- inline
- bool analize_first_response_line()
+ //---------------------------------------------------------------------------
+ inline bool analize_first_response_line()
{
-
- //First line response, look like this: "HTTP/1.1 200 OK"
- STATIC_REGEXP_EXPR_1(rexp_match_first_response_line, "^HTTP/(\\d+).(\\d+) ((\\d)\\d{2})( [^\n]*)?\r?\n", boost::regex::icase | boost::regex::normal);
- // 1 2 34 5
- //size_t match_len = 0;
- boost::smatch result;
- if(boost::regex_search( m_header_cache, result, rexp_match_first_response_line, boost::match_default) && result[0].matched)
- {
- CHECK_AND_ASSERT_MES(result[1].matched&&result[2].matched, false, "http_stream_filter::handle_invoke_reply_line() assert failed...");
- m_response_info.m_http_ver_hi = boost::lexical_cast<int>(result[1]);
- m_response_info.m_http_ver_lo = boost::lexical_cast<int>(result[2]);
- m_response_info.m_response_code = boost::lexical_cast<int>(result[3]);
-
- m_header_cache.erase(to_nonsonst_iterator(m_header_cache, result[0].first), to_nonsonst_iterator(m_header_cache, result[0].second));
- return true;
- }else
- {
- LOG_ERROR("http_stream_filter::handle_invoke_reply_line(): Failed to match first response line:" << m_header_cache);
- return false;
- }
-
+ //First line response, look like this: "HTTP/1.1 200 OK"
+ const char *ptr = m_header_cache.c_str();
+ CHECK_AND_ASSERT_MES(!memcmp(ptr, "HTTP/", 5), false, "Invalid first response line: " + m_header_cache);
+ ptr += 5;
+ CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache);
+ unsigned long ul;
+ char *end;
+ ul = strtoul(ptr, &end, 10);
+ CHECK_AND_ASSERT_MES(ul <= INT_MAX && *end =='.', false, "Invalid first response line: " + m_header_cache);
+ m_response_info.m_http_ver_hi = ul;
+ ptr = end + 1;
+ CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache + ", ptr: " << ptr);
+ ul = strtoul(ptr, &end, 10);
+ CHECK_AND_ASSERT_MES(ul <= INT_MAX && isblank(*end), false, "Invalid first response line: " + m_header_cache + ", ptr: " << ptr);
+ m_response_info.m_http_ver_lo = ul;
+ ptr = end + 1;
+ while (isblank(*ptr))
+ ++ptr;
+ CHECK_AND_ASSERT_MES(isdigit(*ptr), false, "Invalid first response line: " + m_header_cache);
+ ul = strtoul(ptr, &end, 10);
+ CHECK_AND_ASSERT_MES(ul >= 100 && ul <= 999 && isspace(*end), false, "Invalid first response line: " + m_header_cache);
+ m_response_info.m_response_code = ul;
+ ptr = end;
+ // ignore the optional text, till the end
+ while (*ptr != '\r' && *ptr != '\n')
+ ++ptr;
+ if (*ptr == '\r')
+ ++ptr;
+ CHECK_AND_ASSERT_MES(*ptr == '\n', false, "Invalid first response line: " << m_header_cache);
+ ++ptr;
+
+ m_header_cache.erase(0, ptr - m_header_cache.c_str());
+ return true;
}
inline
bool set_reply_content_encoder()