aboutsummaryrefslogtreecommitdiff
path: root/src/ringct/multiexp.cc
diff options
context:
space:
mode:
author: moneromooo-monero <moneromooo-monero@users.noreply.github.com> 2018-03-25 12:17:37 +0100
committer: moneromooo-monero <moneromooo-monero@users.noreply.github.com> 2018-09-11 13:37:35 +0000
commit: 126196b017cd93ff399212b7315f9053511afb07 (patch)
tree: b8fa8d02e317a5e148def021ab0e44750db35688 /src/ringct/multiexp.cc
parent: aligned: aligned memory alloc/realloc/free (diff)
download: monero-126196b017cd93ff399212b7315f9053511afb07.tar.xz
multiexp: some speedups
- use a raw memory block to store cache
- use aligned memory
- use doubling API where appropriate
- calculate straus in bands
Diffstat (limited to 'src/ringct/multiexp.cc')
-rw-r--r-- src/ringct/multiexp.cc 144
1 files changed, 118 insertions, 26 deletions
diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc
index 4f16bd588..99bef25f3 100644
--- a/src/ringct/multiexp.cc
+++ b/src/ringct/multiexp.cc
@@ -34,6 +34,7 @@ extern "C"
{
#include "crypto/crypto-ops.h"
}
+#include "common/aligned.h"
#include "rctOps.h"
#include "multiexp.h"
@@ -43,6 +44,17 @@ extern "C"
//#define MULTIEXP_PERF(x) x
#define MULTIEXP_PERF(x)
+#define RAW_MEMORY_BLOCK
+//#define ALTERNATE_LAYOUT
+//#define TRACK_STRAUS_ZERO_IDENTITY
+
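+// Timings in microseconds per point, for N points processed in bands of B points (column headers are N/B).
+// "raw" and "alt" presumably stand for RAW_MEMORY_BLOCK and ALTERNATE_LAYOUT (1 = defined, 0 = not defined).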
+// raw alt 128/192 4096/192 4096/4096
+// 0 0 52.6 71 71.2
+// 0 1 53.2 72.2 72.4
+// 1 0 52.7 67 67.1
+// 1 1 52.8 70.4 70.2
+
namespace rct
{
@@ -198,6 +210,7 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
ge_cached cached;
ge_p1p1 p1;
+ ge_p2 p2;
MULTIEXP_PERF(PERF_TIMER_RESUME(div));
while (1)
@@ -214,8 +227,8 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
std::push_heap(heap.begin(), heap.end(), Comp);
}
data[index1].scalar = div2(data[index1].scalar);
- ge_p3_to_cached(&cached, &data[index1].point);
- ge_add(&p1, &data[index1].point, &cached);
+ ge_p3_to_p2(&p2, &data[index1].point);
+ ge_p2_dbl(&p1, &p2);
ge_p1p1_to_p3(&data[index1].point, &p1);
}
MULTIEXP_PERF(PERF_TIMER_PAUSE(div));
@@ -259,12 +272,32 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
return res;
}
+static constexpr unsigned int STRAUS_C = 4;
+
+// Precomputed multiples of each input point, built by straus_init_cache and
+// read back by straus.
struct straus_cached_data
{
+#ifdef RAW_MEMORY_BLOCK
+ // Number of points covered by the block, and the block itself. Entries are
+ // addressed through the CACHE_OFFSET macro.
+ size_t size;
+ ge_cached *multiples;
+ straus_cached_data(): size(0), multiples(NULL) {}
+ ~straus_cached_data() { aligned_free(multiples); }
+ // The destructor frees `multiples`, so a copy would free the same block
+ // twice: forbid copying.
+ straus_cached_data(const straus_cached_data&) = delete;
+ straus_cached_data &operator=(const straus_cached_data&) = delete;
+#else
std::vector<std::vector<ge_cached>> multiples;
+#endif
};
-
-static constexpr unsigned int STRAUS_C = 4;
+// CACHE_OFFSET(cache,point,digit): the cache entry holding `digit` times input
+// point number `point` (digit >= 1) in `cache`, for the selected storage layout.
+// Every variant uses only its own, parenthesised, arguments, so the macro does
+// not depend on what the variables at the call site happen to be named.
+#ifdef RAW_MEMORY_BLOCK
+#ifdef ALTERNATE_LAYOUT
+#define CACHE_OFFSET(cache,point,digit) ((cache)->multiples[(point)*((1<<STRAUS_C)-1)+((digit)-1)])
+#else
+#define CACHE_OFFSET(cache,point,digit) ((cache)->multiples[(point)+(cache)->size*((digit)-1)])
+#endif
+#else
+#ifdef ALTERNATE_LAYOUT
+#define CACHE_OFFSET(cache,point,digit) ((cache)->multiples[(point)][(digit)-1])
+#else
+#define CACHE_OFFSET(cache,point,digit) ((cache)->multiples[(digit)][(point)])
+#endif
+#endif
std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<MultiexpData> &data)
{
@@ -274,6 +307,36 @@ std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<Multiexp
ge_p3 p3;
std::shared_ptr<straus_cached_data> cache(new straus_cached_data());
+#ifdef RAW_MEMORY_BLOCK
+ const size_t offset = cache->size;
+ cache->multiples = (ge_cached*)aligned_realloc(cache->multiples, sizeof(ge_cached) * ((1<<STRAUS_C)-1) * std::max(offset, data.size()), 4096);
+ cache->size = data.size();
+ for (size_t j=offset;j<data.size();++j)
+ {
+ ge_p3_to_cached(&CACHE_OFFSET(cache, j, 1), &data[j].point);
+ for (size_t i=2;i<1<<STRAUS_C;++i)
+ {
+ ge_add(&p1, &data[j].point, &CACHE_OFFSET(cache, j, i-1));
+ ge_p1p1_to_p3(&p3, &p1);
+ ge_p3_to_cached(&CACHE_OFFSET(cache, j, i), &p3);
+ }
+ }
+#else
+#ifdef ALTERNATE_LAYOUT
+ const size_t offset = cache->multiples.size();
+ cache->multiples.resize(std::max(offset, data.size()));
+ for (size_t i = offset; i < data.size(); ++i)
+ {
+ cache->multiples[i].resize((1<<STRAUS_C)-1);
+ ge_p3_to_cached(&cache->multiples[i][0], &data[i].point);
+ for (size_t j=2;j<1<<STRAUS_C;++j)
+ {
+ ge_add(&p1, &data[i].point, &cache->multiples[i][j-2]);
+ ge_p1p1_to_p3(&p3, &p1);
+ ge_p3_to_cached(&cache->multiples[i][j-1], &p3);
+ }
+ }
+#else
cache->multiples.resize(1<<STRAUS_C);
size_t offset = cache->multiples[1].size();
cache->multiples[1].resize(std::max(offset, data.size()));
@@ -290,6 +353,8 @@ std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<Multiexp
ge_p3_to_cached(&cache->multiples[i][j], &p3);
}
}
+#endif
+#endif
MULTIEXP_PERF(PERF_TIMER_STOP(multiples));
return cache;
@@ -298,15 +363,20 @@ std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<Multiexp
+// Returns the number of bytes of cached multiples held by `cache`.
+// A null cache holds nothing, so its size is reported as 0.
size_t straus_get_cache_size(const std::shared_ptr<straus_cached_data> &cache)
{
size_t sz = 0;
+ // straus() accepts a null cache, so do not dereference one here either.
+ if (!cache)
+ return sz;
+#ifdef RAW_MEMORY_BLOCK
+ sz += cache->size * sizeof(ge_cached) * ((1<<STRAUS_C)-1);
+#else
for (const auto &e0: cache->multiples)
- sz += e0.size() * sizeof(ge_p3);
+ sz += e0.size() * sizeof(ge_cached);
+#endif
return sz;
}
-rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<straus_cached_data> &cache)
+rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<straus_cached_data> &cache, size_t STEP)
{
MULTIEXP_PERF(PERF_TIMER_UNIT(straus, 1000000));
bool HiGi = cache != NULL;
+ STEP = STEP ? STEP : 192;
MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000));
static constexpr unsigned int mask = (1<<STRAUS_C)-1;
@@ -315,9 +385,13 @@ rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<str
ge_p1p1 p1;
ge_p3 p3;
+#ifdef TRACK_STRAUS_ZERO_IDENTITY
+ MULTIEXP_PERF(PERF_TIMER_START_UNIT(skip, 1000000));
std::vector<uint8_t> skip(data.size());
for (size_t i = 0; i < data.size(); ++i)
skip[i] = data[i].scalar == rct::zero() || !memcmp(&data[i].point, &ge_p3_identity, sizeof(ge_p3));
+ MULTIEXP_PERF(PERF_TIMER_STOP(skip));
+#endif
MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000));
std::vector<std::vector<uint8_t>> digits;
@@ -361,35 +435,53 @@ rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<str
for (size_t i = 0; i < data.size(); ++i)
if (maxscalar < data[i].scalar)
maxscalar = data[i].scalar;
- size_t i = 0;
- while (i < 256 && !(maxscalar < pow2(i)))
- i += STRAUS_C;
+ size_t start_i = 0;
+ while (start_i < 256 && !(maxscalar < pow2(start_i)))
+ start_i += STRAUS_C;
MULTIEXP_PERF(PERF_TIMER_STOP(setup));
ge_p3 res_p3 = ge_p3_identity;
- if (!(i < STRAUS_C))
- goto skipfirst;
- while (!(i < STRAUS_C))
+
+ for (size_t start_offset = 0; start_offset < data.size(); start_offset += STEP)
{
- for (size_t j = 0; j < STRAUS_C; ++j)
+ const size_t num_points = std::min(data.size() - start_offset, STEP);
+
+ ge_p3 band_p3 = ge_p3_identity;
+ size_t i = start_i;
+ if (!(i < STRAUS_C))
+ goto skipfirst;
+ while (!(i < STRAUS_C))
{
- ge_p3_to_cached(&cached, &res_p3);
- ge_add(&p1, &res_p3, &cached);
- ge_p1p1_to_p3(&res_p3, &p1);
- }
+ ge_p2 p2;
+ ge_p3_to_p2(&p2, &band_p3);
+ for (size_t j = 0; j < STRAUS_C; ++j)
+ {
+ ge_p2_dbl(&p1, &p2);
+ if (j == STRAUS_C - 1)
+ ge_p1p1_to_p3(&band_p3, &p1);
+ else
+ ge_p1p1_to_p2(&p2, &p1);
+ }
skipfirst:
- i -= STRAUS_C;
- for (size_t j = 0; j < data.size(); ++j)
- {
- if (skip[j])
- continue;
- int digit = digits[j][i];
- if (digit)
+ i -= STRAUS_C;
+ for (size_t j = start_offset; j < start_offset + num_points; ++j)
{
- ge_add(&p1, &res_p3, &local_cache->multiples[digit][j]);
- ge_p1p1_to_p3(&res_p3, &p1);
+#ifdef TRACK_STRAUS_ZERO_IDENTITY
+ if (skip[j])
+ continue;
+#endif
+ const uint8_t digit = digits[j][i];
+ if (digit)
+ {
+ ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit));
+ ge_p1p1_to_p3(&band_p3, &p1);
+ }
}
}
+
+ ge_p3_to_cached(&cached, &band_p3);
+ ge_add(&p1, &res_p3, &cached);
+ ge_p1p1_to_p3(&res_p3, &p1);
}
rct::key res;