about | summary | refs | log | tree | commit | diff
path: root/src/ringct/multiexp.cc
diff options
context:
space:
mode:
author: Riccardo Spagni <ric@spagni.net> 2018-11-04 20:46:41 +0200
committer: Riccardo Spagni <ric@spagni.net> 2018-11-04 20:46:42 +0200
commit: 6d3d8635bea2ca04937521a2b03215c7fe8e3262 (patch)
tree: ca455c8c2c75ca1dc533f43a7b30825fbc02c590 /src/ringct/multiexp.cc
parent: Merge pull request #4692 (diff)
parent: multiexp: some minor speedups (diff)
download: monero-6d3d8635bea2ca04937521a2b03215c7fe8e3262.tar.xz
Merge pull request #4693
74fb3d88 multiexp: some minor speedups (moneromooo-monero)
a6d2e246 bulletproofs: only enable profiling on request (moneromooo-monero)
a110e6aa multiexp: tune which variants to use for which number of points (moneromooo-monero)
8b476722 bulletproofs: speedup prover (moneromooo-monero)
6f9ae5b6 multiexp: handle pippenger multiexps with part precalc (moneromooo-monero)
10e5a927 bulletproofs: maintain -z4, -z5, and -y0 to avoid subtractions (moneromooo-monero)
8629a42c bulletproofs: rework flow to use sarang's fast batch inversion code (moneromooo-monero)
fc9f7d9c bulletproofs: merge multiexps as per sarang's new python code (moneromooo-monero)
4061960a multiexp: pack the digits table when STRAUS_C is 4 (moneromooo-monero)
bf8e4b98 bulletproofs: some more minor speedup (moneromooo-monero)
c415df97 performance_tests: sc_check and ge_dsm_precomp (moneromooo-monero)
a281b950 bulletproofs: remove single value prover (moneromooo-monero)
484155d0 bulletproofs: some more speedup (moneromooo-monero)
a621d6c8 bulletproofs: random minor speedups (moneromooo-monero)
a49a1761 bulletproofs: shave off a lot of scalar muls from the g/h construction (moneromooo-monero)
4564a5d1 bulletproofs: speedup PROVE (moneromooo-monero)
Diffstat (limited to 'src/ringct/multiexp.cc')
-rw-r--r-- src/ringct/multiexp.cc 130
1 files changed, 88 insertions, 42 deletions
diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc
index 21957b94c..6f77fed34 100644
--- a/src/ringct/multiexp.cc
+++ b/src/ringct/multiexp.cc
@@ -79,6 +79,25 @@ extern "C"
// Best/cached Straus Straus Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip
// Best/uncached Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip Pip Pip
+// New timings:
+// Pippenger:
+// 2/1 always
+// 3/2 at ~13
+// 4/3 at ~29
+// 5/4 at ~83
+// 6/5 < 200
+// 7/6 at ~470
+// 8/7 at ~1180
+// 9/8 at ~2290
+// Cached Pippenger:
+// 6/5 < 200
+// 7/6 at 460
+// 8/7 at 1180
+// 9/8 at 2300
+//
+// Cached Straus/Pippenger cross at 232
+//
+
namespace rct
{
@@ -320,7 +339,7 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
return res;
}
-static constexpr unsigned int STRAUS_C = 4;
+#define STRAUS_C 4
struct straus_cached_data
{
@@ -447,28 +466,26 @@ rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<str
#endif
MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000));
+#if STRAUS_C==4
+ std::unique_ptr<uint8_t[]> digits{new uint8_t[64 * data.size()]};
+#else
std::unique_ptr<uint8_t[]> digits{new uint8_t[256 * data.size()]};
+#endif
for (size_t j = 0; j < data.size(); ++j)
{
- unsigned char bytes33[33];
- memcpy(bytes33, data[j].scalar.bytes, 32);
- bytes33[32] = 0;
- const unsigned char *bytes = bytes33;
-#if 1
- static_assert(STRAUS_C == 4, "optimized version needs STRAUS_C == 4");
+ const unsigned char *bytes = data[j].scalar.bytes;
+#if STRAUS_C==4
unsigned int i;
- for (i = 0; i < 256; i += 8, bytes++)
+ for (i = 0; i < 64; i += 2, bytes++)
{
- digits[j*256+i] = bytes[0] & 0xf;
- digits[j*256+i+1] = (bytes[0] >> 1) & 0xf;
- digits[j*256+i+2] = (bytes[0] >> 2) & 0xf;
- digits[j*256+i+3] = (bytes[0] >> 3) & 0xf;
- digits[j*256+i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf;
- digits[j*256+i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf;
- digits[j*256+i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf;
- digits[j*256+i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf;
+ digits[j*64+i] = bytes[0] & 0xf;
+ digits[j*64+i+1] = bytes[0] >> 4;
}
#elif 1
+ unsigned char bytes33[33];
+ memcpy(bytes33, data[j].scalar.bytes, 32);
+ bytes33[32] = 0;
+ bytes = bytes33;
for (size_t i = 0; i < 256; ++i)
digits[j*256+i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask;
#else
@@ -521,7 +538,11 @@ skipfirst:
if (skip[j])
continue;
#endif
+#if STRAUS_C==4
+ const uint8_t digit = digits[j*64+i/4];
+#else
const uint8_t digit = digits[j*256+i];
+#endif
if (digit)
{
ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit));
@@ -542,16 +563,13 @@ skipfirst:
size_t get_pippenger_c(size_t N)
{
-// uncached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9
-// cached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9
- if (N <= 2) return 1;
- if (N <= 8) return 2;
- if (N <= 16) return 3;
- if (N <= 64) return 4;
- if (N <= 128) return 5;
- if (N <= 256) return 6;
- if (N <= 1024) return 7;
- if (N <= 2048) return 8;
+ if (N <= 13) return 2;
+ if (N <= 29) return 3;
+ if (N <= 83) return 4;
+ if (N <= 185) return 5;
+ if (N <= 465) return 6;
+ if (N <= 1180) return 7;
+ if (N <= 2295) return 8;
return 9;
}
@@ -563,12 +581,13 @@ struct pippenger_cached_data
~pippenger_cached_data() { aligned_free(cached); }
};
-std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N)
+std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t start_offset, size_t N)
{
MULTIEXP_PERF(PERF_TIMER_START_UNIT(pippenger_init_cache, 1000000));
+ CHECK_AND_ASSERT_THROW_MES(start_offset <= data.size(), "Bad cache base data");
if (N == 0)
- N = data.size();
- CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data");
+ N = data.size() - start_offset;
+ CHECK_AND_ASSERT_THROW_MES(N <= data.size() - start_offset, "Bad cache base data");
ge_cached cached;
std::shared_ptr<pippenger_cached_data> cache(new pippenger_cached_data());
@@ -576,7 +595,7 @@ std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<Mu
cache->cached = (ge_cached*)aligned_realloc(cache->cached, N * sizeof(ge_cached), 4096);
CHECK_AND_ASSERT_THROW_MES(cache->cached, "Out of memory");
for (size_t i = 0; i < N; ++i)
- ge_p3_to_cached(&cache->cached[i], &data[i].point);
+ ge_p3_to_cached(&cache->cached[i], &data[i+start_offset].point);
MULTIEXP_PERF(PERF_TIMER_STOP(pippenger_init_cache));
return cache;
@@ -587,16 +606,21 @@ size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &ca
return cache->size * sizeof(*cache->cached);
}
-rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t c)
+rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t cache_size, size_t c)
{
- CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small");
+ if (cache != NULL && cache_size == 0)
+ cache_size = cache->size;
+ CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache_size <= cache->size, "Cache is too small");
if (c == 0)
c = get_pippenger_c(data.size());
CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large");
ge_p3 result = ge_p3_identity;
+ bool result_init = false;
std::unique_ptr<ge_p3[]> buckets{new ge_p3[1<<c]};
+ bool buckets_init[1<<9];
std::shared_ptr<pippenger_cached_data> local_cache = cache == NULL ? pippenger_init_cache(data) : cache;
+ std::shared_ptr<pippenger_cached_data> local_cache_2 = data.size() > cache_size ? pippenger_init_cache(data, cache_size) : NULL;
rct::key maxscalar = rct::zero();
for (size_t i = 0; i < data.size(); ++i)
@@ -611,7 +635,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
for (size_t k = groups; k-- > 0; )
{
- if (!ge_p3_is_point_at_infinity(&result))
+ if (result_init)
{
ge_p2 p2;
ge_p3_to_p2(&p2, &result);
@@ -625,8 +649,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
ge_p1p1_to_p2(&p2, &p1);
}
}
- for (size_t i = 0; i < (1u<<c); ++i)
- buckets[i] = ge_p3_identity;
+ memset(buckets_init, 0, 1u<<c);
// partition scalars into buckets
for (size_t i = 0; i < data.size(); ++i)
@@ -638,22 +661,45 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<
if (bucket == 0)
continue;
CHECK_AND_ASSERT_THROW_MES(bucket < (1u<<c), "bucket overflow");
- if (!ge_p3_is_point_at_infinity(&buckets[bucket]))
+ if (buckets_init[bucket])
{
- add(buckets[bucket], local_cache->cached[i]);
+ if (i < cache_size)
+ add(buckets[bucket], local_cache->cached[i]);
+ else
+ add(buckets[bucket], local_cache_2->cached[i - cache_size]);
}
else
+ {
buckets[bucket] = data[i].point;
+ buckets_init[bucket] = true;
+ }
}
// sum the buckets
- ge_p3 pail = ge_p3_identity;
+ ge_p3 pail;
+ bool pail_init = false;
for (size_t i = (1<<c)-1; i > 0; --i)
{
- if (!ge_p3_is_point_at_infinity(&buckets[i]))
- add(pail, buckets[i]);
- if (!ge_p3_is_point_at_infinity(&pail))
- add(result, pail);
+ if (buckets_init[i])
+ {
+ if (pail_init)
+ add(pail, buckets[i]);
+ else
+ {
+ pail = buckets[i];
+ pail_init = true;
+ }
+ }
+ if (pail_init)
+ {
+ if (result_init)
+ add(result, pail);
+ else
+ {
+ result = pail;
+ result_init = true;
+ }
+ }
}
}