aboutsummaryrefslogtreecommitdiff
path: root/src/ringct
diff options
context:
space:
mode:
authormoneromooo-monero <moneromooo-monero@users.noreply.github.com>2018-01-14 23:06:55 +0000
committermoneromooo-monero <moneromooo-monero@users.noreply.github.com>2018-09-11 13:37:25 +0000
commit939bc2233281c47427c9987fc5310cfb77b085f9 (patch)
tree0c59e501905ba5fccc0a8cb5f1d44b6b24685c3c /src/ringct
parentringct: add bos coster multiexp (diff)
downloadmonero-939bc2233281c47427c9987fc5310cfb77b085f9.tar.xz
add Straus multiexp
Diffstat (limited to 'src/ringct')
-rw-r--r--src/ringct/bulletproofs.cc190
-rw-r--r--src/ringct/multiexp.cc137
-rw-r--r--src/ringct/multiexp.h5
3 files changed, 182 insertions, 150 deletions
diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc
index 057f19029..1c29b1b99 100644
--- a/src/ringct/bulletproofs.cc
+++ b/src/ringct/bulletproofs.cc
@@ -67,6 +67,14 @@ static const rct::keyV twoN = vector_powers(TWO, maxN);
static const rct::key ip12 = inner_product(oneN, twoN);
static boost::mutex init_mutex;
+static inline rct::key multiexp(const std::vector<MultiexpData> &data, bool HiGi)
+{
+ if (HiGi || data.size() < 1000)
+ return straus(data, HiGi);
+ else
+ return bos_coster_heap_conv_robust(data);
+}
+
//addKeys3acc_p3
//aAbB += a*A + b*B where a, b are scalars, A, B are curve points
//A and B must be input after applying "precomp"
@@ -126,35 +134,15 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b)
{
CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b");
CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN");
-#if 1
+
std::vector<MultiexpData> multiexp_data;
multiexp_data.reserve(a.size()*2);
for (size_t i = 0; i < a.size(); ++i)
{
- if (!(a[i] == rct::zero()))
- {
- multiexp_data.resize(multiexp_data.size() + 1);
- multiexp_data.back().scalar = a[i];
- multiexp_data.back().point = Gi_p3[i];
- }
- if (!(b[i] == rct::zero()))
- {
- multiexp_data.resize(multiexp_data.size() + 1);
- multiexp_data.back().scalar = b[i];
- multiexp_data.back().point = Hi_p3[i];
- }
+ multiexp_data.emplace_back(a[i], Gi_p3[i]);
+ multiexp_data.emplace_back(b[i], Hi_p3[i]);
}
- return bos_coster_heap_conv_robust(multiexp_data);
-#else
- ge_p3 res_p3 = ge_p3_identity;
- for (size_t i = 0; i < a.size(); ++i)
- {
- rct::addKeys3acc_p3(&res_p3, a[i], Gprecomp[i], b[i], Hprecomp[i]);
- }
- rct::key res;
- ge_p3_tobytes(res.bytes, &res_p3);
- return res;
-#endif
+ return multiexp(multiexp_data, true);
}
/* Compute a custom vector-scalar commitment */
@@ -164,63 +152,19 @@ static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, c
CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b");
CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A");
CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN");
-#if 1
+
std::vector<MultiexpData> multiexp_data;
multiexp_data.reserve(a.size()*2);
for (size_t i = 0; i < a.size(); ++i)
{
- if (!(a[i] == rct::zero()))
- {
- multiexp_data.resize(multiexp_data.size() + 1);
- multiexp_data.back().scalar = a[i];
- CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed");
- }
- if (!(b[i] == rct::zero()))
- {
- multiexp_data.resize(multiexp_data.size() + 1);
- multiexp_data.back().scalar = b[i];
- CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed");
- }
- }
- return bos_coster_heap_conv_robust(multiexp_data);
-#else
- ge_p3 res_p3 = ge_p3_identity;
- for (size_t i = 0; i < a.size(); ++i)
- {
-#if 0
- rct::key term;
- // we happen to know where A and B might fall, so don't bother checking the rest
- ge_dsmp *Acache = NULL, *Bcache = NULL;
- ge_dsmp Acache_custom[1], Bcache_custom[1];
- if (Gi[i] == A[i])
- Acache = Gprecomp + i;
- else if (i<32 && Gi[i+32] == A[i])
- Acache = Gprecomp + i + 32;
- else
- {
- rct::precomp(Acache_custom[0], A[i]);
- Acache = Acache_custom;
- }
- if (i == 0 && B[i] == Hi[0])
- Bcache = Hprecomp;
- else
- {
- rct::precomp(Bcache_custom[0], B[i]);
- Bcache = Bcache_custom;
- }
- rct::addKeys3(term, a[i], *Acache, b[i], *Bcache);
- rct::addKeys(res, res, term);
-#else
- ge_dsmp Acache, Bcache;
- rct::precomp(Bcache, B[i]);
- rct::precomp(Acache, A[i]);
- addKeys3acc_p3(&res_p3, a[i], Acache, b[i], Bcache);
-#endif
+ multiexp_data.resize(multiexp_data.size() + 1);
+ multiexp_data.back().scalar = a[i];
+ CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed");
+ multiexp_data.resize(multiexp_data.size() + 1);
+ multiexp_data.back().scalar = b[i];
+ CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed");
}
- rct::key res;
- ge_p3_tobytes(res.bytes, &res_p3);
- return res;
-#endif
+ return multiexp(multiexp_data, false);
}
/* Given a scalar, construct a vector of powers */
@@ -986,26 +930,23 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
}
PERF_TIMER_STOP(VERIFY_line_61);
- // multiexp is slower for small numbers of calcs
- if (M >= 16)
+ // bos coster is slower for small numbers of calcs, straus seems not
+ if (1)
{
PERF_TIMER_START_BP(VERIFY_line_61rl_new);
sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes);
std::vector<MultiexpData> multiexp_data;
- multiexp_data.reserve(3+M);
- multiexp_data.push_back({tmp, rct::H});
- for (size_t j = 0; j < M; j++)
+ multiexp_data.reserve(3+proof.V.size());
+ multiexp_data.emplace_back(tmp, rct::H);
+ for (size_t j = 0; j < proof.V.size(); j++)
{
- if (!(zpow[j+2] == rct::zero()))
- multiexp_data.push_back({zpow[j+2], j < proof.V.size() ? proof.V[j] : rct::identity()});
+ multiexp_data.emplace_back(zpow[j+2], proof.V[j]);
}
- if (!(x == rct::zero()))
- multiexp_data.push_back({x, proof.T1});
+ multiexp_data.emplace_back(x, proof.T1);
rct::key xsq;
sc_mul(xsq.bytes, x.bytes, x.bytes);
- if (!(xsq == rct::zero()))
- multiexp_data.push_back({xsq, proof.T2});
- L61Right = bos_coster_heap_conv_robust(multiexp_data);
+ multiexp_data.emplace_back(xsq, proof.T2);
+ L61Right = multiexp(multiexp_data, false);
PERF_TIMER_STOP(VERIFY_line_61rl_new);
}
else
@@ -1114,10 +1055,8 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
sc_muladd(tmp.bytes, z.bytes, ypow.bytes, tmp.bytes);
sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes);
- if (!(g_scalar == rct::zero()))
- multiexp_data.push_back({g_scalar, Gi_p3[i]});
- if (!(h_scalar == rct::zero()))
- multiexp_data.push_back({h_scalar, Hi_p3[i]});
+ multiexp_data.emplace_back(g_scalar, Gi_p3[i]);
+ multiexp_data.emplace_back(h_scalar, Hi_p3[i]);
if (i != MN-1)
{
@@ -1126,63 +1065,28 @@ bool bulletproof_VERIFY(const Bulletproof &proof)
}
}
- rct::key inner_prod = bos_coster_heap_conv_robust(multiexp_data);
+ rct::key inner_prod = multiexp(multiexp_data, true);
PERF_TIMER_STOP(VERIFY_line_24_25);
+ // PAPER LINE 26
rct::key pprime;
- // multiexp does not seem to give any speedup here
- if(0)
- {
- PERF_TIMER_START_BP(VERIFY_line_26_new);
- // PAPER LINE 26
- std::vector<MultiexpData> multiexp_data;
- multiexp_data.reserve(1+2*rounds);
-
- sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes);
- rct::addKeys(pprime, P, rct::scalarmultBase(tmp));
- for (size_t i = 0; i < rounds; ++i)
- {
- sc_mul(tmp.bytes, w[i].bytes, w[i].bytes);
- sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes);
- if (!(tmp == rct::zero()))
- multiexp_data.push_back({tmp, proof.L[i]});
- if (!(tmp2 == rct::zero()))
- multiexp_data.push_back({tmp2, proof.R[i]});
- }
- sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes);
- if (!(tmp == rct::zero()))
- multiexp_data.push_back({tmp, rct::H});
- addKeys(pprime, pprime, bos_coster_heap_conv_robust(multiexp_data));
- PERF_TIMER_STOP(VERIFY_line_26_new);
- }
+ PERF_TIMER_START_BP(VERIFY_line_26_new);
+ multiexp_data.clear();
+ multiexp_data.reserve(1+2*rounds);
+ sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes);
+ rct::addKeys(pprime, P, rct::scalarmultBase(tmp));
+ for (size_t i = 0; i < rounds; ++i)
{
- PERF_TIMER_START_BP(VERIFY_line_26_old);
- // PAPER LINE 26
- sc_sub(tmp.bytes, rct::zero().bytes, proof.mu.bytes);
- rct::addKeys(pprime, P, rct::scalarmultBase(tmp));
- ge_p3 pprime_p3;
- CHECK_AND_ASSERT_MES(ge_frombytes_vartime(&pprime_p3, pprime.bytes) == 0, false, "ge_frombytes_vartime failed");
-
- for (size_t i = 0; i < rounds; ++i)
- {
- sc_mul(tmp.bytes, w[i].bytes, w[i].bytes);
- sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes);
-#if 1
- ge_dsmp cacheL, cacheR;
- rct::precomp(cacheL, proof.L[i]);
- rct::precomp(cacheR, proof.R[i]);
- addKeys3acc_p3(&pprime_p3, tmp, cacheL, tmp2, cacheR);
-#else
- rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.L[i], tmp));
- rct::addKeys(pprime, pprime, rct::scalarmultKey(proof.R[i], tmp2));
-#endif
- }
- sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes);
- addKeys_acc_p3(&pprime_p3, tmp, rct::H);
- ge_p3_tobytes(pprime.bytes, &pprime_p3);
- PERF_TIMER_STOP(VERIFY_line_26_old);
+ sc_mul(tmp.bytes, w[i].bytes, w[i].bytes);
+ sc_mul(tmp2.bytes, winv[i].bytes, winv[i].bytes);
+ multiexp_data.emplace_back(tmp, proof.L[i]);
+ multiexp_data.emplace_back(tmp2, proof.R[i]);
}
+ sc_mul(tmp.bytes, proof.t.bytes, x_ip.bytes);
+ multiexp_data.emplace_back(tmp, rct::H);
+ addKeys(pprime, pprime, multiexp(multiexp_data, false));
+ PERF_TIMER_STOP(VERIFY_line_26_new);
PERF_TIMER_START_BP(VERIFY_step2_check);
sc_mul(tmp.bytes, proof.a.bytes, proof.b.bytes);
diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc
index 2545325ae..b70d92d46 100644
--- a/src/ringct/multiexp.cc
+++ b/src/ringct/multiexp.cc
@@ -38,7 +38,7 @@ extern "C"
#include "multiexp.h"
#undef MONERO_DEFAULT_LOG_CATEGORY
-#define MONERO_DEFAULT_LOG_CATEGORY "multiexp.boscoster"
+#define MONERO_DEFAULT_LOG_CATEGORY "multiexp"
//#define MULTIEXP_PERF(x) x
#define MULTIEXP_PERF(x)
@@ -71,7 +71,15 @@ static inline rct::key div2(const rct::key &k)
return res;
}
-rct::key bos_coster_heap_conv(std::vector<MultiexpData> &data)
+static inline rct::key pow2(size_t n)
+{
+ CHECK_AND_ASSERT_THROW_MES(n < 256, "Invalid pow2 argument");
+ rct::key res = rct::zero();
+ res[n >> 3] |= 1<<(n&7);
+ return res;
+}
+
+rct::key bos_coster_heap_conv(std::vector<MultiexpData> data)
{
MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000));
MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000));
@@ -142,15 +150,20 @@ rct::key bos_coster_heap_conv(std::vector<MultiexpData> &data)
return res;
}
-rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> &data)
+rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data)
{
MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000));
MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000));
size_t points = data.size();
CHECK_AND_ASSERT_THROW_MES(points > 1, "Not enough points");
- std::vector<size_t> heap(points);
+ std::vector<size_t> heap;
+ heap.reserve(points);
for (size_t n = 0; n < points; ++n)
- heap[n] = n;
+ {
+ if (!(data[n].scalar == rct::zero()) && memcmp(&data[n].point, &ge_p3_identity, sizeof(ge_p3)))
+ heap.push_back(n);
+ }
+ points = heap.size();
auto Comp = [&](size_t e0, size_t e1) { return data[e0].scalar < data[e1].scalar; };
std::make_heap(heap.begin(), heap.end(), Comp);
@@ -236,4 +249,118 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> &data)
return res;
}
+rct::key straus(const std::vector<MultiexpData> &data, bool HiGi)
+{
+ MULTIEXP_PERF(PERF_TIMER_UNIT(straus, 1000000));
+
+ MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000));
+ static constexpr unsigned int c = 4;
+ static constexpr unsigned int mask = (1<<c)-1;
+ static std::vector<std::vector<ge_cached>> HiGi_multiples;
+ std::vector<std::vector<ge_cached>> local_multiples, &multiples = HiGi ? HiGi_multiples : local_multiples;
+ ge_cached cached;
+ ge_p1p1 p1;
+ ge_p3 p3;
+
+ std::vector<uint8_t> skip(data.size());
+ for (size_t i = 0; i < data.size(); ++i)
+ skip[i] = data[i].scalar == rct::zero() || !memcmp(&data[i].point, &ge_p3_identity, sizeof(ge_p3));
+
+ MULTIEXP_PERF(PERF_TIMER_START_UNIT(multiples, 1000000));
+ multiples.resize(1<<c);
+ size_t offset = multiples[1].size();
+ multiples[1].resize(std::max(offset, data.size()));
+ for (size_t i = offset; i < data.size(); ++i)
+ ge_p3_to_cached(&multiples[1][i], &data[i].point);
+ for (size_t i=2;i<1<<c;++i)
+ multiples[i].resize(std::max(offset, data.size()));
+ for (size_t j=offset;j<data.size();++j)
+ {
+ for (size_t i=2;i<1<<c;++i)
+ {
+ ge_add(&p1, &data[j].point, &multiples[i-1][j]);
+ ge_p1p1_to_p3(&p3, &p1);
+ ge_p3_to_cached(&multiples[i][j], &p3);
+ }
+ }
+ MULTIEXP_PERF(PERF_TIMER_STOP(multiples));
+
+ MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000));
+ std::vector<std::vector<uint8_t>> digits;
+ digits.resize(data.size());
+ for (size_t j = 0; j < data.size(); ++j)
+ {
+ digits[j].resize(256);
+ unsigned char bytes33[33];
+ memcpy(bytes33, data[j].scalar.bytes, 32);
+ bytes33[32] = 0;
+#if 1
+ static_assert(c == 4, "optimized version needs c == 4");
+ const unsigned char *bytes = bytes33;
+ unsigned int i;
+ for (i = 0; i < 256; i += 8, bytes++)
+ {
+ digits[j][i] = bytes[0] & 0xf;
+ digits[j][i+1] = (bytes[0] >> 1) & 0xf;
+ digits[j][i+2] = (bytes[0] >> 2) & 0xf;
+ digits[j][i+3] = (bytes[0] >> 3) & 0xf;
+ digits[j][i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf;
+ digits[j][i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf;
+ digits[j][i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf;
+ digits[j][i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf;
+ }
+#elif 1
+ for (size_t i = 0; i < 256; ++i)
+ digits[j][i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask;
+#else
+ rct::key shifted = data[j].scalar;
+ for (size_t i = 0; i < 256; ++i)
+ {
+ digits[j][i] = shifted.bytes[0] & 0xf;
+ shifted = div2(shifted, (256-i)>>3);
+ }
+#endif
+ }
+ MULTIEXP_PERF(PERF_TIMER_STOP(digits));
+
+ rct::key maxscalar = rct::zero();
+ for (size_t i = 0; i < data.size(); ++i)
+ if (maxscalar < data[i].scalar)
+ maxscalar = data[i].scalar;
+ size_t i = 0;
+ while (i < 256 && !(maxscalar < pow2(i)))
+ i += c;
+ MULTIEXP_PERF(PERF_TIMER_STOP(setup));
+
+ ge_p3 res_p3 = ge_p3_identity;
+ if (!(i < c))
+ goto skipfirst;
+ while (!(i < c))
+ {
+ for (size_t j = 0; j < c; ++j)
+ {
+ ge_p3_to_cached(&cached, &res_p3);
+ ge_add(&p1, &res_p3, &cached);
+ ge_p1p1_to_p3(&res_p3, &p1);
+ }
+skipfirst:
+ i -= c;
+ for (size_t j = 0; j < data.size(); ++j)
+ {
+ if (skip[j])
+ continue;
+ int digit = digits[j][i];
+ if (digit)
+ {
+ ge_add(&p1, &res_p3, &multiples[digit][j]);
+ ge_p1p1_to_p3(&res_p3, &p1);
+ }
+ }
+ }
+
+ rct::key res;
+ ge_p3_tobytes(res.bytes, &res_p3);
+ return res;
+}
+
}
diff --git a/src/ringct/multiexp.h b/src/ringct/multiexp.h
index 108db7c39..cc53e633e 100644
--- a/src/ringct/multiexp.h
+++ b/src/ringct/multiexp.h
@@ -52,8 +52,9 @@ struct MultiexpData {
}
};
-rct::key bos_coster_heap_conv(std::vector<MultiexpData> &data);
-rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> &data);
+rct::key bos_coster_heap_conv(std::vector<MultiexpData> data);
+rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data);
+rct::key straus(const std::vector<MultiexpData> &data, bool HiGi = false);
}