diff options
Diffstat (limited to 'src/ringct')
-rw-r--r-- | src/ringct/bulletproofs.cc | 924 | ||||
-rw-r--r-- | src/ringct/multiexp.cc | 130 | ||||
-rw-r--r-- | src/ringct/multiexp.h | 4 | ||||
-rw-r--r-- | src/ringct/rctSigs.cpp | 9 |
4 files changed, 454 insertions, 613 deletions
diff --git a/src/ringct/bulletproofs.cc b/src/ringct/bulletproofs.cc index 381f50872..bed48769a 100644 --- a/src/ringct/bulletproofs.cc +++ b/src/ringct/bulletproofs.cc @@ -29,8 +29,6 @@ // Adapted from Java code by Sarang Noether #include <stdlib.h> -#include <openssl/ssl.h> -#include <openssl/bn.h> #include <boost/thread/mutex.hpp> #include "misc_log_ex.h" #include "common/perf_timer.h" @@ -48,9 +46,15 @@ extern "C" //#define DEBUG_BP +#if 1 #define PERF_TIMER_START_BP(x) PERF_TIMER_START_UNIT(x, 1000000) +#define PERF_TIMER_STOP_BP(x) PERF_TIMER_STOP(x) +#else +#define PERF_TIMER_START_BP(x) ((void*)0) +#define PERF_TIMER_STOP_BP(x) ((void*)0) +#endif -#define STRAUS_SIZE_LIMIT 128 +#define STRAUS_SIZE_LIMIT 232 #define PIPPENGER_SIZE_LIMIT 0 namespace rct @@ -75,65 +79,20 @@ static const rct::keyV twoN = vector_powers(TWO, maxN); static const rct::key ip12 = inner_product(oneN, twoN); static boost::mutex init_mutex; -static inline rct::key multiexp(const std::vector<MultiexpData> &data, bool HiGi) +static inline rct::key multiexp(const std::vector<MultiexpData> &data, size_t HiGi_size) { - if (HiGi) + if (HiGi_size > 0) { - static_assert(128 <= STRAUS_SIZE_LIMIT, "Straus in precalc mode can only be calculated till STRAUS_SIZE_LIMIT"); - return data.size() <= 128 ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, get_pippenger_c(data.size())); + static_assert(232 <= STRAUS_SIZE_LIMIT, "Straus in precalc mode can only be calculated till STRAUS_SIZE_LIMIT"); + return HiGi_size <= 232 && data.size() == HiGi_size ? straus(data, straus_HiGi_cache, 0) : pippenger(data, pippenger_HiGi_cache, HiGi_size, get_pippenger_c(data.size())); } else - return data.size() <= 64 ? straus(data, NULL, 0) : pippenger(data, NULL, get_pippenger_c(data.size())); -} - -static bool is_reduced(const rct::key &scalar) -{ - rct::key reduced = scalar; - sc_reduce32(reduced.bytes); - return scalar == reduced; -} - -static void addKeys_acc_p3(ge_p3 *acc_p3, const rct::key &a, const rct::key &point) -{ - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_scalarmult_p3(&p3, a.bytes, &p3); - ge_cached cached; - ge_p3_to_cached(&cached, acc_p3); - ge_p1p1 p1; - ge_add(&p1, &p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); -} - -static void add_acc_p3(ge_p3 *acc_p3, const rct::key &point) -{ - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_cached cached; - ge_p3_to_cached(&cached, &p3); - ge_p1p1 p1; - ge_add(&p1, acc_p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); + return data.size() <= 95 ? straus(data, NULL, 0) : pippenger(data, NULL, 0, get_pippenger_c(data.size())); } -static void sub_acc_p3(ge_p3 *acc_p3, const rct::key &point) +static inline bool is_reduced(const rct::key &scalar) { - ge_p3 p3; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&p3, point.bytes) == 0, "ge_frombytes_vartime failed"); - ge_cached cached; - ge_p3_to_cached(&cached, &p3); - ge_p1p1 p1; - ge_sub(&p1, acc_p3, &cached); - ge_p1p1_to_p3(acc_p3, &p1); -} - -static rct::key scalarmultKey(const ge_p3 &P, const rct::key &a) -{ - ge_p2 R; - ge_scalarmult(&R, a.bytes, &P); - rct::key aP; - ge_tobytes(aP.bytes, &R); - return aP; + return sc_check(scalar.bytes) == 0; } static rct::key get_exponent(const rct::key &base, size_t idx) @@ -160,12 +119,12 @@ static void init_exponents() Gi[i] = get_exponent(rct::H, i * 2 + 1); CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&Gi_p3[i], Gi[i].bytes) == 0, "ge_frombytes_vartime failed"); - data.push_back({rct::zero(), Gi[i]}); - data.push_back({rct::zero(), Hi[i]}); + data.push_back({rct::zero(), Gi_p3[i]}); + data.push_back({rct::zero(), Hi_p3[i]}); } straus_HiGi_cache = straus_init_cache(data, STRAUS_SIZE_LIMIT); - pippenger_HiGi_cache = pippenger_init_cache(data, PIPPENGER_SIZE_LIMIT); + pippenger_HiGi_cache = pippenger_init_cache(data, 0, PIPPENGER_SIZE_LIMIT); MINFO("Hi/Gi cache size: " << (sizeof(Hi)+sizeof(Gi))/1024 << " kB"); MINFO("Hi_p3/Gi_p3 cache size: " << (sizeof(Hi_p3)+sizeof(Gi_p3))/1024 << " kB"); @@ -189,29 +148,37 @@ static rct::key vector_exponent(const rct::keyV &a, const rct::keyV &b) multiexp_data.emplace_back(a[i], Gi_p3[i]); multiexp_data.emplace_back(b[i], Hi_p3[i]); } - return multiexp(multiexp_data, true); + return multiexp(multiexp_data, 2 * a.size()); } /* Compute a custom vector-scalar commitment */ -static rct::key vector_exponent_custom(const rct::keyV &A, const rct::keyV &B, const rct::keyV &a, const rct::keyV &b) +static rct::key cross_vector_exponent8(size_t size, const std::vector<ge_p3> &A, size_t Ao, const std::vector<ge_p3> &B, size_t Bo, const rct::keyV &a, size_t ao, const rct::keyV &b, size_t bo, const rct::keyV *scale, const ge_p3 *extra_point, const rct::key *extra_scalar) { - CHECK_AND_ASSERT_THROW_MES(A.size() == B.size(), "Incompatible sizes of A and B"); - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); - CHECK_AND_ASSERT_THROW_MES(a.size() == A.size(), "Incompatible sizes of a and A"); - CHECK_AND_ASSERT_THROW_MES(a.size() <= maxN*maxM, "Incompatible sizes of a and maxN"); + CHECK_AND_ASSERT_THROW_MES(size + Ao <= A.size(), "Incompatible size for A"); + CHECK_AND_ASSERT_THROW_MES(size + Bo <= B.size(), "Incompatible size for B"); + CHECK_AND_ASSERT_THROW_MES(size + ao <= a.size(), "Incompatible size for a"); + CHECK_AND_ASSERT_THROW_MES(size + bo <= b.size(), "Incompatible size for b"); + CHECK_AND_ASSERT_THROW_MES(size <= maxN*maxM, "size is too large"); + CHECK_AND_ASSERT_THROW_MES(!scale || size == scale->size() / 2, "Incompatible size for scale"); + CHECK_AND_ASSERT_THROW_MES(!!extra_point == !!extra_scalar, "only one of extra point/scalar present"); std::vector<MultiexpData> multiexp_data; - multiexp_data.reserve(a.size()*2); - for (size_t i = 0; i < a.size(); ++i) + multiexp_data.resize(size*2 + (!!extra_point)); + for (size_t i = 0; i < size; ++i) { - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = a[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, A[i].bytes) == 0, "ge_frombytes_vartime failed"); - multiexp_data.resize(multiexp_data.size() + 1); - multiexp_data.back().scalar = b[i]; - CHECK_AND_ASSERT_THROW_MES(ge_frombytes_vartime(&multiexp_data.back().point, B[i].bytes) == 0, "ge_frombytes_vartime failed"); + sc_mul(multiexp_data[i*2].scalar.bytes, a[ao+i].bytes, INV_EIGHT.bytes);; + multiexp_data[i*2].point = A[Ao+i]; + sc_mul(multiexp_data[i*2+1].scalar.bytes, b[bo+i].bytes, INV_EIGHT.bytes); + if (scale) + sc_mul(multiexp_data[i*2+1].scalar.bytes, multiexp_data[i*2+1].scalar.bytes, (*scale)[Bo+i].bytes); + multiexp_data[i*2+1].point = B[Bo+i]; } - return multiexp(multiexp_data, false); + if (extra_point) + { + sc_mul(multiexp_data.back().scalar.bytes, extra_scalar->bytes, INV_EIGHT.bytes); + multiexp_data.back().point = *extra_point; + } + return multiexp(multiexp_data, 0); } /* Given a scalar, construct a vector of powers */ @@ -273,16 +240,22 @@ static rct::keyV hadamard(const rct::keyV &a, const rct::keyV &b) return res; } -/* Given two curvepoint arrays, construct the Hadamard product */ -static rct::keyV hadamard2(const rct::keyV &a, const rct::keyV &b) +/* folds a curvepoint array using a two way scaled Hadamard product */ +static void hadamard_fold(std::vector<ge_p3> &v, const rct::keyV *scale, const rct::key &a, const rct::key &b) { - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); - rct::keyV res(a.size()); - for (size_t i = 0; i < a.size(); ++i) + CHECK_AND_ASSERT_THROW_MES((v.size() & 1) == 0, "Vector size should be even"); + const size_t sz = v.size() / 2; + for (size_t n = 0; n < sz; ++n) { - rct::addKeys(res[i], a[i], b[i]); + ge_dsmp c[2]; + ge_dsm_precomp(c[0], &v[n]); + ge_dsm_precomp(c[1], &v[sz + n]); + rct::key sa, sb; + if (scale) sc_mul(sa.bytes, a.bytes, (*scale)[n].bytes); else sa = a; + if (scale) sc_mul(sb.bytes, b.bytes, (*scale)[sz + n].bytes); else sb = b; + ge_double_scalarmult_precomp_vartime2_p3(&v[n], sa.bytes, c[0], sb.bytes, c[1]); } - return res; + v.resize(sz); } /* Add two vectors */ @@ -297,88 +270,98 @@ static rct::keyV vector_add(const rct::keyV &a, const rct::keyV &b) return res; } -/* Subtract two vectors */ -static rct::keyV vector_subtract(const rct::keyV &a, const rct::keyV &b) +/* Add a scalar to all elements of a vector */ +static rct::keyV vector_add(const rct::keyV &a, const rct::key &b) { - CHECK_AND_ASSERT_THROW_MES(a.size() == b.size(), "Incompatible sizes of a and b"); rct::keyV res(a.size()); for (size_t i = 0; i < a.size(); ++i) { - sc_sub(res[i].bytes, a[i].bytes, b[i].bytes); + sc_add(res[i].bytes, a[i].bytes, b.bytes); } return res; } -/* Multiply a scalar and a vector */ -static rct::keyV vector_scalar(const rct::keyV &a, const rct::key &x) +/* Subtract a scalar from all elements of a vector */ +static rct::keyV vector_subtract(const rct::keyV &a, const rct::key &b) { rct::keyV res(a.size()); for (size_t i = 0; i < a.size(); ++i) { - sc_mul(res[i].bytes, a[i].bytes, x.bytes); + sc_sub(res[i].bytes, a[i].bytes, b.bytes); } return res; } -/* Create a vector from copies of a single value */ -static rct::keyV vector_dup(const rct::key &x, size_t N) -{ - return rct::keyV(N, x); -} - -/* Exponentiate a curve vector by a scalar */ -static rct::keyV vector_scalar2(const rct::keyV &a, const rct::key &x) +/* Multiply a scalar and a vector */ +static rct::keyV vector_scalar(const rct::keyV &a, const rct::key &x) { rct::keyV res(a.size()); for (size_t i = 0; i < a.size(); ++i) { - rct::scalarmultKey(res[i], a[i], x); + sc_mul(res[i].bytes, a[i].bytes, x.bytes); } return res; } -/* Get the sum of a vector's elements */ -static rct::key vector_sum(const rct::keyV &a) +/* Create a vector from copies of a single value */ +static rct::keyV vector_dup(const rct::key &x, size_t N) { - rct::key res = rct::zero(); - for (size_t i = 0; i < a.size(); ++i) - { - sc_add(res.bytes, res.bytes, a[i].bytes); - } - return res; + return rct::keyV(N, x); } -static rct::key switch_endianness(rct::key k) +static rct::key sm(rct::key y, int n, const rct::key &x) { - std::reverse(k.bytes, k.bytes + sizeof(k)); - return k; + while (n--) + sc_mul(y.bytes, y.bytes, y.bytes); + sc_mul(y.bytes, y.bytes, x.bytes); + return y; } -/* Compute the inverse of a scalar, the stupid way */ +/* Compute the inverse of a scalar, the clever way */ static rct::key invert(const rct::key &x) { - rct::key inv; - - BN_CTX *ctx = BN_CTX_new(); - BIGNUM *X = BN_new(); - BIGNUM *L = BN_new(); - BIGNUM *I = BN_new(); - - BN_bin2bn(switch_endianness(x).bytes, sizeof(rct::key), X); - BN_bin2bn(switch_endianness(rct::curveOrder()).bytes, sizeof(rct::key), L); - - CHECK_AND_ASSERT_THROW_MES(BN_mod_inverse(I, X, L, ctx), "Failed to invert"); - - const int len = BN_num_bytes(I); - CHECK_AND_ASSERT_THROW_MES((size_t)len <= sizeof(rct::key), "Invalid number length"); - inv = rct::zero(); - BN_bn2bin(I, inv.bytes); - std::reverse(inv.bytes, inv.bytes + len); + rct::key _1, _10, _100, _11, _101, _111, _1001, _1011, _1111; + + _1 = x; + sc_mul(_10.bytes, _1.bytes, _1.bytes); + sc_mul(_100.bytes, _10.bytes, _10.bytes); + sc_mul(_11.bytes, _10.bytes, _1.bytes); + sc_mul(_101.bytes, _10.bytes, _11.bytes); + sc_mul(_111.bytes, _10.bytes, _101.bytes); + sc_mul(_1001.bytes, _10.bytes, _111.bytes); + sc_mul(_1011.bytes, _10.bytes, _1001.bytes); + sc_mul(_1111.bytes, _100.bytes, _1011.bytes); - BN_free(I); - BN_free(L); - BN_free(X); - BN_CTX_free(ctx); + rct::key inv; + sc_mul(inv.bytes, _1111.bytes, _1.bytes); + + inv = sm(inv, 123 + 3, _101); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 4, _1001); + inv = sm(inv, 2, _11); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 1 + 3, _101); + inv = sm(inv, 3 + 3, _101); + inv = sm(inv, 3, _111); + inv = sm(inv, 1 + 4, _1111); + inv = sm(inv, 2 + 3, _111); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 1 + 4, _1011); + inv = sm(inv, 2 + 4, _1011); + inv = sm(inv, 6 + 4, _1001); + inv = sm(inv, 2 + 2, _11); + inv = sm(inv, 3 + 2, _11); + inv = sm(inv, 3 + 2, _11); + inv = sm(inv, 1 + 4, _1001); + inv = sm(inv, 1 + 3, _111); + inv = sm(inv, 2 + 4, _1111); + inv = sm(inv, 1 + 4, _1011); + inv = sm(inv, 3, _101); + inv = sm(inv, 2 + 4, _1111); + inv = sm(inv, 3, _101); + inv = sm(inv, 1 + 2, _11); #ifdef DEBUG_BP rct::key tmp; @@ -388,6 +371,34 @@ static rct::key invert(const rct::key &x) return inv; } +static rct::keyV invert(rct::keyV x) +{ + rct::keyV scratch; + scratch.reserve(x.size()); + + rct::key acc = rct::identity(); + for (size_t n = 0; n < x.size(); ++n) + { + scratch.push_back(acc); + if (n == 0) + acc = x[0]; + else + sc_mul(acc.bytes, acc.bytes, x[n].bytes); + } + + acc = invert(acc); + + rct::key tmp; + for (int i = x.size(); i-- > 0; ) + { + sc_mul(tmp.bytes, acc.bytes, x[i].bytes); + sc_mul(x[i].bytes, acc.bytes, scratch[i].bytes); + acc = tmp; + } + + return x; +} + /* Compute the slice of a vector */ static rct::keyV slice(const rct::keyV &a, size_t start, size_t stop) { @@ -438,270 +449,12 @@ static rct::key hash_cache_mash(rct::key &hash_cache, const rct::key &mash0, con /* Given a value v (0..2^N-1) and a mask gamma, construct a range proof */ Bulletproof bulletproof_PROVE(const rct::key &sv, const rct::key &gamma) { - init_exponents(); - - PERF_TIMER_UNIT(PROVE, 1000000); - - constexpr size_t logN = 6; // log2(64) - constexpr size_t N = 1<<logN; - - rct::key V; - rct::keyV aL(N), aR(N); - - PERF_TIMER_START_BP(PROVE_v); - rct::addKeys2(V, gamma, sv, rct::H); - V = rct::scalarmultKey(V, INV_EIGHT); - PERF_TIMER_STOP(PROVE_v); - - PERF_TIMER_START_BP(PROVE_aLaR); - for (size_t i = N; i-- > 0; ) - { - if (sv[i/8] & (((uint64_t)1)<<(i%8))) - { - aL[i] = rct::identity(); - } - else - { - aL[i] = rct::zero(); - } - sc_sub(aR[i].bytes, aL[i].bytes, rct::identity().bytes); - } - PERF_TIMER_STOP(PROVE_aLaR); - - rct::key hash_cache = rct::hash_to_scalar(V); - - // DEBUG: Test to ensure this recovers the value -#ifdef DEBUG_BP - uint64_t test_aL = 0, test_aR = 0; - for (size_t i = 0; i < N; ++i) - { - if (aL[i] == rct::identity()) - test_aL += ((uint64_t)1)<<i; - if (aR[i] == rct::zero()) - test_aR += ((uint64_t)1)<<i; - } - uint64_t v_test = 0; - for (int n = 0; n < 8; ++n) v_test |= (((uint64_t)sv[n]) << (8*n)); - CHECK_AND_ASSERT_THROW_MES(test_aL == v_test, "test_aL failed"); - CHECK_AND_ASSERT_THROW_MES(test_aR == v_test, "test_aR failed"); -#endif - -try_again: - PERF_TIMER_START_BP(PROVE_step1); - // PAPER LINES 38-39 - rct::key alpha = rct::skGen(); - rct::key ve = vector_exponent(aL, aR); - rct::key A; - rct::addKeys(A, ve, rct::scalarmultBase(alpha)); - A = rct::scalarmultKey(A, INV_EIGHT); - - // PAPER LINES 40-42 - rct::keyV sL = rct::skvGen(N), sR = rct::skvGen(N); - rct::key rho = rct::skGen(); - ve = vector_exponent(sL, sR); - rct::key S; - rct::addKeys(S, ve, rct::scalarmultBase(rho)); - S = rct::scalarmultKey(S, INV_EIGHT); - - // PAPER LINES 43-45 - rct::key y = hash_cache_mash(hash_cache, A, S); - if (y == rct::zero()) - { - PERF_TIMER_STOP(PROVE_step1); - MINFO("y is 0, trying again"); - goto try_again; - } - rct::key z = hash_cache = rct::hash_to_scalar(y); - if (z == rct::zero()) - { - PERF_TIMER_STOP(PROVE_step1); - MINFO("z is 0, trying again"); - goto try_again; - } - - // Polynomial construction before PAPER LINE 46 - rct::key t0 = rct::zero(); - rct::key t1 = rct::zero(); - rct::key t2 = rct::zero(); - - const auto yN = vector_powers(y, N); - - rct::key ip1y = vector_sum(yN); - rct::key tmp; - sc_muladd(t0.bytes, z.bytes, ip1y.bytes, t0.bytes); - - rct::key zsq; - sc_mul(zsq.bytes, z.bytes, z.bytes); - sc_muladd(t0.bytes, zsq.bytes, sv.bytes, t0.bytes); - - rct::key k = rct::zero(); - sc_mulsub(k.bytes, zsq.bytes, ip1y.bytes, k.bytes); - - rct::key zcu; - sc_mul(zcu.bytes, zsq.bytes, z.bytes); - sc_mulsub(k.bytes, zcu.bytes, ip12.bytes, k.bytes); - sc_add(t0.bytes, t0.bytes, k.bytes); - - // DEBUG: Test the value of t0 has the correct form -#ifdef DEBUG_BP - rct::key test_t0 = rct::zero(); - rct::key iph = inner_product(aL, hadamard(aR, yN)); - sc_add(test_t0.bytes, test_t0.bytes, iph.bytes); - rct::key ips = inner_product(vector_subtract(aL, aR), yN); - sc_muladd(test_t0.bytes, z.bytes, ips.bytes, test_t0.bytes); - rct::key ipt = inner_product(twoN, aL); - sc_muladd(test_t0.bytes, zsq.bytes, ipt.bytes, test_t0.bytes); - sc_add(test_t0.bytes, test_t0.bytes, k.bytes); - CHECK_AND_ASSERT_THROW_MES(t0 == test_t0, "t0 check failed"); -#endif - PERF_TIMER_STOP(PROVE_step1); - - PERF_TIMER_START_BP(PROVE_step2); - const auto HyNsR = hadamard(yN, sR); - const auto vpIz = vector_dup(z, N); - const auto vp2zsq = vector_scalar(twoN, zsq); - const auto aL_vpIz = vector_subtract(aL, vpIz); - const auto aR_vpIz = vector_add(aR, vpIz); - - rct::key ip1 = inner_product(aL_vpIz, HyNsR); - sc_add(t1.bytes, t1.bytes, ip1.bytes); - - rct::key ip2 = inner_product(sL, vector_add(hadamard(yN, aR_vpIz), vp2zsq)); - sc_add(t1.bytes, t1.bytes, ip2.bytes); - - rct::key ip3 = inner_product(sL, HyNsR); - sc_add(t2.bytes, t2.bytes, ip3.bytes); - - // PAPER LINES 47-48 - rct::key tau1 = rct::skGen(), tau2 = rct::skGen(); - - rct::key T1 = rct::addKeys(rct::scalarmultH(t1), rct::scalarmultBase(tau1)); - T1 = rct::scalarmultKey(T1, INV_EIGHT); - rct::key T2 = rct::addKeys(rct::scalarmultH(t2), rct::scalarmultBase(tau2)); - T2 = rct::scalarmultKey(T2, INV_EIGHT); - - // PAPER LINES 49-51 - rct::key x = hash_cache_mash(hash_cache, z, T1, T2); - if (x == rct::zero()) - { - PERF_TIMER_STOP(PROVE_step2); - MINFO("x is 0, trying again"); - goto try_again; - } - - // PAPER LINES 52-53 - rct::key taux = rct::zero(); - sc_mul(taux.bytes, tau1.bytes, x.bytes); - rct::key xsq; - sc_mul(xsq.bytes, x.bytes, x.bytes); - sc_muladd(taux.bytes, tau2.bytes, xsq.bytes, taux.bytes); - sc_muladd(taux.bytes, gamma.bytes, zsq.bytes, taux.bytes); - rct::key mu; - sc_muladd(mu.bytes, x.bytes, rho.bytes, alpha.bytes); - - // PAPER LINES 54-57 - rct::keyV l = vector_add(aL_vpIz, vector_scalar(sL, x)); - rct::keyV r = vector_add(hadamard(yN, vector_add(aR_vpIz, vector_scalar(sR, x))), vp2zsq); - PERF_TIMER_STOP(PROVE_step2); - - PERF_TIMER_START_BP(PROVE_step3); - rct::key t = inner_product(l, r); - - // DEBUG: Test if the l and r vectors match the polynomial forms -#ifdef DEBUG_BP - rct::key test_t; - sc_muladd(test_t.bytes, t1.bytes, x.bytes, t0.bytes); - sc_muladd(test_t.bytes, t2.bytes, xsq.bytes, test_t.bytes); - CHECK_AND_ASSERT_THROW_MES(test_t == t, "test_t check failed"); -#endif - - // PAPER LINES 32-33 - rct::key x_ip = hash_cache_mash(hash_cache, x, taux, mu, t); - - // These are used in the inner product rounds - size_t nprime = N; - rct::keyV Gprime(N); - rct::keyV Hprime(N); - rct::keyV aprime(N); - rct::keyV bprime(N); - const rct::key yinv = invert(y); - rct::key yinvpow = rct::identity(); - for (size_t i = 0; i < N; ++i) - { - Gprime[i] = Gi[i]; - Hprime[i] = scalarmultKey(Hi_p3[i], yinvpow); - sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); - aprime[i] = l[i]; - bprime[i] = r[i]; - } - rct::keyV L(logN); - rct::keyV R(logN); - int round = 0; - rct::keyV w(logN); // this is the challenge x in the inner product protocol - PERF_TIMER_STOP(PROVE_step3); - - PERF_TIMER_START_BP(PROVE_step4); - // PAPER LINE 13 - while (nprime > 1) - { - // PAPER LINE 15 - nprime /= 2; - - // PAPER LINES 16-17 - rct::key cL = inner_product(slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); - rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); - - // PAPER LINES 18-19 - L[round] = vector_exponent_custom(slice(Gprime, nprime, Gprime.size()), slice(Hprime, 0, nprime), slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); - sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - rct::addKeys(L[round], L[round], rct::scalarmultH(tmp)); - L[round] = rct::scalarmultKey(L[round], INV_EIGHT); - R[round] = vector_exponent_custom(slice(Gprime, 0, nprime), slice(Hprime, nprime, Hprime.size()), slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); - sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - rct::addKeys(R[round], R[round], rct::scalarmultH(tmp)); - R[round] = rct::scalarmultKey(R[round], INV_EIGHT); - - // PAPER LINES 21-22 - w[round] = hash_cache_mash(hash_cache, L[round], R[round]); - if (w[round] == rct::zero()) - { - PERF_TIMER_STOP(PROVE_step4); - MINFO("w[round] is 0, trying again"); - goto try_again; - } - - // PAPER LINES 24-25 - const rct::key winv = invert(w[round]); - Gprime = hadamard2(vector_scalar2(slice(Gprime, 0, nprime), winv), vector_scalar2(slice(Gprime, nprime, Gprime.size()), w[round])); - Hprime = hadamard2(vector_scalar2(slice(Hprime, 0, nprime), w[round]), vector_scalar2(slice(Hprime, nprime, Hprime.size()), winv)); - - // PAPER LINES 28-29 - aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); - bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); - - ++round; - } - PERF_TIMER_STOP(PROVE_step4); - - // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) - return Bulletproof(V, A, S, T1, T2, taux, mu, L, R, aprime[0], bprime[0], t); + return bulletproof_PROVE(rct::keyV(1, sv), rct::keyV(1, gamma)); } Bulletproof bulletproof_PROVE(uint64_t v, const rct::key &gamma) { - // vG + gammaH - PERF_TIMER_START_BP(PROVE_v); - rct::key sv = rct::zero(); - sv.bytes[0] = v & 255; - sv.bytes[1] = (v >> 8) & 255; - sv.bytes[2] = (v >> 16) & 255; - sv.bytes[3] = (v >> 24) & 255; - sv.bytes[4] = (v >> 32) & 255; - sv.bytes[5] = (v >> 40) & 255; - sv.bytes[6] = (v >> 48) & 255; - sv.bytes[7] = (v >> 56) & 255; - PERF_TIMER_STOP(PROVE_v); - return bulletproof_PROVE(sv, gamma); + return bulletproof_PROVE(std::vector<uint64_t>(1, v), rct::keyV(1, gamma)); } /* Given a set of values v (0..2^N-1) and masks gamma, construct a range proof */ @@ -728,37 +481,39 @@ Bulletproof bulletproof_PROVE(const rct::keyV &sv, const rct::keyV &gamma) rct::keyV V(sv.size()); rct::keyV aL(MN), aR(MN); - rct::key tmp; + rct::keyV aL8(MN), aR8(MN); + rct::key tmp, tmp2; PERF_TIMER_START_BP(PROVE_v); for (size_t i = 0; i < sv.size(); ++i) { - rct::addKeys2(V[i], gamma[i], sv[i], rct::H); - V[i] = rct::scalarmultKey(V[i], INV_EIGHT); + rct::key gamma8, sv8; + sc_mul(gamma8.bytes, gamma[i].bytes, INV_EIGHT.bytes); + sc_mul(sv8.bytes, sv[i].bytes, INV_EIGHT.bytes); + rct::addKeys2(V[i], gamma8, sv8, rct::H); } - PERF_TIMER_STOP(PROVE_v); + PERF_TIMER_STOP_BP(PROVE_v); PERF_TIMER_START_BP(PROVE_aLaR); for (size_t j = 0; j < M; ++j) { for (size_t i = N; i-- > 0; ) { - if (j >= sv.size()) - { - aL[j*N+i] = rct::zero(); - } - else if (sv[j][i/8] & (((uint64_t)1)<<(i%8))) + if (j < sv.size() && (sv[j][i/8] & (((uint64_t)1)<<(i%8)))) { aL[j*N+i] = rct::identity(); + aL8[j*N+i] = INV_EIGHT; + aR[j*N+i] = aR8[j*N+i] = rct::zero(); } else { - aL[j*N+i] = rct::zero(); + aL[j*N+i] = aL8[j*N+i] = rct::zero(); + aR[j*N+i] = MINUS_ONE; + aR8[j*N+i] = MINUS_INV_EIGHT; } - sc_sub(aR[j*N+i].bytes, aL[j*N+i].bytes, rct::identity().bytes); } } - PERF_TIMER_STOP(PROVE_aLaR); + PERF_TIMER_STOP_BP(PROVE_aLaR); // DEBUG: Test to ensure this recovers the value #ifdef DEBUG_BP @@ -786,10 +541,10 @@ try_again: PERF_TIMER_START_BP(PROVE_step1); // PAPER LINES 38-39 rct::key alpha = rct::skGen(); - rct::key ve = vector_exponent(aL, aR); + rct::key ve = vector_exponent(aL8, aR8); rct::key A; - rct::addKeys(A, ve, rct::scalarmultBase(alpha)); - A = rct::scalarmultKey(A, INV_EIGHT); + sc_mul(tmp.bytes, alpha.bytes, INV_EIGHT.bytes); + rct::addKeys(A, ve, rct::scalarmultBase(tmp)); // PAPER LINES 40-42 rct::keyV sL = rct::skvGen(MN), sR = rct::skvGen(MN); @@ -803,21 +558,20 @@ try_again: rct::key y = hash_cache_mash(hash_cache, A, S); if (y == rct::zero()) { - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); MINFO("y is 0, trying again"); goto try_again; } rct::key z = hash_cache = rct::hash_to_scalar(y); if (z == rct::zero()) { - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); MINFO("z is 0, trying again"); goto try_again; } // Polynomial construction by coefficients - const auto zMN = vector_dup(z, MN); - rct::keyV l0 = vector_subtract(aL, zMN); + rct::keyV l0 = vector_subtract(aL, z); const rct::keyV &l1 = sL; // This computes the ugly sum/concatenation from PAPER LINE 65 @@ -837,7 +591,7 @@ try_again: } } - rct::keyV r0 = vector_add(aR, zMN); + rct::keyV r0 = vector_add(aR, z); const auto yMN = vector_powers(y, MN); r0 = hadamard(r0, yMN); r0 = vector_add(r0, zero_twos); @@ -850,22 +604,28 @@ try_again: sc_add(t1.bytes, t1_1.bytes, t1_2.bytes); rct::key t2 = inner_product(l1, r1); - PERF_TIMER_STOP(PROVE_step1); + PERF_TIMER_STOP_BP(PROVE_step1); PERF_TIMER_START_BP(PROVE_step2); // PAPER LINES 47-48 rct::key tau1 = rct::skGen(), tau2 = rct::skGen(); - rct::key T1 = rct::addKeys(rct::scalarmultH(t1), rct::scalarmultBase(tau1)); - T1 = rct::scalarmultKey(T1, INV_EIGHT); - rct::key T2 = rct::addKeys(rct::scalarmultH(t2), rct::scalarmultBase(tau2)); - T2 = rct::scalarmultKey(T2, INV_EIGHT); + rct::key T1, T2; + ge_p3 p3; + sc_mul(tmp.bytes, t1.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau1.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T1.bytes, &p3); + sc_mul(tmp.bytes, t2.bytes, INV_EIGHT.bytes); + sc_mul(tmp2.bytes, tau2.bytes, INV_EIGHT.bytes); + ge_double_scalarmult_base_vartime_p3(&p3, tmp.bytes, &ge_p3_H, tmp2.bytes); + ge_p3_tobytes(T2.bytes, &p3); // PAPER LINES 49-51 rct::key x = hash_cache_mash(hash_cache, z, T1, T2); if (x == rct::zero()) { - PERF_TIMER_STOP(PROVE_step2); + PERF_TIMER_STOP_BP(PROVE_step2); MINFO("x is 0, trying again"); goto try_again; } @@ -889,7 +649,7 @@ try_again: l = vector_add(l, vector_scalar(l1, x)); rct::keyV r = r0; r = vector_add(r, vector_scalar(r1, x)); - PERF_TIMER_STOP(PROVE_step2); + PERF_TIMER_STOP_BP(PROVE_step2); PERF_TIMER_START_BP(PROVE_step3); rct::key t = inner_product(l, r); @@ -907,24 +667,27 @@ try_again: rct::key x_ip = hash_cache_mash(hash_cache, x, taux, mu, t); if (x_ip == rct::zero()) { - PERF_TIMER_STOP(PROVE_step3); + PERF_TIMER_STOP_BP(PROVE_step3); MINFO("x_ip is 0, trying again"); goto try_again; } // These are used in the inner product rounds size_t nprime = MN; - rct::keyV Gprime(MN); - rct::keyV Hprime(MN); + std::vector<ge_p3> Gprime(MN); + std::vector<ge_p3> Hprime(MN); rct::keyV aprime(MN); rct::keyV bprime(MN); const rct::key yinv = invert(y); - rct::key yinvpow = rct::identity(); + rct::keyV yinvpow(MN); + yinvpow[0] = rct::identity(); + yinvpow[1] = yinv; for (size_t i = 0; i < MN; ++i) { - Gprime[i] = Gi[i]; - Hprime[i] = scalarmultKey(Hi_p3[i], yinvpow); - sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); + Gprime[i] = Gi_p3[i]; + Hprime[i] = Hi_p3[i]; + if (i > 1) + sc_mul(yinvpow[i].bytes, yinvpow[i-1].bytes, yinv.bytes); aprime[i] = l[i]; bprime[i] = r[i]; } @@ -932,53 +695,62 @@ try_again: rct::keyV R(logMN); int round = 0; rct::keyV w(logMN); // this is the challenge x in the inner product protocol - PERF_TIMER_STOP(PROVE_step3); + PERF_TIMER_STOP_BP(PROVE_step3); PERF_TIMER_START_BP(PROVE_step4); // PAPER LINE 13 + const rct::keyV *scale = &yinvpow; while (nprime > 1) { // PAPER LINE 15 nprime /= 2; // PAPER LINES 16-17 + PERF_TIMER_START_BP(PROVE_inner_product); rct::key cL = inner_product(slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); rct::key cR = inner_product(slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); + PERF_TIMER_STOP_BP(PROVE_inner_product); // PAPER LINES 18-19 - L[round] = vector_exponent_custom(slice(Gprime, nprime, Gprime.size()), slice(Hprime, 0, nprime), slice(aprime, 0, nprime), slice(bprime, nprime, bprime.size())); + PERF_TIMER_START_BP(PROVE_LR); sc_mul(tmp.bytes, cL.bytes, x_ip.bytes); - rct::addKeys(L[round], L[round], rct::scalarmultH(tmp)); - L[round] = rct::scalarmultKey(L[round], INV_EIGHT); - R[round] = vector_exponent_custom(slice(Gprime, 0, nprime), slice(Hprime, nprime, Hprime.size()), slice(aprime, nprime, aprime.size()), slice(bprime, 0, nprime)); + L[round] = cross_vector_exponent8(nprime, Gprime, nprime, Hprime, 0, aprime, 0, bprime, nprime, scale, &ge_p3_H, &tmp); sc_mul(tmp.bytes, cR.bytes, x_ip.bytes); - rct::addKeys(R[round], R[round], rct::scalarmultH(tmp)); - R[round] = rct::scalarmultKey(R[round], INV_EIGHT); + R[round] = cross_vector_exponent8(nprime, Gprime, 0, Hprime, nprime, aprime, nprime, bprime, 0, scale, &ge_p3_H, &tmp); + PERF_TIMER_STOP_BP(PROVE_LR); // PAPER LINES 21-22 w[round] = hash_cache_mash(hash_cache, L[round], R[round]); if (w[round] == rct::zero()) { - PERF_TIMER_STOP(PROVE_step4); + PERF_TIMER_STOP_BP(PROVE_step4); MINFO("w[round] is 0, trying again"); goto try_again; } // PAPER LINES 24-25 const rct::key winv = invert(w[round]); - Gprime = hadamard2(vector_scalar2(slice(Gprime, 0, nprime), winv), vector_scalar2(slice(Gprime, nprime, Gprime.size()), w[round])); - Hprime = hadamard2(vector_scalar2(slice(Hprime, 0, nprime), w[round]), vector_scalar2(slice(Hprime, nprime, Hprime.size()), winv)); + if (nprime > 1) + { + PERF_TIMER_START_BP(PROVE_hadamard2); + hadamard_fold(Gprime, NULL, winv, w[round]); + hadamard_fold(Hprime, scale, w[round], winv); + PERF_TIMER_STOP_BP(PROVE_hadamard2); + } // PAPER LINES 28-29 + PERF_TIMER_START_BP(PROVE_prime); aprime = vector_add(vector_scalar(slice(aprime, 0, nprime), w[round]), vector_scalar(slice(aprime, nprime, aprime.size()), winv)); bprime = vector_add(vector_scalar(slice(bprime, 0, nprime), winv), vector_scalar(slice(bprime, nprime, bprime.size()), w[round])); + PERF_TIMER_STOP_BP(PROVE_prime); + scale = NULL; ++round; } - PERF_TIMER_STOP(PROVE_step4); + PERF_TIMER_STOP_BP(PROVE_step4); // PAPER LINE 58 (with inclusions from PAPER LINE 8 and PAPER LINE 20) - return Bulletproof(V, A, S, T1, T2, taux, mu, L, R, aprime[0], bprime[0], t); + return Bulletproof(std::move(V), A, S, T1, T2, taux, mu, std::move(L), std::move(R), aprime[0], bprime[0], t); } Bulletproof bulletproof_PROVE(const std::vector<uint64_t> &v, const rct::keyV &gamma) @@ -1000,10 +772,17 @@ Bulletproof bulletproof_PROVE(const std::vector<uint64_t> &v, const rct::keyV &g sv[i].bytes[6] = (v[i] >> 48) & 255; sv[i].bytes[7] = (v[i] >> 56) & 255; } - PERF_TIMER_STOP(PROVE_v); + PERF_TIMER_STOP_BP(PROVE_v); return bulletproof_PROVE(sv, gamma); } +struct proof_data_t +{ + rct::key x, y, z, x_ip; + std::vector<rct::key> w; + size_t logM, inv_offset; +}; + /* Given a range proof, determine if it is valid */ bool bulletproof_VERIFY(const std::vector<const Bulletproof*> &proofs) { @@ -1011,8 +790,17 @@ bool bulletproof_VERIFY(const std::vector<const Bulletproof*> &proofs) PERF_TIMER_START_BP(VERIFY); + const size_t logN = 6; + const size_t N = 1 << logN; + // sanity and figure out which proof is longest size_t max_length = 0; + size_t nV = 0; + std::vector<proof_data_t> proof_data; + proof_data.reserve(proofs.size()); + size_t inv_offset = 0; + std::vector<rct::key> to_invert; + to_invert.reserve(11 * sizeof(proofs)); for (const Bulletproof *p: proofs) { const Bulletproof &proof = *p; @@ -1029,44 +817,76 @@ bool bulletproof_VERIFY(const std::vector<const Bulletproof*> &proofs) CHECK_AND_ASSERT_MES(proof.L.size() > 0, false, "Empty proof"); max_length = std::max(max_length, proof.L.size()); + nV += proof.V.size(); + + // Reconstruct the challenges + PERF_TIMER_START_BP(VERIFY_start); + proof_data.resize(proof_data.size() + 1); + proof_data_t &pd = proof_data.back(); + rct::key hash_cache = rct::hash_to_scalar(proof.V); + pd.y = hash_cache_mash(hash_cache, proof.A, proof.S); + CHECK_AND_ASSERT_MES(!(pd.y == rct::zero()), false, "y == 0"); + pd.z = hash_cache = rct::hash_to_scalar(pd.y); + CHECK_AND_ASSERT_MES(!(pd.z == rct::zero()), false, "z == 0"); + pd.x = hash_cache_mash(hash_cache, pd.z, proof.T1, proof.T2); + CHECK_AND_ASSERT_MES(!(pd.x == rct::zero()), false, "x == 0"); + pd.x_ip = hash_cache_mash(hash_cache, pd.x, proof.taux, proof.mu, proof.t); + CHECK_AND_ASSERT_MES(!(pd.x_ip == rct::zero()), false, "x_ip == 0"); + PERF_TIMER_STOP_BP(VERIFY_start); + + size_t M; + for (pd.logM = 0; (M = 1<<pd.logM) <= maxM && M < proof.V.size(); ++pd.logM); + CHECK_AND_ASSERT_MES(proof.L.size() == 6+pd.logM, false, "Proof is not the expected size"); + + const size_t rounds = pd.logM+logN; + CHECK_AND_ASSERT_MES(rounds > 0, false, "Zero rounds"); + + PERF_TIMER_START_BP(VERIFY_line_21_22); + // PAPER LINES 21-22 + // The inner product challenges are computed per round + pd.w.resize(rounds); + for (size_t i = 0; i < rounds; ++i) + { + pd.w[i] = hash_cache_mash(hash_cache, proof.L[i], proof.R[i]); + CHECK_AND_ASSERT_MES(!(pd.w[i] == rct::zero()), false, "w[i] == 0"); + } + PERF_TIMER_STOP_BP(VERIFY_line_21_22); + + pd.inv_offset = inv_offset; + for (size_t i = 0; i < rounds; ++i) + to_invert.push_back(pd.w[i]); + to_invert.push_back(pd.y); + inv_offset += rounds + 1; } CHECK_AND_ASSERT_MES(max_length < 32, false, "At least one proof is too large"); size_t maxMN = 1u << max_length; - const size_t logN = 6; - const size_t N = 1 << logN; rct::key tmp; + std::vector<MultiexpData> multiexp_data; + multiexp_data.reserve(nV + (2 * (10/*logM*/ + logN) + 4) * proofs.size() + 2 * maxMN); + multiexp_data.resize(2 * maxMN); + + PERF_TIMER_START_BP(VERIFY_line_24_25_invert); + const std::vector<rct::key> inverses = invert(to_invert); + PERF_TIMER_STOP_BP(VERIFY_line_24_25_invert); + // setup weighted aggregates - rct::key Z0 = rct::identity(); rct::key z1 = rct::zero(); - rct::key Z2 = rct::identity(); rct::key z3 = rct::zero(); - rct::keyV z4(maxMN, rct::zero()), z5(maxMN, rct::zero()); - rct::key Y2 = rct::identity(), Y3 = rct::identity(), Y4 = rct::identity(); - rct::key y0 = rct::zero(), y1 = rct::zero(); + rct::keyV m_z4(maxMN, rct::zero()), m_z5(maxMN, rct::zero()); + rct::key m_y0 = rct::zero(), y1 = rct::zero(); + int proof_data_index = 0; for (const Bulletproof *p: proofs) { const Bulletproof &proof = *p; + const proof_data_t &pd = proof_data[proof_data_index++]; - size_t M, logM; - for (logM = 0; (M = 1<<logM) <= maxM && M < proof.V.size(); ++logM); - CHECK_AND_ASSERT_MES(proof.L.size() == 6+logM, false, "Proof is not the expected size"); + CHECK_AND_ASSERT_MES(proof.L.size() == 6+pd.logM, false, "Proof is not the expected size"); + const size_t M = 1 << pd.logM; const size_t MN = M*N; - rct::key weight = rct::skGen(); - - // Reconstruct the challenges - PERF_TIMER_START_BP(VERIFY_start); - rct::key hash_cache = rct::hash_to_scalar(proof.V); - rct::key y = hash_cache_mash(hash_cache, proof.A, proof.S); - CHECK_AND_ASSERT_MES(!(y == rct::zero()), false, "y == 0"); - rct::key z = hash_cache = rct::hash_to_scalar(y); - CHECK_AND_ASSERT_MES(!(z == rct::zero()), false, "z == 0"); - rct::key x = hash_cache_mash(hash_cache, z, proof.T1, proof.T2); - CHECK_AND_ASSERT_MES(!(x == rct::zero()), false, "x == 0"); - rct::key x_ip = hash_cache_mash(hash_cache, x, proof.taux, proof.mu, proof.t); - CHECK_AND_ASSERT_MES(!(x_ip == rct::zero()), false, "x_ip == 0"); - PERF_TIMER_STOP(VERIFY_start); + const rct::key weight_y = rct::skGen(); + const rct::key weight_z = rct::skGen(); // pre-multiply some points by 8 rct::keyV proof8_V = proof.V; for (rct::key &k: proof8_V) k = rct::scalarmult8(k); @@ -1075,177 +895,161 @@ bool bulletproof_VERIFY(const std::vector<const Bulletproof*> &proofs) rct::key proof8_T1 = rct::scalarmult8(proof.T1); rct::key proof8_T2 = rct::scalarmult8(proof.T2); rct::key proof8_S = rct::scalarmult8(proof.S); + rct::key proof8_A = rct::scalarmult8(proof.A); PERF_TIMER_START_BP(VERIFY_line_61); // PAPER LINE 61 - sc_muladd(y0.bytes, proof.taux.bytes, weight.bytes, y0.bytes); + sc_mulsub(m_y0.bytes, proof.taux.bytes, weight_y.bytes, m_y0.bytes); - const rct::keyV zpow = vector_powers(z, M+3); + const rct::keyV zpow = vector_powers(pd.z, M+3); rct::key k; - const rct::key ip1y = vector_power_sum(y, MN); + const rct::key ip1y = vector_power_sum(pd.y, MN); sc_mulsub(k.bytes, zpow[2].bytes, ip1y.bytes, rct::zero().bytes); for (size_t j = 1; j <= M; ++j) { CHECK_AND_ASSERT_MES(j+2 < zpow.size(), false, "invalid zpow index"); sc_mulsub(k.bytes, zpow[j+2].bytes, ip12.bytes, k.bytes); } - PERF_TIMER_STOP(VERIFY_line_61); + PERF_TIMER_STOP_BP(VERIFY_line_61); PERF_TIMER_START_BP(VERIFY_line_61rl_new); - sc_muladd(tmp.bytes, z.bytes, ip1y.bytes, k.bytes); - std::vector<MultiexpData> multiexp_data; - multiexp_data.reserve(proof.V.size()); + sc_muladd(tmp.bytes, pd.z.bytes, ip1y.bytes, k.bytes); sc_sub(tmp.bytes, proof.t.bytes, tmp.bytes); - sc_muladd(y1.bytes, tmp.bytes, weight.bytes, y1.bytes); + sc_muladd(y1.bytes, tmp.bytes, weight_y.bytes, y1.bytes); for (size_t j = 0; j < proof8_V.size(); j++) { - multiexp_data.emplace_back(zpow[j+2], proof8_V[j]); + sc_mul(tmp.bytes, zpow[j+2].bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_V[j]); } - rct::addKeys(Y2, Y2, rct::scalarmultKey(multiexp(multiexp_data, false), weight)); - sc_mul(tmp.bytes, x.bytes, weight.bytes); - rct::addKeys(Y3, Y3, rct::scalarmultKey(proof8_T1, tmp)); + sc_mul(tmp.bytes, pd.x.bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_T1); rct::key xsq; - sc_mul(xsq.bytes, x.bytes, x.bytes); - sc_mul(tmp.bytes, xsq.bytes, weight.bytes); - rct::addKeys(Y4, Y4, rct::scalarmultKey(proof8_T2, tmp)); - PERF_TIMER_STOP(VERIFY_line_61rl_new); + sc_mul(xsq.bytes, pd.x.bytes, pd.x.bytes); + sc_mul(tmp.bytes, xsq.bytes, weight_y.bytes); + multiexp_data.emplace_back(tmp, proof8_T2); + PERF_TIMER_STOP_BP(VERIFY_line_61rl_new); PERF_TIMER_START_BP(VERIFY_line_62); // PAPER LINE 62 - rct::addKeys(Z0, Z0, rct::scalarmultKey(rct::addKeys(rct::scalarmult8(proof.A), rct::scalarmultKey(proof8_S, x)), weight)); - PERF_TIMER_STOP(VERIFY_line_62); + multiexp_data.emplace_back(weight_z, proof8_A); + sc_mul(tmp.bytes, pd.x.bytes, weight_z.bytes); + multiexp_data.emplace_back(tmp, proof8_S); + PERF_TIMER_STOP_BP(VERIFY_line_62); // Compute the number of rounds for the inner product - const size_t rounds = logM+logN; + const size_t rounds = pd.logM+logN; CHECK_AND_ASSERT_MES(rounds > 0, false, "Zero rounds"); - PERF_TIMER_START_BP(VERIFY_line_21_22); - // PAPER LINES 21-22 - // The inner product challenges are computed per round - rct::keyV w(rounds); - for (size_t i = 0; i < rounds; ++i) - { - w[i] = hash_cache_mash(hash_cache, proof.L[i], proof.R[i]); - CHECK_AND_ASSERT_MES(!(w[i] == rct::zero()), false, "w[i] == 0"); - } - PERF_TIMER_STOP(VERIFY_line_21_22); - PERF_TIMER_START_BP(VERIFY_line_24_25); // Basically PAPER LINES 24-25 // Compute the curvepoints from G[i] and H[i] rct::key yinvpow = rct::identity(); rct::key ypow = rct::identity(); - PERF_TIMER_START_BP(VERIFY_line_24_25_invert); - const rct::key yinv = invert(y); - rct::keyV winv(rounds); - for (size_t i = 0; i < rounds; ++i) - winv[i] = invert(w[i]); - PERF_TIMER_STOP(VERIFY_line_24_25_invert); + const rct::key *winv = &inverses[pd.inv_offset]; + const rct::key yinv = inverses[pd.inv_offset + rounds]; + + // precalc + PERF_TIMER_START_BP(VERIFY_line_24_25_precalc); + rct::keyV w_cache(1<<rounds); + w_cache[0] = winv[0]; + w_cache[1] = pd.w[0]; + for (size_t j = 1; j < rounds; ++j) + { + const size_t slots = 1<<(j+1); + for (size_t s = slots; s-- > 0; --s) + { + sc_mul(w_cache[s].bytes, w_cache[s/2].bytes, pd.w[j].bytes); + sc_mul(w_cache[s-1].bytes, w_cache[s/2].bytes, winv[j].bytes); + } + } + PERF_TIMER_STOP_BP(VERIFY_line_24_25_precalc); for (size_t i = 0; i < MN; ++i) { - // Convert the index to binary IN REVERSE and construct the scalar exponent rct::key g_scalar = proof.a; rct::key h_scalar; - sc_mul(h_scalar.bytes, proof.b.bytes, yinvpow.bytes); + if (i == 0) + h_scalar = proof.b; + else + sc_mul(h_scalar.bytes, proof.b.bytes, yinvpow.bytes); - for (size_t j = rounds; j-- > 0; ) - { - size_t J = w.size() - j - 1; - - if ((i & (((size_t)1)<<j)) == 0) - { - sc_mul(g_scalar.bytes, g_scalar.bytes, winv[J].bytes); - sc_mul(h_scalar.bytes, h_scalar.bytes, w[J].bytes); - } - else - { - sc_mul(g_scalar.bytes, g_scalar.bytes, w[J].bytes); - sc_mul(h_scalar.bytes, h_scalar.bytes, winv[J].bytes); - } - } + // Convert the index to binary IN REVERSE and construct the scalar exponent + sc_mul(g_scalar.bytes, g_scalar.bytes, w_cache[i].bytes); + sc_mul(h_scalar.bytes, h_scalar.bytes, w_cache[(~i) & (MN-1)].bytes); // Adjust the scalars using the exponents from PAPER LINE 62 - sc_add(g_scalar.bytes, g_scalar.bytes, z.bytes); + sc_add(g_scalar.bytes, g_scalar.bytes, pd.z.bytes); CHECK_AND_ASSERT_MES(2+i/N < zpow.size(), false, "invalid zpow index"); CHECK_AND_ASSERT_MES(i%N < twoN.size(), false, "invalid twoN index"); sc_mul(tmp.bytes, zpow[2+i/N].bytes, twoN[i%N].bytes); - sc_muladd(tmp.bytes, z.bytes, ypow.bytes, tmp.bytes); - sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); + if (i == 0) + { + sc_add(tmp.bytes, tmp.bytes, pd.z.bytes); + sc_sub(h_scalar.bytes, h_scalar.bytes, tmp.bytes); + } + else + { + sc_muladd(tmp.bytes, pd.z.bytes, ypow.bytes, tmp.bytes); + sc_mulsub(h_scalar.bytes, tmp.bytes, yinvpow.bytes, h_scalar.bytes); + } - sc_muladd(z4[i].bytes, g_scalar.bytes, weight.bytes, z4[i].bytes); - sc_muladd(z5[i].bytes, h_scalar.bytes, weight.bytes, z5[i].bytes); + sc_mulsub(m_z4[i].bytes, g_scalar.bytes, weight_z.bytes, m_z4[i].bytes); + sc_mulsub(m_z5[i].bytes, h_scalar.bytes, weight_z.bytes, m_z5[i].bytes); - if (i != MN-1) + if (i == 0) + { + yinvpow = yinv; + ypow = pd.y; + } + else if (i != MN-1) { sc_mul(yinvpow.bytes, yinvpow.bytes, yinv.bytes); - sc_mul(ypow.bytes, ypow.bytes, y.bytes); + sc_mul(ypow.bytes, ypow.bytes, pd.y.bytes); } } - PERF_TIMER_STOP(VERIFY_line_24_25); + PERF_TIMER_STOP_BP(VERIFY_line_24_25); // PAPER LINE 26 PERF_TIMER_START_BP(VERIFY_line_26_new); - multiexp_data.clear(); - multiexp_data.reserve(2*rounds); - - sc_muladd(z1.bytes, proof.mu.bytes, weight.bytes, z1.bytes); + sc_muladd(z1.bytes, proof.mu.bytes, weight_z.bytes, z1.bytes); for (size_t i = 0; i < rounds; ++i) { - sc_mul(tmp.bytes, w[i].bytes, w[i].bytes); + sc_mul(tmp.bytes, pd.w[i].bytes, pd.w[i].bytes); + sc_mul(tmp.bytes, tmp.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_L[i]); sc_mul(tmp.bytes, winv[i].bytes, winv[i].bytes); + sc_mul(tmp.bytes, tmp.bytes, weight_z.bytes); multiexp_data.emplace_back(tmp, proof8_R[i]); } - rct::key acc = multiexp(multiexp_data, false); - rct::addKeys(Z2, Z2, rct::scalarmultKey(acc, weight)); sc_mulsub(tmp.bytes, proof.a.bytes, proof.b.bytes, proof.t.bytes); - sc_mul(tmp.bytes, tmp.bytes, x_ip.bytes); - sc_muladd(z3.bytes, tmp.bytes, weight.bytes, z3.bytes); - PERF_TIMER_STOP(VERIFY_line_26_new); + sc_mul(tmp.bytes, tmp.bytes, pd.x_ip.bytes); + sc_muladd(z3.bytes, tmp.bytes, weight_z.bytes, z3.bytes); + PERF_TIMER_STOP_BP(VERIFY_line_26_new); } // now check all proofs at once PERF_TIMER_START_BP(VERIFY_step2_check); - ge_p3 check1; - ge_scalarmult_base(&check1, y0.bytes); - addKeys_acc_p3(&check1, y1, rct::H); - sub_acc_p3(&check1, Y2); - sub_acc_p3(&check1, Y3); - sub_acc_p3(&check1, Y4); - if (!ge_p3_is_point_at_infinity(&check1)) - { - MERROR("Verification failure at step 1"); - return false; - } - ge_p3 check2; - sc_sub(tmp.bytes, rct::zero().bytes, z1.bytes); - ge_double_scalarmult_base_vartime_p3(&check2, z3.bytes, &ge_p3_H, tmp.bytes); - add_acc_p3(&check2, Z0); - add_acc_p3(&check2, Z2); - - std::vector<MultiexpData> multiexp_data; - multiexp_data.reserve(2 * maxMN); + sc_sub(tmp.bytes, m_y0.bytes, z1.bytes); + multiexp_data.emplace_back(tmp, rct::G); + sc_sub(tmp.bytes, z3.bytes, y1.bytes); + multiexp_data.emplace_back(tmp, rct::H); for (size_t i = 0; i < maxMN; ++i) { - sc_sub(tmp.bytes, rct::zero().bytes, z4[i].bytes); - multiexp_data.emplace_back(tmp, Gi_p3[i]); - sc_sub(tmp.bytes, rct::zero().bytes, z5[i].bytes); - multiexp_data.emplace_back(tmp, Hi_p3[i]); + multiexp_data[i * 2] = {m_z4[i], Gi_p3[i]}; + multiexp_data[i * 2 + 1] = {m_z5[i], Hi_p3[i]}; } - add_acc_p3(&check2, multiexp(multiexp_data, true)); - PERF_TIMER_STOP(VERIFY_step2_check); - - if (!ge_p3_is_point_at_infinity(&check2)) + if (!(multiexp(multiexp_data, 2 * maxMN) == rct::identity())) { - MERROR("Verification failure at step 2"); + PERF_TIMER_STOP_BP(VERIFY_step2_check); + MERROR("Verification failure"); return false; } + PERF_TIMER_STOP_BP(VERIFY_step2_check); - PERF_TIMER_STOP(VERIFY); + PERF_TIMER_STOP_BP(VERIFY); return true; } diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc index 21957b94c..6f77fed34 100644 --- a/src/ringct/multiexp.cc +++ b/src/ringct/multiexp.cc @@ -79,6 +79,25 @@ extern "C" // Best/cached Straus Straus Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip // Best/uncached Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip Pip Pip +// New timings: +// Pippenger: +// 2/1 always +// 3/2 at ~13 +// 4/3 at ~29 +// 5/4 at ~83 +// 6/5 < 200 +// 7/6 at ~470 +// 8/7 at ~1180 +// 9/8 at ~2290 +// Cached Pippenger: +// 6/5 < 200 +// 7/6 at 460 +// 8/7 at 1180 +// 9/8 at 2300 +// +// Cached Straus/Pippenger cross at 232 +// + namespace rct { @@ -320,7 +339,7 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data) return res; } -static constexpr unsigned int STRAUS_C = 4; +#define STRAUS_C 4 struct straus_cached_data { @@ -447,28 +466,26 @@ rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<str #endif MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000)); +#if STRAUS_C==4 + std::unique_ptr<uint8_t[]> digits{new uint8_t[64 * data.size()]}; +#else std::unique_ptr<uint8_t[]> digits{new uint8_t[256 * data.size()]}; +#endif for (size_t j = 0; j < data.size(); ++j) { - unsigned char bytes33[33]; - memcpy(bytes33, data[j].scalar.bytes, 32); - bytes33[32] = 0; - const unsigned char *bytes = bytes33; -#if 1 - static_assert(STRAUS_C == 4, "optimized version needs STRAUS_C == 4"); + const unsigned char *bytes = data[j].scalar.bytes; +#if STRAUS_C==4 unsigned int i; - for (i = 0; i < 256; i += 8, bytes++) + for (i = 0; i < 64; i += 2, bytes++) { - digits[j*256+i] = bytes[0] & 0xf; - digits[j*256+i+1] = (bytes[0] >> 1) & 0xf; - digits[j*256+i+2] = (bytes[0] >> 2) & 0xf; - digits[j*256+i+3] = (bytes[0] >> 3) & 0xf; - digits[j*256+i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf; - digits[j*256+i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf; - digits[j*256+i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf; - digits[j*256+i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf; + digits[j*64+i] = bytes[0] & 0xf; + digits[j*64+i+1] = bytes[0] >> 4; } #elif 1 + unsigned char bytes33[33]; + memcpy(bytes33, data[j].scalar.bytes, 32); + bytes33[32] = 0; + bytes = bytes33; for (size_t i = 0; i < 256; ++i) digits[j*256+i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask; #else @@ -521,7 +538,11 @@ skipfirst: if (skip[j]) continue; #endif +#if STRAUS_C==4 + const uint8_t digit = digits[j*64+i/4]; +#else const uint8_t digit = digits[j*256+i]; +#endif if (digit) { ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit)); @@ -542,16 +563,13 @@ skipfirst: size_t get_pippenger_c(size_t N) { -// uncached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 -// cached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 - if (N <= 2) return 1; - if (N <= 8) return 2; - if (N <= 16) return 3; - if (N <= 64) return 4; - if (N <= 128) return 5; - if (N <= 256) return 6; - if (N <= 1024) return 7; - if (N <= 2048) return 8; + if (N <= 13) return 2; + if (N <= 29) return 3; + if (N <= 83) return 4; + if (N <= 185) return 5; + if (N <= 465) return 6; + if (N <= 1180) return 7; + if (N <= 2295) return 8; return 9; } @@ -563,12 +581,13 @@ struct pippenger_cached_data ~pippenger_cached_data() { aligned_free(cached); } }; -std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N) +std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t start_offset, size_t N) { MULTIEXP_PERF(PERF_TIMER_START_UNIT(pippenger_init_cache, 1000000)); + CHECK_AND_ASSERT_THROW_MES(start_offset <= data.size(), "Bad cache base data"); if (N == 0) - N = data.size(); - CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data"); + N = data.size() - start_offset; + CHECK_AND_ASSERT_THROW_MES(N <= data.size() - start_offset, "Bad cache base data"); ge_cached cached; std::shared_ptr<pippenger_cached_data> cache(new pippenger_cached_data()); @@ -576,7 +595,7 @@ std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<Mu cache->cached = (ge_cached*)aligned_realloc(cache->cached, N * sizeof(ge_cached), 4096); CHECK_AND_ASSERT_THROW_MES(cache->cached, "Out of memory"); for (size_t i = 0; i < N; ++i) - ge_p3_to_cached(&cache->cached[i], &data[i].point); + ge_p3_to_cached(&cache->cached[i], &data[i+start_offset].point); MULTIEXP_PERF(PERF_TIMER_STOP(pippenger_init_cache)); return cache; @@ -587,16 +606,21 @@ size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &ca return cache->size * sizeof(*cache->cached); } -rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t c) +rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t cache_size, size_t c) { - CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small"); + if (cache != NULL && cache_size == 0) + cache_size = cache->size; + CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache_size <= cache->size, "Cache is too small"); if (c == 0) c = get_pippenger_c(data.size()); CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large"); ge_p3 result = ge_p3_identity; + bool result_init = false; std::unique_ptr<ge_p3[]> buckets{new ge_p3[1<<c]}; + bool buckets_init[1<<9]; std::shared_ptr<pippenger_cached_data> local_cache = cache == NULL ? pippenger_init_cache(data) : cache; + std::shared_ptr<pippenger_cached_data> local_cache_2 = data.size() > cache_size ? pippenger_init_cache(data, cache_size) : NULL; rct::key maxscalar = rct::zero(); for (size_t i = 0; i < data.size(); ++i) @@ -611,7 +635,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr< for (size_t k = groups; k-- > 0; ) { - if (!ge_p3_is_point_at_infinity(&result)) + if (result_init) { ge_p2 p2; ge_p3_to_p2(&p2, &result); @@ -625,8 +649,7 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr< ge_p1p1_to_p2(&p2, &p1); } } - for (size_t i = 0; i < (1u<<c); ++i) - buckets[i] = ge_p3_identity; + memset(buckets_init, 0, 1u<<c); // partition scalars into buckets for (size_t i = 0; i < data.size(); ++i) @@ -638,22 +661,45 @@ rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr< if (bucket == 0) continue; CHECK_AND_ASSERT_THROW_MES(bucket < (1u<<c), "bucket overflow"); - if (!ge_p3_is_point_at_infinity(&buckets[bucket])) + if (buckets_init[bucket]) { - add(buckets[bucket], local_cache->cached[i]); + if (i < cache_size) + add(buckets[bucket], local_cache->cached[i]); + else + add(buckets[bucket], local_cache_2->cached[i - cache_size]); } else + { buckets[bucket] = data[i].point; + buckets_init[bucket] = true; + } } // sum the buckets - ge_p3 pail = ge_p3_identity; + ge_p3 pail; + bool pail_init = false; for (size_t i = (1<<c)-1; i > 0; --i) { - if (!ge_p3_is_point_at_infinity(&buckets[i])) - add(pail, buckets[i]); - if (!ge_p3_is_point_at_infinity(&pail)) - add(result, pail); + if (buckets_init[i]) + { + if (pail_init) + add(pail, buckets[i]); + else + { + pail = buckets[i]; + pail_init = true; + } + } + if (pail_init) + { + if (result_init) + add(result, pail); + else + { + result = pail; + result_init = true; + } + } } } diff --git a/src/ringct/multiexp.h b/src/ringct/multiexp.h index 559ab664a..b52707933 100644 --- a/src/ringct/multiexp.h +++ b/src/ringct/multiexp.h @@ -61,10 +61,10 @@ rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data); std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<MultiexpData> &data, size_t N =0); size_t straus_get_cache_size(const std::shared_ptr<straus_cached_data> &cache); rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<straus_cached_data> &cache = NULL, size_t STEP = 0); -std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N =0); +std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t start_offset = 0, size_t N =0); size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &cache); size_t get_pippenger_c(size_t N); -rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache = NULL, size_t c = 0); +rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache = NULL, size_t cache_size = 0, size_t c = 0); } diff --git a/src/ringct/rctSigs.cpp b/src/ringct/rctSigs.cpp index 181e89c45..dccd18867 100644 --- a/src/ringct/rctSigs.cpp +++ b/src/ringct/rctSigs.cpp @@ -58,15 +58,6 @@ namespace } namespace rct { - Bulletproof proveRangeBulletproof(key &C, key &mask, uint64_t amount) - { - mask = rct::skGen(); - Bulletproof proof = bulletproof_PROVE(amount, mask); - CHECK_AND_ASSERT_THROW_MES(proof.V.size() == 1, "V has not exactly one element"); - C = proof.V[0]; - return proof; - } - Bulletproof proveRangeBulletproof(keyV &C, keyV &masks, const std::vector<uint64_t> &amounts) { masks = rct::skvGen(amounts.size()); |