diff options
author | Riccardo Spagni <ric@spagni.net> | 2018-09-11 15:45:56 +0200 |
---|---|---|
committer | Riccardo Spagni <ric@spagni.net> | 2018-09-11 15:45:56 +0200 |
commit | e6d36c17015e179aaa21f2353bdc608967833303 (patch) | |
tree | 39fb0829fe9f3b0d468bbb27393cd79fdefbda15 /src/ringct/multiexp.cc | |
parent | Merge pull request #4218 (diff) | |
parent | blockchain: add a testnet v9 a day after v8 (diff) | |
download | monero-e6d36c17015e179aaa21f2353bdc608967833303.tar.xz |
Merge pull request #4219
9137ad2c blockchain: add a testnet v9 a day after v8 (moneromooo-monero)
ac4f71c2 wallet2: bump testnet rollback to account for coming reorg (moneromooo-monero)
8f418a6d bulletproofs: #include <openssl/bn.h> (moneromooo-monero)
2bf63650 bulletproofs: speed up the latest changes a bit (moneromooo-monero)
044dff5a bulletproofs: scale points by 8 to ensure subgroup validity (moneromooo-monero)
c83012c4 bulletproofs: match aggregated verification to sarang's latest prototype (moneromooo-monero)
ce0c7432 performance_tests: add padded bulletproof construction (moneromooo-monero)
1224e53b core_tests: add a test for 4-aggregated BP verification (moneromooo-monero)
0e6ed559 fuzz_tests: add a bulletproof fuzz test (moneromooo-monero)
463434d1 more comprehensive test for ge_p3 comparison to identity/point at infinity (moneromooo-monero)
d0a0565f unit_tests: add a few more multiexp unit tests (moneromooo-monero)
6526d87f core_tests: add a test for a tx with empty bulletproof (moneromooo-monero)
a129bbd9 multiexp: fix maxscalar off by one (moneromooo-monero)
7ed496cc ringct: error out when hashToPoint* returns the point at infinity (moneromooo-monero)
d1591853 cryptonote_basic: check output type before using it (moneromooo-monero)
61632dc1 ringct: prevent a potential very large allocation (moneromooo-monero)
a4317e61 crypto: some paranoid checks in generate_signature/check_signature (moneromooo-monero)
7434df1c crypto: never return zero in random32_unbiased (moneromooo-monero)
0825e974 multiexp: fix wrong Bos-Coster result for 1 non trivial input (moneromooo-monero)
a1359ad4 Check inputs to addKeys are in range (moneromooo-monero)
fe0fa3b9 bulletproofs: reject x, y, z, or w[i] being zero (moneromooo-monero)
5ffb2ff9 v8: per byte fee, pad bulletproofs, fixed 11 ring size (moneromooo-monero)
869b3bf8 bulletproofs: a few fixes from the Kudelski review (moneromooo-monero)
c4291762 bulletproofs: reject points not in the main subgroup (moneromooo-monero)
15697177 bulletproofs: speed up a few multiplies using existing Hi cache (moneromooo-monero)
0b05a0fa Add Pippenger cache and limit Straus cache size (moneromooo-monero)
51eb3bdc add pippenger unit tests (moneromooo-monero)
b17b8db3 performance_tests: add stats and loop count multiplier options (moneromooo-monero)
7314d919 perf_timer: split timer class into a base one and a logging one (moneromooo-monero)
d126a02b performance_tests: add aggregated bulletproof tx verification (moneromooo-monero)
263431c4 Pippenger multiexp (moneromooo-monero)
1ed0ed4d multiexp: cut down on memory allocations (moneromooo-monero)
1b867e7f precalc the ge_p3 representation of H (moneromooo-monero)
ef56529f performance_tests: document the tested bulletproof layouts (moneromooo-monero)
30111780 unit_tests: a couple more bulletproof unit tests for gamma (moneromooo-monero)
c444b1b2 require canonical multi output bulletproof layout (moneromooo-monero)
7e67c52f Add a define for the max number of bulletproof multi-outputs (moneromooo-monero)
2a8fcb42 Bulletproof aggregated verification and tests (moneromooo-monero)
126196b0 multiexp: some speedups (moneromooo-monero)
71d67bda aligned: aligned memory alloc/realloc/free (moneromooo-monero)
cb9ecab1 performance_tests: add signature generation/verification (moneromooo-monero)
bacf0a1e bulletproofs: add aggregated verification (moneromooo-monero)
e895c3de make straus cached mode thread safe, and add tests for it (moneromooo-monero)
7f48bf05 multiexp: bos coster now works for just one point (moneromooo-monero)
9ce9f8ca bulletproofs: add multi output bulletproofs to rct (moneromooo-monero)
f34e2e20 performance_tests: add tx checking tests with more than 2 outputs (moneromooo-monero)
0793184b performance_tests: add a --verbose flag, and default to terse (moneromooo-monero)
939bc223 add Straus multiexp (moneromooo-monero)
9ff6e6a0 ringct: add bos coster multiexp (moneromooo-monero)
e9164bb3 bulletproofs: misc optimizations (moneromooo-monero)
112f32f0 performance_tests: add crypto ops (moneromooo-monero)
f5d7b993 performance_tests: add bulletproofs (moneromooo-monero)
8f4ce989 performance_tests: add RingCT MLSAG gen/ver tests (moneromooo-monero)
1aa10c43 performance_tests: add (Borromean) range proofs (moneromooo-monero)
aacfd6e3 bulletproofs: multi-output bulletproofs (moneromooo-monero)
cb1cc757 performance_tests: don't override log level to 0 (moneromooo-monero)
Diffstat (limited to 'src/ringct/multiexp.cc')
-rw-r--r-- | src/ringct/multiexp.cc | 665 |
1 files changed, 665 insertions, 0 deletions
diff --git a/src/ringct/multiexp.cc b/src/ringct/multiexp.cc new file mode 100644 index 000000000..21957b94c --- /dev/null +++ b/src/ringct/multiexp.cc @@ -0,0 +1,665 @@ +// Copyright (c) 2017, The Monero Project +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, are +// permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright notice, this list +// of conditions and the following disclaimer in the documentation and/or other +// materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its contributors may be +// used to endorse or promote products derived from this software without specific +// prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Adapted from Python code by Sarang Noether + +#include "misc_log_ex.h" +#include "common/perf_timer.h" +extern "C" +{ +#include "crypto/crypto-ops.h" +} +#include "common/aligned.h" +#include "rctOps.h" +#include "multiexp.h" + +#undef MONERO_DEFAULT_LOG_CATEGORY +#define MONERO_DEFAULT_LOG_CATEGORY "multiexp" + +//#define MULTIEXP_PERF(x) x +#define MULTIEXP_PERF(x) + +#define RAW_MEMORY_BLOCK +//#define ALTERNATE_LAYOUT +//#define TRACK_STRAUS_ZERO_IDENTITY + +// per points us for N/B points (B point bands) +// raw alt 128/192 4096/192 4096/4096 +// 0 0 52.6 71 71.2 +// 0 1 53.2 72.2 72.4 +// 1 0 52.7 67 67.1 +// 1 1 52.8 70.4 70.2 + +// Pippenger: +// 1 2 3 4 5 6 7 8 9 bestN +// 2 555 598 621 804 1038 1733 2486 5020 8304 1 +// 4 783 747 800 1006 1428 2132 3285 5185 9806 2 +// 8 1174 1071 1095 1286 1640 2398 3869 6378 12080 2 +// 16 2279 1874 1745 1739 2144 2831 4209 6964 12007 4 +// 32 3910 3706 2588 2477 2782 3467 4856 7489 12618 4 +// 64 7184 5429 4710 4368 4010 4672 6027 8559 13684 5 +// 128 14097 10574 8452 7297 6841 6718 8615 10580 15641 6 +// 256 27715 20800 16000 13550 11875 11400 11505 14090 18460 6 +// 512 55100 41250 31740 26570 22030 19830 20760 21380 25215 6 +// 1024 111520 79000 61080 49720 43080 38320 37600 35040 36750 8 +// 2048 219480 162680 122120 102080 83760 70360 66600 63920 66160 8 +// 4096 453320 323080 247240 210200 180040 150240 132440 114920 110560 9 + +// 2 4 8 16 32 64 128 256 512 1024 2048 4096 +// Bos Coster 858 994 1316 1949 3183 5512 9865 17830 33485 63160 124280 246320 +// Straus 226 341 548 980 1870 3538 7039 14490 29020 57200 118640 233640 +// Straus/cached 226 315 485 785 1514 2858 5753 11065 22970 45120 98880 194840 +// Pippenger 555 747 1071 1739 2477 4010 6718 11400 19830 35040 63920 110560 + +// Best/cached Straus Straus Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip +// Best/uncached Straus Straus Straus Straus Straus Straus Pip Pip Pip Pip Pip Pip + +namespace rct +{ + +static inline bool operator<(const rct::key &k0, const rct::key&k1) +{ + for (int n = 31; n >= 0; --n) + { + if (k0.bytes[n] < k1.bytes[n]) + return true; + if (k0.bytes[n] > k1.bytes[n]) + return false; + } + return false; +} + +static inline rct::key div2(const rct::key &k) +{ + rct::key res; + int carry = 0; + for (int n = 31; n >= 0; --n) + { + int new_carry = (k.bytes[n] & 1) << 7; + res.bytes[n] = k.bytes[n] / 2 + carry; + carry = new_carry; + } + return res; +} + +static inline rct::key pow2(size_t n) +{ + CHECK_AND_ASSERT_THROW_MES(n < 256, "Invalid pow2 argument"); + rct::key res = rct::zero(); + res[n >> 3] |= 1<<(n&7); + return res; +} + +static inline int test(const rct::key &k, size_t n) +{ + if (n >= 256) return 0; + return k[n >> 3] & (1 << (n & 7)); +} + +static inline void add(ge_p3 &p3, const ge_cached &other) +{ + ge_p1p1 p1; + ge_add(&p1, &p3, &other); + ge_p1p1_to_p3(&p3, &p1); +} + +static inline void add(ge_p3 &p3, const ge_p3 &other) +{ + ge_cached cached; + ge_p3_to_cached(&cached, &other); + add(p3, cached); +} + +rct::key bos_coster_heap_conv(std::vector<MultiexpData> data) +{ + MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); + size_t points = data.size(); + CHECK_AND_ASSERT_THROW_MES(points > 1, "Not enough points"); + std::vector<size_t> heap(points); + for (size_t n = 0; n < points; ++n) + heap[n] = n; + + auto Comp = [&](size_t e0, size_t e1) { return data[e0].scalar < data[e1].scalar; }; + std::make_heap(heap.begin(), heap.end(), Comp); + MULTIEXP_PERF(PERF_TIMER_STOP(setup)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(loop, 1000000)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(pop, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(pop)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(add, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(add)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(sub, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(sub)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(push, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(push)); + while (heap.size() > 1) + { + MULTIEXP_PERF(PERF_TIMER_RESUME(pop)); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index1 = heap.back(); + heap.pop_back(); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index2 = heap.back(); + heap.pop_back(); + MULTIEXP_PERF(PERF_TIMER_PAUSE(pop)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(add)); + ge_cached cached; + ge_p3_to_cached(&cached, &data[index1].point); + ge_p1p1 p1; + ge_add(&p1, &data[index2].point, &cached); + ge_p1p1_to_p3(&data[index2].point, &p1); + MULTIEXP_PERF(PERF_TIMER_PAUSE(add)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(sub)); + sc_sub(data[index1].scalar.bytes, data[index1].scalar.bytes, data[index2].scalar.bytes); + MULTIEXP_PERF(PERF_TIMER_PAUSE(sub)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(push)); + if (!(data[index1].scalar == rct::zero())) + { + heap.push_back(index1); + std::push_heap(heap.begin(), heap.end(), Comp); + } + + heap.push_back(index2); + std::push_heap(heap.begin(), heap.end(), Comp); + MULTIEXP_PERF(PERF_TIMER_PAUSE(push)); + } + MULTIEXP_PERF(PERF_TIMER_STOP(push)); + MULTIEXP_PERF(PERF_TIMER_STOP(sub)); + MULTIEXP_PERF(PERF_TIMER_STOP(add)); + MULTIEXP_PERF(PERF_TIMER_STOP(pop)); + MULTIEXP_PERF(PERF_TIMER_STOP(loop)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(end, 1000000)); + //return rct::scalarmultKey(data[index1].point, data[index1].scalar); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index1 = heap.back(); + heap.pop_back(); + ge_p2 p2; + ge_scalarmult(&p2, data[index1].scalar.bytes, &data[index1].point); + rct::key res; + ge_tobytes(res.bytes, &p2); + return res; +} + +rct::key bos_coster_heap_conv_robust(std::vector<MultiexpData> data) +{ + MULTIEXP_PERF(PERF_TIMER_START_UNIT(bos_coster, 1000000)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); + size_t points = data.size(); + CHECK_AND_ASSERT_THROW_MES(points > 0, "Not enough points"); + std::vector<size_t> heap; + heap.reserve(points); + for (size_t n = 0; n < points; ++n) + { + if (!(data[n].scalar == rct::zero()) && !ge_p3_is_point_at_infinity(&data[n].point)) + heap.push_back(n); + } + points = heap.size(); + if (points == 0) + return rct::identity(); + + auto Comp = [&](size_t e0, size_t e1) { return data[e0].scalar < data[e1].scalar; }; + std::make_heap(heap.begin(), heap.end(), Comp); + + if (points < 2) + { + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index1 = heap.back(); + ge_p2 p2; + ge_scalarmult(&p2, data[index1].scalar.bytes, &data[index1].point); + rct::key res; + ge_tobytes(res.bytes, &p2); + return res; + } + + MULTIEXP_PERF(PERF_TIMER_STOP(setup)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(loop, 1000000)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(pop, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(pop)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(div, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(div)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(add, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(add)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(sub, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(sub)); + MULTIEXP_PERF(PERF_TIMER_START_UNIT(push, 1000000)); MULTIEXP_PERF(PERF_TIMER_PAUSE(push)); + while (heap.size() > 1) + { + MULTIEXP_PERF(PERF_TIMER_RESUME(pop)); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index1 = heap.back(); + heap.pop_back(); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index2 = heap.back(); + heap.pop_back(); + MULTIEXP_PERF(PERF_TIMER_PAUSE(pop)); + + ge_cached cached; + ge_p1p1 p1; + ge_p2 p2; + + MULTIEXP_PERF(PERF_TIMER_RESUME(div)); + while (1) + { + rct::key s1_2 = div2(data[index1].scalar); + if (!(data[index2].scalar < s1_2)) + break; + if (data[index1].scalar.bytes[0] & 1) + { + data.resize(data.size()+1); + data.back().scalar = rct::identity(); + data.back().point = data[index1].point; + heap.push_back(data.size() - 1); + std::push_heap(heap.begin(), heap.end(), Comp); + } + data[index1].scalar = div2(data[index1].scalar); + ge_p3_to_p2(&p2, &data[index1].point); + ge_p2_dbl(&p1, &p2); + ge_p1p1_to_p3(&data[index1].point, &p1); + } + MULTIEXP_PERF(PERF_TIMER_PAUSE(div)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(add)); + ge_p3_to_cached(&cached, &data[index1].point); + ge_add(&p1, &data[index2].point, &cached); + ge_p1p1_to_p3(&data[index2].point, &p1); + MULTIEXP_PERF(PERF_TIMER_PAUSE(add)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(sub)); + sc_sub(data[index1].scalar.bytes, data[index1].scalar.bytes, data[index2].scalar.bytes); + MULTIEXP_PERF(PERF_TIMER_PAUSE(sub)); + + MULTIEXP_PERF(PERF_TIMER_RESUME(push)); + if (!(data[index1].scalar == rct::zero())) + { + heap.push_back(index1); + std::push_heap(heap.begin(), heap.end(), Comp); + } + + heap.push_back(index2); + std::push_heap(heap.begin(), heap.end(), Comp); + MULTIEXP_PERF(PERF_TIMER_PAUSE(push)); + } + MULTIEXP_PERF(PERF_TIMER_STOP(push)); + MULTIEXP_PERF(PERF_TIMER_STOP(sub)); + MULTIEXP_PERF(PERF_TIMER_STOP(add)); + MULTIEXP_PERF(PERF_TIMER_STOP(pop)); + MULTIEXP_PERF(PERF_TIMER_STOP(loop)); + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(end, 1000000)); + //return rct::scalarmultKey(data[index1].point, data[index1].scalar); + std::pop_heap(heap.begin(), heap.end(), Comp); + size_t index1 = heap.back(); + heap.pop_back(); + ge_p2 p2; + ge_scalarmult(&p2, data[index1].scalar.bytes, &data[index1].point); + rct::key res; + ge_tobytes(res.bytes, &p2); + return res; +} + +static constexpr unsigned int STRAUS_C = 4; + +struct straus_cached_data +{ +#ifdef RAW_MEMORY_BLOCK + size_t size; + ge_cached *multiples; + straus_cached_data(): size(0), multiples(NULL) {} + ~straus_cached_data() { aligned_free(multiples); } +#else + std::vector<std::vector<ge_cached>> multiples; +#endif +}; +#ifdef RAW_MEMORY_BLOCK +#ifdef ALTERNATE_LAYOUT +#define CACHE_OFFSET(cache,point,digit) cache->multiples[(point)*((1<<STRAUS_C)-1)+((digit)-1)] +#else +#define CACHE_OFFSET(cache,point,digit) cache->multiples[(point)+cache->size*((digit)-1)] +#endif +#else +#ifdef ALTERNATE_LAYOUT +#define CACHE_OFFSET(cache,point,digit) local_cache->multiples[j][digit-1] +#else +#define CACHE_OFFSET(cache,point,digit) local_cache->multiples[digit][j] +#endif +#endif + +std::shared_ptr<straus_cached_data> straus_init_cache(const std::vector<MultiexpData> &data, size_t N) +{ + MULTIEXP_PERF(PERF_TIMER_START_UNIT(multiples, 1000000)); + if (N == 0) + N = data.size(); + CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data"); + ge_cached cached; + ge_p1p1 p1; + ge_p3 p3; + std::shared_ptr<straus_cached_data> cache(new straus_cached_data()); + +#ifdef RAW_MEMORY_BLOCK + const size_t offset = cache->size; + cache->multiples = (ge_cached*)aligned_realloc(cache->multiples, sizeof(ge_cached) * ((1<<STRAUS_C)-1) * std::max(offset, N), 4096); + CHECK_AND_ASSERT_THROW_MES(cache->multiples, "Out of memory"); + cache->size = N; + for (size_t j=offset;j<N;++j) + { + ge_p3_to_cached(&CACHE_OFFSET(cache, j, 1), &data[j].point); + for (size_t i=2;i<1<<STRAUS_C;++i) + { + ge_add(&p1, &data[j].point, &CACHE_OFFSET(cache, j, i-1)); + ge_p1p1_to_p3(&p3, &p1); + ge_p3_to_cached(&CACHE_OFFSET(cache, j, i), &p3); + } + } +#else +#ifdef ALTERNATE_LAYOUT + const size_t offset = cache->multiples.size(); + cache->multiples.resize(std::max(offset, N)); + for (size_t i = offset; i < N; ++i) + { + cache->multiples[i].resize((1<<STRAUS_C)-1); + ge_p3_to_cached(&cache->multiples[i][0], &data[i].point); + for (size_t j=2;j<1<<STRAUS_C;++j) + { + ge_add(&p1, &data[i].point, &cache->multiples[i][j-2]); + ge_p1p1_to_p3(&p3, &p1); + ge_p3_to_cached(&cache->multiples[i][j-1], &p3); + } + } +#else + cache->multiples.resize(1<<STRAUS_C); + size_t offset = cache->multiples[1].size(); + cache->multiples[1].resize(std::max(offset, N)); + for (size_t i = offset; i < N; ++i) + ge_p3_to_cached(&cache->multiples[1][i], &data[i].point); + for (size_t i=2;i<1<<STRAUS_C;++i) + cache->multiples[i].resize(std::max(offset, N)); + for (size_t j=offset;j<N;++j) + { + for (size_t i=2;i<1<<STRAUS_C;++i) + { + ge_add(&p1, &data[j].point, &cache->multiples[i-1][j]); + ge_p1p1_to_p3(&p3, &p1); + ge_p3_to_cached(&cache->multiples[i][j], &p3); + } + } +#endif +#endif + MULTIEXP_PERF(PERF_TIMER_STOP(multiples)); + + return cache; +} + +size_t straus_get_cache_size(const std::shared_ptr<straus_cached_data> &cache) +{ + size_t sz = 0; +#ifdef RAW_MEMORY_BLOCK + sz += cache->size * sizeof(ge_cached) * ((1<<STRAUS_C)-1); +#else + for (const auto &e0: cache->multiples) + sz += e0.size() * sizeof(ge_cached); +#endif + return sz; +} + +rct::key straus(const std::vector<MultiexpData> &data, const std::shared_ptr<straus_cached_data> &cache, size_t STEP) +{ + CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small"); + MULTIEXP_PERF(PERF_TIMER_UNIT(straus, 1000000)); + bool HiGi = cache != NULL; + STEP = STEP ? STEP : 192; + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(setup, 1000000)); + static constexpr unsigned int mask = (1<<STRAUS_C)-1; + std::shared_ptr<straus_cached_data> local_cache = cache == NULL ? straus_init_cache(data) : cache; + ge_cached cached; + ge_p1p1 p1; + ge_p3 p3; + +#ifdef TRACK_STRAUS_ZERO_IDENTITY + MULTIEXP_PERF(PERF_TIMER_START_UNIT(skip, 1000000)); + std::vector<uint8_t> skip(data.size()); + for (size_t i = 0; i < data.size(); ++i) + skip[i] = data[i].scalar == rct::zero() || ge_p3_is_point_at_infinity(&data[i].point); + MULTIEXP_PERF(PERF_TIMER_STOP(skip)); +#endif + + MULTIEXP_PERF(PERF_TIMER_START_UNIT(digits, 1000000)); + std::unique_ptr<uint8_t[]> digits{new uint8_t[256 * data.size()]}; + for (size_t j = 0; j < data.size(); ++j) + { + unsigned char bytes33[33]; + memcpy(bytes33, data[j].scalar.bytes, 32); + bytes33[32] = 0; + const unsigned char *bytes = bytes33; +#if 1 + static_assert(STRAUS_C == 4, "optimized version needs STRAUS_C == 4"); + unsigned int i; + for (i = 0; i < 256; i += 8, bytes++) + { + digits[j*256+i] = bytes[0] & 0xf; + digits[j*256+i+1] = (bytes[0] >> 1) & 0xf; + digits[j*256+i+2] = (bytes[0] >> 2) & 0xf; + digits[j*256+i+3] = (bytes[0] >> 3) & 0xf; + digits[j*256+i+4] = ((bytes[0] >> 4) | (bytes[1]<<4)) & 0xf; + digits[j*256+i+5] = ((bytes[0] >> 5) | (bytes[1]<<3)) & 0xf; + digits[j*256+i+6] = ((bytes[0] >> 6) | (bytes[1]<<2)) & 0xf; + digits[j*256+i+7] = ((bytes[0] >> 7) | (bytes[1]<<1)) & 0xf; + } +#elif 1 + for (size_t i = 0; i < 256; ++i) + digits[j*256+i] = ((bytes[i>>3] | (bytes[(i>>3)+1]<<8)) >> (i&7)) & mask; +#else + rct::key shifted = data[j].scalar; + for (size_t i = 0; i < 256; ++i) + { + digits[j*256+i] = shifted.bytes[0] & 0xf; + shifted = div2(shifted, (256-i)>>3); + } +#endif + } + MULTIEXP_PERF(PERF_TIMER_STOP(digits)); + + rct::key maxscalar = rct::zero(); + for (size_t i = 0; i < data.size(); ++i) + if (maxscalar < data[i].scalar) + maxscalar = data[i].scalar; + size_t start_i = 0; + while (start_i < 256 && !(maxscalar < pow2(start_i))) + start_i += STRAUS_C; + MULTIEXP_PERF(PERF_TIMER_STOP(setup)); + + ge_p3 res_p3 = ge_p3_identity; + + for (size_t start_offset = 0; start_offset < data.size(); start_offset += STEP) + { + const size_t num_points = std::min(data.size() - start_offset, STEP); + + ge_p3 band_p3 = ge_p3_identity; + size_t i = start_i; + if (!(i < STRAUS_C)) + goto skipfirst; + while (!(i < STRAUS_C)) + { + ge_p2 p2; + ge_p3_to_p2(&p2, &band_p3); + for (size_t j = 0; j < STRAUS_C; ++j) + { + ge_p2_dbl(&p1, &p2); + if (j == STRAUS_C - 1) + ge_p1p1_to_p3(&band_p3, &p1); + else + ge_p1p1_to_p2(&p2, &p1); + } +skipfirst: + i -= STRAUS_C; + for (size_t j = start_offset; j < start_offset + num_points; ++j) + { +#ifdef TRACK_STRAUS_ZERO_IDENTITY + if (skip[j]) + continue; +#endif + const uint8_t digit = digits[j*256+i]; + if (digit) + { + ge_add(&p1, &band_p3, &CACHE_OFFSET(local_cache, j, digit)); + ge_p1p1_to_p3(&band_p3, &p1); + } + } + } + + ge_p3_to_cached(&cached, &band_p3); + ge_add(&p1, &res_p3, &cached); + ge_p1p1_to_p3(&res_p3, &p1); + } + + rct::key res; + ge_p3_tobytes(res.bytes, &res_p3); + return res; +} + +size_t get_pippenger_c(size_t N) +{ +// uncached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 +// cached: 2:1, 4:2, 8:2, 16:3, 32:4, 64:4, 128:5, 256:6, 512:7, 1024:7, 2048:8, 4096:9 + if (N <= 2) return 1; + if (N <= 8) return 2; + if (N <= 16) return 3; + if (N <= 64) return 4; + if (N <= 128) return 5; + if (N <= 256) return 6; + if (N <= 1024) return 7; + if (N <= 2048) return 8; + return 9; +} + +struct pippenger_cached_data +{ + size_t size; + ge_cached *cached; + pippenger_cached_data(): size(0), cached(NULL) {} + ~pippenger_cached_data() { aligned_free(cached); } +}; + +std::shared_ptr<pippenger_cached_data> pippenger_init_cache(const std::vector<MultiexpData> &data, size_t N) +{ + MULTIEXP_PERF(PERF_TIMER_START_UNIT(pippenger_init_cache, 1000000)); + if (N == 0) + N = data.size(); + CHECK_AND_ASSERT_THROW_MES(N <= data.size(), "Bad cache base data"); + ge_cached cached; + std::shared_ptr<pippenger_cached_data> cache(new pippenger_cached_data()); + + cache->size = N; + cache->cached = (ge_cached*)aligned_realloc(cache->cached, N * sizeof(ge_cached), 4096); + CHECK_AND_ASSERT_THROW_MES(cache->cached, "Out of memory"); + for (size_t i = 0; i < N; ++i) + ge_p3_to_cached(&cache->cached[i], &data[i].point); + + MULTIEXP_PERF(PERF_TIMER_STOP(pippenger_init_cache)); + return cache; +} + +size_t pippenger_get_cache_size(const std::shared_ptr<pippenger_cached_data> &cache) +{ + return cache->size * sizeof(*cache->cached); +} + +rct::key pippenger(const std::vector<MultiexpData> &data, const std::shared_ptr<pippenger_cached_data> &cache, size_t c) +{ + CHECK_AND_ASSERT_THROW_MES(cache == NULL || cache->size >= data.size(), "Cache is too small"); + if (c == 0) + c = get_pippenger_c(data.size()); + CHECK_AND_ASSERT_THROW_MES(c <= 9, "c is too large"); + + ge_p3 result = ge_p3_identity; + std::unique_ptr<ge_p3[]> buckets{new ge_p3[1<<c]}; + std::shared_ptr<pippenger_cached_data> local_cache = cache == NULL ? pippenger_init_cache(data) : cache; + + rct::key maxscalar = rct::zero(); + for (size_t i = 0; i < data.size(); ++i) + { + if (maxscalar < data[i].scalar) + maxscalar = data[i].scalar; + } + size_t groups = 0; + while (groups < 256 && !(maxscalar < pow2(groups))) + ++groups; + groups = (groups + c - 1) / c; + + for (size_t k = groups; k-- > 0; ) + { + if (!ge_p3_is_point_at_infinity(&result)) + { + ge_p2 p2; + ge_p3_to_p2(&p2, &result); + for (size_t i = 0; i < c; ++i) + { + ge_p1p1 p1; + ge_p2_dbl(&p1, &p2); + if (i == c - 1) + ge_p1p1_to_p3(&result, &p1); + else + ge_p1p1_to_p2(&p2, &p1); + } + } + for (size_t i = 0; i < (1u<<c); ++i) + buckets[i] = ge_p3_identity; + + // partition scalars into buckets + for (size_t i = 0; i < data.size(); ++i) + { + unsigned int bucket = 0; + for (size_t j = 0; j < c; ++j) + if (test(data[i].scalar, k*c+j)) + bucket |= 1<<j; + if (bucket == 0) + continue; + CHECK_AND_ASSERT_THROW_MES(bucket < (1u<<c), "bucket overflow"); + if (!ge_p3_is_point_at_infinity(&buckets[bucket])) + { + add(buckets[bucket], local_cache->cached[i]); + } + else + buckets[bucket] = data[i].point; + } + + // sum the buckets + ge_p3 pail = ge_p3_identity; + for (size_t i = (1<<c)-1; i > 0; --i) + { + if (!ge_p3_is_point_at_infinity(&buckets[i])) + add(pail, buckets[i]); + if (!ge_p3_is_point_at_infinity(&pail)) + add(result, pail); + } + } + + rct::key res; + ge_p3_tobytes(res.bytes, &result); + return res; +} + +} |