about summary refs log tree commit diff
path: root/src/crypto/slow-hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/slow-hash.c')
-rw-r--r-- src/crypto/slow-hash.c 60
1 files changed, 26 insertions, 34 deletions
diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c
index 43b9619f3..6afa28934 100644
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2016, The Monero Project
+// Copyright (c) 2014-2017, The Monero Project
//
// All rights reserved.
//
@@ -44,6 +44,9 @@
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
+extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
+extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
+
#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_WIN64))
// Optimised code below, uses x86-specific intrinsics, SSE2, AES-NI
// Fall back to more portable code is down at the bottom
@@ -138,9 +141,6 @@
#define THREADV __thread
#endif
-extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
-extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
-
#pragma pack(push, 1)
union cn_slow_hash_state
{
@@ -494,7 +494,7 @@ void slow_hash_free_state(void)
* buffer of pseudorandom data by hashing the supplied data. It then uses this
* random data to fill a large 2MB buffer with pseudorandom data by iteratively
* encrypting it using 10 rounds of AES per entry. After this initialization,
- * it executes 500,000 rounds of mixing through the random 2MB buffer using
+ * it executes 524,288 rounds of mixing through the random 2MB buffer using
* AES (typically provided in hardware on modern CPUs) and a 64 bit multiply.
* Finally, it re-mixes this large buffer back into
* the 200 byte "text" buffer, and then hashes this buffer using one of four
@@ -530,7 +530,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash)
size_t i, j;
uint64_t *p = NULL;
- oaes_ctx *aes_ctx;
+ oaes_ctx *aes_ctx = NULL;
int useAes = !force_software_aes() && check_aes_hw();
static void (*const extra_hashes[4])(const void *, size_t, char *) =
@@ -578,8 +578,8 @@ void cn_slow_hash(const void *data, size_t length, char *hash)
U64(b)[0] = U64(&state.k[16])[0] ^ U64(&state.k[48])[0];
U64(b)[1] = U64(&state.k[16])[1] ^ U64(&state.k[48])[1];
- /* CryptoNight Step 3: Bounce randomly 1 million times through the mixing buffer,
- * using 500,000 iterations of the following mixing function. Each execution
+ /* CryptoNight Step 3: Bounce randomly 1,048,576 times (1<<20) through the mixing buffer,
+ * using 524,288 iterations of the following mixing function. Each execution
* performs two reads and writes from the mixing buffer.
*/
@@ -722,32 +722,24 @@ union cn_slow_hash_state
* key schedule. Don't try to use this for vanilla AES.
*/
static void aes_expand_key(const uint8_t *key, uint8_t *expandedKey) {
-__asm__("mov x2, %1\n\t" : : "r"(key), "r"(expandedKey));
+static const int rcon[] = {
+ 0x01,0x01,0x01,0x01,
+ 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d, // rotate-n-splat
+ 0x1b,0x1b,0x1b,0x1b };
__asm__(
-" adr x3,Lrcon\n"
-"\n"
" eor v0.16b,v0.16b,v0.16b\n"
-" ld1 {v3.16b},[x0],#16\n"
-" ld1 {v1.4s,v2.4s},[x3],#32\n"
-" b L256\n"
-".align 5\n"
-"Lrcon:\n"
-".long 0x01,0x01,0x01,0x01\n"
-".long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat\n"
-".long 0x1b,0x1b,0x1b,0x1b\n"
-"\n"
-".align 4\n"
-"L256:\n"
-" ld1 {v4.16b},[x0]\n"
-" mov w1,#5\n"
-" st1 {v3.4s},[x2],#16\n"
+" ld1 {v3.16b},[%0],#16\n"
+" ld1 {v1.4s,v2.4s},[%2],#32\n"
+" ld1 {v4.16b},[%0]\n"
+" mov w2,#5\n"
+" st1 {v3.4s},[%1],#16\n"
"\n"
-"Loop256:\n"
+"1:\n"
" tbl v6.16b,{v4.16b},v2.16b\n"
" ext v5.16b,v0.16b,v3.16b,#12\n"
-" st1 {v4.4s},[x2],#16\n"
+" st1 {v4.4s},[%1],#16\n"
" aese v6.16b,v0.16b\n"
-" subs w1,w1,#1\n"
+" subs w2,w2,#1\n"
"\n"
" eor v3.16b,v3.16b,v5.16b\n"
" ext v5.16b,v0.16b,v5.16b,#12\n"
@@ -757,8 +749,8 @@ __asm__(
" eor v3.16b,v3.16b,v5.16b\n"
" shl v1.16b,v1.16b,#1\n"
" eor v3.16b,v3.16b,v6.16b\n"
-" st1 {v3.4s},[x2],#16\n"
-" b.eq Ldone\n"
+" st1 {v3.4s},[%1],#16\n"
+" b.eq 2f\n"
"\n"
" dup v6.4s,v3.s[3] // just splat\n"
" ext v5.16b,v0.16b,v4.16b,#12\n"
@@ -771,9 +763,9 @@ __asm__(
" eor v4.16b,v4.16b,v5.16b\n"
"\n"
" eor v4.16b,v4.16b,v6.16b\n"
-" b Loop256\n"
+" b 1b\n"
"\n"
-"Ldone:\n");
+"2:\n" : : "r"(key), "r"(expandedKey), "r"(rcon));
}
/* An ordinary AES round is a sequence of SubBytes, ShiftRows, MixColumns, AddRoundKey. There
@@ -895,8 +887,8 @@ void cn_slow_hash(const void *data, size_t length, char *hash)
U64(b)[0] = U64(&state.k[16])[0] ^ U64(&state.k[48])[0];
U64(b)[1] = U64(&state.k[16])[1] ^ U64(&state.k[48])[1];
- /* CryptoNight Step 3: Bounce randomly 1 million times through the mixing buffer,
- * using 500,000 iterations of the following mixing function. Each execution
+ /* CryptoNight Step 3: Bounce randomly 1,048,576 times (1<<20) through the mixing buffer,
+ * using 524,288 iterations of the following mixing function. Each execution
* performs two reads and writes from the mixing buffer.
*/