about | summary | refs | log | tree | commit | diff
path: root/src/crypto/slow-hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/crypto/slow-hash.c')
-rw-r--r-- src/crypto/slow-hash.c 82
1 file changed, 44 insertions, 38 deletions
diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c
index 6bdc1b28c..7f36c9dc3 100644
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2018, The Monero Project
+// Copyright (c) 2014-2019, The Monero Project
//
// All rights reserved.
//
@@ -76,7 +76,7 @@ static inline int use_v4_jit(void)
const char *env = getenv("MONERO_USE_CNV4_JIT");
if (!env) {
- use_v4_jit_flag = 0;
+ use_v4_jit_flag = 1;
}
else if (!strcmp(env, "0") || !strcmp(env, "no")) {
use_v4_jit_flag = 0;
@@ -274,10 +274,10 @@ static inline int use_v4_jit(void)
#define VARIANT2_2() \
do if (variant == 2 || variant == 3) \
{ \
- *U64(hp_state + (j ^ 0x10)) ^= SWAP64LE(hi); \
- *(U64(hp_state + (j ^ 0x10)) + 1) ^= SWAP64LE(lo); \
- hi ^= SWAP64LE(*U64(hp_state + (j ^ 0x20))); \
- lo ^= SWAP64LE(*(U64(hp_state + (j ^ 0x20)) + 1)); \
+ *U64(local_hp_state + (j ^ 0x10)) ^= SWAP64LE(hi); \
+ *(U64(local_hp_state + (j ^ 0x10)) + 1) ^= SWAP64LE(lo); \
+ hi ^= SWAP64LE(*U64(local_hp_state + (j ^ 0x20))); \
+ lo ^= SWAP64LE(*(U64(local_hp_state + (j ^ 0x20)) + 1)); \
} while (0)
#define V4_REG_LOAD(dst, src) \
@@ -405,7 +405,7 @@ static inline int use_v4_jit(void)
#define pre_aes() \
j = state_index(a); \
- _c = _mm_load_si128(R128(&hp_state[j])); \
+ _c = _mm_load_si128(R128(&local_hp_state[j])); \
_a = _mm_load_si128(R128(a)); \
/*
@@ -418,20 +418,20 @@ static inline int use_v4_jit(void)
* This code is based upon an optimized implementation by dga.
*/
#define post_aes() \
- VARIANT2_SHUFFLE_ADD_SSE2(hp_state, j); \
+ VARIANT2_SHUFFLE_ADD_SSE2(local_hp_state, j); \
_mm_store_si128(R128(c), _c); \
- _mm_store_si128(R128(&hp_state[j]), _mm_xor_si128(_b, _c)); \
- VARIANT1_1(&hp_state[j]); \
+ _mm_store_si128(R128(&local_hp_state[j]), _mm_xor_si128(_b, _c)); \
+ VARIANT1_1(&local_hp_state[j]); \
j = state_index(c); \
- p = U64(&hp_state[j]); \
+ p = U64(&local_hp_state[j]); \
b[0] = p[0]; b[1] = p[1]; \
VARIANT2_INTEGER_MATH_SSE2(b, c); \
VARIANT4_RANDOM_MATH(a, b, r, &_b, &_b1); \
__mul(); \
VARIANT2_2(); \
- VARIANT2_SHUFFLE_ADD_SSE2(hp_state, j); \
+ VARIANT2_SHUFFLE_ADD_SSE2(local_hp_state, j); \
a[0] += hi; a[1] += lo; \
- p = U64(&hp_state[j]); \
+ p = U64(&local_hp_state[j]); \
p[0] = a[0]; p[1] = a[1]; \
a[0] ^= b[0]; a[1] ^= b[1]; \
VARIANT1_2(p + 1); \
@@ -756,10 +756,10 @@ void slow_hash_allocate_state(void)
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__NetBSD__)
hp_state = mmap(0, MEMORY, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, 0, 0);
+ MAP_PRIVATE | MAP_ANON, -1, 0);
#else
hp_state = mmap(0, MEMORY, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, 0, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
#endif
if(hp_state == MAP_FAILED)
hp_state = NULL;
@@ -778,11 +778,16 @@ void slow_hash_allocate_state(void)
#else
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__NetBSD__)
- hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_PRIVATE | MAP_ANON, 0, 0);
+#ifdef __NetBSD__
+#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
+#else
+#define RESERVED_FLAGS 0
+#endif
+ hp_jitfunc_memory = mmap(0, 4096 + 4096, PROT_READ | PROT_WRITE | RESERVED_FLAGS,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
#else
- hp_jitfunc_memory = mmap(0, 4096 + 4095, PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ hp_jitfunc_memory = mmap(0, 4096 + 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
if(hp_jitfunc_memory == MAP_FAILED)
hp_jitfunc_memory = NULL;
@@ -794,9 +799,6 @@ void slow_hash_allocate_state(void)
hp_jitfunc_memory = malloc(4096 + 4095);
}
hp_jitfunc = (v4_random_math_JIT_func)((size_t)(hp_jitfunc_memory + 4095) & ~4095);
-#if !(defined(_MSC_VER) || defined(__MINGW32__))
- mprotect(hp_jitfunc, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
-#endif
}
/**
@@ -893,6 +895,10 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
if(hp_state == NULL)
slow_hash_allocate_state();
+ // locals to avoid constant TLS dereferencing
+ uint8_t *local_hp_state = hp_state;
+ v4_random_math_JIT_func local_hp_jitfunc = hp_jitfunc;
+
/* CryptoNight Step 1: Use Keccak1600 to initialize the 'state' (and 'text') buffers from the data. */
if (prehashed) {
memcpy(&state.hs, data, length);
@@ -915,7 +921,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
aes_pseudo_round(text, text, expandedKey, INIT_SIZE_BLK);
- memcpy(&hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
+ memcpy(&local_hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}
}
else
@@ -927,7 +933,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
for(j = 0; j < INIT_SIZE_BLK; j++)
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], aes_ctx->key->exp_data);
- memcpy(&hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
+ memcpy(&local_hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}
}
@@ -975,7 +981,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
// add the xor to the pseudo round
- aes_pseudo_round_xor(text, text, expandedKey, &hp_state[i * INIT_SIZE_BYTE], INIT_SIZE_BLK);
+ aes_pseudo_round_xor(text, text, expandedKey, &local_hp_state[i * INIT_SIZE_BYTE], INIT_SIZE_BLK);
}
}
else
@@ -985,7 +991,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
{
for(j = 0; j < INIT_SIZE_BLK; j++)
{
- xor_blocks(&text[j * AES_BLOCK_SIZE], &hp_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
+ xor_blocks(&text[j * AES_BLOCK_SIZE], &local_hp_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], aes_ctx->key->exp_data);
}
}
@@ -1065,24 +1071,24 @@ union cn_slow_hash_state
#define pre_aes() \
j = state_index(a); \
- _c = vld1q_u8(&hp_state[j]); \
+ _c = vld1q_u8(&local_hp_state[j]); \
_a = vld1q_u8((const uint8_t *)a); \
#define post_aes() \
- VARIANT2_SHUFFLE_ADD_NEON(hp_state, j); \
+ VARIANT2_SHUFFLE_ADD_NEON(local_hp_state, j); \
vst1q_u8((uint8_t *)c, _c); \
- vst1q_u8(&hp_state[j], veorq_u8(_b, _c)); \
- VARIANT1_1(&hp_state[j]); \
+ vst1q_u8(&local_hp_state[j], veorq_u8(_b, _c)); \
+ VARIANT1_1(&local_hp_state[j]); \
j = state_index(c); \
- p = U64(&hp_state[j]); \
+ p = U64(&local_hp_state[j]); \
b[0] = p[0]; b[1] = p[1]; \
VARIANT2_PORTABLE_INTEGER_MATH(b, c); \
VARIANT4_RANDOM_MATH(a, b, r, &_b, &_b1); \
__mul(); \
VARIANT2_2(); \
- VARIANT2_SHUFFLE_ADD_NEON(hp_state, j); \
+ VARIANT2_SHUFFLE_ADD_NEON(local_hp_state, j); \
a[0] += hi; a[1] += lo; \
- p = U64(&hp_state[j]); \
+ p = U64(&local_hp_state[j]); \
p[0] = a[0]; p[1] = a[1]; \
a[0] ^= b[0]; a[1] ^= b[1]; \
VARIANT1_2(p + 1); \
@@ -1245,9 +1251,9 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
RDATA_ALIGN16 uint8_t expandedKey[240];
#ifndef FORCE_USE_HEAP
- RDATA_ALIGN16 uint8_t hp_state[MEMORY];
+ RDATA_ALIGN16 uint8_t local_hp_state[MEMORY];
#else
- uint8_t *hp_state = (uint8_t *)aligned_malloc(MEMORY,16);
+ uint8_t *local_hp_state = (uint8_t *)aligned_malloc(MEMORY,16);
#endif
uint8_t text[INIT_SIZE_BYTE];
@@ -1287,7 +1293,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
aes_pseudo_round(text, text, expandedKey, INIT_SIZE_BLK);
- memcpy(&hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
+ memcpy(&local_hp_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}
U64(a)[0] = U64(&state.k[0])[0] ^ U64(&state.k[32])[0];
@@ -1322,7 +1328,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
// add the xor to the pseudo round
- aes_pseudo_round_xor(text, text, expandedKey, &hp_state[i * INIT_SIZE_BYTE], INIT_SIZE_BLK);
+ aes_pseudo_round_xor(text, text, expandedKey, &local_hp_state[i * INIT_SIZE_BYTE], INIT_SIZE_BLK);
}
/* CryptoNight Step 5: Apply Keccak to the state again, and then
@@ -1337,7 +1343,7 @@ void cn_slow_hash(const void *data, size_t length, char *hash, int variant, int
extra_hashes[state.hs.b[0] & 3](&state, 200, hash);
#ifdef FORCE_USE_HEAP
- aligned_free(hp_state);
+ aligned_free(local_hp_state);
#endif
}
#else /* aarch64 && crypto */