aboutsummaryrefslogtreecommitdiff
path: root/src/crypto/slow-hash.c
diff options
context:
space:
mode:
author Riccardo Spagni <ric@spagni.net> 2017-06-18 17:32:03 +0200
committer Riccardo Spagni <ric@spagni.net> 2017-06-18 17:32:03 +0200
commit f973a2f81a0f6a9c14750a7d97d0e3d742716265 (patch)
tree cbe07a0f32c37f4e17375251f6cdfefaee9c1b79 /src/crypto/slow-hash.c
parent Merge pull request #2076 (diff)
parent Fix #1991 asm multiply again (diff)
download monero-f973a2f81a0f6a9c14750a7d97d0e3d742716265.tar.xz
Merge pull request #2078
1dd52415 Fix #1991 asm multiply again (Howard Chu)
Diffstat (limited to 'src/crypto/slow-hash.c')
-rw-r--r-- src/crypto/slow-hash.c 29
1 files changed, 13 insertions, 16 deletions
diff --git a/src/crypto/slow-hash.c b/src/crypto/slow-hash.c
index 6afa28934..b92b6e6c3 100644
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@@ -979,34 +979,31 @@ STATIC void cn_mul128(const uint64_t *a, const uint64_t *b, uint64_t *r)
r[1] = lo;
}
#else /* ARM32 */
-/* Can work as inline, but actually runs slower. Keep it separate */
#define mul(a, b, c) cn_mul128((const uint32_t *)a, (const uint32_t *)b, (uint32_t *)c)
-void cn_mul128(const uint32_t *aa, const uint32_t *bb, uint32_t *r)
+STATIC void cn_mul128(const uint32_t *aa, const uint32_t *bb, uint32_t *r)
{
- uint32_t t0, t1;
+ uint32_t t0, t1, t2=0, t3=0;
__asm__ __volatile__(
"umull %[t0], %[t1], %[a], %[b]\n\t"
- "str %[t0], [%[r], #8]\n\t"
+ "str %[t0], %[ll]\n\t"
// accumulating with 0 can never overflow/carry
- "mov %[t0], #0\n\t"
+ "eor %[t0], %[t0]\n\t"
"umlal %[t1], %[t0], %[a], %[B]\n\t"
- "mov %[a], #0\n\t"
- "umlal %[t1], %[a], %[A], %[b]\n\t"
- "str %[t1], [%[r], #12]\n\t"
+ "umlal %[t1], %[t2], %[A], %[b]\n\t"
+ "str %[t1], %[lh]\n\t"
- "mov %[b], #0\n\t"
- "umlal %[t0], %[b], %[A], %[B]\n\t"
+ "umlal %[t0], %[t3], %[A], %[B]\n\t"
// final add may have a carry
- "adds %[t0], %[t0], %[a]\n\t"
- "adc %[t1], %[b], #0\n\t"
+ "adds %[t0], %[t0], %[t2]\n\t"
+ "adc %[t1], %[t3], #0\n\t"
- "str %[t0], [%[r]]\n\t"
- "str %[t1], [%[r], #4]\n\t"
- : [t0]"=&r"(t0), [t1]"=&r"(t1), "=m"(r[0]), "=m"(r[1]), "=m"(r[2]), "=m"(r[3])
- : [A]"r"(aa[1]), [a]"r"(aa[0]), [B]"r"(bb[1]), [b]"r"(bb[0]), [r]"r"(r)
+ "str %[t0], %[hl]\n\t"
+ "str %[t1], %[hh]\n\t"
+ : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"+r"(t2), [t3]"+r"(t3), [hl]"=m"(r[0]), [hh]"=m"(r[1]), [ll]"=m"(r[2]), [lh]"=m"(r[3])
+ : [A]"r"(aa[1]), [a]"r"(aa[0]), [B]"r"(bb[1]), [b]"r"(bb[0])
: "cc");
}
#endif /* !aarch64 */