diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2014-08-03 21:08:12 +0300 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2014-08-03 21:08:12 +0300 |
commit | a9477d1e0c6fd0e47e637d051e7b9e2a5d9af517 (patch) | |
tree | 2775facf874762fff0d2f2fafacbbe2b70f07327 /src/liblzma | |
parent | liblzma: SHA-256: Remove the GCC #pragma that became unneeded. (diff) | |
download | xz-a9477d1e0c6fd0e47e637d051e7b9e2a5d9af517.tar.xz |
liblzma: SHA-256: Optimize the way rotations are done.
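This looks weird because the rotations become sequential
(each rotation is applied to the result of the previous one),
but it helps quite a bit on both 32-bit and 64-bit x86:
- It requires fewer instructions on two-operand
instruction sets like x86.
- It requires one register less, which matters especially
on 32-bit x86.
The results are unchanged because rotation distributes over XOR:
for example, rotr(x ^ rotr(x ^ rotr(x, 9), 11), 2) equals
rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22).
I hope this doesn't hurt other architectures.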
I didn't invent this idea myself, but I don't remember where
I saw it first.
Diffstat (limited to 'src/liblzma')
-rw-r--r-- | src/liblzma/check/sha256.c | 17 |
1 files changed, 11 insertions, 6 deletions
```diff
diff --git a/src/liblzma/check/sha256.c b/src/liblzma/check/sha256.c
index 6e2f65f1..e0e2f10b 100644
--- a/src/liblzma/check/sha256.c
+++ b/src/liblzma/check/sha256.c
@@ -23,8 +23,13 @@
 
 #include "check.h"
 
-// At least on x86, GCC is able to optimize this to a rotate instruction.
-#define rotr_32(num, amount) ((num) >> (amount) | (num) << (32 - (amount)))
+// Rotate a uint32_t. GCC can optimize this to a rotate instruction
+// at least on x86.
+static inline uint32_t
+rotr_32(uint32_t num, unsigned amount)
+{
+	return (num >> amount) | (num << (32 - amount));
+}
 
 #define blk0(i) (W[i] = conv32be(data[i]))
 #define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
@@ -49,10 +54,10 @@
 #define R0(i) R(i, 0, blk0(i))
 #define R2(i) R(i, j, blk2(i))
 
-#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22))
-#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25))
-#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ (x >> 3))
-#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ (x >> 10))
+#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
+#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
+#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
+#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))
 
 
 static const uint32_t SHA256_K[64] = {
```