aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/check/crc64_table.c
diff options
context:
space:
mode:
author Lasse Collin <lasse.collin@tukaani.org> 2022-11-14 21:34:57 +0200
committer Lasse Collin <lasse.collin@tukaani.org> 2022-11-14 23:05:46 +0200
commit f644473a211394447824ea00518d0a214ff3f7f2 (patch)
tree 8fc19fa7f3811b090f3f73398ce8bd0708d10a53 /src/liblzma/check/crc64_table.c
parent Translations: Update the Swedish translation one more time. (diff)
download xz-f644473a211394447824ea00518d0a214ff3f7f2.tar.xz
liblzma: Add fast CRC64 for 32/64-bit x86 using SSSE3 + SSE4.1 + CLMUL.
It also works on E2K as it supports these intrinsics. On x86-64 runtime detection is used so the code keeps working on older processors too. A CLMUL-only build can be done by using -msse4.1 -mpclmul in CFLAGS and this will reduce the library size since the generic implementation and its 8 KiB lookup table will be omitted. On 32-bit x86 this isn't used by default for now because by default on 32-bit x86 the separate assembly file crc64_x86.S is used. If --disable-assembler is used then this new CLMUL code is used the same way as on 64-bit x86. However, a CLMUL-only build (-msse4.1 -mpclmul) won't omit the 8 KiB lookup table on 32-bit x86 due to a currently-missing check for disabled assembler usage. The configure.ac check should be such that the code won't be built if something in the toolchain doesn't support it but --disable-clmul-crc option can be used to unconditionally disable this feature. CLMUL speeds up decompression of files that have compressed very well (assuming CRC64 is used as a check type). It is known that the CLMUL code is significantly slower than the generic code for tiny inputs (especially 1-8 bytes but up to 16 bytes). If that is a real-world problem then there is already a commented-out variant that uses the generic version for small inputs. Thanks to Ilya Kurdyukov for the original patch which was derived from a white paper from Intel [1] (published in 2009) and public domain code from [2] (released in 2016). [1] https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf [2] https://github.com/rawrunprotected/crc
Diffstat (limited to '')
-rw-r--r-- src/liblzma/check/crc64_table.c 21
1 files changed, 17 insertions, 4 deletions
diff --git a/src/liblzma/check/crc64_table.c b/src/liblzma/check/crc64_table.c
index 7560eb0a..241adcd4 100644
--- a/src/liblzma/check/crc64_table.c
+++ b/src/liblzma/check/crc64_table.c
@@ -12,11 +12,24 @@
#include "common.h"
+
+// FIXME: Compared to crc64_fast.c this has to check for __x86_64__ too
+// so that in 32-bit builds crc64_x86.S won't break due to a missing table.
+#if (defined(__x86_64__) && defined(__SSSE3__) \
+ && defined(__SSE4_1__) && defined(__PCLMUL__)) \
+ || (defined(__e2k__) && __iset__ >= 6)
+// No table needed but something has to be exported to keep some toolchains
+// happy. Also use a declaration to silence compiler warnings.
+extern const char lzma_crc64_dummy;
+const char lzma_crc64_dummy;
+
+#else
// Having the declaration here silences clang -Wmissing-variable-declarations.
extern const uint64_t lzma_crc64_table[4][256];
-#ifdef WORDS_BIGENDIAN
-# include "crc64_table_be.h"
-#else
-# include "crc64_table_le.h"
+# if defined(WORDS_BIGENDIAN)
+# include "crc64_table_be.h"
+# else
+# include "crc64_table_le.h"
+# endif
#endif