diff options
author | Jia Tan <jiat0218@gmail.com> | 2023-10-20 19:17:46 +0800 |
---|---|---|
committer | Jia Tan <jiat0218@gmail.com> | 2023-10-21 00:01:29 +0800 |
commit | 988e09f27b9b04a43d45d10f92782e0092ee27a9 (patch) | |
tree | 35149423b8a1da0a3c249ab44f2602ceb5365f3f | |
parent | Build: Remove check for COND_CHECK_CRC32 in check/Makefile.inc. (diff) | |
download | xz-988e09f27b9b04a43d45d10f92782e0092ee27a9.tar.xz |
liblzma: Move is_clmul_supported() back to crc_common.h.
This partially reverts creating crc_clmul.c
(8c0f9376f58c0696d5d6719705164d35542dd891) where is_clmul_supported()
was moved, extern'ed, and renamed to lzma_is_clmul_supported(). This
caused a problem when the function call to lzma_is_clmul_supported()
resulted in a call through the PLT. ifunc resolvers run very early in
the dynamic loading sequence, so the PLT may not be set up properly at
this point. Whether the PLT is used or not for
lzma_is_clmul_supported() depended upon the compiler toolchain and
flags used.
In liblzma compiled with GCC, for instance, GCC will go through the PLT
for function calls internal to liblzma if the version scripts and
symbol visibility hiding are not used. If lazy binding is disabled,
this would have made any program linked with liblzma fail during
dynamic loading in the ifunc resolver.
-rw-r--r-- | src/liblzma/check/crc32_fast.c | 2 | ||||
-rw-r--r-- | src/liblzma/check/crc64_fast.c | 2 | ||||
-rw-r--r-- | src/liblzma/check/crc_clmul.c | 45 | ||||
-rw-r--r-- | src/liblzma/check/crc_common.h | 52 |
4 files changed, 51 insertions, 50 deletions
diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c index add93d55..73659049 100644 --- a/src/liblzma/check/crc32_fast.c +++ b/src/liblzma/check/crc32_fast.c @@ -130,7 +130,7 @@ typedef uint32_t (*crc32_func_type)( static crc32_func_type crc32_resolve(void) { - return lzma_is_clmul_supported() ? &lzma_crc32_clmul : &crc32_generic; + return is_clmul_supported() ? &lzma_crc32_clmul : &crc32_generic; } #if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__) diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c index 8acdc713..4e6633db 100644 --- a/src/liblzma/check/crc64_fast.c +++ b/src/liblzma/check/crc64_fast.c @@ -94,7 +94,7 @@ typedef uint64_t (*crc64_func_type)( static crc64_func_type crc64_resolve(void) { - return lzma_is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic; + return is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic; } #if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__) diff --git a/src/liblzma/check/crc_clmul.c b/src/liblzma/check/crc_clmul.c index 7110fd7e..640415e7 100644 --- a/src/liblzma/check/crc_clmul.c +++ b/src/liblzma/check/crc_clmul.c @@ -372,48 +372,3 @@ lzma_crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc) && defined(_M_IX86) # pragma optimize("", on) #endif - - -//////////////////////// -// Detect CPU support // -//////////////////////// - -extern bool -lzma_is_clmul_supported(void) -{ - int success = 1; - uint32_t r[4]; // eax, ebx, ecx, edx - -#if defined(_MSC_VER) - // This needs <intrin.h> with MSVC. ICC has it as a built-in - // on all platforms. - __cpuid(r, 1); -#elif defined(HAVE_CPUID_H) - // Compared to just using __asm__ to run CPUID, this also checks - // that CPUID is supported and saves and restores ebx as that is - // needed with GCC < 5 with position-independent code (PIC). - success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); -#else - // Just a fallback that shouldn't be needed. 
- __asm__("cpuid\n\t" - : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) - : "a"(1), "c"(0)); -#endif - - // Returns true if these are supported: - // CLMUL (bit 1 in ecx) - // SSSE3 (bit 9 in ecx) - // SSE4.1 (bit 19 in ecx) - const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); - return success && (r[2] & ecx_mask) == ecx_mask; - - // Alternative methods that weren't used: - // - ICC's _may_i_use_cpu_feature: the other methods should work too. - // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") - // - // CPUID decding is needed with MSVC anyway and older GCC. This keeps - // the feature checks in the build system simpler too. The nice thing - // about __builtin_cpu_supports would be that it generates very short - // code as is it only reads a variable set at startup but a few bytes - // doesn't matter here. -} diff --git a/src/liblzma/check/crc_common.h b/src/liblzma/check/crc_common.h index 51ddd9d5..1783b5e7 100644 --- a/src/liblzma/check/crc_common.h +++ b/src/liblzma/check/crc_common.h @@ -99,11 +99,57 @@ # elif defined(HAVE_CPUID_H) # include <cpuid.h> # endif + +// is_clmul_supported() must be inlined in this header file because the +// ifunc resolver function may not support calling a function in another +// translation unit. Depending on compiler-toolchain and flags, a call to +// a function defined in another translation unit could result in a +// reference to the PLT, which is unsafe to do in an ifunc resolver. The +// ifunc resolver runs very early when loading a shared library, so the PLT +// entries may not be setup at that time. Inlining this function duplicates +// the function body in crc32_resolve() and crc64_resolve(), but this is +// acceptable because the function results in very few instructions. +static inline bool +is_clmul_supported(void) +{ + int success = 1; + uint32_t r[4]; // eax, ebx, ecx, edx + +#if defined(_MSC_VER) + // This needs <intrin.h> with MSVC. ICC has it as a built-in + // on all platforms. 
+ __cpuid(r, 1); +#elif defined(HAVE_CPUID_H) + // Compared to just using __asm__ to run CPUID, this also checks + // that CPUID is supported and saves and restores ebx as that is + // needed with GCC < 5 with position-independent code (PIC). + success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); +#else + // Just a fallback that shouldn't be needed. + __asm__("cpuid\n\t" + : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) + : "a"(1), "c"(0)); #endif -/// Detect at runtime if the CPU supports the x86 CLMUL instruction when -/// both the generic and CLMUL implementations are built. -extern bool lzma_is_clmul_supported(void); + // Returns true if these are supported: + // CLMUL (bit 1 in ecx) + // SSSE3 (bit 9 in ecx) + // SSE4.1 (bit 19 in ecx) + const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); + return success && (r[2] & ecx_mask) == ecx_mask; + + // Alternative methods that weren't used: + // - ICC's _may_i_use_cpu_feature: the other methods should work too. + // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") + // + // CPUID decding is needed with MSVC anyway and older GCC. This keeps + // the feature checks in the build system simpler too. The nice thing + // about __builtin_cpu_supports would be that it generates very short + // code as is it only reads a variable set at startup but a few bytes + // doesn't matter here. +} + +#endif /// CRC32 implemented with the x86 CLMUL instruction. extern uint32_t lzma_crc32_clmul(const uint8_t *buf, size_t size, |