aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/check/crc_common.h
diff options
context:
space:
mode:
authorJia Tan <jiat0218@gmail.com>2024-01-22 20:49:30 +0800
committerJia Tan <jiat0218@gmail.com>2024-02-01 20:09:11 +0800
commit455a08609caa3223066a717fb01bfa42c5dba47d (patch)
treea693bbb9237ad28a04e6ed29af6bf3037c40bac4 /src/liblzma/check/crc_common.h
parentCMake: Add support for ARM64 CRC32 instruction detection. (diff)
downloadxz-455a08609caa3223066a717fb01bfa42c5dba47d.tar.xz
liblzma: Refactor crc_common.h.
The CRC_GENERIC is now split into CRC32_GENERIC and CRC64_GENERIC, since the ARM64 optimizations will be different between CRC32 and CRC64. For the same reason, CRC_ARCH_OPTIMIZED is split into CRC32_ARCH_OPTIMIZED and CRC64_ARCH_OPTIMIZED. ifunc will only be used with x86-64 CLMUL because the runtime detection methods needed with ARM64 are not compatible with ifunc.
Diffstat (limited to '')
-rw-r--r--src/liblzma/check/crc_common.h108
1 files changed, 74 insertions, 34 deletions
diff --git a/src/liblzma/check/crc_common.h b/src/liblzma/check/crc_common.h
index 7c7f098d..35f60d95 100644
--- a/src/liblzma/check/crc_common.h
+++ b/src/liblzma/check/crc_common.h
@@ -48,54 +48,94 @@
# define crc_attr_no_sanitize_address
#endif
+// Keep this in sync with changes to crc32_arm64.h
+#if defined(_WIN32) || defined(HAVE_GETAUXVAL) \
+ || defined(HAVE_ELF_AUX_INFO) \
+ || (defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME))
+# define ARM64_RUNTIME_DETECTION 1
+#endif
+
+
+#undef CRC32_GENERIC
+#undef CRC64_GENERIC
-#undef CRC_GENERIC
-#undef CRC_ARCH_OPTIMIZED
+#undef CRC32_ARCH_OPTIMIZED
+#undef CRC64_ARCH_OPTIMIZED
+
+// The x86 CLMUL is used for both CRC32 and CRC64.
#undef CRC_X86_CLMUL
+
#undef CRC32_ARM64
+#undef CRC64_ARM64_CLMUL
+
#undef CRC_USE_IFUNC
+
#undef CRC_USE_GENERIC_FOR_SMALL_INPUTS
+// ARM64 CRC32 instruction is only useful for CRC32. Currently, only
+// little endian is supported since we were unable to test on a big
+// endian machine.
+#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN)
+// Allow ARM64 CRC32 instruction without a runtime check if
+// __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the
+// proper compiler options are used.
+# if defined(__ARM_FEATURE_CRC32)
+# define CRC32_ARCH_OPTIMIZED 1
+# define CRC32_ARM64 1
+# elif defined(ARM64_RUNTIME_DETECTION)
+# define CRC32_ARCH_OPTIMIZED 1
+# define CRC32_ARM64 1
+# define CRC32_GENERIC 1
+# endif
+#endif
+
+#if defined(HAVE_USABLE_CLMUL)
// If CLMUL is allowed unconditionally in the compiler options then the
// generic version can be omitted. Note that this doesn't work with MSVC
// as I don't know how to detect the features here.
//
// NOTE: Keep this this in sync with crc32_table.c.
-#if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
+# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6)
-# define CRC_ARCH_OPTIMIZED 1
-# define CRC_X86_CLMUL 1
-
-#elif (defined(__aarch64__))
-# define CRC_ARCH_OPTIMIZED 1
-# define CRC32_ARM64 1
-// If CLMUL cannot be used then only the generic slice-by-eight (CRC32)
-// or slice-by-four (CRC64) is built.
-#elif !defined(HAVE_USABLE_CLMUL)
-# define CRC_GENERIC 1
-// Otherwise build both and detect at runtime which version to use.
-#else
-# define CRC_GENERIC 1
-# define CRC_ARCH_OPTIMIZED 1
-# define CRC_X86_CLMUL 1
-# define CRC32_ARM64 1
-
-# ifdef HAVE_FUNC_ATTRIBUTE_IFUNC
-# define CRC_USE_IFUNC 1
-# endif
-
+# define CRC32_ARCH_OPTIMIZED 1
+# define CRC64_ARCH_OPTIMIZED 1
+# define CRC_X86_CLMUL 1
+# else
+# define CRC32_GENERIC 1
+# define CRC64_GENERIC 1
+# define CRC32_ARCH_OPTIMIZED 1
+# define CRC64_ARCH_OPTIMIZED 1
+# define CRC_X86_CLMUL 1
+
+# ifdef HAVE_FUNC_ATTRIBUTE_IFUNC
+# define CRC_USE_IFUNC 1
+# endif
/*
- // The generic code is much faster with 1-8-byte inputs and has
- // similar performance up to 16 bytes at least in microbenchmarks
- // (it depends on input buffer alignment too). If both versions are
- // built, this #define will use the generic version for inputs up to
- // 16 bytes and CLMUL for bigger inputs. It saves a little in code
- // size since the special cases for 0-16-byte inputs will be omitted
- // from the CLMUL code.
-# ifndef CRC_USE_IFUNC
-# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
-# endif
+ // The generic code is much faster with 1-8-byte inputs and
+ // has similar performance up to 16 bytes at least in
+ // microbenchmarks (it depends on input buffer alignment
+ // too). If both versions are built, this #define will use
+ // the generic version for inputs up to 16 bytes and CLMUL
+ // for bigger inputs. It saves a little in code size since
+ // the special cases for 0-16-byte inputs will be omitted
+ // from the CLMUL code.
+# ifndef CRC_USE_IFUNC
+# define CRC_USE_GENERIC_FOR_SMALL_INPUTS 1
+# endif
*/
+# endif
+#endif
+
+// For CRC32 use the generic slice-by-eight implementation if no optimized
+// version is available.
+#if !defined(CRC32_ARCH_OPTIMIZED) && !defined(CRC32_GENERIC)
+# define CRC32_GENERIC 1
+#endif
+
+// For CRC64 use the generic slice-by-four implementation if no optimized
+// version is available.
+#if !defined(CRC64_ARCH_OPTIMIZED) && !defined(CRC64_GENERIC)
+# define CRC64_GENERIC 1
#endif
#endif