aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/check/crc64_fast.c
diff options
context:
space:
mode:
authorJia Tan <jiat0218@gmail.com>2023-10-14 13:23:23 +0800
committerJia Tan <jiat0218@gmail.com>2023-10-18 23:54:41 +0800
commit96b663f67c0e738a99ba8f35d9f4ced9add74544 (patch)
tree4137a06a8c13df0a0122836634b3e73271b97336 /src/liblzma/check/crc64_fast.c
parentliblzma: Create crc_clmul.c. (diff)
downloadxz-96b663f67c0e738a99ba8f35d9f4ced9add74544.tar.xz
liblzma: Refactor CRC comments.
A detailed description of the three dispatch methods was added. Also, duplicated comments now only appear in crc32_fast.c or were removed from both crc32_fast.c and crc64_fast.c if they appeared in crc_clmul.c.
Diffstat (limited to 'src/liblzma/check/crc64_fast.c')
-rw-r--r--src/liblzma/check/crc64_fast.c61
1 files changed, 8 insertions, 53 deletions
diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c
index 07f4f576..8acdc713 100644
--- a/src/liblzma/check/crc64_fast.c
+++ b/src/liblzma/check/crc64_fast.c
@@ -2,23 +2,6 @@
//
/// \file crc64.c
/// \brief CRC64 calculation
-///
-/// There are two methods in this file. crc64_generic uses the
-/// the slice-by-four algorithm. This is the same idea that is
-/// used in crc32_fast.c, but for CRC64 we use only four tables
-/// instead of eight to avoid increasing CPU cache usage.
-///
-/// crc64_clmul uses 32/64-bit x86 SSSE3, SSE4.1, and CLMUL instructions.
-/// It was derived from
-/// https://www.researchgate.net/publication/263424619_Fast_CRC_computation
-/// and the public domain code from https://github.com/rawrunprotected/crc
-/// (URLs were checked on 2023-09-29).
-///
-/// FIXME: Builds for 32-bit x86 use crc64_x86.S by default instead
-/// of this file and thus CLMUL version isn't available on 32-bit x86
-/// unless configured with --disable-assembler. Even then the lookup table
-/// isn't omitted in crc64_table.c since it doesn't know that assembly
-/// code has been disabled.
//
// Authors: Lasse Collin
// Ilya Kurdyukov
@@ -93,12 +76,16 @@ crc64_generic(const uint8_t *buf, size_t size, uint64_t crc)
#endif
#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
+//////////////////////////
+// Function dispatching //
+//////////////////////////
+
+// If both the generic and CLMUL implementations are usable, then the
+// function that is used is selected at runtime. See crc32_fast.c.
+
typedef uint64_t (*crc64_func_type)(
const uint8_t *buf, size_t size, uint64_t crc);
-// Clang 16.0.0 and older has a bug where it marks the ifunc resolver
-// function as unused since it is static and never used outside of
-// __attribute__((__ifunc__())).
#if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-function"
@@ -139,13 +126,6 @@ crc64_set_func(void)
static uint64_t
crc64_dispatch(const uint8_t *buf, size_t size, uint64_t crc)
{
- // When __attribute__((__ifunc__(...))) and
- // __attribute__((__constructor__)) isn't supported, set the
- // function pointer without any locking. If multiple threads run
- // the detection code in parallel, they will all end up setting
- // the pointer to the same value. This avoids the use of
- // mythread_once() on every call to lzma_crc64() but this likely
- // isn't strictly standards compliant. Let's change it if it breaks.
crc64_set_func();
return crc64_func(buf, size, crc);
}
@@ -163,36 +143,11 @@ extern LZMA_API(uint64_t)
lzma_crc64(const uint8_t *buf, size_t size, uint64_t crc)
{
#if defined(CRC_GENERIC) && defined(CRC_CLMUL)
- // If CLMUL is available, it is the best for non-tiny inputs,
- // being over twice as fast as the generic slice-by-four version.
- // However, for size <= 16 it's different. In the extreme case
- // of size == 1 the generic version can be five times faster.
- // At size >= 8 the CLMUL starts to become reasonable. It
- // varies depending on the alignment of buf too.
- //
- // The above doesn't include the overhead of mythread_once().
- // At least on x86-64 GNU/Linux, pthread_once() is very fast but
- // it still makes lzma_crc64(buf, 1, crc) 50-100 % slower. When
- // size reaches 12-16 bytes the overhead becomes negligible.
- //
- // So using the generic version for size <= 16 may give better
- // performance with tiny inputs but if such inputs happen rarely
- // it's not so obvious because then the lookup table of the
- // generic version may not be in the processor cache.
+
#ifdef CRC_USE_GENERIC_FOR_SMALL_INPUTS
if (size <= 16)
return crc64_generic(buf, size, crc);
#endif
-
-/*
-#ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR
- // See crc64_dispatch(). This would be the alternative which uses
- // locking and doesn't use crc64_dispatch(). Note that on Windows
- // this method needs Vista threads.
- mythread_once(crc64_set_func);
-#endif
-*/
-
return crc64_func(buf, size, crc);
#elif defined(CRC_CLMUL)