aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/check/crc32_fast.c
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2008-12-31 00:30:49 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2008-12-31 00:30:49 +0200
commit: 7ed9d943b31d3ee9c5fb2387e84a241ba33afe90 (patch)
tree: 5f9107c718aa996be6850b431ba319584064c9d7 /src/liblzma/check/crc32_fast.c
parent: Use 28 MiB as memory usage limit for encoding in test_compress.sh. (diff)
download: xz-7ed9d943b31d3ee9c5fb2387e84a241ba33afe90.tar.xz
Remove lzma_init() and other init functions from liblzma API.
Half of the developers were already forgetting to use these functions, which could have caused total breakage in some future liblzma version or even now if --enable-small was used. Now liblzma uses pthread_once() to do the initializations unless it has been built with --disable-threads, which makes these initializations thread-unsafe. When --enable-small isn't used, liblzma currently gets needlessly linked against libpthread (on systems that have it). While it is stupid for now, liblzma will need threads in the future anyway, so this stupidity will be temporary only. When --enable-small is used, different code for CRC32 and CRC64 is now used than without --enable-small. This made the resulting binary slightly smaller, but the main reason was to clean it up and to handle the lack of lzma_init_check(). The pkg-config file lzma.pc was renamed to liblzma.pc. I'm not sure if it works correctly and portably for static linking (Libs.private includes -pthread or other operating system specific flags). Hopefully someone complains if it is bad. lzma_rc_prices[] is now included as a precomputed array even with --enable-small. It's just 128 bytes now that it uses uint8_t instead of uint32_t. The smaller array seemed to be at least as fast as the more bloated uint32_t array on x86; hopefully it's not bad on other architectures.
Diffstat (limited to 'src/liblzma/check/crc32_fast.c')
-rw-r--r-- src/liblzma/check/crc32_fast.c 88
1 files changed, 88 insertions, 0 deletions
diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c
new file mode 100644
index 00000000..698cf768
--- /dev/null
+++ b/src/liblzma/check/crc32_fast.c
@@ -0,0 +1,88 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file crc32_fast.c
+/// \brief CRC32 calculation
+//
+// This code has been put into the public domain.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "check.h"
+#include "crc_macros.h"
+
+
+/// \brief      Calculate the CRC32 of a buffer
+///
+/// \param      buf     Pointer to the input bytes. It is only read.
+/// \param      size    Number of bytes to process starting at buf
+/// \param      crc     Starting value. It is bit-inverted on entry, and
+///                     the final value is bit-inverted again before it
+///                     is returned.
+///
+/// \return     The updated CRC32 value
+///
+// If you make any changes, do some benchmarking! Seemingly unrelated
+// changes can very easily ruin the performance (and this is very probably
+// very compiler dependent).
+extern LZMA_API uint32_t
+lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc)
+{
+	crc = ~crc;
+
+#ifdef WORDS_BIGENDIAN
+	crc = bswap_32(crc);
+#endif
+
+	if (size > 8) {
+		// Fix the alignment, if needed. The if statement above
+		// ensures that this won't read past the end of buf[]:
+		// at most seven bytes are consumed here and more than
+		// eight are available.
+		while ((uintptr_t)(buf) & 7) {
+			crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
+			--size;
+		}
+
+		// Calculate the position where to stop. Only whole
+		// eight-byte groups are handled by the main loop.
+		const uint8_t *const limit = buf + (size & ~(size_t)(7));
+
+		// Calculate how many bytes must be calculated separately
+		// before returning the result.
+		size &= (size_t)(7);
+
+		// Calculate the CRC32 using the slice-by-eight algorithm.
+		// It is explained in this document:
+		// http://www.intel.com/technology/comms/perfnet/download/CRC_generators.pdf
+		//
+		// The code below is different than the code in Intel's
+		// paper, but the principle is identical. This should be
+		// faster with GCC than Intel's code. This is tested only
+		// with GCC 3.4.6 and 4.1.2 on x86, so your results may vary.
+		//
+		// Using -Os and -fomit-frame-pointer seem to give the best
+		// results at least with GCC 4.1.2 on x86. It's still far
+		// from the speed of hand-optimized assembler.
+		//
+		// NOTE(review): buf is eight-byte aligned at this point
+		// (see the loop above), so the 32-bit loads below are
+		// aligned. They still read a uint8_t buffer through a
+		// uint32_t lvalue; confirm that the supported compilers
+		// accept this type pun before changing optimization flags.
+		while (buf < limit) {
+			// The casts keep the const qualifier of buf; the
+			// input is never written through these pointers.
+			crc ^= *(const uint32_t *)(buf);
+			buf += 4;
+
+			crc = lzma_crc32_table[7][A(crc)]
+			    ^ lzma_crc32_table[6][B(crc)]
+			    ^ lzma_crc32_table[5][C(crc)]
+			    ^ lzma_crc32_table[4][D(crc)];
+
+			const uint32_t tmp = *(const uint32_t *)(buf);
+			buf += 4;
+
+			// It is critical for performance, that
+			// the crc variable is XORed between the
+			// two table-lookup pairs.
+			crc = lzma_crc32_table[3][A(tmp)]
+			    ^ lzma_crc32_table[2][B(tmp)]
+			    ^ crc
+			    ^ lzma_crc32_table[1][C(tmp)]
+			    ^ lzma_crc32_table[0][D(tmp)];
+		}
+	}
+
+	// Process the remaining bytes (or the whole input when size <= 8)
+	// one byte at a time.
+	while (size-- != 0)
+		crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
+
+#ifdef WORDS_BIGENDIAN
+	crc = bswap_32(crc);
+#endif
+
+	return ~crc;
+}