diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2024-02-22 14:41:29 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2024-02-22 14:41:29 +0200 |
commit | 120da10ae139ea52ca4275452adf8eda02d07cc8 (patch) | |
tree | 6c70ce0bb03c15e070b5b8f72245b80af2ef3612 /src/liblzma/rangecoder | |
parent | INSTALL: Clarify that --disable-assembler affects only 32-bit x86. (diff) | |
download | xz-120da10ae139ea52ca4275452adf8eda02d07cc8.tar.xz |
liblzma: Disable branchless C version in range decoder.
Thanks to Sebastian Andrzej Siewior and Sam James for
benchmarking on various systems.
Diffstat (limited to 'src/liblzma/rangecoder')
-rw-r--r-- | src/liblzma/rangecoder/range_decoder.h | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/src/liblzma/rangecoder/range_decoder.h b/src/liblzma/rangecoder/range_decoder.h index 6cd0d892..b6422247 100644 --- a/src/liblzma/rangecoder/range_decoder.h +++ b/src/liblzma/rangecoder/range_decoder.h @@ -24,8 +24,8 @@ // Bitwise-or of the following enable branchless C versions: // 0x01 normal bittrees // 0x02 fixed-sized reverse bittrees -// 0x04 variable-sized reverse bittrees (disabled by default, not faster?) -// 0x08 matched literal (disabled by default, not faster?) +// 0x04 variable-sized reverse bittrees (not faster) +// 0x08 matched literal (not faster) // // GCC & Clang compatible x86-64 inline assembly: // 0x010 normal bittrees @@ -36,12 +36,19 @@ // // The default can be overridden at build time by defining // LZMA_RANGE_DECODER_CONFIG to the desired mask. +// +// 2024-02-22: Feedback from benchmarks: +// - Brancless C (0x003) can be better than basic on x86-64 but often it's +// slightly worse on other archs. Since asm is much better on x86-64, +// branchless C is not used at all. +// - With x86-64 asm, there are slight differences between GCC and Clang +// and different processors. Overall 0x1F0 seems to be the best choice. #ifndef LZMA_RANGE_DECODER_CONFIG # if defined(__x86_64__) && !defined(__ILP32__) \ && (defined(__GNUC__) || defined(__clang__)) # define LZMA_RANGE_DECODER_CONFIG 0x1F0 # else -# define LZMA_RANGE_DECODER_CONFIG 0x03 +# define LZMA_RANGE_DECODER_CONFIG 0 # endif #endif |