diff options
Diffstat (limited to 'src/liblzma/lzma/fastpos.h')
-rw-r--r-- | src/liblzma/lzma/fastpos.h | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h new file mode 100644 index 00000000..57a94556 --- /dev/null +++ b/src/liblzma/lzma/fastpos.h @@ -0,0 +1,156 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file fastpos.h +/// \brief Kind of two-bit version of bit scan reverse +// +// Copyright (C) 1999-2007 Igor Pavlov +// Copyright (C) 2008 Lasse Collin +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef LZMA_FASTPOS_H +#define LZMA_FASTPOS_H + +// LZMA encodes match distances (positions) by storing the highest two +// bits using a six-bit value [0, 63], and then the missing lower bits. +// Dictionary size is also stored using this encoding in the new .lzma +// file format header. +// +// fastpos.h provides a way to quickly find out the correct six-bit +// values. The following table gives some examples of this encoding: +// +// pos return +// 0 0 +// 1 1 +// 2 2 +// 3 3 +// 4 4 +// 5 4 +// 6 5 +// 7 5 +// 8 6 +// 11 6 +// 12 7 +// ... ... +// 15 7 +// 16 8 +// 17 8 +// ... ... +// 23 8 +// 24 9 +// 25 9 +// ... ... +// +// +// Provided functions or macros +// ---------------------------- +// +// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos) +// assumes that pos >= FULL_DISTANCES, thus the result is at least +// FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of +// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos) +// should be tiny bit faster due to the assumption being made. +// +// +// Size vs. speed +// -------------- +// +// With some CPUs that have fast BSR (bit scan reverse) instruction, the +// size optimized version is slightly faster than the bigger table based +// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III +// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that +// would still have speed roughly comparable to the table version. Older +// x86 CPUs like the original Pentium have very slow BSR; on those systems +// the table version is a lot faster. +// +// On some CPUs, the table version is a lot faster when using position +// dependent code, but with position independent code the size optimized +// version is slightly faster. This occurs at least on 32-bit SPARC (no +// ASM optimizations). +// +// I'm making the table version the default, because that has good speed +// on all systems I have tried. The size optimized version is sometimes +// slightly faster, but sometimes it is a lot slower. +// +// Finally, this code isn't a major bottle neck in LZMA encoding anyway. + +#ifdef HAVE_SMALL +# include "bsr.h" + +# define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos)) + +static inline uint32_t +get_pos_slot_2(uint32_t pos) +{ + uint32_t i; + lzma_bsr(i, pos); + return (i + i) + ((pos >> (i - 1)) & 1); +} + + +#else + +#define FASTPOS_BITS 13 + +extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; + + +#define fastpos_shift(extra, n) \ + ((extra) + (n) * (FASTPOS_BITS - 1)) + +#define fastpos_limit(extra, n) \ + (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) + +#define fastpos_result(pos, extra, n) \ + lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \ + + 2 * fastpos_shift(extra, n) + + +static inline uint32_t +get_pos_slot(uint32_t pos) +{ + // If it is small enough, we can pick the result directly from + // the precalculated table. + if (pos < fastpos_limit(0, 0)) + return lzma_fastpos[pos]; + + if (pos < fastpos_limit(0, 1)) + return fastpos_result(pos, 0, 1); + + return fastpos_result(pos, 0, 2); +} + + +#ifdef FULL_DISTANCES_BITS +static inline uint32_t +get_pos_slot_2(uint32_t pos) +{ + // FIXME: This assert() cannot be enabled at the moment, because + // lzma_getoptimum.c calls this function so that this assertion + // fails; however, it ignores the result of this function when + // this assert() would have failed. + // assert(pos >= FULL_DISTANCES); + + if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0); + + if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1); + + return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2); +} +#endif + +#endif + +#endif |