aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma/fastpos.h
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-01-15 14:02:22 +0200
committerLasse Collin <lasse.collin@tukaani.org>2008-01-15 14:02:22 +0200
commite5728142a2048979f5c0c2149ce71ae952a092e1 (patch)
tree7ea154a3a970afecf25b5de654d9c915e9c0c84d /src/liblzma/lzma/fastpos.h
parentAdded bsr.h. (diff)
downloadxz-e5728142a2048979f5c0c2149ce71ae952a092e1.tar.xz
Revised the fastpos code. It now uses the slightly faster
table-based version from LZMA SDK 4.57. This should be fast on most systems. A simpler and smaller alternative version is also provided. On some CPUs this can be even a little faster than the default table-based version (see comments in fastpos.h), but on most systems the table-based code is faster.
Diffstat (limited to 'src/liblzma/lzma/fastpos.h')
-rw-r--r--src/liblzma/lzma/fastpos.h156
1 files changed, 156 insertions, 0 deletions
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h
new file mode 100644
index 00000000..57a94556
--- /dev/null
+++ b/src/liblzma/lzma/fastpos.h
@@ -0,0 +1,156 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file fastpos.h
+/// \brief Kind of two-bit version of bit scan reverse
+//
+// Copyright (C) 1999-2007 Igor Pavlov
+// Copyright (C) 2008 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FASTPOS_H
+#define LZMA_FASTPOS_H
+
+// LZMA encodes match distances (positions) by storing the highest two
+// bits using a six-bit value [0, 63], and then the missing lower bits.
+// Dictionary size is also stored using this encoding in the new .lzma
+// file format header.
+//
+// fastpos.h provides a way to quickly find out the correct six-bit
+// values. The following table gives some examples of this encoding:
+//
+// pos return
+// 0 0
+// 1 1
+// 2 2
+// 3 3
+// 4 4
+// 5 4
+// 6 5
+// 7 5
+// 8 6
+// 11 6
+// 12 7
+// ... ...
+// 15 7
+// 16 8
+// 17 8
+// ... ...
+// 23 8
+// 24 9
+// 25 9
+// ... ...
+//
+//
+// Provided functions or macros
+// ----------------------------
+//
+// get_pos_slot(pos) is the basic version. get_pos_slot_2(pos)
+// assumes that pos >= FULL_DISTANCES, thus the result is at least
+// FULL_DISTANCES_BITS * 2. Using get_pos_slot(pos) instead of
+// get_pos_slot_2(pos) would give the same result, but get_pos_slot_2(pos)
+// should be tiny bit faster due to the assumption being made.
+//
+//
+// Size vs. speed
+// --------------
+//
+// With some CPUs that have fast BSR (bit scan reverse) instruction, the
+// size optimized version is slightly faster than the bigger table based
+// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
+// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
+// would still have speed roughly comparable to the table version. Older
+// x86 CPUs like the original Pentium have very slow BSR; on those systems
+// the table version is a lot faster.
+//
+// On some CPUs, the table version is a lot faster when using position
+// dependent code, but with position independent code the size optimized
+// version is slightly faster. This occurs at least on 32-bit SPARC (no
+// ASM optimizations).
+//
+// I'm making the table version the default, because that has good speed
+// on all systems I have tried. The size optimized version is sometimes
+// slightly faster, but sometimes it is a lot slower.
+//
+// Finally, this code isn't a major bottle neck in LZMA encoding anyway.
+
+#ifdef HAVE_SMALL
+# include "bsr.h"
+
+# define get_pos_slot(pos) ((pos) <= 4 ? (pos) : get_pos_slot_2(pos))
+
+static inline uint32_t
+get_pos_slot_2(uint32_t pos)
+{
+ uint32_t i;
+ lzma_bsr(i, pos);
+ return (i + i) + ((pos >> (i - 1)) & 1);
+}
+
+
+#else
+
+#define FASTPOS_BITS 13
+
+extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
+
+
+#define fastpos_shift(extra, n) \
+ ((extra) + (n) * (FASTPOS_BITS - 1))
+
+#define fastpos_limit(extra, n) \
+ (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
+
+#define fastpos_result(pos, extra, n) \
+ lzma_fastpos[(pos) >> fastpos_shift(extra, n)] \
+ + 2 * fastpos_shift(extra, n)
+
+
+static inline uint32_t
+get_pos_slot(uint32_t pos)
+{
+ // If it is small enough, we can pick the result directly from
+ // the precalculated table.
+ if (pos < fastpos_limit(0, 0))
+ return lzma_fastpos[pos];
+
+ if (pos < fastpos_limit(0, 1))
+ return fastpos_result(pos, 0, 1);
+
+ return fastpos_result(pos, 0, 2);
+}
+
+
+#ifdef FULL_DISTANCES_BITS
+static inline uint32_t
+get_pos_slot_2(uint32_t pos)
+{
+ // FIXME: This assert() cannot be enabled at the moment, because
+ // lzma_getoptimum.c calls this function so that this assertion
+ // fails; however, it ignores the result of this function when
+ // this assert() would have failed.
+ // assert(pos >= FULL_DISTANCES);
+
+ if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
+ return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 0);
+
+ if (pos < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
+ return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 1);
+
+ return fastpos_result(pos, FULL_DISTANCES_BITS - 1, 2);
+}
+#endif
+
+#endif
+
+#endif