aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/api/lzma/lzma.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblzma/api/lzma/lzma.h')
-rw-r--r--src/liblzma/api/lzma/lzma.h46
1 files changed, 31 insertions, 15 deletions
diff --git a/src/liblzma/api/lzma/lzma.h b/src/liblzma/api/lzma/lzma.h
index 9d31a5ce..ae577282 100644
--- a/src/liblzma/api/lzma/lzma.h
+++ b/src/liblzma/api/lzma/lzma.h
@@ -46,6 +46,10 @@
* Match finder has major effect on both speed and compression ratio.
* Usually hash chains are faster than binary trees.
*
+ * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better
+ * choice, because binary trees get much higher compression ratio penalty
+ * with LZMA_SYNC_FLUSH.
+ *
* The memory usage formulas are only rough estimates, which are closest to
* reality when dict_size is a power of two. The formulas are more complex
* in reality, and can also change a little between liblzma versions. Use
@@ -173,6 +177,7 @@ extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode)
* Since LZMA1 and LZMA2 share most of the code, it's simplest to share
* the options structure too. For encoding, all but the reserved variables
* need to be initialized unless specifically mentioned otherwise.
+ * lzma_lzma_preset() can be used to get a good starting point.
*
* For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and
* preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb.
@@ -255,7 +260,13 @@ typedef struct {
* eight-bit byte (also known as `literal') are taken into
* account when predicting the bits of the next literal.
*
- * \todo Example
+ * E.g. in typical English text, an upper-case letter is
+ * often followed by a lower-case letter, and a lower-case
+ * letter is usually followed by another lower-case letter.
+ * In the US-ASCII character set, the highest three bits are 010
+ * for upper-case letters and 011 for lower-case letters.
+ * When lc is at least 3, the literal coding can take advantage of
+ * this property in the uncompressed data.
*
* There is a limit that applies to literal context bits and literal
* position bits together: lc + lp <= 4. Without this limit the
@@ -275,12 +286,9 @@ typedef struct {
/**
* \brief Number of literal position bits
*
- * How many of the lowest bits of the current position (number
- * of bytes from the beginning of the uncompressed data) in the
- * uncompressed data is taken into account when predicting the
- * bits of the next literal (a single eight-bit byte).
- *
- * \todo Example
+ * lp affects what kind of alignment in the uncompressed data is
+ * assumed when encoding literals. A literal is a single 8-bit byte.
+ * See pb below for more information about alignment.
*/
uint32_t lp;
# define LZMA_LP_DEFAULT 0
@@ -288,14 +296,22 @@ typedef struct {
/**
* \brief Number of position bits
*
- * How many of the lowest bits of the current position in the
- * uncompressed data is taken into account when estimating
- * probabilities of matches. A match is a sequence of bytes for
- * which a matching sequence is found from the dictionary and
- * thus can be stored as distance-length pair.
+ * pb affects what kind of alignment in the uncompressed data is
+ * assumed in general. The default means four-byte alignment
+ * (2^ pb =2^2=4), which is often a good choice when there's
+ * no better guess.
+ *
+ * When the aligment is known, setting pb accordingly may reduce
+ * the file size a little. E.g. with text files having one-byte
+ * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can
+ * improve compression slightly. For UTF-16 text, pb=1 is a good
+ * choice. If the alignment is an odd number like 3 bytes, pb=0
+ * might be the best choice.
*
- * Example: If most of the matches occur at byte positions of
- * 8 * n + 3, that is, 3, 11, 19, ... set pb to 3, because 2**3 == 8.
+ * Even though the assumed alignment can be adjusted with pb and
+ * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment.
+ * It might be worth taking into account when designing file formats
+ * that are likely to be often compressed with LZMA1 or LZMA2.
*/
uint32_t pb;
# define LZMA_PB_MIN 0
@@ -346,7 +362,7 @@ typedef struct {
*
* Setting depth to zero tells liblzma to use an automatic default
* value, that depends on the selected match finder and nice_len.
- * The default is in the range [10, 200] or so (it may vary between
+ * The default is in the range [4, 200] or so (it may vary between
* liblzma versions).
*
* Using a bigger depth value than the default can increase