aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/api/lzma/lzma.h
blob: 5a1cd91252a6b1aa1bab7cd9b15e7434fd74ea9b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/**
 * \file        lzma/lzma.h
 * \brief       LZMA filter
 *
 * \author      Copyright (C) 1999-2006 Igor Pavlov
 * \author      Copyright (C) 2007 Lasse Collin
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 */

#ifndef LZMA_H_INTERNAL
#	error Never include this file directly. Use <lzma.h> instead.
#endif


/**
 * \brief       Filter ID of the LZMA filter
 *
 * This is used as lzma_filter.id.
 */
#define LZMA_FILTER_LZMA        LZMA_VLI_C(0x40)

/**
 * \brief       Filter ID of the LZMA2 filter
 *
 * LZMA2 is configured with the same lzma_options_lzma structure as LZMA;
 * see lzma_options_lzma.persistent for an option that only LZMA2 uses.
 *
 * Presumably this is used as lzma_filter.id in the same way as
 * LZMA_FILTER_LZMA (TODO: confirm, this header doesn't state it).
 */
#define LZMA_FILTER_LZMA2       LZMA_VLI_C(0x21)


/**
 * \brief       Match finders
 *
 * The match finder has a major effect on both speed and compression ratio.
 * Usually hash chains are faster than binary trees.
 *
 * Use lzma_mf_is_supported() to test whether a match finder is supported
 * by this liblzma build.
 */
typedef enum {
	LZMA_MF_INVALID = -1,
		/**<
		 * \brief       Invalid match finder ID
		 *
		 * This value doesn't identify any match finder.
		 *
		 * \note        There is no list of available match finders
		 *              that this value would terminate; support is
		 *              queried one match finder at a time with
		 *              lzma_mf_is_supported().
		 */

	LZMA_MF_HC3     = 0x03,
		/**<
		 * \brief       Hash Chain with 3 bytes hashing
		 *
		 * \todo        Document memory requirements
		 *
		 * \note        It's possible that this match finder gets
		 *              removed in the future. The definition will
		 *              stay in this header, but liblzma may return
		 *              LZMA_HEADER_ERROR if it is specified (just
		 *              like it would if the match finder had been
		 *              disabled at compile time).
		 */

	LZMA_MF_HC4     = 0x04,
		/**<
		 * \brief       Hash Chain with 4 bytes hashing
		 *
		 * Memory requirements: 7.5 * dictionary_size + 4 MiB
		 *
		 * \note        It's possible that this match finder gets
		 *              removed in the future. The definition will
		 *              stay in this header, but liblzma may return
		 *              LZMA_HEADER_ERROR if it is specified (just
		 *              like it would if the match finder had been
		 *              disabled at compile time).
		 */

	LZMA_MF_BT2     = 0x12,
		/**<
		 * \brief       Binary Tree with 2 bytes hashing
		 *
		 * Memory requirements: 9.5 * dictionary_size + 4 MiB
		 */

	LZMA_MF_BT3     = 0x13,
		/**<
		 * \brief       Binary Tree with 3 bytes hashing
		 *
		 * Memory requirements: 11.5 * dictionary_size + 4 MiB
		 */

	LZMA_MF_BT4     = 0x14
		/**<
		 * \brief       Binary Tree with 4 bytes hashing
		 *
		 * Memory requirements: 11.5 * dictionary_size + 4 MiB
		 */
} lzma_match_finder;


/**
 * \brief       Test if given match finder is supported
 *
 * It is safe to call this with a value that isn't listed in the
 * lzma_match_finder enumeration; the return value will be false.
 *
 * There is no way to list which match finders are available in this
 * particular liblzma version and build. Such a list would be useless,
 * because a new match finder, which the application developer wasn't
 * aware of, could require giving additional options to the encoder that
 * the older match finders don't need.
 *
 * \param       match_finder    Match finder ID to test
 *
 * \return      true if the given match finder is supported by this
 *              liblzma build, false otherwise.
 */
extern lzma_bool lzma_mf_is_supported(lzma_match_finder match_finder)
		lzma_attr_const;


/**
 * \brief       LZMA compression modes
 *
 * This selects the function used to analyze the data produced by the match
 * finder.
 *
 * Use lzma_mode_is_available() to test whether a mode is supported by this
 * liblzma build.
 */
typedef enum {
	LZMA_MODE_INVALID = -1,
		/**<
		 * \brief       Invalid mode
		 *
		 * This value doesn't identify any compression mode.
		 *
		 * \note        There is no list of available modes that
		 *              this value would terminate; support is
		 *              queried one mode at a time with
		 *              lzma_mode_is_available().
		 */

	LZMA_MODE_FAST = 0,
		/**<
		 * \brief       Fast compression
		 *
		 * Fast mode is usually at its best when combined with
		 * a hash chain match finder.
		 */

	LZMA_MODE_NORMAL = 1
		/**<
		 * \brief       Normal compression
		 *
		 * This is usually notably slower than fast mode. Use this
		 * together with binary tree match finders to expose the
		 * full potential of the LZMA encoder.
		 */
} lzma_mode;


/**
 * \brief       Test if given compression mode is supported
 *
 * It is safe to call this with a value that isn't listed in the lzma_mode
 * enumeration; the return value will be false.
 *
 * There is no way to list which modes are available in this particular
 * liblzma version and build. Such a list would be useless, because a new
 * compression mode, which the application developer wasn't aware of, could
 * require giving additional options to the encoder that the older modes
 * don't need.
 *
 * \param       mode    Compression mode to test
 *
 * \return      true if the given compression mode is supported by this
 *              liblzma build, false otherwise.
 */
extern lzma_bool lzma_mode_is_available(lzma_mode mode) lzma_attr_const;


/**
 * \brief       Options specific to the LZMA method handler
 *
 * The first group of members is needed both when encoding and when doing
 * raw decoding; the second group is needed only when encoding. The same
 * structure is used for LZMA2 (see the member `persistent').
 */
typedef struct {
	/**********************************
	 * LZMA encoding/decoding options *
	 **********************************/

	/* These options are required by the encoder and also for raw decoding. */

	/**
	 * \brief       Dictionary size in bytes
	 *
	 * Dictionary size indicates how many bytes of the recently processed
	 * uncompressed data are kept in memory. One method to reduce the
	 * size of the uncompressed data is to store distance-length pairs,
	 * which indicate what data to repeat from the dictionary buffer.
	 * Thus, the bigger the dictionary, the better the compression ratio
	 * usually is.
	 *
	 * Raw decoding: A dictionary that is too big does no other harm
	 * than wasting memory. This value is ignored by
	 * lzma_raw_decode_buffer(), because it uses the target buffer as
	 * the dictionary.
	 *
	 * The macros below give the minimum (4 KiB), maximum (1 GiB), and
	 * default (8 MiB) dictionary sizes.
	 */
	uint32_t dictionary_size;
#	define LZMA_DICTIONARY_SIZE_MIN            (UINT32_C(1) << 12)
#	define LZMA_DICTIONARY_SIZE_MAX            (UINT32_C(1) << 30)
#	define LZMA_DICTIONARY_SIZE_DEFAULT        (UINT32_C(1) << 23)

	/**
	 * \brief       Pointer to an initial dictionary
	 *
	 * It is possible to initialize the LZ77 history window using
	 * a preset dictionary. Here is a good quote from zlib's
	 * documentation; this applies to LZMA as is:
	 *
	 * "The dictionary should consist of strings (byte sequences) that
	 * are likely to be encountered later in the data to be compressed,
	 * with the most commonly used strings preferably put towards the
	 * end of the dictionary. Using a dictionary is most useful when
	 * the data to be compressed is short and can be predicted with
	 * good accuracy; the data can then be compressed better than
	 * with the default empty dictionary."
	 * (From deflateSetDictionary() in zlib.h of zlib version 1.2.3)
	 *
	 * This feature should be used only in special situations.
	 * It works correctly only with raw encoding and decoding.
	 * Currently none of the container formats supported by
	 * liblzma allows a preset dictionary when decoding; thus, if
	 * you create a .lzma file with a preset dictionary, it cannot
	 * be decoded with the regular .lzma decoder functions.
	 *
	 * \todo        This feature is not implemented yet.
	 */
	const uint8_t *preset_dictionary;

	/**
	 * \brief       Size of the preset dictionary
	 *
	 * Specifies the size of the preset dictionary in bytes. If the
	 * size is bigger than dictionary_size, only the last
	 * dictionary_size bytes are processed.
	 *
	 * This variable is read only when preset_dictionary is not NULL.
	 */
	uint32_t preset_dictionary_size;

	/**
	 * \brief       Number of literal context bits
	 *
	 * How many of the highest bits of the previous uncompressed
	 * eight-bit byte (also known as `literal') are taken into
	 * account when predicting the bits of the next literal.
	 *
	 * The sum of literal_context_bits and literal_pos_bits must not
	 * exceed LZMA_LITERAL_BITS_MAX.
	 *
	 * \todo        Example
	 */
	uint32_t literal_context_bits;
#	define LZMA_LITERAL_CONTEXT_BITS_MIN       0
#	define LZMA_LITERAL_CONTEXT_BITS_MAX       4
#	define LZMA_LITERAL_CONTEXT_BITS_DEFAULT   3

	/**
	 * \brief       Number of literal position bits
	 *
	 * How many of the lowest bits of the current position (number
	 * of bytes from the beginning of the uncompressed data) in the
	 * uncompressed data are taken into account when predicting the
	 * bits of the next literal (a single eight-bit byte).
	 *
	 * The sum of literal_context_bits and literal_pos_bits must not
	 * exceed LZMA_LITERAL_BITS_MAX.
	 *
	 * \todo        Example
	 */
	uint32_t literal_pos_bits;
#	define LZMA_LITERAL_POS_BITS_MIN           0
#	define LZMA_LITERAL_POS_BITS_MAX           4
#	define LZMA_LITERAL_POS_BITS_DEFAULT       0

	/**
	 * \brief       Number of position bits
	 *
	 * How many of the lowest bits of the current position in the
	 * uncompressed data are taken into account when estimating
	 * probabilities of matches. A match is a sequence of bytes for
	 * which a matching sequence is found from the dictionary and
	 * thus can be stored as a distance-length pair.
	 *
	 * Example: If most of the matches occur at byte positions
	 * of 8 * n + 3, that is, 3, 11, 19, ... set pos_bits to 3,
	 * because 2**3 == 8.
	 */
	uint32_t pos_bits;
#	define LZMA_POS_BITS_MIN                   0
#	define LZMA_POS_BITS_MAX                   4
#	define LZMA_POS_BITS_DEFAULT               2

	/******************************************
	 * LZMA options needed only when encoding *
	 ******************************************/

	/**
	 * \brief       Indicate if the options structure is persistent
	 *
	 * If this is true, the application must keep this options structure
	 * available after the LZMA2 encoder has been initialized. With
	 * a persistent structure it is possible to change some encoder
	 * options in the middle of the encoding process without resetting
	 * the encoder.
	 *
	 * This option is used only by LZMA2. LZMA1 ignores this and it is
	 * safe to not initialize this when encoding with LZMA1.
	 */
	lzma_bool persistent;

	/**
	 * \brief       LZMA compression mode
	 *
	 * Use lzma_mode_is_available() to test whether a mode is supported
	 * by this liblzma build.
	 */
	lzma_mode mode;

	/**
	 * \brief       Number of fast bytes
	 *
	 * Number of fast bytes determines how many bytes the encoder
	 * compares from the match candidates when looking for the best
	 * match. A bigger fast bytes value usually increases both the
	 * compression ratio and the compression time.
	 */
	uint32_t fast_bytes;
#	define LZMA_FAST_BYTES_MIN                 5
#	define LZMA_FAST_BYTES_MAX                 273
#	define LZMA_FAST_BYTES_DEFAULT             128

	/**
	 * \brief       Match finder ID
	 *
	 * Use lzma_mf_is_supported() to test whether a match finder is
	 * supported by this liblzma build.
	 */
	lzma_match_finder match_finder;

	/**
	 * \brief       Match finder cycles
	 *
	 * Higher values give a slightly better compression ratio but
	 * decrease speed. Use the special value 0 to let liblzma use
	 * a match-finder-dependent default value.
	 *
	 * \todo        Write much better description.
	 */
	uint32_t match_finder_cycles;

	/**
	 * \brief       Reserved space for possible future extensions
	 *
	 * You should not touch these, because the names of these variables
	 * may change. These are not used, and never will be used, with the
	 * currently supported options, so it is safe to leave these
	 * uninitialized.
	 */
	uint32_t reserved_int1;
	uint32_t reserved_int2;
	uint32_t reserved_int3;
	uint32_t reserved_int4;
	void *reserved_ptr1;
	void *reserved_ptr2;

} lzma_options_lzma;


/**
 * \brief       Maximum sum of literal_context_bits and literal_pos_bits
 *
 * The two members of lzma_options_lzma are limited together, not only
 * individually:
 *
 * literal_context_bits + literal_pos_bits <= LZMA_LITERAL_BITS_MAX
 */
#define LZMA_LITERAL_BITS_MAX 4


/**
 * \brief       Table of presets for the LZMA filter
 *
 * lzma_preset_lzma[0] is the fastest and lzma_preset_lzma[8] is the slowest.
 * These presets match the switches -1 .. -9 of the lzma command line tool,
 * that is, lzma_preset_lzma[n] corresponds to the switch -(n + 1).
 *
 * The preset values are subject to changes between liblzma versions.
 *
 * This variable is available only if LZMA encoder has been enabled.
 */
extern const lzma_options_lzma lzma_preset_lzma[9];