aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lz/lz_encoder.h
blob: b39c88e58c216dd44db31bc09abfc649eb3ffbf2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_encoder.h
/// \brief      LZ in window and match finder API
//
//  Copyright (C) 1999-2006 Igor Pavlov
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_LZ_ENCODER_H
#define LZMA_LZ_ENCODER_H

#include "common.h"


/// State shared by the LZ input window ("in window") and the match finder.
/// The same structure is passed to every lzma_lz_encoder_*() function
/// declared in this header.
typedef struct lzma_lz_encoder_s lzma_lz_encoder;
struct lzma_lz_encoder_s {
	/// Current stage of the encoder.
	/// NOTE(review): presumably advanced INIT -> RUN -> FINISH -> END by
	/// lzma_lz_encode(); the transitions are not visible in this header.
	enum {
		SEQ_INIT,
		SEQ_RUN,
		SEQ_FINISH,
		SEQ_END
	} sequence;

	/// Callback with the same signature as the `process' argument of
	/// lzma_lz_encoder_reset(). It receives the coder and an output
	/// buffer described by out, *out_pos and out_size.
	/// NOTE(review): the meaning of the bool return value is not
	/// defined in this header; check the implementation.
	bool (*process)(lzma_coder *coder, uint8_t *restrict out,
			size_t *restrict out_pos, size_t out_size);

	/// Uncompressed size as given to lzma_lz_encoder_reset().
	/// NOTE(review): how an unknown size is represented is not visible
	/// here.
	lzma_vli uncompressed_size;

	///////////////
	// In Window //
	///////////////

	/// Pointer to buffer with data to be compressed
	uint8_t *buffer;

	/// Total size of the allocated buffer (that is, including all
	/// the extra space)
	size_t size;

	/// Match finders store locations of matches using 32-bit integers.
	/// To avoid adjusting several megabytes of integers every time the
	/// input window is moved with move_window(), we only adjust the
	/// offset of the buffer. Thus, buffer[match_finder_pos - offset]
	/// is the byte pointed by match_finder_pos.
	size_t offset;

	/// buffer[read_pos] is the current byte.
	size_t read_pos;

	/// As long as read_pos is less than read_limit, there is enough
	/// input available in buffer for at least one encoding loop.
	///
	/// Because of the stateful API, read_limit may and will get greater
	/// than read_pos quite often. This is taken into account when
	/// calculating the value for keep_size_after.
	size_t read_limit;

	/// buffer[write_pos] is the first byte that doesn't contain valid
	/// uncompressed data; that is, the next input byte will be copied
	/// to buffer[write_pos].
	size_t write_pos;

	/// When read_pos >= must_move_pos, move_window() must be called
	/// to make more space for the input data.
	size_t must_move_pos;

	/// Number of bytes that must be kept available in our input history.
	/// That is, once keep_size_before bytes have been processed,
	/// buffer[read_pos - keep_size_before] is the oldest byte that
	/// must be available for reading.
	size_t keep_size_before;

	/// Number of bytes that must be kept in buffer after read_pos.
	/// That is, read_pos <= write_pos - keep_size_after as long as
	/// stream_end_was_reached is false (once it is true, read_pos
	/// is allowed to reach write_pos).
	size_t keep_size_after;

	/// This is set to true once the last byte of the input data has
	/// been copied to buffer.
	bool stream_end_was_reached;

	//////////////////
	// Match Finder //
	//////////////////

	// Pointers to match finder functions. Both take this structure as
	// their first argument.
	// NOTE(review): get_matches presumably writes the found matches into
	// `distances' and skip presumably advances by `num' bytes without
	// reporting matches; neither the output format nor the required size
	// of `distances' is visible in this header.
	void (*get_matches)(lzma_lz_encoder *restrict lz,
			uint32_t *restrict distances);
	void (*skip)(lzma_lz_encoder *restrict lz, uint32_t num);

	// Match finder data
	// NOTE(review): hash_mask, hash_size_sum and num_items share their
	// names with the output parameters of
	// lzma_lz_encoder_hash_properties(), so they are presumably filled
	// from that function. The exact roles of hash, son, cut_value and
	// cyclic_buffer_pos are defined by the match finder implementation,
	// not by this header.
	uint32_t *hash; // TODO: Check if hash aliases son
	uint32_t *son;  //       and add 'restrict' if possible.
	uint32_t cyclic_buffer_pos;
	uint32_t cyclic_buffer_size; // Must be dictionary_size + 1.
	uint32_t hash_mask;
	uint32_t cut_value;
	uint32_t hash_size_sum;
	uint32_t num_items;
	uint32_t match_max_len;
};


/// Initial value for a lzma_lz_encoder, written as a C99 compound literal.
/// It explicitly sets the buffer and hash pointers to NULL and the size and
/// num_items counters to zero; every member not named here is implicitly
/// zero-initialized by the designated-initializer rules.
/// NOTE(review): presumably this lets lzma_lz_encoder_reset() and
/// lzma_lz_encoder_end() tell that nothing has been allocated yet; confirm
/// against the implementation.
#define LZMA_LZ_ENCODER_INIT \
	(lzma_lz_encoder){ \
		.buffer = NULL, \
		.size = 0, \
		.hash = NULL, \
		.num_items = 0, \
	}


/// Calculates match finder hash table properties for the given match finder
/// and history size, and stores them via the hash_mask, hash_size_sum and
/// num_items output pointers (the pointers are restrict-qualified, so they
/// must not alias each other).
/// NOTE(review): the original comment was truncated after "Calculates"; the
/// meaning of the uint32_t return value is not documented in this header and
/// must be checked in the implementation.
extern uint32_t lzma_lz_encoder_hash_properties(lzma_match_finder match_finder,
		uint32_t history_size, uint32_t *restrict hash_mask,
		uint32_t *restrict hash_size_sum,
		uint32_t *restrict num_items);

/// Sets up (or resets) the in window and match finder state held in *lz.
///
/// NOTE: liblzma doesn't use a callback API like the LZMA SDK does. The
/// caller must make sure that keep_size_after is big enough for a single
/// encoding pass, i.e. keep_size_after >= maximum number of bytes possibly
/// needed after the current position between calls to lzma_lz_read().
///
/// NOTE(review): the parameter notes below are inferred from the names and
/// from the members of lzma_lz_encoder; confirm them against the
/// implementation:
///   - process and uncompressed_size presumably end up in the members of
///     the same name.
///   - history_size, additional_buffer_before, match_max_len and
///     additional_buffer_after presumably determine keep_size_before,
///     keep_size_after and the total buffer size.
///   - match_finder and match_finder_cycles presumably select the
///     get_matches/skip functions and cut_value.
///   - preset_dictionary points to preset_dictionary_size bytes; whether
///     NULL is accepted is not visible here.
///   - allocator is presumably used for the window and match finder
///     buffers that lzma_lz_encoder_end() later frees.
/// The possible lzma_ret values are not listed in this header.
extern lzma_ret lzma_lz_encoder_reset(lzma_lz_encoder *lz,
		lzma_allocator *allocator,
		bool (*process)(lzma_coder *coder, uint8_t *restrict out,
			size_t *restrict out_pos, size_t out_size),
		lzma_vli uncompressed_size,
		size_t history_size, size_t additional_buffer_before,
		size_t match_max_len, size_t additional_buffer_after,
		lzma_match_finder match_finder, uint32_t match_finder_cycles,
		const uint8_t *preset_dictionary,
		size_t preset_dictionary_size);

/// Frees the memory allocated for the in window and match finder buffers.
///
/// \param      lz          Encoder state whose buffers are released.
/// \param      allocator   Allocator to free with. NOTE(review): presumably
///                         it must be the same allocator that was passed to
///                         lzma_lz_encoder_reset(); confirm.
extern void lzma_lz_encoder_end(
		lzma_lz_encoder *lz, lzma_allocator *allocator);

/// Encoding entry point of the LZ layer. Input is described by in, *in_pos
/// and in_size, output by out, *out_pos and out_size; `action' tells the
/// function what the caller wants done. The allocator argument is marked
/// unused.
/// NOTE(review): presumably this copies input into the in window and calls
/// the `process' callback stored in lzma_lz_encoder, updating *in_pos and
/// *out_pos as it goes; the details and the possible lzma_ret values are
/// not visible in this header.
extern lzma_ret lzma_lz_encode(lzma_coder *coder,
		lzma_allocator *allocator lzma_attribute((unused)),
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size,
		lzma_action action);

/// This should not be called directly, but only via the move_pos() macro
/// (which is not defined in this header).
/// NOTE(review): presumably this rebases the 32-bit positions stored by the
/// match finder before they can overflow; confirm in the implementation.
extern void lzma_lz_encoder_normalize(lzma_lz_encoder *lz);

#endif