// src/liblzma/lz/lz_decoder.h

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_decoder.h
/// \brief      LZ out window
//
//  Copyright (C) 1999-2006 Igor Pavlov
//  Copyright (C) 2007 Lasse Collin
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_LZ_OUT_H
#define LZMA_LZ_OUT_H

#include "common.h"


/// Read one byte from the history buffer, counting backwards from the
/// current write position: distance 0 yields dict[pos - 1]. When the
/// requested byte would lie before dict[0] (distance >= pos), the index
/// is shifted forward by lz.end.
///
/// \note `lz` is the decoder structure itself (not a pointer). Both
/// arguments are expanded more than once, so they must not have
/// side effects.
#define lz_get_byte(lz, distance) \
	((lz).dict[(lz).pos - (distance) - 1 \
		+ ((distance) < (lz).pos ? 0 : (lz).end)])


/// Compound-literal initializer for lzma_lz_decoder describing a decoder
/// with no dictionary: dict is NULL and both match_max_len and size are
/// zero. Members not named here are zero-initialized, as for any
/// compound literal with a partial initializer list.
#define LZMA_LZ_DECODER_INIT \
	(lzma_lz_decoder){ \
		.dict = NULL, \
		.match_max_len = 0, \
		.size = 0 \
	}


/// State of the LZ decoder's output window (history/dictionary buffer).
///
/// NOTE(review): some member comments below refer to "buffer[...]";
/// this presumably means dict[...] -- confirm against lz_decoder.c.
typedef struct {
	/// Function to do the actual decoding (LZMA or Inflate).
	/// It receives the coder, the input buffer with its read position
	/// and size, and a has_safe_buffer flag.
	/// NOTE(review): the meaning of the return value and of
	/// has_safe_buffer is not visible in this header -- see the
	/// implementations.
	bool (*process)(lzma_coder *restrict coder, const uint8_t *restrict in,
			size_t *restrict in_pos, size_t size_in,
			bool has_safe_buffer);

	/// Pointer to dictionary (history) buffer.
	/// \note Not 'restrict' because can alias next_out.
	uint8_t *dict;

	/// Next write goes to dict[pos].
	size_t pos;

	/// Next byte to flush is buffer[start].
	size_t start;

	/// First byte to not flush is buffer[end].
	/// Also used as the wrap offset when reading history: a byte that
	/// would lie before dict[0] is read from
	/// dict[pos - distance - 1 + end] (see lz_get_byte() and
	/// lzma_lz_out_repeat()).
	size_t end;

	/// First position to which data must not be written.
	size_t limit;

	/// True if dictionary has needed wrapping. While this is false,
	/// lzma_lz_out_repeat() rejects any distance >= pos, because such
	/// a distance would point before the beginning of the stream.
	bool is_full;

	/// True if process() has detected End of Payload Marker.
	bool eopm_detected;

	/// True if the next coder in the chain has returned LZMA_STREAM_END.
	bool next_finished;

	/// True if the LZ decoder (e.g. LZMA) has detected End of Payload
	/// Marker. This may become true before next_finished becomes true.
	/// NOTE(review): this description overlaps with eopm_detected;
	/// verify how the two flags differ in lz_decoder.c.
	bool this_finished;

	/// When pos >= must_flush_pos, we must not call process().
	size_t must_flush_pos;

	/// Maximum number of bytes that a single decoding loop inside
	/// process() can produce data into dict. This amount is kept
	/// always available at dict + pos i.e. it is safe to write a byte
	/// to dict[pos + match_max_len - 1].
	/// lzma_lz_out_repeat() asserts that its length argument does not
	/// exceed this value.
	size_t match_max_len;

	/// Number of bytes allocated to dict.
	size_t size;

	/// Requested size of the dictionary. This is needed because we avoid
	/// using extremely tiny history buffers.
	/// lzma_lz_out_repeat() rejects distances >= requested_size even
	/// when the allocated buffer (size) is larger.
	size_t requested_size;

	/// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if unknown.
	lzma_vli uncompressed_size;

	/// Number of bytes currently in temp[].
	size_t temp_size;

	/// Temporary buffer needed when
	/// 1) we cannot make the input buffer completely empty; or
	/// 2) we are not the last filter in the chain.
	uint8_t temp[LZMA_BUFFER_SIZE];

} lzma_lz_decoder;


/////////////////////////
// Function prototypes //
/////////////////////////

/// Reset the LZ decoder state for a new decoding run.
///
/// NOTE(review): only the declaration is visible here; the descriptions
/// below are inferred from the parameter names and the members of
/// lzma_lz_decoder -- confirm against lz_decoder.c.
///
/// \param lz                 Decoder state to reset
/// \param allocator          Allocator; presumably used for the
///                           dictionary buffer
/// \param process            Decoding callback; has the same type as
///                           the lzma_lz_decoder.process member
/// \param uncompressed_size  Presumably stored to
///                           lzma_lz_decoder.uncompressed_size
///                           (Uncompressed Size, or
///                           LZMA_VLI_VALUE_UNKNOWN if unknown)
/// \param history_size       Presumably the requested dictionary size
///                           (cf. lzma_lz_decoder.requested_size)
/// \param match_max_len      Presumably stored to
///                           lzma_lz_decoder.match_max_len
///
/// \return     An lzma_ret status code; the possible values are not
///             visible in this header.
extern lzma_ret lzma_lz_decoder_reset(lzma_lz_decoder *lz,
		lzma_allocator *allocator, bool (*process)(
			lzma_coder *restrict coder, const uint8_t *restrict in,
			size_t *restrict in_pos, size_t in_size,
			bool has_safe_buffer),
		lzma_vli uncompressed_size,
		size_t history_size, size_t match_max_len);

/// Decoding entry point of the LZ decoder.
///
/// Takes an input buffer (in, *in_pos, in_size), an output buffer
/// (out, *out_pos, out_size) and an action.
/// NOTE(review): presumably *in_pos and *out_pos are advanced by the
/// amount of data consumed and produced; the exact contract and the
/// possible lzma_ret values are defined in lz_decoder.c -- confirm there.
extern lzma_ret lzma_lz_decode(lzma_coder *coder, lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size,
		lzma_action action);

/// Deallocates the history buffer if one exists.
///
/// \param lz         Decoder whose history buffer is released
/// \param allocator  Allocator; presumably the same one that was given
///                   to lzma_lz_decoder_reset() -- confirm in lz_decoder.c
extern void lzma_lz_decoder_end(
		lzma_lz_decoder *lz, lzma_allocator *allocator);

//////////////////////
// Inline functions //
//////////////////////

// Repeat a block of data from the history: append `length` bytes to the
// dictionary, starting from the byte that is `distance + 1` positions
// behind the current write position (distance 0 repeats the most recently
// written byte).
//
// Because memcpy() is faster than copying byte by byte in a loop, the
// copying process gets split into three cases:
// 1. distance < length
//    Source and target areas may overlap, and the copy has to read bytes
//    that were written earlier during the same copy. Neither memcpy()
//    nor memmove() gives that result, so the bytes are copied one by one.
//    TODO: If this is common enough, it might be worth optimizing this
//    more e.g. by checking if distance > sizeof(uint8_t*) and using
//    memcpy in small chunks.
// 2. distance < pos
//    This is the easiest and the fastest case. The block being copied
//    is a contiguous piece in the history buffer. The buffer offset
//    doesn't need wrapping.
// 3. distance >= pos
//    We need to wrap the position, because otherwise we would try copying
//    from before the first byte of the allocated buffer. It is possible
//    that the block is fragmented into two pieces, thus we might need to
//    call memcpy() twice.
//
// Returns true if the data was copied. Returns false, leaving the
// dictionary untouched, if `distance` is invalid: it points before the
// beginning of the stream, or it is not smaller than the requested
// dictionary size.
//
// NOTE: The caller must ensure that 0 < length <= lz->match_max_len;
// this is only checked with assert(). The distance is validated here.
static inline bool
lzma_lz_out_repeat(lzma_lz_decoder *lz, size_t distance, size_t length)
{
	// Validate offset of the block to be repeated. It doesn't
	// make sense to copy data from before the beginning of the stream.
	// Leaving this check away would lead to a security problem,
	// in which e.g. the data of the previously decoded file(s)
	// would be leaked (or whatever happens to be in unused
	// part of the dictionary buffer).
	if (distance >= lz->pos && !lz->is_full)
		return false;

	// It also doesn't make sense to copy data from farther back than
	// the dictionary size.
	if (distance >= lz->requested_size)
		return false;

	// The caller must have checked these!
	assert(distance <= lz->size);
	assert(length > 0);
	assert(length <= lz->match_max_len);

	// Copy the amount of data requested by the decoder.
	if (distance < length) {
		// Source and target areas may overlap and later bytes
		// depend on bytes written by this very loop, thus we
		// can't use memcpy() nor even memmove() safely. :-(
		// TODO: Copying byte by byte is slow. It might be
		// worth optimizing this more if this case is common.
		do {
			lz->dict[lz->pos] = lz_get_byte(*lz, distance);
			++lz->pos;
		} while (--length > 0);

	} else if (distance < lz->pos) {
		// The easiest and fastest case: the source is contiguous
		// and, because length <= distance, it ends before dict[pos].
		memcpy(lz->dict + lz->pos,
				lz->dict + lz->pos - distance - 1,
				length);
		lz->pos += length;

	} else {
		// The bigger the dictionary, the more rare this
		// case occurs. We need to "wrap" the dict, thus
		// we might need two memcpy() to copy all the data.
		assert(lz->is_full);

		// Number of bytes between the source position and
		// dict[end], i.e. how much can be copied before the source
		// wraps to dict[0]. distance >= pos holds in this branch,
		// so the subtraction cannot underflow. size_t is used
		// because every operand is size_t; a narrower type could
		// truncate the offsets.
		const size_t tail_size = distance + 1 - lz->pos;
		const size_t copy_pos = lz->end - tail_size;

		if (tail_size < length) {
			// The block is split: first the piece that ends
			// at dict[end], then the rest from dict[0] onwards.
			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
					tail_size);
			lz->pos += tail_size;

			const size_t head_size = length - tail_size;
			memcpy(lz->dict + lz->pos, lz->dict, head_size);
			lz->pos += head_size;
		} else {
			// The whole block fits before dict[end].
			memcpy(lz->dict + lz->pos, lz->dict + copy_pos,
					length);
			lz->pos += length;
		}
	}

	return true;
}

#endif