diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-01-14 13:39:54 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-01-14 13:39:54 +0200 |
commit | e22b37968d153683fec61ad37b6b160cb7ca4ddc (patch) | |
tree | d9631e988ead9de0fcac67a9abc803a37324e3a6 /src/liblzma/lz | |
parent | Added one assert() to process.c of the command line tool. (diff) | |
download | xz-e22b37968d153683fec61ad37b6b160cb7ca4ddc.tar.xz |
Major changes to LZ encoder, LZMA encoder, and range encoder.
These changes implement support for LZMA_SYNC_FLUSH in LZMA
encoder, and move the temporary buffer needed by range encoder
from lzma_range_encoder structure to lzma_lz_encoder.
Diffstat (limited to 'src/liblzma/lz')
-rw-r--r-- | src/liblzma/lz/lz_encoder.c | 138 | ||||
-rw-r--r-- | src/liblzma/lz/lz_encoder.h | 17 |
2 files changed, 130 insertions, 25 deletions
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 629f9df2..8d2277ec 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -141,8 +141,9 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, const uint8_t *preset_dictionary, size_t preset_dictionary_size) { - // Set uncompressed size. + lz->sequence = SEQ_RUN; lz->uncompressed_size = uncompressed_size; + lz->temp_size = 0; /////////////// // In Window // @@ -187,7 +188,6 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, lz->read_pos = 0; lz->read_limit = 0; lz->write_pos = 0; - lz->stream_end_was_reached = false; ////////////////// @@ -368,35 +368,59 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, lzma_action action) { assert(coder->lz.read_pos <= coder->lz.write_pos); - lzma_ret ret; // Move the sliding window if needed. if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after) move_window(&coder->lz); + size_t in_used; + lzma_ret ret; if (coder->next.code == NULL) { // Not using a filter, simply memcpy() as much as possible. - bufcpy(in, in_pos, in_size, coder->lz.buffer, + in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer, &coder->lz.write_pos, coder->lz.size); - if (action == LZMA_FINISH && *in_pos == in_size) + if (action != LZMA_RUN && *in_pos == in_size) ret = LZMA_STREAM_END; else ret = LZMA_OK; } else { + const size_t in_start = *in_pos; ret = coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, coder->lz.buffer, &coder->lz.write_pos, coder->lz.size, action); + in_used = *in_pos - in_start; } - // If end of stream has been reached, we allow the encoder to process - // all the input (that is, read_pos is allowed to reach write_pos). - // Otherwise we keep keep_size_after bytes available as prebuffer. + assert(coder->lz.uncompressed_size >= in_used); + if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->lz.uncompressed_size -= in_used; + + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. if (ret == LZMA_STREAM_END) { - coder->lz.stream_end_was_reached = true; + assert(*in_pos == in_size); coder->lz.read_limit = coder->lz.write_pos; + ret = LZMA_OK; + + switch (action) { + case LZMA_SYNC_FLUSH: + coder->lz.sequence = SEQ_FLUSH; + break; + + case LZMA_FINISH: + coder->lz.sequence = SEQ_FINISH; + break; + + default: + assert(0); + ret = LZMA_PROG_ERROR; + break; + } } else if (coder->lz.write_pos > coder->lz.keep_size_after) { // This needs to be done conditionally, because if we got @@ -406,6 +430,19 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, - coder->lz.keep_size_after; } + // Switch to finishing mode if we have got all the input data. + // lzma_lz_encode() won't return LZMA_STREAM_END until LZMA_FINISH + // is used. + // + // NOTE: When LZMA is used together with other filters, it is possible + // that coder->lz.sequence gets set to SEQ_FINISH before the next + // encoder has returned LZMA_STREAM_END. This is somewhat ugly, but + // works correctly, because the next encoder cannot have any more + // output left to be produced. If it had, then our known Uncompressed + // Size would be invalid, which would mean that we have a bad bug. + if (ret == LZMA_OK && coder->lz.uncompressed_size == 0) + coder->lz.sequence = SEQ_FINISH; + return ret; } @@ -417,20 +454,81 @@ lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - while (*out_pos < out_size - && (*in_pos < in_size || action == LZMA_FINISH)) { - // Fill the input window if there is no more usable data. - if (!coder->lz.stream_end_was_reached && coder->lz.read_pos - >= coder->lz.read_limit) { - const lzma_ret ret = fill_window(coder, allocator, - in, in_pos, in_size, action); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return ret; + // Flush the temporary output buffer, which may be used when the + // encoder runs of out of space in primary output buffer (the out, + // *out_pos, and out_size variables). + if (coder->lz.temp_size > 0) { + const size_t out_avail = out_size - *out_pos; + if (out_avail < coder->lz.temp_size) { + // Cannot copy everything. Copy as much as possible + // and move the data in lz.temp to the beginning of + // that buffer. + memcpy(out + *out_pos, coder->lz.temp, out_avail); + *out_pos += out_avail; + memmove(coder->lz.temp, coder->lz.temp + out_avail, + coder->lz.temp_size - out_avail); + coder->lz.temp_size -= out_avail; + return LZMA_OK; } + // We can copy everything from coder->lz.temp to out. + memcpy(out + *out_pos, coder->lz.temp, coder->lz.temp_size); + *out_pos += coder->lz.temp_size; + coder->lz.temp_size = 0; + } + + if (coder->lz.sequence == SEQ_FLUSH_END) { + // During an earlier call to this function, flushing was + // otherwise finished except some data was left pending + // in coder->lz.buffer. Now we have copied all that data + // to the output buffer and can return LZMA_STREAM_END. + coder->lz.sequence = SEQ_RUN; + assert(action == LZMA_SYNC_FLUSH); + return LZMA_STREAM_END; + } + + if (coder->lz.sequence == SEQ_END) { + // This is like the above flushing case, but for finishing + // the encoding. + // + // NOTE: action is not necesarily LZMA_FINISH; it can + // be LZMA_SYNC_FLUSH too in case it is used at the + // end of the stream with known Uncompressed Size. + return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK; + } + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->lz.buffer if needed. + if (coder->lz.sequence == SEQ_RUN + && coder->lz.read_pos >= coder->lz.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + // Encode - if (coder->lz.process(coder, out, out_pos, out_size)) - return LZMA_STREAM_END; + if (coder->lz.process(coder, out, out_pos, out_size)) { + if (coder->lz.sequence == SEQ_FLUSH) { + assert(action == LZMA_SYNC_FLUSH); + if (coder->lz.temp_size == 0) { + // Flushing was finished successfully. + coder->lz.sequence = SEQ_RUN; + } else { + // Flushing was otherwise finished, + // except that some data was left + // into coder->lz.buffer. + coder->lz.sequence = SEQ_FLUSH_END; + } + } else { + // NOTE: action may be LZMA_RUN here in case + // Uncompressed Size is known and we have + // processed all the data already. + assert(coder->lz.sequence == SEQ_FINISH); + coder->lz.sequence = SEQ_END; + } + + return action != LZMA_RUN && coder->lz.temp_size == 0 + ? LZMA_STREAM_END : LZMA_OK; + } } return LZMA_OK; diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h index fe94618b..11d12722 100644 --- a/src/liblzma/lz/lz_encoder.h +++ b/src/liblzma/lz/lz_encoder.h @@ -24,11 +24,15 @@ #include "common.h" +#define LZMA_LZ_TEMP_SIZE 64 + + typedef struct lzma_lz_encoder_s lzma_lz_encoder; struct lzma_lz_encoder_s { enum { - SEQ_INIT, SEQ_RUN, + SEQ_FLUSH, + SEQ_FLUSH_END, SEQ_FINISH, SEQ_END } sequence; @@ -36,8 +40,15 @@ struct lzma_lz_encoder_s { bool (*process)(lzma_coder *coder, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size); + /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if using EOPM. We need + /// to track Uncompressed Size to prevent writing flush marker to the + /// very end of stream that doesn't use EOPM. lzma_vli uncompressed_size; + /// Temporary buffer for range encoder. + uint8_t temp[LZMA_LZ_TEMP_SIZE]; + size_t temp_size; + /////////////// // In Window // /////////////// @@ -84,10 +95,6 @@ struct lzma_lz_encoder_s { /// is allowed to reach write_pos). size_t keep_size_after; - /// This is set to true once the last byte of the input data has - /// been copied to buffer. - bool stream_end_was_reached; - ////////////////// // Match Finder // ////////////////// |