diff options
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/lz/lz_encoder.c | 138 | ||||
-rw-r--r-- | src/liblzma/lz/lz_encoder.h | 17 | ||||
-rw-r--r-- | src/liblzma/lzma/lzma_encoder.c | 74 |
3 files changed, 173 insertions, 56 deletions
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 629f9df2..8d2277ec 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -141,8 +141,9 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, const uint8_t *preset_dictionary, size_t preset_dictionary_size) { - // Set uncompressed size. + lz->sequence = SEQ_RUN; lz->uncompressed_size = uncompressed_size; + lz->temp_size = 0; /////////////// // In Window // @@ -187,7 +188,6 @@ lzma_lz_encoder_reset(lzma_lz_encoder *lz, lzma_allocator *allocator, lz->read_pos = 0; lz->read_limit = 0; lz->write_pos = 0; - lz->stream_end_was_reached = false; ////////////////// @@ -368,35 +368,59 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, size_t *in_pos, size_t in_size, lzma_action action) { assert(coder->lz.read_pos <= coder->lz.write_pos); - lzma_ret ret; // Move the sliding window if needed. if (coder->lz.read_pos >= coder->lz.size - coder->lz.keep_size_after) move_window(&coder->lz); + size_t in_used; + lzma_ret ret; if (coder->next.code == NULL) { // Not using a filter, simply memcpy() as much as possible. - bufcpy(in, in_pos, in_size, coder->lz.buffer, + in_used = bufcpy(in, in_pos, in_size, coder->lz.buffer, &coder->lz.write_pos, coder->lz.size); - if (action == LZMA_FINISH && *in_pos == in_size) + if (action != LZMA_RUN && *in_pos == in_size) ret = LZMA_STREAM_END; else ret = LZMA_OK; } else { + const size_t in_start = *in_pos; ret = coder->next.code(coder->next.coder, allocator, in, in_pos, in_size, coder->lz.buffer, &coder->lz.write_pos, coder->lz.size, action); + in_used = *in_pos - in_start; } - // If end of stream has been reached, we allow the encoder to process - // all the input (that is, read_pos is allowed to reach write_pos). - // Otherwise we keep keep_size_after bytes available as prebuffer. + assert(coder->lz.uncompressed_size >= in_used); + if (coder->lz.uncompressed_size != LZMA_VLI_VALUE_UNKNOWN) + coder->lz.uncompressed_size -= in_used; + + // If end of stream has been reached or flushing completed, we allow + // the encoder to process all the input (that is, read_pos is allowed + // to reach write_pos). Otherwise we keep keep_size_after bytes + // available as prebuffer. if (ret == LZMA_STREAM_END) { - coder->lz.stream_end_was_reached = true; + assert(*in_pos == in_size); coder->lz.read_limit = coder->lz.write_pos; + ret = LZMA_OK; + + switch (action) { + case LZMA_SYNC_FLUSH: + coder->lz.sequence = SEQ_FLUSH; + break; + + case LZMA_FINISH: + coder->lz.sequence = SEQ_FINISH; + break; + + default: + assert(0); + ret = LZMA_PROG_ERROR; + break; + } } else if (coder->lz.write_pos > coder->lz.keep_size_after) { // This needs to be done conditionally, because if we got @@ -406,6 +430,19 @@ fill_window(lzma_coder *coder, lzma_allocator *allocator, const uint8_t *in, - coder->lz.keep_size_after; } + // Switch to finishing mode if we have got all the input data. + // lzma_lz_encode() won't return LZMA_STREAM_END until LZMA_FINISH + // is used. + // + // NOTE: When LZMA is used together with other filters, it is possible + // that coder->lz.sequence gets set to SEQ_FINISH before the next + // encoder has returned LZMA_STREAM_END. This is somewhat ugly, but + // works correctly, because the next encoder cannot have any more + // output left to be produced. If it had, then our known Uncompressed + // Size would be invalid, which would mean that we have a bad bug. + if (ret == LZMA_OK && coder->lz.uncompressed_size == 0) + coder->lz.sequence = SEQ_FINISH; + return ret; } @@ -417,20 +454,81 @@ lzma_lz_encode(lzma_coder *coder, lzma_allocator *allocator, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size, lzma_action action) { - while (*out_pos < out_size - && (*in_pos < in_size || action == LZMA_FINISH)) { - // Fill the input window if there is no more usable data. - if (!coder->lz.stream_end_was_reached && coder->lz.read_pos - >= coder->lz.read_limit) { - const lzma_ret ret = fill_window(coder, allocator, - in, in_pos, in_size, action); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) - return ret; + // Flush the temporary output buffer, which may be used when the + // encoder runs of out of space in primary output buffer (the out, + // *out_pos, and out_size variables). + if (coder->lz.temp_size > 0) { + const size_t out_avail = out_size - *out_pos; + if (out_avail < coder->lz.temp_size) { + // Cannot copy everything. Copy as much as possible + // and move the data in lz.temp to the beginning of + // that buffer. + memcpy(out + *out_pos, coder->lz.temp, out_avail); + *out_pos += out_avail; + memmove(coder->lz.temp, coder->lz.temp + out_avail, + coder->lz.temp_size - out_avail); + coder->lz.temp_size -= out_avail; + return LZMA_OK; } + // We can copy everything from coder->lz.temp to out. + memcpy(out + *out_pos, coder->lz.temp, coder->lz.temp_size); + *out_pos += coder->lz.temp_size; + coder->lz.temp_size = 0; + } + + if (coder->lz.sequence == SEQ_FLUSH_END) { + // During an earlier call to this function, flushing was + // otherwise finished except some data was left pending + // in coder->lz.buffer. Now we have copied all that data + // to the output buffer and can return LZMA_STREAM_END. + coder->lz.sequence = SEQ_RUN; + assert(action == LZMA_SYNC_FLUSH); + return LZMA_STREAM_END; + } + + if (coder->lz.sequence == SEQ_END) { + // This is like the above flushing case, but for finishing + // the encoding. + // + // NOTE: action is not necesarily LZMA_FINISH; it can + // be LZMA_SYNC_FLUSH too in case it is used at the + // end of the stream with known Uncompressed Size. + return action != LZMA_RUN ? LZMA_STREAM_END : LZMA_OK; + } + + while (*out_pos < out_size + && (*in_pos < in_size || action != LZMA_RUN)) { + // Read more data to coder->lz.buffer if needed. + if (coder->lz.sequence == SEQ_RUN + && coder->lz.read_pos >= coder->lz.read_limit) + return_if_error(fill_window(coder, allocator, + in, in_pos, in_size, action)); + // Encode - if (coder->lz.process(coder, out, out_pos, out_size)) - return LZMA_STREAM_END; + if (coder->lz.process(coder, out, out_pos, out_size)) { + if (coder->lz.sequence == SEQ_FLUSH) { + assert(action == LZMA_SYNC_FLUSH); + if (coder->lz.temp_size == 0) { + // Flushing was finished successfully. + coder->lz.sequence = SEQ_RUN; + } else { + // Flushing was otherwise finished, + // except that some data was left + // into coder->lz.buffer. + coder->lz.sequence = SEQ_FLUSH_END; + } + } else { + // NOTE: action may be LZMA_RUN here in case + // Uncompressed Size is known and we have + // processed all the data already. + assert(coder->lz.sequence == SEQ_FINISH); + coder->lz.sequence = SEQ_END; + } + + return action != LZMA_RUN && coder->lz.temp_size == 0 + ? LZMA_STREAM_END : LZMA_OK; + } } return LZMA_OK; diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h index fe94618b..11d12722 100644 --- a/src/liblzma/lz/lz_encoder.h +++ b/src/liblzma/lz/lz_encoder.h @@ -24,11 +24,15 @@ #include "common.h" +#define LZMA_LZ_TEMP_SIZE 64 + + typedef struct lzma_lz_encoder_s lzma_lz_encoder; struct lzma_lz_encoder_s { enum { - SEQ_INIT, SEQ_RUN, + SEQ_FLUSH, + SEQ_FLUSH_END, SEQ_FINISH, SEQ_END } sequence; @@ -36,8 +40,15 @@ struct lzma_lz_encoder_s { bool (*process)(lzma_coder *coder, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size); + /// Uncompressed Size or LZMA_VLI_VALUE_UNKNOWN if using EOPM. We need + /// to track Uncompressed Size to prevent writing flush marker to the + /// very end of stream that doesn't use EOPM. lzma_vli uncompressed_size; + /// Temporary buffer for range encoder. + uint8_t temp[LZMA_LZ_TEMP_SIZE]; + size_t temp_size; + /////////////// // In Window // /////////////// @@ -84,10 +95,6 @@ struct lzma_lz_encoder_s { /// is allowed to reach write_pos). size_t keep_size_after; - /// This is set to true once the last byte of the input data has - /// been copied to buffer. - bool stream_end_was_reached; - ////////////////// // Match Finder // ////////////////// diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c index f9c1e3fe..2c46b0c5 100644 --- a/src/liblzma/lzma/lzma_encoder.c +++ b/src/liblzma/lzma/lzma_encoder.c @@ -149,20 +149,11 @@ extern bool lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, size_t *restrict out_pos, size_t out_size) { - // Flush the range encoder's temporary buffer to out[]. - // Return immediatelly if not everything could be flushed. - if (rc_flush_buffer(&coder->rc, out, out_pos, out_size)) - return false; - - // Return immediatelly if we have already finished our work. - if (coder->lz.stream_end_was_reached - && coder->is_initialized - && coder->lz.read_pos == coder->lz.write_pos - && coder->additional_offset == 0) - return true; +#define rc_buffer coder->lz.temp +#define rc_buffer_size coder->lz.temp_size // Local copies - rc_to_local(coder->rc); + lzma_range_encoder rc = coder->rc; size_t out_pos_local = *out_pos; const uint32_t pos_mask = coder->pos_mask; const bool best_compression = coder->best_compression; @@ -170,13 +161,30 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // Initialize the stream if no data has been encoded yet. if (!coder->is_initialized) { if (coder->lz.read_pos == coder->lz.read_limit) { - // Cannot initialize, because there is no input data. - if (!coder->lz.stream_end_was_reached) + switch (coder->lz.sequence) { + case SEQ_RUN: + // Cannot initialize, because there is + // no input data. return false; - // If we get here, we are encoding an empty file. - // Initialization is skipped completely. - assert(coder->lz.write_pos == coder->lz.read_pos); + case SEQ_FLUSH: + // Nothing to flush. There cannot be a flush + // marker when no data has been processed + // yet (file format doesn't allow it, and + // it would be just waste of space). + return true; + + case SEQ_FINISH: + // We are encoding an empty file. No need + // to initialize the encoder. + assert(coder->lz.write_pos == coder->lz.read_pos); + break; + + default: + // We never get here. + assert(0); + return true; + } } else { // Do the actual initialization. @@ -214,9 +222,10 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // Check that there is some input to process. if (coder->lz.read_pos >= coder->lz.read_limit) { - // If end of input has been reached, we must keep - // encoding until additional_offset becomes zero. - if (!coder->lz.stream_end_was_reached + // If flushing or finishing, we must keep encoding + // until additional_offset becomes zero to make + // all the input available at output. + if (coder->lz.sequence == SEQ_RUN || coder->additional_offset == 0) break; } @@ -224,7 +233,7 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, assert(coder->lz.read_pos <= coder->lz.write_pos); #ifndef NDEBUG - if (coder->lz.stream_end_was_reached) { + if (coder->lz.sequence != SEQ_RUN) { assert(coder->lz.read_limit == coder->lz.write_pos); } else { assert(coder->lz.read_limit + coder->lz.keep_size_after @@ -363,19 +372,21 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // Check if everything is done. bool all_done = false; - if (coder->lz.stream_end_was_reached + if (coder->lz.sequence != SEQ_RUN && coder->lz.read_pos == coder->lz.write_pos && coder->additional_offset == 0) { - // Write end of stream marker. It is encoded as a match with - // distance of UINT32_MAX. Match length is needed but it is - // ignored by the decoder. - if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN) { + if (coder->lz.uncompressed_size == LZMA_VLI_VALUE_UNKNOWN + || coder->lz.sequence == SEQ_FLUSH) { + // Write special marker: flush marker or end of payload + // marker. Both are encoded as a match with distance of + // UINT32_MAX. The match length codes the type of the marker. const uint32_t pos_state = coder->now_pos & pos_mask; bit_encode_1(coder->is_match[coder->state][pos_state]); bit_encode_0(coder->is_rep[coder->state]); update_match(coder->state); - const uint32_t len = MATCH_MIN_LEN; // MATCH_MAX_LEN; + const uint32_t len = coder->lz.sequence == SEQ_FLUSH + ? LEN_SPECIAL_FLUSH : LEN_SPECIAL_EOPM; length_encode(coder->len_encoder, len - MATCH_MIN_LEN, pos_state, best_compression); @@ -398,15 +409,16 @@ lzma_lzma_encode(lzma_coder *coder, uint8_t *restrict out, // the range coder to the output buffer. rc_flush(); + rc_reset(rc); + // All done. Note that some output bytes might be - // pending in coder->buffer. lzma_encode() will + // pending in coder->lz.temp. lzma_lz_encode() will // take care of those bytes. - if (rc_buffer_size == 0) - all_done = true; + all_done = true; } // Store local variables back to *coder. - rc_from_local(coder->rc); + coder->rc = rc; *out_pos = out_pos_local; return all_done; |