aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma/lzma2_encoder.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblzma/lzma/lzma2_encoder.c')
-rw-r--r--src/liblzma/lzma/lzma2_encoder.c406
1 files changed, 406 insertions, 0 deletions
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
new file mode 100644
index 00000000..b2cd176b
--- /dev/null
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -0,0 +1,406 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzma2_encoder.c
+/// \brief LZMA2 encoder
+//
+// Copyright (C) 1999-2008 Igor Pavlov
+// Copyright (C) 2008 Lasse Collin
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lz_encoder.h"
+#include "lzma_encoder.h"
+#include "fastpos.h"
+#include "lzma2_encoder.h"
+
+
+/// Maximum number of bytes of actual data per chunk (no headers)
+#define LZMA2_CHUNK_MAX (UINT32_C(1) << 16)
+
+/// Maximum uncompressed size of LZMA chunk (no headers)
+#define LZMA2_UNCOMPRESSED_MAX (UINT32_C(1) << 21)
+
+/// Maximum size of LZMA2 headers
+#define LZMA2_HEADER_MAX 6
+
+/// Size of a header for uncompressed chunk
+#define LZMA2_HEADER_UNCOMPRESSED 3
+
+
+struct lzma_coder_s {
+ enum {
+ SEQ_INIT,
+ SEQ_LZMA_ENCODE,
+ SEQ_LZMA_COPY,
+ SEQ_UNCOMPRESSED_HEADER,
+ SEQ_UNCOMPRESSED_COPY,
+ } sequence;
+
+ /// LZMA encoder
+ lzma_coder *lzma;
+
+ /// If this is not NULL, we will check new options from this
+ /// structure when starting a new chunk.
+ const lzma_options_lzma *opt_new;
+
+ /// LZMA options currently in use.
+ lzma_options_lzma opt_cur;
+
+ bool need_properties;
+ bool need_state_reset;
+ bool need_dictionary_reset;
+
+ /// Uncompressed size of a chunk
+ size_t uncompressed_size;
+
+ /// Compressed size of a chunk (excluding headers); this is also used
+ /// to indicate the end of buf[] in SEQ_LZMA_COPY.
+ size_t compressed_size;
+
+ /// Read position in buf[]
+ size_t buf_pos;
+
+ /// Buffer to hold the chunk header and LZMA compressed data
+ uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX];
+};
+
+
+static void
+lzma2_header_lzma(lzma_coder *coder)
+{
+ assert(coder->uncompressed_size > 0);
+ assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+ assert(coder->compressed_size > 0);
+ assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+
+ size_t pos;
+
+ if (coder->need_properties) {
+ pos = 0;
+
+ if (coder->need_dictionary_reset)
+ coder->buf[pos] = 0x80 + (3 << 5);
+ else
+ coder->buf[pos] = 0x80 + (2 << 5);
+ } else {
+ pos = 1;
+
+ if (coder->need_state_reset)
+ coder->buf[pos] = 0x80 + (1 << 5);
+ else
+ coder->buf[pos] = 0x80;
+ }
+
+ // Set the start position for copying.
+ coder->buf_pos = pos;
+
+ // Uncompressed size
+ size_t size = coder->uncompressed_size - 1;
+ coder->buf[pos++] += size >> 16;
+ coder->buf[pos++] = (size >> 8) & 0xFF;
+ coder->buf[pos++] = size & 0xFF;
+
+ // Compressed size
+ size = coder->compressed_size - 1;
+ coder->buf[pos++] = size >> 8;
+ coder->buf[pos++] = size & 0xFF;
+
+ // Properties, if needed
+ if (coder->need_properties)
+ lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos);
+
+ coder->need_properties = false;
+ coder->need_state_reset = false;
+ coder->need_dictionary_reset = false;
+
+ // The copying code uses coder->compressed_size to indicate the end
+ // of coder->buf[], so we need add the maximum size of the header here.
+ coder->compressed_size += LZMA2_HEADER_MAX;
+
+ return;
+}
+
+
+static void
+lzma2_header_uncompressed(lzma_coder *coder)
+{
+ assert(coder->uncompressed_size > 0);
+ assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX);
+
+ // If this is the first chunk, we need to include dictionary
+ // reset indicator.
+ if (coder->need_dictionary_reset)
+ coder->buf[0] = 1;
+ else
+ coder->buf[0] = 2;
+
+ coder->need_dictionary_reset = false;
+
+ // "Compressed" size
+ coder->buf[1] = (coder->uncompressed_size - 1) >> 8;
+ coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF;
+
+ // Set the start position for copying.
+ coder->buf_pos = 0;
+ return;
+}
+
+
+static lzma_ret
+lzma2_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size)
+{
+ while (*out_pos < out_size)
+ switch (coder->sequence) {
+ case SEQ_INIT:
+ // If there's no input left and we are flushing or finishing,
+ // don't start a new chunk.
+ if (mf_unencoded(mf) == 0) {
+ // Write end of payload marker if finishing.
+ if (mf->action == LZMA_FINISH)
+ out[(*out_pos)++] = 0;
+
+ return mf->action == LZMA_RUN
+ ? LZMA_OK : LZMA_STREAM_END;
+ }
+
+ // Look if there are new options. At least for now,
+ // only lc/lp/pb can be changed.
+ if (coder->opt_new != NULL
+ && (coder->opt_cur.literal_context_bits
+ != coder->opt_new->literal_context_bits
+ || coder->opt_cur.literal_pos_bits
+ != coder->opt_new->literal_pos_bits
+ || coder->opt_cur.pos_bits
+ != coder->opt_new->pos_bits)) {
+ // Options have been changed, copy them to opt_cur.
+ coder->opt_cur.literal_context_bits
+ = coder->opt_new->literal_context_bits;
+ coder->opt_cur.literal_pos_bits
+ = coder->opt_new->literal_pos_bits;
+ coder->opt_cur.pos_bits
+ = coder->opt_new->pos_bits;
+
+ // We need to write the new options and reset
+ // the encoder state.
+ coder->need_properties = true;
+ coder->need_state_reset = true;
+ }
+
+ if (coder->need_state_reset)
+ lzma_lzma_encoder_reset(coder->lzma, &coder->opt_cur);
+
+ coder->uncompressed_size = 0;
+ coder->compressed_size = 0;
+ coder->sequence = SEQ_LZMA_ENCODE;
+
+ // Fall through
+
+ case SEQ_LZMA_ENCODE: {
+ // Calculate how much more uncompressed data this chunk
+ // could accept.
+ const uint32_t left = LZMA2_UNCOMPRESSED_MAX
+ - coder->uncompressed_size;
+ uint32_t limit;
+
+ if (left < mf->match_len_max) {
+ // Must flush immediatelly since the next LZMA symbol
+ // could make the uncompressed size of the chunk too
+ // big.
+ limit = 0;
+ } else {
+ // Calculate maximum read_limit that is OK from point
+ // of view of LZMA2 chunk size.
+ limit = mf->read_pos - mf->read_ahead
+ + left - mf->match_len_max;
+ }
+
+ // Save the start position so that we can update
+ // coder->uncompressed_size.
+ const uint32_t read_start = mf->read_pos - mf->read_ahead;
+
+ // Call the LZMA encoder until the chunk is finished.
+ const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf,
+ coder->buf + LZMA2_HEADER_MAX,
+ &coder->compressed_size,
+ LZMA2_CHUNK_MAX, limit);
+
+ coder->uncompressed_size += mf->read_pos - mf->read_ahead
+ - read_start;
+
+ assert(coder->compressed_size <= LZMA2_CHUNK_MAX);
+ assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX);
+
+ if (ret != LZMA_STREAM_END)
+ return LZMA_OK;
+
+ // See if the chunk compressed. If it didn't, we encode it
+ // as uncompressed chunk. This saves a few bytes of space
+ // and makes decoding faster.
+ if (coder->compressed_size >= coder->uncompressed_size) {
+ coder->uncompressed_size += mf->read_ahead;
+ assert(coder->uncompressed_size
+ <= LZMA2_UNCOMPRESSED_MAX);
+ mf->read_ahead = 0;
+ lzma2_header_uncompressed(coder);
+ coder->need_state_reset = true;
+ coder->sequence = SEQ_UNCOMPRESSED_HEADER;
+ break;
+ }
+
+ // The chunk did compress at least by one byte, so we store
+ // the chunk as LZMA.
+ lzma2_header_lzma(coder);
+
+ coder->sequence = SEQ_LZMA_COPY;
+ }
+
+ // Fall through
+
+ case SEQ_LZMA_COPY:
+ // Copy the compressed chunk along its headers to the
+ // output buffer.
+ lzma_bufcpy(coder->buf, &coder->buf_pos,
+ coder->compressed_size,
+ out, out_pos, out_size);
+ if (coder->buf_pos != coder->compressed_size)
+ return LZMA_OK;
+
+ coder->sequence = SEQ_INIT;
+ break;
+
+ case SEQ_UNCOMPRESSED_HEADER:
+ // Copy the three-byte header to indicate uncompressed chunk.
+ lzma_bufcpy(coder->buf, &coder->buf_pos,
+ LZMA2_HEADER_UNCOMPRESSED,
+ out, out_pos, out_size);
+ if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED)
+ return LZMA_OK;
+
+ coder->sequence = SEQ_UNCOMPRESSED_COPY;
+
+ // Fall through
+
+ case SEQ_UNCOMPRESSED_COPY:
+ // Copy the uncompressed data as is from the dictionary
+ // to the output buffer.
+ mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size);
+ if (coder->uncompressed_size != 0)
+ return LZMA_OK;
+
+ coder->sequence = SEQ_INIT;
+ break;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+lzma2_encoder_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+ lzma_free(coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator,
+ const void *options, lzma_lz_options *lz_options)
+{
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
+ if (lz->coder == NULL) {
+ lz->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+ if (lz->coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ lz->code = &lzma2_encode;
+ lz->end = &lzma2_encoder_end;
+
+ lz->coder->lzma = NULL;
+ }
+
+ lz->coder->sequence = SEQ_INIT;
+ lz->coder->need_properties = true;
+ lz->coder->need_state_reset = false;
+ lz->coder->need_dictionary_reset = true;
+
+ lz->coder->opt_cur = *(const lzma_options_lzma *)(options);
+ lz->coder->opt_new = lz->coder->opt_cur.persistent
+ ? options : NULL;
+
+ // Initialize LZMA encoder
+ return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator,
+ &lz->coder->opt_cur, lz_options));
+
+ // Make sure that we will always have enough history available in
+ // case we need to use uncompressed chunks. They are used when the
+ // compressed size of a chunk is not smaller than the uncompressed
+ // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes
+ // history available.
+ if (lz_options->before_size + lz_options->dictionary_size
+ < LZMA2_CHUNK_MAX)
+ lz_options->before_size = LZMA2_CHUNK_MAX
+ - lz_options->dictionary_size;
+
+ return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_lzma2_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return lzma_lz_encoder_init(
+ next, allocator, filters, &lzma2_encoder_init);
+}
+
+
+extern uint64_t
+lzma_lzma2_encoder_memusage(const void *options)
+{
+ const uint64_t lzma_memusage = lzma_lzma_encoder_memusage(options);
+ if (lzma_memusage == UINT64_MAX)
+ return UINT64_MAX;
+
+ return sizeof(lzma_coder) + lzma_memusage;
+}
+
+
+extern lzma_ret
+lzma_lzma2_props_encode(const void *options, uint8_t *out)
+{
+ const lzma_options_lzma *const opt = options;
+ uint32_t d = MAX(opt->dictionary_size, LZMA_DICTIONARY_SIZE_MIN);
+
+ // Round up to to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending
+ // on which one is the next:
+ --d;
+ d |= d >> 2;
+ d |= d >> 3;
+ d |= d >> 4;
+ d |= d >> 8;
+ d |= d >> 16;
+
+ // Get the highest two bits using the proper encoding:
+ if (d == UINT32_MAX)
+ out[0] = 40;
+ else
+ out[0] = get_pos_slot(d + 1) - 24;
+
+ return LZMA_OK;
+}