aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/lzma
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/liblzma/lzma/lzma_encoder.c127
-rw-r--r--src/liblzma/lzma/lzma_encoder_private.h12
2 files changed, 104 insertions, 35 deletions
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index 07d2b87b..62bb6343 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -268,6 +268,7 @@ static bool
encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
{
assert(mf_position(mf) == 0);
+ assert(coder->uncomp_size == 0);
if (mf->read_pos == mf->read_limit) {
if (mf->action == LZMA_RUN)
@@ -283,6 +284,7 @@ encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
mf->read_ahead = 0;
rc_bit(&coder->rc, &coder->is_match[0][0], 0);
rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
+ ++coder->uncomp_size;
}
// Initialization is done (except if empty file).
@@ -317,21 +319,28 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
if (!coder->is_initialized && !encode_init(coder, mf))
return LZMA_OK;
- // Get the lowest bits of the uncompressed offset from the LZ layer.
- uint32_t position = mf_position(mf);
+ // Encode pending output bytes from the range encoder.
+ // At the start of the stream, encode_init() encodes one literal.
+ // Later there can be pending output only with LZMA1 because LZMA2
+ // ensures that there is always enough output space. Thus when using
+ // LZMA2, rc_encode() calls in this function will always return false.
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // We don't get here with LZMA2.
+ assert(limit == UINT32_MAX);
+ return LZMA_OK;
+ }
- while (true) {
- // Encode pending bits, if any. Calling this before encoding
- // the next symbol is needed only with plain LZMA, since
- // LZMA2 always provides big enough buffer to flush
- // everything out from the range encoder. For the same reason,
- // rc_encode() never returns true when this function is used
- // as part of LZMA2 encoder.
- if (rc_encode(&coder->rc, out, out_pos, out_size)) {
- assert(limit == UINT32_MAX);
- return LZMA_OK;
- }
+ // If the range encoder was flushed in an earlier call to this
+ // function but there wasn't enough output buffer space, those
+ // bytes would have now been encoded by the above rc_encode() call
+ // and the stream has now been finished. This can only happen with
+ // LZMA1 as LZMA2 always provides enough output buffer space.
+ if (coder->is_flushed) {
+ assert(limit == UINT32_MAX);
+ return LZMA_STREAM_END;
+ }
+ while (true) {
// With LZMA2 we need to take care that compressed size of
// a chunk doesn't get too big.
// FIXME? Check if this could be improved.
@@ -365,37 +374,64 @@ lzma_lzma_encode(lzma_lzma1_encoder *restrict coder, lzma_mf *restrict mf,
if (coder->fast_mode)
lzma_lzma_optimum_fast(coder, mf, &back, &len);
else
- lzma_lzma_optimum_normal(
- coder, mf, &back, &len, position);
-
- encode_symbol(coder, mf, back, len, position);
-
- position += len;
- }
+ lzma_lzma_optimum_normal(coder, mf, &back, &len,
+ (uint32_t)(coder->uncomp_size));
+
+ encode_symbol(coder, mf, back, len,
+ (uint32_t)(coder->uncomp_size));
+
+ // If output size limiting is active (out_limit != 0), check
+ // if encoding this LZMA symbol would make the output size
+ // exceed the specified limit.
+ if (coder->out_limit != 0 && rc_encode_dummy(
+ &coder->rc, coder->out_limit)) {
+ // The most recent LZMA symbol would make the output
+ // too big. Throw it away.
+ rc_forget(&coder->rc);
+
+ // FIXME: Tell the LZ layer to not read more input as
+ // it would be a waste of time. This doesn't matter if
+ // output-size-limited encoding is done with a single
+ // call though.
- if (!coder->is_flushed) {
- coder->is_flushed = true;
-
- // We don't support encoding plain LZMA streams without EOPM,
- // and LZMA2 doesn't use EOPM at LZMA level.
- if (limit == UINT32_MAX)
- encode_eopm(coder, position);
+ break;
+ }
- // Flush the remaining bytes from the range encoder.
- rc_flush(&coder->rc);
+ // This symbol will be encoded so update the uncompressed size.
+ coder->uncomp_size += len;
- // Copy the remaining bytes to the output buffer. If there
- // isn't enough output space, we will copy out the remaining
- // bytes on the next call to this function by using
- // the rc_encode() call in the encoding loop above.
+ // Encode the LZMA symbol.
if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // Once again, this can only happen with LZMA1.
assert(limit == UINT32_MAX);
return LZMA_OK;
}
}
- // Make it ready for the next LZMA2 chunk.
- coder->is_flushed = false;
+ // Make the uncompressed size available to the application.
+ if (coder->uncomp_size_ptr != NULL)
+ *coder->uncomp_size_ptr = coder->uncomp_size;
+
+ // LZMA2 doesn't use EOPM at LZMA level.
+ //
+ // Plain LZMA streams without EOPM aren't supported except when
+ // output size limiting is enabled.
+ if (limit == UINT32_MAX && coder->out_limit == 0)
+ encode_eopm(coder, (uint32_t)(coder->uncomp_size));
+
+ // Flush the remaining bytes from the range encoder.
+ rc_flush(&coder->rc);
+
+ // Copy the remaining bytes to the output buffer. If there
+ // isn't enough output space, we will copy out the remaining
+ // bytes on the next call to this function.
+ if (rc_encode(&coder->rc, out, out_pos, out_size)) {
+ // This cannot happen with LZMA2.
+ assert(limit == UINT32_MAX);
+
+ coder->is_flushed = true;
+ return LZMA_OK;
+ }
return LZMA_STREAM_END;
}
@@ -414,6 +450,22 @@ lzma_encode(void *coder, lzma_mf *restrict mf,
}
+static lzma_ret
+lzma_lzma_set_out_limit(
+ void *coder_ptr, uint64_t *uncomp_size, uint64_t out_limit)
+{
+ // Minimum output size is 5 bytes but that cannot hold any output
+ // so we use 6 bytes.
+ if (out_limit < 6)
+ return LZMA_BUF_ERROR;
+
+ lzma_lzma1_encoder *coder = coder_ptr;
+ coder->out_limit = out_limit;
+ coder->uncomp_size_ptr = uncomp_size;
+ return LZMA_OK;
+}
+
+
////////////////////
// Initialization //
////////////////////
@@ -598,6 +650,10 @@ lzma_lzma_encoder_create(void **coder_ptr,
coder->is_initialized = options->preset_dict != NULL
&& options->preset_dict_size > 0;
coder->is_flushed = false;
+ coder->uncomp_size = 0;
+
+ // Output size limiting is disabled by default.
+ coder->out_limit = 0;
set_lz_options(lz_options, options);
@@ -610,6 +666,7 @@ lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
const void *options, lzma_lz_options *lz_options)
{
lz->code = &lzma_encode;
+ lz->set_out_limit = &lzma_lzma_set_out_limit;
return lzma_lzma_encoder_create(
&lz->coder, allocator, options, lz_options);
}
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index 2e34aace..8960c52c 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -72,6 +72,18 @@ struct lzma_lzma1_encoder_s {
/// Range encoder
lzma_range_encoder rc;
+ /// Uncompressed size (doesn't include possible preset dictionary)
+ uint64_t uncomp_size;
+
+ /// If non-zero, produce at most this much output.
+ /// Some input may then be missing from the output.
+ uint64_t out_limit;
+
+ /// If the above out_limit is non-zero, *uncomp_size_ptr is set to
+ /// the amount of uncompressed data that we were able to fit
+ /// in the output buffer.
+ uint64_t *uncomp_size_ptr;
+
/// State
lzma_lzma_state state;