aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common/microlzma_decoder.c
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2021-09-05 20:38:12 +0300
committer: Lasse Collin <lasse.collin@tukaani.org> 2021-09-05 20:38:12 +0300
commit: d267d109c370a40b502e73f8664b154b15e4f253 (patch)
tree: 0159c362319f4c47d309b8ed09db97998ddbf02d /src/liblzma/common/microlzma_decoder.c
parent: xzdiff: Update the man page about the exit status. (diff)
download: xz-d267d109c370a40b502e73f8664b154b15e4f253.tar.xz
liblzma: Rename EROFS LZMA to MicroLZMA.
It still exists primarily for EROFS but MicroLZMA is a more generic name (that hopefully doesn't clash with something that already exists).
Diffstat (limited to 'src/liblzma/common/microlzma_decoder.c')
-rw-r--r-- src/liblzma/common/microlzma_decoder.c 219
1 files changed, 219 insertions, 0 deletions
diff --git a/src/liblzma/common/microlzma_decoder.c b/src/liblzma/common/microlzma_decoder.c
new file mode 100644
index 00000000..37907109
--- /dev/null
+++ b/src/liblzma/common/microlzma_decoder.c
@@ -0,0 +1,219 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file microlzma_decoder.c
+/// \brief Decode MicroLZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+
+typedef struct {
+ /// LZMA1 decoder; initialized once the properties byte has been read
+ lzma_next_coder lzma;
+
+ /// Compressed size of the stream as given by the application.
+ /// This must be exactly correct.
+ ///
+ /// Decremented in microlzma_decode() as input is consumed.
+ uint64_t comp_size;
+
+ /// Uncompressed size of the stream as given by the application.
+ /// If uncomp_size_is_exact is false, this may be less than the
+ /// actual uncompressed size and is decremented as output is
+ /// produced. If it is true, the value is passed once to the LZ
+ /// decoder and is not modified afterwards.
+ lzma_vli uncomp_size;
+
+ /// LZMA dictionary size as given by the application
+ uint32_t dict_size;
+
+ /// If true, the exact uncompressed size is known. If false,
+ /// uncomp_size may be smaller than the real uncompressed size;
+ /// uncomp_size may never be bigger than the real uncompressed size.
+ bool uncomp_size_is_exact;
+
+ /// True once the properties byte (the first byte of the MicroLZMA
+ /// stream) has been decoded and the LZMA1 decoder has been set up.
+ bool props_decoded;
+} lzma_microlzma_coder;
+
+
+static lzma_ret
+microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+ // Decodes MicroLZMA: the properties byte first, then plain LZMA1 data.
+ // Remember the in start position so that we can update comp_size.
+ const size_t in_start = *in_pos;
+
+ // Remember the out start position to update uncomp_size (inexact mode).
+ const size_t out_start = *out_pos;
+
+ // Limit the input so that the decoder won't read more than comp_size.
+ // This matters when uncomp_size isn't exact: the LZMA decoder may then
+ // read more input even with no output space (looking for EOPM). The
+ // cast is safe: comp_size is below in_size - *in_pos in this branch.
+ if (in_size - *in_pos > coder->comp_size)
+ in_size = *in_pos + (size_t)(coder->comp_size);
+
+ // When the exact uncompressed size isn't known, limit the available
+ // output space to the remaining uncomp_size so that the LZMA decoder
+ // cannot produce more than uncomp_size bytes in total.
+ if (!coder->uncomp_size_is_exact
+ && out_size - *out_pos > coder->uncomp_size)
+ out_size = *out_pos + (size_t)(coder->uncomp_size);
+ // One-time setup: runs until the properties byte has been handled.
+ if (!coder->props_decoded) {
+ // The properties byte needs one byte of input. With no input,
+ // return without consuming anything; props_decoded stays false.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ lzma_options_lzma options = {
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ };
+
+ // The properties byte is stored as the bitwise negation of the
+ // typical encoding, so invert it before decoding.
+ if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
+ return LZMA_OPTIONS_ERROR;
+
+ ++*in_pos;
+ // Initialize the LZMA1 decoder. NOTE(review): error returns from
+ // here on leave comp_size unadjusted for the byte consumed above.
+ options.dict_size = coder->dict_size;
+ lzma_filter_info filters[2] = {
+ {
+ .init = &lzma_lzma_decoder_init,
+ .options = &options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma,
+ allocator, filters));
+
+ // Use a hack to give the exact uncompressed size to the LZ decoder.
+ if (coder->uncomp_size_is_exact)
+ lzma_lz_decoder_uncompressed(coder->lzma.coder,
+ coder->uncomp_size);
+ // The real first byte held the properties and was consumed above.
+ // Pass one dummy 0x00 byte to the LZMA decoder since that
+ // is what it expects the first byte to be.
+ const uint8_t dummy_in = 0;
+ size_t dummy_in_pos = 0;
+ if (coder->lzma.code(coder->lzma.coder, allocator,
+ &dummy_in, &dummy_in_pos, 1,
+ out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ assert(dummy_in_pos == 1);
+ coder->props_decoded = true;
+ }
+
+ // The rest is normal LZMA decoding with the clamped in/out sizes.
+ lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ // Update the remaining compressed size; this counts the props byte too.
+ assert(coder->comp_size >= *in_pos - in_start);
+ coder->comp_size -= *in_pos - in_start;
+
+ if (coder->uncomp_size_is_exact) {
+ // After successful decompression of the complete stream
+ // all of comp_size must have been consumed.
+ if (ret == LZMA_STREAM_END && coder->comp_size != 0)
+ ret = LZMA_DATA_ERROR;
+ } else {
+ // Update the amount of output remaining.
+ assert(coder->uncomp_size >= *out_pos - out_start);
+ coder->uncomp_size -= *out_pos - out_start;
+
+ // - We must not get LZMA_STREAM_END because the stream
+ //   shouldn't have EOPM.
+ // - We must use uncomp_size to determine when to
+ //   return LZMA_STREAM_END.
+ if (ret == LZMA_STREAM_END)
+ ret = LZMA_DATA_ERROR;
+ else if (coder->uncomp_size == 0)
+ ret = LZMA_STREAM_END;
+ }
+
+ return ret;
+}
+
+
+static void
+microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	// Tear down the nested LZMA1 decoder first, then the wrapper itself.
+	lzma_microlzma_coder *const self = coder_ptr;
+	lzma_next_end(&self->lzma, allocator);
+	lzma_free(self, allocator);
+}
+
+
+static lzma_ret
+microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		uint64_t comp_size,
+		uint64_t uncomp_size, bool uncomp_size_is_exact,
+		uint32_t dict_size)
+{
+	lzma_next_coder_init(&microlzma_decoder_init, next, allocator);
+
+	// If next already holds a coder, reuse it; otherwise allocate one
+	// and install the decode/end callbacks.
+	lzma_microlzma_coder *self = next->coder;
+	if (self == NULL) {
+		self = lzma_alloc(sizeof(*self), allocator);
+		if (self == NULL)
+			return LZMA_MEM_ERROR;
+
+		// Fresh coder: the LZMA1 decoder is set up in microlzma_decode().
+		self->lzma = LZMA_NEXT_CODER_INIT;
+		next->coder = self;
+		next->code = &microlzma_decode;
+		next->end = &microlzma_decoder_end;
+	}
+
+	// uncomp_size is uint64_t in the public API but the internal
+	// LZ decoder API takes lzma_vli, so reject out-of-range values.
+	if (uncomp_size > LZMA_VLI_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// Store the stream parameters and restart from the properties byte.
+	self->props_decoded = false;
+	self->dict_size = dict_size;
+	self->uncomp_size_is_exact = uncomp_size_is_exact;
+	self->uncomp_size = uncomp_size;
+	self->comp_size = comp_size;
+	return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
+		uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+		uint32_t dict_size)
+{
+	// NOTE(review): presumably this macro returns early on failure; confirm.
+	lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
+			uncomp_size, uncomp_size_is_exact, dict_size);
+
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+	strm->internal->supported_actions[LZMA_RUN] = true;
+	return LZMA_OK;
+}