aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/common
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2021-01-14 20:07:01 +0200
committerLasse Collin <lasse.collin@tukaani.org>2021-01-14 20:10:59 +0200
commit601ec0311e769fc704daaaa7dac0ca840aff080e (patch)
treeec13c2c53062e7fa6ec8210380c0efc97c8cd3f7 /src/liblzma/common
parentliblzma: Add rough support for output-size-limited encoding in LZMA1. (diff)
downloadxz-601ec0311e769fc704daaaa7dac0ca840aff080e.tar.xz
liblzma: Add EROFS LZMA encoder and decoder.
Right now this is just a planned extra-compact format for use in the EROFS file system in Linux. At this point it's possible that the format will either change or be abandoned and removed completely. The special thing about the encoder is that it uses the output-size-limited encoding added in the previous commit. EROFS uses fixed-sized blocks (e.g. 4 KiB) to hold compressed data so the compressors must be able to create valid streams that fill the given block size.
Diffstat (limited to '')
-rw-r--r--src/liblzma/common/Makefile.inc2
-rw-r--r--src/liblzma/common/erofs_decoder.c148
-rw-r--r--src/liblzma/common/erofs_encoder.c139
3 files changed, 289 insertions, 0 deletions
diff --git a/src/liblzma/common/Makefile.inc b/src/liblzma/common/Makefile.inc
index 0408f9a4..8205eb7f 100644
--- a/src/liblzma/common/Makefile.inc
+++ b/src/liblzma/common/Makefile.inc
@@ -36,6 +36,7 @@ liblzma_la_SOURCES += \
common/easy_buffer_encoder.c \
common/easy_encoder.c \
common/easy_encoder_memusage.c \
+ common/erofs_encoder.c \
common/filter_buffer_encoder.c \
common/filter_encoder.c \
common/filter_encoder.h \
@@ -65,6 +66,7 @@ liblzma_la_SOURCES += \
common/block_decoder.h \
common/block_header_decoder.c \
common/easy_decoder_memusage.c \
+ common/erofs_decoder.c \
common/file_info.c \
common/filter_buffer_decoder.c \
common/filter_decoder.c \
diff --git a/src/liblzma/common/erofs_decoder.c b/src/liblzma/common/erofs_decoder.c
new file mode 100644
index 00000000..ef584373
--- /dev/null
+++ b/src/liblzma/common/erofs_decoder.c
@@ -0,0 +1,148 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file erofs_decoder.c
+/// \brief Decode EROFS LZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+
+typedef struct {
+ /// LZMA1 decoder
+ lzma_next_coder lzma;
+
+ /// Uncompressed size of the stream as given by the application
+ lzma_vli uncomp_size;
+
+ /// LZMA dictionary size as given by the application
+ uint32_t dict_size;
+
+ /// True once the first byte of the EROFS LZMA stream
+ /// has been processed.
+ bool props_decoded;
+} lzma_erofs_coder;
+
+
+static lzma_ret
+erofs_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+
+ if (!coder->props_decoded) {
+ // There must be at least one byte of input to decode
+ // the properties byte.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ lzma_options_lzma options = {
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ };
+
+ // The properties are stored as bitwise-negation
+ // of the typical encoding.
+ if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
+ return LZMA_OPTIONS_ERROR;
+
+ ++*in_pos;
+
+ // Initialize the decoder.
+ options.dict_size = coder->dict_size;
+ lzma_filter_info filters[2] = {
+ {
+ .init = &lzma_lzma_decoder_init,
+ .options = &options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma,
+ allocator, filters));
+
+ // Use a hack to set the uncompressed size.
+ lzma_lz_decoder_uncompressed(coder->lzma.coder,
+ coder->uncomp_size);
+
+ // Pass one dummy 0x00 byte to the LZMA decoder since that
+ // is what it expects the first byte to be.
+ const uint8_t dummy_in = 0;
+ size_t dummy_in_pos = 0;
+ if (coder->lzma.code(coder->lzma.coder, allocator,
+ &dummy_in, &dummy_in_pos, 1,
+ out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ assert(dummy_in_pos == 1);
+ coder->props_decoded = true;
+ }
+
+ // The rest is normal LZMA decoding.
+ return coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+}
+
+
+static void
+erofs_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t uncomp_size, uint32_t dict_size)
+{
+ lzma_next_coder_init(&erofs_decoder_init, next, allocator);
+
+ lzma_erofs_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &erofs_decode;
+ next->end = &erofs_decoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ if (uncomp_size > LZMA_VLI_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->uncomp_size = uncomp_size;
+ coder->dict_size = dict_size;
+
+ coder->props_decoded = false;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_erofs_decoder(lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size)
+{
+ lzma_next_strm_init(erofs_decoder_init, strm, uncomp_size, dict_size);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/erofs_encoder.c b/src/liblzma/common/erofs_encoder.c
new file mode 100644
index 00000000..4cdd08f1
--- /dev/null
+++ b/src/liblzma/common/erofs_encoder.c
@@ -0,0 +1,139 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file erofs_encoder.c
+/// \brief Encode into EROFS LZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder.h"
+
+
+typedef struct {
+ /// LZMA1 encoder
+ lzma_next_coder lzma;
+
+ /// LZMA properties byte (lc/lp/pb)
+ uint8_t props;
+} lzma_erofs_coder;
+
+
+static lzma_ret
+erofs_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+
+ // Remember *out_pos so that we can overwrite the first byte with
+ // the LZMA properties byte.
+ const size_t out_start = *out_pos;
+
+ // Remember *in_pos so that we can set it based on how many
+ // uncompressed bytes were actually encoded.
+ const size_t in_start = *in_pos;
+
+ // Set the output size limit based on the available output space.
+ // We know that the encoder supports set_out_limit() so
+ // LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible
+ // but lzma_code() has an assertion to not allow it to be returned
+ // from here and I don't want to change that for now, so
+ // LZMA_BUF_ERROR becomes LZMA_PROG_ERROR.
+ uint64_t uncomp_size;
+ if (coder->lzma.set_out_limit(coder->lzma.coder,
+ &uncomp_size, out_size - *out_pos) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // set_out_limit fails if this isn't true.
+ assert(out_size - *out_pos >= 6);
+
+ // Encode as much as possible.
+ const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size, action);
+
+ if (ret != LZMA_STREAM_END) {
+ if (ret == LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ return ret;
+ }
+
+ // The first output byte is bitwise-negation of the properties byte.
+ // We know that there is space for this byte because set_out_limit
+ // and the actual encoding succeeded.
+ out[out_start] = (uint8_t)(~coder->props);
+
+ // The LZMA encoder likely read more input than it was able to encode.
+ // Set *in_pos based on uncomp_size.
+ assert(uncomp_size <= in_size - in_start);
+ *in_pos = in_start + (size_t)(uncomp_size);
+
+ return ret;
+}
+
+
+static void
+erofs_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+erofs_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_options_lzma *options)
+{
+ lzma_next_coder_init(&erofs_encoder_init, next, allocator);
+
+ lzma_erofs_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &erofs_encode;
+ next->end = &erofs_encoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Encode the properties byte. Bitwise-negation of it will be the
+ // first output byte.
+ return_if_error(lzma_lzma_lclppb_encode(options, &coder->props));
+
+ // Initialize the LZMA encoder.
+ const lzma_filter_info filters[2] = {
+ {
+ .init = &lzma_lzma_encoder_init,
+ .options = (void *)(options),
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return lzma_next_filter_init(&coder->lzma, allocator, filters);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_erofs_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+ lzma_next_strm_init(erofs_encoder_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+
+}