aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/liblzma/api/lzma/container.h76
-rw-r--r--src/liblzma/common/Makefile.inc2
-rw-r--r--src/liblzma/common/erofs_decoder.c148
-rw-r--r--src/liblzma/common/erofs_encoder.c139
-rw-r--r--src/liblzma/liblzma.map2
5 files changed, 367 insertions, 0 deletions
diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h
index 9fbf4df0..581f3507 100644
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@@ -444,6 +444,55 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
lzma_nothrow lzma_attr_warn_unused_result;
+/**
+ * \brief EROFS LZMA encoder
+ *
+ * The EROFS LZMA format is a raw LZMA stream whose first byte (always 0x00)
+ * has been replaced with bitwise-negation of the LZMA properties (lc/lp/pb).
+ * This encoding ensures that the first byte of an EROFS LZMA stream is never
+ * 0x00. There is no end of payload marker and thus the uncompressed size
+ * must be stored separately. For the best error detection the dictionary
+ * size should be stored separately as well but alternatively one may use
+ * the uncompressed size as the dictionary size when decoding.
+ *
+ * With the EROFS LZMA encoder, lzma_code() behaves slightly unusually.
+ * The action argument must be LZMA_FINISH and the return value cannot be
+ * LZMA_OK. Thus the encoding is always done with a single lzma_code() after
+ * the initialization. The benefit of the combination of initialization
+ * function and lzma_code() is that memory allocations can be re-used for
+ * better performance.
+ *
+ * lzma_code() will try to encode as much input as is possible to fit into
+ * the given output buffer. If not all input can be encoded, the stream will
+ * be finished without encoding all the input. The caller must check both
+ * input and output buffer usage after lzma_code() (total_in and total_out
+ * in lzma_stream can be convenient). Often lzma_code() can fill the output
+ * buffer completely if there is a lot of input, but sometimes a few bytes
+ * may remain unused because the next LZMA symbol would require more space.
+ *
+ * lzma_stream.avail_out must be at least 6. Otherwise LZMA_PROG_ERROR
+ * will be returned.
+ *
+ * The LZMA dictionary size should be reasonably small to speed up the
+ * encoder re-initialization. A good value is bigger than the resulting
+ * uncompressed size of most of the output chunks. For example, if output
+ * size is 4 KiB, dictionary size of 32 KiB or 64 KiB is good. If the
+ * data compresses extremely well, even 128 KiB may be useful.
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param options LZMA encoder options. The lc/lp/pb values from here
+ * are what get stored, bitwise-negated, in the first
+ * byte of the output.
+ *
+ * \return - LZMA_STREAM_END: All good. Check the amounts of input used
+ * and output produced. Store the amount of input used
+ * (uncompressed size) as it needs to be known to decompress
+ * the data.
+ * - LZMA_OPTIONS_ERROR
+ * - LZMA_MEM_ERROR
+ * - LZMA_PROG_ERROR: In addition to the generic reasons for this
+ * error code, this may also be returned if there isn't enough
+ * output space (6 bytes) to create a valid EROFS LZMA stream.
+ */
+extern LZMA_API(lzma_ret) lzma_erofs_encoder(
+ lzma_stream *strm, const lzma_options_lzma *options);
+
+
/************
* Decoding *
************/
@@ -630,3 +679,30 @@ extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
const uint8_t *in, size_t *in_pos, size_t in_size,
uint8_t *out, size_t *out_pos, size_t out_size)
lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
+ * \brief EROFS LZMA decoder
+ *
+ * See lzma_erofs_encoder() for more information about the EROFS LZMA
+ * format.
+ *
+ * The lzma_code() usage with this decoder is completely normal.
+ * The special behavior of lzma_code() applies to lzma_erofs_encoder() only.
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param uncomp_size Uncompressed size of the EROFS LZMA stream.
+ * The caller must somehow know this. Note that
+ * while the EROFS LZMA decoder in XZ Embedded also
+ * needs the compressed size, the implementation in
+ * liblzma doesn't need to know the compressed size.
+ * \param dict_size LZMA dictionary size that was used when
+ * compressing the data. It is OK to use a bigger
+ * value too but liblzma will then allocate more
+ * memory than would actually be required and error
+ * detection will be slightly worse. (Note that with
+ * the implementation in XZ Embedded it doesn't
+ * affect the memory usage if one specifies a bigger
+ * dictionary than actually required.)
+ */
+extern LZMA_API(lzma_ret) lzma_erofs_decoder(
+ lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size);
diff --git a/src/liblzma/common/Makefile.inc b/src/liblzma/common/Makefile.inc
index 0408f9a4..8205eb7f 100644
--- a/src/liblzma/common/Makefile.inc
+++ b/src/liblzma/common/Makefile.inc
@@ -36,6 +36,7 @@ liblzma_la_SOURCES += \
common/easy_buffer_encoder.c \
common/easy_encoder.c \
common/easy_encoder_memusage.c \
+ common/erofs_encoder.c \
common/filter_buffer_encoder.c \
common/filter_encoder.c \
common/filter_encoder.h \
@@ -65,6 +66,7 @@ liblzma_la_SOURCES += \
common/block_decoder.h \
common/block_header_decoder.c \
common/easy_decoder_memusage.c \
+ common/erofs_decoder.c \
common/file_info.c \
common/filter_buffer_decoder.c \
common/filter_decoder.c \
diff --git a/src/liblzma/common/erofs_decoder.c b/src/liblzma/common/erofs_decoder.c
new file mode 100644
index 00000000..ef584373
--- /dev/null
+++ b/src/liblzma/common/erofs_decoder.c
@@ -0,0 +1,148 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file erofs_decoder.c
+/// \brief Decode EROFS LZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+/**
+ * \brief State of the EROFS LZMA decoder
+ *
+ * One instance is allocated by erofs_decoder_init() and freed by
+ * erofs_decoder_end().
+ */
+typedef struct {
+ /// LZMA1 decoder
+ lzma_next_coder lzma;
+
+ /// Uncompressed size of the stream as given by the application
+ lzma_vli uncomp_size;
+
+ /// LZMA dictionary size as given by the application
+ uint32_t dict_size;
+
+ /// True once the first byte of the EROFS LZMA stream
+ /// has been processed.
+ bool props_decoded;
+} lzma_erofs_coder;
+
+/**
+ * \brief Decoding function of the EROFS LZMA decoder
+ *
+ * As long as the properties byte hasn't been processed and there is no
+ * input available, this returns LZMA_OK without consuming or producing
+ * anything. Once input is available, the first byte is bitwise-negated and
+ * given to lzma_lzma_lclppb_decode(); if that returns true (non-zero),
+ * LZMA_OPTIONS_ERROR is returned and the byte is not consumed. Otherwise
+ * the LZMA1 decoder is initialized with coder->dict_size, it is told
+ * coder->uncomp_size, and it is fed one dummy 0x00 byte in place of the
+ * byte that was consumed here. Any result other than LZMA_OK from the
+ * dummy byte becomes LZMA_PROG_ERROR.
+ *
+ * After that, and on every later call, the arguments are forwarded to the
+ * LZMA1 decoder as is and its return value is returned.
+ *
+ * This function is set as next->code in erofs_decoder_init().
+ *
+ * NOTE(review): return_if_error is a macro defined elsewhere; presumably
+ * it returns the error code from lzma_next_filter_init() -- confirm.
+ */
+static lzma_ret
+erofs_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+
+ if (!coder->props_decoded) {
+ // There must be at least one byte of input to decode
+ // the properties byte.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ lzma_options_lzma options = {
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ };
+
+ // The properties are stored as bitwise-negation
+ // of the typical encoding.
+ if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
+ return LZMA_OPTIONS_ERROR;
+
+ ++*in_pos;
+
+ // Initialize the decoder.
+ options.dict_size = coder->dict_size;
+ lzma_filter_info filters[2] = {
+ {
+ .init = &lzma_lzma_decoder_init,
+ .options = &options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma,
+ allocator, filters));
+
+ // Use a hack to set the uncompressed size.
+ lzma_lz_decoder_uncompressed(coder->lzma.coder,
+ coder->uncomp_size);
+
+ // Pass one dummy 0x00 byte to the LZMA decoder since that
+ // is what it expects the first byte to be.
+ const uint8_t dummy_in = 0;
+ size_t dummy_in_pos = 0;
+ if (coder->lzma.code(coder->lzma.coder, allocator,
+ &dummy_in, &dummy_in_pos, 1,
+ out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ assert(dummy_in_pos == 1);
+ coder->props_decoded = true;
+ }
+
+ // The rest is normal LZMA decoding.
+ return coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+}
+
+/**
+ * \brief Free the EROFS LZMA decoder
+ *
+ * Calls lzma_next_end() on the nested LZMA1 decoder and then frees the
+ * coder structure itself with the same allocator. This function is set as
+ * next->end in erofs_decoder_init().
+ */
+static void
+erofs_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+/**
+ * \brief Initialize or re-initialize the EROFS LZMA decoder
+ *
+ * The coder structure is allocated only if next->coder is NULL; otherwise
+ * the existing structure (and its nested LZMA1 coder) is kept for reuse.
+ * The given sizes are stored and props_decoded is reset so that the next
+ * erofs_decode() call starts by reading the properties byte. The LZMA1
+ * decoder itself isn't initialized here; erofs_decode() does that.
+ *
+ * \return - LZMA_OK
+ * - LZMA_MEM_ERROR: Allocating the coder structure failed.
+ * - LZMA_OPTIONS_ERROR: uncomp_size is greater than LZMA_VLI_MAX.
+ *
+ * NOTE(review): lzma_next_coder_init is a macro defined elsewhere; what it
+ * does with next isn't visible here.
+ */
+static lzma_ret
+erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t uncomp_size, uint32_t dict_size)
+{
+ lzma_next_coder_init(&erofs_decoder_init, next, allocator);
+
+ lzma_erofs_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &erofs_decode;
+ next->end = &erofs_decoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ if (uncomp_size > LZMA_VLI_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->uncomp_size = uncomp_size;
+ coder->dict_size = dict_size;
+
+ coder->props_decoded = false;
+
+ return LZMA_OK;
+}
+
+/**
+ * Public initialization function of the EROFS LZMA decoder; the API
+ * documentation is in lzma/container.h. After the stream has been
+ * initialized with erofs_decoder_init(), both LZMA_RUN and LZMA_FINISH
+ * are marked as supported actions and LZMA_OK is returned.
+ *
+ * NOTE(review): lzma_next_strm_init is a macro defined elsewhere;
+ * presumably it returns early if erofs_decoder_init() fails -- confirm.
+ */
+extern LZMA_API(lzma_ret)
+lzma_erofs_decoder(lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size)
+{
+ lzma_next_strm_init(erofs_decoder_init, strm, uncomp_size, dict_size);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/erofs_encoder.c b/src/liblzma/common/erofs_encoder.c
new file mode 100644
index 00000000..4cdd08f1
--- /dev/null
+++ b/src/liblzma/common/erofs_encoder.c
@@ -0,0 +1,139 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file erofs_encoder.c
+/// \brief Encode into EROFS LZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder.h"
+
+/**
+ * \brief State of the EROFS LZMA encoder
+ *
+ * One instance is allocated by erofs_encoder_init() and freed by
+ * erofs_encoder_end().
+ */
+typedef struct {
+ /// LZMA1 encoder
+ lzma_next_coder lzma;
+
+ /// LZMA properties byte (lc/lp/pb)
+ uint8_t props;
+} lzma_erofs_coder;
+
+/**
+ * \brief Encoding function of the EROFS LZMA encoder
+ *
+ * The available output space (out_size - *out_pos) is given to the LZMA
+ * encoder as its output size limit with set_out_limit(); if that doesn't
+ * return LZMA_OK, LZMA_PROG_ERROR is returned. Then the LZMA encoder is
+ * called once with the caller's buffers and action.
+ *
+ * If the LZMA encoder returns LZMA_STREAM_END, the output byte at the
+ * original *out_pos is overwritten with the bitwise-negation of
+ * coder->props, and *in_pos is set to its original value plus uncomp_size
+ * (the variable whose address was given to set_out_limit()).
+ *
+ * LZMA_OK from the LZMA encoder is converted to LZMA_PROG_ERROR. Any other
+ * return value is returned as is, without touching the first output byte
+ * or *in_pos.
+ *
+ * This function is set as next->code in erofs_encoder_init().
+ */
+static lzma_ret
+erofs_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+
+ // Remember *out_pos so that we can overwrite the first byte with
+ // the LZMA properties byte.
+ const size_t out_start = *out_pos;
+
+ // Remember *in_pos so that we can set it based on how many
+ // uncompressed bytes were actually encoded.
+ const size_t in_start = *in_pos;
+
+ // Set the output size limit based on the available output space.
+ // We know that the encoder supports set_out_limit() so
+ // LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible
+ // but lzma_code() has an assertion to not allow it to be returned
+ // from here and I don't want to change that for now, so
+ // LZMA_BUF_ERROR becomes LZMA_PROG_ERROR.
+ uint64_t uncomp_size;
+ if (coder->lzma.set_out_limit(coder->lzma.coder,
+ &uncomp_size, out_size - *out_pos) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // set_out_limit fails if this isn't true.
+ assert(out_size - *out_pos >= 6);
+
+ // Encode as much as possible.
+ const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size, action);
+
+ if (ret != LZMA_STREAM_END) {
+ if (ret == LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ return ret;
+ }
+
+ // The first output byte is bitwise-negation of the properties byte.
+ // We know that there is space for this byte because set_out_limit
+ // and the actual encoding succeeded.
+ out[out_start] = (uint8_t)(~coder->props);
+
+ // The LZMA encoder likely read more input than it was able to encode.
+ // Set *in_pos based on uncomp_size.
+ assert(uncomp_size <= in_size - in_start);
+ *in_pos = in_start + (size_t)(uncomp_size);
+
+ return ret;
+}
+
+/**
+ * \brief Free the EROFS LZMA encoder
+ *
+ * Calls lzma_next_end() on the nested LZMA1 encoder and then frees the
+ * coder structure itself with the same allocator. This function is set as
+ * next->end in erofs_encoder_init().
+ */
+static void
+erofs_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_erofs_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+/**
+ * \brief Initialize or re-initialize the EROFS LZMA encoder
+ *
+ * The coder structure is allocated only if next->coder is NULL; otherwise
+ * the existing structure (and its nested LZMA1 coder) is kept for reuse.
+ * The properties byte is encoded from the options into coder->props, and
+ * then the LZMA encoder is initialized with the same options through
+ * lzma_next_filter_init(), whose return value is returned.
+ *
+ * LZMA_MEM_ERROR is returned if allocating the coder structure fails.
+ *
+ * NOTE(review): lzma_next_coder_init and return_if_error are macros
+ * defined elsewhere; presumably return_if_error returns the error code
+ * from lzma_lzma_lclppb_encode() -- confirm.
+ */
+static lzma_ret
+erofs_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_options_lzma *options)
+{
+ lzma_next_coder_init(&erofs_encoder_init, next, allocator);
+
+ lzma_erofs_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &erofs_encode;
+ next->end = &erofs_encoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Encode the properties byte. Bitwise-negation of it will be the
+ // first output byte.
+ return_if_error(lzma_lzma_lclppb_encode(options, &coder->props));
+
+ // Initialize the LZMA encoder.
+ const lzma_filter_info filters[2] = {
+ {
+ .init = &lzma_lzma_encoder_init,
+ .options = (void *)(options),
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return lzma_next_filter_init(&coder->lzma, allocator, filters);
+}
+
+/**
+ * Public initialization function of the EROFS LZMA encoder; the API
+ * documentation is in lzma/container.h. After the stream has been
+ * initialized with erofs_encoder_init(), only LZMA_FINISH is marked as
+ * a supported action (LZMA_RUN is not enabled here) and LZMA_OK is
+ * returned.
+ *
+ * NOTE(review): lzma_next_strm_init is a macro defined elsewhere;
+ * presumably it returns early if erofs_encoder_init() fails -- confirm.
+ */
+extern LZMA_API(lzma_ret)
+lzma_erofs_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+ lzma_next_strm_init(erofs_encoder_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+
+}
diff --git a/src/liblzma/liblzma.map b/src/liblzma/liblzma.map
index bad8633c..251ef022 100644
--- a/src/liblzma/liblzma.map
+++ b/src/liblzma/liblzma.map
@@ -106,6 +106,8 @@ global:
XZ_5.3.1alpha {
global:
+ lzma_erofs_decoder;
+ lzma_erofs_encoder;
lzma_file_info_decoder;
local: