liblzma: Add EROFS LZMA encoder and decoder.

Right now this is just a planned extra-compact format for use in the EROFS file system in Linux. At this point it's possible that the format will either change or be abandoned and removed completely. The special thing about the encoder is that it uses the output-size-limited encoding added in the previous commit. EROFS uses fixed-sized blocks (e.g. 4 KiB) to hold compressed data so the compressors must be able to create valid streams that fill the given block size.
author: Lasse Collin <lasse.collin@tukaani.org> 2021-01-14 20:07:01 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2021-01-14 20:10:59 +0200
commit: 601ec0311e769fc704daaaa7dac0ca840aff080e (patch)
tree: ec13c2c53062e7fa6ec8210380c0efc97c8cd3f7 /src/liblzma/common/erofs_encoder.c
parent: liblzma: Add rough support for output-size-limited encoding in LZMA1. (diff)
download: xz-601ec0311e769fc704daaaa7dac0ca840aff080e.tar.xz
1 files changed, 139 insertions, 0 deletions
diff --git a/src/liblzma/common/erofs_encoder.c b/src/liblzma/common/erofs_encoder.c
new file mode 100644
index 00000000..4cdd08f1
--- /dev/null
+++ b/src/liblzma/common/erofs_encoder.c
@@ -0,0 +1,139 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       erofs_encoder.c
+/// \brief      Encode into EROFS LZMA format
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder.h"
+
+
+typedef struct {
+	/// LZMA1 encoder
+	lzma_next_coder lzma;
+
+	/// LZMA properties byte (lc/lp/pb)
+	uint8_t props;
+} lzma_erofs_coder;
+
+
+static lzma_ret
+erofs_encode(void *coder_ptr, const lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	lzma_erofs_coder *coder = coder_ptr;
+
+	// Remember *out_pos so that we can overwrite the first byte with
+	// the LZMA properties byte.
+	const size_t out_start = *out_pos;
+
+	// Remember *in_pos so that we can set it based on how many
+	// uncompressed bytes were actually encoded.
+	const size_t in_start = *in_pos;
+
+	// Set the output size limit based on the available output space.
+	// We know that the encoder supports set_out_limit() so
+	// LZMA_OPTIONS_ERROR isn't possible. LZMA_BUF_ERROR is possible
+	// but lzma_code() has an assertion to not allow it to be returned
+	// from here and I don't want to change that for now, so
+	// LZMA_BUF_ERROR becomes LZMA_PROG_ERROR.
+	uint64_t uncomp_size;
+	if (coder->lzma.set_out_limit(coder->lzma.coder,
+			&uncomp_size, out_size - *out_pos) != LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	// set_out_limit fails if this isn't true.
+	assert(out_size - *out_pos >= 6);
+
+	// Encode as much as possible.
+	const lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+			in, in_pos, in_size, out, out_pos, out_size, action);
+
+	if (ret != LZMA_STREAM_END) {
+		if (ret == LZMA_OK) {
+			assert(0);
+			return LZMA_PROG_ERROR;
+		}
+
+		return ret;
+	}
+
+	// The first output byte is bitwise-negation of the properties byte.
+	// We know that there is space for this byte because set_out_limit
+	// and the actual encoding succeeded.
+	out[out_start] = (uint8_t)(~coder->props);
+
+	// The LZMA encoder likely read more input than it was able to encode.
+	// Set *in_pos based on uncomp_size.
+	assert(uncomp_size <= in_size - in_start);
+	*in_pos = in_start + (size_t)(uncomp_size);
+
+	return ret;
+}
+
+
+static void
+erofs_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_erofs_coder *coder = coder_ptr;
+	lzma_next_end(&coder->lzma, allocator);
+	lzma_free(coder, allocator);
+	return;
+}
+
+
+static lzma_ret
+erofs_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_options_lzma *options)
+{
+	lzma_next_coder_init(&erofs_encoder_init, next, allocator);
+
+	lzma_erofs_coder *coder = next->coder;
+
+	if (coder == NULL) {
+		coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator);
+		if (coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder = coder;
+		next->code = &erofs_encode;
+		next->end = &erofs_encoder_end;
+
+		coder->lzma = LZMA_NEXT_CODER_INIT;
+	}
+
+	// Encode the properties byte. Bitwise-negation of it will be the
+	// first output byte.
+	return_if_error(lzma_lzma_lclppb_encode(options, &coder->props));
+
+	// Initialize the LZMA encoder.
+	const lzma_filter_info filters[2] = {
+		{
+			.init = &lzma_lzma_encoder_init,
+			.options = (void *)(options),
+		}, {
+			.init = NULL,
+		}
+	};
+
+	return lzma_next_filter_init(&coder->lzma, allocator, filters);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_erofs_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+	lzma_next_strm_init(erofs_encoder_init, strm, options);
+
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+
+}
author	Lasse Collin <lasse.collin@tukaani.org>	2021-01-14 20:07:01 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2021-01-14 20:10:59 +0200
commit	601ec0311e769fc704daaaa7dac0ca840aff080e (patch)
tree	ec13c2c53062e7fa6ec8210380c0efc97c8cd3f7 /src/liblzma/common/erofs_encoder.c
parent	liblzma: Add rough support for output-size-limited encoding in LZMA1. (diff)
download	xz-601ec0311e769fc704daaaa7dac0ca840aff080e.tar.xz