diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2021-01-14 20:07:01 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2021-01-14 20:10:59 +0200 |
commit | 601ec0311e769fc704daaaa7dac0ca840aff080e (patch) | |
tree | ec13c2c53062e7fa6ec8210380c0efc97c8cd3f7 /src/liblzma/common/erofs_decoder.c | |
parent | liblzma: Add rough support for output-size-limited encoding in LZMA1. (diff) | |
download | xz-601ec0311e769fc704daaaa7dac0ca840aff080e.tar.xz |
liblzma: Add EROFS LZMA encoder and decoder.
Right now this is just a planned extra-compact format for use
in the EROFS file system in Linux. At this point it's possible
that the format will either change or be abandoned and removed
completely.
The special thing about the encoder is that it uses the
output-size-limited encoding added in the previous commit.
EROFS uses fixed-sized blocks (e.g. 4 KiB) to hold compressed
data so the compressors must be able to create valid streams
that fill the given block size.
Diffstat (limited to '')
-rw-r--r-- | src/liblzma/common/erofs_decoder.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/src/liblzma/common/erofs_decoder.c b/src/liblzma/common/erofs_decoder.c new file mode 100644 index 00000000..ef584373 --- /dev/null +++ b/src/liblzma/common/erofs_decoder.c @@ -0,0 +1,148 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file erofs_decoder.c +/// \brief Decode EROFS LZMA format +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "lzma_decoder.h" +#include "lz_decoder.h" + + +typedef struct { + /// LZMA1 decoder + lzma_next_coder lzma; + + /// Uncompressed size of the stream as given by the application + lzma_vli uncomp_size; + + /// LZMA dictionary size as given by the application + uint32_t dict_size; + + /// True once the first byte of the EROFS LZMA stream + /// has been processed. + bool props_decoded; +} lzma_erofs_coder; + + +static lzma_ret +erofs_decode(void *coder_ptr, const lzma_allocator *allocator, + const uint8_t *restrict in, size_t *restrict in_pos, + size_t in_size, uint8_t *restrict out, + size_t *restrict out_pos, size_t out_size, lzma_action action) +{ + lzma_erofs_coder *coder = coder_ptr; + + if (!coder->props_decoded) { + // There must be at least one byte of input to decode + // the properties byte. + if (*in_pos >= in_size) + return LZMA_OK; + + lzma_options_lzma options = { + .preset_dict = NULL, + .preset_dict_size = 0, + }; + + // The properties are stored as bitwise-negation + // of the typical encoding. + if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos])) + return LZMA_OPTIONS_ERROR; + + ++*in_pos; + + // Initialize the decoder. + options.dict_size = coder->dict_size; + lzma_filter_info filters[2] = { + { + .init = &lzma_lzma_decoder_init, + .options = &options, + }, { + .init = NULL, + } + }; + + return_if_error(lzma_next_filter_init(&coder->lzma, + allocator, filters)); + + // Use a hack to set the uncompressed size. + lzma_lz_decoder_uncompressed(coder->lzma.coder, + coder->uncomp_size); + + // Pass one dummy 0x00 byte to the LZMA decoder since that + // is what it expects the first byte to be. + const uint8_t dummy_in = 0; + size_t dummy_in_pos = 0; + if (coder->lzma.code(coder->lzma.coder, allocator, + &dummy_in, &dummy_in_pos, 1, + out, out_pos, out_size, LZMA_RUN) != LZMA_OK) + return LZMA_PROG_ERROR; + + assert(dummy_in_pos == 1); + coder->props_decoded = true; + } + + // The rest is normal LZMA decoding. + return coder->lzma.code(coder->lzma.coder, allocator, + in, in_pos, in_size, + out, out_pos, out_size, action); +} + + +static void +erofs_decoder_end(void *coder_ptr, const lzma_allocator *allocator) +{ + lzma_erofs_coder *coder = coder_ptr; + lzma_next_end(&coder->lzma, allocator); + lzma_free(coder, allocator); + return; +} + + +static lzma_ret +erofs_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, + uint64_t uncomp_size, uint32_t dict_size) +{ + lzma_next_coder_init(&erofs_decoder_init, next, allocator); + + lzma_erofs_coder *coder = next->coder; + + if (coder == NULL) { + coder = lzma_alloc(sizeof(lzma_erofs_coder), allocator); + if (coder == NULL) + return LZMA_MEM_ERROR; + + next->coder = coder; + next->code = &erofs_decode; + next->end = &erofs_decoder_end; + + coder->lzma = LZMA_NEXT_CODER_INIT; + } + + if (uncomp_size > LZMA_VLI_MAX) + return LZMA_OPTIONS_ERROR; + + coder->uncomp_size = uncomp_size; + coder->dict_size = dict_size; + + coder->props_decoded = false; + + return LZMA_OK; +} + + +extern LZMA_API(lzma_ret) +lzma_erofs_decoder(lzma_stream *strm, uint64_t uncomp_size, uint32_t dict_size) +{ + lzma_next_strm_init(erofs_decoder_init, strm, uncomp_size, dict_size); + + strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_FINISH] = true; + + return LZMA_OK; +} |