Added initial support for preset dictionary for raw LZMA1 and LZMA2.

It is not supported by the .xz format or the xz command line tool yet.
author: Lasse Collin <lasse.collin@tukaani.org> 2009-01-27 18:36:05 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2009-01-27 18:36:05 +0200
commit: f76e39cf930f888d460b443d18f977ebedea8b2a (patch)
tree: 314f531dc9953c5b87a5268d53373e6646598323 /src/liblzma/lz
parent: Regenerate the CRC tables without trailing blanks. (diff)
download: xz-f76e39cf930f888d460b443d18f977ebedea8b2a.tar.xz
3 files changed, 49 insertions, 13 deletions
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 5ad5c966..99430c9f 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 		const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
 			lzma_allocator *allocator, const void *options,
-			size_t *dict_size))
+			lzma_lz_options *lz_options))
 {
 	// Allocate the base structure if it isn't already allocated.
 	if (next->coder == NULL) {
@@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 	// Allocate and initialize the LZ-based decoder. It will also give
 	// us the dictionary size.
-	size_t dict_size;
+	lzma_lz_options lz_options;
 	return_if_error(lz_init(&next->coder->lz, allocator,
-			filters[0].options, &dict_size));
+			filters[0].options, &lz_options));
 
 	// If the dictionary size is very small, increase it to 4096 bytes.
 	// This is to prevent constant wrapping of the dictionary, which
 	// would slow things down. The downside is that since we don't check
 	// separately for the real dictionary size, we may happily accept
 	// corrupt files.
-	if (dict_size < 4096)
-		dict_size = 4096;
+	if (lz_options.dict_size < 4096)
+		lz_options.dict_size = 4096;
 
 	// Make dictionary size a multipe of 16. Some LZ-based decoders like
 	// LZMA use the lowest bits lzma_dict.pos to know the alignment of the
@@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 	// recommended to give aligned buffers to liblzma.
 	//
 	// Avoid integer overflow.
-	if (dict_size > SIZE_MAX - 15)
+	if (lz_options.dict_size > SIZE_MAX - 15)
 		return LZMA_MEM_ERROR;
 
-	dict_size = (dict_size + 15) & ~((size_t)(15));
+	lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
 
 	// Allocate and initialize the dictionary.
-	if (next->coder->dict.size != dict_size) {
+	if (next->coder->dict.size != lz_options.dict_size) {
 		lzma_free(next->coder->dict.buf, allocator);
-		next->coder->dict.buf = lzma_alloc(dict_size, allocator);
+		next->coder->dict.buf
+				= lzma_alloc(lz_options.dict_size, allocator);
 		if (next->coder->dict.buf == NULL)
 			return LZMA_MEM_ERROR;
 
-		next->coder->dict.size = dict_size;
+		next->coder->dict.size = lz_options.dict_size;
 	}
 
 	lz_decoder_reset(next->coder);
 
+	// Use the preset dictionary if it was given to us.
+	if (lz_options.preset_dict != NULL
+			&& lz_options.preset_dict_size > 0) {
+		// If the preset dictionary is bigger than the actual
+		// dictionary, copy only the tail.
+		const size_t copy_size = MIN(lz_options.preset_dict_size,
+				lz_options.dict_size);
+		const size_t offset = lz_options.preset_dict_size - copy_size;
+		memcpy(next->coder->dict.buf, lz_options.preset_dict + offset,
+				copy_size);
+		next->coder->dict.pos = copy_size;
+		next->coder->dict.full = copy_size;
+	}
+
 	// Miscellaneous initializations
 	next->coder->next_finished = false;
 	next->coder->this_finished = false;
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index 5ac44057..9041d0bd 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -52,6 +52,13 @@ typedef struct {
 
 
 typedef struct {
+	size_t dict_size;
+	const uint8_t *preset_dict;
+	size_t preset_dict_size;
+} lzma_lz_options;
+
+
+typedef struct {
 	/// Data specific to the LZ-based decoder
 	lzma_coder *coder;
 
@@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
 		lzma_allocator *allocator, const lzma_filter_info *filters,
 		lzma_ret (*lz_init)(lzma_lz_decoder *lz,
 			lzma_allocator *allocator, const void *options,
-			size_t *dict_size));
+			lzma_lz_options *lz_options));
 
 extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
 
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 7bd6d03e..bd379533 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
 
 
 static bool
-lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
+lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator,
+		const lzma_lz_options *lz_options)
 {
 	// Allocate the history buffer.
 	if (mf->buffer == NULL) {
@@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator)
 	// we avoid wasting RAM and improve initialization speed a lot.
 	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));
 
+	// Handle preset dictionary.
+	if (lz_options->preset_dict != NULL
+			&& lz_options->preset_dict_size > 0) {
+		// If the preset dictionary is bigger than the actual
+		// dictionary, use only the tail.
+		mf->write_pos = MIN(lz_options->preset_dict_size, mf->size);
+		memcpy(mf->buffer, lz_options->preset_dict
+				+ lz_options->preset_dict_size - mf->write_pos,
+				mf->write_pos);
+		mf->action = LZMA_SYNC_FLUSH;
+		mf->skip(mf, mf->write_pos);
+	}
+
 	mf->action = LZMA_RUN;
 
 	return false;
@@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
 
 	// Allocate new buffers if needed, and do the rest of
 	// the initialization.
-	if (lz_encoder_init(&next->coder->mf, allocator))
+	if (lz_encoder_init(&next->coder->mf, allocator, &lz_options))
 		return LZMA_MEM_ERROR;
 
 	// Initialize the next filter in the chain, if any.
author	Lasse Collin <lasse.collin@tukaani.org>	2009-01-27 18:36:05 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2009-01-27 18:36:05 +0200
commit	f76e39cf930f888d460b443d18f977ebedea8b2a (patch)
tree	314f531dc9953c5b87a5268d53373e6646598323 /src/liblzma/lz
parent	Regenerate the CRC tables without trailing blanks. (diff)
download	xz-f76e39cf930f888d460b443d18f977ebedea8b2a.tar.xz