aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/liblzma/api/lzma/container.h163
-rw-r--r--src/liblzma/common/Makefile.inc7
-rw-r--r--src/liblzma/common/common.c9
-rw-r--r--src/liblzma/common/common.h14
-rw-r--r--src/liblzma/common/outqueue.c180
-rw-r--r--src/liblzma/common/outqueue.h155
-rw-r--r--src/liblzma/common/stream_encoder_mt.c1011
7 files changed, 1538 insertions, 1 deletions
diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h
index 7a9ffc64..e68c8940 100644
--- a/src/liblzma/api/lzma/container.h
+++ b/src/liblzma/api/lzma/container.h
@@ -61,6 +61,127 @@
/**
+ * \brief Multithreading options
+ */
+typedef struct {
+ /**
+ * \brief Flags
+ *
+ * Set this to zero if no flags are wanted.
+ *
+ * No flags are currently supported.
+ */
+ uint32_t flags;
+
+ /**
+ * \brief Number of worker threads to use
+ */
+ uint32_t threads;
+
+ /**
+ * \brief Maximum uncompressed size of a Block
+ *
+ * The encoder will start a new .xz Block every block_size bytes.
+ * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code()
+ * the caller may tell liblzma to start a new Block earlier.
+ *
+ * With LZMA2, a recommended block size is 2-4 times the LZMA2
+ * dictionary size. With very small dictionaries, it is recommended
+ * to use at least 1 MiB block size for good compression ratio, even
+ * if this is more than four times the dictionary size. Note that
+ * these are only recommendations for typical use cases; feel free
+ * to use other values. Just keep in mind that using a block size
+ * less than the LZMA2 dictionary size is waste of RAM.
+ *
+ * Set this to 0 to let liblzma choose the block size depending
+ * on the compression options. For LZMA2 it will be 3*dict_size
+ * or 1 MiB, whichever is more.
+ */
+ uint64_t block_size;
+
+ /**
+ * \brief Timeout to allow lzma_code() to return early
+ *
+ * Multithreading can make liblzma to consume input and produce
+ * output in a very bursty way: it may first read a lot of input
+ * to fill internal buffers, then no input or output occurs for
+ * a while.
+ *
+ * In single-threaded mode, lzma_code() won't return until it has
+ * either consumed all the input or filled the output buffer. If
+ * this is done in multithreaded mode, it may cause a call
+ * lzma_code() to take even tens of seconds, which isn't acceptable
+ * in all applications.
+ *
+ * To avoid very long blocking times in lzma_code(), a timeout
+ * (in milliseconds) may be set here. If lzma_code() would block
+ * longer than this number of milliseconds, it will return with
+ * LZMA_OK. Reasonable values are 100 ms or more. The xz command
+ * line tool uses 300 ms.
+ *
+ * If long blocking times are fine for you, set timeout to a special
+ * value of 0, which will disable the timeout mechanism and will make
+ * lzma_code() block until all the input is consumed or the output
+ * buffer has been filled.
+ *
+ * \note Even with a timeout, lzma_code() might sometimes take
+ * somewhat long time to return. No timing guarantees
+ * are made.
+ */
+ uint32_t timeout;
+
+ /**
+ * \brief Compression preset (level and possible flags)
+ *
+ * The preset is set just like with lzma_easy_encoder().
+ * The preset is ignored if filters below is non-NULL.
+ */
+ uint32_t preset;
+
+ /**
+ * \brief Filter chain (alternative to a preset)
+ *
+ * If this is NULL, the preset above is used. Otherwise the preset
+ * is ignored and the filter chain specified here is used.
+ */
+ const lzma_filter *filters;
+
+ /**
+ * \brief Integrity check type
+ *
+ * See check.h for available checks. The xz command line tool
+ * defaults to LZMA_CHECK_CRC64, which is a good choice if you
+ * are unsure.
+ */
+ lzma_check check;
+
+ /*
+ * Reserved space to allow possible future extensions without
+ * breaking the ABI. You should not touch these, because the names
+ * of these variables may change. These are and will never be used
+ * with the currently supported options, so it is safe to leave these
+ * uninitialized.
+ */
+ lzma_reserved_enum reserved_enum1;
+ lzma_reserved_enum reserved_enum2;
+ lzma_reserved_enum reserved_enum3;
+ uint32_t reserved_int1;
+ uint32_t reserved_int2;
+ uint32_t reserved_int3;
+ uint32_t reserved_int4;
+ uint64_t reserved_int5;
+ uint64_t reserved_int6;
+ uint64_t reserved_int7;
+ uint64_t reserved_int8;
+ void *reserved_ptr1;
+ void *reserved_ptr2;
+ void *reserved_ptr3;
+ void *reserved_ptr4;
+
+} lzma_mt;
+
+
+/**
* \brief Calculate approximate memory usage of easy encoder
*
* This function is a wrapper for lzma_raw_encoder_memusage().
@@ -191,6 +312,48 @@ extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm,
/**
+ * \brief Calculate approximate memory usage of multithreaded .xz encoder
+ *
+ * Since doing the encoding in threaded mode doesn't affect the memory
+ * requirements of single-threaded decompressor, you can use
+ * lzma_easy_decoder_memusage(options->preset) or
+ * lzma_raw_decoder_memusage(options->filters) to calculate
+ * the decompressor memory requirements.
+ *
+ * \param options Compression options
+ *
+ * \return Number of bytes of memory required for encoding with the
+ * given options. If an error occurs, for example due to
+ * unsupported preset or filter chain, UINT64_MAX is returned.
+ */
+extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure;
+
+
+/**
+ * \brief Initialize multithreaded .xz Stream encoder
+ *
+ * This provides the functionality of lzma_easy_encoder() and
+ * lzma_stream_encoder() as a single function for multithreaded use.
+ *
+ * TODO: For lzma_code(), only LZMA_RUN and LZMA_FINISH are currently
+ * supported. Support for other actions has been planned.
+ *
+ * \param strm Pointer to properly prepared lzma_stream
+ * \param options Pointer to multithreaded compression options
+ *
+ * \return - LZMA_OK
+ * - LZMA_MEM_ERROR
+ * - LZMA_UNSUPPORTED_CHECK
+ * - LZMA_OPTIONS_ERROR
+ * - LZMA_PROG_ERROR
+ */
+extern LZMA_API(lzma_ret) lzma_stream_encoder_mt(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+
+/**
* \brief Initialize .lzma encoder (legacy file format)
*
* The .lzma format is sometimes called the LZMA_Alone format, which is the
diff --git a/src/liblzma/common/Makefile.inc b/src/liblzma/common/Makefile.inc
index 81d751ee..dd5a8c88 100644
--- a/src/liblzma/common/Makefile.inc
+++ b/src/liblzma/common/Makefile.inc
@@ -40,6 +40,13 @@ liblzma_la_SOURCES += \
common/stream_encoder.c \
common/stream_flags_encoder.c \
common/vli_encoder.c
+
+if COND_THREADS
+liblzma_la_SOURCES += \
+ common/outqueue.c \
+ common/outqueue.h \
+ common/stream_encoder_mt.c
+endif
endif
if COND_MAIN_DECODER
diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c
index 3005cca9..6afb4fbf 100644
--- a/src/liblzma/common/common.c
+++ b/src/liblzma/common/common.c
@@ -263,7 +263,9 @@ lzma_code(lzma_stream *strm, lzma_action action)
strm->internal->avail_in = strm->avail_in;
- switch (ret) {
+ // Cast is needed to silence a warning about LZMA_TIMED_OUT, which
+ // isn't part of lzma_ret enumeration.
+ switch ((unsigned int)(ret)) {
case LZMA_OK:
// Don't return LZMA_BUF_ERROR when it happens the first time.
// This is to avoid returning LZMA_BUF_ERROR when avail_out
@@ -279,6 +281,11 @@ lzma_code(lzma_stream *strm, lzma_action action)
}
break;
+ case LZMA_TIMED_OUT:
+ strm->internal->allow_buf_error = false;
+ ret = LZMA_OK;
+ break;
+
case LZMA_STREAM_END:
if (strm->internal->sequence == ISEQ_SYNC_FLUSH
|| strm->internal->sequence == ISEQ_FULL_FLUSH)
diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h
index b8194323..8e9a387c 100644
--- a/src/liblzma/common/common.h
+++ b/src/liblzma/common/common.h
@@ -49,6 +49,13 @@
#define LZMA_BUFFER_SIZE 4096
+/// Maximum number of worker threads within one multithreaded component.
+/// The limit exists solely to make it simpler to prevent integer overflows
+/// when allocating structures etc. This should be big enough for now...
+/// the code won't scale anywhere close to this number anyway.
+#define LZMA_THREADS_MAX 16384
+
+
/// Starting value for memory usage estimates. Instead of calculating size
/// of _every_ structure and taking into account malloc() overhead etc., we
/// add a base size to all memory usage estimates. It's not very accurate
@@ -69,6 +76,13 @@
| LZMA_CONCATENATED )
+/// Special return value (lzma_ret) to indicate that a timeout was reached
+/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to
+/// LZMA_OK in lzma_code(). This is not in the lzma_ret enumeration because
+/// there's no need to have it in the public API.
+#define LZMA_TIMED_OUT 32
+
+
/// Type of encoder/decoder specific data; the actual structure is defined
/// differently in different coders.
typedef struct lzma_coder_s lzma_coder;
diff --git a/src/liblzma/common/outqueue.c b/src/liblzma/common/outqueue.c
new file mode 100644
index 00000000..b9eac16d
--- /dev/null
+++ b/src/liblzma/common/outqueue.c
@@ -0,0 +1,180 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file outqueue.c
+/// \brief Output queue handling in multithreaded coding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "outqueue.h"
+
+
+/// This is to ease integer overflow checking: We may allocate up to
+/// 2 * LZMA_THREADS_MAX buffers and we need some extra memory for other
+/// data structures (that's the second /2).
+#define BUF_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX / 2 / 2)
+
+
+static lzma_ret
+get_options(uint64_t *bufs_alloc_size, uint32_t *bufs_count,
+ uint64_t buf_size_max, uint32_t threads)
+{
+ if (threads > LZMA_THREADS_MAX || buf_size_max > BUF_SIZE_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ // The number of buffers is twice the number of threads.
+ // This wastes RAM but keeps the threads busy when buffers
+ // finish out of order.
+ //
+ // NOTE: If this is changed, update BUF_SIZE_MAX too.
+ *bufs_count = threads * 2;
+ *bufs_alloc_size = *bufs_count * buf_size_max;
+
+ return LZMA_OK;
+}
+
+
+extern uint64_t
+lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads)
+{
+ uint64_t bufs_alloc_size;
+ uint32_t bufs_count;
+
+ if (get_options(&bufs_alloc_size, &bufs_count, buf_size_max, threads)
+ != LZMA_OK)
+ return UINT64_MAX;
+
+ return sizeof(lzma_outq) + bufs_count * sizeof(lzma_outbuf)
+ + bufs_alloc_size;
+}
+
+
+extern lzma_ret
+lzma_outq_init(lzma_outq *outq, lzma_allocator *allocator,
+ uint64_t buf_size_max, uint32_t threads)
+{
+ uint64_t bufs_alloc_size;
+ uint32_t bufs_count;
+
+ // Set bufs_count and bufs_alloc_size.
+ return_if_error(get_options(&bufs_alloc_size, &bufs_count,
+ buf_size_max, threads));
+
+ // Allocate memory if needed.
+ if (outq->buf_size_max != buf_size_max
+ || outq->bufs_allocated != bufs_count) {
+ lzma_outq_end(outq, allocator);
+
+#if SIZE_MAX < UINT64_MAX
+ if (bufs_alloc_size > SIZE_MAX)
+ return LZMA_MEM_ERROR;
+#endif
+
+ outq->bufs = lzma_alloc(bufs_count * sizeof(lzma_outbuf),
+ allocator);
+ outq->bufs_mem = lzma_alloc((size_t)(bufs_alloc_size),
+ allocator);
+
+ if (outq->bufs == NULL || outq->bufs_mem == NULL) {
+ lzma_outq_end(outq, allocator);
+ return LZMA_MEM_ERROR;
+ }
+ }
+
+ // Initialize the rest of the main structure. Initialization of
+ // outq->bufs[] is done when they are actually needed.
+ outq->buf_size_max = (size_t)(buf_size_max);
+ outq->bufs_allocated = bufs_count;
+ outq->bufs_pos = 0;
+ outq->bufs_used = 0;
+ outq->read_pos = 0;
+
+ return LZMA_OK;
+}
+
+
+extern void
+lzma_outq_end(lzma_outq *outq, lzma_allocator *allocator)
+{
+ lzma_free(outq->bufs, allocator);
+ lzma_free(outq->bufs_mem, allocator);
+ return;
+}
+
+
+extern lzma_outbuf *
+lzma_outq_get_buf(lzma_outq *outq)
+{
+ // Caller must have checked it with lzma_outq_has_buf().
+ assert(outq->bufs_used < outq->bufs_allocated);
+
+ // Initialize the new buffer.
+ lzma_outbuf *buf = &outq->bufs[outq->bufs_pos];
+ buf->buf = outq->bufs_mem + outq->bufs_pos * outq->buf_size_max;
+ buf->size = 0;
+ buf->finished = false;
+
+ // Update the queue state.
+ if (++outq->bufs_pos == outq->bufs_allocated)
+ outq->bufs_pos = 0;
+
+ ++outq->bufs_used;
+
+ return buf;
+}
+
+
+extern bool
+lzma_outq_is_readable(const lzma_outq *outq)
+{
+ uint32_t i = outq->bufs_pos - outq->bufs_used;
+ if (outq->bufs_pos < outq->bufs_used)
+ i += outq->bufs_allocated;
+
+ return outq->bufs[i].finished;
+}
+
+
+extern lzma_ret
+lzma_outq_read(lzma_outq *restrict outq, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size,
+ lzma_vli *restrict unpadded_size,
+ lzma_vli *restrict uncompressed_size)
+{
+ // There must be at least one buffer from which to read.
+ if (outq->bufs_used == 0)
+ return LZMA_OK;
+
+ // Get the buffer.
+ uint32_t i = outq->bufs_pos - outq->bufs_used;
+ if (outq->bufs_pos < outq->bufs_used)
+ i += outq->bufs_allocated;
+
+ lzma_outbuf *buf = &outq->bufs[i];
+
+ // If it isn't finished yet, we cannot read from it.
+ if (!buf->finished)
+ return LZMA_OK;
+
+ // Copy from the buffer to output.
+ lzma_bufcpy(buf->buf, &outq->read_pos, buf->size,
+ out, out_pos, out_size);
+
+ // Return if we didn't get all the data from the buffer.
+ if (outq->read_pos < buf->size)
+ return LZMA_OK;
+
+ // The buffer was finished. Tell the caller its size information.
+ *unpadded_size = buf->unpadded_size;
+ *uncompressed_size = buf->uncompressed_size;
+
+ // Free this buffer for further use.
+ --outq->bufs_used;
+ outq->read_pos = 0;
+
+ return LZMA_STREAM_END;
+}
diff --git a/src/liblzma/common/outqueue.h b/src/liblzma/common/outqueue.h
new file mode 100644
index 00000000..154f91bc
--- /dev/null
+++ b/src/liblzma/common/outqueue.h
@@ -0,0 +1,155 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file outqueue.h
+/// \brief Output queue handling in multithreaded coding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// Output buffer for a single thread
+typedef struct {
+ /// Pointer to the output buffer of lzma_outq.buf_size_max bytes
+ uint8_t *buf;
+
+ /// Amount of data written to buf
+ size_t size;
+
+ /// Additional size information
+ lzma_vli unpadded_size;
+ lzma_vli uncompressed_size;
+
+ /// True when no more data will be written into this buffer.
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ bool finished;
+
+} lzma_outbuf;
+
+
+typedef struct {
+ /// Array of buffers that are used cyclically.
+ lzma_outbuf *bufs;
+
+ /// Memory allocated for all the buffers
+ uint8_t *bufs_mem;
+
+ /// Amount of buffer space available in each buffer
+ size_t buf_size_max;
+
+ /// Number of buffers allocated
+ uint32_t bufs_allocated;
+
+ /// Position in the bufs array. The next buffer to be taken
+ /// into use is bufs[bufs_pos].
+ uint32_t bufs_pos;
+
+ /// Number of buffers in use
+ uint32_t bufs_used;
+
+ /// Position in the buffer in lzma_outq_read()
+ size_t read_pos;
+
+} lzma_outq;
+
+
+/**
+ * \brief Calculate the memory usage of an output queue
+ *
+ * \return Approximate memory usage in bytes or UINT64_MAX on error.
+ */
+extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads);
+
+
+/// \brief Initialize an output queue
+///
+/// \param outq Pointer to an output queue. Before calling
+/// this function the first time, *outq should
+/// have been zeroed with memzero() so that this
+/// function knows that there are no previous
+/// allocations to free.
+/// \param allocator Pointer to allocator or NULL
+/// \param buf_size_max Maximum amount of data that a single buffer
+/// in the queue may need to store.
+/// \param threads Number of buffers that may be in use
+/// concurrently. Note that more than this number
+/// of buffers will actually get allocated to
+/// improve performance when buffers finish
+/// out of order.
+///
+/// \return - LZMA_OK
+/// - LZMA_MEM_ERROR
+///
+extern lzma_ret lzma_outq_init(lzma_outq *outq, lzma_allocator *allocator,
+ uint64_t buf_size_max, uint32_t threads);
+
+
+/// \brief Free the memory associated with the output queue
+extern void lzma_outq_end(lzma_outq *outq, lzma_allocator *allocator);
+
+
+/// \brief Get a new buffer
+///
+/// lzma_outq_has_buf() must be used to check that there is a buffer
+/// available before calling lzma_outq_get_buf().
+///
+extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq);
+
+
+/// \brief Test if there is data ready to be read
+///
+/// Call to this function must be protected with the same mutex that
+/// is used to protect lzma_outbuf.finished.
+///
+extern bool lzma_outq_is_readable(const lzma_outq *outq);
+
+
+/// \brief Read finished data
+///
+/// \param outq Pointer to an output queue
+/// \param out Beginning of the output buffer
+/// \param out_pos The next byte will be written to
+/// out[*out_pos].
+/// \param out_size Size of the out buffer; the first byte into
+/// which no data is written to is out[out_size].
+/// \param unpadded_size Unpadded Size from the Block encoder
+/// \param uncompressed_size Uncompressed Size from the Block encoder
+///
+/// \return - LZMA: All OK. Either no data was available or the buffer
+/// being read didn't become empty yet.
+/// - LZMA_STREAM_END: The buffer being read was finished.
+/// *unpadded_size and *uncompressed_size were set.
+///
+/// \note This reads lzma_outbuf.finished variables and thus call
+/// to this function needs to be protected with a mutex.
+///
+extern lzma_ret lzma_outq_read(lzma_outq *restrict outq,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, lzma_vli *restrict unpadded_size,
+ lzma_vli *restrict uncompressed_size);
+
+
+/// \brief Test if there is at least one buffer free
+///
+/// This must be used before getting a new buffer with lzma_outq_get_buf().
+///
+static inline bool
+lzma_outq_has_buf(const lzma_outq *outq)
+{
+ return outq->bufs_used < outq->bufs_allocated;
+}
+
+
+/// \brief Test if the queue is completely empty
+static inline bool
+lzma_outq_is_empty(const lzma_outq *outq)
+{
+ return outq->bufs_used == 0;
+}
diff --git a/src/liblzma/common/stream_encoder_mt.c b/src/liblzma/common/stream_encoder_mt.c
new file mode 100644
index 00000000..323f04a2
--- /dev/null
+++ b/src/liblzma/common/stream_encoder_mt.c
@@ -0,0 +1,1011 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder_mt.c
+/// \brief Multithreaded .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+#include "easy_preset.h"
+#include "block_encoder.h"
+#include "index_encoder.h"
+#include "outqueue.h"
+
+
+/// Maximum supported block size. This makes it simpler to prevent integer
+/// overflows if we are given unusually large block size.
+#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX)
+
+
+typedef enum {
+ /// Waiting for work.
+ THR_IDLE,
+
+ /// Encoding is in progress.
+ THR_RUN,
+
+ /// Encoding is in progress but no more input data will
+ /// be read.
+ THR_FINISH,
+
+ /// The main thread wants the thread to stop whatever it was doing
+ /// but not exit.
+ THR_STOP,
+
+ /// The main thread wants the thread to exit. We could use
+ /// cancellation but since there's stopped anyway, this is lazier.
+ THR_EXIT,
+
+} worker_state;
+
+
+typedef struct worker_thread_s worker_thread;
+struct worker_thread_s {
+ worker_state state;
+
+ /// Input buffer of coder->block_size bytes. The main thread will
+ /// put new input into this and update in_size accordingly. Once
+ /// no more input is coming, state will be set to THR_FINISH.
+ uint8_t *in;
+
+ /// Amount of data available in the input buffer. This is modified
+ /// only by the main thread.
+ size_t in_size;
+
+ /// Output buffer for this thread. This is set by the main
+ /// thread every time a new Block is started with this thread
+ /// structure.
+ lzma_outbuf *outbuf;
+
+ /// Pointer to the main structure is needed when putting this
+ /// thread back to the stack of free threads.
+ lzma_coder *coder;
+
+ /// The allocator is set by the main thread. Since a copy of the
+ /// pointer is kept here, the application must not change the
+ /// allocator before calling lzma_end().
+ lzma_allocator *allocator;
+
+ /// Block encoder
+ lzma_next_coder block_encoder;
+
+ /// Compression options for this Block
+ lzma_block block_options;
+
+ /// Next structure in the stack of free worker threads.
+ worker_thread *next;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ /// The ID of this thread is used to join the thread
+ /// when it's not needed anymore.
+ pthread_t thread_id;
+};
+
+
+struct lzma_coder_s {
+ enum {
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK,
+ SEQ_INDEX,
+ SEQ_STREAM_FOOTER,
+ } sequence;
+
+ /// Start a new Block every block_size bytes of input unless
+ /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier.
+ size_t block_size;
+
+ /// The filter chain currently in use
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+
+ /// Index to hold sizes of the Blocks
+ lzma_index *index;
+
+ /// Index encoder
+ lzma_next_coder index_encoder;
+
+
+ /// Stream Flags for encoding the Stream Header and Stream Footer.
+ lzma_stream_flags stream_flags;
+
+ /// Buffer to hold Stream Header and Stream Footer.
+ uint8_t header[LZMA_STREAM_HEADER_SIZE];
+
+ /// Read position in header[]
+ size_t header_pos;
+
+
+ /// Output buffer queue for compressed data
+ lzma_outq outq;
+
+
+ /// True if wait_max is used.
+ bool has_timeout;
+
+ /// Maximum wait time if cannot use all the input and cannot
+ /// fill the output buffer.
+ struct timespec wait_max;
+
+
+ /// Error code from a worker thread
+ lzma_ret thread_error;
+
+ /// Array of allocated thread-specific structures
+ worker_thread *threads;
+
+ /// Number of structures in "threads" above. This is also the
+ /// number of threads that will be created at maximum.
+ uint32_t threads_max;
+
+ /// Number of thread structures that have been initialized, and
+ /// thus the number of worker threads actually created so far.
+ uint32_t threads_initialized;
+
+ /// Stack of free threads. When a thread finishes, it puts itself
+ /// back into this stack. This starts as empty because threads
+ /// are created only when actually needed.
+ worker_thread *threads_free;
+
+ /// The most recent worker thread to which the main thread writes
+ /// the new input from the application.
+ worker_thread *thr;
+
+ pthread_mutex_t mutex;
+ mythread_cond cond;
+};
+
+
+/// Tell the main thread that something has gone wrong.
+static void
+worker_error(worker_thread *thr, lzma_ret ret)
+{
+ assert(ret != LZMA_OK);
+ assert(ret != LZMA_STREAM_END);
+
+ mythread_sync(thr->coder->mutex) {
+ if (thr->coder->thread_error == LZMA_OK)
+ thr->coder->thread_error = ret;
+
+ mythread_cond_signal(&thr->coder->cond);
+ }
+
+ return;
+}
+
+
+static worker_state
+worker_encode(worker_thread *thr, worker_state state)
+{
+ // Set the Block options.
+ thr->block_options = (lzma_block){
+ .version = 0,
+ .check = thr->coder->stream_flags.check,
+ .compressed_size = thr->coder->outq.buf_size_max,
+ .uncompressed_size = thr->coder->block_size,
+
+ // TODO: To allow changing the filter chain, the filters
+ // array must be copied to each worker_thread.
+ .filters = thr->coder->filters,
+ };
+
+ // Calculate maximum size of the Block Header. This amount is
+ // reserved in the beginning of the buffer so that Block Header
+ // along with Compressed Size and Uncompressed Size can be
+ // written there.
+ lzma_ret ret = lzma_block_header_size(&thr->block_options);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ // Initialize the Block encoder.
+ ret = lzma_block_encoder_init(&thr->block_encoder,
+ thr->allocator, &thr->block_options);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ size_t in_pos = 0;
+ size_t in_size = 0;
+
+ thr->outbuf->size = thr->block_options.header_size;
+ const size_t out_size = thr->coder->outq.buf_size_max;
+
+ do {
+ mythread_sync(thr->mutex) {
+ while (in_size == thr->in_size
+ && thr->state == THR_RUN)
+ pthread_cond_wait(&thr->cond, &thr->mutex);
+
+ state = thr->state;
+ in_size = thr->in_size;
+
+ // TODO? Store in_pos and out_pos into *thr here
+ // so that the application may read them via
+ // some currently non-existing function to get
+ // progress information.
+ }
+
+ // Return if we were asked to stop or exit.
+ if (state >= THR_STOP)
+ return state;
+
+ lzma_action action = state == THR_FINISH
+ ? LZMA_FINISH : LZMA_RUN;
+
+ // Limit the amount of input given to the Block encoder
+ // at once. This way this thread can react fairly quickly
+ // if the main thread wants us to stop or exit.
+ static const size_t in_chunk_max = 16384;
+ size_t in_limit = in_size;
+ if (in_size - in_pos > in_chunk_max) {
+ in_limit = in_pos + in_chunk_max;
+ action = LZMA_RUN;
+ }
+
+ ret = thr->block_encoder.code(
+ thr->block_encoder.coder, thr->allocator,
+ thr->in, &in_pos, in_limit, thr->outbuf->buf,
+ &thr->outbuf->size, out_size, action);
+ } while (ret == LZMA_OK);
+
+ if (ret != LZMA_STREAM_END) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ assert(state == THR_FINISH);
+
+ // Encode the Block Header. By doing it after the compression,
+ // we can store the Compressed Size and Uncompressed Size fields.
+ ret = lzma_block_header_encode(&thr->block_options, thr->outbuf->buf);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ // Set the size information that will be read by the main thread
+ // to write the Index field.
+ thr->outbuf->unpadded_size
+ = lzma_block_unpadded_size(&thr->block_options);
+ assert(thr->outbuf->unpadded_size != 0);
+ thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size;
+
+ return THR_FINISH;
+}
+
+
+static void *
+worker_start(void *thr_ptr)
+{
+ worker_thread *thr = thr_ptr;
+ worker_state state = THR_IDLE; // Init to silence a warning
+
+ while (true) {
+ // Wait for work.
+ mythread_sync(thr->mutex) {
+ while (true) {
+ // The thread is already idle so if we are
+ // requested to stop, just set the state.
+ if (thr->state == THR_STOP)
+ thr->state = THR_IDLE;
+
+ state = thr->state;
+ if (state != THR_IDLE)
+ break;
+
+ pthread_cond_wait(&thr->cond, &thr->mutex);
+ }
+ }
+
+ assert(state != THR_IDLE);
+ assert(state != THR_STOP);
+
+ if (state <= THR_FINISH)
+ state = worker_encode(thr, state);
+
+ if (state == THR_EXIT)
+ break;
+
+ // Mark the thread as idle. Signal is needed for the case
+ // where the main thread is waiting for the threads to stop.
+ mythread_sync(thr->mutex) {
+ thr->state = THR_IDLE;
+ pthread_cond_signal(&thr->cond);
+ }
+
+ mythread_sync(thr->coder->mutex) {
+ // Mark the output buffer as finished if
+ // no errors occurred.
+ thr->outbuf->finished = state == THR_FINISH;
+
+ // Return this thread to the stack of free threads.
+ thr->next = thr->coder->threads_free;
+ thr->coder->threads_free = thr;
+
+ mythread_cond_signal(&thr->coder->cond);
+ }
+ }
+
+ // Exiting, free the resources.
+ pthread_mutex_destroy(&thr->mutex);
+ pthread_cond_destroy(&thr->cond);
+
+ lzma_next_end(&thr->block_encoder, thr->allocator);
+ lzma_free(thr->in, thr->allocator);
+ return NULL;
+}
+
+
+/// Make the threads stop but not exit. Optionally wait for them to stop.
+static void
+threads_stop(lzma_coder *coder, bool wait)
+{
+ // Tell the threads to stop.
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_STOP;
+ pthread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+
+ if (!wait)
+ return;
+
+ // Wait for the threads to settle in the idle state.
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ while (coder->threads[i].state != THR_IDLE)
+ pthread_cond_wait(&coder->threads[i].cond,
+ &coder->threads[i].mutex);
+ }
+ }
+
+ return;
+}
+
+
+/// Stop the threads and free the resources associated with them.
+/// Wait until the threads have exited.
+static void
+threads_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_EXIT;
+ pthread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ int ret = pthread_join(coder->threads[i].thread_id, NULL);
+ assert(ret == 0);
+ (void)ret;
+ }
+
+ lzma_free(coder->threads, allocator);
+ return;
+}
+
+
+/// Initialize a new worker_thread structure and create a new thread.
+static lzma_ret
+initialize_new_thread(lzma_coder *coder, lzma_allocator *allocator)
+{
+ worker_thread *thr = &coder->threads[coder->threads_initialized];
+
+ thr->in = lzma_alloc(coder->block_size, allocator);
+ if (thr->in == NULL)
+ return LZMA_MEM_ERROR;
+
+ if (pthread_mutex_init(&thr->mutex, NULL))
+ goto error_mutex;
+
+ if (pthread_cond_init(&thr->cond, NULL))
+ goto error_cond;
+
+ thr->state = THR_IDLE;
+ thr->allocator = allocator;
+ thr->coder = coder;
+ thr->block_encoder = LZMA_NEXT_CODER_INIT;
+
+ if (mythread_create(&thr->thread_id, &worker_start, thr))
+ goto error_thread;
+
+ ++coder->threads_initialized;
+ coder->thr = thr;
+
+ return LZMA_OK;
+
+error_thread:
+ pthread_cond_destroy(&thr->cond);
+
+error_cond:
+ pthread_mutex_destroy(&thr->mutex);
+
+error_mutex:
+ lzma_free(thr->in, allocator);
+ return LZMA_MEM_ERROR;
+}
+
+
+static lzma_ret
+get_thread(lzma_coder *coder, lzma_allocator *allocator)
+{
+ // If there are no free output subqueues, there is no
+ // point to try getting a thread.
+ if (!lzma_outq_has_buf(&coder->outq))
+ return LZMA_OK;
+
+ // If there is a free structure on the stack, use it.
+ mythread_sync(coder->mutex) {
+ if (coder->threads_free != NULL) {
+ coder->thr = coder->threads_free;
+ coder->threads_free = coder->threads_free->next;
+ }
+ }
+
+ if (coder->thr == NULL) {
+ // If there are no uninitialized structures left, return.
+ if (coder->threads_initialized == coder->threads_max)
+ return LZMA_OK;
+
+ // Initialize a new thread.
+ return_if_error(initialize_new_thread(coder, allocator));
+ }
+
+ // Reset the parts of the thread state that have to be done
+ // in the main thread.
+ mythread_sync(coder->thr->mutex) {
+ coder->thr->state = THR_RUN;
+ coder->thr->in_size = 0;
+ coder->thr->outbuf = lzma_outq_get_buf(&coder->outq);
+ pthread_cond_signal(&coder->thr->cond);
+ }
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+stream_encode_in(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, lzma_action action)
+{
+ while (*in_pos < in_size
+ || (coder->thr != NULL && action != LZMA_RUN)) {
+ if (coder->thr == NULL) {
+ // Get a new thread.
+ const lzma_ret ret = get_thread(coder, allocator);
+ if (coder->thr == NULL)
+ return ret;
+ }
+
+ // Copy the input data to thread's buffer.
+ size_t thr_in_size = coder->thr->in_size;
+ lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
+ &thr_in_size, coder->block_size);
+
+ // Tell the Block encoder to finish if
+ // - it has got block_size bytes of input; or
+ // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH,
+ // or LZMA_FULL_BARRIER was used.
+ //
+ // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
+ const bool finish = thr_in_size == coder->block_size
+ || (*in_pos == in_size && action != LZMA_RUN);
+
+ bool block_error = false;
+
+ mythread_sync(coder->thr->mutex) {
+ if (coder->thr->state == THR_IDLE) {
+ // Something has gone wrong with the Block
+ // encoder. It has set coder->thread_error
+ // which we will read a few lines later.
+ block_error = true;
+ } else {
+ // Tell the Block encoder its new amount
+ // of input and update the state if needed.
+ coder->thr->in_size = thr_in_size;
+
+ if (finish)
+ coder->thr->state = THR_FINISH;
+
+ pthread_cond_signal(&coder->thr->cond);
+ }
+ }
+
+ if (block_error) {
+ lzma_ret ret;
+
+ mythread_sync(coder->mutex) {
+ ret = coder->thread_error;
+ }
+
+ return ret;
+ }
+
+ if (finish)
+ coder->thr = NULL;
+ }
+
+ return LZMA_OK;
+}
+
+
+/// Wait until more input can be consumed, more output can be read, or
+/// an optional timeout is reached.
+static bool
+wait_for_work(lzma_coder *coder, struct timespec *wait_abs,
+ bool *has_blocked, bool has_input)
+{
+ if (coder->has_timeout && !*has_blocked) {
+ // Every time when stream_encode_mt() is called via
+ // lzma_code(), *has_block starts as false. We set it
+ // to true here and calculate the absolute time when
+ // we must return if there's nothing to do.
+ //
+ // The idea of *has_blocked is to avoid unneeded calls
+ // to mythread_cond_abstime(), which may do a syscall
+ // depending on the operating system.
+ *has_blocked = true;
+ *wait_abs = coder->wait_max;
+ mythread_cond_abstime(&coder->cond, wait_abs);
+ }
+
+ bool timed_out = false;
+
+ mythread_sync(coder->mutex) {
+ // There are four things that we wait. If one of them
+ // becomes possible, we return.
+ // - If there is input left, we need to get a free
+ // worker thread and an output buffer for it.
+ // - Data ready to be read from the output queue.
+ // - A worker thread indicates an error.
+ // - Time out occurs.
+ while ((!has_input || coder->threads_free == NULL
+ || !lzma_outq_has_buf(&coder->outq))
+ && !lzma_outq_is_readable(&coder->outq)
+ && coder->thread_error == LZMA_OK
+ && !timed_out) {
+ if (coder->has_timeout)
+ timed_out = mythread_cond_timedwait(
+ &coder->cond, &coder->mutex,
+ wait_abs) != 0;
+ else
+ mythread_cond_wait(&coder->cond,
+ &coder->mutex);
+ }
+ }
+
+ return timed_out;
+}
+
+
+static lzma_ret
+stream_encode_mt(lzma_coder *coder, lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ sizeof(coder->header),
+ out, out_pos, out_size);
+ if (coder->header_pos < sizeof(coder->header))
+ return LZMA_OK;
+
+ coder->header_pos = 0;
+ coder->sequence = SEQ_BLOCK;
+
+ // Fall through
+
+ case SEQ_BLOCK: {
+ // Initialized to silence warnings.
+ lzma_vli unpadded_size = 0;
+ lzma_vli uncompressed_size = 0;
+ lzma_ret ret = LZMA_OK;
+
+ // These are for wait_for_work().
+ bool has_blocked = false;
+ struct timespec wait_abs;
+
+ while (true) {
+ mythread_sync(coder->mutex) {
+ // Check for Block encoder errors.
+ ret = coder->thread_error;
+ if (ret != LZMA_OK) {
+ assert(ret != LZMA_STREAM_END);
+ break;
+ }
+
+ // Try to read compressed data to out[].
+ ret = lzma_outq_read(&coder->outq,
+ out, out_pos, out_size,
+ &unpadded_size,
+ &uncompressed_size);
+ }
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Block. Add it to the Index.
+ ret = lzma_index_append(coder->index,
+ allocator, unpadded_size,
+ uncompressed_size);
+
+ // If we didn't fill the output buffer yet,
+ // try to read more data. Maybe the next
+ // outbuf has been finished already too.
+ if (*out_pos < out_size)
+ continue;
+ }
+
+ if (ret != LZMA_OK) {
+ // coder->thread_error was set or
+ // lzma_index_append() failed.
+ threads_stop(coder, false);
+ return ret;
+ }
+
+ // Check if the last Block was finished.
+ if (action == LZMA_FINISH
+ && *in_pos == in_size
+ && lzma_outq_is_empty(
+ &coder->outq))
+ break;
+
+ // Try to give uncompressed data to a worker thread.
+ ret = stream_encode_in(coder, allocator,
+ in, in_pos, in_size, action);
+ if (ret != LZMA_OK) {
+ threads_stop(coder, false);
+ return ret;
+ }
+
+ // Return if
+ // - we have used all the input and expect to
+ // get more input; or
+ // - the output buffer has been filled.
+ //
+ // TODO: Support flushing.
+ if ((*in_pos == in_size && action != LZMA_FINISH)
+ || *out_pos == out_size)
+ return LZMA_OK;
+
+ // Neither in nor out has been used completely.
+ // Wait until there's something we can do.
+ if (wait_for_work(coder, &wait_abs, &has_blocked,
+ *in_pos < in_size))
+ return LZMA_TIMED_OUT;
+ }
+
+ // All Blocks have been encoded and the threads have stopped.
+ // Prepare to encode the Index field.
+ return_if_error(lzma_index_encoder_init(
+ &coder->index_encoder, allocator,
+ coder->index));
+ coder->sequence = SEQ_INDEX;
+ }
+
+ // Fall through
+
+ case SEQ_INDEX: {
+ // Call the Index encoder. It doesn't take any input, so
+ // those pointers can be NULL.
+ const lzma_ret ret = coder->index_encoder.code(
+ coder->index_encoder.coder, allocator,
+ NULL, NULL, 0,
+ out, out_pos, out_size, LZMA_RUN);
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Encode the Stream Footer into coder->buffer.
+ coder->stream_flags.backward_size
+ = lzma_index_size(coder->index);
+ if (lzma_stream_footer_encode(&coder->stream_flags,
+ coder->header) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ coder->sequence = SEQ_STREAM_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_FOOTER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ sizeof(coder->header),
+ out, out_pos, out_size);
+ return coder->header_pos < sizeof(coder->header)
+ ? LZMA_OK : LZMA_STREAM_END;
+ }
+
+ assert(0);
+ return LZMA_PROG_ERROR;
+}
+
+
+static void
+stream_encoder_mt_end(lzma_coder *coder, lzma_allocator *allocator)
+{
+ // Threads must be killed before the output queue can be freed.
+ threads_end(coder, allocator);
+ lzma_outq_end(&coder->outq, allocator);
+
+ for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+ lzma_free(coder->filters[i].options, allocator);
+
+ lzma_next_end(&coder->index_encoder, allocator);
+ lzma_index_end(coder->index, allocator);
+
+ mythread_cond_destroy(&coder->cond);
+ pthread_mutex_destroy(&coder->mutex);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+/// Options handling for lzma_stream_encoder_mt_init() and
+/// lzma_stream_encoder_mt_memusage()
+static lzma_ret
+get_options(const lzma_mt *options, lzma_options_easy *opt_easy,
+ const lzma_filter **filters, uint64_t *block_size,
+ uint64_t *outbuf_size_max)
+{
+ // Validate some of the options.
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
+ if (options->flags != 0 || options->threads == 0
+ || options->threads > LZMA_THREADS_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ if (options->filters != NULL) {
+ // Filter chain was given, use it as is.
+ *filters = options->filters;
+ } else {
+ // Use a preset.
+ if (lzma_easy_preset(opt_easy, options->preset))
+ return LZMA_OPTIONS_ERROR;
+
+ *filters = opt_easy->filters;
+ }
+
+ // Block size
+ if (options->block_size > 0) {
+ if (options->block_size > BLOCK_SIZE_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ *block_size = options->block_size;
+ } else {
+ // Determine the Block size from the filter chain.
+ *block_size = lzma_mt_block_size(*filters);
+ if (*block_size == 0)
+ return LZMA_OPTIONS_ERROR;
+
+ assert(*block_size <= BLOCK_SIZE_MAX);
+ }
+
+ // Calculate the maximum amount output that a single output buffer
+ // may need to hold. This is the same as the maximum total size of
+ // a Block.
+ //
+ // FIXME: As long as the encoder keeps the whole input buffer
+ // available and doesn't start writing output before finishing
+ // the Block, it could use lzma_stream_buffer_bound() and use
+ // uncompressed LZMA2 chunks if the data doesn't compress.
+ *outbuf_size_max = *block_size + *block_size / 16 + 16384;
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+stream_encoder_mt_init(lzma_next_coder *next, lzma_allocator *allocator,
+ const lzma_mt *options)
+{
+ lzma_next_coder_init(&stream_encoder_mt_init, next, allocator);
+
+ // Get the filter chain.
+ lzma_options_easy easy;
+ const lzma_filter *filters;
+ uint64_t block_size;
+ uint64_t outbuf_size_max;
+ return_if_error(get_options(options, &easy, &filters,
+ &block_size, &outbuf_size_max));
+
+#if SIZE_MAX < UINT64_MAX
+ if (block_size > SIZE_MAX)
+ return LZMA_MEM_ERROR;
+#endif
+
+ // FIXME TODO: Validate the filter chain so that we can give
+ // an error in this function instead of delaying it to the first
+ // call to lzma_code().
+
+ // Validate the Check ID.
+ if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
+ return LZMA_PROG_ERROR;
+
+ if (!lzma_check_is_supported(options->check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ // Allocate and initialize the base structure if needed.
+ if (next->coder == NULL) {
+ next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
+ if (next->coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ // For the mutex and condition variable initializations
+ // the error handling has to be done here because
+ // stream_encoder_mt_end() doesn't know if they have
+ // already been initialized or not.
+ if (pthread_mutex_init(&next->coder->mutex, NULL)) {
+ lzma_free(next->coder, allocator);
+ next->coder = NULL;
+ return LZMA_MEM_ERROR;
+ }
+
+ if (mythread_cond_init(&next->coder->cond)) {
+ pthread_mutex_destroy(&next->coder->mutex);
+ lzma_free(next->coder, allocator);
+ next->coder = NULL;
+ return LZMA_MEM_ERROR;
+ }
+
+ next->code = &stream_encode_mt;
+ next->end = &stream_encoder_mt_end;
+// next->update = &stream_encoder_mt_update;
+
+ next->coder->filters[0].id = LZMA_VLI_UNKNOWN;
+ next->coder->index_encoder = LZMA_NEXT_CODER_INIT;
+ next->coder->index = NULL;
+ memzero(&next->coder->outq, sizeof(next->coder->outq));
+ next->coder->threads = NULL;
+ next->coder->threads_max = 0;
+ next->coder->threads_initialized = 0;
+ }
+
+ // Basic initializations
+ next->coder->sequence = SEQ_STREAM_HEADER;
+ next->coder->block_size = (size_t)(block_size);
+ next->coder->thread_error = LZMA_OK;
+ next->coder->thr = NULL;
+
+ // Allocate the thread-specific base structures.
+ assert(options->threads > 0);
+ if (next->coder->threads_max != options->threads) {
+ threads_end(next->coder, allocator);
+
+ next->coder->threads = NULL;
+ next->coder->threads_max = 0;
+
+ next->coder->threads_initialized = 0;
+ next->coder->threads_free = NULL;
+
+ next->coder->threads = lzma_alloc(
+ options->threads * sizeof(worker_thread),
+ allocator);
+ if (next->coder->threads == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder->threads_max = options->threads;
+ } else {
+ // Reuse the old structures and threads. Tell the running
+ // threads to stop and wait until they have stopped.
+ threads_stop(next->coder, true);
+ }
+
+ // Output queue
+ return_if_error(lzma_outq_init(&next->coder->outq, allocator,
+ outbuf_size_max, options->threads));
+
+ // Timeout
+ if (options->timeout > 0) {
+ next->coder->wait_max.tv_sec = options->timeout / 1000;
+ next->coder->wait_max.tv_nsec
+ = (options->timeout % 1000) * 1000000L;
+ next->coder->has_timeout = true;
+ } else {
+ next->coder->has_timeout = false;
+ }
+
+ // Free the old filter chain and copy the new one.
+ for (size_t i = 0; next->coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
+ lzma_free(next->coder->filters[i].options, allocator);
+
+ return_if_error(lzma_filters_copy(options->filters,
+ next->coder->filters, allocator));
+
+ // Index
+ lzma_index_end(next->coder->index, allocator);
+ next->coder->index = lzma_index_init(allocator);
+ if (next->coder->index == NULL)
+ return LZMA_MEM_ERROR;
+
+ // Stream Header
+ next->coder->stream_flags.version = 0;
+ next->coder->stream_flags.check = options->check;
+ return_if_error(lzma_stream_header_encode(
+ &next->coder->stream_flags, next->coder->header));
+
+ next->coder->header_pos = 0;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
+{
+ lzma_next_strm_init(stream_encoder_mt_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+// strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+// strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
+// strm->internal->supported_actions[LZMA_FULL_BARRIER] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
+
+
+// This function name is a monster but it's consistent with the older
+// monster names. :-( 31 chars is the max that C99 requires so in that
+// sense it's not too long. ;-)
+extern LZMA_API(uint64_t)
+lzma_stream_encoder_mt_memusage(const lzma_mt *options)
+{
+ lzma_options_easy easy;
+ const lzma_filter *filters;
+ uint64_t block_size;
+ uint64_t outbuf_size_max;
+
+ if (get_options(options, &easy, &filters, &block_size,
+ &outbuf_size_max) != LZMA_OK)
+ return UINT64_MAX;
+
+ // Memory usage of the input buffers
+ const uint64_t inbuf_memusage = options->threads * block_size;
+
+ // Memory usage of the filter encoders
+ uint64_t filters_memusage
+ = lzma_raw_encoder_memusage(options->filters);
+ if (filters_memusage == UINT64_MAX)
+ return UINT64_MAX;
+
+ filters_memusage *= options->threads;
+
+ // Memory usage of the output queue
+ const uint64_t outq_memusage = lzma_outq_memusage(
+ outbuf_size_max, options->threads);
+ if (outq_memusage == UINT64_MAX)
+ return UINT64_MAX;
+
+ // Sum them with overflow checking.
+ uint64_t total_memusage = LZMA_MEMUSAGE_BASE + sizeof(lzma_coder)
+ + options->threads * sizeof(worker_thread);
+
+ if (UINT64_MAX - total_memusage < inbuf_memusage)
+ return UINT64_MAX;
+
+ total_memusage += inbuf_memusage;
+
+ if (UINT64_MAX - total_memusage < filters_memusage)
+ return UINT64_MAX;
+
+ total_memusage += filters_memusage;
+
+ if (UINT64_MAX - total_memusage < outq_memusage)
+ return UINT64_MAX;
+
+ return total_memusage + outq_memusage;
+}