aboutsummaryrefslogtreecommitdiff
path: root/src/liblzma/rangecoder
diff options
context:
space:
mode:
author: Lasse Collin <lasse.collin@tukaani.org> 2021-01-13 19:16:32 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2021-01-14 18:58:13 +0200
commit: 625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c (patch)
tree: 0706a4ff7aa1d64ecffdbc9e13c3899d67d3db2f /src/liblzma/rangecoder
parent: Scripts: Add zstd support to xzdiff. (diff)
download: xz-625f4c7c99b2fcc4db9e7ab2deb4884790e2e17c.tar.xz
liblzma: Add rough support for output-size-limited encoding in LZMA1.
With this it is possible to encode LZMA1 data without EOPM so that the encoder will encode as much input as it can without exceeding the specified output size limit. The resulting LZMA1 stream will be a normal LZMA1 stream without EOPM. The actual uncompressed size will be available to the caller via the uncomp_size pointer. One missing thing is that the LZMA layer doesn't inform the LZ layer when the encoding is finished and thus the LZ may read more input when it won't be used. However, this doesn't matter if encoding is done with a single call (which is the planned use case for now). For proper multi-call encoding this should be improved. This commit only adds the functionality for internal use. Nothing uses it yet.
Diffstat (limited to 'src/liblzma/rangecoder')
-rw-r--r-- src/liblzma/rangecoder/range_encoder.h 111
1 files changed, 111 insertions, 0 deletions
diff --git a/src/liblzma/rangecoder/range_encoder.h b/src/liblzma/rangecoder/range_encoder.h
index 4f3b30ca..1bcfd7a5 100644
--- a/src/liblzma/rangecoder/range_encoder.h
+++ b/src/liblzma/rangecoder/range_encoder.h
@@ -30,6 +30,9 @@ typedef struct {
uint32_t range;
uint8_t cache;
+ /// Number of bytes written out by rc_encode() -> rc_shift_low()
+ uint64_t out_total;
+
/// Number of symbols in the tables
size_t count;
@@ -58,12 +61,22 @@ rc_reset(lzma_range_encoder *rc)
rc->cache_size = 1;
rc->range = UINT32_MAX;
rc->cache = 0;
+ rc->out_total = 0;
rc->count = 0;
rc->pos = 0;
}
+/// Throws away every symbol that has been queued so far by resetting
+/// the symbol count to zero.
static inline void
+rc_forget(lzma_range_encoder *rc)
+{
+	// Only valid between complete rc_encode() runs: a nonzero pos
+	// means that a part of the queue has already been encoded.
+	assert(rc->pos == 0);
+
+	rc->count = 0;
+}
+
+
+static inline void
rc_bit(lzma_range_encoder *rc, probability *prob, uint32_t bit)
{
rc->symbols[rc->count] = bit;
@@ -132,6 +145,7 @@ rc_shift_low(lzma_range_encoder *rc,
out[*out_pos] = rc->cache + (uint8_t)(rc->low >> 32);
++*out_pos;
+ ++rc->out_total;
rc->cache = 0xFF;
} while (--rc->cache_size != 0);
@@ -147,6 +161,31 @@ rc_shift_low(lzma_range_encoder *rc,
+/// Size-counting variant of rc_shift_low(): updates caller-provided
+/// copies of the range encoder state and a simulated output byte
+/// counter but doesn't store any bytes anywhere.
+///
+/// low, cache_size, and cache are working copies of the same-named
+/// lzma_range_encoder members. *out_pos is the number of bytes that
+/// would have been written so far and out_size is the limit for it.
+/// Both are 64-bit so that the counter can be seeded from the 64-bit
+/// rc->out_total without a pointer type mismatch or truncation when
+/// size_t is narrower than (or a distinct type from) uint64_t.
+///
+/// Returns true if the pending bytes wouldn't fit within out_size and
+/// false otherwise. When true is returned the state copies are no
+/// longer meaningful (*out_pos has been advanced past out_size), so
+/// the caller is expected to stop the simulation.
static inline bool
+rc_shift_low_dummy(uint64_t *low, uint64_t *cache_size, uint8_t *cache,
+		uint64_t *out_pos, uint64_t out_size)
+{
+	if ((uint32_t)(*low) < (uint32_t)(0xFF000000)
+			|| (uint32_t)(*low >> 32) != 0) {
+		// The cached byte and the 0xFF bytes following it
+		// (*cache_size bytes in total) would be written out now.
+		// Only their count matters here so they are counted in
+		// one step instead of byte by byte. Using ">" also
+		// catches a counter that already is beyond the limit.
+		*out_pos += *cache_size;
+		if (*out_pos > out_size)
+			return true;
+
+		*cache_size = 0;
+		*cache = (*low >> 24) & 0xFF;
+	}
+
+	++*cache_size;
+	*low = (*low & 0x00FFFFFF) << RC_SHIFT_BITS;
+
+	return false;
+}
+
+
+static inline bool
rc_encode(lzma_range_encoder *rc,
uint8_t *out, size_t *out_pos, size_t out_size)
{
@@ -222,6 +261,78 @@ rc_encode(lzma_range_encoder *rc,
}
+/// Checks if the queued symbols and the final flush would fit in the
+/// output without actually encoding anything.
+///
+/// The encoder state is copied to local variables and the symbols
+/// rc->symbols[rc->pos] ... rc->symbols[rc->count - 1] are run through
+/// the same range arithmetic as real encoding would use, counting output
+/// bytes with rc_shift_low_dummy(). The count starts from rc->out_total.
+/// The probabilities are only read, never adapted, and *rc isn't
+/// modified.
+///
+/// Returns true if the simulated output would exceed out_size bytes
+/// and false if everything, including the five flush bytes, fits.
+static inline bool
+rc_encode_dummy(const lzma_range_encoder *rc, size_t out_size)
+{
+	assert(rc->count <= RC_SYMBOLS_MAX);
+
+	uint64_t low = rc->low;
+	uint64_t cache_size = rc->cache_size;
+	uint32_t range = rc->range;
+	uint8_t cache = rc->cache;
+	uint64_t out_pos = rc->out_total;
+
+	size_t pos = rc->pos;
+
+	while (true) {
+		// Normalize
+		if (range < RC_TOP_VALUE) {
+			if (rc_shift_low_dummy(&low, &cache_size, &cache,
+					&out_pos, out_size))
+				return true;
+
+			range <<= RC_SHIFT_BITS;
+		}
+
+		// The end-of-queue check is deliberately after the
+		// normalization: the last encoded bit can leave range
+		// below RC_TOP_VALUE and that pending shift has to be
+		// counted before the flush bytes or the size estimate
+		// can be too small.
+		// NOTE(review): this assumes rc_encode() normalizes
+		// before it handles the flush too; its body isn't
+		// visible here, so verify against rc_encode().
+		if (pos >= rc->count)
+			break;
+
+		// Encode a bit
+		switch (rc->symbols[pos]) {
+		case RC_BIT_0: {
+			probability prob = *rc->probs[pos];
+			range = (range >> RC_BIT_MODEL_TOTAL_BITS)
+					* prob;
+			break;
+		}
+
+		case RC_BIT_1: {
+			probability prob = *rc->probs[pos];
+			const uint32_t bound = prob * (range
+					>> RC_BIT_MODEL_TOTAL_BITS);
+			low += bound;
+			range -= bound;
+			break;
+		}
+
+		case RC_DIRECT_0:
+			range >>= 1;
+			break;
+
+		case RC_DIRECT_1:
+			range >>= 1;
+			low += range;
+			break;
+
+		case RC_FLUSH:
+		default:
+			// A flush symbol isn't expected in the queue when
+			// this function is used; see the flush loop below.
+			assert(0);
+			break;
+		}
+
+		++pos;
+	}
+
+	// Flush the last bytes. This isn't in rc->symbols[] so we do
+	// it after the above loop to take into account the size of
+	// the flushing that will be done at the end of the stream.
+	for (size_t i = 0; i < 5; ++i) {
+		if (rc_shift_low_dummy(&low, &cache_size,
+				&cache, &out_pos, out_size))
+			return true;
+	}
+
+	return false;
+}
+
+
static inline uint64_t
rc_pending(const lzma_range_encoder *rc)
{