aboutsummaryrefslogtreecommitdiff
path: root/src/xz
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2012-07-03 21:16:39 +0300
committerLasse Collin <lasse.collin@tukaani.org>2012-07-03 21:16:39 +0300
commit88ccf47205d7f3aa314d358c72ef214f10f68b43 (patch)
tree9dc89c76bfad5f94d68e1fc80c838602d4271323 /src/xz
parentxz: Update the man page about the new field in --robot -lvv. (diff)
downloadxz-88ccf47205d7f3aa314d358c72ef214f10f68b43.tar.xz
xz: Add incomplete support for --block-list.
It's broken with threads and when also --block-size is used.
Diffstat (limited to 'src/xz')
-rw-r--r--src/xz/args.c78
-rw-r--r--src/xz/args.h1
-rw-r--r--src/xz/coder.c48
-rw-r--r--src/xz/coder.h4
-rw-r--r--src/xz/main.c1
-rw-r--r--src/xz/message.c6
-rw-r--r--src/xz/xz.123
7 files changed, 151 insertions, 10 deletions
diff --git a/src/xz/args.c b/src/xz/args.c
index 54b3ff32..9a4f82be 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -55,6 +55,67 @@ parse_memlimit(const char *name, const char *name_percentage, char *str,
static void
+parse_block_list(char *str)
+{
+ // It must be non-empty and not begin with a comma.
+ if (str[0] == '\0' || str[0] == ',')
+ message_fatal(_("%s: Invalid argument to --block-list"), str);
+
+ // Count the number of comma-separated strings.
+ size_t count = 1;
+ for (size_t i = 0; str[i] != '\0'; ++i)
+ if (str[i] == ',')
+ ++count;
+
+ // Prevent an unlikely integer overflow.
+ if (count > SIZE_MAX / sizeof(uint64_t) - 1)
+ message_fatal(_("%s: Too many arguments to --block-list"),
+ str);
+
+ // Allocate memory to hold all the sizes specified.
+ // If --block-list was specified already, its value is forgotten.
+ free(opt_block_list);
+ opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
+
+ for (size_t i = 0; i < count; ++i) {
+ // Locate the next comma and replace it with \0.
+ char *p = strchr(str, ',');
+ if (p != NULL)
+ *p = '\0';
+
+ if (str[0] == '\0') {
+ // There is no string, that is, a comma follows
+ // another comma. Use the previous value.
+ //
+ // NOTE: We checked earler that the first char
+ // of the whole list cannot be a comma.
+ assert(i > 0);
+ opt_block_list[i] = opt_block_list[i - 1];
+ } else {
+ opt_block_list[i] = str_to_uint64("block-list", str,
+ 0, UINT64_MAX);
+
+ // Zero indicates no more new Blocks.
+ if (opt_block_list[i] == 0) {
+ if (i + 1 != count)
+ message_fatal(_("0 can only be used "
+ "as the last element "
+ "in --block-list"));
+
+ opt_block_list[i] = UINT64_MAX;
+ }
+ }
+
+ str = p + 1;
+ }
+
+ // Terminate the array.
+ opt_block_list[count] = 0;
+ return;
+}
+
+
+static void
parse_real(args_info *args, int argc, char **argv)
{
enum {
@@ -73,6 +134,7 @@ parse_real(args_info *args, int argc, char **argv)
OPT_FILES,
OPT_FILES0,
OPT_BLOCK_SIZE,
+ OPT_BLOCK_LIST,
OPT_MEM_COMPRESS,
OPT_MEM_DECOMPRESS,
OPT_NO_ADJUST,
@@ -107,6 +169,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "format", required_argument, NULL, 'F' },
{ "check", required_argument, NULL, 'C' },
{ "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
+ { "block-list", required_argument, NULL, OPT_BLOCK_LIST },
{ "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
{ "memlimit", required_argument, NULL, 'M' },
@@ -378,6 +441,11 @@ parse_real(args_info *args, int argc, char **argv)
0, LZMA_VLI_MAX);
break;
+ case OPT_BLOCK_LIST: {
+ parse_block_list(optarg);
+ break;
+ }
+
case OPT_SINGLE_STREAM:
opt_single_stream = true;
break;
@@ -590,3 +658,13 @@ args_parse(args_info *args, int argc, char **argv)
return;
}
+
+
+#ifndef NDEBUG
+extern void
+args_free(void)
+{
+ free(opt_block_list);
+ return;
+}
+#endif
diff --git a/src/xz/args.h b/src/xz/args.h
index b23f4ef1..53c4a98a 100644
--- a/src/xz/args.h
+++ b/src/xz/args.h
@@ -40,3 +40,4 @@ extern bool opt_robot;
extern const char stdin_filename[];
extern void args_parse(args_info *args, int argc, char **argv);
+extern void args_free(void);
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 2ed88cdb..a98be97f 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -26,6 +26,7 @@ enum format_type opt_format = FORMAT_AUTO;
bool opt_auto_adjust = true;
bool opt_single_stream = false;
uint64_t opt_block_size = 0;
+uint64_t *opt_block_list = NULL;
/// Stream used to communicate with liblzma
@@ -522,15 +523,36 @@ coder_normal(file_pair *pair)
// Assume that something goes wrong.
bool success = false;
- // block_remaining indicates how many input bytes to encode until
+ // block_remaining indicates how many input bytes to encode before
// finishing the current .xz Block. The Block size is set with
- // --block-size=SIZE. It has an effect only when compressing
- // to the .xz format. If block_remaining == UINT64_MAX, only
- // a single block is created.
+ // --block-size=SIZE and --block-list. They have an effect only when
+ // compressing to the .xz format. If block_remaining == UINT64_MAX,
+ // only a single block is created.
uint64_t block_remaining = UINT64_MAX;
- if (hardware_threads_get() == 1 && opt_mode == MODE_COMPRESS
- && opt_format == FORMAT_XZ && opt_block_size > 0)
- block_remaining = opt_block_size;
+
+ // Position in opt_block_list. Unused if --block-list wasn't used.
+ size_t list_pos = 0;
+
+ // Handle --block-size for single-threaded mode and the first step
+ // of --block-list.
+ if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
+ // --block-size doesn't do anything here in threaded mode,
+ // because the threaded encoder will take care of splitting
+ // to fixed-sized Blocks.
+ if (hardware_threads_get() == 1 && opt_block_size > 0)
+ block_remaining = opt_block_size;
+
+ // If --block-list was used, start with the first size.
+ //
+ // FIXME: Currently this overrides --block-size but this isn't
+ // good. For threaded case, we want --block-size to specify
+ // how big Blocks the encoder needs to be prepared to create
+ // at maximum and --block-list will simultaneously cause new
+ // Blocks to be started at specified intervals. To keep things
+ // logical, the same should be done in single-threaded mode.
+ if (opt_block_list != NULL)
+ block_remaining = opt_block_list[list_pos];
+ }
strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE;
@@ -575,7 +597,17 @@ coder_normal(file_pair *pair)
if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
// Start a new Block.
action = LZMA_RUN;
- block_remaining = opt_block_size;
+
+ if (opt_block_list == NULL) {
+ block_remaining = opt_block_size;
+ } else {
+ // FIXME: Make it work together with
+ // --block-size.
+ if (opt_block_list[list_pos + 1] != 0)
+ ++list_pos;
+
+ block_remaining = opt_block_list[list_pos];
+ }
} else if (ret != LZMA_OK) {
// Determine if the return value indicates that we
diff --git a/src/xz/coder.h b/src/xz/coder.h
index 578d2d7e..583da8f6 100644
--- a/src/xz/coder.h
+++ b/src/xz/coder.h
@@ -48,6 +48,10 @@ extern bool opt_single_stream;
/// of input. This has an effect only when compressing to the .xz format.
extern uint64_t opt_block_size;
+/// This is non-NULL if --block-list was used. This contains the Block sizes
+/// as an array that is terminated with 0.
+extern uint64_t *opt_block_list;
+
/// Set the integrity check type used when compressing
extern void coder_set_check(lzma_check check);
diff --git a/src/xz/main.c b/src/xz/main.c
index 4e5b49e5..a8f0683a 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -277,6 +277,7 @@ main(int argc, char **argv)
#ifndef NDEBUG
coder_free();
+ args_free();
#endif
// If we have got a signal, raise it to kill the program instead
diff --git a/src/xz/message.c b/src/xz/message.c
index 2b6ac5f0..abbd1713 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -1153,10 +1153,16 @@ message_help(bool long_help)
" does not affect decompressor memory requirements"));
if (long_help) {
+ // FIXME? Mention something about threading?
puts(_(
" --block-size=SIZE\n"
" when compressing to the .xz format, start a new block\n"
" after every SIZE bytes of input; 0=disabled (default)"));
+ // FIXME
+ puts(_(
+" --block-list=SIZES\n"
+" when compressing to the .xz format, start a new block\n"
+" after the given intervals of uncompressed data"));
puts(_( // xgettext:no-c-format
" --memlimit-compress=LIMIT\n"
" --memlimit-decompress=LIMIT\n"
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index 4da09baf..0368f05b 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -5,7 +5,7 @@
.\" This file has been put into the public domain.
.\" You can do whatever you want with this file.
.\"
-.TH XZ 1 "2012-07-01" "Tukaani" "XZ Utils"
+.TH XZ 1 "2012-07-03" "Tukaani" "XZ Utils"
.
.SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@@ -807,7 +807,26 @@ format, split the input data into blocks of
.I size
bytes.
The blocks are compressed independently from each other.
-.\" FIXME: Explain how to his can be used for random access and threading.
+.\" FIXME: Explain how to these can be used for random access and threading.
+.TP
+.BI \-\-block\-list= sizes
+When compressing to the
+.B .xz
+format, start a new block after
+the given intervals of uncompressed data.
+.IP ""
+The uncompressed
+.I sizes
+of the blocks are specified as a comma-separated list.
+Omitting a size (two or more consecutive commas) is a shorthand
+to use the size of the previous block.
+A special value of
+.B 0
+may be used as the last value to indicate that
+the rest of the file should be encoded as a single block.
+.IP ""
+.B "Currently this option is badly broken if used together with"
+.B "\-\-block\-size or with multithreading."
.TP
.BI \-\-memlimit\-compress= limit
Set a memory usage limit for compression.