aboutsummaryrefslogtreecommitdiff
path: root/src/xz
diff options
context:
space:
mode:
authorLasse Collin <lasse.collin@tukaani.org>2008-11-19 23:52:24 +0200
committerLasse Collin <lasse.collin@tukaani.org>2008-11-19 23:52:24 +0200
commit1880a3927b23f265f63b2adb86fbdb81ea09eb06 (patch)
tree2fe1b65d21f81b28f46eb707378d97f553e99ee1 /src/xz
parentOh well, big messy commit again. Some highlights: (diff)
downloadxz-1880a3927b23f265f63b2adb86fbdb81ea09eb06.tar.xz
Renamed lzma to xz and lzmadec to xzdec. We create symlinks
lzma, unlzma, and lzcat in "make install" for backwards compatibility with LZMA Utils 4.32.x; I'm not sure if this should be the default though.
Diffstat (limited to 'src/xz')
-rw-r--r--src/xz/Makefile.am74
-rw-r--r--src/xz/args.c500
-rw-r--r--src/xz/args.h56
-rw-r--r--src/xz/hardware.c122
-rw-r--r--src/xz/hardware.h45
-rw-r--r--src/xz/io.c658
-rw-r--r--src/xz/io.h97
-rw-r--r--src/xz/list.c477
-rw-r--r--src/xz/main.c402
-rw-r--r--src/xz/main.h60
-rw-r--r--src/xz/message.c892
-rw-r--r--src/xz/message.h132
-rw-r--r--src/xz/options.c352
-rw-r--r--src/xz/options.h46
-rw-r--r--src/xz/private.h52
-rw-r--r--src/xz/process.c391
-rw-r--r--src/xz/process.h70
-rw-r--r--src/xz/suffix.c213
-rw-r--r--src/xz/suffix.h40
-rw-r--r--src/xz/util.c199
-rw-r--r--src/xz/util.h71
21 files changed, 4949 insertions, 0 deletions
diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am
new file mode 100644
index 00000000..16e55461
--- /dev/null
+++ b/src/xz/Makefile.am
@@ -0,0 +1,74 @@
+##
+## Copyright (C) 2007 Lasse Collin
+##
+## This program is free software; you can redistribute it and/or
+## modify it under the terms of the GNU Lesser General Public
+## License as published by the Free Software Foundation; either
+## version 2.1 of the License, or (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## Lesser General Public License for more details.
+##
+
+bin_PROGRAMS = xz
+
+xz_SOURCES = \
+ args.c \
+ args.h \
+ hardware.c \
+ hardware.h \
+ io.c \
+ io.h \
+ main.c \
+ main.h \
+ message.c \
+ message.h \
+ options.c \
+ options.h \
+ private.h \
+ process.c \
+ process.h \
+ suffix.c \
+ suffix.h \
+ util.c \
+ util.h
+
+xz_CPPFLAGS = \
+ -DLOCALEDIR=\"$(localedir)\" \
+ -I@top_srcdir@/src/common \
+ -I@top_srcdir@/src/liblzma/api \
+ -I@top_builddir@/lib \
+ -I@top_srcdir@/lib
+
+xz_CFLAGS = @PTHREAD_CFLAGS@
+
+## Always link the command line tool statically against liblzma. It is
+## faster on x86, because no need for PIC. We also have one dependency less,
+## which allows users to more freely copy the xz binary to other boxes.
+xz_LDFLAGS = -static
+xz_LDADD = \
+ @top_builddir@/src/liblzma/liblzma.la \
+ @LTLIBINTL@ \
+ @PTHREAD_LIBS@
+
+if COND_GNULIB
+xz_LDADD += @top_builddir@/lib/libgnu.a
+endif
+
+
+## Create symlinks for unxz and xzcat for convenience. Also create symlinks
+## for lzma, unlzma, and lzcat for compatibility with LZMA Utils 4.32.x.
+install-exec-hook:
+ cd $(DESTDIR)$(bindir) && \
+ rm -f unxz xzcat lzma unlzma lzcat && \
+ $(LN_S) xz unxz && \
+ $(LN_S) xz xzcat && \
+ $(LN_S) xz lzma && \
+ $(LN_S) xz unlzma && \
+ $(LN_S) xz lzcat
+
+uninstall-hook:
+ cd $(DESTDIR)$(bindir) && \
+ rm -f unxz xzcat lzma unlzma lzcat
diff --git a/src/xz/args.c b/src/xz/args.c
new file mode 100644
index 00000000..a2efb277
--- /dev/null
+++ b/src/xz/args.c
@@ -0,0 +1,500 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file args.c
+/// \brief Argument parsing
+///
+/// \note Filter-specific options parsing is in options.c.
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#include "getopt.h"
+#include <ctype.h>
+
+
+/// Set by -c / --stdout / --to-stdout. args_parse() also forces this to
+/// true in test mode and when the command name contains "cat".
+bool opt_stdout = false;
+
+/// Set by -f / --force.
+bool opt_force = false;
+
+/// Set by -k / --keep. args_parse() also forces this to true whenever
+/// opt_stdout ends up true.
+bool opt_keep_original = false;
+
+// Name used for standard input. We don't modify or free() this, but we
+// need to assign it in some non-const pointers.
+const char *stdin_filename = "(stdin)";
+
+
+/// \brief      Parse one argument vector with getopt_long()
+///
+/// Each recognized option is applied immediately: it either sets one of
+/// the global opt_* variables or is forwarded to the matching setter
+/// (coder_*, hardware_memlimit_set(), suffix_set(), message_*).
+/// An unrecognized option prints the "try --help" hint and exits with
+/// E_ERROR.
+///
+/// \param      args    Receives the --files / --files0 settings
+///                     (files_name, files_file, files_delim)
+/// \param      argc    Number of elements in argv
+/// \param      argv    Argument vector; argv[0] is the program name
+static void
+parse_real(args_info *args, int argc, char **argv)
+{
+	// Values for long options that have no short equivalent. They start
+	// at INT_MIN so that they cannot collide with any character code.
+	enum {
+		OPT_SUBBLOCK = INT_MIN,
+		OPT_X86,
+		OPT_POWERPC,
+		OPT_IA64,
+		OPT_ARM,
+		OPT_ARMTHUMB,
+		OPT_SPARC,
+		OPT_DELTA,
+		OPT_LZMA1,
+		OPT_LZMA2,
+
+		OPT_FILES,
+		OPT_FILES0,
+	};
+
+	static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789";
+
+	static const struct option long_opts[] = {
+		// Operation mode
+		{ "compress",       no_argument,       NULL,  'z' },
+		{ "decompress",     no_argument,       NULL,  'd' },
+		{ "uncompress",     no_argument,       NULL,  'd' },
+		{ "test",           no_argument,       NULL,  't' },
+		{ "list",           no_argument,       NULL,  'l' },
+		{ "info",           no_argument,       NULL,  'l' },
+
+		// Operation modifiers
+		{ "keep",           no_argument,       NULL,  'k' },
+		{ "force",          no_argument,       NULL,  'f' },
+		{ "stdout",         no_argument,       NULL,  'c' },
+		{ "to-stdout",      no_argument,       NULL,  'c' },
+		{ "suffix",         required_argument, NULL,  'S' },
+		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
+		{ "files",          optional_argument, NULL,  OPT_FILES },
+		{ "files0",         optional_argument, NULL,  OPT_FILES0 },
+
+		// Basic compression settings
+		{ "format",         required_argument, NULL,  'F' },
+		{ "check",          required_argument, NULL,  'C' },
+		{ "preset",         required_argument, NULL,  'p' },
+		{ "memory",         required_argument, NULL,  'M' },
+		{ "threads",        required_argument, NULL,  'T' },
+
+		{ "fast",           no_argument,       NULL,  '1' },
+		{ "best",           no_argument,       NULL,  '9' },
+
+		// Filters
+		{ "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
+		{ "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
+		{ "x86",            no_argument,       NULL,  OPT_X86 },
+		{ "bcj",            no_argument,       NULL,  OPT_X86 },
+		{ "powerpc",        no_argument,       NULL,  OPT_POWERPC },
+		{ "ppc",            no_argument,       NULL,  OPT_POWERPC },
+		{ "ia64",           no_argument,       NULL,  OPT_IA64 },
+		{ "itanium",        no_argument,       NULL,  OPT_IA64 },
+		{ "arm",            no_argument,       NULL,  OPT_ARM },
+		{ "armthumb",       no_argument,       NULL,  OPT_ARMTHUMB },
+		{ "sparc",          no_argument,       NULL,  OPT_SPARC },
+		{ "delta",          optional_argument, NULL,  OPT_DELTA },
+		{ "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
+
+		// Other options
+		{ "quiet",          no_argument,       NULL,  'q' },
+		{ "verbose",        no_argument,       NULL,  'v' },
+		{ "help",           no_argument,       NULL,  'h' },
+		{ "long-help",      no_argument,       NULL,  'H' },
+		{ "version",        no_argument,       NULL,  'V' },
+
+		{ NULL,             0,                 NULL,   0 }
+	};
+
+	int c;
+
+	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
+			!= -1) {
+		switch (c) {
+		// gzip-like options
+
+		case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			coder_set_preset(c - '0');
+			break;
+
+		// --preset
+		case 'p': {
+			const uint64_t preset = str_to_uint64(
+					"preset", optarg, 1, 9);
+			coder_set_preset(preset);
+			break;
+		}
+
+		// --memory
+		case 'M':
+			// On 32-bit systems, SIZE_MAX would make more sense
+			// than UINT64_MAX. But use UINT64_MAX still so that
+			// scripts that assume > 4 GiB values don't break.
+			hardware_memlimit_set(str_to_uint64(
+					"memory", optarg, 0, UINT64_MAX));
+			break;
+
+		// --suffix
+		case 'S':
+			suffix_set(optarg);
+			break;
+
+		// --threads
+		case 'T':
+			opt_threads = str_to_uint64("threads", optarg,
+					1, SIZE_MAX);
+			break;
+
+		// --version
+		case 'V':
+			// This doesn't return.
+			message_version();
+
+		// --stdout
+		case 'c':
+			opt_stdout = true;
+			break;
+
+		// --decompress
+		case 'd':
+			opt_mode = MODE_DECOMPRESS;
+			break;
+
+		// --force
+		case 'f':
+			opt_force = true;
+			break;
+
+		// --help
+		case 'h':
+			// This doesn't return.
+			message_help(false);
+
+		// --long-help
+		case 'H':
+			// This doesn't return.
+			message_help(true);
+
+		// --list
+		case 'l':
+			opt_mode = MODE_LIST;
+			break;
+
+		// --keep
+		case 'k':
+			opt_keep_original = true;
+			break;
+
+		// --quiet
+		case 'q':
+			message_verbosity_decrease();
+			break;
+
+		// --test
+		case 't':
+			opt_mode = MODE_TEST;
+			break;
+
+		// --verbose
+		case 'v':
+			message_verbosity_increase();
+			break;
+
+		// --compress
+		case 'z':
+			opt_mode = MODE_COMPRESS;
+			break;
+
+		// Filter setup
+
+		case OPT_SUBBLOCK:
+			coder_add_filter(LZMA_FILTER_SUBBLOCK,
+					options_subblock(optarg));
+			break;
+
+		case OPT_X86:
+			coder_add_filter(LZMA_FILTER_X86, NULL);
+			break;
+
+		case OPT_POWERPC:
+			coder_add_filter(LZMA_FILTER_POWERPC, NULL);
+			break;
+
+		case OPT_IA64:
+			coder_add_filter(LZMA_FILTER_IA64, NULL);
+			break;
+
+		case OPT_ARM:
+			coder_add_filter(LZMA_FILTER_ARM, NULL);
+			break;
+
+		case OPT_ARMTHUMB:
+			coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL);
+			break;
+
+		case OPT_SPARC:
+			coder_add_filter(LZMA_FILTER_SPARC, NULL);
+			break;
+
+		case OPT_DELTA:
+			coder_add_filter(LZMA_FILTER_DELTA,
+					options_delta(optarg));
+			break;
+
+		case OPT_LZMA1:
+			coder_add_filter(LZMA_FILTER_LZMA1,
+					options_lzma(optarg));
+			break;
+
+		case OPT_LZMA2:
+			coder_add_filter(LZMA_FILTER_LZMA2,
+					options_lzma(optarg));
+			break;
+
+		// Other
+
+		// --format
+		case 'F': {
+			// Just in case, support both "lzma" and "alone" since
+			// the latter was used for forward compatibility in
+			// LZMA Utils 4.32.x.
+			static const struct {
+				char str[8];
+				enum format_type format;
+			} types[] = {
+				{ "auto",   FORMAT_AUTO },
+				{ "xz",     FORMAT_XZ },
+				{ "lzma",   FORMAT_LZMA },
+				{ "alone",  FORMAT_LZMA },
+				// { "gzip",   FORMAT_GZIP },
+				// { "gz",     FORMAT_GZIP },
+				{ "raw",    FORMAT_RAW },
+			};
+
+			// The loop ends either on a match or inside
+			// message_fatal() when the table is exhausted.
+			size_t i = 0;
+			while (strcmp(types[i].str, optarg) != 0)
+				if (++i == ARRAY_SIZE(types))
+					message_fatal(_("%s: Unknown file "
+							"format type"),
+							optarg);
+
+			opt_format = types[i].format;
+			break;
+		}
+
+		// --check
+		case 'C': {
+			static const struct {
+				char str[8];
+				lzma_check check;
+			} types[] = {
+				{ "none",   LZMA_CHECK_NONE },
+				{ "crc32",  LZMA_CHECK_CRC32 },
+				{ "crc64",  LZMA_CHECK_CRC64 },
+				{ "sha256", LZMA_CHECK_SHA256 },
+			};
+
+			size_t i = 0;
+			while (strcmp(types[i].str, optarg) != 0) {
+				// Note the space before the closing quote:
+				// the two literals are concatenated.
+				if (++i == ARRAY_SIZE(types))
+					message_fatal(_("%s: Unknown integrity "
+							"check type"), optarg);
+			}
+
+			coder_set_check(types[i].check);
+			break;
+		}
+
+		// --files: names are separated by newlines.
+		case OPT_FILES:
+			args->files_delim = '\n';
+
+		// Fall through
+
+		// --files0: names are separated by '\0', which is the value
+		// files_delim was initialized to by the caller.
+		case OPT_FILES0:
+			if (args->files_name != NULL)
+				message_fatal(_("Only one file can be "
+						"specified with `--files' "
+						"or `--files0'."));
+
+			if (optarg == NULL) {
+				// No filename given: read the list from stdin.
+				args->files_name = (char *)stdin_filename;
+				args->files_file = stdin;
+			} else {
+				args->files_name = optarg;
+				args->files_file = fopen(optarg,
+						c == OPT_FILES ? "r" : "rb");
+				if (args->files_file == NULL)
+					message_fatal("%s: %s", optarg,
+							strerror(errno));
+			}
+
+			break;
+
+		default:
+			message_try_help();
+			my_exit(E_ERROR);
+		}
+	}
+
+	return;
+}
+
+
+/// \brief      Parse options from the XZ_OPT environment variable
+///
+/// The value of XZ_OPT is split at whitespace into an argument vector,
+/// which is then handed to parse_real() with argv0 as the program name.
+/// Afterwards the getopt_long() state is reset so that another argument
+/// vector can be parsed. Does nothing if XZ_OPT is not set.
+///
+/// \param      args    Passed through to parse_real()
+/// \param      argv0   Program name to use as argv[0]
+static void
+parse_environment(args_info *args, char *argv0)
+{
+	char *env = getenv("XZ_OPT");
+	if (env == NULL)
+		return;
+
+	// We modify the string, so make a copy of it.
+	env = xstrdup(env);
+
+	// Calculate the number of arguments in env. argc starts at one
+	// to include space for the program name.
+	//
+	// The cast to unsigned char is required: passing a negative char
+	// value (e.g. a non-ASCII byte) to isspace() is undefined behavior.
+	int argc = 1;
+	bool prev_was_space = true;
+	for (size_t i = 0; env[i] != '\0'; ++i) {
+		if (isspace((unsigned char)env[i])) {
+			prev_was_space = true;
+		} else if (prev_was_space) {
+			prev_was_space = false;
+
+			// Keep argc small enough to fit into a signed int
+			// and to keep it usable for memory allocation.
+			if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
+				message_fatal(_("The environment variable "
+						"XZ_OPT contains too many "
+						"arguments"));
+		}
+	}
+
+	// Allocate memory to hold pointers to the arguments. Add one to get
+	// space for the terminating NULL (if some systems happen to need it).
+	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
+	argv[0] = argv0;
+	argv[argc] = NULL;
+
+	// Go through the string again. Split the arguments using '\0'
+	// characters and add pointers to the resulting strings to argv.
+	argc = 1;
+	prev_was_space = true;
+	for (size_t i = 0; env[i] != '\0'; ++i) {
+		if (isspace((unsigned char)env[i])) {
+			prev_was_space = true;
+			env[i] = '\0';
+		} else if (prev_was_space) {
+			prev_was_space = false;
+			argv[argc++] = env + i;
+		}
+	}
+
+	// Parse the argument list we got from the environment. All non-option
+	// arguments i.e. filenames are ignored.
+	parse_real(args, argc, argv);
+
+	// Reset the state of the getopt_long() so that we can parse the
+	// command line options too. There are two incompatible ways to
+	// do it.
+#ifdef HAVE_OPTRESET
+	// BSD
+	optind = 1;
+	optreset = 1;
+#else
+	// GNU, Solaris
+	optind = 0;
+#endif
+
+	// We don't need the argument list from environment anymore.
+	free(argv);
+	free(env);
+
+	return;
+}
+
+
+/// \brief      Parse the options from XZ_OPT and from the command line
+///
+/// The command name (argv[0] without its directory part) selects the
+/// defaults: "lz" in the name makes .lzma the default compression
+/// format, "cat" implies --decompress --stdout, and "un" implies
+/// --decompress. Options from the environment are applied first, then
+/// those from the command line.
+///
+/// \param      args    Filled in with the filename list and the
+///                     --files / --files0 settings
+/// \param      argc    Argument count as passed to main()
+/// \param      argv    Argument vector as passed to main()
+extern void
+args_parse(args_info *args, int argc, char **argv)
+{
+	// Start from "no --files / --files0 seen"; parse_real() overrides
+	// these when it meets either option.
+	args->files_delim = '\0';
+	args->files_file = NULL;
+	args->files_name = NULL;
+
+	// Strip the leading path, if any. The result may be an empty string
+	// when argv[0] is weird; the substring tests below then simply
+	// don't match.
+	const char *const last_slash = strrchr(argv[0], '/');
+	const char *const cmd = last_slash != NULL
+			? last_slash + 1 : argv[0];
+
+	// File format to use for compression when --format=auto or no
+	// --format is given: .lzma if the command name contains "lz",
+	// .xz otherwise.
+	const enum format_type default_format = strstr(cmd, "lz") != NULL
+			? FORMAT_LZMA : FORMAT_XZ;
+
+	// "cat" implies --decompress --stdout; "un" alone implies only
+	// --decompress.
+	const bool called_as_cat = strstr(cmd, "cat") != NULL;
+	if (called_as_cat || strstr(cmd, "un") != NULL)
+		opt_mode = MODE_DECOMPRESS;
+
+	if (called_as_cat)
+		opt_stdout = true;
+
+	// Environment first, command line second, so that the command
+	// line is applied last.
+	parse_environment(args, argv[0]);
+
+	optind = 1;
+	parse_real(args, argc, argv);
+
+	// The source file is never removed when the destination isn't on
+	// disk. Test mode writes nothing at all, but flagging it as stdout
+	// output keeps the rest of the code on the right path.
+	if (opt_mode == MODE_TEST || opt_stdout) {
+		opt_stdout = true;
+		opt_keep_original = true;
+	}
+
+	// Resolve the "auto" format for compression using the default
+	// derived from the command name above.
+	if (opt_format == FORMAT_AUTO && opt_mode == MODE_COMPRESS)
+		opt_format = default_format;
+
+	// The filter chain must be validated for every compression, and also
+	// for raw decoding because there the command line options describe
+	// the data to be decoded.
+	if (opt_format == FORMAT_RAW || opt_mode == MODE_COMPRESS)
+		coder_set_compression_settings();
+
+	// Filenames came from the command line, or --files / --files0
+	// was used: hand over the remaining part of argv.
+	if (argv[optind] != NULL || args->files_name != NULL) {
+		args->arg_names = argv + optind;
+		args->arg_count = argc - optind;
+		return;
+	}
+
+	// Nothing to process was named, so fall back to standard input.
+	// The "-" constant is never modified or freed, but the caller
+	// modifies the array, so the array itself isn't const.
+	static char *names_stdin[2] = { (char *)"-", NULL };
+	args->arg_names = names_stdin;
+	args->arg_count = 1;
+	return;
+}
diff --git a/src/xz/args.h b/src/xz/args.h
new file mode 100644
index 00000000..6d4e8282
--- /dev/null
+++ b/src/xz/args.h
@@ -0,0 +1,56 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file args.h
+/// \brief Argument parsing
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef ARGS_H
+#define ARGS_H
+
+#include "private.h"
+
+
+/// Result of argument parsing, filled in by args_parse().
+typedef struct {
+ /// Filenames from command line. args_parse() points this either into
+ /// argv (starting at the first non-option argument) or at a static
+ /// one-element list holding "-" when no input was named.
+ char **arg_names;
+
+ /// Number of filenames from command line (number of entries in
+ /// arg_names, not counting a terminating NULL)
+ size_t arg_count;
+
+ /// Name of the file from which to read filenames. This is NULL
+ /// if --files or --files0 was not used. It is set to stdin_filename
+ /// when the option was given without an argument.
+ char *files_name;
+
+ /// File opened for reading from which filenames are read. This is
+ /// non-NULL only if files_name is non-NULL.
+ FILE *files_file;
+
+ /// Delimiter for filenames read from files_file: '\n' for --files,
+ /// '\0' for --files0
+ char files_delim;
+
+} args_info;
+
+
+/// True if -c / --stdout was given; args_parse() also sets this in test
+/// mode and when the command name contains "cat".
+extern bool opt_stdout;
+
+/// True if -f / --force was given.
+extern bool opt_force;
+
+/// True if -k / --keep was given, or if opt_stdout is true after parsing.
+extern bool opt_keep_original;
+// extern bool opt_recursive;
+
+/// Name used for standard input ("(stdin)"). Must not be modified or freed.
+extern const char *stdin_filename;
+
+/// Parse the XZ_OPT environment variable and then the command line,
+/// setting the global options and filling in *args.
+extern void args_parse(args_info *args, int argc, char **argv);
+
+#endif
diff --git a/src/xz/hardware.c b/src/xz/hardware.c
new file mode 100644
index 00000000..63bf0937
--- /dev/null
+++ b/src/xz/hardware.c
@@ -0,0 +1,122 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file hardware.c
+/// \brief Detection of available hardware resources
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+#include "physmem.h"
+
+
+/// Maximum number of free *coder* threads. This can be set with
+/// the --threads=NUM command line option. hardware_cores() replaces the
+/// initial value of one with the detected CPU count when it is available.
+size_t opt_threads = 1;
+
+
+/// Memory usage limit for encoding; computed by hardware_memlimit_init()
+static uint64_t memlimit_encoder;
+
+/// Memory usage limit for decoding; computed by hardware_memlimit_init()
+static uint64_t memlimit_decoder;
+
+/// Memory usage limit given on the command line or environment variable.
+/// Zero indicates the default (memlimit_encoder or memlimit_decoder).
+static uint64_t memlimit_custom = 0;
+
+
+/// Get the number of CPU cores, and set opt_threads to default to that value.
+/// User can then override this with --threads command line option.
+///
+/// If the number of cores cannot be determined, opt_threads keeps its
+/// previous value. The result is additionally capped to the maximum number
+/// of threads when the system reports one.
+static void
+hardware_cores(void)
+{
+#if defined(HAVE_NUM_PROCESSORS_SYSCONF)
+	const long cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (cpus > 0)
+		opt_threads = (size_t)(cpus);
+
+#elif defined(HAVE_NUM_PROCESSORS_SYSCTL)
+	// sysctl() takes the length of the name array as its second
+	// argument, and the new-value length (last argument) is a size_t,
+	// not a pointer. Nothing is being set, hence NULL and 0.
+	int name[2] = { CTL_HW, HW_NCPU };
+	int cpus;
+	size_t cpus_size = sizeof(cpus);
+	if (!sysctl(name, 2, &cpus, &cpus_size, NULL, 0)
+			&& cpus_size == sizeof(cpus) && cpus > 0)
+		opt_threads = (size_t)(cpus);
+#endif
+
+	// Limit opt_threads so that it doesn't exceed the maximum number
+	// of threads, if such a maximum is known.
+
+#if defined(_SC_THREAD_THREADS_MAX)
+	const long threads_max = sysconf(_SC_THREAD_THREADS_MAX);
+	if (threads_max > 0 && (size_t)(threads_max) < opt_threads)
+		opt_threads = (size_t)(threads_max);
+
+#elif defined(PTHREAD_THREADS_MAX)
+	if (opt_threads > PTHREAD_THREADS_MAX)
+		opt_threads = PTHREAD_THREADS_MAX;
+#endif
+
+	return;
+}
+
+
+/// Compute the default memory usage limits (memlimit_encoder and
+/// memlimit_decoder) from the amount of RAM reported by physmem().
+static void
+hardware_memlimit_init(void)
+{
+ uint64_t mem = physmem();
+
+ // If we cannot determine the amount of RAM, assume 16 MiB. Maybe
+ // even that is too much on some systems. But on most systems it's
+ // far too little, and can be annoying.
+ //
+ // NOTE(review): this comment used to say 32 MiB while the code
+ // below uses 16 MiB; confirm which value is intended.
+ if (mem == 0)
+ mem = UINT64_C(16) * 1024 * 1024;
+
+ // Use at maximum of 90 % of RAM when encoding and 33 % when decoding.
+ memlimit_encoder = mem - mem / 10;
+ memlimit_decoder = mem / 3;
+
+ return;
+}
+
+
+/// Store a custom memory usage limit that overrides both defaults.
+/// Passing zero makes the getters return the defaults again.
+extern void
+hardware_memlimit_set(uint64_t memlimit)
+{
+	memlimit_custom = memlimit;
+}
+
+
+/// Return the memory usage limit for encoding: the custom limit if one
+/// has been set (non-zero), otherwise the default encoder limit.
+extern uint64_t
+hardware_memlimit_encoder(void)
+{
+	if (memlimit_custom == 0)
+		return memlimit_encoder;
+
+	return memlimit_custom;
+}
+
+
+/// Return the memory usage limit for decoding: the custom limit if one
+/// has been set (non-zero), otherwise the default decoder limit.
+extern uint64_t
+hardware_memlimit_decoder(void)
+{
+	if (memlimit_custom == 0)
+		return memlimit_decoder;
+
+	return memlimit_custom;
+}
+
+
+/// Initialize the hardware-dependent defaults: first the memory usage
+/// limits, then the default number of threads.
+extern void
+hardware_init(void)
+{
+	hardware_memlimit_init();
+	hardware_cores();
+}
diff --git a/src/xz/hardware.h b/src/xz/hardware.h
new file mode 100644
index 00000000..f604df20
--- /dev/null
+++ b/src/xz/hardware.h
@@ -0,0 +1,45 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       hardware.h
+/// \brief Detection of available hardware resources
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef HARDWARE_H
+#define HARDWARE_H
+
+#include "private.h"
+
+
+/// Maximum number of coder threads; set with --threads=NUM.
+extern size_t opt_threads;
+
+
+/// Initialize some hardware-specific variables, which are needed by other
+/// hardware_* functions.
+extern void hardware_init(void);
+
+
+/// Set custom memory usage limit. This is used for both encoding and
+/// decoding. Zero indicates resetting the limit back to defaults.
+extern void hardware_memlimit_set(uint64_t memlimit);
+
+/// Get the memory usage limit for encoding. By default this is 90 % of RAM.
+extern uint64_t hardware_memlimit_encoder(void);
+
+
+/// Get the memory usage limit for decoding. By default this is one third
+/// (about 33 %) of RAM.
+extern uint64_t hardware_memlimit_decoder(void);
+
+#endif
diff --git a/src/xz/io.c b/src/xz/io.c
new file mode 100644
index 00000000..0ec63f03
--- /dev/null
+++ b/src/xz/io.c
@@ -0,0 +1,658 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file io.c
+/// \brief File opening, unlinking, and closing
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#include <fcntl.h>
+
+#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
+# include <sys/time.h>
+#elif defined(HAVE_UTIME)
+# include <utime.h>
+#endif
+
+
+/// \brief      Removes a file after checking that it is still the same file
+///
+/// The device and inode numbers of the file currently found at `name' are
+/// compared against those in known_st, and the file is removed only if
+/// they match. A small race between the check and the removal cannot be
+/// avoided, but this is much better than nothing (the file could have
+/// been moved/replaced even hours earlier).
+///
+/// \param      name        Pathname of the file to remove
+/// \param      known_st    stat information of the file we expect to find
+static void
+io_unlink(const char *name, const struct stat *known_st)
+{
+	struct stat current_st;
+
+	const bool same_file = lstat(name, &current_st) == 0
+			&& current_st.st_dev == known_st->st_dev
+			&& current_st.st_ino == known_st->st_ino;
+
+	if (!same_file) {
+		message_error(_("%s: File seems to be moved, not removing"),
+				name);
+		return;
+	}
+
+	// The file may still get replaced between the lstat() above and
+	// the unlink() below, but at least we tried to avoid removing
+	// a wrong file.
+	if (unlink(name) != 0)
+		message_error(_("%s: Cannot remove: %s"),
+				name, strerror(errno));
+
+	return;
+}
+
+
+/// \brief      Copies owner/group, permissions, and timestamps
+///
+/// The attributes are read from pair->src_st and applied to the
+/// destination file. Failures produce at most a warning; the function
+/// never aborts the operation.
+///
+/// \param      pair    File pair whose src_st describes the source file
+///                     and whose dest_fd / dest_name identify the
+///                     destination file
+///
+/// \todo       ACL and EA support
+///
+static void
+io_copy_attrs(const file_pair *pair)
+{
+	// This function is more tricky than you may think at first.
+	// Blindly copying permissions may permit users to access the
+	// destination file who didn't have permission to access the
+	// source file.
+
+	// Simple cache to avoid repeated calls to geteuid().
+	static enum {
+		WARN_FCHOWN_UNKNOWN,
+		WARN_FCHOWN_NO,
+		WARN_FCHOWN_YES,
+	} warn_fchown = WARN_FCHOWN_UNKNOWN;
+
+	// Try changing the owner of the file. If we aren't root or the owner
+	// isn't already us, fchown() probably doesn't succeed. We warn
+	// about failing fchown() only if we are root.
+	if (fchown(pair->dest_fd, pair->src_st.st_uid, -1)
+			&& warn_fchown != WARN_FCHOWN_NO) {
+		if (warn_fchown == WARN_FCHOWN_UNKNOWN)
+			warn_fchown = geteuid() == 0
+					? WARN_FCHOWN_YES : WARN_FCHOWN_NO;
+
+		if (warn_fchown == WARN_FCHOWN_YES)
+			message_warning(_("%s: Cannot set the file owner: %s"),
+					pair->dest_name, strerror(errno));
+	}
+
+	mode_t mode;
+
+	if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
+		message_warning(_("%s: Cannot set the file group: %s"),
+				pair->dest_name, strerror(errno));
+		// We can still safely copy some additional permissions:
+		// `group' must be at least as strict as `other' and
+		// also vice versa.
+		//
+		// NOTE: After this, the owner of the source file may
+		// get additional permissions. This shouldn't be too bad,
+		// because the owner would have had permission to chmod
+		// the original file anyway.
+		mode = ((pair->src_st.st_mode & 0070) >> 3)
+				& (pair->src_st.st_mode & 0007);
+		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
+	} else {
+		// Drop the setuid, setgid, and sticky bits.
+		mode = pair->src_st.st_mode & 0777;
+	}
+
+	if (fchmod(pair->dest_fd, mode))
+		message_warning(_("%s: Cannot set the file permissions: %s"),
+				pair->dest_name, strerror(errno));
+
+	// Copy the timestamps. We have several possible ways to do this, of
+	// which some are better in both security and precision.
+	//
+	// First, get the nanosecond part of the timestamps. As of writing,
+	// it's not standardized by POSIX, and there are several names for
+	// the same thing in struct stat.
+	long atime_nsec;
+	long mtime_nsec;
+
+#	if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
+	// GNU and Solaris
+	atime_nsec = pair->src_st.st_atim.tv_nsec;
+	mtime_nsec = pair->src_st.st_mtim.tv_nsec;
+
+#	elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
+	// BSD
+	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
+	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
+
+#	elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
+	// GNU and BSD without extensions
+	atime_nsec = pair->src_st.st_atimensec;
+	mtime_nsec = pair->src_st.st_mtimensec;
+
+#	elif defined(HAVE_STRUCT_STAT_ST_UATIME)
+	// Tru64
+	atime_nsec = pair->src_st.st_uatime * 1000;
+	mtime_nsec = pair->src_st.st_umtime * 1000;
+
+#	elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
+	// UnixWare
+	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
+	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
+
+#	else
+	// Safe fallback
+	atime_nsec = 0;
+	mtime_nsec = 0;
+#	endif
+
+	// Construct a structure to hold the timestamps and call appropriate
+	// function to set the timestamps. In every variant the timestamps
+	// are taken from the source file and applied to the destination.
+#if defined(HAVE_FUTIMENS)
+	// Use nanosecond precision.
+	struct timespec tv[2];
+	tv[0].tv_sec = pair->src_st.st_atime;
+	tv[0].tv_nsec = atime_nsec;
+	tv[1].tv_sec = pair->src_st.st_mtime;
+	tv[1].tv_nsec = mtime_nsec;
+
+	(void)futimens(pair->dest_fd, tv);
+
+#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
+	// Use microsecond precision.
+	struct timeval tv[2];
+	tv[0].tv_sec = pair->src_st.st_atime;
+	tv[0].tv_usec = atime_nsec / 1000;
+	tv[1].tv_sec = pair->src_st.st_mtime;
+	tv[1].tv_usec = mtime_nsec / 1000;
+
+#	if defined(HAVE_FUTIMES)
+	(void)futimes(pair->dest_fd, tv);
+#	elif defined(HAVE_FUTIMESAT)
+	(void)futimesat(pair->dest_fd, NULL, tv);
+#	else
+	// Argh, no function to use a file descriptor to set the timestamp.
+	// Fall back to the destination's pathname.
+	(void)utimes(pair->dest_name, tv);
+#	endif
+
+#elif defined(HAVE_UTIME)
+	// Use one-second precision. utime() doesn't support using file
+	// descriptor either, so the destination's pathname is used.
+	const struct utimbuf buf = {
+		.actime = pair->src_st.st_atime,
+		.modtime = pair->src_st.st_mtime,
+	};
+
+	// Avoid warnings.
+	(void)atime_nsec;
+	(void)mtime_nsec;
+
+	(void)utime(pair->dest_name, &buf);
+#endif
+
+	return;
+}
+
+
+/// Opens the source file. Returns false on success, true on error.
+static bool
+io_open_src(file_pair *pair)
+{
+ // There's nothing to open when reading from stdin.
+ if (pair->src_name == stdin_filename) {
+ pair->src_fd = STDIN_FILENO;
+ return false;
+ }
+
+ // We accept only regular files if we are writing the output
+ // to disk too, and if --force was not given.
+ const bool reg_files_only = !opt_stdout && !opt_force;
+
+ // Flags for open()
+ int flags = O_RDONLY | O_NOCTTY;
+
+ // If we accept only regular files, we need to be careful to avoid
+ // problems with special files like devices and FIFOs. O_NONBLOCK
+ // prevents blocking when opening such files. When we want to accept
+ // special files, we must not use O_NONBLOCK, or otherwise we won't
+ // block waiting e.g. FIFOs to become readable.
+ if (reg_files_only)
+ flags |= O_NONBLOCK;
+
+#ifdef O_NOFOLLOW
+ if (reg_files_only)
+ flags |= O_NOFOLLOW;
+#else
+ // Some POSIX-like systems lack O_NOFOLLOW (it's not required
+ // by POSIX). Check for symlinks with a separate lstat() on
+ // these systems.
+ if (reg_files_only) {
+ struct stat st;
+ if (lstat(pair->src_name, &st)) {
+ message_error("%s: %s", pair->src_name,
+ strerror(errno));
+ return true;
+
+ } else if (S_ISLNK(st.st_mode)) {
+ message_warning(_("%s: Is a symbolic link, "
+ "skipping"), pair->src_name);
+ return true;
+ }
+ }
+#endif
+
+ // Try to open the file. If we are accepting non-regular files,
+ // unblock the caught signals so that open() can be interrupted
+ // if it blocks e.g. due to a FIFO file.
+ if (!reg_files_only)
+ signals_unblock();
+
+ // Maybe this wouldn't need a loop, since all the signal handlers for
+ // which we don't use SA_RESTART set user_abort to true. But it
+ // doesn't hurt to have it just in case.
+ do {
+ pair->src_fd = open(pair->src_name, flags);
+ } while (pair->src_fd == -1 && errno == EINTR && !user_abort);
+
+ if (!reg_files_only)
+ signals_block();
+
+ if (pair->src_fd == -1) {
+ // If we were interrupted, don't display any error message.
+ if (errno == EINTR) {
+ // All the signals that don't have SA_RESTART
+ // set user_abort.
+ assert(user_abort);
+ return true;
+ }
+
+#ifdef O_NOFOLLOW
+ // Give an understandable error message in if reason
+ // for failing was that the file was a symbolic link.
+ //
+ // Note that at least Linux, OpenBSD, Solaris, and Darwin
+ // use ELOOP to indicate if O_NOFOLLOW was the reason
+ // that open() failed. Because there may be
+ // directories in the pathname, ELOOP may occur also
+ // because of a symlink loop in the directory part.
+ // So ELOOP doesn't tell us what actually went wrong.
+ //
+ // FreeBSD associates EMLINK with O_NOFOLLOW and
+ // Tru64 uses ENOTSUP. We use these directly here
+ // and skip the lstat() call and the associated race.
+ // I want to hear if there are other kernels that
+ // fail with something else than ELOOP with O_NOFOLLOW.
+ bool was_symlink = false;
+
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+ if (errno == EMLINK)
+ was_symlink = true;
+
+# elif defined(__digital__) && defined(__unix__)
+ if (errno == ENOTSUP)
+ was_symlink = true;
+
+# else
+ if (errno == ELOOP && reg_files_only) {
+ const int saved_errno = errno;
+ struct stat st;
+ if (lstat(pair->src_name, &st) == 0
+ && S_ISLNK(st.st_mode))
+ was_symlink = true;
+
+ errno = saved_errno;
+ }
+# endif
+
+ if (was_symlink)
+ message_warning(_("%s: Is a symbolic link, "
+ "skipping"), pair->src_name);
+ else
+#endif
+ // Something else than O_NOFOLLOW failing
+ // (assuming that the race conditions didn't
+ // confuse us).
+ message_error("%s: %s", pair->src_name,
+ strerror(errno));
+
+ return true;
+ }
+
+ // Drop O_NONBLOCK, which is used only when we are accepting only
+ // regular files. After the open() call, we want things to block
+ // instead of giving EAGAIN.
+ if (reg_files_only) {
+ flags = fcntl(pair->src_fd, F_GETFL);
+ if (flags == -1)
+ goto error_msg;
+
+ flags &= ~O_NONBLOCK;
+
+ if (fcntl(pair->src_fd, F_SETFL, flags))
+ goto error_msg;
+ }
+
+ // Stat the source file. We need the result also when we copy
+ // the permissions, and when unlinking.
+ if (fstat(pair->src_fd, &pair->src_st))
+ goto error_msg;
+
+ if (S_ISDIR(pair->src_st.st_mode)) {
+ message_warning(_("%s: Is a directory, skipping"),
+ pair->src_name);
+ goto error;
+ }
+
+ if (reg_files_only) {
+ if (!S_ISREG(pair->src_st.st_mode)) {
+ message_warning(_("%s: Not a regular file, "
+ "skipping"), pair->src_name);
+ goto error;
+ }
+
+ if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
+ // gzip rejects setuid and setgid files even
+ // when --force was used. bzip2 doesn't check
+ // for them, but calls fchown() after fchmod(),
+ // and many systems automatically drop setuid
+ // and setgid bits there.
+ //
+ // We accept setuid and setgid files if
+ // --force was used. We drop these bits
+ // explicitly in io_copy_attr().
+ message_warning(_("%s: File has setuid or "
+ "setgid bit set, skipping"),
+ pair->src_name);
+ goto error;
+ }
+
+ if (pair->src_st.st_mode & S_ISVTX) {
+ message_warning(_("%s: File has sticky bit "
+ "set, skipping"),
+ pair->src_name);
+ goto error;
+ }
+
+ if (pair->src_st.st_nlink > 1) {
+ message_warning(_("%s: Input file has more "
+ "than one hard link, "
+ "skipping"), pair->src_name);
+ goto error;
+ }
+ }
+
+ return false;
+
+error_msg:
+ message_error("%s: %s", pair->src_name, strerror(errno));
+error:
+ (void)close(pair->src_fd);
+ return true;
+}
+
+
+/// \brief      Releases the source file of a file_pair
+///
+/// Standard input and a descriptor that was never opened (-1) are left
+/// untouched.
+///
+/// \param      pair    File whose src_fd should be closed
+/// \param      success If true and --keep hasn't been used, the source
+///                     file is unlinked just before its descriptor is
+///                     closed.
+static void
+io_close_src(file_pair *pair, bool success)
+{
+	// Nothing to do for stdin or for a source that isn't open.
+	if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1)
+		return;
+
+	// Unlink while the descriptor is still open. This way nobody can
+	// replace the file with another one that happens to get the same
+	// inode number, which would make us remove the wrong file.
+	const bool remove_source = success && !opt_keep_original;
+	if (remove_source)
+		io_unlink(pair->src_name, &pair->src_st);
+
+	(void)close(pair->src_fd);
+}
+
+
+/// \brief      Opens the destination file of the file_pair structure
+///
+/// When --stdout was used or the source is standard input, the destination
+/// is standard output and dest_name points to a static string. Otherwise
+/// the name comes from suffix_get_dest_name() and the file is created with
+/// O_EXCL and owner-only read/write permissions.
+///
+/// \param      pair    File pair whose src_name and src_fd are already set
+///
+/// \return     False on success, true on error. On error pair->dest_name
+///             has been freed.
+static bool
+io_open_dest(file_pair *pair)
+{
+	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
+		// We don't modify or free() this.
+		pair->dest_name = (char *)"(stdout)";
+		pair->dest_fd = STDOUT_FILENO;
+		return false;
+	}
+
+	pair->dest_name = suffix_get_dest_name(pair->src_name);
+	if (pair->dest_name == NULL)
+		return true;
+
+	// If --force was used, unlink the target file first. A missing
+	// target (ENOENT) is not an error. This is done exactly once; the
+	// original code repeated this block a second time for no effect.
+	if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
+		message_error("%s: Cannot unlink: %s",
+				pair->dest_name, strerror(errno));
+		free(pair->dest_name);
+		return true;
+	}
+
+	// Open the file. O_EXCL makes sure that we never overwrite
+	// an existing file.
+	const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL;
+	const mode_t mode = S_IRUSR | S_IWUSR;
+	pair->dest_fd = open(pair->dest_name, flags, mode);
+
+	if (pair->dest_fd == -1) {
+		// Don't bother with error message if user requested
+		// us to exit anyway.
+		if (!user_abort)
+			message_error("%s: %s", pair->dest_name,
+					strerror(errno));
+
+		free(pair->dest_name);
+		return true;
+	}
+
+	// If this really fails... well, we have a safe fallback:
+	// zeroed device and inode numbers are stored instead.
+	if (fstat(pair->dest_fd, &pair->dest_st)) {
+		pair->dest_st.st_dev = 0;
+		pair->dest_st.st_ino = 0;
+	}
+
+	return false;
+}
+
+
+/// \brief      Closes the destination file of a file_pair
+///
+/// Nothing is done when the destination isn't open or is standard output.
+///
+/// \param      pair    File whose dest_fd should be closed
+/// \param      success If false, the destination file is removed from
+///                     the disk after it has been closed.
+///
+/// \return     Zero if closing succeeds. If close() fails, an error
+///             message is printed, the file is removed, and -1 is
+///             returned. pair->dest_name is freed in both cases.
+static int
+io_close_dest(file_pair *pair, bool success)
+{
+	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
+		return 0;
+
+	int ret = 0;
+
+	// The file is junk if the operation that produced it failed...
+	bool remove_dest = !success;
+
+	if (close(pair->dest_fd) != 0) {
+		message_error(_("%s: Closing the file failed: %s"),
+				pair->dest_name, strerror(errno));
+
+		// ...or if closing failed, because then we cannot trust
+		// the contents of the file.
+		remove_dest = true;
+		ret = -1;
+	}
+
+	if (remove_dest)
+		io_unlink(pair->dest_name, &pair->dest_st);
+
+	free(pair->dest_name);
+
+	return ret;
+}
+
+
+/// \brief      Opens the source file and the matching destination file
+///
+/// \param      src_name    Name of the source file
+///
+/// \return     Pointer to a statically allocated file_pair on success.
+///             NULL if the name is rejected by is_empty_filename(), if
+///             opening either file fails, or if the user has requested
+///             the program to stop.
+extern file_pair *
+io_open(const char *src_name)
+{
+	// Only one file is open at a time, so a single statically
+	// allocated structure is enough.
+	static file_pair pair;
+
+	if (is_empty_filename(src_name))
+		return NULL;
+
+	// Members that aren't listed here (the stat buffers) get zeroed.
+	pair = (file_pair){
+		.src_fd = -1,
+		.dest_fd = -1,
+		.src_name = src_name,
+		.dest_name = NULL,
+		.src_eof = false,
+	};
+
+	// With the hooked signals blocked, the system calls below
+	// won't fail with EINTR.
+	signals_block();
+
+	file_pair *opened = NULL;
+
+	const bool src_failed = io_open_src(&pair);
+	if (!src_failed) {
+		// io_open_src() may have unblocked the signals for a while,
+		// so user_abort can be set even though open() succeeded.
+		// The destination is opened only if it isn't set.
+		const bool usable = !user_abort && !io_open_dest(&pair);
+		if (usable)
+			opened = &pair;
+		else
+			io_close_src(&pair, false);
+	}
+
+	signals_unblock();
+
+	return opened;
+}
+
+
+/// \brief      Closes both files of a file_pair
+///
+/// \param      pair    File pair returned by io_open()
+/// \param      success True if the operation using the pair succeeded.
+///                     It controls which of the two files may get
+///                     unlinked.
+extern void
+io_close(file_pair *pair, bool success)
+{
+	signals_block();
+
+	// Attributes are copied only to a real file, never to stdout.
+	const bool dest_is_file = pair->dest_fd != STDOUT_FILENO;
+	if (success && dest_is_file)
+		io_copy_attrs(pair);
+
+	// The destination is closed first. If that fails, the source
+	// file must not be removed!
+	const bool dest_closed = io_close_dest(pair, success) == 0;
+
+	// Close the source, unlinking it if everything went fine and
+	// keeping the source file hasn't been requested.
+	io_close_src(pair, success && dest_closed);
+
+	signals_unblock();
+}
+
+
+/// \brief      Reads from the source file until the buffer is full
+///
+/// \return     Number of bytes stored in buf. It is less than size only
+///             when the end of the file was reached, in which case
+///             pair->src_eof has been set. SIZE_MAX is returned on a read
+///             error (a message is printed) and when a signal that sets
+///             user_abort interrupts the read.
+extern size_t
+io_read(file_pair *pair, uint8_t *buf, size_t size)
+{
+	// We use small buffers here.
+	assert(size < SSIZE_MAX);
+
+	size_t remaining = size;
+
+	for (uint8_t *dest = buf; remaining > 0; ) {
+		const ssize_t got = read(pair->src_fd, dest, remaining);
+
+		if (got == 0) {
+			pair->src_eof = true;
+			break;
+		}
+
+		if (got != -1) {
+			dest += (size_t)(got);
+			remaining -= (size_t)(got);
+			continue;
+		}
+
+		// read() failed. An interrupted call is simply retried
+		// unless the user wants us to stop.
+		if (errno == EINTR) {
+			if (user_abort)
+				return SIZE_MAX;
+
+			continue;
+		}
+
+		message_error(_("%s: Read error: %s"),
+				pair->src_name, strerror(errno));
+
+		// FIXME Is this needed?
+		pair->src_eof = true;
+
+		return SIZE_MAX;
+	}
+
+	return size - remaining;
+}
+
+
+/// \brief      Writes a whole buffer to the destination file
+///
+/// \param      pair    File pair having the destination file open
+/// \param      buf     Data to be written
+/// \param      size    Number of bytes in buf; must be less than SSIZE_MAX
+///
+/// \return     False if everything was written. True on error or when
+///             the write was interrupted and user_abort is set. An error
+///             message is printed for errors other than EPIPE.
+extern bool
+io_write(const file_pair *pair, const uint8_t *buf, size_t size)
+{
+	assert(size < SSIZE_MAX);
+
+	while (size > 0) {
+		const ssize_t amount = write(pair->dest_fd, buf, size);
+		if (amount == -1) {
+			if (errno == EINTR) {
+				// The return type is bool, so return true
+				// instead of -1 like every other error path.
+				if (user_abort)
+					return true;
+
+				continue;
+			}
+
+			// Handle broken pipe specially. gzip and bzip2
+			// don't print anything on SIGPIPE. In addition,
+			// gzip --quiet uses exit status 2 (warning) on
+			// broken pipe instead of whatever raise(SIGPIPE)
+			// would make it return. It is there to hide "Broken
+			// pipe" message on some old shells (probably old
+			// GNU bash).
+			//
+			// We don't do anything special with --quiet, which
+			// is what bzip2 does too. If we get SIGPIPE, we
+			// will handle it like other signals by setting
+			// user_abort, and get EPIPE here.
+			if (errno != EPIPE)
+				message_error(_("%s: Write error: %s"),
+					pair->dest_name, strerror(errno));
+
+			return true;
+		}
+
+		// A partial write: continue from where it stopped.
+		buf += (size_t)(amount);
+		size -= (size_t)(amount);
+	}
+
+	return false;
+}
diff --git a/src/xz/io.h b/src/xz/io.h
new file mode 100644
index 00000000..4d8e61b2
--- /dev/null
+++ b/src/xz/io.h
@@ -0,0 +1,97 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file io.h
+/// \brief I/O types and functions
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef IO_H
+#define IO_H
+
+#include "private.h"
+
+
+// Size of the I/O buffers in bytes. Some systems have suboptimal BUFSIZ,
+// so BUFSIZ is used only when it is bigger than 1024; otherwise 8192 is
+// used instead.
+#if BUFSIZ <= 1024
+# define IO_BUFFER_SIZE 8192
+#else
+# define IO_BUFFER_SIZE BUFSIZ
+#endif
+
+
+/// State of one source file and the destination file that is being
+/// created from it. io_open() fills this in and io_close() releases it.
+typedef struct {
+ /// Name of the source file (as given on the command line) or
+ /// pointer to static "(stdin)" when reading from standard input.
+ const char *src_name;
+
+ /// Destination filename converted from src_name or pointer to static
+ /// "(stdout)" when writing to standard output. The converted name is
+ /// freed when the destination is closed; the static string is not.
+ char *dest_name;
+
+ /// File descriptor of the source file, or -1 when it isn't open
+ int src_fd;
+
+ /// File descriptor of the target file, or -1 when it isn't open
+ int dest_fd;
+
+ /// Stat of the source file.
+ struct stat src_st;
+
+ /// Stat of the destination file.
+ struct stat dest_st;
+
+ /// True once end of the source file has been detected.
+ bool src_eof;
+
+} file_pair;
+
+
+/// \brief Opens a file pair
+///
+/// Opens the source file and then the destination file.
+///
+/// \param src_name Name of the source file
+///
+/// \return Pointer to a statically allocated file_pair on success, so
+/// only one pair can be in use at a time. NULL is returned if
+/// opening fails or the user has requested the program to stop.
+extern file_pair *io_open(const char *src_name);
+
+
+/// \brief Closes the file descriptors and frees possible allocated memory
+///
+/// The success argument determines if source or destination file gets
+/// unlinked:
+/// - false: The destination file is unlinked.
+/// - true: The source file is unlinked unless writing to stdout or --keep
+/// was used.
+///
+/// If closing the destination file fails, the destination is unlinked and
+/// the source file is kept even if success is true.
+extern void io_close(file_pair *pair, bool success);
+
+
+/// \brief Reads from the source file to a buffer
+///
+/// \param pair File pair having the source file open for reading
+/// \param buf Destination buffer to hold the read data
+/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
+///
+/// \return On success, number of bytes read is returned. It is smaller
+/// than size (possibly zero) only if the end of the file was
+/// reached, in which case pair->src_eof is set to true.
+/// On error, SIZE_MAX is returned and error message printed.
+/// SIZE_MAX is also returned, without a message, if the read
+/// was interrupted because the user wants the program to stop.
+extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);
+
+
+/// \brief Writes a buffer to the destination file
+///
+/// \param pair File pair having the destination file open for writing
+/// \param buf Buffer containing the data to be written
+/// \param size Size of the buffer; assumed to be smaller than SSIZE_MAX
+///
+/// \return On success, false is returned. On error, true is returned
+/// and an error message is printed, except that nothing is
+/// printed for a broken pipe or when the write was interrupted
+/// because the user wants the program to stop.
+extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size);
+
+#endif
diff --git a/src/xz/list.c b/src/xz/list.c
new file mode 100644
index 00000000..8728d47b
--- /dev/null
+++ b/src/xz/list.c
@@ -0,0 +1,477 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file list.c
+/// \brief Listing information about .lzma files
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+
+/*
+
+1. Check the file type: native, alone, unknown
+
+Alone:
+1. Show info about header. Don't look for concatenated parts.
+
+Native:
+1. Check that Stream Header is valid.
+2. Seek to the end of the file.
+3. Skip padding.
+4. Reverse decode Stream Footer.
+5. Seek Backward Size bytes.
+6.
+
+*/
+
+
+/// Reports that the file type isn't supported, sets the exit status to
+/// indicate an error, and closes the file.
+///
+/// NOTE(review): This looks like stale code. io_close() is called here with
+/// one argument and set_exit_status() with ERROR, while io.h declares
+/// io_close(file_pair *, bool) and main.h names the constant E_ERROR. The
+/// callers in this file also pass a listing_handle instead of a file_handle.
+/// Confirm the intended API before enabling this file.
+static void
+unsupported_file(file_handle *handle)
+{
+ errmsg(V_ERROR, "%s: Unsupported file type", handle->name);
+ set_exit_status(ERROR);
+ (void)io_close(handle);
+ return;
+}
+
+
+/// Writes a null-terminated string to stdout so that ASCII control
+/// characters (0x00-0x1F and 0x7F) are shown as \xNN with two uppercase
+/// hexadecimal digits. All other bytes are written as is.
+static void
+print_escaped(const uint8_t *str)
+{
+	for (const uint8_t *p = str; *p != '\0'; ++p) {
+		const bool is_control = *p <= 0x1F || *p == 0x7F;
+
+		if (is_control)
+			printf("\\x%02X", *p);
+		else
+			putchar(*p);
+	}
+}
+
+
+/// Unfinished stub: it only initializes a Stream Header decoder and never
+/// uses the result.
+///
+/// NOTE(review): Another list_native() taking a listing_handle is defined
+/// later in this file; only one of the two definitions can stay.
+static void
+list_native(file_handle *handle)
+{
+ lzma_stream strm = LZMA_STREAM_INIT;
+ lzma_stream_flags flags;
+ lzma_ret ret = lzma_stream_header_decoder(&strm, &flags);
+
+}
+
+
+/// \brief      Prints information about a file in the LZMA_Alone format
+///
+/// The 13-byte header at the beginning of handle->buffer is parsed:
+///   - byte 0:      properties byte encoding (pb * 5 + lp) * 9 + lc
+///   - bytes 1-4:   dictionary size, 32-bit little endian
+///   - bytes 5-12:  uncompressed size, 64-bit little endian; UINT64_MAX
+///                  means that the size is unknown
+///
+/// If verbosity is below V_WARNING, the fields are printed as key=value
+/// lines; otherwise a human-readable listing is printed. Files whose
+/// header doesn't look valid are passed to unsupported_file().
+///
+/// NOTE(review): unsupported_file() is declared to take a file_handle
+/// pointer while a const listing_handle pointer is passed here; that
+/// mismatch has to be resolved in unsupported_file() itself.
+static void
+list_alone(const listing_handle *handle)
+{
+	enum { ALONE_HEADER_SIZE = 13 };
+
+	// Don't parse bytes that were never read from the file.
+	if (handle->buffer_size < ALONE_HEADER_SIZE) {
+		unsupported_file(handle);
+		return;
+	}
+
+	const uint8_t *header = handle->buffer;
+
+	// The biggest valid properties byte has pb=4, lp=4, and lc=8.
+	if (header[0] > (4 * 5 + 4) * 9 + 8) {
+		unsupported_file(handle);
+		return;
+	}
+
+	// Work on a copy: the handle is const and must not be modified.
+	unsigned int props = header[0];
+	const unsigned int pb = props / (9 * 5);
+	props -= pb * 9 * 5;
+	const unsigned int lp = props / 9;
+	const unsigned int lc = props - lp * 9;
+
+	// Both integers are stored in little endian byte order, that is,
+	// the least significant byte comes first.
+	uint32_t dict = 0;
+	for (size_t i = 0; i < 4; ++i)
+		dict |= (uint32_t)(header[1 + i]) << (8 * i);
+
+	if (dict > LZMA_DICTIONARY_SIZE_MAX) {
+		unsupported_file(handle);
+		return;
+	}
+
+	uint64_t uncompressed_size = 0;
+	for (size_t i = 0; i < 8; ++i)
+		uncompressed_size |= (uint64_t)(header[5 + i]) << (8 * i);
+
+	// Reject files with uncompressed size of 256 GiB or more. It's
+	// an arbitrary limit trying to avoid at least some false positives.
+	if (uncompressed_size != UINT64_MAX
+			&& uncompressed_size >= (UINT64_C(1) << 38)) {
+		unsupported_file(handle);
+		return;
+	}
+
+	// print_escaped() takes the name as unsigned bytes.
+	const uint8_t *name = (const uint8_t *)(handle->filename);
+
+	if (verbosity < V_WARNING) {
+		printf("name=");
+		print_escaped(name);
+		printf("\nformat=alone\n");
+
+		if (uncompressed_size == UINT64_MAX)
+			printf("uncompressed_size=unknown\n");
+		else
+			printf("uncompressed_size=%" PRIu64 "\n",
+					uncompressed_size);
+
+		printf("dict=%" PRIu32 "\n", dict);
+
+		printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb);
+
+	} else {
+		printf("File name: ");
+		print_escaped(name);
+		printf("\nFile format: LZMA_Alone\n");
+
+		// The ' flag is the POSIX way to request thousands
+		// grouping; the "%," used earlier isn't a valid conversion.
+		printf("Uncompressed size: ");
+		if (uncompressed_size == UINT64_MAX)
+			printf("unknown\n");
+		else
+			printf("%'" PRIu64 " bytes (%" PRIu64 " MiB)\n",
+					uncompressed_size,
+					(uncompressed_size + 1024 * 512)
+						/ (1024 * 1024));
+
+		printf("Dictionary size: %'" PRIu32 " bytes "
+				"(%" PRIu32 " MiB)\n",
+				dict, (dict + 1024 * 512) / (1024 * 1024));
+
+		printf("Literal context bits (lc): %u\n", lc);
+		printf("Literal position bits (lp): %u\n", lp);
+		printf("Position bits (pb): %u\n", pb);
+	}
+
+	return;
+}
+
+
+
+
+/// State of the file that is being listed
+typedef struct {
+ /// Name of the file as passed to list()
+ const char *filename;
+
+ /// Result of fstat() on fd; st_size is used as the file size.
+ struct stat st;
+
+ /// File descriptor open for reading
+ int fd;
+
+ /// Decoder state and the flags decoded from the Stream Header
+ lzma_stream strm;
+ lzma_stream_flags stream_flags;
+ lzma_info *info;
+
+ /// Values decoded from the end of the Stream by parse_stream_tail()
+ lzma_vli backward_size;
+ lzma_vli uncompressed_size;
+
+ /// Number of valid bytes in buffer, as set by listing_pread()
+ size_t buffer_size;
+ uint8_t buffer[IO_BUFFER_SIZE];
+} listing_handle;
+
+
+/// \brief      Fills handle->buffer starting from the given file offset
+///
+/// At most IO_BUFFER_SIZE bytes are read. The number of bytes that were
+/// actually read is stored in handle->buffer_size.
+///
+/// \return     False on success. True if the offset isn't inside the
+///             file or if reading fails; an error message has been
+///             printed then.
+static bool
+listing_pread(listing_handle *handle, uint64_t offset)
+{
+	const uint64_t file_size = (uint64_t)(handle->st.st_size);
+	if (offset >= file_size) {
+		errmsg(V_ERROR, "%s: Trying to read past the end of the file.",
+				handle->filename);
+		return true;
+	}
+
+	ssize_t got;
+
+#ifdef HAVE_PREAD
+	got = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE,
+			(off_t)(offset));
+#else
+	// Without pread() we seek first and then read. It doesn't matter
+	// where the file position is left after this.
+	const off_t pos = lseek(handle->fd, (off_t)(offset), SEEK_SET);
+	if (pos == -1) {
+		errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
+		return true;
+	}
+
+	got = read(handle->fd, handle->buffer, IO_BUFFER_SIZE);
+#endif
+
+	switch (got) {
+	case -1:
+		errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
+		return true;
+
+	case 0:
+		errmsg(V_ERROR, "%s: Trying to read past the end of the file.",
+				handle->filename);
+		return true;
+
+	default:
+		break;
+	}
+
+	handle->buffer_size = (size_t)(got);
+	return false;
+}
+
+
+
+/// \brief      Decodes the Stream Header from the beginning of the file
+///
+/// The beginning of the file is read into handle->buffer and the decoded
+/// flags are stored in handle->stream_flags.
+///
+/// \return     False on success, true on error (a message is printed).
+static bool
+parse_stream_header(listing_handle *handle)
+{
+	if (listing_pread(handle, 0))
+		return true;
+
+	// TODO Got enough input?
+
+	// listing_handle has no member called "name"; the filename is
+	// what must be used in the error messages.
+	lzma_ret ret = lzma_stream_header_decoder(
+			&handle->strm, &handle->stream_flags);
+	if (ret != LZMA_OK) {
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(ret));
+		return true;
+	}
+
+	handle->strm.next_in = handle->buffer;
+	handle->strm.avail_in = handle->buffer_size;
+	ret = lzma_code(&handle->strm, LZMA_RUN);
+	if (ret != LZMA_STREAM_END) {
+		assert(ret != LZMA_OK);
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(ret));
+		return true;
+	}
+
+	return false;
+}
+
+
+/// \brief      Decodes the fields at the end of the Stream
+///
+/// Trailing null bytes are skipped as padding, then the Stream tail is
+/// decoded and Backward Size is read in reverse from the bytes preceding
+/// it. For a Stream that isn't multi-Block, Uncompressed Size is decoded
+/// the same way; if that fails it is set to LZMA_VLI_UNKNOWN.
+///
+/// \return     False on success, true on error (a message is printed).
+static bool
+parse_stream_tail(listing_handle *handle)
+{
+	uint64_t offset = (uint64_t)(handle->st.st_size);
+
+	// Skip padding by reading the file backwards one buffer at a time
+	// until a buffer has something else than null bytes at its end.
+	do {
+		if (offset == 0) {
+			errmsg(V_ERROR, "%s: %s", handle->filename,
+					str_strm_error(LZMA_DATA_ERROR));
+			return true;
+		}
+
+		if (offset < IO_BUFFER_SIZE)
+			offset = 0;
+		else
+			offset -= IO_BUFFER_SIZE;
+
+		if (listing_pread(handle, offset))
+			return true;
+
+		while (handle->buffer_size > 0
+				&& handle->buffer[handle->buffer_size - 1]
+					== '\0')
+			--handle->buffer_size;
+
+	} while (handle->buffer_size == 0);
+
+	if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) {
+		// TODO The tail may span two buffers, which isn't
+		// supported yet. Give up instead of computing a pointer
+		// that is before the beginning of the buffer.
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(LZMA_DATA_ERROR));
+		return true;
+	}
+
+	lzma_stream_flags stream_flags;
+	lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags);
+	if (ret != LZMA_OK) {
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(ret));
+		return true;
+	}
+
+	handle->strm.next_in = handle->buffer + handle->buffer_size
+			- LZMA_STREAM_TAIL_SIZE;
+	handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE;
+	handle->buffer_size -= LZMA_STREAM_TAIL_SIZE;
+	ret = lzma_code(&handle->strm, LZMA_RUN);
+
+	// Like in parse_stream_header(), the decoder is done only when it
+	// returns LZMA_STREAM_END. Comparing against LZMA_OK here made the
+	// assertion below a tautology.
+	// NOTE(review): confirm against the decoder's documentation.
+	if (ret != LZMA_STREAM_END) {
+		assert(ret != LZMA_OK);
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(ret));
+		return true;
+	}
+
+	if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) {
+		// TODO
+		// Possibly corrupt, possibly concatenated file.
+	}
+
+	handle->backward_size = 0;
+	ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer,
+			&handle->buffer_size);
+	if (ret != LZMA_OK) {
+		// It may be LZMA_BUF_ERROR too, but it doesn't make sense
+		// as an error message displayed to the user.
+		errmsg(V_ERROR, "%s: %s", handle->filename,
+				str_strm_error(LZMA_DATA_ERROR));
+		return true;
+	}
+
+	if (!stream_flags.is_multi) {
+		// Decode using a copy of the size so that
+		// handle->buffer_size isn't changed.
+		handle->uncompressed_size = 0;
+		size_t tmp = handle->buffer_size;
+		ret = lzma_vli_reverse_decode(&handle->uncompressed_size,
+				handle->buffer, &tmp);
+		if (ret != LZMA_OK)
+			handle->uncompressed_size = LZMA_VLI_UNKNOWN;
+	}
+
+	// Calculate the Header Metadata Block start offset.
+
+
+	return false;
+}
+
+
+
+// NOTE(review): This function is an unfinished sketch. Several statements
+// are incomplete (for example the errmsg() call, the initializer of info,
+// the my_seek() call, and the empty if condition), so it doesn't compile
+// as is. It is also the second definition of list_native() in this file.
+static void
+list_native(listing_handle *handle)
+{
+ lzma_memory_limiter *limiter
+ = lzma_memory_limiter_create(opt_memory);
+ if (limiter == NULL) {
+ errmsg(V_ERROR,
+ }
+ lzma_info *info =
+
+
+ // Parse Stream Header
+ //
+ // Single-Block Stream:
+ // - Parse Block Header
+ // - Parse Stream Footer
+ // - If Backward Size doesn't match, error out
+ //
+ // Multi-Block Stream:
+ // - Parse Header Metadata Block, if any
+ // - Parse Footer Metadata Block
+ // - Parse Stream Footer
+ // - If Footer Metadata Block doesn't match the Stream, error out
+ //
+ // In other words, we don't support concatenated files.
+ if (parse_stream_header(handle))
+ return;
+
+ if (parse_block_header(handle))
+ return;
+
+ if (handle->stream_flags.is_multi) {
+ if (handle->block_options.is_metadata) {
+ if (parse_metadata(handle)
+ return;
+ }
+
+ if (my_seek(handle,
+
+ } else {
+ if (handle->block_options.is_metadata) {
+ FILE_IS_CORRUPT();
+ return;
+ }
+
+ if (parse_stream_footer(handle))
+ return;
+
+ // If Uncompressed Size isn't present in Block Header,
+ // it must be present in Stream Footer.
+ if (handle->block_options.uncompressed_size
+ == LZMA_VLI_UNKNOWN
+ && handle->stream_flags.uncompressed_size
+ == LZMA_VLI_UNKNOWN) {
+ FILE_IS_CORRUPT();
+ return;
+ }
+
+ // Construct a single-Record Index.
+ lzma_index *index = malloc(sizeof(lzma_index));
+ if (index == NULL) {
+ out_of_memory();
+ return;
+ }
+
+ // Thoughts:
+ // If the Block coder took care of the Uncompressed and Backward
+ // Sizes, index->total_size could be set directly to Backward Size.
+ index->total_size =
+
+ if () {
+
+ }
+ }
+
+
+ if (handle->block_options.is_metadata) {
+ if (!handle->stream_flags.is_multi) {
+ FILE_IS_CORRUPT();
+ return;
+ }
+
+ if (parse_metadata(handle))
+ return;
+
+ }
+}
+
+
+
+/// \brief      Lists information about one compressed file
+///
+/// Only non-empty regular files are accepted; standard input ("-") isn't
+/// supported. The first byte of the file selects between the native
+/// format (0xFF) and the LZMA_Alone format.
+///
+/// \param      filename    Name of the file to list
+extern void
+list(const char *filename)
+{
+	if (strcmp(filename, "-") == 0) {
+		errmsg(V_ERROR, "%s: --list does not support reading from "
+				"standard input", filename);
+		return;
+	}
+
+	if (is_empty_filename(filename))
+		return;
+
+	listing_handle handle;
+	handle.filename = filename;
+
+	handle.fd = open(filename, O_RDONLY | O_NOCTTY);
+	if (handle.fd == -1) {
+		errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
+		return;
+	}
+
+	if (fstat(handle.fd, &handle.st)) {
+		errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
+		goto out;
+	}
+
+	if (!S_ISREG(handle.st.st_mode)) {
+		errmsg(V_WARNING, _("%s: Not a regular file, skipping"),
+				filename);
+		goto out;
+	}
+
+	if (handle.st.st_size <= 0) {
+		errmsg(V_ERROR, _("%s: File is empty"), filename);
+		goto out;
+	}
+
+	// Read the beginning of the file to detect the file format.
+	if (listing_pread(&handle, 0))
+		goto out;
+
+	if (handle.buffer[0] == 0xFF) {
+		if (opt_header == HEADER_ALONE) {
+			errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
+			goto out;
+		}
+
+		list_native(&handle);
+	} else {
+		if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) {
+			errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
+			goto out;
+		}
+
+		list_alone(&handle);
+	}
+
+out:
+	// The descriptor lives in the handle; there is no separate variable
+	// named fd in this function.
+	(void)close(handle.fd);
+	return;
+}
diff --git a/src/xz/main.c b/src/xz/main.c
new file mode 100644
index 00000000..4e24b98d
--- /dev/null
+++ b/src/xz/main.c
@@ -0,0 +1,402 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file main.c
+/// \brief main()
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+#include "open_stdxxx.h"
+#include <ctype.h>
+
+
+/// Set to true by signal_handler(). The rest of the code polls this to
+/// notice that it should clean up and stop.
+volatile sig_atomic_t user_abort = false;
+
+/// Exit status to use. This can be changed with set_exit_status().
+static enum exit_status_type exit_status = E_SUCCESS;
+
+/// If we were interrupted by a signal, we store the signal number so that
+/// we can raise that signal to kill the program when all cleanups have
+/// been done.
+static volatile sig_atomic_t exit_signal = 0;
+
+/// Mask of signals for which we have established a signal handler to set
+/// user_abort to true.
+static sigset_t hooked_signals;
+
+/// signals_block() and signals_unblock() can be called recursively.
+/// This counts how many signals_block() calls are currently in effect.
+static size_t signals_block_count = 0;
+
+
+/// Signal handler for the signals hooked by establish_signal_handlers().
+/// It only records which signal arrived and requests the program to stop.
+static void
+signal_handler(int sig)
+{
+ // Store the signal number before setting the flag.
+ exit_signal = sig;
+ user_abort = true;
+ return;
+}
+
+
+/// Installs signal_handler() for the signals that should make us clean up
+/// and exit, and stores the set of those signals in hooked_signals.
+static void
+establish_signal_handlers(void)
+{
+	// Signals that we want to catch. Some of them don't exist
+	// on every system.
+	static const int hooked[] = {
+		SIGINT,
+		SIGTERM,
+#ifdef SIGHUP
+		SIGHUP,
+#endif
+#ifdef SIGPIPE
+		SIGPIPE,
+#endif
+#ifdef SIGXCPU
+		SIGXCPU,
+#endif
+#ifdef SIGXFSZ
+		SIGXFSZ,
+#endif
+	};
+
+	const size_t count = ARRAY_SIZE(hooked);
+
+	// Collect the signals into a mask. It is needed below and also
+	// by signals_block() and signals_unblock().
+	sigemptyset(&hooked_signals);
+	for (size_t n = 0; n < count; ++n)
+		sigaddset(&hooked_signals, hooked[n]);
+
+	struct sigaction action;
+	action.sa_handler = &signal_handler;
+
+	// SA_RESTART is intentionally not set: we want to get EINTR so that
+	// user_abort can be checked and cleanups done before exiting. When
+	// EINTR isn't wanted, the hooked signals are blocked instead.
+	action.sa_flags = 0;
+
+	// While the handler runs, all the hooked signals are blocked.
+	action.sa_mask = hooked_signals;
+
+	for (size_t n = 0; n < count; ++n) {
+		// Signals that the parent process has left ignored
+		// stay ignored.
+		struct sigaction previous;
+		const bool ignored
+				= sigaction(hooked[n], NULL, &previous) == 0
+				&& previous.sa_handler == SIG_IGN;
+		if (ignored)
+			continue;
+
+		if (sigaction(hooked[n], &action, NULL) != 0)
+			message_signal_handler();
+	}
+}
+
+
+/// Blocks the hooked signals. Calls can be nested; only the outermost
+/// call changes the signal mask. errno is preserved.
+extern void
+signals_block(void)
+{
+	const bool outermost = signals_block_count == 0;
+	++signals_block_count;
+
+	if (!outermost)
+		return;
+
+	const int saved_errno = errno;
+	sigprocmask(SIG_BLOCK, &hooked_signals, NULL);
+	errno = saved_errno;
+}
+
+
+/// Undoes one signals_block() call. The hooked signals are unblocked when
+/// the outermost signals_block() call gets undone. errno is preserved.
+extern void
+signals_unblock(void)
+{
+	assert(signals_block_count > 0);
+
+	--signals_block_count;
+	if (signals_block_count != 0)
+		return;
+
+	const int saved_errno = errno;
+	sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL);
+	errno = saved_errno;
+}
+
+
+extern void
+set_exit_status(enum exit_status_type new_status)
+{
+ assert(new_status == E_WARNING || new_status == E_ERROR);
+
+ if (exit_status != E_ERROR)
+ exit_status = new_status;
+
+ return;
+}
+
+
+/// \brief      Closes stdout and stderr and exits the program
+///
+/// If closing stdout or stderr fails, or an earlier error is pending on
+/// them, the exit status is changed to E_ERROR. If a hooked signal has
+/// been received, its default action is restored and the signal is
+/// raised again.
+///
+/// \param      status  Exit status to use when no problems are detected
+///                     here and no signal needs to be raised
+extern void
+my_exit(enum exit_status_type status)
+{
+	// Close stdout. If something goes wrong, print an error message
+	// to stderr.
+	{
+		const int ferror_err = ferror(stdout);
+		const int fclose_err = fclose(stdout);
+		if (ferror_err || fclose_err) {
+			// If it was fclose() that failed, we have the reason
+			// in errno. If only ferror() indicated an error,
+			// we have no idea what the reason was.
+			message(V_ERROR, _("Writing to standard output "
+					"failed: %s"),
+					fclose_err ? strerror(errno)
+						: _("Unknown error"));
+			status = E_ERROR;
+		}
+	}
+
+	// Close stderr. If something goes wrong, there's nothing where we
+	// could print an error message. Just set the exit status.
+	{
+		const int ferror_err = ferror(stderr);
+		const int fclose_err = fclose(stderr);
+		if (fclose_err || ferror_err)
+			status = E_ERROR;
+	}
+
+	// If we have got a signal, raise it to kill the program.
+	const int sig = exit_signal;
+	if (sig != 0) {
+		struct sigaction sa;
+		sa.sa_handler = SIG_DFL;
+		sigfillset(&sa.sa_mask);
+		sa.sa_flags = 0;
+		sigaction(sig, &sa, NULL);
+
+		// Raise the same signal whose handler was just reset.
+		// exit_signal is volatile and could have been changed by
+		// another signal after it was copied to sig.
+		raise(sig);
+
+		// If, for some weird reason, the signal doesn't kill us,
+		// we safely fall to the exit below.
+	}
+
+	exit(status);
+}
+
+
+/// \brief      Reads the next filename from the --files or --files0 input
+///
+/// \param      args    Parsed arguments; files_file, files_name, and
+///                     files_delim are used.
+///
+/// \return     Pointer to a null-terminated, non-empty filename. The
+///             buffer is reused by the next call. NULL is returned at
+///             the end of input, on error (a message is printed), and
+///             when user_abort has been set.
+static const char *
+read_name(const args_info *args)
+{
+	// FIXME: Maybe we should have some kind of memory usage limit here
+	// like the tool has for the actual compression and uncompression.
+	// Giving some huge text file with --files0 makes us to read the
+	// whole file in RAM.
+	static char *name = NULL;
+	static size_t size = 256;
+
+	// Allocate the initial buffer. This is never freed, since after it
+	// is no longer needed, the program exits very soon. It is safe to
+	// use xmalloc() and xrealloc() in this function, because while
+	// executing this function, no files are open for writing, and thus
+	// there's no need to cleanup anything before exiting.
+	if (name == NULL)
+		name = xmalloc(size);
+
+	// Write position in name
+	size_t pos = 0;
+
+	// Read one character at a time into name.
+	while (!user_abort) {
+		const int c = fgetc(args->files_file);
+
+		if (ferror(args->files_file)) {
+			// Take care of EINTR since we have established
+			// the signal handlers already. The error indicator
+			// of the stream is sticky, so it has to be cleared
+			// before retrying. Otherwise ferror() would stay
+			// true and successfully read characters would get
+			// dropped here.
+			if (errno == EINTR) {
+				clearerr(args->files_file);
+				continue;
+			}
+
+			message_error(_("%s: Error reading filenames: %s"),
+					args->files_name, strerror(errno));
+			return NULL;
+		}
+
+		if (feof(args->files_file)) {
+			// The last name must be terminated with the
+			// delimiter too.
+			if (pos != 0)
+				message_error(_("%s: Unexpected end of input "
+						"when reading filenames"),
+						args->files_name);
+
+			return NULL;
+		}
+
+		if (c == args->files_delim) {
+			// We allow consecutive newline (--files) or '\0'
+			// characters (--files0), and ignore such empty
+			// filenames.
+			if (pos == 0)
+				continue;
+
+			// A non-empty name was read. Terminate it with '\0'
+			// and return it.
+			name[pos] = '\0';
+			return name;
+		}
+
+		if (c == '\0') {
+			// A null character was found when using --files,
+			// which expects plain text input separated with
+			// newlines.
+			message_error(_("%s: Null character found when "
+					"reading filenames; maybe you meant "
+					"to use `--files0' instead "
+					"of `--files'?"), args->files_name);
+			return NULL;
+		}
+
+		name[pos++] = c;
+
+		// Allocate more memory if needed. There must always be space
+		// at least for one character to allow terminating the string
+		// with '\0'.
+		if (pos == size) {
+			size *= 2;
+			name = xrealloc(name, size);
+		}
+	}
+
+	return NULL;
+}
+
+
+// Entry point: initializes the program, parses the command line, and
+// processes first the files named on the command line and then the files
+// listed via --files or --files0. Always exits through my_exit().
+int
+main(int argc, char **argv)
+{
+ // Make sure that stdin, stdout, and stderr are connected to
+ // a valid file descriptor. Exit immediately with exit code E_ERROR
+ // if we cannot make the file descriptors valid. Maybe we should
+ // print an error message, but our stderr could be screwed anyway.
+ open_stdxxx(E_ERROR);
+
+ // This has to be done before calling any liblzma functions.
+ lzma_init();
+
+ // Set up the locale.
+ setlocale(LC_ALL, "");
+
+#ifdef ENABLE_NLS
+ // Set up the message translations too.
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+#endif
+
+ // Set the program invocation name used in various messages, and
+ // do other message handling related initializations.
+ message_init(argv[0]);
+
+ // Set hardware-dependent default values. These can be overridden
+ // on the command line, thus this must be done before args_parse().
+ hardware_init();
+
+ // Parse the command line arguments and get an array of filenames.
+ // This doesn't return if something is wrong with the command line
+ // arguments. If there are no arguments, one filename ("-") is still
+ // returned to indicate stdin.
+ args_info args;
+ args_parse(&args, argc, argv);
+
+ // Tell the message handling code how many input files there are if
+ // we know it. This way the progress indicator can show it. With
+ // --files or --files0 the count isn't known, so zero is passed.
+ if (args.files_name != NULL)
+ message_set_files(0);
+ else
+ message_set_files(args.arg_count);
+
+ // Refuse to write compressed data to standard output if it is
+ // a terminal and --force wasn't used.
+ if (opt_mode == MODE_COMPRESS) {
+ if (opt_stdout || (args.arg_count == 1
+ && strcmp(args.arg_names[0], "-") == 0)) {
+ if (is_tty_stdout()) {
+ message_try_help();
+ my_exit(E_ERROR);
+ }
+ }
+ }
+
+ if (opt_mode == MODE_LIST) {
+ message_fatal("--list is not implemented yet.");
+ }
+
+ // Hook the signal handlers. We don't need these before we start
+ // the actual action, so this is done after parsing the command
+ // line arguments.
+ establish_signal_handlers();
+
+ // Process the files given on the command line. Note that if no names
+ // were given, args_parse() gave us a fake "-" filename.
+ for (size_t i = 0; i < args.arg_count && !user_abort; ++i) {
+ if (strcmp("-", args.arg_names[i]) == 0) {
+ // Processing from stdin to stdout. Unless --force
+ // was used, check that we aren't writing compressed
+ // data to a terminal or reading it from terminal.
+ if (!opt_force) {
+ if (opt_mode == MODE_COMPRESS) {
+ if (is_tty_stdout())
+ continue;
+ } else if (is_tty_stdin()) {
+ continue;
+ }
+ }
+
+ // It doesn't make sense to compress data from stdin
+ // if we are supposed to read filenames from stdin
+ // too (enabled with --files or --files0).
+ if (args.files_name == stdin_filename) {
+ message_error(_("Cannot read data from "
+ "standard input when "
+ "reading filenames "
+ "from standard input"));
+ continue;
+ }
+
+ // Replace the "-" with a special pointer, which is
+ // recognized by process_file() and other things.
+ // This way error messages get a proper filename
+ // string and the code still knows that it is
+ // handling the special case of stdin.
+ args.arg_names[i] = (char *)stdin_filename;
+ }
+
+ // Do the actual compression or uncompression.
+ process_file(args.arg_names[i]);
+ }
+
+ // If --files or --files0 was used, process the filenames from the
+ // given file or stdin. Note that here we don't consider "-" to
+ // indicate stdin like we do with the command line arguments.
+ if (args.files_name != NULL) {
+ // read_name() checks for user_abort so we don't need to
+ // check it as loop termination condition.
+ while (true) {
+ const char *name = read_name(&args);
+ if (name == NULL)
+ break;
+
+ // read_name() doesn't return empty names.
+ assert(name[0] != '\0');
+ process_file(name);
+ }
+
+ // The list of filenames is closed here unless it was
+ // read from stdin.
+ if (args.files_name != stdin_filename)
+ (void)fclose(args.files_file);
+ }
+
+ my_exit(exit_status);
+}
diff --git a/src/xz/main.h b/src/xz/main.h
new file mode 100644
index 00000000..1e369425
--- /dev/null
+++ b/src/xz/main.h
@@ -0,0 +1,60 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file main.h
+/// \brief Miscellaneous declarations
+//
+// Copyright (C) 2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef MAIN_H
+#define MAIN_H
+
+/// Possible exit status values. These are the same as used by gzip and bzip2.
+enum exit_status_type {
+ E_SUCCESS = 0,
+ E_ERROR = 1,
+ E_WARNING = 2,
+};
+
+
+/// If this is true, we will clean up the possibly incomplete output file and
+/// return to main() as soon as practical. That is, the code needs to poll
+/// this variable in various places.
+extern volatile sig_atomic_t user_abort;
+
+
+/// Block the signals which don't have SA_RESTART and which would just set
+/// user_abort to true. This is handy when we don't want to handle EINTR
+/// and don't want SA_RESTART either.
+extern void signals_block(void);
+
+
+/// Unblock the signals blocked by signals_block().
+extern void signals_unblock(void);
+
+
+/// Sets the exit status after a warning or error has occurred. If new_status
+/// is E_WARNING and the old exit status was already E_ERROR, the exit
+/// status is not changed.
+extern void set_exit_status(enum exit_status_type new_status);
+
+
+/// Exits the program using the given status. This takes care of closing
+/// stdin, stdout, and stderr and catches possible errors. If we had got
+/// a signal, this function will raise it so that to the parent process it
+/// appears that we were killed by the signal sent by the user.
+extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn));
+
+
+#endif
diff --git a/src/xz/message.c b/src/xz/message.c
new file mode 100644
index 00000000..caba9fbc
--- /dev/null
+++ b/src/xz/message.c
@@ -0,0 +1,892 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file message.c
+/// \brief Printing messages to stderr
+//
+// Copyright (C) 2007-2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+#if defined(HAVE_SYS_TIME_H)
+# include <sys/time.h>
+#elif defined(SIGALRM)
+// FIXME
+#endif
+
+#include <stdarg.h>
+
+
+/// Name of the program which is prefixed to the error messages.
+static const char *argv0;
+
+/// Number of the current file. message_progress_start() increments this
+/// for every file, so the first file is number one.
+static unsigned int files_pos = 0;
+
+/// Total number of input files; zero if unknown. Being a static object
+/// without an initializer, this is zero until message_set_files() is called.
+static unsigned int files_total;
+
+/// Verbosity level
+static enum message_verbosity verbosity = V_WARNING;
+
+/// Filename which we will print with the verbose messages
+static const char *filename;
+
+/// True once a filename has been printed to stderr as part of a progress
+/// message. If automatic progress updating isn't enabled, this becomes true
+/// after the first progress message has been printed due to user sending
+/// SIGALRM. Once this variable is true, we will print an empty line before
+/// the next filename to make the output more readable.
+static bool first_filename_printed = false;
+
+/// This is set to true when we have printed the current filename to stderr
+/// as part of a progress message. This variable is useful only if not
+/// updating progress automatically: if user sends many SIGALRM signals,
+/// we won't print the name of the same file multiple times.
+static bool current_filename_printed = false;
+
+/// True if we should print progress indicator and update it automatically.
+static bool progress_automatic;
+
+/// This is true when a progress message was printed and the cursor is still
+/// on the same line with the progress message. In that case, a newline has
+/// to be printed before any error messages.
+static bool progress_active = false;
+
+/// Expected size of the input stream is needed to show completion percentage
+/// and estimate remaining time. Zero means that the size is unknown.
+static uint64_t expected_in_size;
+
+/// Time when we started processing the file
+static double start_time;
+
+/// The signal handler for SIGALRM sets this to true. It is set back to false
+/// once the progress message has been updated.
+static volatile sig_atomic_t progress_needs_updating = false;
+
+
+/// SIGALRM handler. It only raises progress_needs_updating; the actual
+/// printing happens later in message_progress_update(), which polls the flag.
+static void
+progress_signal_handler(int sig lzma_attribute((unused)))
+{
+	progress_needs_updating = true;
+}
+
+
+/// Get the current wall-clock time as double.
+///
+/// \return     Seconds since the Epoch including the fractional part, or
+///             -1.0 if gettimeofday() fails.
+static double
+my_time(void)
+{
+	struct timeval tv;
+
+	// This really shouldn't fail. I'm not sure what to return if it
+	// still fails. It doesn't look so useful to check the return value
+	// everywhere. FIXME?
+	if (gettimeofday(&tv, NULL))
+		return -1.0;
+
+	// tv_usec holds microseconds, so there are 1e6 of them in a second.
+	// (Dividing by 1e9 would make the fractional part a thousand times
+	// too small and the elapsed times jump at every full second.)
+	return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e6;
+}
+
+
+/// Appends formatted text to a buffer that is being filled in pieces.
+///
+/// \param      pos     In/out: where to write; advanced past the new text
+///                     when it fit completely.
+/// \param      left    In/out: number of bytes available at *pos; reduced
+///                     by the amount written, or set to zero on error or
+///                     truncation so that later calls write nothing more.
+/// \param      fmt     printf-style format string
+static void /* lzma_attribute((format(printf, 3, 4))) */
+my_snprintf(char **pos, size_t *left, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	const int written = vsnprintf(*pos, *left, fmt, args);
+	va_end(args);
+
+	// The text (without the terminating null) has to be strictly
+	// shorter than the space we had, otherwise it was truncated.
+	if (written >= 0 && (size_t)(written) < *left) {
+		*pos += written;
+		*left -= written;
+		return;
+	}
+
+	// Error or truncation: claim that the whole buffer is in use.
+	// We don't need better error handling here.
+	*left = 0;
+}
+
+
+/// Initializes the message functions: stores the program name used as the
+/// prefix of messages, decides whether automatic progress updating is
+/// possible, and (where SIGALRM exists) installs progress_signal_handler()
+/// for SIGALRM. If the handler cannot be installed,
+/// message_signal_handler() is called, which doesn't return.
+extern void
+message_init(const char *given_argv0)
+{
+ // Name of the program
+ argv0 = given_argv0;
+
+ // If --verbose is used, we use a progress indicator if and only
+ // if stderr is a terminal. If stderr is not a terminal, we print
+ // verbose information only after finishing the file. As a special
+ // exception, even if --verbose was not used, user can send SIGALRM
+ // to make us print progress information once without automatic
+ // updating.
+ progress_automatic = isatty(STDERR_FILENO);
+
+/*
+ if (progress_automatic) {
+ // stderr is a terminal. Check the COLUMNS environment
+ // variable to see if the terminal is wide enough. If COLUMNS
+ // doesn't exist or it has some unparseable value, we assume
+ // that the terminal is wide enough.
+ const char *columns_str = getenv("COLUMNS");
+ uint64_t columns;
+ if (columns_str != NULL
+ && !str_to_uint64_raw(&columns, columns_str)
+ && columns < 80)
+ progress_automatic = false;
+ }
+*/
+
+#ifdef SIGALRM
+ // Establish the signal handler for SIGALRM. Since this signal
+ // doesn't require any quick action, we set SA_RESTART.
+ struct sigaction sa;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART;
+ sa.sa_handler = &progress_signal_handler;
+ if (sigaction(SIGALRM, &sa, NULL))
+ message_signal_handler();
+#endif
+
+ return;
+}
+
+
+/// Raises the verbosity by one level; V_DEBUG is the ceiling.
+extern void
+message_verbosity_increase(void)
+{
+	if (verbosity >= V_DEBUG)
+		return;
+
+	++verbosity;
+}
+
+
+/// Lowers the verbosity by one level; V_SILENT is the floor.
+extern void
+message_verbosity_decrease(void)
+{
+	if (verbosity <= V_SILENT)
+		return;
+
+	--verbosity;
+}
+
+
+/// Records the total number of input files. Zero means that the total is
+/// unknown; print_filename() then omits the "/total" part.
+extern void
+message_set_files(unsigned int files)
+{
+	files_total = files;
+}
+
+
+/// Prints the name and number of the current file to stderr unless that
+/// has already been done for this file. Nothing is printed when exactly
+/// one file is processed and it is stdin: "(stdin)" is just noise when
+/// --verbose is used in a pipe and no other files are processed.
+static void
+print_filename(void)
+{
+	if (current_filename_printed)
+		return;
+
+	if (files_total == 1 && filename == stdin_filename)
+		return;
+
+	signals_block();
+
+	// Separate this file from the previous one with an empty line
+	// to improve readability.
+	if (first_filename_printed)
+		fputc('\n', stderr);
+
+	first_filename_printed = true;
+	current_filename_printed = true;
+
+	if (files_total != 0) {
+		fprintf(stderr, "%s (%u/%u)\n", filename,
+				files_pos, files_total);
+	} else {
+		// The total isn't known when --files or --files0 is used.
+		fprintf(stderr, "%s (%u)\n", filename, files_pos);
+	}
+
+	signals_unblock();
+}
+
+
+/// Starts the bookkeeping for a new input file.
+///
+/// \param      src_name    Name of the file; the pointer itself is stored
+/// \param      in_size     Expected size of the input; zero if unknown
+extern void
+message_progress_start(const char *src_name, uint64_t in_size)
+{
+	// These are stored even when no statistics are going to be shown,
+	// because the user may still ask for them later with SIGALRM.
+	start_time = my_time();
+	filename = src_name;
+	expected_in_size = in_size;
+
+	// The name of this file hasn't been printed to stderr yet.
+	current_filename_printed = false;
+
+	// File numbering starts from one.
+	++files_pos;
+
+	// Without an automatically updating progress indicator there is
+	// nothing more to do now.
+	if (verbosity < V_VERBOSE || !progress_automatic)
+		return;
+
+	// Print the filename and possibly the file count; print_filename()
+	// decides if that is appropriate with the current settings.
+	print_filename();
+
+	// Ask for the first progress update after about one second instead
+	// of setting progress_needs_updating right away, since extremely
+	// early progress info is pretty much useless.
+	alarm(1);
+}
+
+
+/// Make the string indicating completion percentage.
+///
+/// \param      in_pos  Number of input bytes processed so far
+///
+/// \return     Pointer to a static buffer holding the percentage, or the
+///             literal "--- %" when the percentage cannot be calculated.
+static const char *
+progress_percentage(uint64_t in_pos)
+{
+	static char buf[sizeof("99.9 %")];
+
+	// The size is unusable if it is unknown (zero) or clearly wrong
+	// because we have already processed more than the alleged size.
+	const bool size_is_usable
+			= expected_in_size != 0 && in_pos <= expected_in_size;
+	if (!size_is_usable)
+		return "--- %";
+
+	// Scale to 99.9 instead of 100.0: 100.0 % is never shown before we
+	// actually are finished (message_progress_end() handles that case).
+	const double percentage = (double)(in_pos)
+			/ (double)(expected_in_size) * 99.9;
+	snprintf(buf, sizeof(buf), "%.1f %%", percentage);
+
+	return buf;
+}
+
+
+/// Appends one size to the string being built with my_snprintf().
+///
+/// \param      pos     In/out write position, passed on to my_snprintf()
+/// \param      left    In/out space left, passed on to my_snprintf()
+/// \param      value   Size in bytes
+/// \param      final   True for the final message, which may use B or KiB
+///                     for small values; otherwise MiB is always used.
+static void
+progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final)
+{
+	// An exact byte count is used only if it fits in four digits.
+	if (final && value < 10000) {
+		my_snprintf(pos, left, "%'" PRIu64 " B", value);
+		return;
+	}
+
+	// KiB is used as long as it needs at most five significant digits.
+	if (final && value < UINT64_C(10239900)) {
+		my_snprintf(pos, left, "%'.1f KiB",
+				(double)(value) / 1024.0);
+		return;
+	}
+
+	// Everything else is shown as MiB.
+	my_snprintf(pos, left, "%'.1f MiB",
+			(double)(value) / (1024.0 * 1024.0));
+}
+
+
+/// Make the string containing the compressed size, the uncompressed size,
+/// and the compression ratio (compressed / uncompressed).
+///
+/// \param      compressed_pos      Amount of compressed data so far
+/// \param      uncompressed_pos    Amount of uncompressed data so far
+/// \param      final               Passed to progress_sizes_helper() to
+///                                 allow smaller units than MiB
+///
+/// \return     Pointer to a static buffer
+static const char *
+progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final)
+{
+	// This is enough to hold sizes up to about 99 TiB if thousand
+	// separator is used, or about 1 PiB without thousand separator.
+	// After that the progress indicator will look a bit silly, since
+	// the compression ratio no longer fits with three decimal places.
+	static char buf[44];
+
+	char *cursor = buf;
+	size_t space = sizeof(buf);
+
+	// "<compressed> / <uncompressed>"
+	progress_sizes_helper(&cursor, &space, compressed_pos, final);
+	my_snprintf(&cursor, &space, " / ");
+	progress_sizes_helper(&cursor, &space, uncompressed_pos, final);
+
+	// With no uncompressed data the ratio cannot be calculated. Use
+	// a value that is big enough to be shown as "> 9.999" below.
+	double ratio = 16.0;
+	if (uncompressed_pos > 0)
+		ratio = (double)(compressed_pos)
+				/ (double)(uncompressed_pos);
+
+	// Very bad ratios are shown as "> 9.999" so that the length of
+	// the ratio field stays fixed.
+	const bool ratio_too_big = ratio > 9.999;
+	if (ratio_too_big)
+		snprintf(cursor, space, " > %.3f", 9.999);
+	else
+		snprintf(cursor, space, " = %.3f", ratio);
+
+	return buf;
+}
+
+
+/// Make the string containing the processing speed of uncompressed data.
+///
+/// \param      uncompressed_pos    Amount of uncompressed data so far
+/// \param      elapsed             Seconds spent on this file so far
+///
+/// \return     Pointer to a static buffer, or an empty string if less than
+///             three seconds have elapsed or the speed is beyond GiB/s.
+static const char *
+progress_speed(uint64_t uncompressed_pos, double elapsed)
+{
+	static const char unit[][8] = {
+		"KiB/s",
+		"MiB/s",
+		"GiB/s",
+	};
+
+	static char buf[sizeof("999.9 GiB/s")];
+
+	// The early values look somewhat random, so don't show them.
+	if (elapsed < 3.0)
+		return "";
+
+	// Start from KiB/s and move to a bigger unit while the number
+	// would need more than three digits before the decimal separator.
+	double speed = (double)(uncompressed_pos) / (elapsed * 1024.0);
+	size_t unit_index = 0;
+
+	while (speed > 999.9) {
+		// Already at the biggest unit? Way too fast ;-)
+		if (unit_index + 1 == ARRAY_SIZE(unit))
+			return "";
+
+		++unit_index;
+		speed /= 1024.0;
+	}
+
+	snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]);
+	return buf;
+}
+
+
+/// Make a string indicating elapsed or remaining time.
+///
+/// \param      seconds     Time in seconds
+///
+/// \return     Pointer to a static buffer holding M:SS (less than an
+///             hour) or H:MM:SS (an hour or more), or an empty string if
+///             the time is zero or more than 9999:59:59.
+static const char *
+progress_time(uint32_t seconds)
+{
+	// 9999 hours = 416 days
+	static char buf[sizeof("9999:59:59")];
+
+	const uint32_t longest = ((UINT32_C(9999) * 60) + 59) * 60 + 59;
+
+	// Don't show anything if the time is zero or ridiculously big.
+	if (seconds == 0 || seconds > longest)
+		return "";
+
+	const uint32_t hours = seconds / 3600;
+	const uint32_t mins = (seconds / 60) % 60;
+	const uint32_t secs = seconds % 60;
+
+	if (hours > 0)
+		snprintf(buf, sizeof(buf),
+				"%" PRIu32 ":%02" PRIu32 ":%02" PRIu32,
+				hours, mins, secs);
+	else
+		snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32,
+				mins, secs);
+
+	return buf;
+}
+
+
+/// Converts a number of seconds from double to uint32_t so that the result
+/// is always defined: negative values and NaN become zero and values that
+/// don't fit become UINT32_MAX. (Converting an out-of-range double to an
+/// integer type directly is undefined behavior.)
+static uint32_t
+progress_seconds(double seconds)
+{
+	if (!(seconds > 0.0))
+		return 0;
+
+	if (seconds >= (double)(UINT32_MAX))
+		return UINT32_MAX;
+
+	return (uint32_t)(seconds);
+}
+
+
+/// Make the string to contain the estimated remaining time, or if the amount
+/// of input isn't known, how much time has elapsed.
+///
+/// \param      in_pos      Number of input bytes processed so far
+/// \param      elapsed     Seconds spent on this file so far. This is
+///                         calculated from wall-clock time, so it can be
+///                         negative if the clock was set backwards.
+///
+/// \return     String from progress_time(), or an empty string if no
+///             sensible value can be shown yet.
+static const char *
+progress_remaining(uint64_t in_pos, double elapsed)
+{
+	// If we don't know the size of the input, we indicate the time
+	// spent so far.
+	if (expected_in_size == 0 || in_pos > expected_in_size)
+		return progress_time(progress_seconds(elapsed));
+
+	// If we are at the very beginning of the file or the file is very
+	// small, don't give any estimate to avoid far too wrong estimations.
+	if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0)
+		return "";
+
+	// Calculate the estimate. With a huge file and a slow start the
+	// estimate may not fit in uint32_t; progress_seconds() saturates
+	// it, and progress_time() shows nothing for such big values.
+	uint32_t remaining = progress_seconds(
+			(double)(expected_in_size - in_pos)
+			* elapsed / (double)(in_pos));
+
+	// Don't give an estimate of zero seconds, since it is possible
+	// that all the input has been already passed to the library, but
+	// there is still quite a bit of output pending.
+	if (remaining == 0)
+		remaining = 1;
+
+	return progress_time(remaining);
+}
+
+
+/// Prints one progress line to stderr if an update has been requested with
+/// SIGALRM (progress_needs_updating is set) and some input has been read.
+///
+/// \param      in_pos      Number of input bytes processed so far
+/// \param      out_pos     Number of output bytes produced so far
+extern void
+message_progress_update(uint64_t in_pos, uint64_t out_pos)
+{
+ // If there's nothing to do, return immediately.
+ if (!progress_needs_updating || in_pos == 0)
+ return;
+
+ // Print the filename if it hasn't been printed yet.
+ print_filename();
+
+ // Calculate how long we have been processing this file.
+ const double elapsed = my_time() - start_time;
+
+ // Set compressed_pos and uncompressed_pos. Which of in_pos and out_pos
+ // is the compressed side depends on the operation mode.
+ uint64_t compressed_pos;
+ uint64_t uncompressed_pos;
+ if (opt_mode == MODE_COMPRESS) {
+ compressed_pos = out_pos;
+ uncompressed_pos = in_pos;
+ } else {
+ compressed_pos = in_pos;
+ uncompressed_pos = out_pos;
+ }
+
+ signals_block();
+
+ // Print the actual progress message. The idea is that there is at
+ // least three spaces between the fields in typical situations, but
+ // even in rare situations there is at least one space.
+ fprintf(stderr, " %7s %43s %11s %10s\r",
+ progress_percentage(in_pos),
+ progress_sizes(compressed_pos, uncompressed_pos, false),
+ progress_speed(uncompressed_pos, elapsed),
+ progress_remaining(in_pos, elapsed));
+
+ // Updating the progress info was finished. Reset
+ // progress_needs_updating to wait for the next SIGALRM.
+ //
+ // NOTE: This has to be done before alarm() call or with (very) bad
+ // luck we could be setting this to false after the alarm has already
+ // been triggered.
+ progress_needs_updating = false;
+
+ // NOTE(review): progress_automatic only tells that stderr is a terminal
+ // (see message_init()); verbosity isn't checked here. So a single
+ // SIGALRM sent by the user on a terminal starts periodic updates even
+ // without --verbose -- confirm that this is intended.
+ if (progress_automatic) {
+ // Mark that the progress indicator is active, so if an error
+ // occurs, the error message gets printed cleanly.
+ progress_active = true;
+
+ // Restart the timer so that progress_needs_updating gets
+ // set to true after about one second.
+ alarm(1);
+ } else {
+ // The progress message was printed because user had sent us
+ // SIGALRM. In this case, each progress message is printed
+ // on its own line.
+ fputc('\n', stderr);
+ }
+
+ signals_unblock();
+
+ return;
+}
+
+
+/// Prints the final statistics of the current file when in verbose mode.
+///
+/// \param      in_pos      Final input position
+/// \param      out_pos     Final output position
+/// \param      success     True if the file was processed successfully
+///
+/// Nothing is printed if verbosity is below V_VERBOSE or if user_abort
+/// has been set.
+extern void
+message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success)
+{
+	// If we are not in verbose mode, we have nothing to do.
+	if (verbosity < V_VERBOSE || user_abort)
+		return;
+
+	// Cancel a pending alarm, if any.
+	if (progress_automatic) {
+		alarm(0);
+		progress_active = false;
+	}
+
+	// my_time() is based on wall-clock time, so the difference can be
+	// negative if the clock was set backwards while we were working.
+	// Treat that as zero: converting a negative double to uint32_t
+	// below would be undefined behavior.
+	double elapsed = my_time() - start_time;
+	if (!(elapsed > 0.0))
+		elapsed = 0.0;
+
+	uint64_t compressed_pos;
+	uint64_t uncompressed_pos;
+	if (opt_mode == MODE_COMPRESS) {
+		compressed_pos = out_pos;
+		uncompressed_pos = in_pos;
+	} else {
+		compressed_pos = in_pos;
+		uncompressed_pos = out_pos;
+	}
+
+	// If it took less than a second, don't display the time:
+	// progress_time() returns an empty string for zero seconds
+	// (and for values that are too big to be shown).
+	const uint32_t elapsed_seconds = elapsed < (double)(UINT32_MAX)
+			? (uint32_t)(elapsed) : UINT32_MAX;
+	const char *elapsed_str = progress_time(elapsed_seconds);
+
+	signals_block();
+
+	// When using the auto-updating progress indicator, the final
+	// statistics are printed in the same format as the progress
+	// indicator itself.
+	if (progress_automatic && in_pos > 0) {
+		// Using floating point conversion for the percentage instead
+		// of static "100.0 %" string, because the decimal separator
+		// isn't a dot in all locales.
+		fprintf(stderr, " %5.1f %% %43s %11s %10s\n",
+				100.0,
+				progress_sizes(compressed_pos,
+					uncompressed_pos, true),
+				progress_speed(uncompressed_pos, elapsed),
+				elapsed_str);
+
+	// When no automatic progress indicator is used, don't print a verbose
+	// message at all if something went wrong and we couldn't produce
+	// any output. If we did produce output, then it is sometimes useful
+	// to tell that to the user, especially if we detected an error after
+	// a time-consuming operation.
+	} else if (success || out_pos > 0) {
+		// The filename and size information are always printed.
+		fprintf(stderr, "%s: %s", filename, progress_sizes(
+				compressed_pos, uncompressed_pos, true));
+
+		// The speed and elapsed time aren't always shown.
+		const char *speed = progress_speed(uncompressed_pos, elapsed);
+		if (speed[0] != '\0')
+			fprintf(stderr, ", %s", speed);
+
+		if (elapsed_str[0] != '\0')
+			fprintf(stderr, ", %s", elapsed_str);
+
+		fputc('\n', stderr);
+	}
+
+	signals_unblock();
+
+	return;
+}
+
+
+/// Common backend of the message functions: prints "argv0: message" and
+/// a newline to stderr if the current verbosity is at least v.
+static void
+vmessage(enum message_verbosity v, const char *fmt, va_list ap)
+{
+	// Messages above the current verbosity level are dropped.
+	if (v > verbosity)
+		return;
+
+	signals_block();
+
+	// If a progress message is on the screen, finish that line first.
+	// This leaves the progress message readable, which is nice because
+	// it shows where the error occurred. (The alternative would be to
+	// clear the progress message and replace it with the error message.)
+	if (progress_active) {
+		progress_active = false;
+		fputc('\n', stderr);
+	}
+
+	fprintf(stderr, "%s: ", argv0);
+	vfprintf(stderr, fmt, ap);
+	fputc('\n', stderr);
+
+	signals_unblock();
+}
+
+
+/// Prints a message if the current verbosity is at least v. The exit
+/// status isn't touched.
+extern void
+message(enum message_verbosity v, const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vmessage(v, fmt, args);
+	va_end(args);
+}
+
+
+/// Prints a warning (shown at V_WARNING and above) and passes E_WARNING
+/// to set_exit_status().
+extern void
+message_warning(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vmessage(V_WARNING, fmt, args);
+	va_end(args);
+
+	set_exit_status(E_WARNING);
+}
+
+
+/// Prints an error message (shown at V_ERROR and above) and passes E_ERROR
+/// to set_exit_status().
+extern void
+message_error(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vmessage(V_ERROR, fmt, args);
+	va_end(args);
+
+	set_exit_status(E_ERROR);
+}
+
+
+/// Prints an error message (shown at V_ERROR and above) and exits with
+/// E_ERROR via my_exit(), so this never returns to the caller.
+extern void
+message_fatal(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	vmessage(V_ERROR, fmt, args);
+	va_end(args);
+
+	my_exit(E_ERROR);
+}
+
+
+/// Reports an internal error with message_fatal(), which exits the program.
+extern void
+message_bug(void)
+{
+ message_fatal(_("Internal error (bug)"));
+}
+
+
+/// Reports with message_fatal() that establishing signal handlers failed;
+/// message_fatal() exits the program.
+extern void
+message_signal_handler(void)
+{
+ message_fatal(_("Cannot establish signal handlers"));
+}
+
+
+/// Converts lzma_ret to a human-readable string.
+///
+/// \param      code    Return value from a liblzma function
+///
+/// \return     Message describing the code. This is never NULL: codes
+///             that aren't errors, LZMA_PROG_ERROR, and any value not
+///             listed here give "Internal error (bug)", so the result can
+///             always be passed to a %s conversion.
+extern const char *
+message_strm(lzma_ret code)
+{
+	switch (code) {
+	case LZMA_NO_CHECK:
+		return _("No integrity check; not verifying file integrity");
+
+	case LZMA_UNSUPPORTED_CHECK:
+		return _("Unsupported type of integrity check; "
+				"not verifying file integrity");
+
+	case LZMA_MEM_ERROR:
+		return strerror(ENOMEM);
+
+	case LZMA_MEMLIMIT_ERROR:
+		return _("Memory usage limit reached");
+
+	case LZMA_FORMAT_ERROR:
+		return _("File format not recognized");
+
+	case LZMA_OPTIONS_ERROR:
+		return _("Unsupported options");
+
+	case LZMA_DATA_ERROR:
+		return _("Compressed data is corrupt");
+
+	case LZMA_BUF_ERROR:
+		return _("Unexpected end of input");
+
+	case LZMA_OK:
+	case LZMA_STREAM_END:
+	case LZMA_GET_CHECK:
+	case LZMA_PROG_ERROR:
+		// There is intentionally no "default" label: this way the
+		// compiler can warn if a constant of lzma_ret isn't
+		// handled in this switch.
+		break;
+	}
+
+	return _("Internal error (bug)");
+}
+
+
+/// Tells the user to try --help.
+extern void
+message_try_help(void)
+{
+	// V_WARNING is used instead of V_ERROR on purpose: this way the
+	// hint isn't shown when --quiet has been specified.
+	message(V_WARNING, _("Try `%s --help' for more information."), argv0);
+}
+
+
+/// Prints the version of the command line tool and of liblzma to stdout
+/// and exits with E_SUCCESS.
+extern void
+message_version(void)
+{
+ // It is possible that liblzma version is different than the command
+ // line tool version, so print both.
+ printf("xz " PACKAGE_VERSION "\n");
+ printf("liblzma %s\n", lzma_version_string());
+ my_exit(E_SUCCESS);
+}
+
+
+/// Prints the usage message to stdout and exits with E_SUCCESS.
+///
+/// \param      long_help   If true, the section headings, the advanced
+///                         options, and the resource usage of the current
+///                         configuration are printed too.
+extern void
+message_help(bool long_help)
+{
+ printf(_("Usage: %s [OPTION]... [FILE]...\n"
+ "Compress or decompress FILEs in the .xz format.\n\n"),
+ argv0);
+
+ puts(_("Mandatory arguments to long options are mandatory for "
+ "short options too.\n"));
+
+ if (long_help)
+ puts(_(" Operation mode:\n"));
+
+ puts(_(
+" -z, --compress force compression\n"
+" -d, --decompress force decompression\n"
+" -t, --test test compressed file integrity\n"
+" -l, --list list information about files"));
+
+ if (long_help)
+ puts(_("\n Operation modifiers:\n"));
+
+ puts(_(
+" -k, --keep keep (don't delete) input files\n"
+" -f, --force force overwrite of output file and (de)compress links\n"
+" -c, --stdout write to standard output and don't delete input files"));
+
+ if (long_help)
+ puts(_(
+" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
+" --files=[FILE] read filenames to process from FILE; if FILE is\n"
+" omitted, filenames are read from the standard input;\n"
+" filenames must be terminated with the newline character\n"
+" --files0=[FILE] like --files but use the null character as terminator"));
+
+ if (long_help) {
+ puts(_("\n Basic file format and compression options:\n"));
+ puts(_(
+" -F, --format=FMT file format to encode or decode; possible values are\n"
+" `auto' (default), `xz', `lzma', and `raw'\n"
+" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n"
+" or `sha256'"));
+ }
+
+ puts(_(
+" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n"
+" compression, 7-9 excellent compression; default is 7"));
+
+ puts(_(
+" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
+" the default setting, which depends on the operation mode\n"
+" and the amount of physical memory (RAM)"));
+
+ if (long_help) {
+ puts(_(
+"\n Custom filter chain for compression (alternative for using presets):"));
+
+#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \
+ || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ puts(_(
+"\n"
+" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n"
+" --lzma2=[OPTS] more of the following options (valid values; default):\n"
+" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n"
+" lc=NUM number of literal context bits (0-4; 3)\n"
+" lp=NUM number of literal position bits (0-4; 0)\n"
+" pb=NUM number of position bits (0-4; 2)\n"
+" mode=MODE compression mode (fast, normal; normal)\n"
+" nice=NUM nice length of a match (2-273; 64)\n"
+" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n"
+" depth=NUM maximum search depth; 0=automatic (default)"));
+#endif
+
+ puts(_(
+"\n"
+" --x86 x86 filter (sometimes called BCJ filter)\n"
+" --powerpc PowerPC (big endian) filter\n"
+" --ia64 IA64 (Itanium) filter\n"
+" --arm ARM filter\n"
+" --armthumb ARM-Thumb filter\n"
+" --sparc SPARC filter"));
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ puts(_(
+"\n"
+" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n"
+" dist=NUM distance between bytes being subtracted\n"
+" from each other (1-256; 1)"));
+#endif
+
+#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK)
+ puts(_(
+"\n"
+" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n"
+" size=NUM number of bytes of data per subblock\n"
+" (1 - 256Mi; 4Ki)\n"
+" rle=NUM run-length encoder chunk size (0-256; 0)"));
+#endif
+ }
+
+/*
+ if (long_help)
+ puts(_(
+"\n"
+" Resource usage options:\n"
+"\n"
+" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n"
+" the default setting, which depends on the operation mode\n"
+" and the amount of physical memory (RAM)\n"
+" -T, --threads=NUM use a maximum of NUM (de)compression threads"
+// " --threading=STR threading style; possible values are `auto' (default),\n"
+// " `files', and `stream'
+));
+*/
+ if (long_help)
+ puts(_("\n Other options:\n"));
+
+ puts(_(
+" -q, --quiet suppress warnings; specify twice to suppress errors too\n"
+" -v, --verbose be verbose; specify twice for even more verbose"));
+
+ // The descriptions of -h and -H depend on which of the two help
+ // texts is being shown.
+ if (long_help)
+ puts(_(
+"\n"
+" -h, --help display the short help (lists only the basic options)\n"
+" -H, --long-help display this long help"));
+ else
+ puts(_(
+" -h, --help display this short help\n"
+" -H, --long-help display the long help (lists also the advanced options)"));
+
+ puts(_(
+" -V, --version display the version number"));
+
+ puts(_("\nWith no FILE, or when FILE is -, read standard input.\n"));
+
+ if (long_help) {
+ // FIXME !!!
+ // The encoder memory limit is shown in MiB, rounded down but never
+ // shown as zero.
+ size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024);
+ if (mem_limit == 0)
+ mem_limit = 1;
+
+ // We use PRIu64 instead of %zu to support pre-C99 libc.
+ // FIXME: Use ' but avoid warnings.
+ puts(_("On this system and configuration, the tool will use"));
+ printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"),
+ (uint64_t)(mem_limit));
+ printf(N_(" * at maximum of one thread for (de)compression.\n\n",
+ " * at maximum of %" PRIu64
+ " threads for (de)compression.\n\n",
+ (uint64_t)(opt_threads)), (uint64_t)(opt_threads));
+ }
+
+ printf(_("Report bugs to <%s> (in English or Finnish).\n"),
+ PACKAGE_BUGREPORT);
+
+ my_exit(E_SUCCESS);
+}
diff --git a/src/xz/message.h b/src/xz/message.h
new file mode 100644
index 00000000..7ef9b165
--- /dev/null
+++ b/src/xz/message.h
@@ -0,0 +1,132 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file message.h
+/// \brief Printing messages to stderr
+//
+// Copyright (C) 2007-2008 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef MESSAGE_H
+#define MESSAGE_H
+
+
+/// Verbosity levels
+enum message_verbosity {
+ V_SILENT, ///< No messages
+ V_ERROR, ///< Only error messages
+ V_WARNING, ///< Errors and warnings
+ V_VERBOSE, ///< Errors, warnings, and verbose statistics
+ V_DEBUG, ///< Debugging, FIXME remove?
+};
+
+
+/// \brief Initializes the message functions
+///
+/// \param argv0 Name of the program i.e. argv[0] from main()
+///
+/// If an error occurs, this function doesn't return.
+///
+extern void message_init(const char *argv0);
+
+
+/// Increase verbosity level by one step unless it was at maximum.
+extern void message_verbosity_increase(void);
+
+/// Decrease verbosity level by one step unless it was at minimum.
+extern void message_verbosity_decrease(void);
+
+
+/// Set the total number of files to be processed (stdin is counted as a file
+/// here). Until this is called the total is zero, which means "unknown".
+extern void message_set_files(unsigned int files);
+
+
+/// \brief Print a message if verbosity level is at least "verbosity"
+///
+/// This doesn't touch the exit status.
+extern void message(enum message_verbosity verbosity, const char *fmt, ...)
+ lzma_attribute((format(printf, 2, 3)));
+
+
+/// \brief Prints a warning and possibly sets exit status
+///
+/// The message is printed only if verbosity level is at least V_WARNING.
+/// The exit status is set to WARNING unless it was already at ERROR.
+extern void message_warning(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)));
+
+
+/// \brief Prints an error message and sets exit status
+///
+/// The message is printed only if verbosity level is at least V_ERROR.
+/// The exit status is set to ERROR.
+extern void message_error(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)));
+
+
+/// \brief Prints an error message and exits with E_ERROR
+///
+/// The message is printed only if verbosity level is at least V_ERROR.
+extern void message_fatal(const char *fmt, ...)
+ lzma_attribute((format(printf, 1, 2)))
+ lzma_attribute((noreturn));
+
+
+/// Print an error message that an internal error occurred and exit with
+/// E_ERROR.
+extern void message_bug(void) lzma_attribute((noreturn));
+
+
+/// Print a message that establishing signal handlers failed, and exit with
+/// exit status ERROR.
+extern void message_signal_handler(void) lzma_attribute((noreturn));
+
+
+/// Converts lzma_ret to a string.
+extern const char *message_strm(lzma_ret code);
+
+
+/// Print a message that user should try --help.
+extern void message_try_help(void);
+
+
+/// Prints the version number to stdout and exits with exit status SUCCESS.
+extern void message_version(void) lzma_attribute((noreturn));
+
+
+/// Print the help message to stdout and exit with exit status SUCCESS.
+extern void message_help(bool long_help) lzma_attribute((noreturn));
+
+
+/// \brief Starts the progress bookkeeping for a new file
+///
+/// \param filename Name of the input file; the pointer is stored as is.
+/// \param in_size Expected size of the input, or zero if unknown.
+///
+extern void message_progress_start(const char *filename, uint64_t in_size);
+
+
+/// \brief Prints a progress message if an update has been requested
+///
+/// \param in_pos Current input position
+/// \param out_pos Current output position
+///
+/// Nothing is printed unless SIGALRM has been received since the previous
+/// update and in_pos is non-zero.
+///
+extern void message_progress_update(uint64_t in_pos, uint64_t out_pos);
+
+
+/// \brief Finishes the progress message if we were in verbose mode
+///
+/// \param in_pos Final input position i.e. how much input there was.
+/// \param out_pos Final output position
+/// \param success True if the operation was successful. When the
+/// automatic progress indicator isn't in use, the
+/// final message is omitted if the operation failed
+/// and no output was produced.
+///
+extern void message_progress_end(
+ uint64_t in_pos, uint64_t out_pos, bool success);
+
+#endif
diff --git a/src/xz/options.c b/src/xz/options.c
new file mode 100644
index 00000000..77ebddd6
--- /dev/null
+++ b/src/xz/options.c
@@ -0,0 +1,352 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file options.c
+/// \brief Parser for filter-specific options
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+
+///////////////////
+// Generic stuff //
+///////////////////
+
+/// Maps one accepted string value of an option to its numeric identifier.
+/// Arrays of these are terminated by an entry whose name is NULL.
+typedef struct {
+ const char *name;
+ uint64_t id;
+} name_id_map;
+
+
+/// Describes one option name accepted by parse_options(). If map is
+/// non-NULL, the value must be one of the strings listed in map and min/max
+/// are not used; otherwise the value is an integer passed to
+/// str_to_uint64() together with min and max. Arrays of these are
+/// terminated by an entry whose name is NULL.
+typedef struct {
+ const char *name;
+ const name_id_map *map;
+ uint64_t min;
+ uint64_t max;
+} option_map;
+
+
+/// Parses option=value pairs that are separated with commas:
+/// opt=val,opt=val
+///
+/// Each option is a string, that is converted to an integer using the
+/// index where the option string is in the array.
+///
+/// Value can be either a number with minimum and maximum value limit, or
+/// a string-id map mapping a list of possible string values to integers.
+///
+/// When parsing both option and value succeed, a filter-specific function
+/// is called, which should update the given value to filter-specific
+/// options structure.
+///
+/// If str is NULL or an empty string, nothing is parsed and filter_options
+/// is left untouched.
+///
+/// \param str String containing the options from the command line
+/// \param opts Filter-specific option map, terminated by an entry
+/// whose name is NULL
+/// \param set Filter-specific function to update filter_options
+/// \param filter_options Pointer to filter-specific options structure
+///
+/// \return Returns only if no errors occur. A pair without a value,
+/// an unknown option name, or an unknown string value is
+/// reported with message_fatal().
+///
+static void
+parse_options(const char *str, const option_map *opts,
+ void (*set)(void *filter_options,
+ uint32_t key, uint64_t value),
+ void *filter_options)
+{
+ if (str == NULL || str[0] == '\0')
+ return;
+
+ // Work on a private copy because the string is split in place below.
+ char *s = xstrdup(str);
+ char *name = s;
+
+ while (true) {
+ // Terminate the current pair at the next comma, if there is one.
+ char *split = strchr(name, ',');
+ if (split != NULL)
+ *split = '\0';
+
+ // Split the pair into name and value at the first '='.
+ char *value = strchr(name, '=');
+ if (value != NULL)
+ *value++ = '\0';
+
+ if (value == NULL || value[0] == '\0')
+ message_fatal(_("%s: Options must be `name=value' "
+ "pairs separated with commas"), str);
+
+ // Look for the option name from the option map.
+ bool found = false;
+ for (size_t i = 0; opts[i].name != NULL; ++i) {
+ if (strcmp(name, opts[i].name) != 0)
+ continue;
+
+ if (opts[i].map == NULL) {
+ // value is an integer.
+ const uint64_t v = str_to_uint64(name, value,
+ opts[i].min, opts[i].max);
+ set(filter_options, i, v);
+ } else {
+ // value is a string which we should map
+ // to an integer.
+ size_t j;
+ for (j = 0; opts[i].map[j].name != NULL; ++j) {
+ if (strcmp(opts[i].map[j].name, value)
+ == 0)
+ break;
+ }
+
+ // Reaching the terminator means that no string
+ // in the map matched.
+ if (opts[i].map[j].name == NULL)
+ message_fatal(_("%s: Invalid option "
+ "value"), value);
+
+ set(filter_options, i, opts[i].map[j].id);
+ }
+
+ found = true;
+ break;
+ }
+
+ if (!found)
+ message_fatal(_("%s: Invalid option name"), name);
+
+ // No comma was found, so this was the last pair.
+ if (split == NULL)
+ break;
+
+ name = split + 1;
+ }
+
+ free(s);
+ return;
+}
+
+
+//////////////
+// Subblock //
+//////////////
+
+// Keys passed to set_subblock(). parse_options() uses the index of the
+// matching entry in the option map as the key, so the order here has to
+// match the order of the opts[] array in options_subblock().
+enum {
+ OPT_SIZE,
+ OPT_RLE,
+ OPT_ALIGN,
+};
+
+
+/// Stores one parsed Subblock option into the lzma_options_subblock
+/// structure pointed to by options. Unknown keys are ignored.
+static void
+set_subblock(void *options, uint32_t key, uint64_t value)
+{
+ lzma_options_subblock *opt = options;
+
+ switch (key) {
+ case OPT_SIZE:
+ opt->subblock_data_size = value;
+ break;
+
+ case OPT_RLE:
+ opt->rle = value;
+ break;
+
+ case OPT_ALIGN:
+ opt->alignment = value;
+ break;
+ }
+}
+
+
+/// Allocates a lzma_options_subblock structure, fills it with default
+/// values, and then applies the name=value pairs given in str.
+/// The returned structure is allocated with xmalloc(); this function
+/// does not free it.
+extern lzma_options_subblock *
+options_subblock(const char *str)
+{
+ // The order of the entries must match the OPT_* keys handled by
+ // set_subblock().
+ static const option_map opts[] = {
+ { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN,
+ LZMA_SUBBLOCK_DATA_SIZE_MAX },
+ { "rle", NULL, LZMA_SUBBLOCK_RLE_OFF,
+ LZMA_SUBBLOCK_RLE_MAX },
+ { "align",NULL, LZMA_SUBBLOCK_ALIGNMENT_MIN,
+ LZMA_SUBBLOCK_ALIGNMENT_MAX },
+ { NULL, NULL, 0, 0 }
+ };
+
+ lzma_options_subblock *options
+ = xmalloc(sizeof(lzma_options_subblock));
+ *options = (lzma_options_subblock){
+ .allow_subfilters = false,
+ .alignment = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT,
+ .subblock_data_size = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT,
+ .rle = LZMA_SUBBLOCK_RLE_OFF,
+ };
+
+ parse_options(str, opts, &set_subblock, options);
+
+ return options;
+}
+
+
+///////////
+// Delta //
+///////////
+
+// Key passed to set_delta(). It is the index of the matching entry in the
+// opts[] array in options_delta().
+enum {
+ OPT_DIST,
+};
+
+
+/// Stores one parsed Delta option into the lzma_options_delta structure
+/// pointed to by options. Unknown keys are ignored.
+static void
+set_delta(void *options, uint32_t key, uint64_t value)
+{
+ lzma_options_delta *opt = options;
+ switch (key) {
+ case OPT_DIST:
+ opt->dist = value;
+ break;
+ }
+}
+
+
+/// Allocates a lzma_options_delta structure, fills it with default values,
+/// and then applies the name=value pairs given in str. The returned
+/// structure is allocated with xmalloc(); this function does not free it.
+extern lzma_options_delta *
+options_delta(const char *str)
+{
+ // The order of the entries must match the OPT_* keys handled by
+ // set_delta().
+ static const option_map opts[] = {
+ { "dist", NULL, LZMA_DELTA_DIST_MIN,
+ LZMA_DELTA_DIST_MAX },
+ { NULL, NULL, 0, 0 }
+ };
+
+ lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta));
+ *options = (lzma_options_delta){
+ // It's hard to give a useful default for this.
+ .type = LZMA_DELTA_TYPE_BYTE,
+ .dist = LZMA_DELTA_DIST_MIN,
+ };
+
+ parse_options(str, opts, &set_delta, options);
+
+ return options;
+}
+
+
+//////////
+// LZMA //
+//////////
+
+// Keys passed to set_lzma(). parse_options() uses the index of the
+// matching entry in the option map as the key, so the order here has to
+// match the order of the opts[] array in options_lzma().
+enum {
+ OPT_DICT,
+ OPT_LC,
+ OPT_LP,
+ OPT_PB,
+ OPT_MODE,
+ OPT_NICE,
+ OPT_MF,
+ OPT_DEPTH,
+};
+
+
+/// Stores one parsed LZMA option into the lzma_options_lzma structure
+/// pointed to by options. Unknown keys are ignored.
+static void
+set_lzma(void *options, uint32_t key, uint64_t value)
+{
+ lzma_options_lzma *opt = options;
+
+ switch (key) {
+ case OPT_DICT:
+ opt->dict_size = value;
+ break;
+
+ case OPT_LC:
+ opt->lc = value;
+ break;
+
+ case OPT_LP:
+ opt->lp = value;
+ break;
+
+ case OPT_PB:
+ opt->pb = value;
+ break;
+
+ case OPT_MODE:
+ opt->mode = value;
+ break;
+
+ case OPT_NICE:
+ opt->nice_len = value;
+ break;
+
+ case OPT_MF:
+ opt->mf = value;
+ break;
+
+ case OPT_DEPTH:
+ opt->depth = value;
+ break;
+ }
+}
+
+
+/// Allocates a lzma_options_lzma structure, fills it with default values,
+/// applies the name=value pairs given in str, and finally validates the
+/// combinations that cannot be checked one option at a time (lc + lp and
+/// the minimum nice length of the match finder). The returned structure
+/// is allocated with xmalloc(); this function does not free it.
+/// Doesn't return on error.
+extern lzma_options_lzma *
+options_lzma(const char *str)
+{
+ // Accepted values of the "mode" option.
+ static const name_id_map modes[] = {
+ { "fast", LZMA_MODE_FAST },
+ { "normal", LZMA_MODE_NORMAL },
+ { NULL, 0 }
+ };
+
+ // Accepted values of the "mf" (match finder) option.
+ static const name_id_map mfs[] = {
+ { "hc3", LZMA_MF_HC3 },
+ { "hc4", LZMA_MF_HC4 },
+ { "bt2", LZMA_MF_BT2 },
+ { "bt3", LZMA_MF_BT3 },
+ { "bt4", LZMA_MF_BT4 },
+ { NULL, 0 }
+ };
+
+ // The order of the entries must match the OPT_* keys handled by
+ // set_lzma().
+ static const option_map opts[] = {
+ { "dict", NULL, LZMA_DICT_SIZE_MIN,
+ (UINT32_C(1) << 30) + (UINT32_C(1) << 29) },
+ { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX },
+ { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX },
+ { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX },
+ { "mode", modes, 0, 0 },
+ { "nice", NULL, 2, 273 },
+ { "mf", mfs, 0, 0 },
+ { "depth", NULL, 0, UINT32_MAX },
+ { NULL, NULL, 0, 0 }
+ };
+
+ // TODO There should be a way to take some preset as the base for
+ // custom settings.
+ lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma));
+ *options = (lzma_options_lzma){
+ .dict_size = LZMA_DICT_SIZE_DEFAULT,
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ .lc = LZMA_LC_DEFAULT,
+ .lp = LZMA_LP_DEFAULT,
+ .pb = LZMA_PB_DEFAULT,
+ .persistent = false,
+ .mode = LZMA_MODE_NORMAL,
+ .nice_len = 64,
+ .mf = LZMA_MF_BT4,
+ .depth = 0,
+ };
+
+ parse_options(str, opts, &set_lzma, options);
+
+ // lc and lp were range-checked separately by parse_options(); their
+ // sum has its own limit.
+ if (options->lc + options->lp > LZMA_LCLP_MAX)
+ message_fatal(_("The sum of lc and lp must be at "
+ "maximum of 4"));
+
+ // NOTE(review): presumably the low four bits of the match finder ID
+ // encode its minimum match length -- confirm against liblzma.
+ const uint32_t nice_len_min = options->mf & 0x0F;
+ if (options->nice_len < nice_len_min)
+ message_fatal(_("The selected match finder requires at "
+ "least nice=%" PRIu32), nice_len_min);
+
+ return options;
+}
diff --git a/src/xz/options.h b/src/xz/options.h
new file mode 100644
index 00000000..4253ac3c
--- /dev/null
+++ b/src/xz/options.h
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file options.h
+/// \brief Parser for filter-specific options
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef OPTIONS_H
+#define OPTIONS_H
+
+#include "private.h"
+
+
+/// \brief Parser for Subblock options
+///
+/// \return Pointer to allocated options structure.
+/// Doesn't return on error.
+extern lzma_options_subblock *options_subblock(const char *str);
+
+
+/// \brief Parser for Delta options
+///
+/// \return Pointer to allocated options structure.
+/// Doesn't return on error.
+extern lzma_options_delta *options_delta(const char *str);
+
+
+/// \brief Parser for LZMA options
+///
+/// \return Pointer to allocated options structure.
+/// Doesn't return on error.
+extern lzma_options_lzma *options_lzma(const char *str);
+
+#endif
diff --git a/src/xz/private.h b/src/xz/private.h
new file mode 100644
index 00000000..b463a08e
--- /dev/null
+++ b/src/xz/private.h
@@ -0,0 +1,52 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file private.h
+/// \brief Common includes, definitions, and prototypes
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef PRIVATE_H
+#define PRIVATE_H
+
+#include "sysdefs.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <signal.h>
+#include <locale.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#ifdef ENABLE_NLS
+# include <libintl.h>
+# define _(msgid) gettext(msgid)
+# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n)
+#else
+# define _(msgid) (msgid)
+# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2))
+#endif
+
+#include "main.h"
+#include "process.h"
+#include "message.h"
+#include "args.h"
+#include "hardware.h"
+#include "io.h"
+#include "options.h"
+#include "suffix.h"
+#include "util.h"
+
+#endif
diff --git a/src/xz/process.c b/src/xz/process.c
new file mode 100644
index 00000000..d30878e4
--- /dev/null
+++ b/src/xz/process.c
@@ -0,0 +1,391 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file process.c
+/// \brief Compresses or uncompresses a file
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+
+/// Operation mode of the tool. Compression is the default.
+enum operation_mode opt_mode = MODE_COMPRESS;
+
+/// File format to encode, or to accept when decoding. The default is
+/// FORMAT_AUTO.
+enum format_type opt_format = FORMAT_AUTO;
+
+
+/// Stream used to communicate with liblzma
+static lzma_stream strm = LZMA_STREAM_INIT;
+
+/// Filters needed for encoding in all formats, and also for decoding
+/// raw data. The extra element holds the LZMA_VLI_UNKNOWN terminator.
+static lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+/// Number of filters. Zero indicates that we are using a preset.
+static size_t filters_count = 0;
+
+/// Number of the preset (1-9)
+static size_t preset_number = 7;
+
+/// Indicate if no preset has been given. In that case, we will auto-adjust
+/// the compression preset so that it doesn't use too much RAM.
+// FIXME
+static bool preset_default = true;
+
+/// Integrity check type
+static lzma_check check = LZMA_CHECK_CRC64;
+
+
+/// Selects the integrity check type. The value is passed to
+/// lzma_stream_encoder() when encoding with FORMAT_XZ.
+extern void
+coder_set_check(lzma_check new_check)
+{
+ check = new_check;
+ return;
+}
+
+
+/// Selects the preset number and records that a preset was explicitly
+/// requested, which disables the automatic preset adjustment done in
+/// coder_set_compression_settings().
+extern void
+coder_set_preset(size_t new_preset)
+{
+ preset_number = new_preset;
+ preset_default = false;
+ return;
+}
+
+
+/// Appends a filter to the custom filter chain.
+///
+/// \param id Filter ID
+/// \param options Filter-specific options structure; only the pointer
+/// is stored, the structure is not copied
+///
+/// Doesn't return if the chain already holds LZMA_FILTERS_MAX filters.
+extern void
+coder_add_filter(lzma_vli id, void *options)
+{
+ // The last element of filters[] is reserved for the terminator.
+ if (filters_count == LZMA_FILTERS_MAX)
+ message_fatal(_("Maximum number of filters is four"));
+
+ filters[filters_count].id = id;
+ filters[filters_count].options = options;
+ ++filters_count;
+
+ return;
+}
+
+
+/// \brief Finalizes the filter chain and checks its memory usage
+///
+/// If no filters were added with coder_add_filter(), a one-filter chain is
+/// built from the selected preset (LZMA1 with --format=lzma, LZMA2
+/// otherwise). The chain is then terminated, checked against the
+/// restrictions of --format=lzma, and its memory usage is compared against
+/// the encoder or decoder memory usage limit depending on opt_mode.
+///
+/// Only a chain that was built from the default preset (no preset was
+/// requested with coder_set_preset()) is adjusted automatically: the preset
+/// number is lowered until the chain fits the limit. In every other case
+/// exceeding the limit is a fatal error. Finally opt_threads is reduced so
+/// that the threads together stay within the limit.
+///
+/// Doesn't return on error.
+extern void
+coder_set_compression_settings(void)
+{
+	// Options for LZMA1 or LZMA2 in case we are using a preset.
+	static lzma_options_lzma opt_lzma;
+
+	if (filters_count == 0) {
+		// We are using a preset. This is not a good idea in raw mode
+		// except when playing around with things. Different versions
+		// of this software may use different options in presets, and
+		// thus make uncompressing the raw data difficult.
+		if (opt_format == FORMAT_RAW) {
+			// The message is shown only if warnings are allowed
+			// but the exit status isn't changed.
+			message(V_WARNING, _("Using a preset in raw mode "
+					"is discouraged."));
+			message(V_WARNING, _("The exact options of the "
+					"presets may vary between software "
+					"versions."));
+		}
+
+		// Get the preset for LZMA1 or LZMA2.
+		if (lzma_lzma_preset(&opt_lzma, preset_number))
+			message_bug();
+
+		// Use LZMA2 except with --format=lzma we use LZMA1.
+		filters[0].id = opt_format == FORMAT_LZMA
+				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
+		filters[0].options = &opt_lzma;
+		filters_count = 1;
+	}
+
+	// Only a chain built from a preset points to opt_lzma. A custom
+	// chain cannot be made smaller by lowering the preset number, so
+	// the automatic adjustment below must be limited to preset chains.
+	const bool using_preset = filters[0].options == &opt_lzma;
+
+	// Terminate the filter options array.
+	filters[filters_count].id = LZMA_VLI_UNKNOWN;
+
+	// If we are using the LZMA_Alone format, allow exactly one filter
+	// which has to be LZMA.
+	if (opt_format == FORMAT_LZMA && (filters_count != 1
+			|| filters[0].id != LZMA_FILTER_LZMA1))
+		message_fatal(_("With --format=lzma only the LZMA1 filter "
+				"is supported"));
+
+	// TODO: liblzma probably needs an API to validate the filter chain.
+
+	// If using --format=raw, we can be decoding.
+	const bool encoding = opt_mode == MODE_COMPRESS;
+	uint64_t memory_usage = encoding
+			? lzma_memusage_encoder(filters)
+			: lzma_memusage_decoder(filters);
+	const uint64_t memory_limit = encoding
+			? hardware_memlimit_encoder()
+			: hardware_memlimit_decoder();
+
+	if (memory_usage == UINT64_MAX)
+		message_bug();
+
+	if (preset_default && using_preset) {
+		// When no preset was explicitly requested, we use the default
+		// preset only if the memory usage limit allows. Otherwise we
+		// select a lower preset automatically.
+		while (memory_usage > memory_limit) {
+			if (preset_number == 1)
+				message_fatal(_("Memory usage limit is too "
+						"small for any internal "
+						"filter preset"));
+
+			if (lzma_lzma_preset(&opt_lzma, --preset_number))
+				message_bug();
+
+			// Recalculate with the same function that produced
+			// the initial value, so that the usage and the limit
+			// being compared belong to the same operation.
+			memory_usage = encoding
+					? lzma_memusage_encoder(filters)
+					: lzma_memusage_decoder(filters);
+
+			if (memory_usage == UINT64_MAX)
+				message_bug();
+		}
+	} else if (memory_usage > memory_limit) {
+		message_fatal(_("Memory usage limit is too small "
+				"for the given filter setup"));
+	}
+
+	// Limit the number of worked threads so that memory usage
+	// limit isn't exceeded.
+	assert(memory_usage > 0);
+	size_t thread_limit = memory_limit / memory_usage;
+	if (thread_limit == 0)
+		thread_limit = 1;
+
+	if (opt_threads > thread_limit)
+		opt_threads = thread_limit;
+
+	return;
+}
+
+
+/// \brief Initializes the liblzma coder in strm
+///
+/// The encoder or decoder is chosen according to opt_mode and opt_format.
+///
+/// \return False on success. True on error, in which case an error
+/// message has been printed with message_error() (out of
+/// memory) or message_bug() has been called (anything else).
+static bool
+coder_init(void)
+{
+ // This value remains only if no case below assigns ret.
+ lzma_ret ret = LZMA_PROG_ERROR;
+
+ if (opt_mode == MODE_COMPRESS) {
+ switch (opt_format) {
+ case FORMAT_AUTO:
+ // args.c ensures this.
+ assert(0);
+ break;
+
+ case FORMAT_XZ:
+ ret = lzma_stream_encoder(&strm, filters, check);
+ break;
+
+ case FORMAT_LZMA:
+ ret = lzma_alone_encoder(&strm, filters[0].options);
+ break;
+
+ case FORMAT_RAW:
+ ret = lzma_raw_encoder(&strm, filters);
+ break;
+ }
+ } else {
+ const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK
+ | LZMA_CONCATENATED;
+
+ switch (opt_format) {
+ case FORMAT_AUTO:
+ ret = lzma_auto_decoder(&strm,
+ hardware_memlimit_decoder(), flags);
+ break;
+
+ case FORMAT_XZ:
+ ret = lzma_stream_decoder(&strm,
+ hardware_memlimit_decoder(), flags);
+ break;
+
+ case FORMAT_LZMA:
+ ret = lzma_alone_decoder(&strm,
+ hardware_memlimit_decoder());
+ break;
+
+ case FORMAT_RAW:
+ // Memory usage has already been checked in args.c.
+ // FIXME Comment
+ // NOTE(review): the memory usage check visible in this
+ // file is in coder_set_compression_settings() rather
+ // than in args.c -- confirm which one is meant.
+ ret = lzma_raw_decoder(&strm, filters);
+ break;
+ }
+ }
+
+ if (ret != LZMA_OK) {
+ if (ret == LZMA_MEM_ERROR)
+ message_error("%s", message_strm(LZMA_MEM_ERROR));
+ else
+ message_bug();
+
+ return true;
+ }
+
+ return false;
+}
+
+
+/// \brief Runs the coder on one file pair
+///
+/// Reads input with io_read(), passes it through lzma_code(), and writes
+/// the output with io_write(). Nothing is written in MODE_TEST. The loop
+/// ends when the stream finishes, when an error occurs, or when
+/// user_abort becomes true.
+///
+/// \param pair File pair to process
+///
+/// \return True if the end of the stream was reached with no input
+/// left over. False on error or if the operation was aborted.
+static bool
+coder_run(file_pair *pair)
+{
+ // Buffers to hold input and output data.
+ uint8_t in_buf[IO_BUFFER_SIZE];
+ uint8_t out_buf[IO_BUFFER_SIZE];
+
+ // Initialize the progress indicator.
+ const uint64_t in_size = pair->src_st.st_size <= (off_t)(0)
+ ? 0 : (uint64_t)(pair->src_st.st_size);
+ message_progress_start(pair->src_name, in_size);
+
+ lzma_action action = LZMA_RUN;
+ lzma_ret ret;
+
+ strm.avail_in = 0;
+ strm.next_out = out_buf;
+ strm.avail_out = IO_BUFFER_SIZE;
+
+ while (!user_abort) {
+ // Fill the input buffer if it is empty and we haven't reached
+ // end of file yet.
+ if (strm.avail_in == 0 && !pair->src_eof) {
+ strm.next_in = in_buf;
+ strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
+
+ // SIZE_MAX is treated as a failed read; leaving the loop
+ // makes the function return false.
+ if (strm.avail_in == SIZE_MAX)
+ break;
+
+ // Encoder needs to know when we have given all the
+ // input to it. The decoders need to know it too when
+ // we are using LZMA_CONCATENATED.
+ if (pair->src_eof)
+ action = LZMA_FINISH;
+ }
+
+ // Let liblzma do the actual work.
+ ret = lzma_code(&strm, action);
+
+ // Write out if the output buffer became full.
+ if (strm.avail_out == 0) {
+ if (opt_mode != MODE_TEST && io_write(pair, out_buf,
+ IO_BUFFER_SIZE - strm.avail_out))
+ return false;
+
+ strm.next_out = out_buf;
+ strm.avail_out = IO_BUFFER_SIZE;
+ }
+
+ if (ret != LZMA_OK) {
+ // Determine if the return value indicates that we
+ // won't continue coding.
+ const bool stop = ret != LZMA_NO_CHECK
+ && ret != LZMA_UNSUPPORTED_CHECK;
+
+ if (stop) {
+ // First print the final progress info.
+ // This way the user sees more accurately
+ // where the error occurred. Note that we
+ // print this *before* the possible error
+ // message.
+ //
+ // FIXME: What if something goes wrong
+ // after this?
+ message_progress_end(strm.total_in,
+ strm.total_out,
+ ret == LZMA_STREAM_END);
+
+ // Write the remaining bytes even if something
+ // went wrong, because that way the user gets
+ // as much data as possible, which can be good
+ // when trying to get at least some useful
+ // data out of damaged files.
+ if (opt_mode != MODE_TEST && io_write(pair,
+ out_buf, IO_BUFFER_SIZE
+ - strm.avail_out))
+ return false;
+ }
+
+ if (ret == LZMA_STREAM_END) {
+ // Check that there is no trailing garbage.
+ // This is needed for LZMA_Alone and raw
+ // streams.
+ if (strm.avail_in == 0 && (pair->src_eof
+ || io_read(pair, in_buf, 1)
+ == 0)) {
+ assert(pair->src_eof);
+ return true;
+ }
+
+ // FIXME: What about io_read() failing?
+
+ // We hadn't reached the end of the file.
+ ret = LZMA_DATA_ERROR;
+ assert(stop);
+ }
+
+ // If we get here and stop is true, something went
+ // wrong and we print an error. Otherwise it's just
+ // a warning and coding can continue.
+ if (stop) {
+ message_error("%s: %s", pair->src_name,
+ message_strm(ret));
+ } else {
+ message_warning("%s: %s", pair->src_name,
+ message_strm(ret));
+
+ // When compressing, all possible errors set
+ // stop to true.
+ assert(opt_mode != MODE_COMPRESS);
+ }
+
+ if (ret == LZMA_MEMLIMIT_ERROR) {
+ // Figure out how much memory would have
+ // actually needed.
+ // TODO
+ }
+
+ if (stop)
+ return false;
+ }
+
+ // Show progress information if --verbose was specified and
+ // stderr is a terminal.
+ message_progress_update(strm.total_in, strm.total_out);
+ }
+
+ // Reached only when the loop ended because of user_abort or a
+ // failed read.
+ return false;
+}
+
+
+/// \brief Compresses, decompresses, or tests one file
+///
+/// Initializes the coder, opens the file pair for filename, runs the
+/// coder, and closes the file pair. Nothing is returned; this function
+/// simply returns early if coder_init() or io_open() fails.
+///
+/// \param filename Name of the input file, passed to io_open()
+extern void
+process_file(const char *filename)
+{
+ // First try initializing the coder. If it fails, it's useless to try
+ // opening the file. Check also for user_abort just in case if we had
+ // got a signal while initializing the coder.
+ if (coder_init() || user_abort)
+ return;
+
+ // Try to open the input and output files.
+ file_pair *pair = io_open(filename);
+ if (pair == NULL)
+ return;
+
+ // Do the actual coding.
+ const bool success = coder_run(pair);
+
+ // Close the file pair. It needs to know if coding was successful to
+ // know if the source or target file should be unlinked.
+ io_close(pair, success);
+
+ return;
+}
diff --git a/src/xz/process.h b/src/xz/process.h
new file mode 100644
index 00000000..de23eacb
--- /dev/null
+++ b/src/xz/process.h
@@ -0,0 +1,70 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file process.h
+/// \brief Compresses or uncompresses a file
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef PROCESS_H
+#define PROCESS_H
+
+#include "private.h"
+
+
+/// What the tool has been asked to do. process.c encodes in MODE_COMPRESS
+/// and decodes in the other modes; in MODE_TEST it decodes without writing
+/// the output.
+enum operation_mode {
+ MODE_COMPRESS,
+ MODE_DECOMPRESS,
+ MODE_TEST,
+ MODE_LIST,
+};
+
+
+// NOTE: The order of these is significant in suffix.c.
+// compressed_name() indexes its suffix table with opt_format - 1, so the
+// formats after FORMAT_AUTO must stay in the same order as that table.
+enum format_type {
+ FORMAT_AUTO,
+ FORMAT_XZ,
+ FORMAT_LZMA,
+ // HEADER_GZIP,
+ FORMAT_RAW,
+};
+
+
+/// Operation mode of the command line tool. This is set in args.c and read
+/// in several files.
+extern enum operation_mode opt_mode;
+
+/// File format to use when encoding or what format(s) to accept when
+/// decoding. This is a global because it's needed also in suffix.c.
+/// This is set in args.c.
+extern enum format_type opt_format;
+
+
+/// Set the integrity check type used when compressing
+extern void coder_set_check(lzma_check check);
+
+/// Set preset number
+extern void coder_set_preset(size_t new_preset);
+
+/// Add a filter to the custom filter chain
+extern void coder_add_filter(lzma_vli id, void *options);
+
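+/// Finalize the filter chain (a preset is used if no filters were added),
+/// check its memory usage against the memory usage limit, and limit the
+/// number of threads accordingly. Doesn't return on error.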
+extern void coder_set_compression_settings(void);
+
+extern void process_init(void);
+
+extern void process_file(const char *filename);
+
+#endif
diff --git a/src/xz/suffix.c b/src/xz/suffix.c
new file mode 100644
index 00000000..0d46855a
--- /dev/null
+++ b/src/xz/suffix.c
@@ -0,0 +1,213 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file suffix.c
+/// \brief Checks filename suffix and creates the destination filename
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+
+/// Suffix set with suffix_set(), or NULL if no custom suffix has been set.
+/// The string is allocated with xstrdup().
+static char *custom_suffix = NULL;
+
+
+/// Pairs the suffix of a compressed file with the suffix that the
+/// uncompressed file gets in its place.
+struct suffix_pair {
+ const char *compressed;
+ const char *uncompressed;
+};
+
+
+/// \brief Checks if src_name has given compressed_suffix
+///
+/// A name that consists of the suffix alone, or whose last path component
+/// consists of the suffix alone, is not considered to have the suffix.
+///
+/// \param suffix Filename suffix to look for
+/// \param src_name Input filename
+/// \param src_len strlen(src_name)
+///
+/// \return If src_name has the suffix, src_len - strlen(suffix) is
+/// returned. It's always a positive integer. Otherwise zero
+/// is returned.
+static size_t
+test_suffix(const char *suffix, const char *src_name, size_t src_len)
+{
+ const size_t suffix_len = strlen(suffix);
+
+ // The filename must have at least one character in addition to
+ // the suffix. src_name may contain path to the filename, so we
+ // need to check for directory separator too.
+ if (src_len <= suffix_len || src_name[src_len - suffix_len - 1] == '/')
+ return 0;
+
+ // Compare the suffix against the end of src_name.
+ if (strcmp(suffix, src_name + src_len - suffix_len) == 0)
+ return src_len - suffix_len;
+
+ return 0;
+}
+
+
+/// \brief Removes the filename suffix of the compressed file
+///
+/// The known suffixes are tried first (except with --format=raw) and the
+/// custom suffix, if any, is tried after them. A known suffix may be
+/// replaced with another one, for example .txz becomes .tar.
+///
+/// \param src_name Input filename
+/// \param src_len strlen(src_name)
+///
+/// \return Name of the uncompressed file, or NULL if file has unknown
+/// suffix. The name is allocated with xmalloc(). A message
+/// is printed before NULL is returned.
+static char *
+uncompressed_name(const char *src_name, const size_t src_len)
+{
+ static const struct suffix_pair suffixes[] = {
+ { ".xz", "" },
+ { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare.
+ { ".lzma", "" },
+ { ".tlz", ".tar" },
+ // { ".gz", "" },
+ // { ".tgz", ".tar" },
+ };
+
+ // Suffix to append after removing the compressed suffix, and the
+ // length of src_name without the compressed suffix. Zero means that
+ // no suffix has matched so far.
+ const char *new_suffix = "";
+ size_t new_len = 0;
+
+ if (opt_format == FORMAT_RAW) {
+ // Don't check for known suffixes when --format=raw was used.
+ if (custom_suffix == NULL) {
+ message_error(_("%s: With --format=raw, "
+ "--suffix=.SUF is required unless "
+ "writing to stdout"), src_name);
+ return NULL;
+ }
+ } else {
+ for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
+ new_len = test_suffix(suffixes[i].compressed,
+ src_name, src_len);
+ if (new_len != 0) {
+ new_suffix = suffixes[i].uncompressed;
+ break;
+ }
+ }
+ }
+
+ // Fall back to the custom suffix; it is simply removed.
+ if (new_len == 0 && custom_suffix != NULL)
+ new_len = test_suffix(custom_suffix, src_name, src_len);
+
+ if (new_len == 0) {
+ message_warning(_("%s: Filename has an unknown suffix, "
+ "skipping"), src_name);
+ return NULL;
+ }
+
+ const size_t new_suffix_len = strlen(new_suffix);
+ char *dest_name = xmalloc(new_len + new_suffix_len + 1);
+
+ memcpy(dest_name, src_name, new_len);
+ memcpy(dest_name + new_len, new_suffix, new_suffix_len);
+ dest_name[new_len + new_suffix_len] = '\0';
+
+ return dest_name;
+}
+
+
+/// \brief Appends suffix to src_name
+///
+/// In contrast to uncompressed_name(), we check only suffixes that are valid
+/// for the specified file format.
+///
+/// \param src_name Input filename
+/// \param src_len strlen(src_name)
+///
+/// \return Name of the compressed file allocated with xmalloc(), or
+/// NULL if the file already has a suffix of the selected
+/// format or if --format=raw is used without a custom suffix.
+/// A message is printed before NULL is returned.
+static char *
+compressed_name(const char *src_name, const size_t src_len)
+{
+ // The order of these must match the order of enum format_type
+ // (declared in process.h), with FORMAT_AUTO left out.
+ static const struct suffix_pair all_suffixes[][3] = {
+ {
+ { ".xz", "" },
+ { ".txz", ".tar" },
+ { NULL, NULL }
+ }, {
+ { ".lzma", "" },
+ { ".tlz", ".tar" },
+ { NULL, NULL }
+/*
+ }, {
+ { ".gz", "" },
+ { ".tgz", ".tar" },
+ { NULL, NULL }
+*/
+ }, {
+ // --format=raw requires specifying the suffix
+ // manually or using stdout.
+ { NULL, NULL }
+ }
+ };
+
+ // args.c ensures this.
+ assert(opt_format != FORMAT_AUTO);
+
+ // FORMAT_AUTO has no row in all_suffixes, thus subtract one.
+ const size_t format = opt_format - 1;
+ const struct suffix_pair *const suffixes = all_suffixes[format];
+
+ // Refuse files that already have a suffix of the selected format.
+ for (size_t i = 0; suffixes[i].compressed != NULL; ++i) {
+ if (test_suffix(suffixes[i].compressed, src_name, src_len)
+ != 0) {
+ message_warning(_("%s: File already has `%s' "
+ "suffix, skipping"), src_name,
+ suffixes[i].compressed);
+ return NULL;
+ }
+ }
+
+ // TODO: Hmm, maybe it would be better to validate this in args.c,
+ // since the suffix handling when decoding is weird now.
+ if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
+ message_error(_("%s: With --format=raw, "
+ "--suffix=.SUF is required unless "
+ "writing to stdout"), src_name);
+ return NULL;
+ }
+
+ // The custom suffix takes precedence over the default suffix of
+ // the format. The check above guarantees that suffix is not NULL
+ // with --format=raw, whose row has no default suffix.
+ const char *suffix = custom_suffix != NULL
+ ? custom_suffix : suffixes[0].compressed;
+ const size_t suffix_len = strlen(suffix);
+
+ char *dest_name = xmalloc(src_len + suffix_len + 1);
+
+ memcpy(dest_name, src_name, src_len);
+ memcpy(dest_name + src_len, suffix, suffix_len);
+ dest_name[src_len + suffix_len] = '\0';
+
+ return dest_name;
+}
+
+
+/// Returns the destination filename for src_name: compressed_name() is used
+/// in MODE_COMPRESS and uncompressed_name() in every other mode. NULL is
+/// returned if the name cannot be constructed.
+extern char *
+suffix_get_dest_name(const char *src_name)
+{
+ assert(src_name != NULL);
+
+ // Length of the name is needed in all cases to locate the end of
+ // the string to compare the suffix, so calculate the length here.
+ const size_t src_len = strlen(src_name);
+
+ return opt_mode == MODE_COMPRESS
+ ? compressed_name(src_name, src_len)
+ : uncompressed_name(src_name, src_len);
+}
+
+
+/// \brief Sets the custom filename suffix
+///
+/// The suffix is copied with xstrdup(), and a previously set custom suffix
+/// is freed.
+///
+/// \param suffix New suffix. It must not be NULL. An empty suffix or
+/// a suffix containing a slash is a fatal error.
+extern void
+suffix_set(const char *suffix)
+{
+	// Empty suffix and suffixes having a slash are rejected. Such
+	// suffixes would break things later.
+	//
+	// The message shows the rejected string itself instead of the
+	// getopt global optarg, so that it is correct no matter where
+	// the caller got the suffix from.
+	if (suffix[0] == '\0' || strchr(suffix, '/') != NULL)
+		message_fatal(_("%s: Invalid filename suffix"), suffix);
+
+	// Replace the old custom_suffix (if any) with the new suffix.
+	free(custom_suffix);
+	custom_suffix = xstrdup(suffix);
+	return;
+}
diff --git a/src/xz/suffix.h b/src/xz/suffix.h
new file mode 100644
index 00000000..c92b92dc
--- /dev/null
+++ b/src/xz/suffix.h
@@ -0,0 +1,40 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file suffix.h
+/// \brief Checks filename suffix and creates the destination filename
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef SUFFIX_H
+#define SUFFIX_H
+
+/// \brief Get the name of the destination file
+///
+/// Depending on the global variable opt_mode, this tries to find a matching
+/// counterpart for src_name. If the name can be constructed, it is allocated
+/// and returned (caller must free it). On error, a message is printed and
+/// NULL is returned.
+extern char *suffix_get_dest_name(const char *src_name);
+
+
+/// \brief Set a custom filename suffix
+///
+/// This function calls xstrdup() for the given suffix, thus the caller
+/// doesn't need to keep the memory allocated. There can be only one custom
+/// suffix, thus if this is called multiple times, the old suffixes are freed
+/// and forgotten.
+extern void suffix_set(const char *suffix);
+
+#endif
diff --git a/src/xz/util.c b/src/xz/util.c
new file mode 100644
index 00000000..13b67925
--- /dev/null
+++ b/src/xz/util.c
@@ -0,0 +1,199 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file util.c
+/// \brief Miscellaneous utility functions
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "private.h"
+
+
+/// \brief      realloc() wrapper that treats allocation failure as fatal
+///
+/// The requested size must be non-zero (checked with assert()). If
+/// realloc() returns NULL, strerror(errno) is passed to message_fatal().
+extern void *
+xrealloc(void *ptr, size_t size)
+{
+	assert(size > 0);
+
+	void *resized = realloc(ptr, size);
+	if (resized == NULL)
+		message_fatal("%s", strerror(errno));
+
+	return resized;
+}
+
+
+/// \brief      Duplicate a string into memory obtained from xmalloc()
+///
+/// src must not be NULL (checked with assert()). The copy includes the
+/// terminating '\0'.
+extern char *
+xstrdup(const char *src)
+{
+	assert(src != NULL);
+
+	// Count the terminator too so that a single memcpy() copies it.
+	const size_t bytes = strlen(src) + 1;
+	char *copy = xmalloc(bytes);
+	memcpy(copy, src, bytes);
+
+	return copy;
+}
+
+
+/// \brief      Parse a non-negative decimal integer with an optional
+///             multiplier suffix
+///
+/// Leading spaces and tabs are skipped. The digits may be followed by
+/// exactly one of the suffixes listed in the table below; anything else
+/// after the digits is an error.
+///
+/// \param      name    Option name used in the out-of-range error message
+/// \param      value   String to parse
+/// \param      min     Smallest accepted result
+/// \param      max     Largest accepted result
+///
+/// \return     The parsed value, which is in the range [min, max]. Every
+///             error path ends in message_fatal(), which is expected not
+///             to return.
+extern uint64_t
+str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max)
+{
+	uint64_t result = 0;
+
+	// Skip blanks.
+	while (*value == ' ' || *value == '\t')
+		++value;
+
+	if (*value < '0' || *value > '9')
+		message_fatal(_("%s: Value is not a non-negative "
+				"decimal integer"), value);
+
+	do {
+		// Don't overflow in the multiplication.
+		if (result > UINT64_MAX / 10)
+			goto error;
+
+		result *= 10;
+
+		// Don't overflow in the addition either. Checking the two
+		// steps separately accepts every value up to and including
+		// UINT64_MAX; a single combined check would reject a few
+		// representable values at the top of the range.
+		const uint64_t digit = (uint64_t)(*value - '0');
+		if (digit > UINT64_MAX - result)
+			goto error;
+
+		result += digit;
+		++value;
+	} while (*value >= '0' && *value <= '9');
+
+	if (*value != '\0') {
+		// Look for suffix.
+		static const struct {
+			const char name[4];
+			uint64_t multiplier;
+		} suffixes[] = {
+			{ "k",   UINT64_C(1000) },
+			{ "kB",  UINT64_C(1000) },
+			{ "M",   UINT64_C(1000000) },
+			{ "MB",  UINT64_C(1000000) },
+			{ "G",   UINT64_C(1000000000) },
+			{ "GB",  UINT64_C(1000000000) },
+			{ "Ki",  UINT64_C(1024) },
+			{ "KiB", UINT64_C(1024) },
+			{ "Mi",  UINT64_C(1048576) },
+			{ "MiB", UINT64_C(1048576) },
+			{ "Gi",  UINT64_C(1073741824) },
+			{ "GiB", UINT64_C(1073741824) }
+		};
+
+		// Zero means "no matching suffix found"; no table entry
+		// uses zero as its multiplier.
+		uint64_t multiplier = 0;
+		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
+			if (strcmp(value, suffixes[i].name) == 0) {
+				multiplier = suffixes[i].multiplier;
+				break;
+			}
+		}
+
+		if (multiplier == 0) {
+			message(V_ERROR, _("%s: Invalid multiplier suffix. "
+					"Valid suffixes:"), value);
+			message_fatal("`k' (10^3), `M' (10^6), `G' (10^9), "
+					"`Ki' (2^10), `Mi' (2^20), "
+					"`Gi' (2^30)");
+		}
+
+		// Don't overflow here either.
+		if (result > UINT64_MAX / multiplier)
+			goto error;
+
+		result *= multiplier;
+	}
+
+	if (result < min || result > max)
+		goto error;
+
+	return result;
+
+error:
+	message_fatal(_("Value of the option `%s' must be in the range "
+				"[%llu, %llu]"), name,
+				(unsigned long long)(min),
+				(unsigned long long)(max));
+}
+
+
+/*
+/// \brief Simple quoting to get rid of ASCII control characters
+///
+/// This is not so cool and locale-dependent, but should be good enough
+/// At least we don't print any control characters on the terminal.
+///
+extern char *
+str_quote(const char *str)
+{
+ size_t dest_len = 0;
+ bool has_ctrl = false;
+
+ while (str[dest_len] != '\0')
+ if (*(unsigned char *)(str + dest_len++) < 0x20)
+ has_ctrl = true;
+
+ char *dest = malloc(dest_len + 1);
+ if (dest != NULL) {
+ if (has_ctrl) {
+ for (size_t i = 0; i < dest_len; ++i)
+ if (*(unsigned char *)(str + i) < 0x20)
+ dest[i] = '?';
+ else
+ dest[i] = str[i];
+
+ dest[dest_len] = '\0';
+
+ } else {
+ // Usually there are no control characters,
+ // so we can optimize.
+ memcpy(dest, str, dest_len + 1);
+ }
+ }
+
+ return dest;
+}
+*/
+
+
+/// \brief      Tell whether filename is the empty string
+///
+/// When it is, an error message is printed before returning true.
+extern bool
+is_empty_filename(const char *filename)
+{
+	const bool empty = filename[0] == '\0';
+
+	if (empty)
+		message_error(_("Empty filename, skipping"));
+
+	return empty;
+}
+
+
+/// \brief      Tell whether standard input is a terminal
+///
+/// When it is, an error message is printed before returning true.
+extern bool
+is_tty_stdin(void)
+{
+	if (!isatty(STDIN_FILENO))
+		return false;
+
+	message_error(_("Compressed data not read from a terminal "
+			"unless `--force' is used."));
+	return true;
+}
+
+
+/// \brief      Tell whether standard output is a terminal
+///
+/// When it is, an error message is printed before returning true.
+extern bool
+is_tty_stdout(void)
+{
+	if (!isatty(STDOUT_FILENO))
+		return false;
+
+	message_error(_("Compressed data not written to a terminal "
+			"unless `--force' is used."));
+	return true;
+}
diff --git a/src/xz/util.h b/src/xz/util.h
new file mode 100644
index 00000000..dca62b26
--- /dev/null
+++ b/src/xz/util.h
@@ -0,0 +1,71 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file util.h
+/// \brief Miscellaneous utility functions
+//
+// Copyright (C) 2007 Lasse Collin
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef UTIL_H
+#define UTIL_H
+
+/// \brief Safe malloc() that never returns NULL
+///
+/// \note xmalloc(), xrealloc(), and xstrdup() must not be used while
+/// there are files open for writing that should be cleaned up
+/// before exiting.
+#define xmalloc(size) xrealloc(NULL, size)
+
+
+/// \brief Safe realloc() that never returns NULL
+extern void *xrealloc(void *ptr, size_t size);
+
+
+/// \brief Safe strdup() that never returns NULL
+extern char *xstrdup(const char *src);
+
+
+/// \brief Fancy version of strtoull()
+///
+/// \param name Name of the option to show in case of an error
+/// \param value String containing the number to be parsed; may
+/// contain suffixes "k", "M", "G" (powers of 1000) or
+/// "Ki", "Mi", "Gi" (powers of 1024), each optionally
+/// followed by "B"
+/// \param min Minimum valid value
+/// \param max Maximum valid value
+///
+/// \return Parsed value that is in the range [min, max]. Does not return
+/// if an error occurs.
+///
+extern uint64_t str_to_uint64(const char *name, const char *value,
+ uint64_t min, uint64_t max);
+
+
+/// \brief Check if filename is empty and print an error message
+extern bool is_empty_filename(const char *filename);
+
+
+/// \brief Test if stdin is a terminal
+///
+/// If stdin is a terminal, an error message is printed and exit status set
+/// to EXIT_ERROR.
+extern bool is_tty_stdin(void);
+
+
+/// \brief Test if stdout is a terminal
+///
+/// If stdout is a terminal, an error message is printed and exit status set
+/// to EXIT_ERROR.
+extern bool is_tty_stdout(void);
+
+#endif