diff options
author | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 23:52:24 +0200 |
---|---|---|
committer | Lasse Collin <lasse.collin@tukaani.org> | 2008-11-19 23:52:24 +0200 |
commit | 1880a3927b23f265f63b2adb86fbdb81ea09eb06 (patch) | |
tree | 2fe1b65d21f81b28f46eb707378d97f553e99ee1 /src/xz | |
parent | Oh well, big messy commit again. Some highlights: (diff) | |
download | xz-1880a3927b23f265f63b2adb86fbdb81ea09eb06.tar.xz |
Renamed lzma to xz and lzmadec to xzdec. We create symlinks
lzma, unlzma, and lzcat in "make install" for backwards
compatibility with LZMA Utils 4.32.x; I'm not sure if this
should be the default though.
Diffstat (limited to 'src/xz')
-rw-r--r-- | src/xz/Makefile.am | 74 | ||||
-rw-r--r-- | src/xz/args.c | 500 | ||||
-rw-r--r-- | src/xz/args.h | 56 | ||||
-rw-r--r-- | src/xz/hardware.c | 122 | ||||
-rw-r--r-- | src/xz/hardware.h | 45 | ||||
-rw-r--r-- | src/xz/io.c | 658 | ||||
-rw-r--r-- | src/xz/io.h | 97 | ||||
-rw-r--r-- | src/xz/list.c | 477 | ||||
-rw-r--r-- | src/xz/main.c | 402 | ||||
-rw-r--r-- | src/xz/main.h | 60 | ||||
-rw-r--r-- | src/xz/message.c | 892 | ||||
-rw-r--r-- | src/xz/message.h | 132 | ||||
-rw-r--r-- | src/xz/options.c | 352 | ||||
-rw-r--r-- | src/xz/options.h | 46 | ||||
-rw-r--r-- | src/xz/private.h | 52 | ||||
-rw-r--r-- | src/xz/process.c | 391 | ||||
-rw-r--r-- | src/xz/process.h | 70 | ||||
-rw-r--r-- | src/xz/suffix.c | 213 | ||||
-rw-r--r-- | src/xz/suffix.h | 40 | ||||
-rw-r--r-- | src/xz/util.c | 199 | ||||
-rw-r--r-- | src/xz/util.h | 71 |
21 files changed, 4949 insertions, 0 deletions
diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am new file mode 100644 index 00000000..16e55461 --- /dev/null +++ b/src/xz/Makefile.am @@ -0,0 +1,74 @@ +## +## Copyright (C) 2007 Lasse Collin +## +## This program is free software; you can redistribute it and/or +## modify it under the terms of the GNU Lesser General Public +## License as published by the Free Software Foundation; either +## version 2.1 of the License, or (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## Lesser General Public License for more details. +## + +bin_PROGRAMS = xz + +xz_SOURCES = \ + args.c \ + args.h \ + hardware.c \ + hardware.h \ + io.c \ + io.h \ + main.c \ + main.h \ + message.c \ + message.h \ + options.c \ + options.h \ + private.h \ + process.c \ + process.h \ + suffix.c \ + suffix.h \ + util.c \ + util.h + +xz_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I@top_srcdir@/src/common \ + -I@top_srcdir@/src/liblzma/api \ + -I@top_builddir@/lib \ + -I@top_srcdir@/lib + +xz_CFLAGS = @PTHREAD_CFLAGS@ + +## Always link the command line tool statically against liblzma. It is +## faster on x86, because no need for PIC. We also have one dependency less, +## which allows users to more freely copy the xz binary to other boxes. +xz_LDFLAGS = -static +xz_LDADD = \ + @top_builddir@/src/liblzma/liblzma.la \ + @LTLIBINTL@ \ + @PTHREAD_LIBS@ + +if COND_GNULIB +xz_LDADD += @top_builddir@/lib/libgnu.a +endif + + +## Create symlinks for unxz and xzcat for convenicen. Create symlinks also +## for lzma, unlzma, and lzcat for compatibility with LZMA Utils 4.32.x. +install-exec-hook: + cd $(DESTDIR)$(bindir) && \ + rm -f unxz xzcat lzma unlzma lzcat && \ + $(LN_S) xz unxz && \ + $(LN_S) xz xzcat && \ + $(LN_S) xz lzma && \ + $(LN_S) xz unlzma && \ + $(LN_S) xz lzcat + +uninstall-hook: + cd $(DESTDIR)$(bindir) && \ + rm -f unxz xzcat lzma unlzma lzcat diff --git a/src/xz/args.c b/src/xz/args.c new file mode 100644 index 00000000..a2efb277 --- /dev/null +++ b/src/xz/args.c @@ -0,0 +1,500 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include <ctype.h> + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. +const char *stdin_filename = "(stdin)"; + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_SUBBLOCK = INT_MIN, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_FILES, + OPT_FILES0, + }; + + static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + { "info", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "preset", required_argument, NULL, 'p' }, + { "memory", required_argument, NULL, 'M' }, + { "threads", required_argument, NULL, 'T' }, + + { "fast", no_argument, NULL, '1' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", no_argument, NULL, OPT_X86 }, + { "bcj", no_argument, NULL, OPT_X86 }, + { "powerpc", no_argument, NULL, OPT_POWERPC }, + { "ppc", no_argument, NULL, OPT_POWERPC }, + { "ia64", no_argument, NULL, OPT_IA64 }, + { "itanium", no_argument, NULL, OPT_IA64 }, + { "arm", no_argument, NULL, OPT_ARM }, + { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, + { "sparc", no_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // gzip-like options + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset(c - '0'); + break; + + case 'p': { + const uint64_t preset = str_to_uint64( + "preset", optarg, 1, 9); + coder_set_preset(preset); + break; + } + + // --memory + case 'M': + // On 32-bit systems, SIZE_MAX would make more sense + // than UINT64_MAX. But use UINT64_MAX still so that + // scripts that assume > 4 GiB values don't break. + hardware_memlimit_set(str_to_uint64( + "memory", optarg, 0, UINT64_MAX)); + break; + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': + opt_threads = str_to_uint64("threads", optarg, + 1, SIZE_MAX); + break; + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --force + case 'f': + opt_force = true; + break; + + // --help + case 'h': + // This doesn't return. + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_SUBBLOCK: + coder_add_filter(LZMA_FILTER_SUBBLOCK, + options_subblock(optarg)); + break; + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, NULL); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, NULL); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, NULL); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, NULL); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, NULL); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. + static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, + // { "gzip", FORMAT_GZIP }, + // { "gz", FORMAT_GZIP }, + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown integrity" + "check type"), optarg); + } + + coder_set_check(types[i].check); + break; + } + + case OPT_FILES: + args->files_delim = '\n'; + + // Fall through + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with `--files'" + "or `--files0'.")); + + if (optarg == NULL) { + args->files_name = (char *)stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ? "r" : "rb"); + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, + strerror(errno)); + } + + break; + + default: + message_try_help(); + my_exit(E_ERROR); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0) +{ + char *env = getenv("XZ_OPT"); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace(env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a singed int + // and to keep it usable for memory allocation. + if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "XZ_OPT contains too many " + "arguments")); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. + argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace(env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Type of the file format to use when --format=auto or no --format + // was specified. + enum format_type format_compress_auto = FORMAT_XZ; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // The default file format is .lzma if the command name + // contains "lz". + if (strstr(name, "lz") != NULL) + format_compress_auto = FORMAT_LZMA; + + // Operation mode + if (strstr(name, "cat") != NULL) { + // Imply --decompress --stdout + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "un") != NULL) { + // Imply --decompress + opt_mode = MODE_DECOMPRESS; + } + } + + // First the flags from environment + parse_environment(args, argv[0]); + + // Then from the command line + optind = 1; + parse_real(args, argc, argv); + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // If no --format flag was used, or it was --format=auto, we need to + // decide what is the target file format we are going to use. This + // depends on how we were called (checked earlier in this function). + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = format_compress_auto; + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) + coder_set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = argc - optind; + } + + return; +} diff --git a/src/xz/args.h b/src/xz/args.h new file mode 100644 index 00000000..6d4e8282 --- /dev/null +++ b/src/xz/args.h @@ -0,0 +1,56 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef ARGS_H +#define ARGS_H + +#include "private.h" + + +typedef struct { + /// Filenames from command line + char **arg_names; + + /// Number of filenames from command line + size_t arg_count; + + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + char *files_name; + + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. + FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; + + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +// extern bool opt_recursive; + +extern const char *stdin_filename; + +extern void args_parse(args_info *args, int argc, char **argv); + +#endif diff --git a/src/xz/hardware.c b/src/xz/hardware.c new file mode 100644 index 00000000..63bf0937 --- /dev/null +++ b/src/xz/hardware.c @@ -0,0 +1,122 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "physmem.h" + + +/// Maximum number of free *coder* threads. This can be set with +/// the --threads=NUM command line option. +size_t opt_threads = 1; + + +/// Memory usage limit for encoding +static uint64_t memlimit_encoder; + +/// Memory usage limit for decoding +static uint64_t memlimit_decoder; + +/// Memory usage limit given on the command line or environment variable. +/// Zero indicates the default (memlimit_encoder or memlimit_decoder). +static uint64_t memlimit_custom = 0; + + +/// Get the number of CPU cores, and set opt_threads to default to that value. +/// User can then override this with --threads command line option. +static void +hardware_cores(void) +{ +#if defined(HAVE_NUM_PROCESSORS_SYSCONF) + const long cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (cpus > 0) + opt_threads = (size_t)(cpus); + +#elif defined(HAVE_NUM_PROCESSORS_SYSCTL) + int name[2] = { CTL_HW, HW_NCPU }; + int cpus; + size_t cpus_size = sizeof(cpus); + if (!sysctl(name, &cpus, &cpus_size, NULL, NULL) + && cpus_size == sizeof(cpus) && cpus > 0) + opt_threads = (size_t)(cpus); +#endif + + // Limit opt_threads so that maximum number of threads doesn't exceed. + +#if defined(_SC_THREAD_THREADS_MAX) + const long threads_max = sysconf(_SC_THREAD_THREADS_MAX); + if (threads_max > 0 && (size_t)(threads_max) < opt_threads) + opt_threads = (size_t)(threads_max); + +#elif defined(PTHREAD_THREADS_MAX) + if (opt_threads > PTHREAD_THREADS_MAX) + opt_threads = PTHREAD_THREADS_MAX; +#endif + + return; +} + + +static void +hardware_memlimit_init(void) +{ + uint64_t mem = physmem(); + + // If we cannot determine the amount of RAM, assume 32 MiB. Maybe + // even that is too much on some systems. But on most systems it's + // far too little, and can be annoying. + if (mem == 0) + mem = UINT64_C(16) * 1024 * 1024; + + // Use at maximum of 90 % of RAM when encoding and 33 % when decoding. + memlimit_encoder = mem - mem / 10; + memlimit_decoder = mem / 3; + + return; +} + + +extern void +hardware_memlimit_set(uint64_t memlimit) +{ + memlimit_custom = memlimit; + return; +} + + +extern uint64_t +hardware_memlimit_encoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_encoder; +} + + +extern uint64_t +hardware_memlimit_decoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_decoder; +} + + +extern void +hardware_init(void) +{ + hardware_memlimit_init(); + hardware_cores(); + return; +} diff --git a/src/xz/hardware.h b/src/xz/hardware.h new file mode 100644 index 00000000..f604df20 --- /dev/null +++ b/src/xz/hardware.h @@ -0,0 +1,45 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef HARDWARE_H +#define HARDWARE_H + +#include "private.h" + + +extern size_t opt_threads; + + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. +extern void hardware_init(void); + + +/// Set custom memory usage limit. This is used for both encoding and +/// decoding. Zero indicates resetting the limit back to defaults. +extern void hardware_memlimit_set(uint64_t memlimit); + +/// Get the memory usage limit for encoding. By default this is 90 % of RAM. +extern uint64_t hardware_memlimit_encoder(void); + + +/// Get the memory usage limit for decoding. By default this is 30 % of RAM. +extern uint64_t hardware_memlimit_decoder(void); + +#endif diff --git a/src/xz/io.c b/src/xz/io.c new file mode 100644 index 00000000..0ec63f03 --- /dev/null +++ b/src/xz/io.c @@ -0,0 +1,658 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file io.c +/// \brief File opening, unlinking, and closing +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include <fcntl.h> + +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include <sys/time.h> +#elif defined(HAVE_UTIME) +# include <utime.h> +#endif + + +/// \brief Unlinks a file +/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). +static void +io_unlink(const char *name, const struct stat *known_st) +{ + struct stat new_st; + + if (lstat(name, &new_st) + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino) { + message_error(_("%s: File seems to be moved, not removing"), + name); + } else { + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + if (unlink(name)) + message_error(_("%s: Cannot remove: %s"), + name, strerror(errno)); + } + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + // Simple cache to avoid repeated calls to geteuid(). + static enum { + WARN_FCHOWN_UNKNOWN, + WARN_FCHOWN_NO, + WARN_FCHOWN_YES, + } warn_fchown = WARN_FCHOWN_UNKNOWN; + + // Try changing the owner of the file. If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. + if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) + && warn_fchown != WARN_FCHOWN_NO) { + if (warn_fchown == WARN_FCHOWN_UNKNOWN) + warn_fchown = geteuid() == 0 + ? WARN_FCHOWN_YES : WARN_FCHOWN_NO; + + if (warn_fchown == WARN_FCHOWN_YES) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + } + + mode_t mode; + + if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { + message_warning(_("%s: Cannot set the file group: %s"), + pair->dest_name, strerror(errno)); + // We can still safely copy some additional permissions: + // `group' must be at least as strict as `other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. + mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + message_warning(_("%s: Cannot set the file permissions: %s"), + pair->dest_name, strerror(errno)); + + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. + long atime_nsec; + long mtime_nsec; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; + +# else + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; +# endif + + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) + (void)futimes(pair->dest_fd, tv); +# elif defined(HAVE_FUTIMESAT) + (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->src_name, tv); +# endif + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. + const struct utimbuf buf = { + .actime = pair->src_st.st_atime; + .modtime = pair->src_st.st_mtime; + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->src_name, &buf); +#endif + + return; +} + + +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src(file_pair *pair) +{ + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; + return false; + } + + // We accept only regular files if we are writing the output + // to disk too, and if --force was not given. + const bool reg_files_only = !opt_stdout && !opt_force; + + // Flags for open() + int flags = O_RDONLY | O_NOCTTY; + + // If we accept only regular files, we need to be careful to avoid + // problems with special files like devices and FIFOs. O_NONBLOCK + // prevents blocking when opening such files. When we want to accept + // special files, we must not use O_NONBLOCK, or otherwise we won't + // block waiting e.g. FIFOs to become readable. + if (reg_files_only) + flags |= O_NONBLOCK; + +#ifdef O_NOFOLLOW + if (reg_files_only) + flags |= O_NOFOLLOW; +#else + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. + if (reg_files_only) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; + } + } +#endif + + // Try to open the file. If we are accepting non-regular files, + // unblock the caught signals so that open() can be interrupted + // if it blocks e.g. due to a FIFO file. + if (!reg_files_only) + signals_unblock(); + + // Maybe this wouldn't need a loop, since all the signal handlers for + // which we don't use SA_RESTART set user_abort to true. But it + // doesn't hurt to have it just in case. + do { + pair->src_fd = open(pair->src_name, flags); + } while (pair->src_fd == -1 && errno == EINTR && !user_abort); + + if (!reg_files_only) + signals_block(); + + if (pair->src_fd == -1) { + // If we were interrupted, don't display any error message. + if (errno == EINTR) { + // All the signals that don't have SA_RESTART + // set user_abort. + assert(user_abort); + return true; + } + +#ifdef O_NOFOLLOW + // Give an understandable error message in if reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate if O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW. + bool was_symlink = false; + +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; + +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; + +# else + if (errno == ELOOP && reg_files_only) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif + + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else +#endif + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); + + return true; + } + + // Drop O_NONBLOCK, which is used only when we are accepting only + // regular files. After the open() call, we want things to block + // instead of giving EAGAIN. + if (reg_files_only) { + flags = fcntl(pair->src_fd, F_GETFL); + if (flags == -1) + goto error_msg; + + flags &= ~O_NONBLOCK; + + if (fcntl(pair->src_fd, F_SETFL, flags)) + goto error_msg; + } + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only) { + if (!S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, " + "skipping"), pair->src_name); + goto error; + } + + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force was used. We drop these bits + // explicitly in io_copy_attr(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; + } + } + + return false; + +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); +error: + (void)close(pair->src_fd); + return true; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk if +/// closing succeeds and --keep hasn't been used. +static void +io_close_src(file_pair *pair, bool success) +{ + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { + // If we are going to unlink(), do it before closing the file. + // This way there's no risk that someone replaces the file and + // happens to get same inode number, which would make us + // unlink() wrong file. + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + + (void)close(pair->src_fd); + } + + return; +} + + +static bool +io_open_dest(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; + return false; + } + + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; + + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } + + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL; + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + // Don't bother with error message if user requested + // us to exit anyway. + if (!user_abort) + message_error("%s: %s", pair->dest_name, + strerror(errno)); + + free(pair->dest_name); + return true; + } + + // If this really fails... well, we have a safe fallback. + if (fstat(pair->dest_fd, &pair->dest_st)) { + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; + } + + return false; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return Zero if closing succeeds. On error, -1 is returned and +/// error message printed. +static int +io_close_dest(file_pair *pair, bool success) +{ + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) + return 0; + + if (close(pair->dest_fd)) { + message_error(_("%s: Closing the file failed: %s"), + pair->dest_name, strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dest_name, &pair->dest_st); + free(pair->dest_name); + return -1; + } + + // If the operation using this file wasn't successful, we git rid + // of the junk file. + if (!success) + io_unlink(pair->dest_name, &pair->dest_st); + + free(pair->dest_name); + + return 0; +} + + +extern file_pair * +io_open(const char *src_name) +{ + if (is_empty_filename(src_name)) + return NULL; + + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; + + pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .src_fd = -1, + .dest_fd = -1, + .src_eof = false, + }; + + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. + signals_block(); + + file_pair *ret = NULL; + if (!io_open_src(&pair)) { + // io_open_src() may have unblocked the signals temporarily, + // and thus user_abort may have got set even if open() + // succeeded. + if (user_abort || io_open_dest(&pair)) + io_close_src(&pair, false); + else + ret = &pair; + } + + signals_unblock(); + + return ret; +} + + +extern void +io_close(file_pair *pair, bool success) +{ + signals_block(); + + if (success && pair->dest_fd != STDOUT_FILENO) + io_copy_attrs(pair); + + // Close the destination first. If it fails, we must not remove + // the source file! + if (io_close_dest(pair, success)) + success = false; + + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. + io_close_src(pair, success); + + signals_unblock(); + + return; +} + + +extern size_t +io_read(file_pair *pair, uint8_t *buf, size_t size) +{ + // We use small buffers here. + assert(size < SSIZE_MAX); + + size_t left = size; + + while (left > 0) { + const ssize_t amount = read(pair->src_fd, buf, left); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + + message_error(_("%s: Read error: %s"), + pair->src_name, strerror(errno)); + + // FIXME Is this needed? + pair->src_eof = true; + + return SIZE_MAX; + } + + buf += (size_t)(amount); + left -= (size_t)(amount); + } + + return size - left; +} + + +extern bool +io_write(const file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size < SSIZE_MAX); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return -1; + + continue; + } + + // Handle broken pipe specially. gzip and bzip2 + // don't print anything on SIGPIPE. In addition, + // gzip --quiet uses exit status 2 (warning) on + // broken pipe instead of whatever raise(SIGPIPE) + // would make it return. It is there to hide "Broken + // pipe" message on some old shells (probably old + // GNU bash). + // + // We don't do anything special with --quiet, which + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. + if (errno != EPIPE) + message_error(_("%s: Write error: %s"), + pair->dest_name, strerror(errno)); + + return true; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return false; +} diff --git a/src/xz/io.h b/src/xz/io.h new file mode 100644 index 00000000..4d8e61b2 --- /dev/null +++ b/src/xz/io.h @@ -0,0 +1,97 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file io.h +/// \brief I/O types and functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef IO_H +#define IO_H + +#include "private.h" + + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE BUFSIZ +#endif + + +typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. + const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. + char *dest_name; + + /// File descriptor of the source file + int src_fd; + + /// File descriptor of the target file + int dest_fd; + + /// Stat of the source file. + struct stat src_st; + + /// Stat of the destination file. + struct stat dest_st; + + /// True once end of the source file has been detected. + bool src_eof; + +} file_pair; + + +/// \brief Opens a file pair +extern file_pair *io_open(const char *src_name); + + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. +extern void io_close(file_pair *pair, bool success); + + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size); + + +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, zero is returned. On error, -1 is returned +/// and error message printed. +extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size); + +#endif diff --git a/src/xz/list.c b/src/xz/list.c new file mode 100644 index 00000000..8728d47b --- /dev/null +++ b/src/xz/list.c @@ -0,0 +1,477 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .lzma files +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/* + +1. Check the file type: native, alone, unknown + +Alone: +1. Show info about header. Don't look for concatenated parts. + +Native: +1. Check that Stream Header is valid. +2. Seek to the end of the file. +3. Skip padding. +4. Reverse decode Stream Footer. +5. Seek Backward Size bytes. +6. + +*/ + + +static void +unsupported_file(file_handle *handle) +{ + errmsg(V_ERROR, "%s: Unsupported file type", handle->name); + set_exit_status(ERROR); + (void)io_close(handle); + return; +} + + +/// Primitive escaping function, that escapes only ASCII control characters. +static void +print_escaped(const uint8_t *str) +{ + while (*str != '\0') { + if (*str <= 0x1F || *str == 0x7F) + printf("\\x%02X", *str); + else + putchar(*str); + + ++str; + } + + return; +} + + +static void +list_native(file_handle *handle) +{ + lzma_stream strm = LZMA_STREAM_INIT; + lzma_stream_flags flags; + lzma_ret ret = lzma_stream_header_decoder(&strm, &flags); + +} + + +static void +list_alone(const listing_handle *handle) +{ + if (handle->buffer[0] > (4 * 5 + 4) * 9 + 8) { + unsupported_file(handle); + return; + } + + const unsigned int pb = handle->buffer[0] / (9 * 5); + handle->buffer[0] -= pb * 9 * 5; + const unsigned int lp = handle->buffer[0] / 9; + const unsigned int lc = handle->buffer[0] - lp * 9; + + uint32_t dict = 0; + for (size_t i = 1; i < 5; ++i) { + dict <<= 8; + dict |= header[i]; + } + + if (dict > LZMA_DICTIONARY_SIZE_MAX) { + unsupported_file(handle); + return; + } + + uint64_t uncompressed_size = 0; + for (size_t i = 5; i < 13; ++i) { + uncompressed_size <<= 8; + uncompressed_size |= header[i]; + } + + // Reject files with uncompressed size of 256 GiB or more. It's + // an arbitrary limit trying to avoid at least some false positives. + if (uncompressed_size != UINT64_MAX + && uncompressed_size >= (UINT64_C(1) << 38)) { + unsupported_file(handle); + return; + } + + if (verbosity < V_WARNING) { + printf("name="); + print_escaped(handle->name); + printf("\nformat=alone\n"); + + if (uncompressed_size == UINT64_MAX) + printf("uncompressed_size=unknown\n"); + else + printf("uncompressed_size=%" PRIu64 "\n", + uncompressed_size); + + printf("dict=%" PRIu32 "\n", dict); + + printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb); + + } else { + printf("File name: "); + print_escaped(handle->name); + printf("\nFile format: LZMA_Alone\n") + + printf("Uncompressed size: "); + if (uncompressed_size == UINT64_MAX) + printf("unknown\n"); + else + printf("%," PRIu64 " bytes (%" PRIu64 " MiB)\n", + uncompressed_size, + (uncompressed_size + 1024 * 512) + / (1024 * 1024)); + + printf("Dictionary size: %," PRIu32 " bytes " + "(%" PRIu32 " MiB)\n", + dict, (dict + 1024 * 512) / (1024 * 1024)); + + printf("Literal context bits (lc): %u\n", lc); + printf("Literal position bits (lc): %u\n", lp); + printf("Position bits (pb): %u\n", pb); + } + + return; +} + + + + +typedef struct { + const char *filename; + struct stat st; + int fd; + + lzma_stream strm; + lzma_stream_flags stream_flags; + lzma_info *info; + + lzma_vli backward_size; + lzma_vli uncompressed_size; + + size_t buffer_size; + uint8_t buffer[IO_BUFFER_SIZE]; +} listing_handle; + + +static bool +listing_pread(listing_handle *handle, uint64_t offset) +{ + if (offset >= (uint64_t)(handle->st.st_size)) { + errmsg(V_ERROR, "%s: Trying to read past the end of " + "the file.", handle->filename); + return true; + } + +#ifdef HAVE_PREAD + const ssize_t ret = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE, + (off_t)(offset)); +#else + // Use lseek() + read() since we don't have pread(). We don't care + // to which offset the reading position is left. + if (lseek(handle->fd, (off_t)(offset), SEEK_SET) == -1) { + errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); + return true; + } + + const ssize_t ret = read(handle->fd, handle->buffer, IO_BUFFER_SIZE); +#endif + + if (ret == -1) { + errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); + return true; + } + + if (ret == 0) { + errmsg(V_ERROR, "%s: Trying to read past the end of " + "the file.", handle->filename); + return true; + } + + handle->buffer_size = (size_t)(ret); + return false; +} + + + +static bool +parse_stream_header(listing_handle *handle) +{ + if (listing_pread(handle, 0)) + return true; + + // TODO Got enough input? + + lzma_ret ret = lzma_stream_header_decoder( + &handle->strm, &handle->stream_flags); + if (ret != LZMA_OK) { + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + handle->strm.next_in = handle->buffer; + handle->strm.avail_in = handle->buffer_size; + ret = lzma_code(&handle->strm, LZMA_RUN); + if (ret != LZMA_STREAM_END) { + assert(ret != LZMA_OK); + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + return false; +} + + +static bool +parse_stream_tail(listing_handle *handle) +{ + uint64_t offset = (uint64_t)(handle->st.st_size); + + // Skip padding + do { + if (offset == 0) { + errmsg(V_ERROR, "%s: %s", handle->name, + str_strm_error(LZMA_DATA_ERROR)); + return true; + } + + if (offset < IO_BUFFER_SIZE) + offset = 0; + else + offset -= IO_BUFFER_SIZE; + + if (listing_pread(handle, offset)) + return true; + + while (handle->buffer_size > 0 + && handle->buffer[handle->buffer_size - 1] + == '\0') + --handle->buffer_size; + + } while (handle->buffer_size == 0); + + if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) { + // TODO + } + + lzma_stream_flags stream_flags; + lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags); + if (ret != LZMA_OK) { + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + handle->strm.next_in = handle->buffer + handle->buffer_size + - LZMA_STREAM_TAIL_SIZE; + handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE; + handle->buffer_size -= LZMA_STREAM_TAIL_SIZE; + ret = lzma_code(&handle->strm, LZMA_RUN); + if (ret != LZMA_OK) { + assert(ret != LZMA_OK); + errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); + return true; + } + + if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) { + // TODO + // Possibly corrupt, possibly concatenated file. + } + + handle->backward_size = 0; + ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer, + &handle->buffer_size); + if (ret != LZMA_OK) { + // It may be LZMA_BUF_ERROR too, but it doesn't make sense + // as an error message displayed to the user. + errmsg(V_ERROR, "%s: %s", handle->name, + str_strm_error(LZMA_DATA_ERROR)); + return true; + } + + if (!stream_flags.is_multi) { + handle->uncompressed_size = 0; + size_t tmp = handle->buffer_size; + ret = lzma_vli_reverse_decode(&handle->uncompressed_size, + handle->buffer, &tmp); + if (ret != LZMA_OK) + handle->uncompressed_size = LZMA_VLI_UNKNOWN; + } + + // Calculate the Header Metadata Block start offset. + + + return false; +} + + + +static void +list_native(listing_handle *handle) +{ + lzma_memory_limiter *limiter + = lzma_memory_limiter_create(opt_memory); + if (limiter == NULL) { + errmsg(V_ERROR, + } + lzma_info *info = + + + // Parse Stream Header + // + // Single-Block Stream: + // - Parse Block Header + // - Parse Stream Footer + // - If Backward Size doesn't match, error out + // + // Multi-Block Stream: + // - Parse Header Metadata Block, if any + // - Parse Footer Metadata Block + // - Parse Stream Footer + // - If Footer Metadata Block doesn't match the Stream, error out + // + // In other words, we don't support concatened files. + if (parse_stream_header(handle)) + return; + + if (parse_block_header(handle)) + return; + + if (handle->stream_flags.is_multi) { + if (handle->block_options.is_metadata) { + if (parse_metadata(handle) + return; + } + + if (my_seek(handle, + + } else { + if (handle->block_options.is_metadata) { + FILE_IS_CORRUPT(); + return; + } + + if (parse_stream_footer(handle)) + return; + + // If Uncompressed Size isn't present in Block Header, + // it must be present in Stream Footer. + if (handle->block_options.uncompressed_size + == LZMA_VLI_UNKNOWN + && handle->stream_flags.uncompressed_size + == LZMA_VLI_UNKNOWN) { + FILE_IS_CORRUPT(); + return; + } + + // Construct a single-Record Index. + lzma_index *index = malloc(sizeof(lzma_index)); + if (index == NULL) { + out_of_memory(); + return; + } + + // Pohdintaa: + // Jos Block coder hoitaisi Uncompressed ja Backward Sizet, + // voisi index->total_sizeksi laittaa suoraan Backward Sizen. + index->total_size = + + if () { + + } + } + + + if (handle->block_options.is_metadata) { + if (!handle->stream_flags.is_multi) { + FILE_IS_CORRUPT(); + return; + } + + if (parse_metadata(handle)) + return; + + } +} + + + +extern void +list(const char *filename) +{ + if (strcmp(filename, "-") == 0) { + errmsg(V_ERROR, "%s: --list does not support reading from " + "standard input", filename); + return; + } + + if (is_empty_filename(filename)) + return; + + listing_handle handle; + handle.filename = filename; + + handle.fd = open(filename, O_RDONLY | O_NOCTTY); + if (handle.fd == -1) { + errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); + return; + } + + if (fstat(handle.fd, &handle.st)) { + errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); + goto out; + } + + if (!S_ISREG(handle.st.st_mode)) { + errmsg(V_WARNING, _("%s: Not a regular file, skipping"), + filename); + goto out; + } + + if (handle.st.st_size <= 0) { + errmsg(V_ERROR, _("%s: File is empty"), filename); + goto out; + } + + if (listing_pread(&handle, 0)) + goto out; + + if (handle.buffer[0] == 0xFF) { + if (opt_header == HEADER_ALONE) { + errmsg(V_ERROR, "%s: FIXME", filename); // FIXME + goto out; + } + + list_native(&handle); + } else { + if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) { + errmsg(V_ERROR, "%s: FIXME", filename); // FIXME + goto out; + } + + list_alone(&handle); + } + +out: + (void)close(fd); + return; +} diff --git a/src/xz/main.c b/src/xz/main.c new file mode 100644 index 00000000..4e24b98d --- /dev/null +++ b/src/xz/main.c @@ -0,0 +1,402 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "open_stdxxx.h" +#include <ctype.h> + + +volatile sig_atomic_t user_abort = false; + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which have have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// signals_block() and signals_unblock() can be called recursively. +static size_t signals_block_count = 0; + + +static void +signal_handler(int sig) +{ + exit_signal = sig; + user_abort = true; + return; +} + + +static void +establish_signal_handlers(void) +{ + // List of signals for which we establish the signal handler. + static const int sigs[] = { + SIGINT, + SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + + struct sigaction sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + sa.sa_flags = 0; + sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); + } + + return; +} + + +extern void +signals_block(void) +{ + if (signals_block_count++ == 0) { + const int saved_errno = errno; + sigprocmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + + return; +} + + +extern void +signals_unblock(void) +{ + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + + return; +} + + +extern void +set_exit_status(enum exit_status_type new_status) +{ + assert(new_status == E_WARNING || new_status == E_ERROR); + + if (exit_status != E_ERROR) + exit_status = new_status; + + return; +} + + +extern void +my_exit(enum exit_status_type status) +{ + // Close stdout. If something goes wrong, print an error message + // to stderr. + { + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (ferror_err || fclose_err) { + // If it was fclose() that failed, we have the reason + // in errno. If only ferror() indicated an error, + // we have no idea what the reason was. + message(V_ERROR, _("Writing to standard output " + "failed: %s"), + fclose_err ? strerror(errno) + : _("Unknown error")); + status = E_ERROR; + } + } + + // Close stderr. If something goes wrong, there's nothing where we + // could print an error message. Just set the exit status. + { + const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = E_ERROR; + } + + // If we have got a signal, raise it to kill the program. + const int sig = exit_signal; + if (sig != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(exit_signal); + + // If, for some weird reason, the signal doesn't kill us, + // we safely fall to the exit below. + } + + exit(status); +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and uncompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. + while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); + return NULL; + } + + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } + + name[pos++] = c; + + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. + if (pos == size) { + size *= 2; + name = xrealloc(name, size); + } + } + + return NULL; +} + + +int +main(int argc, char **argv) +{ + // Make sure that stdin, stdout, and and stderr are connected to + // a valid file descriptor. Exit immediatelly with exit code ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + open_stdxxx(E_ERROR); + + // This has to be done before calling any liblzma functions. + lzma_init(); + + // Set up the locale. + setlocale(LC_ALL, ""); + +#ifdef ENABLE_NLS + // Set up the message translations too. + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); +#endif + + // Set the program invocation name used in various messages, and + // do other message handling related initializations. + message_init(argv[0]); + + // Set hardware-dependent default values. These can be overriden + // on the command line, thus this must be done before parse_args(). + hardware_init(); + + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); + else + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal and --force wasn't used. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + my_exit(E_ERROR); + } + } + } + + if (opt_mode == MODE_LIST) { + message_fatal("--list is not implemented yet."); + } + + // Hook the signal handlers. We don't need these before we start + // the actual action, so this is done after parsing the command + // line arguments. + establish_signal_handlers(); + + // Process the files given on the command line. Note that if no names + // were given, parse_args() gave us a fake "-" filename. + for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Unless --force + // was used, check that we aren't writing compressed + // data to a terminal or reading it from terminal. + if (!opt_force) { + if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) + continue; + } else if (is_tty_stdin()) { + continue; + } + } + + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + continue; + } + + // Replace the "-" with a special pointer, which is + // recognized by process_file() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; + } + + // Do the actual compression or uncompression. + process_file(args.arg_names[i]); + } + + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. + while (true) { + const char *name = read_name(&args); + if (name == NULL) + break; + + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + process_file(name); + } + + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); + } + + my_exit(exit_status); +} diff --git a/src/xz/main.h b/src/xz/main.h new file mode 100644 index 00000000..1e369425 --- /dev/null +++ b/src/xz/main.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellanous declarations +// +// Copyright (C) 2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MAIN_H +#define MAIN_H + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// If this is true, we will clean up the possibly incomplete output file, +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. +extern volatile sig_atomic_t user_abort; + + +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is EX_WARNING and the old exit status was already EX_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Exits the program using the given status. This takes care of closing +/// stdin, stdout, and stderr and catches possible errors. If we had got +/// a signal, this function will raise it so that to the parent process it +/// appears that we were killed by the signal sent by the user. +extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn)); + + +#endif diff --git a/src/xz/message.c b/src/xz/message.c new file mode 100644 index 00000000..caba9fbc --- /dev/null +++ b/src/xz/message.c @@ -0,0 +1,892 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if defined(HAVE_SYS_TIME_H) +# include <sys/time.h> +#elif defined(SIGALRM) +// FIXME +#endif + +#include <stdarg.h> + + +/// Name of the program which is prefixed to the error messages. +static const char *argv0; + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once the a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGALRM. Once this variable is true, we will print an empty line before +/// the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGALRM signals, +/// we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically. +static bool progress_automatic; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. +static uint64_t expected_in_size; + +/// Time when we started processing the file +static double start_time; + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((unused))) +{ + progress_needs_updating = true; + return; +} + + +/// Get the current time as double +static double +my_time(void) +{ + struct timeval tv; + + // This really shouldn't fail. I'm not sure what to return if it + // still fails. It doesn't look so useful to check the return value + // everywhere. FIXME? + if (gettimeofday(&tv, NULL)) + return -1.0; + + return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e9; +} + + +/// Wrapper for snprintf() to help constructing a string in pieces. +static void /* lzma_attribute((format(printf, 3, 4))) */ +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= len; + } + + return; +} + + +extern void +message_init(const char *given_argv0) +{ + // Name of the program + argv0 = given_argv0; + + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. + progress_automatic = isatty(STDERR_FILENO); + +/* + if (progress_automatic) { + // stderr is a terminal. Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparseable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + uint64_t columns; + if (columns_str != NULL + && !str_to_uint64_raw(&columns, columns_str) + && columns < 80) + progress_automatic = false; + } +*/ + +#ifdef SIGALRM + // Establish the signal handler for SIGALRM. Since this signal + // doesn't require any quick action, we set SA_RESTART. + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sa.sa_handler = &progress_signal_handler; + if (sigaction(SIGALRM, &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (!current_filename_printed + && (files_total != 1 || filename != stdin_filename)) { + signals_block(); + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', stderr); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. + if (files_total == 0) + fprintf(stderr, "%s (%u)\n", filename, + files_pos); + else + fprintf(stderr, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_progress_start(const char *src_name, uint64_t in_size) +{ + // Store the processing start time of the file and its expected size. + // If we aren't printing any statistics, then these are unused. But + // since it is possible that the user tells us with SIGALRM to show + // statistics, we need to have these available anyway. + start_time = my_time(); + filename = src_name; + expected_in_size = in_size; + + // Indicate the name of this file hasn't been printed to + // stderr yet. + current_filename_printed = false; + + // Start numbering the files starting from one. + ++files_pos; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. As an exception, if there is exactly one file, + // do not print the filename at all. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Print the filename to stderr if that is appropriate with + // the current settings. + print_filename(); + + // Start the timer to set progress_needs_updating to true + // after about one second. An alternative would to be set + // progress_needs_updating to true here immediatelly, but + // setting the timer looks better to me, since extremely + // early progress info is pretty much useless. + alarm(1); + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. + if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + static char buf[sizeof("99.9 %")]; + + // Never show 100.0 % before we actually are finished (that case is + // handled separately in message_progress_end()). + snprintf(buf, sizeof(buf), "%.1f %%", + (double)(in_pos) / (double)(expected_in_size) * 99.9); + + return buf; +} + + +static void +progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final) +{ + if (final) { + // At maximum of four digits is allowed for exact byte count. + if (value < 10000) { + my_snprintf(pos, left, "%'" PRIu64 " B", value); + return; + } + +// // At maximum of four significant digits is allowed for KiB. +// if (value < UINT64_C(1023900)) { + // At maximum of five significant digits is allowed for KiB. + if (value < UINT64_C(10239900)) { + my_snprintf(pos, left, "%'.1f KiB", + (double)(value) / 1024.0); + return; + } + } + + // Otherwise we use MiB. + my_snprintf(pos, left, "%'.1f MiB", + (double)(value) / (1024.0 * 1024.0)); + return; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // This is enough to hold sizes up to about 99 TiB if thousand + // separator is used, or about 1 PiB without thousand separator. + // After that the progress indicator will look a bit silly, since + // the compression ratio no longer fits with three decimal places. + static char buf[44]; + + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this the final message, use more reasonable + // units than MiB if the file was small. + progress_sizes_helper(&pos, &left, compressed_pos, final); + my_snprintf(&pos, &left, " / "); + progress_sizes_helper(&pos, &left, uncompressed_pos, final); + + // Avoid division by zero. If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data. +static const char * +progress_speed(uint64_t uncompressed_pos, double elapsed) +{ + // Don't print the speed immediatelly, since the early values look + // like somewhat random. + if (elapsed < 3.0) + return ""; + + static const char unit[][8] = { + "KiB/s", + "MiB/s", + "GiB/s", + }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) / (elapsed * 1024.0); + + // Adjust the unit of the speed if needed. + while (speed > 999.9) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + static char buf[sizeof("999.9 GiB/s")]; + snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed or remaining time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. +static const char * +progress_time(uint32_t seconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + // Don't show anything if the time is zero or ridiculously big. + if (seconds == 0 || seconds > ((UINT32_C(9999) * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Make the string to contain the estimated remaining time, or if the amount +/// of input isn't known, how much time has elapsed. +static const char * +progress_remaining(uint64_t in_pos, double elapsed) +{ + // If we don't know the size of the input, we indicate the time + // spent so far. + if (expected_in_size == 0 || in_pos > expected_in_size) + return progress_time((uint32_t)(elapsed)); + + // If we are at the very beginning of the file or the file is very + // small, don't give any estimate to avoid far too wrong estimations. + if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (double)(expected_in_size - in_pos) + * elapsed / (double)(in_pos); + if (remaining == 0) + remaining = 1; + + return progress_time(remaining); +} + + +extern void +message_progress_update(uint64_t in_pos, uint64_t out_pos) +{ + // If there's nothing to do, return immediatelly. + if (!progress_needs_updating || in_pos == 0) + return; + + // Print the filename if it hasn't been printed yet. + print_filename(); + + // Calculate how long we have been processing this file. + const double elapsed = my_time() - start_time; + + // Set compressed_pos and uncompressed_pos. + uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + signals_block(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + fprintf(stderr, " %7s %43s %11s %10s\r", + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_remaining(in_pos, elapsed)); + + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm() call or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success) +{ + // If we are not in verbose mode, we have nothing to do. + if (verbosity < V_VERBOSE || user_abort) + return; + + // Cancel a pending alarm, if any. + if (progress_automatic) { + alarm(0); + progress_active = false; + } + + const double elapsed = my_time() - start_time; + + uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + // If it took less than a second, don't display the time. + const char *elapsed_str = progress_time((double)(elapsed)); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic && in_pos > 0) { + // Using floating point conversion for the percentage instead + // of static "100.0 %" string, because the decimal separator + // isn't a dot in all locales. + fprintf(stderr, " %5.1f %% %43s %11s %10s\n", + 100.0, + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + elapsed_str); + + // When no automatic progress indicator is used, don't print a verbose + // message at all if we something went wrong and we couldn't produce + // any output. If we did produce output, then it is sometimes useful + // to tell that to the user, especially if we detected an error after + // a time-consuming operation. + } else if (success || out_pos > 0) { + // The filename and size information are always printed. + fprintf(stderr, "%s: %s", filename, progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. + const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + // If there currently is a progress message on the screen, + // print a newline so that the progress message is left + // readable. This is good, because it is nice to be able to + // see where the error occurred. (The alternative would be + // to clear the progress message and replace it with the + // error message.) + if (progress_active) { + progress_active = false; + fputc('\n', stderr); + } + + fprintf(stderr, "%s: ", argv0); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + my_exit(E_ERROR); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + } + + return NULL; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), argv0); + return; +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + printf("xz " PACKAGE_VERSION "\n"); + printf("liblzma %s\n", lzma_version_string()); + my_exit(E_SUCCESS); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... [FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + argv0); + + puts(_("Mandatory arguments to long options are mandatory for " + "short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + + if (long_help) + puts(_( +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the null character as terminator")); + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" or `sha256'")); + } + + puts(_( +" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n" +" compression, 7-9 excellent compression; default is 7")); + + puts(_( +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)")); + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + puts(_( +"\n" +" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" +" --lzma2=[OPTS] more of the following options (valid values; default):\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86 x86 filter (sometimes called BCJ filter)\n" +" --powerpc PowerPC (big endian) filter\n" +" --ia64 IA64 (Itanium) filter\n" +" --arm ARM filter\n" +" --armthumb ARM-Thumb filter\n" +" --sparc SPARC filter")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each other (1-256; 1)")); +#endif + +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + puts(_( +"\n" +" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)")); +#endif + } + +/* + if (long_help) + puts(_( +"\n" +" Resource usage options:\n" +"\n" +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)\n" +" -T, --threads=NUM use a maximum of NUM (de)compression threads" +// " --threading=STR threading style; possible values are `auto' (default),\n" +// " `files', and `stream' +)); +*/ + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more verbose")); + + if (long_help) + puts(_( +"\n" +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help")); + else + puts(_( +" -h, --help display this short help\n" +" -H, --long-help display the long help (lists also the advanced options)")); + + puts(_( +" -V, --version display the version number")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + if (long_help) { + // FIXME !!! + size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024); + if (mem_limit == 0) + mem_limit = 1; + + // We use PRIu64 instead of %zu to support pre-C99 libc. + // FIXME: Use ' but avoid warnings. + puts(_("On this system and configuration, the tool will use")); + printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"), + (uint64_t)(mem_limit)); + printf(N_(" * at maximum of one thread for (de)compression.\n\n", + " * at maximum of %" PRIu64 + " threads for (de)compression.\n\n", + (uint64_t)(opt_threads)), (uint64_t)(opt_threads)); + } + + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + + my_exit(E_SUCCESS); +} diff --git a/src/xz/message.h b/src/xz/message.h new file mode 100644 index 00000000..7ef9b165 --- /dev/null +++ b/src/xz/message.h @@ -0,0 +1,132 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MESSAGE_H +#define MESSAGE_H + + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Debugging, FIXME remove? +}; + + +/// \brief Initializes the message functions +/// +/// \param argv0 Name of the program i.e. argv[0] from main() +/// \param verbosity Verbosity level +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(const char *argv0); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + + +/// Set the total number of files to be processed (stdin is counted as a file +/// here). The default is one. +extern void message_set_files(unsigned int files); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +extern void message(enum message_verbosity verbosity, const char *fmt, ...) + lzma_attribute((format(printf, 2, 3))); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. +extern void message_warning(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +extern void message_error(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))); + + +/// \brief Prints an error message and exits with EXIT_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +extern void message_fatal(const char *fmt, ...) + lzma_attribute((format(printf, 1, 2))) + lzma_attribute((noreturn)); + + +/// Print an error message that an internal error occurred and exit with +/// EXIT_ERROR. +extern void message_bug(void) lzma_attribute((noreturn)); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +extern void message_signal_handler(void) lzma_attribute((noreturn)); + + +/// Converts lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +extern void message_version(void) lzma_attribute((noreturn)); + + +/// Print the help message. +extern void message_help(bool long_help) lzma_attribute((noreturn)); + + +/// +extern void message_progress_start(const char *filename, uint64_t in_size); + + +/// +extern void message_progress_update(uint64_t in_pos, uint64_t out_pos); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param in_pos Final input position i.e. how much input there was. +/// \param out_pos Final output position +/// \param success True if the operation was successful. We don't +/// print the final progress message if the operation +/// wasn't successful. +/// +extern void message_progress_end( + uint64_t in_pos, uint64_t out_pos, bool success); + +#endif diff --git a/src/xz/options.c b/src/xz/options.c new file mode 100644 index 00000000..77ebddd6 --- /dev/null +++ b/src/xz/options.c @@ -0,0 +1,352 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with colons, semicolons, +/// or commas: opt=val:opt=val;opt=val,opt=val +/// +/// Each option is a string, that is converted to an integer using the +/// index where the option string is in the array. +/// +/// Value can be either a number with minimum and maximum value limit, or +/// a string-id map mapping a list of possible string values to integers. +/// +/// When parsing both option and value succeed, a filter-specific function +/// is called, which should update the given value to filter-specific +/// options structure. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +/// \return Returns only if no errors occur. +/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + uint32_t key, uint64_t value), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (true) { + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " + "pairs separated with commas"), str); + + // Look for the option name from the option map. + bool found = false; + for (size_t i = 0; opts[i].name != NULL; ++i) { + if (strcmp(name, opts[i].name) != 0) + continue; + + if (opts[i].map == NULL) { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v); + } else { + // value is a string which we should map + // to an integer. + size_t j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) + == 0) + break; + } + + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option " + "value"), value); + + set(filter_options, i, opts[i].map[j].id); + } + + found = true; + break; + } + + if (!found) + message_fatal(_("%s: Invalid option name"), name); + + if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +////////////// +// Subblock // +////////////// + +enum { + OPT_SIZE, + OPT_RLE, + OPT_ALIGN, +}; + + +static void +set_subblock(void *options, uint32_t key, uint64_t value) +{ + lzma_options_subblock *opt = options; + + switch (key) { + case OPT_SIZE: + opt->subblock_data_size = value; + break; + + case OPT_RLE: + opt->rle = value; + break; + + case OPT_ALIGN: + opt->alignment = value; + break; + } +} + + +extern lzma_options_subblock * +options_subblock(const char *str) +{ + static const option_map opts[] = { + { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, + LZMA_SUBBLOCK_DATA_SIZE_MAX }, + { "rle", NULL, LZMA_SUBBLOCK_RLE_OFF, + LZMA_SUBBLOCK_RLE_MAX }, + { "align",NULL, LZMA_SUBBLOCK_ALIGNMENT_MIN, + LZMA_SUBBLOCK_ALIGNMENT_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_subblock *options + = xmalloc(sizeof(lzma_options_subblock)); + *options = (lzma_options_subblock){ + .allow_subfilters = false, + .alignment = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT, + .subblock_data_size = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT, + .rle = LZMA_SUBBLOCK_RLE_OFF, + }; + + parse_options(str, opts, &set_subblock, options); + + return options; +} + + +/////////// +// Delta // +/////////// + +enum { + OPT_DIST, +}; + + +static void +set_delta(void *options, uint32_t key, uint64_t value) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DIST: + opt->dist = value; + break; + } +} + + +extern lzma_options_delta * +options_delta(const char *str) +{ + static const option_map opts[] = { + { "dist", NULL, LZMA_DELTA_DIST_MIN, + LZMA_DELTA_DIST_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. + .type = LZMA_DELTA_TYPE_BYTE, + .dist = LZMA_DELTA_DIST_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_NICE, + OPT_MF, + OPT_DEPTH, +}; + + +static void +set_lzma(void *options, uint32_t key, uint64_t value) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_DICT: + opt->dict_size = value; + break; + + case OPT_LC: + opt->lc = value; + break; + + case OPT_LP: + opt->lp = value; + break; + + case OPT_PB: + opt->pb = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_NICE: + opt->nice_len = value; + break; + + case OPT_MF: + opt->mf = value; + break; + + case OPT_DEPTH: + opt->depth = value; + break; + } +} + + +extern lzma_options_lzma * +options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "dict", NULL, LZMA_DICT_SIZE_MIN, + (UINT32_C(1) << 30) + (UINT32_C(1) << 29) }, + { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX }, + { "mode", modes, 0, 0 }, + { "nice", NULL, 2, 273 }, + { "mf", mfs, 0, 0 }, + { "depth", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + // TODO There should be a way to take some preset as the base for + // custom settings. + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + *options = (lzma_options_lzma){ + .dict_size = LZMA_DICT_SIZE_DEFAULT, + .preset_dict = NULL, + .preset_dict_size = 0, + .lc = LZMA_LC_DEFAULT, + .lp = LZMA_LP_DEFAULT, + .pb = LZMA_PB_DEFAULT, + .persistent = false, + .mode = LZMA_MODE_NORMAL, + .nice_len = 64, + .mf = LZMA_MF_BT4, + .depth = 0, + }; + + parse_options(str, opts, &set_lzma, options); + + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must be at " + "maximum of 4")); + + const uint32_t nice_len_min = options->mf & 0x0F; + if (options->nice_len < nice_len_min) + message_fatal(_("The selected match finder requires at " + "least nice=%" PRIu32), nice_len_min); + + return options; +} diff --git a/src/xz/options.h b/src/xz/options.h new file mode 100644 index 00000000..4253ac3c --- /dev/null +++ b/src/xz/options.h @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef OPTIONS_H +#define OPTIONS_H + +#include "private.h" + + +/// \brief Parser for Subblock options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_subblock *options_subblock(const char *str); + + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *options_delta(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *options_lzma(const char *str); + +#endif diff --git a/src/xz/private.h b/src/xz/private.h new file mode 100644 index 00000000..b463a08e --- /dev/null +++ b/src/xz/private.h @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definions, and prototypes +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef PRIVATE_H +#define PRIVATE_H + +#include "sysdefs.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <signal.h> +#include <locale.h> +#include <stdio.h> +#include <unistd.h> + +#ifdef ENABLE_NLS +# include <libintl.h> +# define _(msgid) gettext(msgid) +# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#else +# define _(msgid) (msgid) +# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2)) +#endif + +#include "main.h" +#include "process.h" +#include "message.h" +#include "args.h" +#include "hardware.h" +#include "io.h" +#include "options.h" +#include "suffix.h" +#include "util.h" + +#endif diff --git a/src/xz/process.c b/src/xz/process.c new file mode 100644 index 00000000..d30878e4 --- /dev/null +++ b/src/xz/process.c @@ -0,0 +1,391 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file process.c +/// \brief Compresses or uncompresses a file +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +enum operation_mode opt_mode = MODE_COMPRESS; + +enum format_type opt_format = FORMAT_AUTO; + + +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Filters needed for all encoding all formats, and also decoding in raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; + +/// Number of filters. Zero indicates that we are using a preset. +static size_t filters_count = 0; + +/// Number of the preset (1-9) +static size_t preset_number = 7; + +/// Indicate if no preset has been given. In that case, we will auto-adjust +/// the compression preset so that it doesn't use too much RAM. +// FIXME +static bool preset_default = true; + +/// Integrity check type +static lzma_check check = LZMA_CHECK_CRC64; + + +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + return; +} + + +extern void +coder_set_preset(size_t new_preset) +{ + preset_number = new_preset; + preset_default = false; + return; +} + + +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); + + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; + + return; +} + + +extern void +coder_set_compression_settings(void) +{ + // Options for LZMA1 or LZMA2 in case we are using a preset. + static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } + + // Get the preset for LZMA1 or LZMA2. + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); + + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; + } + + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; + + // If we are using the LZMA_Alone format, allow exactly one filter + // which has to be LZMA. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("With --format=lzma only the LZMA1 filter " + "is supported")); + + // TODO: liblzma probably needs an API to validate the filter chain. + + // If using --format=raw, we can be decoding. + uint64_t memory_usage; + uint64_t memory_limit; + if (opt_mode == MODE_COMPRESS) { + memory_usage = lzma_memusage_encoder(filters); + memory_limit = hardware_memlimit_encoder(); + } else { + memory_usage = lzma_memusage_decoder(filters); + memory_limit = hardware_memlimit_decoder(); + } + + if (memory_usage == UINT64_MAX) + message_bug(); + + if (preset_default) { + // When no preset was explicitly requested, we use the default + // preset only if the memory usage limit allows. Otherwise we + // select a lower preset automatically. + while (memory_usage > memory_limit) { + if (preset_number == 1) + message_fatal(_("Memory usage limit is too " + "small for any internal " + "filter preset")); + + if (lzma_lzma_preset(&opt_lzma, --preset_number)) + message_bug(); + + memory_usage = lzma_memusage_encoder(filters); + } + } else { + if (memory_usage > memory_limit) + message_fatal(_("Memory usage limit is too small " + "for the given filter setup")); + } + + // Limit the number of worked threads so that memory usage + // limit isn't exceeded. + assert(memory_usage > 0); + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + + if (opt_threads > thread_limit) + opt_threads = thread_limit; + + return; +} + + +static bool +coder_init(void) +{ + lzma_ret ret = LZMA_PROG_ERROR; + + if (opt_mode == MODE_COMPRESS) { + switch (opt_format) { + case FORMAT_AUTO: + // args.c ensures this. + assert(0); + break; + + case FORMAT_XZ: + ret = lzma_stream_encoder(&strm, filters, check); + break; + + case FORMAT_LZMA: + ret = lzma_alone_encoder(&strm, filters[0].options); + break; + + case FORMAT_RAW: + ret = lzma_raw_encoder(&strm, filters); + break; + } + } else { + const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK + | LZMA_CONCATENATED; + + switch (opt_format) { + case FORMAT_AUTO: + ret = lzma_auto_decoder(&strm, + hardware_memlimit_decoder(), flags); + break; + + case FORMAT_XZ: + ret = lzma_stream_decoder(&strm, + hardware_memlimit_decoder(), flags); + break; + + case FORMAT_LZMA: + ret = lzma_alone_decoder(&strm, + hardware_memlimit_decoder()); + break; + + case FORMAT_RAW: + // Memory usage has already been checked in args.c. + // FIXME Comment + ret = lzma_raw_decoder(&strm, filters); + break; + } + } + + if (ret != LZMA_OK) { + if (ret == LZMA_MEM_ERROR) + message_error("%s", message_strm(LZMA_MEM_ERROR)); + else + message_bug(); + + return true; + } + + return false; +} + + +static bool +coder_run(file_pair *pair) +{ + // Buffers to hold input and output data. + uint8_t in_buf[IO_BUFFER_SIZE]; + uint8_t out_buf[IO_BUFFER_SIZE]; + + // Initialize the progress indicator. + const uint64_t in_size = pair->src_st.st_size <= (off_t)(0) + ? 0 : (uint64_t)(pair->src_st.st_size); + message_progress_start(pair->src_name, in_size); + + lzma_action action = LZMA_RUN; + lzma_ret ret; + + strm.avail_in = 0; + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; + + while (!user_abort) { + // Fill the input buffer if it is empty and we haven't reached + // end of file yet. + if (strm.avail_in == 0 && !pair->src_eof) { + strm.next_in = in_buf; + strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE); + + if (strm.avail_in == SIZE_MAX) + break; + + // Encoder needs to know when we have given all the + // input to it. The decoders need to know it too when + // we are using LZMA_CONCATENATED. + if (pair->src_eof) + action = LZMA_FINISH; + } + + // Let liblzma do the actual work. + ret = lzma_code(&strm, action); + + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (opt_mode != MODE_TEST && io_write(pair, out_buf, + IO_BUFFER_SIZE - strm.avail_out)) + return false; + + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; + } + + if (ret != LZMA_OK) { + // Determine if the return value indicates that we + // won't continue coding. + const bool stop = ret != LZMA_NO_CHECK + && ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // First print the final progress info. + // This way the user sees more accurately + // where the error occurred. Note that we + // print this *before* the possible error + // message. + // + // FIXME: What if something goes wrong + // after this? + message_progress_end(strm.total_in, + strm.total_out, + ret == LZMA_STREAM_END); + + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (opt_mode != MODE_TEST && io_write(pair, + out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + return false; + } + + if (ret == LZMA_STREAM_END) { + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. + if (strm.avail_in == 0 && (pair->src_eof + || io_read(pair, in_buf, 1) + == 0)) { + assert(pair->src_eof); + return true; + } + + // FIXME: What about io_read() failing? + + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } + + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. + if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } + + if (ret == LZMA_MEMLIMIT_ERROR) { + // Figure out how much memory would have + // actually needed. + // TODO + } + + if (stop) + return false; + } + + // Show progress information if --verbose was specified and + // stderr is a terminal. + message_progress_update(strm.total_in, strm.total_out); + } + + return false; +} + + +extern void +process_file(const char *filename) +{ + // First try initializing the coder. If it fails, it's useless to try + // opening the file. Check also for user_abort just in case if we had + // got a signal while initializing the coder. + if (coder_init() || user_abort) + return; + + // Try to open the input and output files. + file_pair *pair = io_open(filename); + if (pair == NULL) + return; + + // Do the actual coding. + const bool success = coder_run(pair); + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); + + return; +} diff --git a/src/xz/process.h b/src/xz/process.h new file mode 100644 index 00000000..de23eacb --- /dev/null +++ b/src/xz/process.h @@ -0,0 +1,70 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file process.c +/// \brief Compresses or uncompresses a file +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef PROCESS_H +#define PROCESS_H + +#include "private.h" + + +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, + // HEADER_GZIP, + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(size_t new_preset); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// +extern void coder_set_compression_settings(void); + +extern void process_init(void); + +extern void process_file(const char *filename); + +#endif diff --git a/src/xz/suffix.c b/src/xz/suffix.c new file mode 100644 index 00000000..0d46855a --- /dev/null +++ b/src/xz/suffix.c @@ -0,0 +1,213 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +static char *custom_suffix = NULL; + + +struct suffix_pair { + const char *compressed; + const char *uncompressed; +}; + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len || src_name[src_len - suffix_len - 1] == '/') + return 0; + + if (strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. +static char * +uncompressed_name(const char *src_name, const size_t src_len) +{ + static const struct suffix_pair suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, + { ".tlz", ".tar" }, + // { ".gz", "" }, + // { ".tgz", ".tar" }, + }; + + const char *new_suffix = ""; + size_t new_len = 0; + + if (opt_format == FORMAT_RAW) { + // Don't check for known suffixes when --format=raw was used. + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + } else { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + } + + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); + + if (new_len == 0) { + message_warning(_("%s: Filename has an unknown suffix, " + "skipping"), src_name); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = xmalloc(new_len + new_suffix_len + 1); + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +/// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. +static char * +compressed_name(const char *src_name, const size_t src_len) +{ + // The order of these must match the order in args.h. + static const struct suffix_pair all_suffixes[][3] = { + { + { ".xz", "" }, + { ".txz", ".tar" }, + { NULL, NULL } + }, { + { ".lzma", "" }, + { ".tlz", ".tar" }, + { NULL, NULL } +/* + }, { + { ".gz", "" }, + { ".tgz", ".tar" }, + { NULL, NULL } +*/ + }, { + // --format=raw requires specifying the suffix + // manually or using stdout. + { NULL, NULL } + } + }; + + // args.c ensures this. + assert(opt_format != FORMAT_AUTO); + + const size_t format = opt_format - 1; + const struct suffix_pair *const suffixes = all_suffixes[format]; + + for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { + if (test_suffix(suffixes[i].compressed, src_name, src_len) + != 0) { + message_warning(_("%s: File already has `%s' " + "suffix, skipping"), src_name, + suffixes[i].compressed); + return NULL; + } + } + + // TODO: Hmm, maybe it would be better to validate this in args.c, + // since the suffix handling when decoding is weird now. + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0].compressed; + const size_t suffix_len = strlen(suffix); + + char *dest_name = xmalloc(src_len + suffix_len + 1); + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +suffix_get_dest_name(const char *src_name) +{ + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + + return opt_mode == MODE_COMPRESS + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); +} + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a slash are rejected. Such + // suffixes would break things later. + if (suffix[0] == '\0' || strchr(suffix, '/') != NULL) + message_fatal(_("%s: Invalid filename suffix"), optarg); + + // Replace the old custom_suffix (if any) with the new suffix. + free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} diff --git a/src/xz/suffix.h b/src/xz/suffix.h new file mode 100644 index 00000000..c92b92dc --- /dev/null +++ b/src/xz/suffix.h @@ -0,0 +1,40 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef SUFFIX_H +#define SUFFIX_H + +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); + +#endif diff --git a/src/xz/util.c b/src/xz/util.c new file mode 100644 index 00000000..13b67925 --- /dev/null +++ b/src/xz/util.c @@ -0,0 +1,199 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + ptr = realloc(ptr, size); + if (ptr == NULL) + message_fatal("%s", strerror(errno)); + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); + + do { + // Don't overflow. + if (result > (UINT64_MAX - 9) / 10) + goto error; + + result *= 10; + result += *value - '0'; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. + static const struct { + const char name[4]; + uint64_t multiplier; + } suffixes[] = { + { "k", UINT64_C(1000) }, + { "kB", UINT64_C(1000) }, + { "M", UINT64_C(1000000) }, + { "MB", UINT64_C(1000000) }, + { "G", UINT64_C(1000000000) }, + { "GB", UINT64_C(1000000000) }, + { "Ki", UINT64_C(1024) }, + { "KiB", UINT64_C(1024) }, + { "Mi", UINT64_C(1048576) }, + { "MiB", UINT64_C(1048576) }, + { "Gi", UINT64_C(1073741824) }, + { "GiB", UINT64_C(1073741824) } + }; + + uint64_t multiplier = 0; + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + if (strcmp(value, suffixes[i].name) == 0) { + multiplier = suffixes[i].multiplier; + break; + } + } + + if (multiplier == 0) { + message(V_ERROR, _("%s: Invalid multiplier suffix. " + "Valid suffixes:"), value); + message_fatal("`k' (10^3), `M' (10^6), `G' (10^9) " + "`Ki' (2^10), `Mi' (2^20), " + "`Gi' (2^30)"); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + message_fatal(_("Value of the option `%s' must be in the range " + "[%llu, %llu]"), name, + (unsigned long long)(min), + (unsigned long long)(max)); +} + + +/* +/// \brief Simple quoting to get rid of ASCII control characters +/// +/// This is not so cool and locale-dependent, but should be good enough +/// At least we don't print any control characters on the terminal. +/// +extern char * +str_quote(const char *str) +{ + size_t dest_len = 0; + bool has_ctrl = false; + + while (str[dest_len] != '\0') + if (*(unsigned char *)(str + dest_len++) < 0x20) + has_ctrl = true; + + char *dest = malloc(dest_len + 1); + if (dest != NULL) { + if (has_ctrl) { + for (size_t i = 0; i < dest_len; ++i) + if (*(unsigned char *)(str + i) < 0x20) + dest[i] = '?'; + else + dest[i] = str[i]; + + dest[dest_len] = '\0'; + + } else { + // Usually there are no control characters, + // so we can optimize. + memcpy(dest, str, dest_len + 1); + } + } + + return dest; +} +*/ + + +extern bool +is_empty_filename(const char *filename) +{ + if (filename[0] == '\0') { + message_error(_("Empty filename, skipping")); + return true; + } + + return false; +} + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data not read from a terminal " + "unless `--force' is used.")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data not written to a terminal " + "unless `--force' is used.")); + + return ret; +} diff --git a/src/xz/util.h b/src/xz/util.h new file mode 100644 index 00000000..dca62b26 --- /dev/null +++ b/src/xz/util.h @@ -0,0 +1,71 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Copyright (C) 2007 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef UTIL_H +#define UTIL_H + +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting. +#define xmalloc(size) xrealloc(NULL, size) + + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + + +/// \brief Check if filename is empty and print an error message +extern bool is_empty_filename(const char *filename); + + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdout(void); + +#endif |