aboutsummaryrefslogtreecommitdiff
path: root/src/xz
diff options
context:
space:
mode:
Diffstat (limited to 'src/xz')
-rw-r--r--src/xz/args.c6
-rw-r--r--src/xz/coder.c33
-rw-r--r--src/xz/file_io.c243
-rw-r--r--src/xz/file_io.h34
-rw-r--r--src/xz/message.c1
-rw-r--r--src/xz/xz.111
6 files changed, 272 insertions, 56 deletions
diff --git a/src/xz/args.c b/src/xz/args.c
index 75b62205..bb6e27bb 100644
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -43,6 +43,7 @@ parse_real(args_info *args, int argc, char **argv)
OPT_LZMA1,
OPT_LZMA2,
+ OPT_NO_SPARSE,
OPT_FILES,
OPT_FILES0,
OPT_INFO_MEMORY,
@@ -65,6 +66,7 @@ parse_real(args_info *args, int argc, char **argv)
{ "force", no_argument, NULL, 'f' },
{ "stdout", no_argument, NULL, 'c' },
{ "to-stdout", no_argument, NULL, 'c' },
+ { "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
{ "suffix", required_argument, NULL, 'S' },
// { "recursive", no_argument, NULL, 'r' }, // TODO
{ "files", optional_argument, NULL, OPT_FILES },
@@ -339,6 +341,10 @@ parse_real(args_info *args, int argc, char **argv)
break;
}
+ case OPT_NO_SPARSE:
+ io_no_sparse();
+ break;
+
case OPT_FILES:
args->files_delim = '\n';
diff --git a/src/xz/coder.c b/src/xz/coder.c
index 7cf6186f..d58e7e39 100644
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -33,8 +33,8 @@ static lzma_stream strm = LZMA_STREAM_INIT;
static lzma_filter filters[LZMA_FILTERS_MAX + 1];
/// Input and output buffers
-static uint8_t in_buf[IO_BUFFER_SIZE];
-static uint8_t out_buf[IO_BUFFER_SIZE];
+static io_buf in_buf;
+static io_buf out_buf;
/// Number of filters. Zero indicates that we are using a preset.
static size_t filters_count = 0;
@@ -275,7 +275,7 @@ coder_set_compression_settings(void)
static bool
is_format_xz(void)
{
- return strm.avail_in >= 6 && memcmp(in_buf, "\3757zXZ", 6) == 0;
+ return strm.avail_in >= 6 && memcmp(in_buf.u8, "\3757zXZ", 6) == 0;
}
@@ -289,7 +289,7 @@ is_format_lzma(void)
// Decode the LZMA1 properties.
lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
- if (lzma_properties_decode(&filter, NULL, in_buf, 5) != LZMA_OK)
+ if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
return false;
// A hack to ditch tons of false positives: We allow only dictionary
@@ -317,7 +317,7 @@ is_format_lzma(void)
// Again, if someone complains, this will be reconsidered.
uint64_t uncompressed_size = 0;
for (size_t i = 0; i < 8; ++i)
- uncompressed_size |= (uint64_t)(in_buf[5 + i]) << (i * 8);
+ uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
if (uncompressed_size != UINT64_MAX
&& uncompressed_size > (UINT64_C(1) << 38))
@@ -444,15 +444,16 @@ coder_normal(file_pair *pair)
// Assume that something goes wrong.
bool success = false;
- strm.next_out = out_buf;
+ strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE;
while (!user_abort) {
// Fill the input buffer if it is empty and we haven't reached
// end of file yet.
if (strm.avail_in == 0 && !pair->src_eof) {
- strm.next_in = in_buf;
- strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
+ strm.next_in = in_buf.u8;
+ strm.avail_in = io_read(
+ pair, &in_buf, IO_BUFFER_SIZE);
if (strm.avail_in == SIZE_MAX)
break;
@@ -466,11 +467,11 @@ coder_normal(file_pair *pair)
// Write out if the output buffer became full.
if (strm.avail_out == 0) {
- if (opt_mode != MODE_TEST && io_write(pair, out_buf,
+ if (opt_mode != MODE_TEST && io_write(pair, &out_buf,
IO_BUFFER_SIZE - strm.avail_out))
break;
- strm.next_out = out_buf;
+ strm.next_out = out_buf.u8;
strm.avail_out = IO_BUFFER_SIZE;
}
@@ -487,7 +488,7 @@ coder_normal(file_pair *pair)
// when trying to get at least some useful
// data out of damaged files.
if (opt_mode != MODE_TEST && io_write(pair,
- out_buf, IO_BUFFER_SIZE
+ &out_buf, IO_BUFFER_SIZE
- strm.avail_out))
break;
}
@@ -502,7 +503,7 @@ coder_normal(file_pair *pair)
// input, and thus pair->src_eof
// becomes true.
strm.avail_in = io_read(
- pair, in_buf, 1);
+ pair, &in_buf, 1);
if (strm.avail_in == SIZE_MAX)
break;
@@ -579,14 +580,14 @@ coder_passthru(file_pair *pair)
if (user_abort)
return false;
- if (io_write(pair, in_buf, strm.avail_in))
+ if (io_write(pair, &in_buf, strm.avail_in))
return false;
strm.total_in += strm.avail_in;
strm.total_out = strm.total_in;
message_progress_update();
- strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
+ strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
if (strm.avail_in == SIZE_MAX)
return false;
}
@@ -613,8 +614,8 @@ coder_run(const char *filename)
// Read the first chunk of input data. This is needed to detect
// the input file type (for now, only for decompression).
- strm.next_in = in_buf;
- strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE);
+ strm.next_in = in_buf.u8;
+ strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
switch (coder_init(pair)) {
case CODER_INIT_NORMAL:
diff --git a/src/xz/file_io.c b/src/xz/file_io.c
index b79d0b77..be5db73d 100644
--- a/src/xz/file_io.c
+++ b/src/xz/file_io.c
@@ -37,6 +37,17 @@ static bool warn_fchown;
#endif
+/// If true, try to create sparse files when decompressing.
+static bool try_sparse = true;
+
+/// File status flags of standard output. This is used by io_open_dest()
+/// and io_close_dest().
+static int stdout_flags = 0;
+
+
+static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
+
+
extern void
io_init(void)
{
@@ -63,6 +74,14 @@ io_init(void)
}
+extern void
+io_no_sparse(void)
+{
+ try_sparse = false;
+ return;
+}
+
+
/// \brief Unlink a file
///
/// This tries to verify that the file being unlinked really is the file that
@@ -498,42 +517,42 @@ io_open_dest(file_pair *pair)
#ifdef TUKLIB_DOSLIKE
setmode(STDOUT_FILENO, O_BINARY);
#endif
- return false;
- }
-
- pair->dest_name = suffix_get_dest_name(pair->src_name);
- if (pair->dest_name == NULL)
- return true;
+ } else {
+ pair->dest_name = suffix_get_dest_name(pair->src_name);
+ if (pair->dest_name == NULL)
+ return true;
- // If --force was used, unlink the target file first.
- if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
- message_error("%s: Cannot unlink: %s",
- pair->dest_name, strerror(errno));
- free(pair->dest_name);
- return true;
- }
+ // If --force was used, unlink the target file first.
+ if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
+ message_error("%s: Cannot unlink: %s",
+ pair->dest_name, strerror(errno));
+ free(pair->dest_name);
+ return true;
+ }
- if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
- message_error("%s: Cannot unlink: %s", pair->dest_name,
- strerror(errno));
- free(pair->dest_name);
- return true;
- }
+ if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
+ message_error("%s: Cannot unlink: %s",
+ pair->dest_name, strerror(errno));
+ free(pair->dest_name);
+ return true;
+ }
- // Open the file.
- const int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL;
- const mode_t mode = S_IRUSR | S_IWUSR;
- pair->dest_fd = open(pair->dest_name, flags, mode);
+ // Open the file.
+ const int flags = O_WRONLY | O_BINARY | O_NOCTTY
+ | O_CREAT | O_EXCL;
+ const mode_t mode = S_IRUSR | S_IWUSR;
+ pair->dest_fd = open(pair->dest_name, flags, mode);
- if (pair->dest_fd == -1) {
- // Don't bother with error message if user requested
- // us to exit anyway.
- if (!user_abort)
- message_error("%s: %s", pair->dest_name,
- strerror(errno));
+ if (pair->dest_fd == -1) {
+ // Don't bother with error message if user requested
+ // us to exit anyway.
+ if (!user_abort)
+ message_error("%s: %s", pair->dest_name,
+ strerror(errno));
- free(pair->dest_name);
- return true;
+ free(pair->dest_name);
+ return true;
+ }
}
// If this really fails... well, we have a safe fallback.
@@ -546,6 +565,65 @@ io_open_dest(file_pair *pair)
pair->dest_st.st_dev = 0;
pair->dest_st.st_ino = 0;
#endif
+#ifndef TUKLIB_DOSLIKE
+ } else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
+ // When writing to standard output, we need to be extra
+ // careful:
+ // - It may be connected to something else than
+ // a regular file.
+ // - We aren't necessarily writing to a new empty file
+ // or to the end of an existing file.
+ // - O_APPEND may be active.
+ //
+ // TODO: I'm keeping this disabled for DOS-like systems
+ // for now. FAT doesn't support sparse files, but NTFS
+ // does, so maybe this should be enabled on Windows after
+ // some testing.
+ if (pair->dest_fd == STDOUT_FILENO) {
+ if (!S_ISREG(pair->dest_st.st_mode))
+ return false;
+
+ const int flags = fcntl(STDOUT_FILENO, F_GETFL);
+ if (flags == -1)
+ return false;
+
+ if (flags & O_APPEND) {
+ // Creating a sparse file is not possible
+ // when O_APPEND is active (it's used by
+ // shell's >> redirection). As I understand
+ // it, it is safe to temporarily disable
+ // O_APPEND in xz, because if someone
+ // happened to write to the same file at the
+ // same time, results would be bad anyway
+ // (users shouldn't assume that xz uses any
+ // specific block size when writing data).
+ //
+ // The write position may be something else
+ // than the end of the file, so we must fix
+ // it to start writing at the end of the file
+ // to imitate O_APPEND.
+ if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
+ return false;
+
+ if (fcntl(STDOUT_FILENO, F_SETFL,
+ stdout_flags & ~O_APPEND))
+ return false;
+
+ // Remember the flags so that io_close_dest()
+ // can restore them.
+ stdout_flags = flags;
+
+ } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
+ != pair->dest_st.st_size) {
+ // Writing won't start exactly at the end
+ // of the file. We cannot use sparse output,
+ // because it would probably corrupt the file.
+ return false;
+ }
+ }
+
+ pair->dest_try_sparse = true;
+#endif
}
return false;
@@ -562,6 +640,21 @@ io_open_dest(file_pair *pair)
static int
io_close_dest(file_pair *pair, bool success)
{
+ // If io_open_dest() has disabled O_APPEND, restore it here.
+ if (stdout_flags != 0) {
+ assert(pair->dest_fd == STDOUT_FILENO);
+
+ const int fail = fcntl(STDOUT_FILENO, F_SETFL, stdout_flags);
+ stdout_flags = 0;
+
+ if (fail) {
+ message_error(_("Error restoring the O_APPEND flag "
+ "to standard output: %s"),
+ strerror(errno));
+ return -1;
+ }
+ }
+
if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
return 0;
@@ -603,6 +696,8 @@ io_open(const char *src_name)
.src_fd = -1,
.dest_fd = -1,
.src_eof = false,
+ .dest_try_sparse = false,
+ .dest_pending_sparse = 0,
};
// Block the signals, for which we have a custom signal handler, so
@@ -629,6 +724,29 @@ io_open(const char *src_name)
extern void
io_close(file_pair *pair, bool success)
{
+ // Take care of sparseness at the end of the output file.
+ if (success && pair->dest_try_sparse
+ && pair->dest_pending_sparse > 0) {
+ // Seek forward one byte less than the size of the pending
+ // hole, then write one zero-byte. This way the file grows
+ // to its correct size. An alternative would be to use
+ // ftruncate() but that isn't portable enough (e.g. it
+ // doesn't work with FAT on Linux; FAT isn't that important
+ // since it doesn't support sparse files anyway, but we don't
+ // want to create corrupt files on it).
+ if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
+ SEEK_CUR) == -1) {
+ message_error(_("%s: Seeking failed when trying "
+ "to create a sparse file: %s"),
+ pair->dest_name, strerror(errno));
+ success = false;
+ } else {
+ const uint8_t zero[1] = { '\0' };
+ if (io_write_buf(pair, zero, 1))
+ success = false;
+ }
+ }
+
signals_block();
if (success && pair->dest_fd != STDOUT_FILENO)
@@ -651,11 +769,12 @@ io_close(file_pair *pair, bool success)
extern size_t
-io_read(file_pair *pair, uint8_t *buf, size_t size)
+io_read(file_pair *pair, io_buf *buf_union, size_t size)
{
// We use small buffers here.
assert(size < SSIZE_MAX);
+ uint8_t *buf = buf_union->u8;
size_t left = size;
while (left > 0) {
@@ -691,8 +810,21 @@ io_read(file_pair *pair, uint8_t *buf, size_t size)
}
-extern bool
-io_write(const file_pair *pair, const uint8_t *buf, size_t size)
+static bool
+is_sparse(const io_buf *buf)
+{
+ assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
+
+ for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
+ if (buf->u64[i] != 0)
+ return false;
+
+ return true;
+}
+
+
+static bool
+io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
{
assert(size < SSIZE_MAX);
@@ -731,3 +863,46 @@ io_write(const file_pair *pair, const uint8_t *buf, size_t size)
return false;
}
+
+
+extern bool
+io_write(file_pair *pair, const io_buf *buf, size_t size)
+{
+ assert(size <= IO_BUFFER_SIZE);
+
+ if (pair->dest_try_sparse) {
+ // Check if the block is sparse (contains only zeros). If it
+ // sparse, we just store the amount and return. We will take
+ // care of actually skipping over the hole when we hit the
+ // next data block or close the file.
+ //
+ // Since io_close() requires that dest_pending_sparse > 0
+ // if the file ends with sparse block, we must also return
+ // if size == 0 to avoid doing the lseek().
+ if (size == IO_BUFFER_SIZE) {
+ if (is_sparse(buf)) {
+ pair->dest_pending_sparse += size;
+ return false;
+ }
+ } else if (size == 0) {
+ return false;
+ }
+
+ // This is not a sparse block. If we have a pending hole,
+ // skip it now.
+ if (pair->dest_pending_sparse > 0) {
+ if (lseek(pair->dest_fd, pair->dest_pending_sparse,
+ SEEK_CUR) == -1) {
+ message_error(_("%s: Seeking failed when "
+ "trying to create a sparse "
+ "file: %s"), pair->dest_name,
+ strerror(errno));
+ return true;
+ }
+
+ pair->dest_pending_sparse = 0;
+ }
+ }
+
+ return io_write_buf(pair, buf->u8, size);
+}
diff --git a/src/xz/file_io.h b/src/xz/file_io.h
index b0bbe11a..58bf7b5e 100644
--- a/src/xz/file_io.h
+++ b/src/xz/file_io.h
@@ -11,13 +11,22 @@
///////////////////////////////////////////////////////////////////////////////
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
#if BUFSIZ <= 1024
# define IO_BUFFER_SIZE 8192
#else
-# define IO_BUFFER_SIZE BUFSIZ
+# define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#endif
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use an union to make sure that the buffer is properly aligned.
+typedef union {
+ uint8_t u8[IO_BUFFER_SIZE];
+ uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
typedef struct {
/// Name of the source filename (as given on the command line) or
/// pointer to static "(stdin)" when reading from standard input.
@@ -33,15 +42,24 @@ typedef struct {
/// File descriptor of the target file
int dest_fd;
+ /// True once end of the source file has been detected.
+ bool src_eof;
+
+ /// If true, we look for long chunks of zeros and try to create
+ /// a sparse file.
+ bool dest_try_sparse;
+
+ /// This is used only if dest_try_sparse is true. This holds the
+ /// number of zero bytes we haven't written out, because we plan
+ /// to make that byte range a sparse chunk.
+ off_t dest_pending_sparse;
+
/// Stat of the source file.
struct stat src_st;
/// Stat of the destination file.
struct stat dest_st;
- /// True once end of the source file has been detected.
- bool src_eof;
-
} file_pair;
@@ -49,6 +67,10 @@ typedef struct {
extern void io_init(void);
+/// \brief Disable creation of sparse files when decompressing
+extern void io_no_sparse(void);
+
+
/// \brief Opens a file pair
extern file_pair *io_open(const char *src_name);
@@ -72,7 +94,7 @@ extern void io_close(file_pair *pair, bool success);
/// \return On success, number of bytes read is returned. On end of
/// file zero is returned and pair->src_eof set to true.
/// On error, SIZE_MAX is returned and error message printed.
-extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);
+extern size_t io_read(file_pair *pair, io_buf *buf, size_t size);
/// \brief Writes a buffer to the destination file
@@ -83,4 +105,4 @@ extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size);
///
/// \return On success, zero is returned. On error, -1 is returned
/// and error message printed.
-extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size);
+extern bool io_write(file_pair *pair, const io_buf *buf, size_t size);
diff --git a/src/xz/message.c b/src/xz/message.c
index be7c3fac..4f8ca00d 100644
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -1072,6 +1072,7 @@ message_help(bool long_help)
if (long_help)
puts(_(
+" --no-sparse do not create sparse files when decompressing\n"
" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n"
" --files=[FILE] read filenames to process from FILE; if FILE is\n"
" omitted, filenames are read from the standard input;\n"
diff --git a/src/xz/xz.1 b/src/xz/xz.1
index b8115624..94aa562e 100644
--- a/src/xz/xz.1
+++ b/src/xz/xz.1
@@ -336,6 +336,17 @@ Write the compressed or decompressed data to standard output instead of
a file. This implies
.BR \-\-keep .
.TP
+.B \-\-no\-sparse
+Disable creation of sparse files. By default, if decompressing into
+a regular file,
+.B xz
+tries to make the file sparse if the decompressed data contains long
+sequences of binary zeros. It works also when writing to standard output
+as long as standard output is connected to a regular file, and certain
+additional conditions are met to make it safe. Creating sparse files may
+save disk space and speed up the decompression by reducing the amount of
+disk I/O.
+.TP
\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf
When compressing, use
.I .suf