Add initial version of xz --list.

This is a bit rough but should be useful for basic things. Ideas (with detailed examples) about the output format are welcome. The output of --robot --list is not necessarily stable yet, although I don't currently have any plans about changing it. The man page hasn't been updated yet.
author: Lasse Collin <lasse.collin@tukaani.org> 2010-01-24 23:50:54 +0200
committer: Lasse Collin <lasse.collin@tukaani.org> 2010-01-24 23:50:54 +0200
commit: 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1 (patch)
tree: 694fda24e060ad34250224ab6088abc0c1e3bc95 /src/xz
parent: Add io_pread(). (diff)
download: xz-0bc9eab243dee3be764b3530433a7fcdc3f7c6a1.tar.xz
5 files changed, 668 insertions, 359 deletions
diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am
index 08ac236f..49307c09 100644
--- a/src/xz/Makefile.am
+++ b/src/xz/Makefile.am
@@ -16,6 +16,7 @@ xz_SOURCES = \
 	file_io.h \
 	hardware.c \
 	hardware.h \
+	list.c \
 	main.c \
 	main.h \
 	message.c \
diff --git a/src/xz/list.c b/src/xz/list.c
index ba298e43..bb793e02 100644
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 //
 /// \file       list.c
-/// \brief      Listing information about .lzma files
+/// \brief      Listing information about .xz files
 //
 //  Author:     Lasse Collin
 //
@@ -11,460 +11,742 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 #include "private.h"
+#include "tuklib_integer.h"
+
+
+/// Totals that are displayed if there was more than one file.
+/// The "files" counter is also used in print_info_adv() to show
+/// the file number.
+static struct {
+	uint64_t files;
+	uint64_t streams;
+	uint64_t blocks;
+	uint64_t compressed_size;
+	uint64_t uncompressed_size;
+	uint32_t checks;
+} totals = { 0, 0, 0, 0, 0, 0 };
+
+
+/// \brief      Parse the Index(es) from the given .xz file
+///
+/// \param      idx     If decoding is successful, *idx will be set to point
+///                     to lzma_index containing the decoded information.
+///                     On error, *idx is not modified.
+/// \param      pair    Input file
+///
+/// \return     On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(lzma_index **idx, file_pair *pair)
+{
+	if (pair->src_st.st_size <= 0) {
+		message_error(_("%s: File is empty"), pair->src_name);
+		return true;
+	}
 
+	if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+		message_error(_("%s: Too small to be a valid .xz file"),
+				pair->src_name);
+		return true;
+	}
 
-/*
-
-1. Check the file type: native, alone, unknown
+	io_buf buf;
+	lzma_stream_flags header_flags;
+	lzma_stream_flags footer_flags;
+	lzma_ret ret;
 
-Alone:
-1. Show info about header. Don't look for concatenated parts.
+	// lzma_stream for the Index decoder
+	lzma_stream strm = LZMA_STREAM_INIT;
 
-Native:
-1. Check that Stream Header is valid.
-2. Seek to the end of the file.
-3. Skip padding.
-4. Reverse decode Stream Footer.
-5. Seek Backward Size bytes.
-6.
+	// All Indexes decoded so far
+	lzma_index *combined_index = NULL;
 
-*/
+	// The Index currently being decoded
+	lzma_index *this_index = NULL;
 
+	// Current position in the file. We parse the file backwards so
+	// initialize it to point to the end of the file.
+	off_t pos = pair->src_st.st_size;
 
-static void
-unsupported_file(file_handle *handle)
-{
-	errmsg(V_ERROR, "%s: Unsupported file type", handle->name);
-	set_exit_status(ERROR);
-	(void)io_close(handle);
-	return;
-}
-
-
-/// Primitive escaping function, that escapes only ASCII control characters.
-static void
-print_escaped(const uint8_t *str)
-{
-	while (*str != '\0') {
-		if (*str <= 0x1F || *str == 0x7F)
-			printf("\\x%02X", *str);
-		else
-			putchar(*str);
+	// Each loop iteration decodes one Index.
+	do {
+		// Check that there is enough data left to contain at least
+		// the Stream Header and Stream Footer. This check cannot
+		// fail in the first pass of this loop.
+		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(LZMA_DATA_ERROR));
+			goto error;
+		}
 
-		++str;
-	}
+		pos -= LZMA_STREAM_HEADER_SIZE;
+		lzma_vli stream_padding = 0;
+
+		// Locate the Stream Footer. There may be Stream Padding which
+		// we must skip when reading backwards.
+		while (true) {
+			if (pos < LZMA_STREAM_HEADER_SIZE) {
+				message_error("%s: %s", pair->src_name,
+						message_strm(
+							LZMA_DATA_ERROR));
+				goto error;
+			}
+
+			if (io_pread(pair, &buf,
+					LZMA_STREAM_HEADER_SIZE, pos))
+				goto error;
+
+			// Stream Padding is always a multiple of four bytes.
+			int i = 2;
+			if (buf.u32[i] != 0)
+				break;
+
+			// To avoid calling io_pread() for every four bytes
+			// of Stream Padding, take advantage that we read
+			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
+			// check them too before calling io_pread() again.
+			do {
+				stream_padding += 4;
+				pos -= 4;
+				--i;
+			} while (i >= 0 && buf.u32[i] == 0);
+		}
 
-	return;
-}
+		// Decode the Stream Footer.
+		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
+		if (ret != LZMA_OK) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(ret));
+			goto error;
+		}
 
+		// Check that the size of the Index field looks sane.
+		lzma_vli index_size = footer_flags.backward_size;
+		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(LZMA_DATA_ERROR));
+			goto error;
+		}
 
-static void
-list_native(file_handle *handle)
-{
-	lzma_stream strm = LZMA_STREAM_INIT;
-	lzma_stream_flags flags;
-	lzma_ret ret = lzma_stream_header_decoder(&strm, &flags);
+		// Set pos to the beginning of the Index.
+		pos -= index_size;
 
-}
+		// See how much memory we can use for decoding this Index.
+		uint64_t memlimit = hardware_memlimit_get();
+		uint64_t memused = 0;
+		if (combined_index != NULL) {
+			memused = lzma_index_memused(combined_index);
+			if (memused > memlimit)
+				message_bug();
 
+			memlimit -= memused;
+		}
 
-static void
-list_alone(const listing_handle *handle)
-{
-	if (handle->buffer[0] > (4 * 5 + 4) * 9 + 8) {
-		unsupported_file(handle);
-		return;
-	}
+		// Decode the Index.
+		ret = lzma_index_decoder(&strm, &this_index, memlimit);
+		if (ret != LZMA_OK) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(ret));
+			goto error;
+		}
 
-	const unsigned int pb = handle->buffer[0] / (9 * 5);
-	handle->buffer[0] -= pb * 9 * 5;
-	const unsigned int lp = handle->buffer[0] / 9;
-	const unsigned int lc = handle->buffer[0] - lp * 9;
+		do {
+			// Don't give the decoder more input than the
+			// Index size.
+			strm.avail_in = MIN(IO_BUFFER_SIZE, index_size);
+			if (io_pread(pair, &buf, strm.avail_in, pos))
+				goto error;
+
+			pos += strm.avail_in;
+			index_size -= strm.avail_in;
+
+			strm.next_in = buf.u8;
+			ret = lzma_code(&strm, LZMA_RUN);
+
+		} while (ret == LZMA_OK);
+
+		// If the decoding seems to be successful, check also that
+		// the Index decoder consumed as much input as indicated
+		// by the Backward Size field.
+		if (ret == LZMA_STREAM_END)
+			if (index_size != 0 || strm.avail_in != 0)
+				ret = LZMA_DATA_ERROR;
+
+		if (ret != LZMA_STREAM_END) {
+			// LZMA_BUFFER_ERROR means that the Index decoder
+			// would have liked more input than what the Index
+			// size should be according to Stream Footer.
+			// The message for LZMA_DATA_ERROR makes more
+			// sense in that case.
+			if (ret == LZMA_BUF_ERROR)
+				ret = LZMA_DATA_ERROR;
+
+			message_error("%s: %s", pair->src_name,
+					message_strm(ret));
+
+			// If the error was too low memory usage limit,
+			// show also how much memory would have been needed.
+			if (ret == LZMA_MEMLIMIT_ERROR) {
+				uint64_t needed = lzma_memusage(&strm);
+				if (UINT64_MAX - needed < memused)
+					needed = UINT64_MAX;
+				else
+					needed += memused;
+
+				message_mem_needed(V_ERROR, needed);
+			}
+
+			goto error;
+		}
 
-	uint32_t dict = 0;
-	for (size_t i = 1; i < 5; ++i) {
-		dict <<= 8;
-		dict |= header[i];
-	}
+		// Decode the Stream Header and check that its Stream Flags
+		// match the Stream Footer.
+		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
+		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(LZMA_DATA_ERROR));
+			goto error;
+		}
 
-	if (dict > LZMA_DICTIONARY_SIZE_MAX) {
-		unsupported_file(handle);
-		return;
-	}
+		pos -= lzma_index_total_size(this_index);
+		if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos))
+			goto error;
 
-	uint64_t uncompressed_size = 0;
-	for (size_t i = 5; i < 13; ++i) {
-		uncompressed_size <<= 8;
-		uncompressed_size |= header[i];
-	}
+		ret = lzma_stream_header_decode(&header_flags, buf.u8);
+		if (ret != LZMA_OK) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(ret));
+			goto error;
+		}
 
-	// Reject files with uncompressed size of 256 GiB or more. It's
-	// an arbitrary limit trying to avoid at least some false positives.
-	if (uncompressed_size != UINT64_MAX
-			&& uncompressed_size >= (UINT64_C(1) << 38)) {
-		unsupported_file(handle);
-		return;
-	}
+		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
+		if (ret != LZMA_OK) {
+			message_error("%s: %s", pair->src_name,
+					message_strm(ret));
+			goto error;
+		}
 
-	if (verbosity < V_WARNING) {
-		printf("name=");
-		print_escaped(handle->name);
-		printf("\nformat=alone\n");
+		// Store the decoded Stream Flags into this_index. This is
+		// needed so that we can print which Check is used in each
+		// Stream.
+		ret = lzma_index_stream_flags(this_index, &footer_flags);
+		if (ret != LZMA_OK)
+			message_bug();
 
-		if (uncompressed_size == UINT64_MAX)
-			printf("uncompressed_size=unknown\n");
-		else
-			printf("uncompressed_size=%" PRIu64 "\n",
-					uncompressed_size);
+		// Store also the size of the Stream Padding field. It is
+		// needed to show the offsets of the Streams correctly.
+		ret = lzma_index_stream_padding(this_index, stream_padding);
+		if (ret != LZMA_OK)
+			message_bug();
+
+		if (combined_index != NULL) {
+			// Append the earlier decoded Indexes
+			// after this_index.
+			ret = lzma_index_cat(
+					this_index, combined_index, NULL);
+			if (ret != LZMA_OK) {
+				message_error("%s: %s", pair->src_name,
+						message_strm(ret));
+				goto error;
+			}
+		}
 
-		printf("dict=%" PRIu32 "\n", dict);
+		combined_index = this_index;
+		this_index = NULL;
 
-		printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb);
+	} while (pos > 0);
 
-	} else {
-		printf("File name:                   ");
-		print_escaped(handle->name);
-		printf("\nFile format:                 LZMA_Alone\n")
+	lzma_end(&strm);
 
-		printf("Uncompressed size:           ");
-		if (uncompressed_size == UINT64_MAX)
-			printf("unknown\n");
-		else
-			printf("%," PRIu64 " bytes (%" PRIu64 " MiB)\n",
-					uncompressed_size,
-					(uncompressed_size + 1024 * 512)
-						/ (1024 * 1024));
-
-		printf("Dictionary size:             %," PRIu32 " bytes "
-				"(%" PRIu32 " MiB)\n",
-				dict, (dict + 1024 * 512) / (1024 * 1024));
-
-		printf("Literal context bits (lc):   %u\n", lc);
-		printf("Literal position bits (lc):  %u\n", lp);
-		printf("Position bits (pb):          %u\n", pb);
-	}
+	// All OK. Make combined_index available to the caller.
+	*idx = combined_index;
+	return false;
 
-	return;
+error:
+	// Something went wrong, free the allocated memory.
+	lzma_end(&strm);
+	lzma_index_end(combined_index, NULL);
+	lzma_index_end(this_index, NULL);
+	return true;
 }
 
 
+/// \brief      Get the compression ratio
+///
+/// This has slightly different format than that is used by in message.c.
+static const char *
+get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
+{
+	if (uncompressed_size == 0)
+		return "---";
 
+	const double ratio = (double)(compressed_size)
+			/ (double)(uncompressed_size);
+	if (ratio > 9.999)
+		return "---";
 
-typedef struct {
-	const char *filename;
-	struct stat st;
-	int fd;
-
-	lzma_stream strm;
-	lzma_stream_flags stream_flags;
-	lzma_info *info;
-
-	lzma_vli backward_size;
-	lzma_vli uncompressed_size;
-
-	size_t buffer_size;
-	uint8_t buffer[IO_BUFFER_SIZE];
-} listing_handle;
+	static char buf[6];
+	snprintf(buf, sizeof(buf), "%.3f", ratio);
+	return buf;
+}
 
 
-static bool
-listing_pread(listing_handle *handle, uint64_t offset)
+static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
+	"None",
+	"CRC32",
+	"Unknown-2",
+	"Unknown-3",
+	"CRC64",
+	"Unknown-5",
+	"Unknown-6",
+	"Unknown-7",
+	"Unknown-8",
+	"Unknown-9",
+	"SHA-256",
+	"Unknown-11",
+	"Unknown-12",
+	"Unknown-13",
+	"Unknown-14",
+	"Unknown-15",
+};
+
+
+/// \brief      Get a comma-separated list of Check names
+///
+/// \param      checks  Bit mask of Checks to print
+/// \param      space_after_comma
+///                     It's better to not use spaces in table-like listings,
+///                     but in more verbose formats a space after a comma
+///                     is good for readability.
+static const char *
+get_check_names(uint32_t checks, bool space_after_comma)
 {
-	if (offset >= (uint64_t)(handle->st.st_size)) {
-		errmsg(V_ERROR, "%s: Trying to read past the end of "
-				"the file.", handle->filename);
-		return true;
-	}
+	assert(checks != 0);
 
-#ifdef HAVE_PREAD
-	const ssize_t ret = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE,
-			(off_t)(offset));
-#else
-	// Use lseek() + read() since we don't have pread(). We don't care
-	// to which offset the reading position is left.
-	if (lseek(handle->fd, (off_t)(offset), SEEK_SET) == -1) {
-		errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
-		return true;
-	}
+	static char buf[sizeof(check_names)];
+	char *pos = buf;
+	size_t left = sizeof(buf);
 
-	const ssize_t ret = read(handle->fd, handle->buffer, IO_BUFFER_SIZE);
-#endif
-
-	if (ret == -1) {
-		errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno));
-		return true;
-	}
+	const char *sep = space_after_comma ? ", " : ",";
+	bool comma = false;
 
-	if (ret == 0) {
-		errmsg(V_ERROR, "%s: Trying to read past the end of "
-				"the file.", handle->filename);
-		return true;
+	for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
+		if (checks & (UINT32_C(1) << i)) {
+			my_snprintf(&pos, &left, "%s%s",
+					comma ? sep : "", check_names[i]);
+			comma = true;
+		}
 	}
 
-	handle->buffer_size = (size_t)(ret);
-	return false;
+	return buf;
 }
 
 
-
+/// \brief      Read the Check value from the .xz file and print it
+///
+/// Since this requires a seek, listing all Check values for all Blocks can
+/// be slow.
+///
+/// \param      pair    Input file
+/// \param      iter    Location of the Block whose Check value should
+///                     be printed.
+///
+/// \return     False on success, true on I/O error.
 static bool
-parse_stream_header(listing_handle *handle)
+print_check_value(file_pair *pair, const lzma_index_iter *iter)
 {
-	if (listing_pread(handle, 0))
-		return true;
-
-	// TODO Got enough input?
-
-	lzma_ret ret = lzma_stream_header_decoder(
-			&handle->strm, &handle->stream_flags);
-	if (ret != LZMA_OK) {
-		errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
-		return true;
+	// Don't read anything from the file if there is no integrity Check.
+	if (iter->stream.flags->check == LZMA_CHECK_NONE) {
+		printf("---");
+		return false;
 	}
 
-	handle->strm.next_in = handle->buffer;
-	handle->strm.avail_in = handle->buffer_size;
-	ret = lzma_code(&handle->strm, LZMA_RUN);
-	if (ret != LZMA_STREAM_END) {
-		assert(ret != LZMA_OK);
-		errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
+	// Locate and read the Check field.
+	const uint32_t size = lzma_check_size(iter->stream.flags->check);
+	const off_t offset = iter->block.compressed_file_offset
+			+ iter->block.total_size - size;
+	io_buf buf;
+	if (io_pread(pair, &buf, size, offset))
 		return true;
+
+	// CRC32 and CRC64 are in little endian. Guess that all the future
+	// 32-bit and 64-bit Check values are little endian too. It shouldn't
+	// be a too big problem if this guess is wrong.
+	if (size == 4) {
+		printf("%08" PRIx32, conv32le(buf.u32[0]));
+	} else if (size == 8) {
+		printf("%016" PRIx64, conv64le(buf.u64[0]));
+	} else {
+		for (size_t i = 0; i < size; ++i)
+			printf("%02x", buf.u8[i]);
 	}
 
 	return false;
 }
 
 
-static bool
-parse_stream_tail(listing_handle *handle)
+static void
+print_info_basic(const lzma_index *idx, file_pair *pair)
 {
-	uint64_t offset = (uint64_t)(handle->st.st_size);
-
-	// Skip padding
-	do {
-		if (offset == 0) {
-			errmsg(V_ERROR, "%s: %s", handle->name,
-					str_strm_error(LZMA_DATA_ERROR));
-			return true;
-		}
+	static bool headings_displayed = false;
+	if (!headings_displayed) {
+		headings_displayed = true;
+		// TRANSLATORS: These are column titles. From Strms (Streams)
+		// to Ratio, the columns are right aligned. Check and Filename
+		// are left aligned. If you need longer words, it's OK to
+		// use two lines here. Test with xz --list.
+		puts(_("Strms  Blocks   Compressed Uncompressed  Ratio  "
+				"Check   Filename"));
+	}
 
-		if (offset < IO_BUFFER_SIZE)
-			offset = 0;
-		else
-			offset -= IO_BUFFER_SIZE;
+	printf("%5s %7s  %11s  %11s  %5s  %-7s %s\n",
+			uint64_to_str(lzma_index_stream_count(idx), 0),
+			uint64_to_str(lzma_index_block_count(idx), 1),
+			uint64_to_nicestr(lzma_index_file_size(idx),
+				NICESTR_B, NICESTR_TIB, false, 2),
+			uint64_to_nicestr(lzma_index_uncompressed_size(idx),
+				NICESTR_B, NICESTR_TIB, false, 3),
+			get_ratio(lzma_index_file_size(idx),
+				lzma_index_uncompressed_size(idx)),
+			get_check_names(lzma_index_checks(idx), false),
+			pair->src_name);
 
-		if (listing_pread(handle, offset))
-			return true;
+	return;
+}
 
-		while (handle->buffer_size > 0
-				&& handle->buffer[handle->buffer_size - 1]
-					== '\0')
-			--handle->buffer_size;
 
-	} while (handle->buffer_size == 0);
+static void
+print_adv_helper(uint64_t stream_count, uint64_t block_count,
+		uint64_t compressed_size, uint64_t uncompressed_size,
+		uint32_t checks)
+{
+	printf(_("  Stream count:       %s\n"),
+			uint64_to_str(stream_count, 0));
+	printf(_("  Block count:        %s\n"),
+			uint64_to_str(block_count, 0));
+	printf(_("  Compressed size:    %s\n"),
+			uint64_to_nicestr(compressed_size,
+				NICESTR_B, NICESTR_TIB, true, 0));
+	printf(_("  Uncompressed size:  %s\n"),
+			uint64_to_nicestr(uncompressed_size,
+				NICESTR_B, NICESTR_TIB, true, 0));
+	printf(_("  Ratio:              %s\n"),
+			get_ratio(compressed_size, uncompressed_size));
+	printf(_("  Check:              %s\n"),
+			get_check_names(checks, true));
+	return;
+}
 
-	if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) {
-		// TODO
-	}
 
-	lzma_stream_flags stream_flags;
-	lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags);
-	if (ret != LZMA_OK) {
-		errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
-		return true;
+static void
+print_info_adv(const lzma_index *idx, file_pair *pair)
+{
+	// Print an empty line between files.
+	static bool first_filename_printed = false;
+	if (!first_filename_printed)
+		first_filename_printed = true;
+	else
+		putchar('\n');
+
+	// Print the filename and overall information.
+	printf("%s (%" PRIu64 "):\n", pair->src_name, totals.files);
+	print_adv_helper(lzma_index_stream_count(idx),
+			lzma_index_block_count(idx),
+			lzma_index_file_size(idx),
+			lzma_index_uncompressed_size(idx),
+			lzma_index_checks(idx));
+
+	// TODO: The rest of this function needs some work. Currently
+	// the offsets are not printed, which could be useful even when
+	// printed in a less accurate format. On the other hand, maybe
+	// this should print the information with exact byte values,
+	// or maybe there should be at least an option to do that.
+	//
+	// We could also display some other info. E.g. it could be useful
+	// to quickly see how big is the biggest Block (uncompressed size)
+	// and if all Blocks have Compressed Size and Uncompressed Size
+	// fields present, which can be used e.g. for multithreaded
+	// decompression.
+
+	// Avoid printing Stream and Block lists when they wouldn't be useful.
+	bool show_blocks = false;
+	if (lzma_index_stream_count(idx) > 1) {
+		puts(_("  Streams:"));
+		puts(_("      Number      Blocks    Compressed   "
+				"Uncompressed   Ratio   Check"));
+
+		lzma_index_iter iter;
+		lzma_index_iter_init(&iter, idx);
+		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
+			if (iter.stream.block_count > 1)
+				show_blocks = true;
+
+			printf("    %8s  %10s   %11s    %11s   %5s   %s\n",
+				uint64_to_str(iter.stream.number, 0),
+				uint64_to_str(iter.stream.block_count, 1),
+				uint64_to_nicestr(
+					iter.stream.compressed_size,
+					NICESTR_B, NICESTR_TIB, false, 2),
+				uint64_to_nicestr(
+					iter.stream.uncompressed_size,
+					NICESTR_B, NICESTR_TIB, false, 3),
+				get_ratio(iter.stream.compressed_size,
+					iter.stream.uncompressed_size),
+				check_names[iter.stream.flags->check]);
+		}
 	}
 
-	handle->strm.next_in = handle->buffer + handle->buffer_size
-			- LZMA_STREAM_TAIL_SIZE;
-	handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE;
-	handle->buffer_size -= LZMA_STREAM_TAIL_SIZE;
-	ret = lzma_code(&handle->strm, LZMA_RUN);
-	if (ret != LZMA_OK) {
-		assert(ret != LZMA_OK);
-		errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret));
-		return true;
+	if (show_blocks || lzma_index_block_count(idx)
+				> lzma_index_stream_count(idx)
+			|| message_verbosity_get() >= V_DEBUG) {
+		puts(_("  Blocks:"));
+		// FIXME: Number in Stream/file, which one is better?
+		puts(_("      Stream      Number    Compressed   "
+				"Uncompressed   Ratio   Check"));
+
+		lzma_index_iter iter;
+		lzma_index_iter_init(&iter, idx);
+		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
+			printf("    %8s  %10s   %11s    %11s   %5s   %-7s",
+				uint64_to_str(iter.stream.number, 0),
+				uint64_to_str(iter.block.number_in_stream, 1),
+				uint64_to_nicestr(iter.block.total_size,
+					NICESTR_B, NICESTR_TIB, false, 2),
+				uint64_to_nicestr(
+					iter.block.uncompressed_size,
+					NICESTR_B, NICESTR_TIB, false, 3),
+				get_ratio(iter.block.total_size,
+					iter.block.uncompressed_size),
+				check_names[iter.stream.flags->check]);
+
+			if (message_verbosity_get() >= V_DEBUG)
+				if (print_check_value(pair, &iter))
+					return;
+
+			putchar('\n');
+		}
 	}
+}
 
-	if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) {
-		// TODO
-		// Possibly corrupt, possibly concatenated file.
-	}
 
-	handle->backward_size = 0;
-	ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer,
-			&handle->buffer_size);
-	if (ret != LZMA_OK) {
-		// It may be LZMA_BUF_ERROR too, but it doesn't make sense
-		// as an error message displayed to the user.
-		errmsg(V_ERROR, "%s: %s", handle->name,
-				str_strm_error(LZMA_DATA_ERROR));
-		return true;
-	}
-
-	if (!stream_flags.is_multi) {
-		handle->uncompressed_size = 0;
-		size_t tmp = handle->buffer_size;
-		ret = lzma_vli_reverse_decode(&handle->uncompressed_size,
-				handle->buffer, &tmp);
-		if (ret != LZMA_OK)
-			handle->uncompressed_size = LZMA_VLI_UNKNOWN;
+static void
+print_info_robot(const lzma_index *idx, file_pair *pair)
+{
+	printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
+			"\t%s\t%s\t%s\n",
+			lzma_index_stream_count(idx),
+			lzma_index_block_count(idx),
+			lzma_index_file_size(idx),
+			lzma_index_uncompressed_size(idx),
+			get_ratio(lzma_index_file_size(idx),
+				lzma_index_uncompressed_size(idx)),
+			get_check_names(lzma_index_checks(idx), false),
+			pair->src_name);
+
+	if (message_verbosity_get() >= V_VERBOSE) {
+		lzma_index_iter iter;
+		lzma_index_iter_init(&iter, idx);
+
+		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM))
+			printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
+				"\t%" PRIu64 "\t%" PRIu64
+				"\t%s\t%" PRIu64 "\t%s\n",
+				iter.stream.number,
+				iter.stream.compressed_offset,
+				iter.stream.uncompressed_offset,
+				iter.stream.compressed_size,
+				iter.stream.uncompressed_size,
+				get_ratio(iter.stream.compressed_size,
+					iter.stream.uncompressed_size),
+				iter.stream.padding,
+				check_names[iter.stream.flags->check]);
+
+		lzma_index_iter_rewind(&iter);
+		while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
+			printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
+					"\t%" PRIu64 "\t%" PRIu64
+					"\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s",
+					iter.stream.number,
+					iter.block.number_in_stream,
+					iter.block.number_in_file,
+					iter.block.compressed_file_offset,
+					iter.block.uncompressed_file_offset,
+					iter.block.total_size,
+					iter.block.uncompressed_size,
+					get_ratio(iter.block.total_size,
+						iter.block.uncompressed_size),
+					check_names[iter.stream.flags->check]);
+
+			if (message_verbosity_get() >= V_DEBUG) {
+				putchar('\t');
+				if (print_check_value(pair, &iter))
+					return;
+			}
+
+			putchar('\n');
+		}
 	}
 
-	// Calculate the Header Metadata Block start offset.
-
-
-	return false;
+	return;
 }
 
 
-
 static void
-list_native(listing_handle *handle)
+update_totals(const lzma_index *idx)
 {
-	lzma_memory_limiter *limiter
-			= lzma_memory_limiter_create(opt_memory);
-	if (limiter == NULL) {
-		errmsg(V_ERROR,
-	}
-	lzma_info *info =
+	// TODO: Integer overflow checks
+	++totals.files;
+	totals.streams += lzma_index_stream_count(idx);
+	totals.blocks += lzma_index_block_count(idx);
+	totals.compressed_size += lzma_index_file_size(idx);
+	totals.uncompressed_size += lzma_index_uncompressed_size(idx);
+	totals.checks |= lzma_index_checks(idx);
+	return;
+}
 
 
-	// Parse Stream Header
-	//
-	// Single-Block Stream:
-	//  - Parse Block Header
-	//  - Parse Stream Footer
-	//  - If Backward Size doesn't match, error out
-	//
-	// Multi-Block Stream:
-	//  - Parse Header Metadata Block, if any
-	//  - Parse Footer Metadata Block
-	//  - Parse Stream Footer
-	//  - If Footer Metadata Block doesn't match the Stream, error out
+static void
+print_totals_basic(void)
+{
+	// Print a separator line.
+	char line[80];
+	memset(line, '-', sizeof(line));
+	line[sizeof(line) - 1] = '\0';
+	puts(line);
+
+	// Print the totals except the file count, which needs
+	// special handling.
+	printf("%5s %7s  %11s  %11s  %5s  %-7s ",
+			uint64_to_str(totals.streams, 0),
+			uint64_to_str(totals.blocks, 1),
+			uint64_to_nicestr(totals.compressed_size,
+				NICESTR_B, NICESTR_TIB, false, 2),
+			uint64_to_nicestr(totals.uncompressed_size,
+				NICESTR_B, NICESTR_TIB, false, 3),
+			get_ratio(totals.compressed_size,
+				totals.uncompressed_size),
+			get_check_names(totals.checks, false));
+
+	// Since we print totals only when there are at least two files,
+	// the English message will always use "%s files". But some other
+	// languages need different forms for different plurals so we
+	// have to translate this string still.
 	//
-	// In other words, we don't support concatened files.
-	if (parse_stream_header(handle))
-		return;
+	// TRANSLATORS: This simply indicates the number of files shown
+	// by --list even though the format string uses %s.
+	printf(N_("%s file", "%s files\n",
+			totals.files <= ULONG_MAX ? totals.files
+				: (totals.files % 1000000) + 1000000),
+			uint64_to_str(totals.files, 0));
 
-	if (parse_block_header(handle))
-		return;
+	return;
+}
 
-	if (handle->stream_flags.is_multi) {
-		if (handle->block_options.is_metadata) {
-			if (parse_metadata(handle)
-				return;
-		}
 
-		if (my_seek(handle,
+static void
+print_totals_adv(void)
+{
+	putchar('\n');
+	puts(_("Totals:"));
+	printf(_("  Number of files:    %s\n"),
+			uint64_to_str(totals.files, 0));
+	print_adv_helper(totals.streams, totals.blocks,
+			totals.compressed_size, totals.uncompressed_size,
+			totals.checks);
 
-	} else {
-		if (handle->block_options.is_metadata) {
-			FILE_IS_CORRUPT();
-			return;
-		}
+	return;
+}
 
-		if (parse_stream_footer(handle))
-			return;
-
-		// If Uncompressed Size isn't present in Block Header,
-		// it must be present in Stream Footer.
-		if (handle->block_options.uncompressed_size
-					== LZMA_VLI_UNKNOWN
-				&& handle->stream_flags.uncompressed_size
-					== LZMA_VLI_UNKNOWN) {
-			FILE_IS_CORRUPT();
-			return;
-		}
 
-		// Construct a single-Record Index.
-		lzma_index *index = malloc(sizeof(lzma_index));
-		if (index == NULL) {
-			out_of_memory();
-			return;
-		}
+static void
+print_totals_robot(void)
+{
+	printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
+			"\t%s\t%s\t%" PRIu64 "\n",
+			totals.streams,
+			totals.blocks,
+			totals.compressed_size,
+			totals.uncompressed_size,
+			get_ratio(totals.compressed_size,
+				totals.uncompressed_size),
+			get_check_names(totals.checks, false),
+			totals.files);
 
-		// Pohdintaa:
-		// Jos Block coder hoitaisi Uncompressed ja Backward Sizet,
-		// voisi index->total_sizeksi laittaa suoraan Backward Sizen.
-		index->total_size =
+	return;
+}
 
-		if () {
 
-		}
+extern void
+list_totals(void)
+{
+	if (opt_robot) {
+		// Always print totals in --robot mode. It can be convenient
+		// in some cases and doesn't complicate usage of the
+		// single-file case much.
+		print_totals_robot();
+
+	} else if (totals.files > 1) {
+		// For non-robot mode, totals are printed only if there
+		// is more than one file.
+		if (message_verbosity_get() <= V_WARNING)
+			print_totals_basic();
+		else
+			print_totals_adv();
 	}
 
-
-	if (handle->block_options.is_metadata) {
-		if (!handle->stream_flags.is_multi) {
-			FILE_IS_CORRUPT();
-			return;
-		}
-
-		if (parse_metadata(handle))
-			return;
-
-	}
+	return;
 }
 
 
-
 extern void
-list(const char *filename)
+list_file(const char *filename)
 {
+	if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO)
+		message_fatal(_("--list works only on .xz files "
+				"(--format=xz or --format=auto)"));
+
 	if (strcmp(filename, "-") == 0) {
-		errmsg(V_ERROR, "%s: --list does not support reading from "
-				"standard input", filename);
+		message_error(_("--list does not support reading from "
+				"standard input"));
 		return;
 	}
 
 	if (is_empty_filename(filename))
 		return;
 
-	listing_handle handle;
-	handle.filename = filename;
-
-	handle.fd = open(filename, O_RDONLY | O_NOCTTY);
-	if (handle.fd == -1) {
-		errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
+	// Set opt_stdout so that io_open() won't create a new file.
+	// Disable also sparse mode so that it doesn't remove O_APPEND
+	// from stdout.
+	opt_stdout = true;
+	io_no_sparse();
+	file_pair *pair = io_open(filename);
+	if (pair == NULL)
 		return;
-	}
-
-	if (fstat(handle.fd, &handle.st)) {
-		errmsg(V_ERROR, "%s: %s", filename, strerror(errno));
-		goto out;
-	}
 
-	if (!S_ISREG(handle.st.st_mode)) {
-		errmsg(V_WARNING, _("%s: Not a regular file, skipping"),
-				filename);
-		goto out;
-	}
-
-	if (handle.st.st_size <= 0) {
-		errmsg(V_ERROR, _("%s: File is empty"), filename);
-		goto out;
-	}
-
-	if (listing_pread(&handle, 0))
-		goto out;
-
-	if (handle.buffer[0] == 0xFF) {
-		if (opt_header == HEADER_ALONE) {
-			errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
-			goto out;
-		}
-
-		list_native(&handle);
-	} else {
-		if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) {
-			errmsg(V_ERROR, "%s: FIXME", filename); // FIXME
-			goto out;
-		}
+	lzma_index *idx;
+	if (!parse_indexes(&idx, pair)) {
+		// Update the totals that are displayed after all
+		// the individual files have been listed.
+		update_totals(idx);
+
+		// We have three main modes:
+		//  - --robot, which has submodes if --verbose is specified
+		//     once or twice
+		//  - Normal --list without --verbose
+		//  - --list with one or two --verbose
+		if (opt_robot)
+			print_info_robot(idx, pair);
+		else if (message_verbosity_get() <= V_WARNING)
+			print_info_basic(idx, pair);
+		else
+			print_info_adv(idx, pair);
 
-		list_alone(&handle);
+		lzma_index_end(idx, NULL);
 	}
 
-out:
-	(void)close(fd);
+	io_close(pair, false);
 	return;
 }
diff --git a/src/xz/list.h b/src/xz/list.h
new file mode 100644
index 00000000..a4c6ec7d
--- /dev/null
+++ b/src/xz/list.h
@@ -0,0 +1,18 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       list.h
+/// \brief      List information about .xz files
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+/// \brief      List information about the given .xz file
+extern void list_file(const char *filename);
+
+
+/// \brief      Show the totals after all files have been listed
+extern void list_totals(void);
diff --git a/src/xz/main.c b/src/xz/main.c
index 7445e98a..a2681f23 100644
--- a/src/xz/main.c
+++ b/src/xz/main.c
@@ -153,10 +153,7 @@ main(int argc, char **argv)
 	args_info args;
 	args_parse(&args, argc, argv);
 
-	if (opt_mode == MODE_LIST)
-		message_fatal("--list is not implemented yet.");
-
-	if (opt_robot)
+	if (opt_mode != MODE_LIST && opt_robot)
 		message_fatal(_("Compression and decompression with --robot "
 			"are not supported yet."));
 
@@ -184,6 +181,11 @@ main(int argc, char **argv)
 	// line arguments.
 	signals_init();
 
+	// coder_run() handles compression, decopmression, and testing.
+	// list_file() is for --list.
+	void (*run)(const char *filename) = opt_mode == MODE_LIST
+			 ? &list_file : &coder_run;
+
 	// Process the files given on the command line. Note that if no names
 	// were given, parse_args() gave us a fake "-" filename.
 	for (size_t i = 0; i < args.arg_count && !user_abort; ++i) {
@@ -218,7 +220,7 @@ main(int argc, char **argv)
 		}
 
 		// Do the actual compression or uncompression.
-		coder_run(args.arg_names[i]);
+		run(args.arg_names[i]);
 	}
 
 	// If --files or --files0 was used, process the filenames from the
@@ -234,13 +236,18 @@ main(int argc, char **argv)
 
 			// read_name() doesn't return empty names.
 			assert(name[0] != '\0');
-			coder_run(name);
+			run(name);
 		}
 
 		if (args.files_name != stdin_filename)
 			(void)fclose(args.files_file);
 	}
 
+	// All files have now been handled. If in --list mode, display
+	// the totals before exiting.
+	if (opt_mode == MODE_LIST)
+		list_totals();
+
 	// If we have got a signal, raise it to kill the program instead
 	// of calling tuklib_exit().
 	signals_exit();
diff --git a/src/xz/private.h b/src/xz/private.h
index 8d9ce978..b5434357 100644
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -48,3 +48,4 @@
 #include "signals.h"
 #include "suffix.h"
 #include "util.h"
+#include "list.h"
author	Lasse Collin <lasse.collin@tukaani.org>	2010-01-24 23:50:54 +0200
committer	Lasse Collin <lasse.collin@tukaani.org>	2010-01-24 23:50:54 +0200
commit	0bc9eab243dee3be764b3530433a7fcdc3f7c6a1 (patch)
tree	694fda24e060ad34250224ab6088abc0c1e3bc95 /src/xz
parent	Add io_pread(). (diff)
download	xz-0bc9eab243dee3be764b3530433a7fcdc3f7c6a1.tar.xz