diff options
Diffstat (limited to '')
112 files changed, 3240 insertions, 2724 deletions
@@ -16,6 +16,7 @@ In alphabetical order: - Jim Meyering - Igor Pavlov - Mikko Pouru + - Bernhard Reutner-Fischer - Alexandre Sauvé - Andreas Schwab - Julian Seward diff --git a/configure.ac b/configure.ac index fbe023a3..8f6340c4 100644 --- a/configure.ac +++ b/configure.ac @@ -54,7 +54,7 @@ AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug], [Enable debugging code.]), if test "x$enable_debug" = xyes; then AC_MSG_RESULT([yes]) else - AC_DEFINE(NDEBUG, 1, [Define to disable debugging code.]) + AC_DEFINE(NDEBUG, 1, [Define to 1 to disable debugging code.]) AC_MSG_RESULT([no]) fi @@ -440,13 +440,34 @@ AC_CHECK_HEADERS([fcntl.h limits.h sys/time.h], [AC_MSG_ERROR([Required header file(s) are missing.])]) # If any of these headers are missing, things should still work correctly: -AC_CHECK_HEADERS([assert.h errno.h byteswap.h sys/param.h sys/sysctl.h], +AC_CHECK_HEADERS([sys/param.h sys/sysctl.h byteswap.h], [], [], [ #ifdef HAVE_SYS_PARAM_H # include <sys/param.h> #endif ]) +# Even if we have byteswap.h, we may lack the specific macros/functions. +if test x$ac_cv_header_byteswap_h = xyes ; then + m4_foreach([FUNC], [bswap_16,bswap_32,bswap_64], [ + AC_MSG_CHECKING([if FUNC is available]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ +#include <byteswap.h> +int +main(void) +{ + FUNC[](42); + return 0; +} + ])], [ + AC_DEFINE(HAVE_[]m4_toupper(FUNC), [1], + [Define to 1 if] FUNC [is available.]) + AC_MSG_RESULT([yes]) + ], [AC_MSG_RESULT([no])]) + + ])dnl +fi + ############################################################################### # Checks for typedefs, structures, and compiler characteristics. @@ -469,9 +490,13 @@ AC_CHECK_SIZEOF([size_t]) # The command line tool can copy high resolution timestamps if such # information is availabe in struct stat. Otherwise one second accuracy -# is used. Most systems seem to have st_xtim but BSDs have st_xtimespec. 
-AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec, struct stat.st_mtim.tv_nsec, - struct stat.st_atimespec.tv_nsec, struct stat.st_mtimespec.tv_nsec]) +# is used. +AC_CHECK_MEMBERS([ + struct stat.st_atim.tv_nsec, + struct stat.st_atimespec.tv_nsec, + struct stat.st_atimensec, + struct stat.st_uatime, + struct stat.st_atim.st__tim.tv_nsec]) AC_SYS_LARGEFILE AC_C_BIGENDIAN @@ -484,16 +509,15 @@ AC_C_BIGENDIAN # Gnulib replacements as needed gl_GETOPT -# Functions that are not mandatory i.e. we have alternatives for them -# or we can just drop some functionality: -AC_CHECK_FUNCS([futimes futimesat]) +# Find the best function to set timestamps. +AC_CHECK_FUNCS([futimens futimes futimesat utimes utime], [break]) # Check how to find out the amount of physical memory in the system. The # lzma command line tool uses this to automatically limits its memory usage. # - sysconf() gives all the needed info on GNU+Linux and Solaris. # - BSDs use sysctl(). AC_MSG_CHECKING([how to detect the amount of physical memory]) -AC_COMPILE_IFELSE([ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #include <unistd.h> int main() @@ -503,7 +527,7 @@ main() i = sysconf(_SC_PHYS_PAGES); return 0; } -], [ +]])], [ AC_DEFINE([HAVE_PHYSMEM_SYSCONF], 1, [Define to 1 if the amount of physical memory can be detected with sysconf(_SC_PAGESIZE) and sysconf(_SC_PHYS_PAGES).]) @@ -537,7 +561,7 @@ main() # sysconf(_SC_NPROCESSORS_ONLN) works on most systems, except that BSDs # use sysctl(). 
AC_MSG_CHECKING([how to detect the number of available CPU cores]) -AC_COMPILE_IFELSE([ +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #include <unistd.h> int main() @@ -546,7 +570,7 @@ main() i = sysconf(_SC_NPROCESSORS_ONLN); return 0; } -], [ +]])], [ AC_DEFINE([HAVE_NCPU_SYSCONF], 1, [Define to 1 if the number of available CPU cores can be detected with sysconf(_SC_NPROCESSORS_ONLN).]) diff --git a/debug/full_flush.c b/debug/full_flush.c index 3c914549..71106b56 100644 --- a/debug/full_flush.c +++ b/debug/full_flush.c @@ -75,17 +75,17 @@ main(int argc, char **argv) // Config lzma_options_lzma opt_lzma; - if (lzma_lzma_preset(&opt_lzma, 0)) { + if (lzma_lzma_preset(&opt_lzma, 1)) { fprintf(stderr, "preset failed\n"); exit(1); } - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; filters[0].id = LZMA_FILTER_LZMA2; filters[0].options = &opt_lzma; filters[1].id = LZMA_VLI_UNKNOWN; // Init - if (lzma_stream_encoder(&strm, filters, LZMA_CHECK_SHA256) != LZMA_OK) { + if (lzma_stream_encoder(&strm, filters, LZMA_CHECK_CRC32) != LZMA_OK) { fprintf(stderr, "init failed\n"); exit(1); } diff --git a/debug/known_sizes.c b/debug/known_sizes.c index ef7472de..75ac813c 100644 --- a/debug/known_sizes.c +++ b/debug/known_sizes.c @@ -48,7 +48,7 @@ main(void) // Filter setup lzma_options_lzma opt_lzma; - if (lzma_lzma_preset(&opt_lzma, 0)) + if (lzma_lzma_preset(&opt_lzma, 1)) return 1; lzma_filter filters[] = { diff --git a/debug/memusage.c b/debug/memusage.c index 2dbb39e0..716dc8b5 100644 --- a/debug/memusage.c +++ b/debug/memusage.c @@ -26,7 +26,7 @@ main(void) lzma_init(); lzma_options_lzma lzma = { - .dict_size = (1U << 27) + (1U << 26), + .dict_size = (1U << 30) + (1U << 29), .lc = 3, .lp = 0, .pb = 2, diff --git a/debug/sync_flush.c b/debug/sync_flush.c index a161ca31..19fbef53 100644 --- a/debug/sync_flush.c +++ b/debug/sync_flush.c @@ -87,7 +87,7 @@ main(int argc, char **argv) }; lzma_options_delta opt_delta = { - .distance = 16 + .dist 
= 16 }; lzma_options_subblock opt_subblock = { @@ -102,7 +102,7 @@ main(int argc, char **argv) opt_subblock.subfilter_options.id = LZMA_FILTER_DELTA; opt_subblock.subfilter_options.options = &opt_delta; - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; filters[0].id = LZMA_FILTER_LZMA2; filters[0].options = &opt_lzma; filters[1].id = LZMA_VLI_UNKNOWN; @@ -114,20 +114,20 @@ main(int argc, char **argv) } // Encoding -/* + encode(0, LZMA_SYNC_FLUSH); encode(6, LZMA_SYNC_FLUSH); encode(0, LZMA_SYNC_FLUSH); encode(7, LZMA_SYNC_FLUSH); encode(0, LZMA_SYNC_FLUSH); encode(0, LZMA_FINISH); -*/ +/* encode(53, LZMA_SYNC_FLUSH); // opt_lzma.literal_context_bits = 2; // opt_lzma.literal_pos_bits = 1; // opt_lzma.pos_bits = 0; encode(404, LZMA_FINISH); - +*/ // Clean up lzma_end(&strm); diff --git a/doc/file-format.txt b/doc/file-format.txt index b703d680..7fcaf956 100644 --- a/doc/file-format.txt +++ b/doc/file-format.txt @@ -30,12 +30,13 @@ The .xz File Format 3.1.6. Header Padding 3.1.7. CRC32 3.2. Compressed Data - 3.3. Check + 3.3. Block Padding + 3.4. Check 4. Index 4.1. Index Indicator 4.2. Number of Records 4.3. List of Records - 4.3.1. Total Size + 4.3.1. Unpadded Size 4.3.2. Uncompressed Size 4.4. Index Padding 4.5. CRC32 @@ -56,7 +57,7 @@ The .xz File Format 0. Preface This document describes the .xz file format (filename suffix - `.xz', MIME type `application/x-xz'). It is intended that this + ".xz", MIME type "application/x-xz"). It is intended that this format replace the old .lzma format used by LZMA SDK and LZMA Utils. @@ -80,12 +81,12 @@ The .xz File Format Special thanks for helping with this document goes to Igor Pavlov. Thanks for helping with this document goes to - Mark Adler, H. Peter Anvin, and Mikko Pouru. + Mark Adler, H. Peter Anvin, Mikko Pouru, and Lars Wirzenius. 0.2. 
Changes - Last modified: 2008-09-24 21:05+0300 + Last modified: 2008-11-03 00:35+0200 (A changelog will be kept once the first official version is made.) @@ -93,20 +94,19 @@ The .xz File Format 1. Conventions - The keywords `must', `must not', `required', `should', - `should not', `recommended', `may', and `optional' in this + The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD", + "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in [RFC-2119]. - These words are not capitalized in this document. Indicating a warning means displaying a message, returning - appropriate exit status, or something else to let the user - know that something worth warning occurred. The operation - should still finish if a warning is indicated. + appropriate exit status, or doing something else to let the + user know that something worth warning occurred. The operation + SHOULD still finish if a warning is indicated. Indicating an error means displaying a message, returning - appropriate exit status, or something else to let the user - know that something prevented successfully finishing the - operation. The operation must be aborted once an error has + appropriate exit status, or doing something else to let the + user know that something prevented successfully finishing the + operation. The operation MUST be aborted once an error has been indicated. @@ -114,7 +114,7 @@ The .xz File Format In this document, byte is always 8 bits. - A `nul byte' has all bits unset. That is, the value of a nul + A "null byte" has all bits unset. That is, the value of a null byte is 0x00. To represent byte blocks, this document uses notation that @@ -133,8 +133,25 @@ The .xz File Format +=======+ In this document, a boxed byte or a byte sequence declared - using this notation is called `a field'. The example field - above would be called `the Foo field' or plain `Foo'. + using this notation is called "a field". 
The example field + above would be called "the Foo field" or plain "Foo". + + If there are many fields, they may be split to multiple lines. + This is indicated with an arrow ("--->"): + + +=====+ + | Foo | + +=====+ + + +=====+ + ---> | Bar | + +=====+ + + The above is equivalent to this: + + +=====+=====+ + | Foo | Bar | + +=====+=====+ 1.2. Multibyte Integers @@ -166,7 +183,7 @@ The .xz File Format size_t encode(uint8_t buf[static 9], uint64_t num) { - if (num >= UINT64_MAX / 2) + if (num > UINT64_MAX / 2) return 0; size_t i = 0; @@ -194,7 +211,7 @@ The .xz File Format size_t i = 0; while (buf[i++] & 0x80) { - if (i > size_max || buf[i] == 0x00) + if (i >= size_max || buf[i] == 0x00) return 0; *num |= (uint64_t)(buf[i] & 0x7F) << (i * 7); @@ -206,15 +223,22 @@ The .xz File Format 2. Overall Structure of .xz File - +========+================+========+================+ - | Stream | Stream Padding | Stream | Stream Padding | ... - +========+================+========+================+ + A standalone .xz file consists of one or more Streams which may + have Stream Padding between or after them: + + +========+================+========+================+ + | Stream | Stream Padding | Stream | Stream Padding | ... + +========+================+========+================+ + + While a typical file contains only one Stream and no Stream + Padding, a decoder handling standalone .xz files SHOULD support + files that have more than one Stream or Stream Padding. - A file contains usually only one Stream. However, it is - possible to concatenate multiple Streams together with no - additional processing. It is up to the implementation to - decide if the decoder will continue decoding from the next - Stream once the end of the first Stream has been reached. 
+ In contrast to standalone .xz files, when the .xz file format + is used as an internal part of some other file format or + communication protocol, it usually is expected that the decoder + stops after the first Stream, and doesn't look for Stream + Padding or possibly other Streams. 2.1. Stream @@ -229,7 +253,7 @@ The .xz File Format All the above fields have a size that is a multiple of four. If Stream is used as an internal part of another file format, it - is recommended to make the Stream start at an offset that is + is RECOMMENDED to make the Stream start at an offset that is a multiple of four bytes. Stream Header, Index, and Stream Footer are always present in @@ -238,12 +262,12 @@ The .xz File Format There are zero or more Blocks. The maximum number of Blocks is limited only by the maximum size of the Index field. - Total size of a Stream must be less than 8 EiB (2^63 bytes). + Total size of a Stream MUST be less than 8 EiB (2^63 bytes). The same limit applies to the total amount of uncompressed data stored in a Stream. If an implementation supports handling .xz files with multiple - concatenated Streams, it may apply the above limits to the file + concatenated Streams, it MAY apply the above limits to the file as a whole instead of limiting per Stream basis. @@ -273,20 +297,20 @@ The .xz File Format - The sixth byte (0x00) was chosen to prevent applications from misdetecting the file as a text file. - If the Header Magic Bytes don't match, the decoder must + If the Header Magic Bytes don't match, the decoder MUST indicate an error. 2.1.1.2. Stream Flags - The first byte of Stream Flags is always a nul byte. In future + The first byte of Stream Flags is always a null byte. In future this byte may be used to indicate new Stream version or other Stream properties. 
The second byte of Stream Flags is a bit field: Bit(s) Mask Description - 0-3 0x0F Type of Check (see Section 3.3): + 0-3 0x0F Type of Check (see Section 3.4): ID Size Check name 0x00 0 bytes None 0x01 4 bytes CRC32 @@ -304,14 +328,14 @@ The .xz File Format 0x0D 64 bytes (Reserved) 0x0E 64 bytes (Reserved) 0x0F 64 bytes (Reserved) - 4-7 0xF0 Reserved for future use; must be zero for now. + 4-7 0xF0 Reserved for future use; MUST be zero for now. - Implementations must support at least the Check IDs 0x00 (None) - and 0x01 (CRC32). Supporting other Check IDs is optional. If - an unsupported Check is used, the decoder should indicate a - warning or error. + Implementations SHOULD support at least the Check IDs 0x00 + (None) and 0x01 (CRC32). Supporting other Check IDs is + OPTIONAL. If an unsupported Check is used, the decoder SHOULD + indicate a warning or error. - If any reserved bit is set, the decoder must indicate an error. + If any reserved bit is set, the decoder MUST indicate an error. It is possible that there is a new field present which the decoder is not aware of, and can thus parse the Stream Header incorrectly. @@ -322,7 +346,7 @@ The .xz File Format The CRC32 is calculated from the Stream Flags field. It is stored as an unsigned 32-bit little endian integer. If the calculated value does not match the stored one, the decoder - must indicate an error. + MUST indicate an error. The idea is that Stream Flags would always be two bytes, even if new features are needed. This way old decoders will be able @@ -344,7 +368,7 @@ The .xz File Format The CRC32 is calculated from the Backward Size and Stream Flags fields. It is stored as an unsigned 32-bit little endian integer. If the calculated value does not match the stored one, - the decoder must indicate an error. + the decoder MUST indicate an error. 
The reason to have the CRC32 field before the Backward Size and Stream Flags fields is to keep the four-byte fields aligned to @@ -359,8 +383,11 @@ The .xz File Format real_backward_size = (stored_backward_size + 1) * 4; - Using a fixed-size integer to store this value makes it - slightly simpler to parse the Stream Footer when the + If the stored value does not match the real size of the Index + field, the decoder MUST indicate an error. + + Using a fixed-size integer to store Backward Size makes + it slightly simpler to parse the Stream Footer when the application needs to parse the Stream backwards. @@ -368,16 +395,16 @@ The .xz File Format This is a copy of the Stream Flags field from the Stream Header. The information stored to Stream Flags is needed - when parsing the Stream backwards. The decoder must compare + when parsing the Stream backwards. The decoder MUST compare the Stream Flags fields in both Stream Header and Stream Footer, and indicate an error if they are not identical. 2.1.2.4. Footer Magic Bytes - As the last step of the decoding process, the decoder must + As the last step of the decoding process, the decoder MUST verify the existence of Footer Magic Bytes. If they don't - match, an error must be indicated. + match, an error MUST be indicated. Using a C array and ASCII: const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' }; @@ -396,28 +423,28 @@ The .xz File Format 2.2. Stream Padding Only the decoders that support decoding of concatenated Streams - must support Stream Padding. + MUST support Stream Padding. - Stream Padding must contain only nul bytes. Any non-nul byte - should be considered as the beginning of a new Stream. To - preserve the four-byte alignment of consecutive Streams, the - size of Stream Padding must be a multiple of four bytes. Empty - Stream Padding is allowed. + Stream Padding MUST contain only null bytes. To preserve the + four-byte alignment of consecutive Streams, the size of Stream + Padding MUST be a multiple of four bytes. 
Empty Stream Padding + is allowed. Note that non-empty Stream Padding is allowed at the end of the file; there doesn't need to be a new Stream after non-empty Stream Padding. This can be convenient in certain situations [GNU-tar]. - The possibility of Padding should be taken into account when - designing an application that parses the Stream backwards. + The possibility of Padding MUST be taken into account when + designing an application that parses Streams backwards, and + the application supports concatenated Streams. 3. Block - +==============+=================+=======+ - | Block Header | Compressed Data | Check | - +==============+=================+=======+ + +==============+=================+===============+=======+ + | Block Header | Compressed Data | Block Padding | Check | + +==============+=================+===============+=======+ 3.1. Block Header @@ -460,11 +487,11 @@ The .xz File Format Bit(s) Mask Description 0-1 0x03 Number of filters (1-4) - 2-5 0x3C Reserved for future use; must be zero for now. + 2-5 0x3C Reserved for future use; MUST be zero for now. 6 0x40 The Compressed Size field is present. 7 0x80 The Uncompressed Size field is present. - If any reserved bit is set, the decoder must indicate an error. + If any reserved bit is set, the decoder MUST indicate an error. It is possible that there is a new field present which the decoder is not aware of, and can thus parse the Block Header incorrectly. @@ -475,14 +502,11 @@ The .xz File Format This field is present only if the appropriate bit is set in the Block Flags field (see Section 3.1.2). - This field contains the size of the Compressed Data field as - multiple of four bytes, minimum value being four bytes: - - real_compressed_size = (stored_compressed_size + 1) * 4; - - The size is stored using the encoding described in Section 1.2. - If the Compressed Size does not match the real size of the - Compressed Data field, the decoder must indicate an error. 
+ The Compressed Size field contains the size of the Compressed + Data field, which MUST be non-zero. Compressed Size is stored + using the encoding described in Section 1.2. If the Compressed + Size doesn't match the size of the Compressed Data field, the + decoder MUST indicate an error. 3.1.4. Uncompressed Size @@ -493,7 +517,7 @@ The .xz File Format The Uncompressed Size field contains the size of the Block after uncompressing. Uncompressed Size is stored using the encoding described in Section 1.2. If the Uncompressed Size - does not match the real uncompressed size, the decoder must + does not match the real uncompressed size, the decoder MUST indicate an error. Storing the Compressed Size and Uncompressed Size fields serves @@ -532,14 +556,14 @@ The .xz File Format Filter IDs greater than or equal to 0x4000_0000_0000_0000 (2^62) are reserved for implementation-specific internal use. - These Filter IDs must never be used in List of Filter Flags. + These Filter IDs MUST never be used in List of Filter Flags. 3.1.6. Header Padding - This field contains as many nul byte as it is needed to make + This field contains as many null bytes as are needed to make the Block Header have the size specified in Block Header Size. - If any of the bytes are not nul bytes, the decoder must + If any of the bytes are not null bytes, the decoder MUST indicate an error. It is possible that there is a new field present which the decoder is not aware of, and can thus parse the Block Header incorrectly. @@ -550,7 +574,7 @@ The .xz File Format The CRC32 is calculated over everything in the Block Header field except the CRC32 field itself. It is stored as an unsigned 32-bit little endian integer. If the calculated - value does not match the stored one, the decoder must indicate + value does not match the stored one, the decoder MUST indicate an error. 
By verifying the CRC32 of the Block Header before parsing the @@ -565,20 +589,23 @@ The .xz File Format filters in Section 5.3, the format of the filter-specific encoded data is out of scope of this document. - If the natural size of Compressed Data is not a multiple of - four bytes, it must be padded with 1-3 nul bytes to make it - a multiple of four bytes. +3.3. Block Padding -3.3. Check + Block Padding MUST contain 0-3 null bytes to make the size of + the Block a multiple of four bytes. This can be needed when + the size of Compressed Data is not a multiple of four. + + +3.4. Check The type and size of the Check field depends on which bits are set in the Stream Flags field (see Section 2.1.1.2). The Check, when used, is calculated from the original uncompressed data. If the calculated Check does not match the - stored one, the decoder must indicate an error. If the selected - type of Check is not supported by the decoder, it must indicate + stored one, the decoder MUST indicate an error. If the selected + type of Check is not supported by the decoder, it MUST indicate a warning or error. @@ -611,7 +638,7 @@ The .xz File Format Stream. The value is stored using the encoding described in Section 1.2. If the decoder has decoded all the Blocks of the Stream, and then notices that the Number of Records doesn't - match the real number of Blocks, the decoder must indicate an + match the real number of Blocks, the decoder MUST indicate an error. @@ -624,39 +651,49 @@ The .xz File Format | Record | Record | ... 
+========+========+ - Each Record contains two fields: + Each Record contains information about one Block: - +============+===================+ - | Total Size | Uncompressed Size | - +============+===================+ + +===============+===================+ + | Unpadded Size | Uncompressed Size | + +===============+===================+ If the decoder has decoded all the Blocks of the Stream, it - must verify that the contents of the Records match the real - Total Size and Uncompressed Size of the respective Blocks. + MUST verify that the contents of the Records match the real + Unpadded Size and Uncompressed Size of the respective Blocks. Implementation hint: It is possible to verify the Index with constant memory usage by calculating for example SHA256 of both the real size values and the List of Records, then comparing the check values. Implementing this using non-cryptographic - check like CRC32 should be avoided unless small code size is + check like CRC32 SHOULD be avoided unless small code size is important. - If the decoder supports random-access reading, it must verify - that Total Size and Uncompressed Size of every completely + If the decoder supports random-access reading, it MUST verify + that Unpadded Size and Uncompressed Size of every completely decoded Block match the sizes stored in the Index. If only - partial Block is decoded, the decoder must verify that the + partial Block is decoded, the decoder MUST verify that the processed sizes don't exceed the sizes stored in the Index. -4.3.1. Total Size +4.3.1. Unpadded Size - This field indicates the encoded size of the respective Block - as multiples of four bytes, minimum value being four bytes: + This field indicates the size of the Block excluding the Block + Padding field. That is, Unpadded Size is the size of the Block + Header, Compressed Data, and Check fields. Unpadded Size is + stored using the encoding described in Section 1.2. 
The value + MUST never be zero; with the current structure of Blocks, the + actual minimum value for Unpadded Size is five. - real_total_size = (stored_total_size + 1) * 4; + Implementation note: Because the size of the Block Padding + field is not included in Unpadded Size, calculating the total + size of a Stream or doing random-access reading requires + calculating the actual size of the Blocks by rounding Unpadded + Sizes up to the next multiple of four. - The value is stored using the encoding described in Section - 1.2. + The reason to exclude Block Padding from Unpadded Size is to + ease making a raw copy of Compressed Data without Block + Padding. This can be useful, for example, if someone wants + to convert Streams to some other file format quickly. 4.3.2. Uncompressed Size @@ -668,7 +705,7 @@ The .xz File Format 4.4. Index Padding - This field must contain 0-3 nul bytes to pad the Index to + This field MUST contain 0-3 null bytes to pad the Index to a multiple of four bytes. @@ -677,7 +714,7 @@ The .xz File Format The CRC32 is calculated over everything in the Index field except the CRC32 field itself. The CRC32 is stored as an unsigned 32-bit little endian integer. If the calculated - value does not match the stored one, the decoder must indicate + value does not match the stored one, the decoder MUST indicate an error. @@ -748,7 +785,7 @@ The .xz File Format gets very little work done. To prevent this kind of slow files, there are restrictions on - how the filters can be chained. These restrictions must be + how the filters can be chained. These restrictions MUST be taken into account when designing new filters. The maximum number of filters in the chain has been limited to @@ -756,11 +793,11 @@ The .xz File Format Of these three non-last filters, only two are allowed to change the size of the data. 
- The non-last filters, that change the size of the data, must + The non-last filters, that change the size of the data, MUST have a limit how much the decoder can compress the data: the - decoder should produce at least n bytes of output when the + decoder SHOULD produce at least n bytes of output when the filter is given 2n bytes of input. This limit is not - absolute, but significant deviations must be avoided. + absolute, but significant deviations MUST be avoided. The above limitations guarantee that if the last filter in the chain produces 4n bytes of output, the chain as a whole will @@ -797,7 +834,7 @@ The .xz File Format Bits Mask Description 0-5 0x3F Dictionary Size - 6-7 0xC0 Reserved for future use; must be zero for now. + 6-7 0xC0 Reserved for future use; MUST be zero for now. Dictionary Size is encoded with one-bit mantissa and five-bit exponent. The smallest dictionary size is 4 KiB and the biggest @@ -847,11 +884,6 @@ The .xz File Format Allow as a non-last filter: Yes Allow as the last filter: No - Detecting when all of the data has been decoded: - Uncompressed size: Yes - End of Payload Marker: No - End of Input: Yes - Below is the list of filters in this category. The alignment is the same for both input and output data. @@ -968,7 +1000,7 @@ The .xz File Format There are several incompatible variations to calculate CRC32 and CRC64. For simplicity and clarity, complete examples are provided to calculate the checks as they are used in this file - format. Implementations may use different code as long as it + format. Implementations MAY use different code as long as it gives identical results. The program below reads data from standard input, calculates @@ -1069,19 +1101,19 @@ The .xz File Format [RFC-1952] GZIP file format specification version 4.3 http://www.ietf.org/rfc/rfc1952.txt - - Notation of byte boxes in section `2.1. Overall conventions' + - Notation of byte boxes in section "2.1. 
Overall conventions" [RFC-2119] Key words for use in RFCs to Indicate Requirement Levels http://www.ietf.org/rfc/rfc2119.txt [GNU-tar] - GNU tar 1.16.1 manual + GNU tar 1.20 manual http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html - - Node 9.4.2 `Blocking Factor', paragraph that begins - `gzip will complain about trailing garbage' + - Node 9.4.2 "Blocking Factor", paragraph that begins + "gzip will complain about trailing garbage" - Note that this URL points to the latest version of the manual, and may some day not contain the note which is in - 1.16.1. For the exact version of the manual, download GNU - tar 1.16.1: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.16.1.tar.gz + 1.20. For the exact version of the manual, download GNU + tar 1.20: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.20.tar.gz diff --git a/lib/Makefile.am b/lib/Makefile.am index 0ce2a676..34b6bdd9 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,6 +1,5 @@ ## ## Copyright (C) 2004-2007 Free Software Foundation, Inc. -## Copyright (C) 2007 Lasse Collin ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -13,7 +12,8 @@ ## GNU General Public License for more details. ## -## Not using gnulib-tool, at least for now. Less mess this way. +## Not using gnulib-tool, at least for now. It is likely that we won't +## need anything else from Gnulib than getopt_long(). noinst_LIBRARIES = libgnu.a @@ -21,12 +21,12 @@ libgnu_a_SOURCES = libgnu_a_DEPENDENCIES = $(LIBOBJS) libgnu_a_LIBADD = $(LIBOBJS) -EXTRA_DIST = gettext.h getopt_.h getopt.c getopt1.c getopt_int.h +EXTRA_DIST = getopt.in.h getopt.c getopt1.c getopt_int.h BUILT_SOURCES = $(GETOPT_H) MOSTLYCLEANFILES = getopt.h getopt.h-t -getopt.h: getopt_.h +getopt.h: getopt.in.h { echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! 
*/'; \ - cat $(srcdir)/getopt_.h; \ + cat $(srcdir)/getopt.in.h; \ } > $@-t mv -f $@-t $@ diff --git a/lib/getopt.c b/lib/getopt.c index 3580ad82..1d14b245 100644 --- a/lib/getopt.c +++ b/lib/getopt.c @@ -7,16 +7,16 @@ This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Lesser General Public License for more details. - You should have received a copy of the GNU General Public License along + You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ @@ -35,12 +35,18 @@ # include <unixlib.h> #endif +/* Completely disable NLS for getopt. We won't include translations for it + anyway. If the system lacks getopt_long, missing translations probably + aren't a problem. */ +/* #ifdef _LIBC # include <libintl.h> #else # include "gettext.h" # define _(msgid) gettext (msgid) #endif +*/ +#define _(msgid) (msgid) #if defined _LIBC && defined USE_IN_LIBIO # include <wchar.h> diff --git a/lib/getopt_.h b/lib/getopt.in.h index 615ef9a3..ea77e3d7 100644 --- a/lib/getopt_.h +++ b/lib/getopt.in.h @@ -4,16 +4,16 @@ This file is part of the GNU C Library. 
This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Lesser General Public License for more details. - You should have received a copy of the GNU General Public License along + You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ diff --git a/lib/getopt1.c b/lib/getopt1.c index cc0746ea..da5d5330 100644 --- a/lib/getopt1.c +++ b/lib/getopt1.c @@ -4,16 +4,16 @@ This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Lesser General Public License for more details. 
- You should have received a copy of the GNU General Public License along + You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ diff --git a/lib/gettext.h b/lib/gettext.h deleted file mode 100644 index b6282e54..00000000 --- a/lib/gettext.h +++ /dev/null @@ -1,240 +0,0 @@ -/* Convenience header for conditional use of GNU <libintl.h>. - Copyright (C) 1995-1998, 2000-2002, 2004-2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU Library General Public License as published - by the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - USA. */ - -#ifndef _LIBGETTEXT_H -#define _LIBGETTEXT_H 1 - -/* NLS can be disabled through the configure --disable-nls option. - * - * Extra hack in LZMA Utils: if DISABLE_NLS is defined, NLS is disabled - * even if ENABLE_NLS is true. See Makefile.am for more information. - */ -#if ENABLE_NLS && !defined(DISABLE_NLS) - -/* Get declarations of GNU message catalog functions. */ -# include <libintl.h> - -/* You can set the DEFAULT_TEXT_DOMAIN macro to specify the domain used by - the gettext() and ngettext() macros. This is an alternative to calling - textdomain(), and is useful for libraries. 
*/ -# ifdef DEFAULT_TEXT_DOMAIN -# undef gettext -# define gettext(Msgid) \ - dgettext (DEFAULT_TEXT_DOMAIN, Msgid) -# undef ngettext -# define ngettext(Msgid1, Msgid2, N) \ - dngettext (DEFAULT_TEXT_DOMAIN, Msgid1, Msgid2, N) -# endif - -#else - -/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which - chokes if dcgettext is defined as a macro. So include it now, to make - later inclusions of <locale.h> a NOP. We don't include <libintl.h> - as well because people using "gettext.h" will not include <libintl.h>, - and also including <libintl.h> would fail on SunOS 4, whereas <locale.h> - is OK. */ -#if defined(__sun) -# include <locale.h> -#endif - -/* Many header files from the libstdc++ coming with g++ 3.3 or newer include - <libintl.h>, which chokes if dcgettext is defined as a macro. So include - it now, to make later inclusions of <libintl.h> a NOP. */ -#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3) -# include <cstdlib> -# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H -# include <libintl.h> -# endif -#endif - -/* Disabled NLS. - The casts to 'const char *' serve the purpose of producing warnings - for invalid uses of the value returned from these functions. - On pre-ANSI systems without 'const', the config.h file is supposed to - contain "#define const". */ -# define gettext(Msgid) ((const char *) (Msgid)) -# define dgettext(Domainname, Msgid) ((const char *) (Msgid)) -# define dcgettext(Domainname, Msgid, Category) ((const char *) (Msgid)) -# define ngettext(Msgid1, Msgid2, N) \ - ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2)) -# define dngettext(Domainname, Msgid1, Msgid2, N) \ - ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2)) -# define dcngettext(Domainname, Msgid1, Msgid2, N, Category) \ - ((N) == 1 ? 
(const char *) (Msgid1) : (const char *) (Msgid2)) -# define textdomain(Domainname) ((const char *) (Domainname)) -# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname)) -# define bind_textdomain_codeset(Domainname, Codeset) ((const char *) (Codeset)) - -#endif - -/* A pseudo function call that serves as a marker for the automated - extraction of messages, but does not call gettext(). The run-time - translation is done at a different place in the code. - The argument, String, should be a literal string. Concatenated strings - and other string expressions won't work. - The macro's expansion is not parenthesized, so that it is suitable as - initializer for static 'char[]' or 'const char[]' variables. */ -#define gettext_noop(String) String - -/* The separator between msgctxt and msgid in a .mo file. */ -#define GETTEXT_CONTEXT_GLUE "\004" - -/* Pseudo function calls, taking a MSGCTXT and a MSGID instead of just a - MSGID. MSGCTXT and MSGID must be string literals. MSGCTXT should be - short and rarely need to change. - The letter 'p' stands for 'particular' or 'special'. 
*/ -#ifdef DEFAULT_TEXT_DOMAIN -# define pgettext(Msgctxt, Msgid) \ - pgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#else -# define pgettext(Msgctxt, Msgid) \ - pgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#endif -#define dpgettext(Domainname, Msgctxt, Msgid) \ - pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES) -#define dcpgettext(Domainname, Msgctxt, Msgid, Category) \ - pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, Category) -#ifdef DEFAULT_TEXT_DOMAIN -# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (DEFAULT_TEXT_DOMAIN, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#else -# define npgettext(Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#endif -#define dnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N) \ - npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES) -#define dcnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N, Category) \ - npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, Category) - -static inline const char * -pgettext_aux (const char *domain, - const char *msg_ctxt_id, const char *msgid, - int category) -{ - const char *translation = dcgettext (domain, msg_ctxt_id, category); - if (translation == msg_ctxt_id) - return msgid; - else - return translation; -} - -static inline const char * -npgettext_aux (const char *domain, - const char *msg_ctxt_id, const char *msgid, - const char *msgid_plural, unsigned long int n, - int category) -{ - const char *translation = - dcngettext (domain, msg_ctxt_id, msgid_plural, n, category); - if (translation == msg_ctxt_id || translation == msgid_plural) - return (n == 1 ? 
msgid : msgid_plural); - else - return translation; -} - -/* The same thing extended for non-constant arguments. Here MSGCTXT and MSGID - can be arbitrary expressions. But for string literals these macros are - less efficient than those above. */ - -#include <string.h> - -#define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS 1 - -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS -#include <stdlib.h> -#endif - -#define pgettext_expr(Msgctxt, Msgid) \ - dcpgettext_expr (NULL, Msgctxt, Msgid, LC_MESSAGES) -#define dpgettext_expr(Domainname, Msgctxt, Msgid) \ - dcpgettext_expr (Domainname, Msgctxt, Msgid, LC_MESSAGES) - -static inline const char * -dcpgettext_expr (const char *domain, - const char *msgctxt, const char *msgid, - int category) -{ - size_t msgctxt_len = strlen (msgctxt) + 1; - size_t msgid_len = strlen (msgid) + 1; - const char *translation; -#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - char msg_ctxt_id[msgctxt_len + msgid_len]; -#else - char buf[1024]; - char *msg_ctxt_id = - (msgctxt_len + msgid_len <= sizeof (buf) - ? 
buf - : (char *) malloc (msgctxt_len + msgid_len)); - if (msg_ctxt_id != NULL) -#endif - { - memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); - msg_ctxt_id[msgctxt_len - 1] = '\004'; - memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); - translation = dcgettext (domain, msg_ctxt_id, category); -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - if (msg_ctxt_id != buf) - free (msg_ctxt_id); -#endif - if (translation != msg_ctxt_id) - return translation; - } - return msgid; -} - -#define npgettext_expr(Msgctxt, Msgid, MsgidPlural, N) \ - dcnpgettext_expr (NULL, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES) -#define dnpgettext_expr(Domainname, Msgctxt, Msgid, MsgidPlural, N) \ - dcnpgettext_expr (Domainname, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES) - -static inline const char * -dcnpgettext_expr (const char *domain, - const char *msgctxt, const char *msgid, - const char *msgid_plural, unsigned long int n, - int category) -{ - size_t msgctxt_len = strlen (msgctxt) + 1; - size_t msgid_len = strlen (msgid) + 1; - const char *translation; -#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - char msg_ctxt_id[msgctxt_len + msgid_len]; -#else - char buf[1024]; - char *msg_ctxt_id = - (msgctxt_len + msgid_len <= sizeof (buf) - ? buf - : (char *) malloc (msgctxt_len + msgid_len)); - if (msg_ctxt_id != NULL) -#endif - { - memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1); - msg_ctxt_id[msgctxt_len - 1] = '\004'; - memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len); - translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category); -#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS - if (msg_ctxt_id != buf) - free (msg_ctxt_id); -#endif - if (!(translation == msg_ctxt_id || translation == msgid_plural)) - return translation; - } - return (n == 1 ? 
msgid : msgid_plural); -} - -#endif /* _LIBGETTEXT_H */ diff --git a/m4/getopt.m4 b/m4/getopt.m4 index c0a73b2c..ffba95e2 100644 --- a/m4/getopt.m4 +++ b/m4/getopt.m4 @@ -1,5 +1,5 @@ -# getopt.m4 serial 13 -dnl Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# getopt.m4 serial 14 (modified version) +dnl Copyright (C) 2002-2006, 2008 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. @@ -13,7 +13,6 @@ AC_DEFUN([gl_GETOPT_SUBSTITUTE], AC_LIBOBJ([getopt]) AC_LIBOBJ([getopt1]) gl_GETOPT_SUBSTITUTE_HEADER - gl_PREREQ_GETOPT ]) AC_DEFUN([gl_GETOPT_SUBSTITUTE_HEADER], @@ -31,41 +30,32 @@ AC_DEFUN([gl_GETOPT_CHECK_HEADERS], AC_CHECK_HEADERS([getopt.h], [], [GETOPT_H=getopt.h]) fi + dnl BSD getopt_long uses a way to reset option processing, that is different + dnl from GNU and Solaris (which copied the GNU behavior). We support both + dnl GNU and BSD style resetting of getopt_long(), so there's no need to use + dnl GNU getopt_long() on BSD due to different resetting style. + dnl + dnl With getopt_long(), some BSD versions have a bug in handling optional + dnl arguments. This bug appears only if the environment variable + dnl POSIXLY_CORRECT has been set, so it shouldn't be too bad in most + dnl cases; probably most don't have that variable set. But if we actually + dnl hit this bug, it is a real problem due to our heavy use of optional + dnl arguments. + dnl + dnl According to CVS logs, the bug was introduced in OpenBSD in 2003-09-22 + dnl and copied to FreeBSD in 2004-02-24. It was fixed in both in 2006-09-22, + dnl so the affected versions shouldn't be popular anymore anyway. NetBSD + dnl never had this bug. TODO: What about Darwin and others? 
if test -z "$GETOPT_H"; then - AC_CHECK_FUNCS([getopt_long_only], [], [GETOPT_H=getopt.h]) - fi - - dnl BSD getopt_long uses an incompatible method to reset option processing, - dnl and (as of 2004-10-15) mishandles optional option-arguments. - if test -z "$GETOPT_H"; then - AC_CHECK_DECL([optreset], [GETOPT_H=getopt.h], [], [#include <getopt.h>]) + AC_CHECK_DECL([optreset], + [AC_DEFINE([HAVE_OPTRESET], 1, + [Define to 1 if getopt.h declares extern int optreset.])], + [], [#include <getopt.h>]) fi dnl Solaris 10 getopt doesn't handle `+' as a leading character in an - dnl option string (as of 2005-05-05). - if test -z "$GETOPT_H"; then - AC_CACHE_CHECK([for working GNU getopt function], [gl_cv_func_gnu_getopt], - [AC_RUN_IFELSE( - [AC_LANG_PROGRAM([#include <getopt.h>], - [[ - char *myargv[3]; - myargv[0] = "conftest"; - myargv[1] = "-+"; - myargv[2] = 0; - return getopt (2, myargv, "+a") != '?'; - ]])], - [gl_cv_func_gnu_getopt=yes], - [gl_cv_func_gnu_getopt=no], - [dnl cross compiling - pessimistically guess based on decls - dnl Solaris 10 getopt doesn't handle `+' as a leading character in an - dnl option string (as of 2005-05-05). - AC_CHECK_DECL([getopt_clip], - [gl_cv_func_gnu_getopt=no], [gl_cv_func_gnu_getopt=yes], - [#include <getopt.h>])])]) - if test "$gl_cv_func_gnu_getopt" = "no"; then - GETOPT_H=getopt.h - fi - fi + dnl option string (as of 2005-05-05). We don't use that feature, so this + dnl is not a problem for us. Thus, the respective test was removed here. ]) AC_DEFUN([gl_GETOPT_IFELSE], @@ -75,9 +65,3 @@ AC_DEFUN([gl_GETOPT_IFELSE], ]) AC_DEFUN([gl_GETOPT], [gl_GETOPT_IFELSE([gl_GETOPT_SUBSTITUTE])]) - -# Prerequisites of lib/getopt*. -AC_DEFUN([gl_PREREQ_GETOPT], -[ - AC_CHECK_DECLS_ONCE([getenv]) -]) diff --git a/src/common/bswap.h b/src/common/bswap.h index 8f82a8f4..f5cb8345 100644 --- a/src/common/bswap.h +++ b/src/common/bswap.h @@ -16,20 +16,29 @@ // NOTE: We assume that config.h is already #included. -// byteswap.h is a GNU extension. 
It contains inline assembly versions -// for byteswapping. When byteswap.h is not available, we use generic code. +// At least glibc has byteswap.h which contains inline assembly code for +// byteswapping. Some systems have byteswap.h but lack one or more of the +// bswap_xx macros/functions, which is why we check them separately even +// if byteswap.h is available. + #ifdef HAVE_BYTESWAP_H # include <byteswap.h> -#else +#endif + +#ifndef HAVE_BSWAP_16 # define bswap_16(num) \ (((num) << 8) | ((num) >> 8)) +#endif +#ifndef HAVE_BSWAP_32 # define bswap_32(num) \ ( (((num) << 24) ) \ | (((num) << 8) & UINT32_C(0x00FF0000)) \ | (((num) >> 8) & UINT32_C(0x0000FF00)) \ | (((num) >> 24) ) ) +#endif +#ifndef HAVE_BSWAP_64 # define bswap_64(num) \ ( (((num) << 56) ) \ | (((num) << 40) & UINT64_C(0x00FF000000000000)) \ diff --git a/src/common/physmem.h b/src/common/physmem.h index 597227ac..04a7ab4b 100644 --- a/src/common/physmem.h +++ b/src/common/physmem.h @@ -23,6 +23,10 @@ # endif #endif +#if defined(HAVE_PHYSMEM_SYSCONF) || defined(HAVE_NCPU_SYSCONF) +# include <unistd.h> +#endif + /// \brief Get the amount of physical memory in bytes /// diff --git a/src/common/sysdefs.h b/src/common/sysdefs.h index 7f935f67..47a49fde 100644 --- a/src/common/sysdefs.h +++ b/src/common/sysdefs.h @@ -111,6 +111,7 @@ #endif #include <stdlib.h> +#include <assert.h> // Pre-C99 systems lack stdbool.h. All the code in LZMA Utils must be written // so that it works with fake bool type, for example: @@ -134,17 +135,6 @@ typedef unsigned char _Bool; # define __bool_true_false_are_defined 1 #endif -#ifdef HAVE_ASSERT_H -# include <assert.h> -#else -# ifdef NDEBUG -# define assert(x) -# else - // TODO: Pretty bad assert macro. -# define assert(x) (!(x) && abort()) -# endif -#endif - // string.h should be enough but let's include strings.h and memory.h too if // they exists, since that shouldn't do any harm, but may improve portability. 
#ifdef HAVE_STRING_H diff --git a/src/liblzma/api/lzma/block.h b/src/liblzma/api/lzma/block.h index eb3768e2..06c1633c 100644 --- a/src/liblzma/api/lzma/block.h +++ b/src/liblzma/api/lzma/block.h @@ -1,6 +1,6 @@ /** * \file lzma/block.h - * \brief .lzma Block handling + * \brief .xz Block handling * * \author Copyright (C) 1999-2006 Igor Pavlov * \author Copyright (C) 2007 Lasse Collin @@ -131,11 +131,10 @@ typedef struct { * * \note Because of the array is terminated with * .id = LZMA_VLI_UNKNOWN, the actual array must - * have LZMA_BLOCK_FILTERS_MAX + 1 members or the Block + * have LZMA_FILTERS_MAX + 1 members or the Block * Header decoder will overflow the buffer. */ lzma_filter *filters; -# define LZMA_BLOCK_FILTERS_MAX 4 } lzma_block; @@ -148,6 +147,8 @@ typedef struct { * The size can be calculated from the first byte of a Block using this macro. * Note that if the first byte is 0x00, it indicates beginning of Index; use * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. */ #define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) @@ -211,38 +212,50 @@ extern lzma_ret lzma_block_header_decode(lzma_block *options, /** - * \brief Sets Compressed Size according to Total Size + * \brief Sets Compressed Size according to Unpadded Size * - * Block Header stores Compressed Size, but Index has Total Size. If the + * Block Header stores Compressed Size, but Index has Unpadded Size. If the * application has already parsed the Index and is now decoding Blocks, - * it can calculate Compressed Size from Total Size. This function does + * it can calculate Compressed Size from Unpadded Size. This function does * exactly that with error checking, so application doesn't need to check, * for example, if the value in Index is too small to contain even the - * Block Header. Note that you need to call this function after decoding + * Block Header. 
Note that you need to call this function _after_ decoding * the Block Header field. * * \return - LZMA_OK: options->compressed_size was set successfully. - * - LZMA_DATA_ERROR: total_size is too small compared to + * - LZMA_DATA_ERROR: unpadded_size is too small compared to * options->header_size and lzma_check_sizes[options->check]. * - LZMA_PROG_ERROR: Some values are invalid. For example, - * total_size and options->header_size must be multiples - * of four, total_size must be at least 12, and + * options->header_size must be a multiple of four, and * options->header_size between 8 and 1024 inclusive. */ -extern lzma_ret lzma_block_total_size_set( - lzma_block *options, lzma_vli total_size) +extern lzma_ret lzma_block_compressed_size( + lzma_block *options, lzma_vli unpadded_size) lzma_attr_warn_unused_result; /** - * \brief Calculates Total Size + * \brief Calculates Unpadded Size * - * This function can be useful after decoding a Block to get Total Size + * This function can be useful after decoding a Block to get Unpadded Size * that is stored in Index. * - * \return Total Size on success, or zero on error. + * \return Unpadded Size on success, or zero on error. + */ +extern lzma_vli lzma_block_unpadded_size(const lzma_block *options) + lzma_attr_pure; + + +/** + * \brief Calculates the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. */ -extern lzma_vli lzma_block_total_size_get(const lzma_block *options) +extern lzma_vli lzma_block_total_size(const lzma_block *options) lzma_attr_pure; @@ -255,8 +268,6 @@ extern lzma_vli lzma_block_total_size_get(const lzma_block *options) * \return - LZMA_OK: All good, continue with lzma_code(). 
* - LZMA_MEM_ERROR * - LZMA_OPTIONS_ERROR - * - LZMA_DATA_ERROR: Limits (total_limit and uncompressed_limit) - * have been reached already. * - LZMA_UNSUPPORTED_CHECK: options->check specfies a Check * that is not supported by this buid of liblzma. Initializing * the encoder failed. diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index 53e5737e..b4fb02a7 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -55,6 +55,14 @@ typedef struct { /** + * \brief Maximum number of filters in a chain + * + * FIXME desc + */ +#define LZMA_FILTERS_MAX 4 + + +/** * \brief Test if the given Filter ID is supported for encoding * * Returns true if the give Filter ID is supported for encoding by this diff --git a/src/liblzma/api/lzma/index.h b/src/liblzma/api/lzma/index.h index 522969d4..d6072614 100644 --- a/src/liblzma/api/lzma/index.h +++ b/src/liblzma/api/lzma/index.h @@ -32,12 +32,24 @@ typedef struct lzma_index_s lzma_index; */ typedef struct { /** - * Total Size of a Block. + * \brief Total encoded size of a Block including Block Padding + * + * This value is useful if you need to know the actual size of the + * Block that the Block decoder will read. */ lzma_vli total_size; /** - * Uncompressed Size of a Block + * \brief Encoded size of a Block excluding Block Padding + * + * This value is stored in the Index. When doing random-access + * reading, you should give this value to the Block decoder along + * with uncompressed_size. + */ + lzma_vli unpadded_size; + + /** + * \brief Uncompressed Size of a Block */ lzma_vli uncompressed_size; @@ -80,7 +92,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * \brief Add a new Record to an Index * * \param index Pointer to a lzma_index structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block, or * LZMA_VLI_UNKNOWN to indicate padding. 
* @@ -92,7 +104,7 @@ extern void lzma_index_end(lzma_index *i, lzma_allocator *allocator); * - LZMA_PROG_ERROR */ extern lzma_ret lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h index 58fc8061..001e6b5c 100644 --- a/src/liblzma/api/lzma/index_hash.h +++ b/src/liblzma/api/lzma/index_hash.h @@ -57,7 +57,7 @@ extern void lzma_index_hash_end( * \brief Add a new Record to an Index hash * * \param index Pointer to a lzma_index_hash structure - * \param total_size Total Size of a Block + * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block * * \return - LZMA_OK @@ -67,7 +67,7 @@ extern void lzma_index_hash_end( * used when lzma_index_hash_decode() has already been used. */ extern lzma_ret lzma_index_hash_append(lzma_index_hash *index_hash, - lzma_vli total_size, lzma_vli uncompressed_size) + lzma_vli unpadded_size, lzma_vli uncompressed_size) lzma_attr_warn_unused_result; diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c index f9101c7d..2bfe0b92 100644 --- a/src/liblzma/common/block_decoder.c +++ b/src/liblzma/common/block_decoder.c @@ -33,13 +33,13 @@ struct lzma_coder_s { lzma_next_coder next; /// Decoding options; we also write Compressed Size and Uncompressed - /// Size back to this structure when the encoding has been finished. + /// Size back to this structure when the decoding has been finished. 
lzma_block *options; - /// Compressed Size calculated while encoding + /// Compressed Size calculated while decoding lzma_vli compressed_size; - /// Uncompressed Size calculated while encoding + /// Uncompressed Size calculated while decoding lzma_vli uncompressed_size; /// Maximum allowed Compressed Size; this takes into account the @@ -110,6 +110,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, if (ret != LZMA_STREAM_END) return ret; + // Compressed and Uncompressed Sizes are now at their final + // values. Verify that they match the values given to us. + if (!is_size_valid(coder->compressed_size, + coder->options->compressed_size) + || !is_size_valid(coder->uncompressed_size, + coder->options->uncompressed_size)) + return LZMA_DATA_ERROR; + + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -118,30 +131,19 @@ block_decode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Compressed Data is padded to a multiple of four bytes. while (coder->compressed_size & 3) { + // We use compressed_size here just get the Padding + // right. The actual Compressed Size was stored to + // coder->options already, and won't be modified by + // us anymore. + ++coder->compressed_size; + if (*in_pos >= in_size) return LZMA_OK; if (in[(*in_pos)++] != 0x00) return LZMA_DATA_ERROR; - - if (update_size(&coder->compressed_size, 1, - coder->compressed_limit)) - return LZMA_DATA_ERROR; } - // Compressed and Uncompressed Sizes are now at their final - // values. Verify that they match the values given to us. - if (!is_size_valid(coder->compressed_size, - coder->options->compressed_size) - || !is_size_valid(coder->uncompressed_size, - coder->options->uncompressed_size)) - return LZMA_DATA_ERROR; - - // Copy the values into coder->options. 
The caller - // may use this information to construct Index. - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; @@ -193,14 +195,11 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, { lzma_next_coder_init(lzma_block_decoder_init, next, allocator); - // While lzma_block_total_size_get() is meant to calculate the Total - // Size, it also validates the options excluding the filters. - if (lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - - // options->check is used for array indexing so we need to know that - // it is in the valid range. - if ((unsigned)(options->check) > LZMA_CHECK_ID_MAX) + // Validate the options. lzma_block_unpadded_size() does that for us + // except for Uncompressed Size and filters. Filters are validated + // by the raw decoder. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Allocate and initialize *next->coder if needed. @@ -221,8 +220,8 @@ lzma_block_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->uncompressed_size = 0; // If Compressed Size is not known, we calculate the maximum allowed - // value so that Total Size of the Block still is a valid VLI and - // a multiple of four. + // value so that encoded size of the Block (including Block Padding) + // is still a valid VLI and a multiple of four. next->coder->compressed_limit = options->compressed_size == LZMA_VLI_UNKNOWN ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 3c678f7d..6468cb44 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -27,8 +27,8 @@ /// take into account the headers etc. 
to determine the exact maximum size /// of the Compressed Data field, but the complexity would give us nothing /// useful. Instead, limit the size of Compressed Data so that even with -/// biggest possible Block Header and Check fields the total size of the -/// Block stays as valid VLI. This way we don't produce incorrect output +/// biggest possible Block Header and Check fields the total encoded size of +/// the Block stays as valid VLI. This way we don't produce incorrect output /// if someone will really try creating a Block of 8 EiB. /// /// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of @@ -41,9 +41,9 @@ struct lzma_coder_s { /// The filters in the chain; initialized with lzma_raw_decoder_init(). lzma_next_coder next; - /// Encoding options; we also write Total Size, Compressed Size, and - /// Uncompressed Size back to this structure when the encoding has - /// been finished. + /// Encoding options; we also write Unpadded Size, Compressed Size, + /// and Uncompressed Size back to this structure when the encoding + /// has been finished. lzma_block *options; enum { @@ -58,8 +58,8 @@ struct lzma_coder_s { /// Uncompressed Size calculated while encoding lzma_vli uncompressed_size; - /// Position when writing out the Check field - size_t check_pos; + /// Position in Block Padding and the Check fields + size_t pos; /// Check of the uncompressed data lzma_check_state check; @@ -106,6 +106,11 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, assert(*in_pos == in_size); assert(action == LZMA_FINISH); + // Copy the values into coder->options. The caller + // may use this information to construct Index. + coder->options->compressed_size = coder->compressed_size; + coder->options->uncompressed_size = coder->uncompressed_size; + coder->sequence = SEQ_PADDING; } @@ -113,28 +118,21 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, case SEQ_PADDING: // Pad Compressed Data to a multiple of four bytes. 
- while (coder->compressed_size & 3) { + while ((coder->compressed_size + coder->pos) & 3) { if (*out_pos >= out_size) return LZMA_OK; out[*out_pos] = 0x00; ++*out_pos; - - // No need to use check for overflow here since we - // have already checked in SEQ_CODE that Compressed - // Size will stay in proper limits. - ++coder->compressed_size; + ++coder->pos; } - // Copy the values into coder->options. The caller - // may use this information to construct Index. - coder->options->compressed_size = coder->compressed_size; - coder->options->uncompressed_size = coder->uncompressed_size; - if (coder->options->check == LZMA_CHECK_NONE) return LZMA_STREAM_END; lzma_check_finish(&coder->check, coder->options->check); + + coder->pos = 0; coder->sequence = SEQ_CHECK; // Fall through @@ -144,11 +142,10 @@ block_encode(lzma_coder *coder, lzma_allocator *allocator, = lzma_check_size(coder->options->check); while (*out_pos < out_size) { - out[*out_pos] = coder->check.buffer.u8[ - coder->check_pos]; + out[*out_pos] = coder->check.buffer.u8[coder->pos]; ++*out_pos; - if (++coder->check_pos == check_size) + if (++coder->pos == check_size) return LZMA_STREAM_END; } @@ -199,9 +196,9 @@ lzma_block_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, next->coder->options = options; next->coder->compressed_size = 0; next->coder->uncompressed_size = 0; + next->coder->pos = 0; // Initialize the check - next->coder->check_pos = 0; lzma_check_init(&next->coder->check, options->check); // Initialize the requested filters. diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c index 3b8e9f36..8421ac37 100644 --- a/src/liblzma/common/block_header_decoder.c +++ b/src/liblzma/common/block_header_decoder.c @@ -27,7 +27,7 @@ free_properties(lzma_block *options, lzma_allocator *allocator) // Free allocated filter options. 
The last array member is not // touched after the initialization in the beginning of // lzma_block_header_decode(), so we don't need to touch that here. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) { lzma_free(options->filters[i].options, allocator); options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; @@ -48,24 +48,19 @@ lzma_block_header_decode(lzma_block *options, // Initialize the filter options array. This way the caller can // safely free() the options even if an error occurs in this function. - for (size_t i = 0; i <= LZMA_BLOCK_FILTERS_MAX; ++i) { + for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) { options->filters[i].id = LZMA_VLI_UNKNOWN; options->filters[i].options = NULL; } - size_t in_size = options->header_size; - - // Validate. The caller must have set options->header_size with - // lzma_block_header_size_decode() macro, so it is a programming error - // if these tests fail. - if (in_size < LZMA_BLOCK_HEADER_SIZE_MIN - || in_size > LZMA_BLOCK_HEADER_SIZE_MAX - || (in_size & 3) - || lzma_block_header_size_decode(in[0]) != in_size) + // Validate Block Header Size and Check type. The caller must have + // already set these, so it is a programming error if this test fails. + if (lzma_block_header_size_decode(in[0]) != options->header_size + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return LZMA_PROG_ERROR; // Exclude the CRC32 field. 
- in_size -= 4; + const size_t in_size = options->header_size - 4; // Verify CRC32 if (lzma_crc32(in, in_size, 0) != integer_read_32(in + in_size)) @@ -83,15 +78,9 @@ lzma_block_header_decode(lzma_block *options, return_if_error(lzma_vli_decode(&options->compressed_size, NULL, in, &in_pos, in_size)); - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1) - return LZMA_DATA_ERROR; - - options->compressed_size = (options->compressed_size + 1) * 4; - - // Check that Total Size (that is, size of - // Block Header + Compressed Data + Check) is - // representable as a VLI. - if (lzma_block_total_size_get(options) == 0) + // Validate Compressed Size. This checks that it isn't zero + // and that the total size of the Block is a valid VLI. + if (lzma_block_unpadded_size(options) == 0) return LZMA_DATA_ERROR; } else { options->compressed_size = LZMA_VLI_UNKNOWN; diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c index 9326350b..b9980363 100644 --- a/src/liblzma/common/block_header_encoder.c +++ b/src/liblzma/common/block_header_encoder.c @@ -25,21 +25,20 @@ extern LZMA_API lzma_ret lzma_block_header_size(lzma_block *options) { // Block Header Size + Block Flags + CRC32. 
- size_t size = 1 + 1 + 4; + uint32_t size = 1 + 1 + 4; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - if (options->compressed_size > LZMA_VLI_MAX / 4 - 1 - || options->compressed_size == 0 - || (options->compressed_size & 3)) + const uint32_t add = lzma_vli_size(options->compressed_size); + if (add == 0 || options->compressed_size == 0) return LZMA_PROG_ERROR; - size += lzma_vli_size(options->compressed_size / 4 - 1); + size += add; } // Uncompressed Size if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { - const size_t add = lzma_vli_size(options->uncompressed_size); + const uint32_t add = lzma_vli_size(options->uncompressed_size); if (add == 0) return LZMA_PROG_ERROR; @@ -51,10 +50,9 @@ lzma_block_header_size(lzma_block *options) || options->filters[0].id == LZMA_VLI_UNKNOWN) return LZMA_PROG_ERROR; - for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; - ++i) { + for (size_t i = 0; options->filters[i].id != LZMA_VLI_UNKNOWN; ++i) { // Don't allow too many filters. - if (i == 4) + if (i == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; uint32_t add; @@ -65,12 +63,13 @@ lzma_block_header_size(lzma_block *options) } // Pad to a multiple of four bytes. - options->header_size = (size + 3) & ~(size_t)(3); + options->header_size = (size + 3) & ~UINT32_C(3); - // NOTE: We don't verify that Total Size of the Block stays within - // limits. This is because it is possible that we are called with - // exaggerated values to reserve space for Block Header, and later - // called again with lower, real values. + // NOTE: We don't verify that the encoded size of the Block stays + // within limits. This is because it is possible that we are called + // with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve + // space for Block Header, and later called again with lower, + // real values. 
return LZMA_OK; } @@ -79,9 +78,9 @@ lzma_block_header_size(lzma_block *options) extern LZMA_API lzma_ret lzma_block_header_encode(const lzma_block *options, uint8_t *out) { - if ((options->header_size & 3) - || options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN - || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX) + // Validate everything but filters. + if (lzma_block_unpadded_size(options) == 0 + || !lzma_vli_is_valid(options->uncompressed_size)) return LZMA_PROG_ERROR; // Indicate the size of the buffer _excluding_ the CRC32 field. @@ -90,32 +89,28 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) // Store the Block Header Size. out[0] = out_size / 4; - // We write Block Flags a little later. + // We write Block Flags in pieces. + out[1] = 0x00; size_t out_pos = 2; // Compressed Size if (options->compressed_size != LZMA_VLI_UNKNOWN) { - // Compressed Size must be non-zero, fit into a 63-bit - // integer and be a multiple of four. Also the Total Size - // of the Block must fit into 63-bit integer. - if (options->compressed_size == 0 - || (options->compressed_size & 3) - || options->compressed_size - > LZMA_VLI_MAX - || lzma_block_total_size_get(options) == 0) - return LZMA_PROG_ERROR; - return_if_error(lzma_vli_encode( - options->compressed_size / 4 - 1, NULL, + options->compressed_size, NULL, out, &out_pos, out_size)); + + out[1] |= 0x40; } // Uncompressed Size - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) + if (options->uncompressed_size != LZMA_VLI_UNKNOWN) { return_if_error(lzma_vli_encode( options->uncompressed_size, NULL, out, &out_pos, out_size)); + out[1] |= 0x80; + } + // Filter Flags if (options->filters == NULL || options->filters[0].id == LZMA_VLI_UNKNOWN) @@ -124,24 +119,16 @@ lzma_block_header_encode(const lzma_block *options, uint8_t *out) size_t filter_count = 0; do { // There can be at maximum of four filters.
- if (filter_count == 4) + if (filter_count == LZMA_FILTERS_MAX) return LZMA_PROG_ERROR; return_if_error(lzma_filter_flags_encode( options->filters + filter_count, out, &out_pos, out_size)); - } while (options->filters[++filter_count].id - != LZMA_VLI_UNKNOWN); - - // Block Flags - out[1] = filter_count - 1; + } while (options->filters[++filter_count].id != LZMA_VLI_UNKNOWN); - if (options->compressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x40; - - if (options->uncompressed_size != LZMA_VLI_UNKNOWN) - out[1] |= 0x80; + out[1] |= filter_count - 1; // Padding memzero(out + out_pos, out_size - out_pos); diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c index 7b46ba32..66e1cad9 100644 --- a/src/liblzma/common/block_util.c +++ b/src/liblzma/common/block_util.c @@ -18,10 +18,11 @@ /////////////////////////////////////////////////////////////////////////////// #include "common.h" +#include "index.h" extern LZMA_API lzma_ret -lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) +lzma_block_compressed_size(lzma_block *options, lzma_vli total_size) { // Validate. if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN @@ -45,29 +46,47 @@ lzma_block_total_size_set(lzma_block *options, lzma_vli total_size) extern LZMA_API lzma_vli -lzma_block_total_size_get(const lzma_block *options) +lzma_block_unpadded_size(const lzma_block *options) { - // Validate the values that we are interested in. + // Validate the values that we are interested in i.e. all but + // Uncompressed Size and the filters. + // + // NOTE: This function is used for validation too, so it is + // essential that these checks are always done even if + // Compressed Size is unknown. 
if (options->header_size < LZMA_BLOCK_HEADER_SIZE_MIN || options->header_size > LZMA_BLOCK_HEADER_SIZE_MAX || (options->header_size & 3) - || (unsigned)(options->check) > LZMA_CHECK_ID_MAX) + || !lzma_vli_is_valid(options->compressed_size) + || options->compressed_size == 0 + || (unsigned int)(options->check) > LZMA_CHECK_ID_MAX) return 0; // If Compressed Size is unknown, return that we cannot know - // Total Size either. + // size of the Block either. if (options->compressed_size == LZMA_VLI_UNKNOWN) return LZMA_VLI_UNKNOWN; - const lzma_vli total_size = options->compressed_size - + options->header_size - + lzma_check_size(options->check); + // Calculate Unpadded Size and validate it. + const lzma_vli unpadded_size = options->compressed_size + + options->header_size + + lzma_check_size(options->check); - // Validate the calculated Total Size. - if (options->compressed_size > LZMA_VLI_MAX - || (options->compressed_size & 3) - || total_size > LZMA_VLI_MAX) + assert(unpadded_size >= UNPADDED_SIZE_MIN); + if (unpadded_size > UNPADDED_SIZE_MAX) return 0; - return total_size; + return unpadded_size; +} + + +extern LZMA_API lzma_vli +lzma_block_total_size(const lzma_block *options) +{ + lzma_vli unpadded_size = lzma_block_unpadded_size(options); + + if (unpadded_size != 0 && unpadded_size != LZMA_VLI_UNKNOWN) + unpadded_size = vli_ceil4(unpadded_size); + + return unpadded_size; } diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 275cf05f..0ee8574c 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -66,10 +66,6 @@ | LZMA_CONCATENATED ) -/////////// -// Types // -/////////// - /// Type of encoder/decoder specific data; the actual structure is defined /// differently in different coders. 
typedef struct lzma_coder_s lzma_coder; @@ -187,10 +183,6 @@ struct lzma_internal_s { }; -/////////////// -// Functions // -/////////////// - /// Allocates memory extern void *lzma_alloc(size_t size, lzma_allocator *allocator) lzma_attribute((malloc)); diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c index 71ceeca0..03b6859a 100644 --- a/src/liblzma/common/filter_common.c +++ b/src/liblzma/common/filter_common.c @@ -164,7 +164,7 @@ validate_chain(const lzma_filter *filters, size_t *count) // There must be 1-4 filters. The last filter must be usable as // the last filter in the chain. At maximum of three filters are // allowed to change the size of the data. - if (i > LZMA_BLOCK_FILTERS_MAX || !last_ok || changes_size_count > 3) + if (i > LZMA_FILTERS_MAX || !last_ok || changes_size_count > 3) return LZMA_OPTIONS_ERROR; *count = i; @@ -182,7 +182,7 @@ lzma_raw_coder_init(lzma_next_coder *next, lzma_allocator *allocator, return_if_error(validate_chain(options, &count)); // Set the filter functions and copy the options pointer. - lzma_filter_info filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter_info filters[LZMA_FILTERS_MAX + 1]; if (is_encoder) { for (size_t i = 0; i < count; ++i) { // The order of the filters is reversed in the diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c index f965749f..1fe65650 100644 --- a/src/liblzma/common/index.c +++ b/src/liblzma/common/index.c @@ -20,24 +20,34 @@ #include "index.h" -/// Number of Records to allocate at once. +/// Number of Records to allocate at once in the unrolled list. #define INDEX_GROUP_SIZE 256 typedef struct lzma_index_group_s lzma_index_group; struct lzma_index_group_s { - /// Next group + /// Previous group lzma_index_group *prev; - /// Previous group + /// Next group lzma_index_group *next; /// Index of the last Record in this group size_t last; - /// Total Size fields as cumulative sum relative to the beginning - /// of the group. 
The total size of the group is total_sums[last]. - lzma_vli total_sums[INDEX_GROUP_SIZE]; + /// Unpadded Size fields as special cumulative sum relative to the + /// beginning of the group. It's special in the sense that the previous + /// value is rounded up to the next multiple of four before + /// calculating the new value. The total encoded size of the Blocks + /// in the group is unpadded_sums[last] rounded up to the next + /// multiple of four. + /// + /// For example, if the Unpadded Sizes are 39, 57, and 81, the stored + /// values are 39, 97 (40 + 57), and 181 (100 + 81). The total + /// encoded size of these Blocks is 184. + /// + /// This encoding is nice from the point of view of lzma_index_locate(). + lzma_vli unpadded_sums[INDEX_GROUP_SIZE]; /// Uncompressed Size fields as cumulative sum relative to the /// beginning of the group. The uncompressed size of the group is @@ -56,19 +66,13 @@ struct lzma_index_s { /// Uncompressed size of the Stream lzma_vli uncompressed_size; - /// Number of non-padding records. This is needed by Index encoder. + /// Number of non-padding records. This is needed for Index encoder. lzma_vli count; /// Size of the List of Records field; this is updated every time /// a new non-padding Record is added. lzma_vli index_list_size; - /// This is zero if no Indexes have been combined with - /// lzma_index_cat(). With combined Indexes, this contains the sizes - /// of all but latest the Streams, including possible Stream Padding - /// fields. - lzma_vli padding_size; - /// First group of Records lzma_index_group *head; @@ -80,8 +84,8 @@ struct lzma_index_s { /// Group where the current read position is. lzma_index_group *group; - /// The most recently read record in *group - lzma_vli record; + /// The most recently read Record in *group + size_t record; /// Uncompressed offset of the beginning of *group relative /// to the beginning of the Stream @@ -102,6 +106,10 @@ struct lzma_index_s { /// Stream.
This is needed when a new Index is concatenated /// to this lzma_index structure. lzma_vli index_list_size; + + /// Total size of all but the last Stream and all Stream + /// Padding fields. + lzma_vli streams_size; } old; }; @@ -136,12 +144,12 @@ lzma_index_init(lzma_index *i, lzma_allocator *allocator) i->uncompressed_size = 0; i->count = 0; i->index_list_size = 0; - i->padding_size = 0; i->head = NULL; i->tail = NULL; i->current.group = NULL; i->old.count = 0; i->old.index_list_size = 0; + i->old.streams_size = 0; return i; } @@ -195,12 +203,12 @@ lzma_index_file_size(const lzma_index *i) { // If multiple Streams are concatenated, the Stream Header, Index, // and Stream Footer fields of all but the last Stream are already - // included in padding_size. Thus, we need to calculate only the + // included in old.streams_size. Thus, we need to calculate only the // size of the last Index, not all Indexes. - return i->total_size + i->padding_size + return i->old.streams_size + LZMA_STREAM_HEADER_SIZE + i->total_size + index_size(i->count - i->old.count, i->index_list_size - i->old.index_list_size) - + LZMA_STREAM_HEADER_SIZE * 2; + + LZMA_STREAM_HEADER_SIZE; } @@ -219,10 +227,11 @@ lzma_index_padding_size(const lzma_index *i) } -/// Helper function for index_append() +/// Appends a new Record to the Index. If needed, this allocates a new +/// Record group. static lzma_ret index_append_real(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size, + lzma_vli unpadded_size, lzma_vli uncompressed_size, bool is_padding) { // Add the new record. 
@@ -237,7 +246,7 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, g->prev = i->tail; g->next = NULL; g->last = 0; - g->total_sums[0] = total_size; + g->unpadded_sums[0] = unpadded_size; g->uncompressed_sums[0] = uncompressed_size; g->paddings[0] = is_padding; @@ -252,9 +261,9 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } else { // i->tail has space left for at least one record. - i->tail->total_sums[i->tail->last + 1] - = i->tail->total_sums[i->tail->last] - + total_size; + i->tail->unpadded_sums[i->tail->last + 1] + = unpadded_size + vli_ceil4( + i->tail->unpadded_sums[i->tail->last]); i->tail->uncompressed_sums[i->tail->last + 1] = i->tail->uncompressed_sums[i->tail->last] + uncompressed_size; @@ -266,13 +275,14 @@ index_append_real(lzma_index *i, lzma_allocator *allocator, } -static lzma_ret -index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, - lzma_vli uncompressed_size, bool is_padding) +extern LZMA_API lzma_ret +lzma_index_append(lzma_index *i, lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) { - if (total_size > LZMA_VLI_MAX + if (unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) - return LZMA_DATA_ERROR; + return LZMA_PROG_ERROR; // This looks a bit ugly. We want to first validate that the Index // and Stream stay in valid limits after adding this Record. After @@ -280,65 +290,38 @@ index_append(lzma_index *i, lzma_allocator *allocator, lzma_vli total_size, // slightly more correct to validate before allocating, YMMV). lzma_ret ret; - if (is_padding) { - assert(uncompressed_size == 0); + // First update the overall info so we can validate it. + const lzma_vli index_list_size_add = lzma_vli_size(unpadded_size) + + lzma_vli_size(uncompressed_size); - // First update the info so we can validate it. 
- i->padding_size += total_size; - - if (i->padding_size > LZMA_VLI_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, true); - - // If something went wrong, undo the updated value. - if (ret != LZMA_OK) - i->padding_size -= total_size; + const lzma_vli total_size = vli_ceil4(unpadded_size); - } else { - // First update the overall info so we can validate it. - const lzma_vli index_list_size_add - = lzma_vli_size(total_size / 4 - 1) - + lzma_vli_size(uncompressed_size); - - i->total_size += total_size; - i->uncompressed_size += uncompressed_size; - ++i->count; - i->index_list_size += index_list_size_add; - - if (i->total_size > LZMA_VLI_MAX - || i->uncompressed_size > LZMA_VLI_MAX - || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX - || lzma_index_file_size(i) > LZMA_VLI_MAX) - ret = LZMA_DATA_ERROR; // Would grow past the limits. - else - ret = index_append_real(i, allocator, - total_size, uncompressed_size, false); + i->total_size += total_size; + i->uncompressed_size += uncompressed_size; + ++i->count; + i->index_list_size += index_list_size_add; - if (ret != LZMA_OK) { - // Something went wrong. Undo the updates. - i->total_size -= total_size; - i->uncompressed_size -= uncompressed_size; - --i->count; - i->index_list_size -= index_list_size_add; - } + if (i->total_size > LZMA_VLI_MAX + || i->uncompressed_size > LZMA_VLI_MAX + || lzma_index_size(i) > LZMA_BACKWARD_SIZE_MAX + || lzma_index_file_size(i) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(i, allocator, unpadded_size, + uncompressed_size, false); + + if (ret != LZMA_OK) { + // Something went wrong. Undo the updates. 
+ i->total_size -= total_size; + i->uncompressed_size -= uncompressed_size; + --i->count; + i->index_list_size -= index_list_size_add; } return ret; } -extern LZMA_API lzma_ret -lzma_index_append(lzma_index *i, lzma_allocator *allocator, - lzma_vli total_size, lzma_vli uncompressed_size) -{ - return index_append(i, allocator, - total_size, uncompressed_size, false); -} - - /// Initialize i->current to point to the first Record. static bool init_current(lzma_index *i) @@ -370,10 +353,10 @@ previous_group(lzma_index *i) i->current.record = i->current.group->last; // Then update the offsets. - i->current.stream_offset -= i->current.group - ->total_sums[i->current.group->last]; - i->current.uncompressed_offset -= i->current.group - ->uncompressed_sums[i->current.group->last]; + i->current.stream_offset -= vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); + i->current.uncompressed_offset -= i->current.group->uncompressed_sums[ + i->current.group->last]; return; } @@ -386,8 +369,8 @@ next_group(lzma_index *i) assert(i->current.group->next != NULL); // Update the offsets first. - i->current.stream_offset += i->current.group - ->total_sums[i->current.group->last]; + i->current.stream_offset += vli_ceil4(i->current.group->unpadded_sums[ + i->current.group->last]); i->current.uncompressed_offset += i->current.group ->uncompressed_sums[i->current.group->last]; @@ -403,30 +386,39 @@ next_group(lzma_index *i) static void set_info(const lzma_index *i, lzma_index_record *info) { - info->total_size = i->current.group->total_sums[i->current.record]; + // First copy the cumulative sizes from the current Record of the + // current group. + info->unpadded_size + = i->current.group->unpadded_sums[i->current.record]; + info->total_size = vli_ceil4(info->unpadded_size); info->uncompressed_size = i->current.group->uncompressed_sums[ i->current.record]; + // Copy the start offsets of this group. 
info->stream_offset = i->current.stream_offset; info->uncompressed_offset = i->current.uncompressed_offset; // If it's not the first Record in this group, we need to do some // adjustements. if (i->current.record > 0) { - // _sums[] are cumulative, thus we need to substract the - // _previous _sums[] to get the sizes of this Record. - info->total_size -= i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_size -= i->current.group + // Since the _sums[] are cumulative, we subtract the sums of + // the previous Record to get the sizes of the current Record, + // and add the sums of the previous Record to the offsets. + // With unpadded_sums[] we need to take into account that it + // uses a somewhat unusual way to do the cumulative summing. + const lzma_vli total_sum + = vli_ceil4(i->current.group->unpadded_sums[ + i->current.record - 1]); + + const lzma_vli uncompressed_sum = i->current.group ->uncompressed_sums[i->current.record - 1]; - // i->current.{total,uncompressed}_offsets have the offset - // of the beginning of the group, thus we need to add the - // appropriate amount to get the offsetes of this Record. - info->stream_offset += i->current.group - ->total_sums[i->current.record - 1]; - info->uncompressed_offset += i->current.group - ->uncompressed_sums[i->current.record - 1]; + info->total_size -= total_sum; + info->unpadded_size -= total_sum; + info->uncompressed_size -= uncompressed_sum; + + info->stream_offset += total_sum; + info->uncompressed_offset += uncompressed_sum; } return; @@ -548,11 +540,22 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Check that the combined size of the Indexes stays within limits.
{ + const lzma_vli dest_size = index_size_unpadded( + dest->count, dest->index_list_size); + const lzma_vli src_size = index_size_unpadded( + src->count, src->index_list_size); + if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX) + return LZMA_DATA_ERROR; + } + + // Check that the combined size of the "files" (combined total + // encoded sizes) stays within limits. + { const lzma_vli dest_size = lzma_index_file_size(dest); const lzma_vli src_size = lzma_index_file_size(src); - if (dest_size + src_size > LZMA_VLI_UNKNOWN + if (dest_size + src_size > LZMA_VLI_MAX || dest_size + src_size + padding - > LZMA_VLI_UNKNOWN) + > LZMA_VLI_MAX) return LZMA_DATA_ERROR; } @@ -561,17 +564,37 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // // NOTE: This cannot overflow, because Index Size is always // far smaller than LZMA_VLI_MAX, and adding two VLIs - // (Index Size and padding) doesn't overflow. It may become - // an invalid VLI if padding is huge, but that is caught by - // index_append(). + // (Index Size and padding) doesn't overflow. padding += index_size(dest->count - dest->old.count, dest->index_list_size - dest->old.index_list_size) + LZMA_STREAM_HEADER_SIZE * 2; + // While the above cannot overflow, it may become an invalid VLI. + if (padding > LZMA_VLI_MAX) + return LZMA_DATA_ERROR; + // Add the padding Record. - return_if_error(index_append( - dest, allocator, padding, 0, true)); + { + lzma_ret ret; + + // First update the info so we can validate it. + dest->old.streams_size += padding; + + if (dest->old.streams_size > LZMA_VLI_MAX + || lzma_index_file_size(dest) > LZMA_VLI_MAX) + ret = LZMA_DATA_ERROR; // Would grow past the limits. + else + ret = index_append_real(dest, allocator, + padding, 0, true); + + // If something went wrong, undo the updated value and return + // the error.
+ if (ret != LZMA_OK) { + dest->old.streams_size -= padding; + return ret; + } + } // Avoid wasting lots of memory if src->head has only a few records // that fit into dest->tail. That is, combine two groups if possible. @@ -581,9 +604,10 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, if (src->head != NULL && src->head->last + 1 <= INDEX_GROUP_SIZE - dest->tail->last - 1) { // Copy the first Record. - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[0]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[0]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[dest->tail->last] @@ -596,10 +620,11 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, // Copy the rest. for (size_t i = 1; i < src->head->last; ++i) { - dest->tail->total_sums[dest->tail->last + 1] - = dest->tail->total_sums[dest->tail->last] - + src->head->total_sums[i + 1] - - src->head->total_sums[i]; + dest->tail->unpadded_sums[dest->tail->last + 1] + = vli_ceil4(dest->tail->unpadded_sums[ + dest->tail->last]) + + src->head->unpadded_sums[i + 1] + - src->head->unpadded_sums[i]; dest->tail->uncompressed_sums[dest->tail->last + 1] = dest->tail->uncompressed_sums[ @@ -636,13 +661,13 @@ lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src, dest->old.count = dest->count + src->old.count; dest->old.index_list_size = dest->index_list_size + src->old.index_list_size; + dest->old.streams_size += src->old.streams_size; // Update overall information. dest->total_size += src->total_size; dest->uncompressed_size += src->uncompressed_size; dest->count += src->count; dest->index_list_size += src->index_list_size; - dest->padding_size += src->padding_size; // *src has nothing left but the base structure. 
lzma_free(src, allocator); @@ -690,7 +715,7 @@ lzma_index_dup(const lzma_index *src, lzma_allocator *allocator) // Copy the arrays so that we don't read uninitialized memory. const size_t count = src_group->last + 1; - memcpy(dest_group->total_sums, src_group->total_sums, + memcpy(dest_group->unpadded_sums, src_group->unpadded_sums, sizeof(lzma_vli) * count); memcpy(dest_group->uncompressed_sums, src_group->uncompressed_sums, @@ -729,8 +754,8 @@ lzma_index_equal(const lzma_index *a, const lzma_index *b) while (ag != NULL && bg != NULL) { const size_t count = ag->last + 1; if (ag->last != bg->last - || memcmp(ag->total_sums, - bg->total_sums, + || memcmp(ag->unpadded_sums, + bg->unpadded_sums, sizeof(lzma_vli) * count) != 0 || memcmp(ag->uncompressed_sums, bg->uncompressed_sums, diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h index df897367..79719dd7 100644 --- a/src/liblzma/common/index.h +++ b/src/liblzma/common/index.h @@ -23,14 +23,11 @@ #include "common.h" -/// Maximum encoded value of Total Size. -#define TOTAL_SIZE_ENCODED_MAX (LZMA_VLI_MAX / 4 - 1) +/// Minimum Unpadded Size +#define UNPADDED_SIZE_MIN LZMA_VLI_C(5) -/// Convert the real Total Size value to a value that is stored to the Index. -#define total_size_encode(size) ((size) / 4 - 1) - -/// Convert the encoded Total Size value from Index to the real Total Size. -#define total_size_decode(size) (((size) + 1) * 4) +/// Maximum Unpadded Size +#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3)) /// Get the size of the Index Padding field. This is needed by Index encoder @@ -38,6 +35,16 @@ extern uint32_t lzma_index_padding_size(const lzma_index *i); +/// Round the variable-length integer to the next multiple of four. 
+static inline lzma_vli +vli_ceil4(lzma_vli vli) +{ + assert(vli <= LZMA_VLI_MAX); + return (vli + 3) & ~LZMA_VLI_C(3); +} + + +/// Calculate the size of the Index field excluding Index Padding static inline lzma_vli index_size_unpadded(lzma_vli count, lzma_vli index_list_size) { @@ -46,20 +53,20 @@ index_size_unpadded(lzma_vli count, lzma_vli index_list_size) } +/// Calculate the size of the Index field including Index Padding static inline lzma_vli index_size(lzma_vli count, lzma_vli index_list_size) { - // Round up to a mulitiple of four. - return (index_size_unpadded(count, index_list_size) + 3) - & ~LZMA_VLI_C(3); + return vli_ceil4(index_size_unpadded(count, index_list_size)); } +/// Calculate the total size of the Stream static inline lzma_vli -index_stream_size( - lzma_vli total_size, lzma_vli count, lzma_vli index_list_size) +index_stream_size(lzma_vli blocks_size, + lzma_vli count, lzma_vli index_list_size) { - return LZMA_STREAM_HEADER_SIZE + total_size + return LZMA_STREAM_HEADER_SIZE + blocks_size + index_size(count, index_list_size) + LZMA_STREAM_HEADER_SIZE; } diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c index ae66595a..5faac161 100644 --- a/src/liblzma/common/index_decoder.c +++ b/src/liblzma/common/index_decoder.c @@ -25,7 +25,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -38,8 +38,8 @@ struct lzma_coder_s { /// Number of Records left to decode. lzma_vli count; - /// The most recent Total Size field - lzma_vli total_size; + /// The most recent Unpadded Size field + lzma_vli unpadded_size; /// The most recent Uncompressed Size field lzma_vli uncompressed_size; @@ -91,14 +91,14 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; coder->sequence = coder->count == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? 
SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = coder->sequence == SEQ_TOTAL - ? &coder->total_size + lzma_vli *size = coder->sequence == SEQ_UNPADDED + ? &coder->unpadded_size : &coder->uncompressed_size; ret = lzma_vli_decode(size, &coder->pos, @@ -109,27 +109,26 @@ index_decode(lzma_coder *coder, lzma_allocator *allocator, ret = LZMA_OK; coder->pos = 0; - if (coder->sequence == SEQ_TOTAL) { - // Validate that encoded Total Size isn't too big. - if (coder->total_size > TOTAL_SIZE_ENCODED_MAX) + if (coder->sequence == SEQ_UNPADDED) { + // Validate that encoded Unpadded Size isn't too small + // or too big. + if (coder->unpadded_size < UNPADDED_SIZE_MIN + || coder->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - // Convert the encoded Total Size to the real - // Total Size. - coder->total_size = total_size_decode( - coder->total_size); coder->sequence = SEQ_UNCOMPRESSED; } else { // Add the decoded Record to the Index. return_if_error(lzma_index_append( coder->index, allocator, - coder->total_size, + coder->unpadded_size, coder->uncompressed_size)); // Check if this was the last Record. coder->sequence = --coder->count == 0 ? SEQ_PADDING_INIT - : SEQ_TOTAL; + : SEQ_UNPADDED; } break; diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c index 3005f835..522dbb53 100644 --- a/src/liblzma/common/index_encoder.c +++ b/src/liblzma/common/index_encoder.c @@ -26,7 +26,7 @@ struct lzma_coder_s { enum { SEQ_INDICATOR, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_NEXT, SEQ_PADDING, @@ -97,18 +97,20 @@ index_encode(lzma_coder *coder, break; } - // Total Size must be a multiple of four. - if (coder->record.total_size & 3) + // Unpadded Size must be within valid limits. 
+ if (coder->record.unpadded_size < UNPADDED_SIZE_MIN + || coder->record.unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_PROG_ERROR; - coder->sequence = SEQ_TOTAL; + coder->sequence = SEQ_UNPADDED; // Fall through - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - const lzma_vli size = coder->sequence == SEQ_TOTAL - ? total_size_encode(coder->record.total_size) + const lzma_vli size = coder->sequence == SEQ_UNPADDED + ? coder->record.unpadded_size : coder->record.uncompressed_size; ret = lzma_vli_encode(size, &coder->pos, diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c index 5e581838..162094d1 100644 --- a/src/liblzma/common/index_hash.c +++ b/src/liblzma/common/index_hash.c @@ -23,8 +23,8 @@ typedef struct { - /// Sum of the Total Size fields - lzma_vli total_size; + /// Sum of the Block sizes (including Block Padding) + lzma_vli blocks_size; /// Sum of the Uncompressed Size fields lzma_vli uncompressed_size; @@ -35,7 +35,7 @@ typedef struct { /// Size of the List of Index Records as bytes lzma_vli index_list_size; - /// Check calculated from Total Sizes and Uncompressed Sizes. + /// Check calculated from Unpadded Sizes and Uncompressed Sizes. lzma_check_state check; } lzma_index_hash_info; @@ -45,7 +45,7 @@ struct lzma_index_hash_s { enum { SEQ_BLOCK, SEQ_COUNT, - SEQ_TOTAL, + SEQ_UNPADDED, SEQ_UNCOMPRESSED, SEQ_PADDING_INIT, SEQ_PADDING, @@ -61,8 +61,8 @@ struct lzma_index_hash_s { /// Number of Records not fully decoded lzma_vli remaining; - /// Total Size currently being read from an Index Record. - lzma_vli total_size; + /// Unpadded Size currently being read from an Index Record. + lzma_vli unpadded_size; /// Uncompressed Size currently being read from an Index Record. 
lzma_vli uncompressed_size; @@ -86,15 +86,15 @@ lzma_index_hash_init(lzma_index_hash *index_hash, lzma_allocator *allocator) } index_hash->sequence = SEQ_BLOCK; - index_hash->blocks.total_size = 0; + index_hash->blocks.blocks_size = 0; index_hash->blocks.uncompressed_size = 0; index_hash->blocks.count = 0; index_hash->blocks.index_list_size = 0; - index_hash->records.total_size = 0; + index_hash->records.blocks_size = 0; index_hash->records.uncompressed_size = 0; index_hash->records.count = 0; index_hash->records.index_list_size = 0; - index_hash->total_size = 0; + index_hash->unpadded_size = 0; index_hash->uncompressed_size = 0; index_hash->pos = 0; index_hash->crc32 = 0; @@ -128,16 +128,16 @@ lzma_index_hash_size(const lzma_index_hash *index_hash) /// Updates the sizes and the hash without any validation. static lzma_ret -hash_append(lzma_index_hash_info *info, lzma_vli total_size, +hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, lzma_vli uncompressed_size) { - info->total_size += total_size; + info->blocks_size += vli_ceil4(unpadded_size); info->uncompressed_size += uncompressed_size; - info->index_list_size += lzma_vli_size(total_size_encode(total_size)) + info->index_list_size += lzma_vli_size(unpadded_size) + lzma_vli_size(uncompressed_size); ++info->count; - const lzma_vli sizes[2] = { total_size, uncompressed_size }; + const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; lzma_check_update(&info->check, LZMA_CHECK_BEST, (const uint8_t *)(sizes), sizeof(sizes)); @@ -146,26 +146,27 @@ hash_append(lzma_index_hash_info *info, lzma_vli total_size, extern LZMA_API lzma_ret -lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli total_size, +lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, lzma_vli uncompressed_size) { // Validate the arguments. 
- if (index_hash->sequence != SEQ_BLOCK || total_size == 0 - || total_size > LZMA_VLI_MAX || (total_size & 3) + if (index_hash->sequence != SEQ_BLOCK + || unpadded_size < UNPADDED_SIZE_MIN + || unpadded_size > UNPADDED_SIZE_MAX || uncompressed_size > LZMA_VLI_MAX) return LZMA_PROG_ERROR; // Update the hash. return_if_error(hash_append(&index_hash->blocks, - total_size, uncompressed_size)); + unpadded_size, uncompressed_size)); // Validate the properties of *info are still in allowed limits. - if (index_hash->blocks.total_size > LZMA_VLI_MAX + if (index_hash->blocks.blocks_size > LZMA_VLI_MAX || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX || index_size(index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_BACKWARD_SIZE_MAX - || index_stream_size(index_hash->blocks.total_size, + || index_stream_size(index_hash->blocks.blocks_size, index_hash->blocks.count, index_hash->blocks.index_list_size) > LZMA_VLI_MAX) @@ -216,14 +217,14 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Handle the special case when there are no Blocks. index_hash->sequence = index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; break; } - case SEQ_TOTAL: + case SEQ_UNPADDED: case SEQ_UNCOMPRESSED: { - lzma_vli *size = index_hash->sequence == SEQ_TOTAL - ? &index_hash->total_size + lzma_vli *size = index_hash->sequence == SEQ_UNPADDED + ? 
&index_hash->unpadded_size : &index_hash->uncompressed_size; ret = lzma_vli_decode(size, &index_hash->pos, @@ -234,18 +235,17 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, ret = LZMA_OK; index_hash->pos = 0; - if (index_hash->sequence == SEQ_TOTAL) { - if (index_hash->total_size > TOTAL_SIZE_ENCODED_MAX) + if (index_hash->sequence == SEQ_UNPADDED) { + if (index_hash->unpadded_size < UNPADDED_SIZE_MIN + || index_hash->unpadded_size + > UNPADDED_SIZE_MAX) return LZMA_DATA_ERROR; - index_hash->total_size = total_size_decode( - index_hash->total_size); - index_hash->sequence = SEQ_UNCOMPRESSED; } else { // Update the hash. return_if_error(hash_append(&index_hash->records, - index_hash->total_size, + index_hash->unpadded_size, index_hash->uncompressed_size)); // Verify that we don't go over the known sizes. Note @@ -254,8 +254,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // that values in index_hash->blocks are already // validated and we are fine as long as we don't // exceed them in index_hash->records. - if (index_hash->blocks.total_size - < index_hash->records.total_size + if (index_hash->blocks.blocks_size + < index_hash->records.blocks_size || index_hash->blocks.uncompressed_size < index_hash->records.uncompressed_size || index_hash->blocks.index_list_size @@ -264,7 +264,7 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, // Check if this was the last Record. index_hash->sequence = --index_hash->remaining == 0 - ? SEQ_PADDING_INIT : SEQ_TOTAL; + ? SEQ_PADDING_INIT : SEQ_UNPADDED; } break; @@ -288,8 +288,8 @@ lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, } // Compare the sizes. 
- if (index_hash->blocks.total_size - != index_hash->records.total_size + if (index_hash->blocks.blocks_size + != index_hash->records.blocks_size || index_hash->blocks.uncompressed_size != index_hash->records.uncompressed_size || index_hash->blocks.index_list_size diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index e137685f..9be47893 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -190,7 +190,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Set up a buffer to hold the filter chain. Block Header // decoder will initialize all members of this array so // we don't need to do it here. - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; coder->block_options.filters = filters; // Decode the Block Header. @@ -216,7 +216,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Free the allocated filter options since they are needed // only to initialize the Block decoder. - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) lzma_free(filters[i].options, allocator); coder->block_options.filters = NULL; @@ -243,7 +243,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Block decoded successfully. Add the new size pair to // the Index hash. return_if_error(lzma_index_hash_append(coder->index_hash, - lzma_block_total_size_get( + lzma_block_unpadded_size( &coder->block_options), coder->block_options.uncompressed_size)); @@ -270,7 +270,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, // Fall through - case SEQ_STREAM_FOOTER: + case SEQ_STREAM_FOOTER: { // Copy the Stream Footer to the internal buffer. 
lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos, LZMA_STREAM_HEADER_SIZE); @@ -306,6 +306,7 @@ stream_decode(lzma_coder *coder, lzma_allocator *allocator, return LZMA_STREAM_END; coder->sequence = SEQ_STREAM_PADDING; + } // Fall through diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c index 0376fd3b..e52ad692 100644 --- a/src/liblzma/common/stream_encoder.c +++ b/src/liblzma/common/stream_encoder.c @@ -157,11 +157,11 @@ stream_encode(lzma_coder *coder, lzma_allocator *allocator, return ret; // Add a new Index Record. - const lzma_vli total_size = lzma_block_total_size_get( + const lzma_vli unpadded_size = lzma_block_unpadded_size( &coder->block_options); - assert(total_size != 0); + assert(unpadded_size != 0); return_if_error(lzma_index_append(coder->index, allocator, - total_size, + unpadded_size, coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_INIT; diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index d2a77ba4..53ee1c1e 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -157,14 +157,14 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) uint32_t copy_size = dict->size - copy_pos; if (copy_size < left) { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, copy_size); dict->pos += copy_size; copy_size = left - copy_size; memcpy(dict->buf + dict->pos, dict->buf, copy_size); dict->pos += copy_size; } else { - memcpy(dict->buf + dict->pos, dict->buf + copy_pos, + memmove(dict->buf + dict->pos, dict->buf + copy_pos, left); dict->pos += left; } diff --git a/src/liblzma/subblock/subblock_decoder.c b/src/liblzma/subblock/subblock_decoder.c index 7cf06988..3096b442 100644 --- a/src/liblzma/subblock/subblock_decoder.c +++ b/src/liblzma/subblock/subblock_decoder.c @@ -211,7 +211,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, break; } - case FLAG_END_SUBFILTER: + case 
FLAG_END_SUBFILTER: { if (coder->padding != 0 || (in[*in_pos] & 0x0F) || coder->subfilter.code == NULL || !coder->got_output_with_subfilter) @@ -250,6 +250,7 @@ decode_buffer(lzma_coder *coder, lzma_allocator *allocator, ++*in_pos; break; + } default: return LZMA_DATA_ERROR; diff --git a/src/lzma/Makefile.am b/src/lzma/Makefile.am index cd8bb771..e5c5c29a 100644 --- a/src/lzma/Makefile.am +++ b/src/lzma/Makefile.am @@ -15,19 +15,16 @@ bin_PROGRAMS = lzma lzma_SOURCES = \ - alloc.c \ - alloc.h \ args.c \ args.h \ - error.c \ - error.h \ hardware.c \ hardware.h \ - help.c \ - help.h \ io.c \ io.h \ main.c \ + main.h \ + message.c \ + message.h \ options.c \ options.h \ private.h \ diff --git a/src/lzma/alloc.c b/src/lzma/alloc.c deleted file mode 100644 index d0fee68b..00000000 --- a/src/lzma/alloc.c +++ /dev/null @@ -1,106 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file alloc.c -/// \brief Memory allocation functions -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" - - -/// Called when memory allocation fails. Prints and error message and -/// quits the application. 
-static void lzma_attribute((noreturn)) -xerror(void) -{ - errmsg(V_ERROR, "%s", strerror(errno)); - my_exit(ERROR); -} - - -extern void * -xmalloc(size_t size) -{ - if (size < 1) { - errno = EINVAL; - xerror(); - } - - void *p = malloc(size); - if (p == NULL) - xerror(); - - return p; -} - - -/* -extern void * -xrealloc(void *ptr, size_t size) -{ - if (size < 1) { - errno = EINVAL; - xerror(); - } - - ptr = realloc(ptr, size); - if (ptr == NULL) - xerror(); - - return ptr; -} -*/ - - -extern char * -xstrdup(const char *src) -{ - if (src == NULL) { - errno = EINVAL; - xerror(); - } - - const size_t size = strlen(src) + 1; - char *dest = malloc(size); - if (dest == NULL) - xerror(); - - memcpy(dest, src, size); - - return dest; -} - - -extern void -xstrcpy(char **dest, const char *src) -{ - size_t len = strlen(src) + 1; - - *dest = realloc(*dest, len); - if (*dest == NULL) - xerror(); - - memcpy(*dest, src, len + 1); - - return; -} - - -extern void * -allocator(void *opaque lzma_attribute((unused)), - size_t nmemb lzma_attribute((unused)), size_t size) -{ - return xmalloc(size); -} diff --git a/src/lzma/alloc.h b/src/lzma/alloc.h deleted file mode 100644 index 80317269..00000000 --- a/src/lzma/alloc.h +++ /dev/null @@ -1,42 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file alloc.h -/// \brief Memory allocation functions -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef ALLOC_H -#define ALLOC_H - -#include "private.h" - - -/// Safe malloc() that never returns NULL. -extern void *xmalloc(size_t size); - -/// Safe realloc() that never returns NULL. -extern void *xrealloc(void *ptr, size_t size); - -/// Safe strdup() that never returns NULL. -extern char *xstrdup(const char *src); - -/// xrealloc()s *dest to the size needed by src, and copies src to *dest. -extern void xstrcpy(char **dest, const char *src); - -/// Function for lzma_allocator.alloc. This uses xmalloc(). -extern void *allocator(void *opaque lzma_attribute((unused)), - size_t nmemb lzma_attribute((unused)), size_t size); - -#endif diff --git a/src/lzma/args.c b/src/lzma/args.c index 14ccfb6d..a2efb277 100644 --- a/src/lzma/args.c +++ b/src/lzma/args.c @@ -25,150 +25,90 @@ #include <ctype.h> -enum tool_mode opt_mode = MODE_COMPRESS; -enum format_type opt_format = FORMAT_AUTO; - -char *opt_suffix = NULL; - -char *opt_files_name = NULL; -char opt_files_split = '\0'; -FILE *opt_files_file = NULL; - bool opt_stdout = false; bool opt_force = false; bool opt_keep_original = false; -bool opt_preserve_name = false; - -lzma_check opt_check = LZMA_CHECK_CRC64; -lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1]; // We don't modify or free() this, but we need to assign it in some // non-const pointers. const char *stdin_filename = "(stdin)"; -static size_t preset_number = 7; -static bool preset_default = true; -static size_t filter_count = 0; - -/// When compressing, which file format to use if --format=auto or no --format -/// at all has been specified. We need a variable because this depends on -/// with which name we are called. All names with "lz" in them makes us to -/// use the legacy .lzma format. 
-static enum format_type format_compress_auto = FORMAT_XZ; - - -enum { - OPT_SUBBLOCK = INT_MIN, - OPT_X86, - OPT_POWERPC, - OPT_IA64, - OPT_ARM, - OPT_ARMTHUMB, - OPT_SPARC, - OPT_DELTA, - OPT_LZMA1, - OPT_LZMA2, - - OPT_FILES, - OPT_FILES0, -}; - - -static const char short_opts[] = "cC:dfF:hlLkM:qrS:tT:vVz123456789"; - - -static const struct option long_opts[] = { - // gzip-like options - { "fast", no_argument, NULL, '1' }, - { "best", no_argument, NULL, '9' }, - { "memory", required_argument, NULL, 'M' }, - { "name", no_argument, NULL, 'N' }, - { "suffix", required_argument, NULL, 'S' }, - { "threads", required_argument, NULL, 'T' }, - { "version", no_argument, NULL, 'V' }, - { "stdout", no_argument, NULL, 'c' }, - { "to-stdout", no_argument, NULL, 'c' }, - { "decompress", no_argument, NULL, 'd' }, - { "uncompress", no_argument, NULL, 'd' }, - { "force", no_argument, NULL, 'f' }, - { "help", no_argument, NULL, 'h' }, - { "list", no_argument, NULL, 'l' }, - { "info", no_argument, NULL, 'l' }, - { "keep", no_argument, NULL, 'k' }, - { "no-name", no_argument, NULL, 'n' }, - { "quiet", no_argument, NULL, 'q' }, -// { "recursive", no_argument, NULL, 'r' }, // TODO - { "test", no_argument, NULL, 't' }, - { "verbose", no_argument, NULL, 'v' }, - { "compress", no_argument, NULL, 'z' }, - - // Filters - { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, - { "x86", no_argument, NULL, OPT_X86 }, - { "bcj", no_argument, NULL, OPT_X86 }, - { "powerpc", no_argument, NULL, OPT_POWERPC }, - { "ppc", no_argument, NULL, OPT_POWERPC }, - { "ia64", no_argument, NULL, OPT_IA64 }, - { "itanium", no_argument, NULL, OPT_IA64 }, - { "arm", no_argument, NULL, OPT_ARM }, - { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, - { "sparc", no_argument, NULL, OPT_SPARC }, - { "delta", optional_argument, NULL, OPT_DELTA }, - { "lzma1", optional_argument, NULL, OPT_LZMA1 }, - { "lzma2", optional_argument, NULL, OPT_LZMA2 }, - - // Other - { "format", required_argument, NULL, 'F' }, - { 
"check", required_argument, NULL, 'C' }, - { "files", optional_argument, NULL, OPT_FILES }, - { "files0", optional_argument, NULL, OPT_FILES0 }, - - { NULL, 0, NULL, 0 } -}; - static void -add_filter(lzma_vli id, const char *opt_str) +parse_real(args_info *args, int argc, char **argv) { - if (filter_count == LZMA_BLOCK_FILTERS_MAX) { - errmsg(V_ERROR, _("Maximum number of filters is seven")); - my_exit(ERROR); - } - - opt_filters[filter_count].id = id; - - switch (id) { - case LZMA_FILTER_SUBBLOCK: - opt_filters[filter_count].options - = parse_options_subblock(opt_str); - break; - - case LZMA_FILTER_DELTA: - opt_filters[filter_count].options - = parse_options_delta(opt_str); - break; - - case LZMA_FILTER_LZMA1: - case LZMA_FILTER_LZMA2: - opt_filters[filter_count].options - = parse_options_lzma(opt_str); - break; - - default: - assert(opt_str == NULL); - opt_filters[filter_count].options = NULL; - break; - } + enum { + OPT_SUBBLOCK = INT_MIN, + OPT_X86, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_FILES, + OPT_FILES0, + }; + + static const char short_opts[] = "cC:dfF:hHlLkM:p:qrS:tT:vVz123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + { "info", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, 
NULL, 'C' }, + { "preset", required_argument, NULL, 'p' }, + { "memory", required_argument, NULL, 'M' }, + { "threads", required_argument, NULL, 'T' }, + + { "fast", no_argument, NULL, '1' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", no_argument, NULL, OPT_X86 }, + { "bcj", no_argument, NULL, OPT_X86 }, + { "powerpc", no_argument, NULL, OPT_POWERPC }, + { "ppc", no_argument, NULL, OPT_POWERPC }, + { "ia64", no_argument, NULL, OPT_IA64 }, + { "itanium", no_argument, NULL, OPT_IA64 }, + { "arm", no_argument, NULL, OPT_ARM }, + { "armthumb", no_argument, NULL, OPT_ARMTHUMB }, + { "sparc", no_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + { "subblock", optional_argument, NULL, OPT_SUBBLOCK }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; - ++filter_count; - preset_default = false; - return; -} - - -static void -parse_real(int argc, char **argv) -{ int c; while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) @@ -178,32 +118,28 @@ parse_real(int argc, char **argv) case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - preset_number = c - '0'; - preset_default = false; + coder_set_preset(c - '0'); break; - // --memory - case 'M': - opt_memory = str_to_uint64("memory", optarg, - 1, SIZE_MAX); + case 'p': { + const uint64_t preset = str_to_uint64( + "preset", optarg, 1, 9); + coder_set_preset(preset); break; + } - case 'N': - opt_preserve_name = true; + // --memory + case 'M': + // On 32-bit systems, SIZE_MAX would make more sense + // than UINT64_MAX. But use UINT64_MAX still so that + // scripts that assume > 4 GiB values don't break. 
+ hardware_memlimit_set(str_to_uint64( + "memory", optarg, 0, UINT64_MAX)); break; // --suffix case 'S': - // Empty suffix and suffixes having a slash are - // rejected. Such suffixes would break things later. - if (optarg[0] == '\0' || strchr(optarg, '/') != NULL) { - errmsg(V_ERROR, _("%s: Invalid filename " - "suffix"), optarg); - my_exit(ERROR); - } - - free(opt_suffix); - opt_suffix = xstrdup(optarg); + suffix_set(optarg); break; case 'T': @@ -214,7 +150,7 @@ parse_real(int argc, char **argv) // --version case 'V': // This doesn't return. - show_version(); + message_version(); // --stdout case 'c': @@ -234,7 +170,12 @@ parse_real(int argc, char **argv) // --help case 'h': // This doesn't return. - show_help(); + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); // --list case 'l': @@ -246,15 +187,9 @@ parse_real(int argc, char **argv) opt_keep_original = true; break; - case 'n': - opt_preserve_name = false; - break; - // --quiet case 'q': - if (verbosity > V_SILENT) - --verbosity; - + message_verbosity_decrease(); break; case 't': @@ -263,9 +198,7 @@ parse_real(int argc, char **argv) // --verbose case 'v': - if (verbosity < V_DEBUG) - ++verbosity; - + message_verbosity_increase(); break; case 'z': @@ -275,43 +208,47 @@ parse_real(int argc, char **argv) // Filter setup case OPT_SUBBLOCK: - add_filter(LZMA_FILTER_SUBBLOCK, optarg); + coder_add_filter(LZMA_FILTER_SUBBLOCK, + options_subblock(optarg)); break; case OPT_X86: - add_filter(LZMA_FILTER_X86, NULL); + coder_add_filter(LZMA_FILTER_X86, NULL); break; case OPT_POWERPC: - add_filter(LZMA_FILTER_POWERPC, NULL); + coder_add_filter(LZMA_FILTER_POWERPC, NULL); break; case OPT_IA64: - add_filter(LZMA_FILTER_IA64, NULL); + coder_add_filter(LZMA_FILTER_IA64, NULL); break; case OPT_ARM: - add_filter(LZMA_FILTER_ARM, NULL); + coder_add_filter(LZMA_FILTER_ARM, NULL); break; case OPT_ARMTHUMB: - add_filter(LZMA_FILTER_ARMTHUMB, NULL); + 
coder_add_filter(LZMA_FILTER_ARMTHUMB, NULL); break; case OPT_SPARC: - add_filter(LZMA_FILTER_SPARC, NULL); + coder_add_filter(LZMA_FILTER_SPARC, NULL); break; case OPT_DELTA: - add_filter(LZMA_FILTER_DELTA, optarg); + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); break; case OPT_LZMA1: - add_filter(LZMA_FILTER_LZMA1, optarg); + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); break; case OPT_LZMA2: - add_filter(LZMA_FILTER_LZMA2, optarg); + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); break; // Other @@ -335,14 +272,11 @@ parse_real(int argc, char **argv) }; size_t i = 0; - while (strcmp(types[i].str, optarg) != 0) { - if (++i == ARRAY_SIZE(types)) { - errmsg(V_ERROR, _("%s: Unknown file " + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " "format type"), optarg); - my_exit(ERROR); - } - } opt_format = types[i].format; break; @@ -362,50 +296,43 @@ parse_real(int argc, char **argv) size_t i = 0; while (strcmp(types[i].str, optarg) != 0) { - if (++i == ARRAY_SIZE(types)) { - errmsg(V_ERROR, _("%s: Unknown " - "integrity check " - "type"), optarg); - my_exit(ERROR); - } + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown integrity" + "check type"), optarg); } - opt_check = types[i].check; + coder_set_check(types[i].check); break; } case OPT_FILES: - opt_files_split = '\n'; + args->files_delim = '\n'; // Fall through case OPT_FILES0: - if (opt_files_name != NULL) { - errmsg(V_ERROR, _("Only one file can be " + if (args->files_name != NULL) + message_fatal(_("Only one file can be " "specified with `--files'" "or `--files0'.")); - my_exit(ERROR); - } if (optarg == NULL) { - opt_files_name = (char *)stdin_filename; - opt_files_file = stdin; + args->files_name = (char *)stdin_filename; + args->files_file = stdin; } else { - opt_files_name = optarg; - opt_files_file = fopen(optarg, + args->files_name = optarg; + args->files_file = fopen(optarg, c == 
OPT_FILES ? "r" : "rb"); - if (opt_files_file == NULL) { - errmsg(V_ERROR, "%s: %s", optarg, + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, strerror(errno)); - my_exit(ERROR); - } } break; default: - show_try_help(); - my_exit(ERROR); + message_try_help(); + my_exit(E_ERROR); } } @@ -414,163 +341,124 @@ parse_real(int argc, char **argv) static void -parse_environment(void) +parse_environment(args_info *args, char *argv0) { - char *env = getenv("LZMA_OPT"); + char *env = getenv("XZ_OPT"); if (env == NULL) return; + // We modify the string, so make a copy of it. env = xstrdup(env); - // Calculate the number of arguments in env. - unsigned int argc = 1; + // Calculate the number of arguments in env. argc starts at one + // to include space for the program name. + int argc = 1; bool prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { if (isspace(env[i])) { prev_was_space = true; } else if (prev_was_space) { prev_was_space = false; - if (++argc > (unsigned int)(INT_MAX)) { - errmsg(V_ERROR, _("The environment variable " - "LZMA_OPT contains too many " + + // Keep argc small enough to fit into a signed int + // and to keep it usable for memory allocation. + if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "XZ_OPT contains too many " "arguments")); - my_exit(ERROR); - } } } - char **argv = xmalloc((argc + 1) * sizeof(char*)); + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); argv[0] = argv0; argv[argc] = NULL; + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. 
argc = 1; prev_was_space = true; for (size_t i = 0; env[i] != '\0'; ++i) { if (isspace(env[i])) { prev_was_space = true; + env[i] = '\0'; } else if (prev_was_space) { prev_was_space = false; argv[argc++] = env + i; } } - parse_real((int)(argc), argv); + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); free(env); return; } -static void -set_compression_settings(void) +extern void +args_parse(args_info *args, int argc, char **argv) { - static lzma_options_lzma opt_lzma; - - if (filter_count == 0) { - if (lzma_lzma_preset(&opt_lzma, preset_number)) { - errmsg(V_ERROR, _("Internal error (bug)")); - my_exit(ERROR); - } - - opt_filters[0].id = opt_format == FORMAT_LZMA - ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; - opt_filters[0].options = &opt_lzma; - filter_count = 1; - } - - // Terminate the filter options array. - opt_filters[filter_count].id = LZMA_VLI_UNKNOWN; - - // If we are using the LZMA_Alone format, allow exactly one filter - // which has to be LZMA. - if (opt_format == FORMAT_LZMA && (filter_count != 1 - || opt_filters[0].id != LZMA_FILTER_LZMA1)) { - errmsg(V_ERROR, _("With --format=lzma only the LZMA1 filter " - "is supported")); - my_exit(ERROR); - } - - // TODO: liblzma probably needs an API to validate the filter chain. - - // If using --format=raw, we can be decoding. - uint64_t memory_usage = opt_mode == MODE_COMPRESS - ? lzma_memusage_encoder(opt_filters) - : lzma_memusage_decoder(opt_filters); - - // Don't go over the memory limits when the default - // setting is used. 
- if (preset_default) { - while (memory_usage > opt_memory) { - if (preset_number == 1) { - errmsg(V_ERROR, _("Memory usage limit is too " - "small for any internal " - "filter preset")); - my_exit(ERROR); - } - - if (lzma_lzma_preset(&opt_lzma, --preset_number)) { - errmsg(V_ERROR, _("Internal error (bug)")); - my_exit(ERROR); - } - - memory_usage = lzma_memusage_encoder(opt_filters); - } - - // TODO: With --format=raw, we should print a warning since - // the presets may change and thus the next version may not - // be able to uncompress the raw stream with the same preset - // number. + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; - } else { - if (memory_usage > opt_memory) { - errmsg(V_ERROR, _("Memory usage limit is too small " - "for the given filter setup")); - my_exit(ERROR); - } - } - - // Limit the number of worked threads so that memory usage - // limit isn't exceeded. - assert(memory_usage > 0); - size_t thread_limit = opt_memory / memory_usage; - if (thread_limit == 0) - thread_limit = 1; - - if (opt_threads > thread_limit) - opt_threads = thread_limit; - - return; -} + // Type of the file format to use when --format=auto or no --format + // was specified. + enum format_type format_compress_auto = FORMAT_XZ; - -extern char ** -parse_args(int argc, char **argv) -{ // Check how we were called. { - const char *name = str_filename(argv[0]); - if (name != NULL) { - // Default file format - if (strstr(name, "lz") != NULL) - format_compress_auto = FORMAT_LZMA; - - // Operation mode - if (strstr(name, "cat") != NULL) { - opt_mode = MODE_DECOMPRESS; - opt_stdout = true; - } else if (strstr(name, "un") != NULL) { - opt_mode = MODE_DECOMPRESS; - } + // Remove the leading path name, if any. 
+ const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // The default file format is .lzma if the command name + // contains "lz". + if (strstr(name, "lz") != NULL) + format_compress_auto = FORMAT_LZMA; + + // Operation mode + if (strstr(name, "cat") != NULL) { + // Imply --decompress --stdout + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "un") != NULL) { + // Imply --decompress + opt_mode = MODE_DECOMPRESS; } } // First the flags from environment - parse_environment(); + parse_environment(args, argv[0]); // Then from the command line optind = 1; - parse_real(argc, argv); + parse_real(args, argc, argv); // Never remove the source file when the destination is not on disk. // In test mode the data is written nowhere, but setting opt_stdout @@ -580,18 +468,33 @@ parse_args(int argc, char **argv) opt_stdout = true; } + // If no --format flag was used, or it was --format=auto, we need to + // decide what is the target file format we are going to use. This + // depends on how we were called (checked earlier in this function). if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) opt_format = format_compress_auto; + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) - set_compression_settings(); + coder_set_compression_settings(); // If no filenames are given, use stdin. - if (argv[optind] == NULL && opt_files_name == NULL) { - // We don't modify or free() the "-" constant. 
- static char *argv_stdin[2] = { (char *)"-", NULL }; - return argv_stdin; + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = argc - optind; } - return argv + optind; + return; } diff --git a/src/lzma/args.h b/src/lzma/args.h index 8d9cd306..6d4e8282 100644 --- a/src/lzma/args.h +++ b/src/lzma/args.h @@ -23,42 +23,34 @@ #include "private.h" -enum tool_mode { - MODE_COMPRESS, - MODE_DECOMPRESS, - MODE_TEST, - MODE_LIST, -}; +typedef struct { + /// Filenames from command line + char **arg_names; -// NOTE: The order of these is significant in suffix.c. -enum format_type { - FORMAT_AUTO, - FORMAT_XZ, - FORMAT_LZMA, - // HEADER_GZIP, - FORMAT_RAW, -}; + /// Number of filenames from command line + size_t arg_count; + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + char *files_name; -extern char *opt_suffix; + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. 
+ FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; -extern char *opt_files_name; -extern char opt_files_split; -extern FILE *opt_files_file; extern bool opt_stdout; extern bool opt_force; extern bool opt_keep_original; -extern bool opt_preserve_name; // extern bool opt_recursive; -extern enum tool_mode opt_mode; -extern enum format_type opt_format; - -extern lzma_check opt_check; -extern lzma_filter opt_filters[LZMA_BLOCK_FILTERS_MAX + 1]; extern const char *stdin_filename; -extern char **parse_args(int argc, char **argv); +extern void args_parse(args_info *args, int argc, char **argv); #endif diff --git a/src/lzma/error.c b/src/lzma/error.c deleted file mode 100644 index e66fd140..00000000 --- a/src/lzma/error.c +++ /dev/null @@ -1,162 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file error.c -/// \brief Error message printing -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" -#include <stdarg.h> - - -exit_status_type exit_status = SUCCESS; -verbosity_type verbosity = V_WARNING; -char *argv0 = NULL; -volatile sig_atomic_t user_abort = 0; - - -extern const char * -str_strm_error(lzma_ret code) -{ - switch (code) { - case LZMA_OK: - return _("Operation successful"); - - case LZMA_STREAM_END: - return _("Operation finished successfully"); - - case LZMA_PROG_ERROR: - return _("Internal error (bug)"); - - case LZMA_DATA_ERROR: - return _("Compressed data is corrupt"); - - case LZMA_MEM_ERROR: - return strerror(ENOMEM); - - case LZMA_BUF_ERROR: - return _("Unexpected end of input"); - - case LZMA_OPTIONS_ERROR: - return _("Unsupported options"); - - case LZMA_UNSUPPORTED_CHECK: - return _("Unsupported integrity check type"); - - case LZMA_MEMLIMIT_ERROR: - return _("Memory usage limit reached"); - - case LZMA_FORMAT_ERROR: - return _("File format not recognized"); - - default: - return NULL; - } -} - - -extern void -set_exit_status(exit_status_type new_status) -{ - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&mutex); - - if (new_status != WARNING || exit_status == SUCCESS) - exit_status = new_status; - - pthread_mutex_unlock(&mutex); - return; -} - - -extern void lzma_attribute((noreturn)) -my_exit(int status) -{ - // Close stdout. If something goes wrong, print an error message - // to stderr. - { - const int ferror_err = ferror(stdout); - const int fclose_err = fclose(stdout); - if (fclose_err) { - errmsg(V_ERROR, _("Writing to standard output " - "failed: %s"), strerror(errno)); - status = ERROR; - } else if (ferror_err) { - // Some error has occurred but we have no clue about - // the reason since fclose() succeeded. - errmsg(V_ERROR, _("Writing to standard output " - "failed: %s"), "Unknown error"); - status = ERROR; - } - } - - // Close stderr. 
If something goes wrong, there's nothing where we - // could print an error message. Just set the exit status. - { - const int ferror_err = ferror(stderr); - const int fclose_err = fclose(stderr); - if (fclose_err || ferror_err) - status = ERROR; - } - - exit(status); -} - - -extern void lzma_attribute((format(printf, 2, 3))) -errmsg(verbosity_type v, const char *fmt, ...) -{ - va_list ap; - - if (v <= verbosity) { - va_start(ap, fmt); - - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_lock(&mutex); - - fprintf(stderr, "%s: ", argv0); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - - pthread_mutex_unlock(&mutex); - - va_end(ap); - } - - if (v == V_ERROR) - set_exit_status(ERROR); - else if (v == V_WARNING) - set_exit_status(WARNING); - - return; -} - - -extern void -out_of_memory(void) -{ - errmsg(V_ERROR, "%s", strerror(ENOMEM)); - user_abort = 1; - return; -} - - -extern void -internal_error(void) -{ - errmsg(V_ERROR, _("Internal error (bug)")); - user_abort = 1; - return; -} diff --git a/src/lzma/error.h b/src/lzma/error.h deleted file mode 100644 index 34ec30e1..00000000 --- a/src/lzma/error.h +++ /dev/null @@ -1,67 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file error.c -/// \brief Error message printing -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. 
-// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef ERROR_H -#define ERROR_H - -#include "private.h" - - -typedef enum { - SUCCESS = 0, - ERROR = 1, - WARNING = 2, -} exit_status_type; - - -typedef enum { - V_SILENT, - V_ERROR, - V_WARNING, - V_VERBOSE, - V_DEBUG, -} verbosity_type; - - -extern exit_status_type exit_status; - -extern verbosity_type verbosity; - -/// Like GNU's program_invocation_name but portable -extern char *argv0; - -/// Once this is non-zero, all threads must shutdown and clean up incomplete -/// output files from the disk. -extern volatile sig_atomic_t user_abort; - - -extern const char * str_strm_error(lzma_ret code); - -extern void errmsg(verbosity_type v, const char *fmt, ...) - lzma_attribute((format(printf, 2, 3))); - -extern void set_exit_status(exit_status_type new_status); - -extern void my_exit(int status) lzma_attribute((noreturn)); - -extern void out_of_memory(void); - -extern void internal_error(void); - -#endif diff --git a/src/lzma/hardware.c b/src/lzma/hardware.c index 6cb3cdfc..63bf0937 100644 --- a/src/lzma/hardware.c +++ b/src/lzma/hardware.c @@ -26,33 +26,15 @@ size_t opt_threads = 1; -/// Number of bytes of memory to use at maximum (only a rough limit). -/// This can be set with the --memory=NUM command line option. -/// If no better value can be determined, the default is 14 MiB, which -/// should be quite safe even for older systems while still allowing -/// reasonable compression ratio. -size_t opt_memory = 14 * 1024 * 1024; +/// Memory usage limit for encoding +static uint64_t memlimit_encoder; +/// Memory usage limit for decoding +static uint64_t memlimit_decoder; -/// Get the amount of physical memory, and set opt_memory to 1/3 of it. -/// User can then override this with --memory command line option. 
-static void -hardware_memory(void) -{ - uint64_t mem = physmem(); - if (mem != 0) { - mem /= 3; - -#if UINT64_MAX > SIZE_MAX - if (mem > SIZE_MAX) - mem = SIZE_MAX; -#endif - - opt_memory = mem; - } - - return; -} +/// Memory usage limit given on the command line or environment variable. +/// Zero indicates the default (memlimit_encoder or memlimit_decoder). +static uint64_t memlimit_custom = 0; /// Get the number of CPU cores, and set opt_threads to default to that value. @@ -90,10 +72,51 @@ hardware_cores(void) } +static void +hardware_memlimit_init(void) +{ + uint64_t mem = physmem(); + + // If we cannot determine the amount of RAM, assume 32 MiB. Maybe + // even that is too much on some systems. But on most systems it's + // far too little, and can be annoying. + if (mem == 0) + mem = UINT64_C(16) * 1024 * 1024; + + // Use at maximum of 90 % of RAM when encoding and 33 % when decoding. + memlimit_encoder = mem - mem / 10; + memlimit_decoder = mem / 3; + + return; +} + + +extern void +hardware_memlimit_set(uint64_t memlimit) +{ + memlimit_custom = memlimit; + return; +} + + +extern uint64_t +hardware_memlimit_encoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_encoder; +} + + +extern uint64_t +hardware_memlimit_decoder(void) +{ + return memlimit_custom != 0 ? memlimit_custom : memlimit_decoder; +} + + extern void hardware_init(void) { - hardware_memory(); + hardware_memlimit_init(); hardware_cores(); return; } diff --git a/src/lzma/hardware.h b/src/lzma/hardware.h index d47bd29f..f604df20 100644 --- a/src/lzma/hardware.h +++ b/src/lzma/hardware.h @@ -24,8 +24,22 @@ extern size_t opt_threads; -extern size_t opt_memory; + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. extern void hardware_init(void); + +/// Set custom memory usage limit. This is used for both encoding and +/// decoding. Zero indicates resetting the limit back to defaults. 
+extern void hardware_memlimit_set(uint64_t memlimit); + +/// Get the memory usage limit for encoding. By default this is 90 % of RAM. +extern uint64_t hardware_memlimit_encoder(void); + + +/// Get the memory usage limit for decoding. By default this is 33 % of RAM. +extern uint64_t hardware_memlimit_decoder(void); + #endif diff --git a/src/lzma/help.c b/src/lzma/help.c deleted file mode 100644 index 2e59f3b5..00000000 --- a/src/lzma/help.c +++ /dev/null @@ -1,170 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file help.c -/// \brief Help messages -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#include "private.h" - - -extern void -show_try_help(void) -{ - // Print this with V_WARNING instead of V_ERROR to prevent it from - // showing up when --quiet has been specified. - errmsg(V_WARNING, _("Try `%s --help' for more information."), argv0); - return; -} - - -extern void lzma_attribute((noreturn)) -show_help(void) -{ - printf(_("Usage: %s [OPTION]...
[FILE]...\n" - "Compress or decompress FILEs in the .lzma format.\n" - "\n"), argv0); - - puts(_("Mandatory arguments to long options are mandatory for " - "short options too.\n")); - - puts(_( -" Operation mode:\n" -"\n" -" -z, --compress force compression\n" -" -d, --decompress force decompression\n" -" -t, --test test compressed file integrity\n" -" -l, --list list information about files\n" -)); - - puts(_( -" Operation modifiers:\n" -"\n" -" -k, --keep keep (don't delete) input files\n" -" -f, --force force overwrite of output file and (de)compress links\n" -" -c, --stdout write to standard output and don't delete input files\n" -" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" -" -F, --format=FMT file format to encode or decode; possible values are\n" -" `auto' (default), `xz', `lzma', and `raw'\n" -" --files=[FILE] read filenames to process from FILE; if FILE is\n" -" omitted, filenames are read from the standard input;\n" -" filenames must be terminated with the newline character\n" -" --files0=[FILE] like --files but use the nul byte as terminator\n" -)); - - puts(_( -" Compression presets and basic compression options:\n" -"\n" -" -1 .. -2 fast compression\n" -" -3 .. -6 good compression\n" -" -7 .. 
-9 excellent compression, but needs a lot of memory;\n" -" default is -7 if memory limit allows\n" -"\n" -" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" -" or `sha256'\n" -)); - - puts(_( -" Custom filter chain for compression (alternative for using presets):\n" -"\n" -" --lzma1=[OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" -" --lzma2=[OPTS] more of the following options (valid values; default):\n" -" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" -" lc=NUM number of literal context bits (0-4; 3)\n" -" lp=NUM number of literal position bits (0-4; 0)\n" -" pb=NUM number of position bits (0-4; 2)\n" -" mode=MODE compression mode (fast, normal; normal)\n" -" nice=NUM nice length of a match (2-273; 64)\n" -" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" -" depth=NUM maximum search depth; 0=automatic (default)\n" -"\n" -" --x86 x86 filter (sometimes called BCJ filter)\n" -" --powerpc PowerPC (big endian) filter\n" -" --ia64 IA64 (Itanium) filter\n" -" --arm ARM filter\n" -" --armthumb ARM-Thumb filter\n" -" --sparc SPARC filter\n" -"\n" -" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n" -" dist=NUM distance between bytes being subtracted\n" -" from each other (1-256; 1)\n" -"\n" -" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" -" size=NUM number of bytes of data per subblock\n" -" (1 - 256Mi; 4Ki)\n" -" rle=NUM run-length encoder chunk size (0-256; 0)\n" -)); - - puts(_( -" Resource usage options:\n" -"\n" -" -M, --memory=NUM use roughly NUM bytes of memory at maximum\n" -" -T, --threads=NUM use a maximum of NUM (de)compression threads\n" -// " --threading=STR threading style; possible values are `auto' (default),\n" -// " `files', and `stream' -)); - - puts(_( -" Other options:\n" -"\n" -" -q, --quiet suppress warnings; specify twice to suppress errors too\n" -" -v, --verbose be verbose; specify twice for even more verbose\n" -"\n" -" -h, --help display this 
help and exit\n" -" -V, --version display version and license information and exit\n")); - - puts(_("With no FILE, or when FILE is -, read standard input.\n")); - - size_t mem_limit = opt_memory / (1024 * 1024); - if (mem_limit == 0) - mem_limit = 1; - - // We use PRIu64 instead of %zu to support pre-C99 libc. - puts(_("On this system and configuration, the tool will use")); - printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"), - (uint64_t)(mem_limit)); - printf(N_(" * at maximum of one thread for (de)compression.\n\n", - " * at maximum of %" PRIu64 - " threads for (de)compression.\n\n", - (uint64_t)(opt_threads)), (uint64_t)(opt_threads)); - - printf(_("Report bugs to <%s> (in English or Finnish).\n"), - PACKAGE_BUGREPORT); - - my_exit(SUCCESS); -} - - -extern void lzma_attribute((noreturn)) -show_version(void) -{ - printf( -"lzma (LZMA Utils) " PACKAGE_VERSION "\n" -"\n" -"Copyright (C) 1999-2008 Igor Pavlov\n" -"Copyright (C) 2007-2008 Lasse Collin\n" -"\n" -"This program is free software; you can redistribute it and/or modify\n" -"it under the terms of the GNU General Public License as published by\n" -"the Free Software Foundation; either version 2 of the License, or\n" -"(at your option) any later version.\n" -"\n" -"This program is distributed in the hope that it will be useful,\n" -"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" -"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n" -"GNU General Public License for more details.\n" -"\n"); - my_exit(SUCCESS); -} diff --git a/src/lzma/help.h b/src/lzma/help.h deleted file mode 100644 index 659c66a0..00000000 --- a/src/lzma/help.h +++ /dev/null @@ -1,32 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -/// \file help.h -/// \brief Help messages -// -// Copyright (C) 2007 Lasse Collin -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef HELP_H -#define HELP_H - -#include "private.h" - - -extern void show_try_help(void); - -extern void show_help(void) lzma_attribute((noreturn)); - -extern void show_version(void) lzma_attribute((noreturn)); - -#endif diff --git a/src/lzma/io.c b/src/lzma/io.c index b972099f..0ec63f03 100644 --- a/src/lzma/io.c +++ b/src/lzma/io.c @@ -19,131 +19,39 @@ #include "private.h" -#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) -# include <sys/time.h> -#endif +#include <fcntl.h> -#ifndef O_SEARCH -# define O_SEARCH O_RDONLY +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include <sys/time.h> +#elif defined(HAVE_UTIME) +# include <utime.h> #endif -/// \brief Number of open file_pairs -/// -/// Once the main() function has requested processing of all files, -/// we wait that open_pairs drops back to zero. Then it is safe to -/// exit from the program. 
-static size_t open_pairs = 0; - - -/// \brief mutex for file system operations -/// -/// All file system operations are done via the functions in this file. -/// They use fchdir() to avoid some race conditions (more portable than -/// openat() & co.). -/// -/// Synchronizing all file system operations shouldn't affect speed notably, -/// since the actual reading from and writing to files is done in parallel. -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - -/// This condition is invoked when a file is closed and the value of -/// the open_files variable has dropped to zero. The only listener for -/// this condition is io_finish() which is called from main(). -static pthread_cond_t io_cond = PTHREAD_COND_INITIALIZER; - - -/// True when stdout is being used by some thread -static bool stdout_in_use = false; - - -/// This condition is signalled when a thread releases stdout (no longer -/// writes data to it). -static pthread_cond_t stdout_cond = PTHREAD_COND_INITIALIZER; - - -/// \brief Directory where we were started -/// -/// This is needed when a new file, whose name was given on command line, -/// is opened. -static int start_dir; - - -static uid_t uid; -static gid_t gid; - - -extern void -io_init(void) -{ - start_dir = open(".", O_SEARCH | O_NOCTTY); - if (start_dir == -1) { - errmsg(V_ERROR, _("Cannot get file descriptor of the current " - "directory: %s"), strerror(errno)); - my_exit(ERROR); - } - - uid = getuid(); - gid = getgid(); - - return; -} - - -/// Waits until the number of open file_pairs has dropped to zero. -extern void -io_finish(void) -{ - pthread_mutex_lock(&mutex); - - while (open_pairs != 0) - pthread_cond_wait(&io_cond, &mutex); - - (void)close(start_dir); - - pthread_mutex_unlock(&mutex); - - return; -} - - /// \brief Unlinks a file /// -/// \param dir_fd File descriptor of the directory containing the file -/// \param name Name of the file with or without path -/// -/// \return Zero on success. 
On error, -1 is returned and errno set. -/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). static void -io_unlink(int dir_fd, const char *name, ino_t ino) +io_unlink(const char *name, const struct stat *known_st) { - const char *base = str_filename(name); - if (base == NULL) { - // This shouldn't happen. - errmsg(V_ERROR, _("%s: Invalid filename"), name); - return; - } + struct stat new_st; - pthread_mutex_lock(&mutex); - - if (fchdir(dir_fd)) { - errmsg(V_ERROR, _("Cannot change directory: %s"), - strerror(errno)); + if (lstat(name, &new_st) + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino) { + message_error(_("%s: File seems to be moved, not removing"), + name); } else { - struct stat st; - if (lstat(base, &st) || st.st_ino != ino) - errmsg(V_ERROR, _("%s: File seems to be moved, " - "not removing"), name); - // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. - else if (unlink(base)) - errmsg(V_ERROR, _("%s: Cannot remove: %s"), + if (unlink(name)) + message_error(_("%s: Cannot remove: %s"), name, strerror(errno)); } - pthread_mutex_unlock(&mutex); - return; } @@ -160,14 +68,31 @@ io_copy_attrs(const file_pair *pair) // destination file who didn't have permission to access the // source file. - if (uid == 0 && fchown(pair->dest_fd, pair->src_st.st_uid, -1)) - errmsg(V_WARNING, _("%s: Cannot set the file owner: %s"), - pair->dest_name, strerror(errno)); + // Simple cache to avoid repeated calls to geteuid(). + static enum { + WARN_FCHOWN_UNKNOWN, + WARN_FCHOWN_NO, + WARN_FCHOWN_YES, + } warn_fchown = WARN_FCHOWN_UNKNOWN; + + // Try changing the owner of the file. 
If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. + if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) + && warn_fchown != WARN_FCHOWN_NO) { + if (warn_fchown == WARN_FCHOWN_UNKNOWN) + warn_fchown = geteuid() == 0 + ? WARN_FCHOWN_YES : WARN_FCHOWN_NO; + + if (warn_fchown == WARN_FCHOWN_YES) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + } mode_t mode; if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { - errmsg(V_WARNING, _("%s: Cannot set the file group: %s"), + message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and @@ -186,192 +111,291 @@ io_copy_attrs(const file_pair *pair) } if (fchmod(pair->dest_fd, mode)) - errmsg(V_WARNING, _("%s: Cannot set the file permissions: %s"), + message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); - // Copy the timestamps only if we have a secure function to do it. -#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) - struct timeval tv[2]; - tv[0].tv_sec = pair->src_st.st_atime; - tv[1].tv_sec = pair->src_st.st_mtime; + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. 
+ long atime_nsec; + long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) - tv[0].tv_usec = pair->src_st.st_atim.tv_nsec / 1000; + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) - tv[0].tv_usec = pair->src_st.st_atimespec.tv_nsec / 1000; -# else - tv[0].tv_usec = 0; -# endif + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; -# if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) - tv[1].tv_usec = pair->src_st.st_mtim.tv_nsec / 1000; -# elif defined(HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC) - tv[1].tv_usec = pair->src_st.st_mtimespec.tv_nsec / 1000; # else - tv[1].tv_usec = 0; + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; # endif -# ifdef HAVE_FUTIMES + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. 
+ struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); -# else +# elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->src_name, tv); # endif + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. + const struct utimbuf buf = { + .actime = pair->src_st.st_atime; + .modtime = pair->src_st.st_mtime; + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->src_name, &buf); #endif return; } -/// Opens and changes into the directory containing the source file. -static int -io_open_dir(file_pair *pair) +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src(file_pair *pair) { - if (pair->src_name == stdin_filename) - return 0; - - if (fchdir(start_dir)) { - errmsg(V_ERROR, _("Cannot change directory: %s"), - strerror(errno)); - return -1; + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; + return false; } - const char *split = strrchr(pair->src_name, '/'); - if (split == NULL) { - pair->dir_fd = start_dir; - } else { - // Copy also the slash. It's needed to support filenames - // like "/foo" (dirname being "/"), and it never hurts anyway. - const size_t dirname_len = split - pair->src_name + 1; - char dirname[dirname_len + 1]; - memcpy(dirname, pair->src_name, dirname_len); - dirname[dirname_len] = '\0'; - - // Open the directory and change into it. 
- pair->dir_fd = open(dirname, O_SEARCH | O_NOCTTY); - if (pair->dir_fd == -1 || fchdir(pair->dir_fd)) { - errmsg(V_ERROR, _("%s: Cannot open the directory " - "containing the file: %s"), - pair->src_name, strerror(errno)); - (void)close(pair->dir_fd); - return -1; + // We accept only regular files if we are writing the output + // to disk too, and if --force was not given. + const bool reg_files_only = !opt_stdout && !opt_force; + + // Flags for open() + int flags = O_RDONLY | O_NOCTTY; + + // If we accept only regular files, we need to be careful to avoid + // problems with special files like devices and FIFOs. O_NONBLOCK + // prevents blocking when opening such files. When we want to accept + // special files, we must not use O_NONBLOCK, or otherwise we won't + // block waiting e.g. FIFOs to become readable. + if (reg_files_only) + flags |= O_NONBLOCK; + +#ifdef O_NOFOLLOW + if (reg_files_only) + flags |= O_NOFOLLOW; +#else + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. + if (reg_files_only) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; } } +#endif - return 0; -} + // Try to open the file. If we are accepting non-regular files, + // unblock the caught signals so that open() can be interrupted + // if it blocks e.g. due to a FIFO file. + if (!reg_files_only) + signals_unblock(); + + // Maybe this wouldn't need a loop, since all the signal handlers for + // which we don't use SA_RESTART set user_abort to true. But it + // doesn't hurt to have it just in case. 
+ do { + pair->src_fd = open(pair->src_name, flags); + } while (pair->src_fd == -1 && errno == EINTR && !user_abort); + + if (!reg_files_only) + signals_block(); + + if (pair->src_fd == -1) { + // If we were interrupted, don't display any error message. + if (errno == EINTR) { + // All the signals that don't have SA_RESTART + // set user_abort. + assert(user_abort); + return true; + } +#ifdef O_NOFOLLOW + // Give an understandable error message in if reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate if O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW. + bool was_symlink = false; -static void -io_close_dir(file_pair *pair) -{ - if (pair->dir_fd != start_dir) - (void)close(pair->dir_fd); +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; - return; -} +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; +# else + if (errno == ELOOP && reg_files_only) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif -/// Opens the source file. The file is opened using the plain filename without -/// path, thus the file must be in the current working directory. This is -/// ensured because io_open_dir() is always called before this function. 
-static int -io_open_src(file_pair *pair) -{ - if (pair->src_name == stdin_filename) { - pair->src_fd = STDIN_FILENO; - } else { - // Strip the pathname. Thanks to io_open_dir(), the file - // is now in the current working directory. - const char *filename = str_filename(pair->src_name); - if (filename == NULL) - return -1; - - // Symlinks are followed if --stdout or --force has been - // specified. - const bool follow_symlinks = opt_stdout || opt_force; - pair->src_fd = open(filename, O_RDONLY | O_NOCTTY - | (follow_symlinks ? 0 : O_NOFOLLOW)); - if (pair->src_fd == -1) { - // Give an understandable error message in if reason - // for failing was that the file was a symbolic link. - // - Linux, OpenBSD, Solaris: ELOOP - // - FreeBSD: EMLINK - // - Tru64: ENOTSUP - // It seems to be safe to check for all these, since - // those errno values aren't used for other purporses - // on any of the listed operating system *when* the - // above flags are used with open(). - if (!follow_symlinks - && (errno == ELOOP -#ifdef EMLINK - || errno == EMLINK -#endif -#ifdef ENOTSUP - || errno == ENOTSUP + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else #endif - )) { - errmsg(V_WARNING, _("%s: Is a symbolic link, " - "skipping"), pair->src_name); - } else { - errmsg(V_ERROR, "%s: %s", pair->src_name, - strerror(errno)); - } + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); - return -1; - } + return true; + } - if (fstat(pair->src_fd, &pair->src_st)) { - errmsg(V_ERROR, "%s: %s", pair->src_name, - strerror(errno)); + // Drop O_NONBLOCK, which is used only when we are accepting only + // regular files. After the open() call, we want things to block + // instead of giving EAGAIN. 
+ if (reg_files_only) { + flags = fcntl(pair->src_fd, F_GETFL); + if (flags == -1) + goto error_msg; + + flags &= ~O_NONBLOCK; + + if (fcntl(pair->src_fd, F_SETFL, flags)) + goto error_msg; + } + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only) { + if (!S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, " + "skipping"), pair->src_name); goto error; } - if (S_ISDIR(pair->src_st.st_mode)) { - errmsg(V_WARNING, _("%s: Is a directory, skipping"), + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force was used. We drop these bits + // explicitly in io_copy_attr(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), pair->src_name); goto error; } - if (!opt_stdout) { - if (!opt_force && !S_ISREG(pair->src_st.st_mode)) { - errmsg(V_WARNING, _("%s: Not a regular file, " - "skipping"), pair->src_name); - goto error; - } - - if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { - // Setuid and setgid files are rejected even - // with --force. This is good for security - // (hopefully) but it's a bit weird to reject - // file when --force was given. At least this - // matches gzip's behavior. 
- errmsg(V_WARNING, _("%s: File has setuid or " - "setgid bit set, skipping"), - pair->src_name); - goto error; - } - - if (!opt_force && (pair->src_st.st_mode & S_ISVTX)) { - errmsg(V_WARNING, _("%s: File has sticky bit " - "set, skipping"), - pair->src_name); - goto error; - } + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } - if (pair->src_st.st_nlink > 1) { - errmsg(V_WARNING, _("%s: Input file has more " - "than one hard link, " - "skipping"), pair->src_name); - goto error; - } + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; } } - return 0; + return false; +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); - return -1; + return true; } @@ -383,65 +407,73 @@ error: static void io_close_src(file_pair *pair, bool success) { - if (pair->src_fd == STDIN_FILENO || pair->src_fd == -1) - return; - - if (close(pair->src_fd)) { - errmsg(V_ERROR, _("%s: Closing the file failed: %s"), - pair->src_name, strerror(errno)); - } else if (success && !opt_keep_original) { - io_unlink(pair->dir_fd, pair->src_name, pair->src_st.st_ino); + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { + // If we are going to unlink(), do it before closing the file. + // This way there's no risk that someone replaces the file and + // happens to get same inode number, which would make us + // unlink() wrong file. + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + + (void)close(pair->src_fd); } return; } -static int +static bool io_open_dest(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; + return false; + } - // Synchronize the order in which files get written to stdout. 
- // Unlocking the mutex is safe, because opening the file_pair - // can no longer fail. - while (stdout_in_use) - pthread_cond_wait(&stdout_cond, &mutex); + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; - stdout_in_use = true; + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } - } else { - pair->dest_name = get_dest_name(pair->src_name); - if (pair->dest_name == NULL) - return -1; - - // This cannot fail, because get_dest_name() doesn't return - // invalid names. - const char *filename = str_filename(pair->dest_name); - assert(filename != NULL); - - pair->dest_fd = open(filename, O_WRONLY | O_NOCTTY | O_CREAT - | (opt_force ? O_TRUNC : O_EXCL), - S_IRUSR | S_IWUSR); - if (pair->dest_fd == -1) { - errmsg(V_ERROR, "%s: %s", pair->dest_name, + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error("%s: Cannot unlink: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + const int flags = O_WRONLY | O_NOCTTY | O_CREAT | O_EXCL; + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + // Don't bother with error message if user requested + // us to exit anyway. + if (!user_abort) + message_error("%s: %s", pair->dest_name, strerror(errno)); - free(pair->dest_name); - return -1; - } - // If this really fails... well, we have a safe fallback. - struct stat st; - if (fstat(pair->dest_fd, &st)) - pair->dest_ino = 0; - else - pair->dest_ino = st.st_ino; + free(pair->dest_name); + return true; } - return 0; + // If this really fails... well, we have a safe fallback. 
+ if (fstat(pair->dest_fd, &pair->dest_st)) { + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; + } + + return false; } @@ -455,22 +487,16 @@ io_open_dest(file_pair *pair) static int io_close_dest(file_pair *pair, bool success) { - if (pair->dest_fd == -1) + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return 0; - if (pair->dest_fd == STDOUT_FILENO) { - stdout_in_use = false; - pthread_cond_signal(&stdout_cond); - return 0; - } - if (close(pair->dest_fd)) { - errmsg(V_ERROR, _("%s: Closing the file failed: %s"), + message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: - io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return -1; } @@ -478,7 +504,7 @@ io_close_dest(file_pair *pair, bool success) // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) - io_unlink(pair->dir_fd, pair->dest_name, pair->dest_ino); + io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); @@ -492,98 +518,63 @@ io_open(const char *src_name) if (is_empty_filename(src_name)) return NULL; - file_pair *pair = malloc(sizeof(file_pair)); - if (pair == NULL) { - out_of_memory(); - return NULL; - } + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; - *pair = (file_pair){ + pair = (file_pair){ .src_name = src_name, .dest_name = NULL, - .dir_fd = -1, .src_fd = -1, .dest_fd = -1, .src_eof = false, }; - pthread_mutex_lock(&mutex); - - ++open_pairs; - - if (io_open_dir(pair)) - goto error_dir; - - if (io_open_src(pair)) - goto error_src; - - if (user_abort || io_open_dest(pair)) - goto error_dest; - - pthread_mutex_unlock(&mutex); + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. 
+ signals_block(); + + file_pair *ret = NULL; + if (!io_open_src(&pair)) { + // io_open_src() may have unblocked the signals temporarily, + // and thus user_abort may have got set even if open() + // succeeded. + if (user_abort || io_open_dest(&pair)) + io_close_src(&pair, false); + else + ret = &pair; + } - return pair; + signals_unblock(); -error_dest: - io_close_src(pair, false); -error_src: - io_close_dir(pair); -error_dir: - --open_pairs; - pthread_mutex_unlock(&mutex); - free(pair); - return NULL; + return ret; } -/// \brief Closes the file descriptors and frees the structure extern void io_close(file_pair *pair, bool success) { + signals_block(); + if (success && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. If it fails, we must not remove // the source file! - if (!io_close_dest(pair, success)) { - // Closing destination file succeeded. Remove the source file - // if the operation using this file pair was successful - // and we haven't been requested to keep the source file. - io_close_src(pair, success); - } else { - // We don't care if operation using this file pair was - // successful or not, since closing the destination file - // failed. Don't remove the original file. - io_close_src(pair, false); - } - - io_close_dir(pair); + if (io_close_dest(pair, success)) + success = false; - free(pair); - - pthread_mutex_lock(&mutex); - - if (--open_pairs == 0) - pthread_cond_signal(&io_cond); + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. 
+ io_close_src(pair, success); - pthread_mutex_unlock(&mutex); + signals_unblock(); return; } -/// \brief Reads from a file to a buffer -/// -/// \param pair File pair having the sourcefile open for reading -/// \param buf Destination buffer to hold the read data -/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX -/// -/// \return On success, number of bytes read is returned. On end of -/// file zero is returned and pair->src_eof set to true. -/// On error, SIZE_MAX is returned and error message printed. -/// -/// \note This does no locking, thus two threads must not read from -/// the same file. This no problem in this program. extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size) { @@ -608,7 +599,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size) continue; } - errmsg(V_ERROR, _("%s: Read error: %s"), + message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); // FIXME Is this needed? @@ -625,18 +616,7 @@ io_read(file_pair *pair, uint8_t *buf, size_t size) } -/// \brief Writes a buffer to a file -/// -/// \param pair File pair having the destination file open for writing -/// \param buf Buffer containing the data to be written -/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX -/// -/// \return On success, zero is returned. On error, -1 is returned -/// and error message printed. -/// -/// \note This does no locking, thus two threads must not write to -/// the same file. This no problem in this program. -extern int +extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size) { assert(size < SSIZE_MAX); @@ -660,18 +640,19 @@ io_write(const file_pair *pair, const uint8_t *buf, size_t size) // GNU bash). // // We don't do anything special with --quiet, which - // is what bzip2 does too. However, we print a - // message if --verbose was used (or should that - // only be with double --verbose i.e. debugging?). - errmsg(errno == EPIPE ? 
V_VERBOSE : V_ERROR, - _("%s: Write error: %s"), + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. + if (errno != EPIPE) + message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); - return -1; + + return true; } buf += (size_t)(amount); size -= (size_t)(amount); } - return 0; + return false; } diff --git a/src/lzma/io.h b/src/lzma/io.h index d1aa17f4..4d8e61b2 100644 --- a/src/lzma/io.h +++ b/src/lzma/io.h @@ -22,6 +22,8 @@ #include "private.h" + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. #if BUFSIZ <= 1024 # define IO_BUFFER_SIZE 8192 #else @@ -30,31 +32,66 @@ typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. char *dest_name; - int dir_fd; + /// File descriptor of the source file int src_fd; + + /// File descriptor of the target file int dest_fd; + /// Stat of the source file. struct stat src_st; - ino_t dest_ino; - bool src_eof; -} file_pair; + /// Stat of the destination file. + struct stat dest_st; + /// True once end of the source file has been detected. + bool src_eof; -extern void io_init(void); +} file_pair; -extern void io_finish(void); +/// \brief Opens a file pair extern file_pair *io_open(const char *src_name); + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. 
extern void io_close(file_pair *pair, bool success); + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. extern size_t io_read(file_pair *pair, uint8_t *buf, size_t size); -extern int io_write(const file_pair *pair, const uint8_t *buf, size_t size); +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, zero is returned. On error, -1 is returned +/// and error message printed. +extern bool io_write(const file_pair *pair, const uint8_t *buf, size_t size); #endif diff --git a/src/lzma/main.c b/src/lzma/main.c index 02891193..4e24b98d 100644 --- a/src/lzma/main.c +++ b/src/lzma/main.c @@ -21,16 +21,30 @@ #include "open_stdxxx.h" #include <ctype.h> -static sig_atomic_t exit_signal = 0; + +volatile sig_atomic_t user_abort = false; + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which have have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// signals_block() and signals_unblock() can be called recursively. 
+static size_t signals_block_count = 0; static void signal_handler(int sig) { - // FIXME Is this thread-safe together with main()? exit_signal = sig; - - user_abort = 1; + user_abort = true; return; } @@ -38,116 +52,226 @@ signal_handler(int sig) static void establish_signal_handlers(void) { - struct sigaction sa; - sa.sa_handler = &signal_handler; - sigfillset(&sa.sa_mask); - sa.sa_flags = 0; - + // List of signals for which we establish the signal handler. static const int sigs[] = { - SIGHUP, SIGINT, - SIGPIPE, SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU SIGXCPU, +#endif +#ifdef SIGXFSZ SIGXFSZ, +#endif }; - for (size_t i = 0; i < sizeof(sigs) / sizeof(sigs[0]); ++i) { - if (sigaction(sigs[i], &sa, NULL)) { - errmsg(V_ERROR, _("Cannot establish signal handlers")); - my_exit(ERROR); - } + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + + struct sigaction sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + sa.sa_flags = 0; + sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &sa, NULL)) + message_signal_handler(); } - /* - SIGINFO/SIGUSR1 for status reporting? 
- */ + return; } -static bool -is_tty_stdin(void) +extern void +signals_block(void) { - const bool ret = isatty(STDIN_FILENO); - if (ret) { - // FIXME: Other threads may print between these lines. - // Maybe that should be fixed. Not a big issue in practice. - errmsg(V_ERROR, _("Compressed data not read from " - "a terminal.")); - errmsg(V_ERROR, _("Use `--force' to force decompression.")); - show_try_help(); + if (signals_block_count++ == 0) { + const int saved_errno = errno; + sigprocmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; } - return ret; + return; } -static bool -is_tty_stdout(void) +extern void +signals_unblock(void) { - const bool ret = isatty(STDOUT_FILENO); - if (ret) { - errmsg(V_ERROR, _("Compressed data not written to " - "a terminal.")); - errmsg(V_ERROR, _("Use `--force' to force compression.")); - show_try_help(); + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + sigprocmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; } - return ret; + return; } -static char * -read_name(void) +extern void +set_exit_status(enum exit_status_type new_status) { - size_t size = 256; - size_t pos = 0; - char *name = malloc(size); - if (name == NULL) { - out_of_memory(); - return NULL; + assert(new_status == E_WARNING || new_status == E_ERROR); + + if (exit_status != E_ERROR) + exit_status = new_status; + + return; +} + + +extern void +my_exit(enum exit_status_type status) +{ + // Close stdout. If something goes wrong, print an error message + // to stderr. + { + const int ferror_err = ferror(stdout); + const int fclose_err = fclose(stdout); + if (ferror_err || fclose_err) { + // If it was fclose() that failed, we have the reason + // in errno. If only ferror() indicated an error, + // we have no idea what the reason was. + message(V_ERROR, _("Writing to standard output " + "failed: %s"), + fclose_err ? 
strerror(errno) + : _("Unknown error")); + status = E_ERROR; + } + } + + // Close stderr. If something goes wrong, there's nothing where we + // could print an error message. Just set the exit status. + { + const int ferror_err = ferror(stderr); + const int fclose_err = fclose(stderr); + if (fclose_err || ferror_err) + status = E_ERROR; } - while (true) { - const int c = fgetc(opt_files_file); - if (c == EOF) { - free(name); - - if (ferror(opt_files_file)) - errmsg(V_ERROR, _("%s: Error reading " - "filenames: %s"), - opt_files_name, - strerror(errno)); - else if (pos != 0) - errmsg(V_ERROR, _("%s: Unexpected end of " - "input when reading " - "filenames"), opt_files_name); + // If we have got a signal, raise it to kill the program. + const int sig = exit_signal; + if (sig != 0) { + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(exit_signal); + // If, for some weird reason, the signal doesn't kill us, + // we safely fall to the exit below. + } + + exit(status); +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and uncompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. 
+ while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); return NULL; } - if (c == '\0' || c == opt_files_split) - break; + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } name[pos++] = c; + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. if (pos == size) { size *= 2; - char *tmp = realloc(name, size); - if (tmp == NULL) { - free(name); - out_of_memory(); - return NULL; - } - - name = tmp; + name = xrealloc(name, size); } } - if (name != NULL) - name[pos] = '\0'; - - return name; + return NULL; } @@ -158,35 +282,56 @@ main(int argc, char **argv) // a valid file descriptor. Exit immediatelly with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. - open_stdxxx(ERROR); + open_stdxxx(E_ERROR); - // Set the program invocation name used in various messages. 
- argv0 = argv[0]; + // This has to be done before calling any liblzma functions. + lzma_init(); - setlocale(LC_ALL, "en_US.UTF-8"); + // Set up the locale. + setlocale(LC_ALL, ""); + +#ifdef ENABLE_NLS + // Set up the message translations too. bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); +#endif + + // Set the program invocation name used in various messages, and + // do other message handling related initializations. + message_init(argv[0]); // Set hardware-dependent default values. These can be overriden // on the command line, thus this must be done before parse_args(). hardware_init(); - char **files = parse_args(argc, argv); - - if (opt_mode == MODE_COMPRESS && opt_stdout && is_tty_stdout()) - return ERROR; - - if (opt_mode == MODE_COMPRESS) - lzma_init_encoder(); + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); else - lzma_init_decoder(); - - io_init(); - process_init(); + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal and --force wasn't used. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + my_exit(E_ERROR); + } + } + } if (opt_mode == MODE_LIST) { - errmsg(V_ERROR, "--list is not implemented yet."); - my_exit(ERROR); + message_fatal("--list is not implemented yet."); } // Hook the signal handlers. We don't need these before we start @@ -194,60 +339,63 @@ main(int argc, char **argv) // line arguments. 
establish_signal_handlers(); - while (*files != NULL && !user_abort) { - if (strcmp("-", *files) == 0) { + // Process the files given on the command line. Note that if no names + // were given, parse_args() gave us a fake "-" filename. + for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Unless --force + // was used, check that we aren't writing compressed + // data to a terminal or reading it from terminal. if (!opt_force) { if (opt_mode == MODE_COMPRESS) { - if (is_tty_stdout()) { - ++files; + if (is_tty_stdout()) continue; - } } else if (is_tty_stdin()) { - ++files; continue; } } - if (opt_files_name == stdin_filename) { - errmsg(V_ERROR, _("Cannot read data from " + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " "standard input when " "reading filenames " "from standard input")); - ++files; continue; } - *files = (char *)stdin_filename; + // Replace the "-" with a special pointer, which is + // recognized by process_file() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; } - process_file(*files++); + // Do the actual compression or uncompression. + process_file(args.arg_names[i]); } - if (opt_files_name != NULL) { + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. 
while (true) { - char *name = read_name(); + const char *name = read_name(&args); if (name == NULL) break; - if (name[0] != '\0') - process_file(name); - - free(name); + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + process_file(name); } - if (opt_files_name != stdin_filename) - (void)fclose(opt_files_file); - } - - io_finish(); - - if (exit_signal != 0) { - struct sigaction sa; - sa.sa_handler = SIG_DFL; - sigfillset(&sa.sa_mask); - sa.sa_flags = 0; - sigaction(exit_signal, &sa, NULL); - raise(exit_signal); + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); } my_exit(exit_status); diff --git a/src/lzma/main.h b/src/lzma/main.h new file mode 100644 index 00000000..1e369425 --- /dev/null +++ b/src/lzma/main.h @@ -0,0 +1,60 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellanous declarations +// +// Copyright (C) 2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MAIN_H +#define MAIN_H + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// If this is true, we will clean up the possibly incomplete output file, +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. 
+extern volatile sig_atomic_t user_abort; + + +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is EX_WARNING and the old exit status was already EX_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Exits the program using the given status. This takes care of closing +/// stdin, stdout, and stderr and catches possible errors. If we had got +/// a signal, this function will raise it so that to the parent process it +/// appears that we were killed by the signal sent by the user. +extern void my_exit(enum exit_status_type status) lzma_attribute((noreturn)); + + +#endif diff --git a/src/lzma/message.c b/src/lzma/message.c new file mode 100644 index 00000000..caba9fbc --- /dev/null +++ b/src/lzma/message.c @@ -0,0 +1,892 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if defined(HAVE_SYS_TIME_H) +# include <sys/time.h> +#elif defined(SIGALRM) +// FIXME +#endif + +#include <stdarg.h> + + +/// Name of the program which is prefixed to the error messages. +static const char *argv0; + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once the a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGALRM. Once this variable is true, we will print an empty line before +/// the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGALRM signals, +/// we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically. +static bool progress_automatic; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. 
+static uint64_t expected_in_size; + +/// Time when we started processing the file +static double start_time; + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((unused))) +{ + progress_needs_updating = true; + return; +} + + +/// Get the current time as double +static double +my_time(void) +{ + struct timeval tv; + + // This really shouldn't fail. I'm not sure what to return if it + // still fails. It doesn't look so useful to check the return value + // everywhere. FIXME? + if (gettimeofday(&tv, NULL)) + return -1.0; + + return (double)(tv.tv_sec) + (double)(tv.tv_usec) / 1.0e9; +} + + +/// Wrapper for snprintf() to help constructing a string in pieces. +static void /* lzma_attribute((format(printf, 3, 4))) */ +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= len; + } + + return; +} + + +extern void +message_init(const char *given_argv0) +{ + // Name of the program + argv0 = given_argv0; + + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. 
+ progress_automatic = isatty(STDERR_FILENO); + +/* + if (progress_automatic) { + // stderr is a terminal. Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparseable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + uint64_t columns; + if (columns_str != NULL + && !str_to_uint64_raw(&columns, columns_str) + && columns < 80) + progress_automatic = false; + } +*/ + +#ifdef SIGALRM + // Establish the signal handler for SIGALRM. Since this signal + // doesn't require any quick action, we set SA_RESTART. + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sa.sa_handler = &progress_signal_handler; + if (sigaction(SIGALRM, &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (!current_filename_printed + && (files_total != 1 || filename != stdin_filename)) { + signals_block(); + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', stderr); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. 
+ if (files_total == 0) + fprintf(stderr, "%s (%u)\n", filename, + files_pos); + else + fprintf(stderr, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_progress_start(const char *src_name, uint64_t in_size) +{ + // Store the processing start time of the file and its expected size. + // If we aren't printing any statistics, then these are unused. But + // since it is possible that the user tells us with SIGALRM to show + // statistics, we need to have these available anyway. + start_time = my_time(); + filename = src_name; + expected_in_size = in_size; + + // Indicate the name of this file hasn't been printed to + // stderr yet. + current_filename_printed = false; + + // Start numbering the files starting from one. + ++files_pos; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. As an exception, if there is exactly one file, + // do not print the filename at all. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Print the filename to stderr if that is appropriate with + // the current settings. + print_filename(); + + // Start the timer to set progress_needs_updating to true + // after about one second. An alternative would to be set + // progress_needs_updating to true here immediatelly, but + // setting the timer looks better to me, since extremely + // early progress info is pretty much useless. + alarm(1); + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. 
/// Append a human-readable size to the string being built.
///
/// \param      pos     Current write position; advanced by my_snprintf()
/// \param      left    Space left in the buffer; updated by my_snprintf()
/// \param      value   Size in bytes
/// \param      final   True for the final statistics, in which case small
///                     values are shown in bytes or KiB; otherwise MiB is
///                     always used.
static void
progress_sizes_helper(char **pos, size_t *left, uint64_t value, bool final)
{
	if (final && value < 10000) {
		// At maximum of four digits is allowed for exact byte count.
		my_snprintf(pos, left, "%'" PRIu64 " B", value);

	} else if (final && value < UINT64_C(10239900)) {
		// At maximum of five significant digits is allowed for KiB.
		my_snprintf(pos, left, "%'.1f KiB",
				(double)(value) / 1024.0);

	} else {
		// Everything else is shown as MiB.
		my_snprintf(pos, left, "%'.1f MiB",
				(double)(value) / (1024.0 * 1024.0));
	}
}
/// Format the processing speed of uncompressed data.
///
/// \param      uncompressed_pos    Uncompressed bytes processed so far
/// \param      elapsed             Seconds spent on the file so far
///
/// \return     Pointer to a static buffer holding e.g. "12.3 MiB/s", or an
///             empty string when less than three seconds have elapsed or
///             the speed doesn't fit the largest unit.
static const char *
progress_speed(uint64_t uncompressed_pos, double elapsed)
{
	static const char unit[][8] = { "KiB/s", "MiB/s", "GiB/s" };
	static char buf[sizeof("999.9 GiB/s")];

	const size_t unit_count = sizeof(unit) / sizeof(unit[0]);

	// The early values look somewhat random, so show nothing at first.
	if (elapsed < 3.0)
		return "";

	// Start from KiB/s and step to the next unit for as long as the
	// number would need more than three integer digits.
	double speed = (double)(uncompressed_pos) / (elapsed * 1024.0);
	size_t unit_index = 0;

	for (; speed > 999.9; speed /= 1024.0) {
		if (++unit_index == unit_count)
			return ""; // Way too fast ;-)
	}

	snprintf(buf, sizeof(buf), "%.1f %s", speed, unit[unit_index]);
	return buf;
}
+ if (seconds == 0 || seconds > ((UINT32_C(9999) * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Make the string to contain the estimated remaining time, or if the amount +/// of input isn't known, how much time has elapsed. +static const char * +progress_remaining(uint64_t in_pos, double elapsed) +{ + // If we don't know the size of the input, we indicate the time + // spent so far. + if (expected_in_size == 0 || in_pos > expected_in_size) + return progress_time((uint32_t)(elapsed)); + + // If we are at the very beginning of the file or the file is very + // small, don't give any estimate to avoid far too wrong estimations. + if (in_pos < (UINT64_C(1) << 19) || elapsed < 8.0) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (double)(expected_in_size - in_pos) + * elapsed / (double)(in_pos); + if (remaining == 0) + remaining = 1; + + return progress_time(remaining); +} + + +extern void +message_progress_update(uint64_t in_pos, uint64_t out_pos) +{ + // If there's nothing to do, return immediatelly. + if (!progress_needs_updating || in_pos == 0) + return; + + // Print the filename if it hasn't been printed yet. + print_filename(); + + // Calculate how long we have been processing this file. + const double elapsed = my_time() - start_time; + + // Set compressed_pos and uncompressed_pos. 
+ uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + signals_block(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + fprintf(stderr, " %7s %43s %11s %10s\r", + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_remaining(in_pos, elapsed)); + + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm() call or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(uint64_t in_pos, uint64_t out_pos, bool success) +{ + // If we are not in verbose mode, we have nothing to do. + if (verbosity < V_VERBOSE || user_abort) + return; + + // Cancel a pending alarm, if any. 
+ if (progress_automatic) { + alarm(0); + progress_active = false; + } + + const double elapsed = my_time() - start_time; + + uint64_t compressed_pos; + uint64_t uncompressed_pos; + if (opt_mode == MODE_COMPRESS) { + compressed_pos = out_pos; + uncompressed_pos = in_pos; + } else { + compressed_pos = in_pos; + uncompressed_pos = out_pos; + } + + // If it took less than a second, don't display the time. + const char *elapsed_str = progress_time((double)(elapsed)); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic && in_pos > 0) { + // Using floating point conversion for the percentage instead + // of static "100.0 %" string, because the decimal separator + // isn't a dot in all locales. + fprintf(stderr, " %5.1f %% %43s %11s %10s\n", + 100.0, + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + elapsed_str); + + // When no automatic progress indicator is used, don't print a verbose + // message at all if we something went wrong and we couldn't produce + // any output. If we did produce output, then it is sometimes useful + // to tell that to the user, especially if we detected an error after + // a time-consuming operation. + } else if (success || out_pos > 0) { + // The filename and size information are always printed. + fprintf(stderr, "%s: %s", filename, progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. 
+ const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + // If there currently is a progress message on the screen, + // print a newline so that the progress message is left + // readable. This is good, because it is nice to be able to + // see where the error occurred. (The alternative would be + // to clear the progress message and replace it with the + // error message.) + if (progress_active) { + progress_active = false; + fputc('\n', stderr); + } + + fprintf(stderr, "%s: ", argv0); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + my_exit(E_ERROR); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + return _("Internal error (bug)"); + } + + return NULL; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), argv0); + return; +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + printf("xz " PACKAGE_VERSION "\n"); + printf("liblzma %s\n", lzma_version_string()); + my_exit(E_SUCCESS); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... 
[FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + argv0); + + puts(_("Mandatory arguments to long options are mandatory for " + "short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + + if (long_help) + puts(_( +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files=[FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0=[FILE] like --files but use the null character as terminator")); + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', and `raw'\n" +" -C, --check=CHECK integrity check type: `crc32', `crc64' (default),\n" +" or `sha256'")); + } + + puts(_( +" -p, --preset=NUM compression preset: 1-2 fast compression, 3-6 good\n" +" compression, 7-9 excellent compression; default is 7")); + + puts(_( +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)")); + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + puts(_( +"\n" +" --lzma1=[OPTS] LZMA1 or LZMA2; 
OPTS is a comma-separated list of zero or\n" +" --lzma2=[OPTS] more of the following options (valid values; default):\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86 x86 filter (sometimes called BCJ filter)\n" +" --powerpc PowerPC (big endian) filter\n" +" --ia64 IA64 (Itanium) filter\n" +" --arm ARM filter\n" +" --armthumb ARM-Thumb filter\n" +" --sparc SPARC filter")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta=[OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each other (1-256; 1)")); +#endif + +#if defined(HAVE_ENCODER_SUBBLOCK) || defined(HAVE_DECODER_SUBBLOCK) + puts(_( +"\n" +" --subblock=[OPTS] Subblock filter; valid OPTS (valid values; default):\n" +" size=NUM number of bytes of data per subblock\n" +" (1 - 256Mi; 4Ki)\n" +" rle=NUM run-length encoder chunk size (0-256; 0)")); +#endif + } + +/* + if (long_help) + puts(_( +"\n" +" Resource usage options:\n" +"\n" +" -M, --memory=NUM use roughly NUM bytes of memory at maximum; 0 indicates\n" +" the default setting, which depends on the operation mode\n" +" and the amount of physical memory (RAM)\n" +" -T, --threads=NUM use a maximum of NUM (de)compression threads" +// " --threading=STR threading style; possible values are `auto' (default),\n" +// " `files', and `stream' +)); +*/ + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more 
verbose")); + + if (long_help) + puts(_( +"\n" +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help")); + else + puts(_( +" -h, --help display this short help\n" +" -H, --long-help display the long help (lists also the advanced options)")); + + puts(_( +" -V, --version display the version number")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + if (long_help) { + // FIXME !!! + size_t mem_limit = hardware_memlimit_encoder() / (1024 * 1024); + if (mem_limit == 0) + mem_limit = 1; + + // We use PRIu64 instead of %zu to support pre-C99 libc. + // FIXME: Use ' but avoid warnings. + puts(_("On this system and configuration, the tool will use")); + printf(_(" * roughly %" PRIu64 " MiB of memory at maximum; and\n"), + (uint64_t)(mem_limit)); + printf(N_(" * at maximum of one thread for (de)compression.\n\n", + " * at maximum of %" PRIu64 + " threads for (de)compression.\n\n", + (uint64_t)(opt_threads)), (uint64_t)(opt_threads)); + } + + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + + my_exit(E_SUCCESS); +} diff --git a/src/lzma/message.h b/src/lzma/message.h new file mode 100644 index 00000000..7ef9b165 --- /dev/null +++ b/src/lzma/message.h @@ -0,0 +1,132 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Copyright (C) 2007-2008 Lasse Collin +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/// Verbosity levels
enum message_verbosity {
	V_SILENT,   ///< No messages
	V_ERROR,    ///< Only error messages
	V_WARNING,  ///< Errors and warnings
	V_VERBOSE,  ///< Errors, warnings, and verbose statistics
	V_DEBUG,    ///< Debugging, FIXME remove?
};


/// \brief      Initializes the message functions
///
/// \param      argv0       Name of the program i.e. argv[0] from main()
///
/// Automatic progress updating is enabled if stderr is a terminal. On
/// systems that have SIGALRM, a handler for it is installed.
///
/// If an error occurs, this function doesn't return.
///
extern void message_init(const char *argv0);


/// Increase verbosity level by one step unless it was at maximum.
extern void message_verbosity_increase(void);

/// Decrease verbosity level by one step unless it was at minimum.
extern void message_verbosity_decrease(void);


/// Set the total number of files to be processed (stdin is counted as a file
/// here). Zero means that the number of files is unknown; that is also the
/// value used if this function is never called.
extern void message_set_files(unsigned int files);


/// \brief      Print a message if verbosity level is at least "verbosity"
///
/// This doesn't touch the exit status.
extern void message(enum message_verbosity verbosity, const char *fmt, ...)
		lzma_attribute((format(printf, 2, 3)));


/// \brief      Prints a warning and possibly sets exit status
///
/// The message is printed only if verbosity level is at least V_WARNING.
/// The exit status is set to WARNING unless it was already at ERROR.
extern void message_warning(const char *fmt, ...)
		lzma_attribute((format(printf, 1, 2)));


/// \brief      Prints an error message and sets exit status
///
/// The message is printed only if verbosity level is at least V_ERROR.
/// The exit status is set to ERROR.
extern void message_error(const char *fmt, ...)
		lzma_attribute((format(printf, 1, 2)));


/// \brief      Prints an error message and exits with E_ERROR
///
/// The message is printed only if verbosity level is at least V_ERROR.
extern void message_fatal(const char *fmt, ...)
		lzma_attribute((format(printf, 1, 2)))
		lzma_attribute((noreturn));


/// Print an error message that an internal error occurred and exit with
/// E_ERROR.
extern void message_bug(void) lzma_attribute((noreturn));


/// Print a message that establishing signal handlers failed, and exit with
/// exit status ERROR.
extern void message_signal_handler(void) lzma_attribute((noreturn));


/// Converts lzma_ret to a string.
extern const char *message_strm(lzma_ret code);


/// Print a message that user should try --help.
extern void message_try_help(void);


/// Prints the version number to stdout and exits with exit status SUCCESS.
extern void message_version(void) lzma_attribute((noreturn));


/// Print the help message.
extern void message_help(bool long_help) lzma_attribute((noreturn));


/// \brief      Starts progress tracking for a new file
///
/// \param      filename    Name of the file; printed with verbose messages
/// \param      in_size     Expected size of the input; zero if unknown
///
/// In verbose mode with automatic progress updating, this prints the
/// filename when appropriate and starts the one-second update timer.
extern void message_progress_start(const char *filename, uint64_t in_size);


/// \brief      Prints a progress message if one has been requested
///
/// \param      in_pos      Input position i.e. how much input has been read
/// \param      out_pos     Output position
///
/// This does nothing unless SIGALRM has been received since the previous
/// update and in_pos is non-zero.
extern void message_progress_update(uint64_t in_pos, uint64_t out_pos);


/// \brief      Finishes the progress message if we were in verbose mode
///
/// \param      in_pos      Final input position i.e. how much input there was.
/// \param      out_pos     Final output position
/// \param      success     True if the operation was successful. We don't
///                         print the final progress message if the operation
///                         wasn't successful.
+/// +extern void message_progress_end( + uint64_t in_pos, uint64_t out_pos, bool success); + +#endif diff --git a/src/lzma/options.c b/src/lzma/options.c index f5ebdd8e..77ebddd6 100644 --- a/src/lzma/options.c +++ b/src/lzma/options.c @@ -79,11 +79,9 @@ parse_options(const char *str, const option_map *opts, if (value != NULL) *value++ = '\0'; - if (value == NULL || value[0] == '\0') { - errmsg(V_ERROR, _("%s: Options must be `name=value' " + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " "pairs separated with commas"), str); - my_exit(ERROR); - } // Look for the option name from the option map. bool found = false; @@ -106,11 +104,9 @@ parse_options(const char *str, const option_map *opts, break; } - if (opts[i].map[j].name == NULL) { - errmsg(V_ERROR, _("%s: Invalid option " + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option " "value"), value); - my_exit(ERROR); - } set(filter_options, i, opts[i].map[j].id); } @@ -119,10 +115,8 @@ parse_options(const char *str, const option_map *opts, break; } - if (!found) { - errmsg(V_ERROR, _("%s: Invalid option name"), name); - my_exit(ERROR); - } + if (!found) + message_fatal(_("%s: Invalid option name"), name); if (split == NULL) break; @@ -168,7 +162,7 @@ set_subblock(void *options, uint32_t key, uint64_t value) extern lzma_options_subblock * -parse_options_subblock(const char *str) +options_subblock(const char *str) { static const option_map opts[] = { { "size", NULL, LZMA_SUBBLOCK_DATA_SIZE_MIN, @@ -217,7 +211,7 @@ set_delta(void *options, uint32_t key, uint64_t value) extern lzma_options_delta * -parse_options_delta(const char *str) +options_delta(const char *str) { static const option_map opts[] = { { "dist", NULL, LZMA_DELTA_DIST_MIN, @@ -225,7 +219,7 @@ parse_options_delta(const char *str) { NULL, NULL, 0, 0 } }; - lzma_options_delta *options = xmalloc(sizeof(lzma_options_subblock)); + lzma_options_delta *options = 
xmalloc(sizeof(lzma_options_delta)); *options = (lzma_options_delta){ // It's hard to give a useful default for this. .type = LZMA_DELTA_TYPE_BYTE, @@ -296,7 +290,7 @@ set_lzma(void *options, uint32_t key, uint64_t value) extern lzma_options_lzma * -parse_options_lzma(const char *str) +options_lzma(const char *str) { static const name_id_map modes[] = { { "fast", LZMA_MODE_FAST }, @@ -345,18 +339,14 @@ parse_options_lzma(const char *str) parse_options(str, opts, &set_lzma, options); - if (options->lc + options->lp > LZMA_LCLP_MAX) { - errmsg(V_ERROR, "The sum of lc and lp must be at " - "maximum of 4"); - exit(ERROR); - } + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must be at " + "maximum of 4")); const uint32_t nice_len_min = options->mf & 0x0F; - if (options->nice_len < nice_len_min) { - errmsg(V_ERROR, "The selected match finder requires at " - "least nice=%" PRIu32, nice_len_min); - exit(ERROR); - } + if (options->nice_len < nice_len_min) + message_fatal(_("The selected match finder requires at " + "least nice=%" PRIu32), nice_len_min); return options; } diff --git a/src/lzma/options.h b/src/lzma/options.h index 885c5969..4253ac3c 100644 --- a/src/lzma/options.h +++ b/src/lzma/options.h @@ -27,20 +27,20 @@ /// /// \return Pointer to allocated options structure. /// Doesn't return on error. -extern lzma_options_subblock *parse_options_subblock(const char *str); +extern lzma_options_subblock *options_subblock(const char *str); /// \brief Parser for Delta options /// /// \return Pointer to allocated options structure. /// Doesn't return on error. -extern lzma_options_delta *parse_options_delta(const char *str); +extern lzma_options_delta *options_delta(const char *str); /// \brief Parser for LZMA options /// /// \return Pointer to allocated options structure. /// Doesn't return on error. 
-extern lzma_options_lzma *parse_options_lzma(const char *str); +extern lzma_options_lzma *options_lzma(const char *str); #endif diff --git a/src/lzma/private.h b/src/lzma/private.h index f6a75645..b463a08e 100644 --- a/src/lzma/private.h +++ b/src/lzma/private.h @@ -22,32 +22,30 @@ #include "sysdefs.h" -#ifdef HAVE_ERRNO_H -# include <errno.h> -#else -extern int errno; -#endif - +#include <sys/types.h> #include <sys/stat.h> +#include <errno.h> #include <signal.h> -#include <pthread.h> #include <locale.h> #include <stdio.h> -#include <fcntl.h> #include <unistd.h> -#include "gettext.h" -#define _(msgid) gettext(msgid) -#define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#ifdef ENABLE_NLS +# include <libintl.h> +# define _(msgid) gettext(msgid) +# define N_(msgid1, msgid2, n) ngettext(msgid1, msgid2, n) +#else +# define _(msgid) (msgid) +# define N_(msgid1, msgid2, n) ((n) == 1 ? (msgid1) : (msgid2)) +#endif -#include "alloc.h" +#include "main.h" +#include "process.h" +#include "message.h" #include "args.h" -#include "error.h" #include "hardware.h" -#include "help.h" #include "io.h" #include "options.h" -#include "process.h" #include "suffix.h" #include "util.h" diff --git a/src/lzma/process.c b/src/lzma/process.c index fc4ef96a..d30878e4 100644 --- a/src/lzma/process.c +++ b/src/lzma/process.c @@ -20,137 +20,158 @@ #include "private.h" -typedef struct { - lzma_stream strm; - void *options; +enum operation_mode opt_mode = MODE_COMPRESS; - file_pair *pair; +enum format_type opt_format = FORMAT_AUTO; - /// We don't need this for *anything* but seems that at least with - /// glibc pthread_create() doesn't allow NULL. - pthread_t thread; - bool in_use; +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; -} thread_data; +/// Filters needed for all encoding all formats, and also decoding in raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; +/// Number of filters. Zero indicates that we are using a preset. 
+static size_t filters_count = 0; -/// Number of available threads -static size_t free_threads; +/// Number of the preset (1-9) +static size_t preset_number = 7; -/// Thread-specific data -static thread_data *threads; +/// Indicate if no preset has been given. In that case, we will auto-adjust +/// the compression preset so that it doesn't use too much RAM. +// FIXME +static bool preset_default = true; -static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +/// Integrity check type +static lzma_check check = LZMA_CHECK_CRC64; -/// Attributes of new coder threads. They are created in detached state. -/// Coder threads signal to the service thread themselves when they are done. -static pthread_attr_t thread_attr; +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + return; +} -////////// -// Init // -////////// extern void -process_init(void) +coder_set_preset(size_t new_preset) { - threads = malloc(sizeof(thread_data) * opt_threads); - if (threads == NULL) { - out_of_memory(); - my_exit(ERROR); - } + preset_number = new_preset; + preset_default = false; + return; +} - for (size_t i = 0; i < opt_threads; ++i) - memzero(&threads[i], sizeof(threads[0])); - if (pthread_attr_init(&thread_attr) - || pthread_attr_setdetachstate( - &thread_attr, PTHREAD_CREATE_DETACHED)) { - out_of_memory(); - my_exit(ERROR); - } +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); - free_threads = opt_threads; + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; return; } -////////////////////////// -// Thread-specific data // -////////////////////////// - -static thread_data * -get_thread_data(void) +extern void +coder_set_compression_settings(void) { - pthread_mutex_lock(&mutex); + // Options for LZMA1 or LZMA2 in case we are using a preset. 
+ static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } - while (free_threads == 0) { - pthread_cond_wait(&cond, &mutex); + // Get the preset for LZMA1 or LZMA2. + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); - if (user_abort) { - pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); - return NULL; - } + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; } - thread_data *t = threads; - while (t->in_use) - ++t; + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; - t->in_use = true; - --free_threads; + // If we are using the LZMA_Alone format, allow exactly one filter + // which has to be LZMA. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("With --format=lzma only the LZMA1 filter " + "is supported")); - pthread_mutex_unlock(&mutex); - - return t; -} + // TODO: liblzma probably needs an API to validate the filter chain. + // If using --format=raw, we can be decoding. 
+ uint64_t memory_usage; + uint64_t memory_limit; + if (opt_mode == MODE_COMPRESS) { + memory_usage = lzma_memusage_encoder(filters); + memory_limit = hardware_memlimit_encoder(); + } else { + memory_usage = lzma_memusage_decoder(filters); + memory_limit = hardware_memlimit_decoder(); + } -static void -release_thread_data(thread_data *t) -{ - pthread_mutex_lock(&mutex); + if (memory_usage == UINT64_MAX) + message_bug(); - t->in_use = false; - ++free_threads; + if (preset_default) { + // When no preset was explicitly requested, we use the default + // preset only if the memory usage limit allows. Otherwise we + // select a lower preset automatically. + while (memory_usage > memory_limit) { + if (preset_number == 1) + message_fatal(_("Memory usage limit is too " + "small for any internal " + "filter preset")); - pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); + if (lzma_lzma_preset(&opt_lzma, --preset_number)) + message_bug(); - return; -} - - -static int -create_thread(void *(*func)(thread_data *t), thread_data *t) -{ - if (opt_threads == 1) { - func(t); - } else { - const int err = pthread_create(&t->thread, &thread_attr, - (void *(*)(void *))(func), t); - if (err) { - errmsg(V_ERROR, _("Cannot create a thread: %s"), - strerror(err)); - user_abort = 1; - return -1; + memory_usage = lzma_memusage_encoder(filters); } + } else { + if (memory_usage > memory_limit) + message_fatal(_("Memory usage limit is too small " + "for the given filter setup")); } - return 0; -} + // Limit the number of worked threads so that memory usage + // limit isn't exceeded. 
+ assert(memory_usage > 0); + size_t thread_limit = memory_limit / memory_usage; + if (thread_limit == 0) + thread_limit = 1; + if (opt_threads > thread_limit) + opt_threads = thread_limit; + + return; +} -///////////////////////// -// One thread per file // -///////////////////////// -static int -single_init(thread_data *t) +static bool +coder_init(void) { lzma_ret ret = LZMA_PROG_ERROR; @@ -162,17 +183,15 @@ single_init(thread_data *t) break; case FORMAT_XZ: - ret = lzma_stream_encoder(&t->strm, - opt_filters, opt_check); + ret = lzma_stream_encoder(&strm, filters, check); break; case FORMAT_LZMA: - ret = lzma_alone_encoder(&t->strm, - opt_filters[0].options); + ret = lzma_alone_encoder(&strm, filters[0].options); break; case FORMAT_RAW: - ret = lzma_raw_encoder(&t->strm, opt_filters); + ret = lzma_raw_encoder(&strm, filters); break; } } else { @@ -181,254 +200,192 @@ single_init(thread_data *t) switch (opt_format) { case FORMAT_AUTO: - ret = lzma_auto_decoder(&t->strm, opt_memory, flags); + ret = lzma_auto_decoder(&strm, + hardware_memlimit_decoder(), flags); break; case FORMAT_XZ: - ret = lzma_stream_decoder(&t->strm, opt_memory, flags); + ret = lzma_stream_decoder(&strm, + hardware_memlimit_decoder(), flags); break; case FORMAT_LZMA: - ret = lzma_alone_decoder(&t->strm, opt_memory); + ret = lzma_alone_decoder(&strm, + hardware_memlimit_decoder()); break; case FORMAT_RAW: // Memory usage has already been checked in args.c. 
- ret = lzma_raw_decoder(&t->strm, opt_filters); + // FIXME Comment + ret = lzma_raw_decoder(&strm, filters); break; } } if (ret != LZMA_OK) { if (ret == LZMA_MEM_ERROR) - out_of_memory(); + message_error("%s", message_strm(LZMA_MEM_ERROR)); else - internal_error(); + message_bug(); - return -1; + return true; } - return 0; + return false; } -static void * -single(thread_data *t) +static bool +coder_run(file_pair *pair) { - if (single_init(t)) { - io_close(t->pair, false); - release_thread_data(t); - return NULL; - } + // Buffers to hold input and output data. + uint8_t in_buf[IO_BUFFER_SIZE]; + uint8_t out_buf[IO_BUFFER_SIZE]; + + // Initialize the progress indicator. + const uint64_t in_size = pair->src_st.st_size <= (off_t)(0) + ? 0 : (uint64_t)(pair->src_st.st_size); + message_progress_start(pair->src_name, in_size); - uint8_t in_buf[BUFSIZ]; - uint8_t out_buf[BUFSIZ]; lzma_action action = LZMA_RUN; lzma_ret ret; - bool success = false; - t->strm.avail_in = 0; - t->strm.next_out = out_buf; - t->strm.avail_out = BUFSIZ; + strm.avail_in = 0; + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; while (!user_abort) { - if (t->strm.avail_in == 0 && !t->pair->src_eof) { - t->strm.next_in = in_buf; - t->strm.avail_in = io_read(t->pair, in_buf, BUFSIZ); + // Fill the input buffer if it is empty and we haven't reached + // end of file yet. + if (strm.avail_in == 0 && !pair->src_eof) { + strm.next_in = in_buf; + strm.avail_in = io_read(pair, in_buf, IO_BUFFER_SIZE); - if (t->strm.avail_in == SIZE_MAX) + if (strm.avail_in == SIZE_MAX) break; - if (t->pair->src_eof) + // Encoder needs to know when we have given all the + // input to it. The decoders need to know it too when + // we are using LZMA_CONCATENATED. + if (pair->src_eof) action = LZMA_FINISH; } - ret = lzma_code(&t->strm, action); + // Let liblzma do the actual work. 
+ ret = lzma_code(&strm, action); - if ((t->strm.avail_out == 0 || ret != LZMA_OK) - && opt_mode != MODE_TEST) { - if (io_write(t->pair, out_buf, - BUFSIZ - t->strm.avail_out)) - break; + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (opt_mode != MODE_TEST && io_write(pair, out_buf, + IO_BUFFER_SIZE - strm.avail_out)) + return false; - t->strm.next_out = out_buf; - t->strm.avail_out = BUFSIZ; + strm.next_out = out_buf; + strm.avail_out = IO_BUFFER_SIZE; } if (ret != LZMA_OK) { - // Check that there is no trailing garbage. This is - // needed for LZMA_Alone and raw streams. - if (ret == LZMA_STREAM_END && (t->strm.avail_in != 0 - || (!t->pair->src_eof && io_read( - t->pair, in_buf, 1) != 0))) - ret = LZMA_DATA_ERROR; - - if (ret != LZMA_STREAM_END) { - errmsg(V_ERROR, "%s: %s", t->pair->src_name, - str_strm_error(ret)); - break; + // Determine if the return value indicates that we + // won't continue coding. + const bool stop = ret != LZMA_NO_CHECK + && ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // First print the final progress info. + // This way the user sees more accurately + // where the error occurred. Note that we + // print this *before* the possible error + // message. + // + // FIXME: What if something goes wrong + // after this? + message_progress_end(strm.total_in, + strm.total_out, + ret == LZMA_STREAM_END); + + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (opt_mode != MODE_TEST && io_write(pair, + out_buf, IO_BUFFER_SIZE + - strm.avail_out)) + return false; } - assert(t->pair->src_eof); - success = true; - break; - } - } - - io_close(t->pair, success); - release_thread_data(t); - - return NULL; -} + if (ret == LZMA_STREAM_END) { + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. 
+ if (strm.avail_in == 0 && (pair->src_eof + || io_read(pair, in_buf, 1) + == 0)) { + assert(pair->src_eof); + return true; + } + // FIXME: What about io_read() failing? -/////////////////////////////// -// Multiple threads per file // -/////////////////////////////// - -// TODO - -// I'm not sure what would the best way to implement this. Here's one -// possible way: -// - Reader thread would read the input data and control the coders threads. -// - Every coder thread is associated with input and output buffer pools. -// The input buffer pool is filled by reader thread, and the output buffer -// pool is emptied by the writer thread. -// - Writer thread writes the output data of the oldest living coder thread. -// -// The per-file thread started by the application's main thread is used as -// the reader thread. In the beginning, it starts the writer thread and the -// first coder thread. The coder thread would be left waiting for input from -// the reader thread, and the writer thread would be waiting for input from -// the coder thread. -// -// The reader thread reads the input data into a ring buffer, whose size -// depends on the value returned by lzma_chunk_size(). If the ring buffer -// gets full, the buffer is marked "to be finished", which indicates to -// the coder thread that no more input is coming. Then a new coder thread -// would be started. -// -// TODO - -/* -typedef struct { - /// Buffers - uint8_t (*buffers)[BUFSIZ]; - - /// Number of buffers - size_t buffer_count; - - /// buffers[read_pos] is the buffer currently being read. Once finish - /// is true and read_pos == write_pos, end of input has been reached. - size_t read_pos; - - /// buffers[write_pos] is the buffer into which data is currently - /// being written. - size_t write_pos; - - /// This variable matters only when read_pos == write_pos && finish. - /// In that case, this variable will contain the size of the - /// buffers[read_pos]. 
- size_t last_size; - - /// True once no more data is being written to the buffer. When this - /// is set, the last_size variable must have been set too. - bool finish; - - /// Mutex to protect access to the variables in this structure - pthread_mutex_t mutex; - - /// Condition to indicate when another thread can continue - pthread_cond_t cond; -} mem_pool; - - -static foo -multi_reader(thread_data *t) -{ - bool done = false; - - do { - const size_t size = io_read(t->pair, - m->buffers + m->write_pos, BUFSIZ); - if (size == SIZE_MAX) { - // TODO - } else if (t->pair->src_eof) { - m->last_size = size; - } - - pthread_mutex_lock(&m->mutex); - - if (++m->write_pos == m->buffer_count) - m->write_pos = 0; - - if (m->write_pos == m->read_pos || t->pair->src_eof) - m->finish = true; - - pthread_cond_signal(&m->cond); - pthread_mutex_unlock(&m->mutex); - - } while (!m->finish); - - return done ? 0 : -1; -} - - -static foo -multi_code() -{ - lzma_action = LZMA_RUN; - - while (true) { - pthread_mutex_lock(&m->mutex); + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } - while (m->read_pos == m->write_pos && !m->finish) - pthread_cond_wait(&m->cond, &m->mutex); + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. + if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } - pthread_mutex_unlock(&m->mutex); + if (ret == LZMA_MEMLIMIT_ERROR) { + // Figure out how much memory would have + // actually needed. 
+ // TODO + } - if (m->finish) { - t->strm.avail_in = m->last_size; - if (opt_mode == MODE_COMPRESS) - action = LZMA_FINISH; - } else { - t->strm.avail_in = BUFSIZ; + if (stop) + return false; } - t->strm.next_in = m->buffers + m->read_pos; - - const lzma_ret ret = lzma_code(&t->strm, action); - + // Show progress information if --verbose was specified and + // stderr is a terminal. + message_progress_update(strm.total_in, strm.total_out); } -} - -*/ + return false; +} -/////////////////////// -// Starting new file // -/////////////////////// extern void process_file(const char *filename) { - thread_data *t = get_thread_data(); - if (t == NULL) - return; // User abort - - // If this fails, it shows appropriate error messages too. - t->pair = io_open(filename); - if (t->pair == NULL) { - release_thread_data(t); + // First try initializing the coder. If it fails, it's useless to try + // opening the file. Check also for user_abort just in case if we had + // got a signal while initializing the coder. + if (coder_init() || user_abort) return; - } - // TODO Currently only one-thread-per-file mode is implemented. + // Try to open the input and output files. + file_pair *pair = io_open(filename); + if (pair == NULL) + return; - if (create_thread(&single, t)) { - io_close(t->pair, false); - release_thread_data(t); - } + // Do the actual coding. + const bool success = coder_run(pair); + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); return; } diff --git a/src/lzma/process.h b/src/lzma/process.h index 7fdfbce6..de23eacb 100644 --- a/src/lzma/process.h +++ b/src/lzma/process.h @@ -23,6 +23,46 @@ #include "private.h" +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. 
+enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, + // HEADER_GZIP, + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(size_t new_preset); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// +extern void coder_set_compression_settings(void); + extern void process_init(void); extern void process_file(const char *filename); diff --git a/src/lzma/suffix.c b/src/lzma/suffix.c index 460acee2..0d46855a 100644 --- a/src/lzma/suffix.c +++ b/src/lzma/suffix.c @@ -20,6 +20,9 @@ #include "private.h" +static char *custom_suffix = NULL; + + struct suffix_pair { const char *compressed; const char *uncompressed; @@ -74,8 +77,8 @@ uncompressed_name(const char *src_name, const size_t src_len) if (opt_format == FORMAT_RAW) { // Don't check for known suffixes when --format=raw was used. 
- if (opt_suffix == NULL) { - errmsg(V_ERROR, _("%s: With --format=raw, " + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; @@ -91,21 +94,17 @@ uncompressed_name(const char *src_name, const size_t src_len) } } - if (new_len == 0 && opt_suffix != NULL) - new_len = test_suffix(opt_suffix, src_name, src_len); + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); if (new_len == 0) { - errmsg(V_WARNING, _("%s: Filename has an unknown suffix, " + message_warning(_("%s: Filename has an unknown suffix, " "skipping"), src_name); return NULL; } const size_t new_suffix_len = strlen(new_suffix); - char *dest_name = malloc(new_len + new_suffix_len + 1); - if (dest_name == NULL) { - out_of_memory(); - return NULL; - } + char *dest_name = xmalloc(new_len + new_suffix_len + 1); memcpy(dest_name, src_name, new_len); memcpy(dest_name + new_len, new_suffix, new_suffix_len); @@ -154,7 +153,7 @@ compressed_name(const char *src_name, const size_t src_len) for (size_t i = 0; suffixes[i].compressed != NULL; ++i) { if (test_suffix(suffixes[i].compressed, src_name, src_len) != 0) { - errmsg(V_WARNING, _("%s: File already has `%s' " + message_warning(_("%s: File already has `%s' " "suffix, skipping"), src_name, suffixes[i].compressed); return NULL; @@ -163,22 +162,18 @@ compressed_name(const char *src_name, const size_t src_len) // TODO: Hmm, maybe it would be better to validate this in args.c, // since the suffix handling when decoding is weird now. - if (opt_format == FORMAT_RAW && opt_suffix == NULL) { - errmsg(V_ERROR, _("%s: With --format=raw, " + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " "--suffix=.SUF is required unless " "writing to stdout"), src_name); return NULL; } - const char *suffix = opt_suffix != NULL - ? 
opt_suffix : suffixes[0].compressed; + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0].compressed; const size_t suffix_len = strlen(suffix); - char *dest_name = malloc(src_len + suffix_len + 1); - if (dest_name == NULL) { - out_of_memory(); - return NULL; - } + char *dest_name = xmalloc(src_len + suffix_len + 1); memcpy(dest_name, src_name, src_len); memcpy(dest_name + src_len, suffix, suffix_len); @@ -189,7 +184,7 @@ compressed_name(const char *src_name, const size_t src_len) extern char * -get_dest_name(const char *src_name) +suffix_get_dest_name(const char *src_name) { assert(src_name != NULL); @@ -201,3 +196,18 @@ get_dest_name(const char *src_name) ? compressed_name(src_name, src_len) : uncompressed_name(src_name, src_len); } + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a slash are rejected. Such + // suffixes would break things later. + if (suffix[0] == '\0' || strchr(suffix, '/') != NULL) + message_fatal(_("%s: Invalid filename suffix"), optarg); + + // Replace the old custom_suffix (if any) with the new suffix. + free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} diff --git a/src/lzma/suffix.h b/src/lzma/suffix.h index 08315659..c92b92dc 100644 --- a/src/lzma/suffix.h +++ b/src/lzma/suffix.h @@ -20,6 +20,21 @@ #ifndef SUFFIX_H #define SUFFIX_H -extern char *get_dest_name(const char *src_name); +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. 
There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); #endif diff --git a/src/lzma/util.c b/src/lzma/util.c index 4bdbf8ec..13b67925 100644 --- a/src/lzma/util.c +++ b/src/lzma/util.c @@ -20,17 +20,29 @@ #include "private.h" -/// \brief Fancy version of strtoull() -/// -/// \param name Name of the option to show in case of an error -/// \param value String containing the number to be parsed; may -/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" -/// \param min Minimum valid value -/// \param max Maximum valid value -/// -/// \return Parsed value that is in the range [min, max]. Does not return -/// if an error occurs. -/// +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + ptr = realloc(ptr, size); + if (ptr == NULL) + message_fatal("%s", strerror(errno)); + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + extern uint64_t str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) { @@ -40,12 +52,9 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) while (*value == ' ' || *value == '\t') ++value; - if (*value < '0' || *value > '9') { - errmsg(V_ERROR, _("%s: Value is not a non-negative " - "decimal integer"), - value); - my_exit(ERROR); - } + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); do { // Don't overflow. @@ -86,12 +95,11 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) } if (multiplier == 0) { - errmsg(V_ERROR, _("%s: Invalid multiplier suffix. " + message(V_ERROR, _("%s: Invalid multiplier suffix. 
" "Valid suffixes:"), value); - errmsg(V_ERROR, "`k' (10^3), `M' (10^6), `G' (10^9) " + message_fatal("`k' (10^3), `M' (10^6), `G' (10^9) " "`Ki' (2^10), `Mi' (2^20), " "`Gi' (2^30)"); - my_exit(ERROR); } // Don't overflow here either. @@ -107,32 +115,10 @@ str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) return result; error: - errmsg(V_ERROR, _("Value of the option `%s' must be in the range " + message_fatal(_("Value of the option `%s' must be in the range " "[%llu, %llu]"), name, (unsigned long long)(min), (unsigned long long)(max)); - my_exit(ERROR); -} - - -/// \brief Gets filename part from pathname+filename -/// -/// \return Pointer in the filename where the actual filename starts. -/// If the last character is a slash, NULL is returned. -/// -extern const char * -str_filename(const char *name) -{ - const char *base = strrchr(name, '/'); - - if (base == NULL) { - base = name; - } else if (*++base == '\0') { - base = NULL; - errmsg(V_ERROR, _("%s: Invalid filename"), name); - } - - return base; } @@ -179,9 +165,35 @@ extern bool is_empty_filename(const char *filename) { if (filename[0] == '\0') { - errmsg(V_WARNING, _("Empty filename, skipping")); + message_error(_("Empty filename, skipping")); return true; } return false; } + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data not read from a terminal " + "unless `--force' is used.")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data not written to a terminal " + "unless `--force' is used.")); + + return ret; +} diff --git a/src/lzma/util.h b/src/lzma/util.h index 91bd9ba3..dca62b26 100644 --- a/src/lzma/util.h +++ b/src/lzma/util.h @@ -20,13 +20,52 @@ #ifndef UTIL_H #define UTIL_H -#include "private.h" +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() 
must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting. +#define xmalloc(size) xrealloc(NULL, size) + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// extern uint64_t str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max); -extern const char *str_filename(const char *filename); +/// \brief Check if filename is empty and print an error message extern bool is_empty_filename(const char *filename); + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. 
+extern bool is_tty_stdout(void); + #endif diff --git a/src/lzmadec/lzmadec.c b/src/lzmadec/lzmadec.c index eab00544..0b2adb97 100644 --- a/src/lzmadec/lzmadec.c +++ b/src/lzmadec/lzmadec.c @@ -19,12 +19,7 @@ #include "sysdefs.h" -#ifdef HAVE_ERRNO_H -# include <errno.h> -#else -extern int errno; -#endif - +#include <errno.h> #include <stdio.h> #include <unistd.h> @@ -65,7 +60,7 @@ static uint8_t out_buf[BUFSIZ]; static lzma_stream strm = LZMA_STREAM_INIT; /// Number of bytes to use memory at maximum -static size_t memlimit; +static uint64_t memlimit; /// Program name to be shown in error messages static const char *argv0; @@ -94,8 +89,8 @@ help(void) " -d, --decompress (ignored)\n" " -k, --keep (ignored)\n" " -f, --force allow reading compressed data from a terminal\n" -" -M, --memory=NUM use NUM bytes of memory at maximum; the suffixes\n" -" k, M, G, Ki, Mi, and Gi are supported.\n" +" -M, --memory=NUM use NUM bytes of memory at maximum (0 means default);\n" +" the suffixes k, M, G, Ki, Mi, and Gi are supported.\n" " --format=FMT accept only files in the given file format;\n" " possible FMTs are `auto', `native', and alone',\n" " -h, --help display this help and exit\n" @@ -141,20 +136,14 @@ version(void) static void set_default_memlimit(void) { - uint64_t mem = physmem(); - if (mem != 0) { - mem /= 3; + const uint64_t mem = physmem(); -#if UINT64_MAX > SIZE_MAX - if (mem > SIZE_MAX) - mem = SIZE_MAX; -#endif - - memlimit = mem / 3; - } else { + if (mem == 0) // Cannot autodetect, use 10 MiB as the default limit. memlimit = (1U << 23) + (1U << 21); - } + else + // Limit is 33 % of RAM. + memlimit = mem / 3; return; } @@ -165,7 +154,7 @@ set_default_memlimit(void) /// This is rudely copied from src/lzma/util.c and modified a little. 
:-( /// static size_t -str_to_size(const char *value) +str_to_uint64(const char *value) { size_t result = 0; @@ -263,7 +252,10 @@ parse_options(int argc, char **argv) break; case 'M': - memlimit = str_to_size(optarg); + memlimit = str_to_uint64(optarg); + if (memlimit == 0) + set_default_memlimit(); + break; case 'h': diff --git a/tests/files/README b/tests/files/README index 9818d3f1..8f0a73a7 100644 --- a/tests/files/README +++ b/tests/files/README @@ -172,11 +172,14 @@ bad-1-block_header-3.xz has wrong CRC32 in Block Header. - bad-1-block_header-4.xz has too big Compressed Size (2^63 bytes while - maximum is 2^63 - 4 bytes) in Block Header. It's important that the - file gets rejected due to invalid Compressed Size value; the decoder + bad-1-block_header-4.xz has too big Compressed Size in Block Header + (2^64 - 1 bytes while maximum is a little less, because the whole + Block must stay smaller than 2^64). It's important that the file + gets rejected due to invalid Compressed Size value; the decoder must not try decoding the Compressed Data field. + bad-1-block_header-5.xz has zero as Compressed Size in Block Header. + bad-2-index-1.xz has wrong Total Sizes in Index. bad-2-index-2.xz has wrong Uncompressed Sizes in Index. @@ -185,6 +188,9 @@ bad-2-index-4.xz wrong CRC32 in Index. + bad-2-index-5.xz has zero as Total Size. It is important that the file + gets rejected specifically due to Total Size having an invalid value. + bad-2-compressed_data_padding.xz has non-nul byte in the padding of the Compressed Data field of the first Block. 
diff --git a/tests/files/bad-1-block_header-1.xz b/tests/files/bad-1-block_header-1.xz Binary files differindex e126a419..d991536a 100644 --- a/tests/files/bad-1-block_header-1.xz +++ b/tests/files/bad-1-block_header-1.xz diff --git a/tests/files/bad-1-block_header-2.xz b/tests/files/bad-1-block_header-2.xz Binary files differindex e3e8166d..ae42ecf0 100644 --- a/tests/files/bad-1-block_header-2.xz +++ b/tests/files/bad-1-block_header-2.xz diff --git a/tests/files/bad-1-block_header-3.xz b/tests/files/bad-1-block_header-3.xz Binary files differindex 17955038..606cbd20 100644 --- a/tests/files/bad-1-block_header-3.xz +++ b/tests/files/bad-1-block_header-3.xz diff --git a/tests/files/bad-1-block_header-4.xz b/tests/files/bad-1-block_header-4.xz Binary files differindex be31ce04..e72dfbfc 100644 --- a/tests/files/bad-1-block_header-4.xz +++ b/tests/files/bad-1-block_header-4.xz diff --git a/tests/files/bad-1-block_header-5.xz b/tests/files/bad-1-block_header-5.xz Binary files differnew file mode 100644 index 00000000..96521129 --- /dev/null +++ b/tests/files/bad-1-block_header-5.xz diff --git a/tests/files/bad-1-check-crc32.xz b/tests/files/bad-1-check-crc32.xz Binary files differindex f8ec7d61..1ebe1310 100644 --- a/tests/files/bad-1-check-crc32.xz +++ b/tests/files/bad-1-check-crc32.xz diff --git a/tests/files/bad-1-check-crc64.xz b/tests/files/bad-1-check-crc64.xz Binary files differindex eb406ce6..cdb77093 100644 --- a/tests/files/bad-1-check-crc64.xz +++ b/tests/files/bad-1-check-crc64.xz diff --git a/tests/files/bad-1-check-sha256.xz b/tests/files/bad-1-check-sha256.xz Binary files differindex e33cee80..def7bff1 100644 --- a/tests/files/bad-1-check-sha256.xz +++ b/tests/files/bad-1-check-sha256.xz diff --git a/tests/files/bad-1-lzma2-1.xz b/tests/files/bad-1-lzma2-1.xz Binary files differindex f7ff6dd3..640f592e 100644 --- a/tests/files/bad-1-lzma2-1.xz +++ b/tests/files/bad-1-lzma2-1.xz diff --git a/tests/files/bad-1-lzma2-2.xz b/tests/files/bad-1-lzma2-2.xz 
Binary files differindex 3b279195..69ab07d4 100644 --- a/tests/files/bad-1-lzma2-2.xz +++ b/tests/files/bad-1-lzma2-2.xz diff --git a/tests/files/bad-1-lzma2-3.xz b/tests/files/bad-1-lzma2-3.xz Binary files differindex e1a20831..66f48c5d 100644 --- a/tests/files/bad-1-lzma2-3.xz +++ b/tests/files/bad-1-lzma2-3.xz diff --git a/tests/files/bad-1-lzma2-4.xz b/tests/files/bad-1-lzma2-4.xz Binary files differindex e0eb7aaa..ac970419 100644 --- a/tests/files/bad-1-lzma2-4.xz +++ b/tests/files/bad-1-lzma2-4.xz diff --git a/tests/files/bad-1-lzma2-5.xz b/tests/files/bad-1-lzma2-5.xz Binary files differindex 437aeed0..700464d5 100644 --- a/tests/files/bad-1-lzma2-5.xz +++ b/tests/files/bad-1-lzma2-5.xz diff --git a/tests/files/bad-1-lzma2-6.xz b/tests/files/bad-1-lzma2-6.xz Binary files differindex f87cab68..2bda0c4b 100644 --- a/tests/files/bad-1-lzma2-6.xz +++ b/tests/files/bad-1-lzma2-6.xz diff --git a/tests/files/bad-1-lzma2-7.xz b/tests/files/bad-1-lzma2-7.xz Binary files differindex 1a487ca7..8cc711c1 100644 --- a/tests/files/bad-1-lzma2-7.xz +++ b/tests/files/bad-1-lzma2-7.xz diff --git a/tests/files/bad-1-stream_flags-1.xz b/tests/files/bad-1-stream_flags-1.xz Binary files differindex fd77d474..6511773e 100644 --- a/tests/files/bad-1-stream_flags-1.xz +++ b/tests/files/bad-1-stream_flags-1.xz diff --git a/tests/files/bad-1-stream_flags-2.xz b/tests/files/bad-1-stream_flags-2.xz Binary files differindex a4a582ad..0c66b364 100644 --- a/tests/files/bad-1-stream_flags-2.xz +++ b/tests/files/bad-1-stream_flags-2.xz diff --git a/tests/files/bad-1-stream_flags-3.xz b/tests/files/bad-1-stream_flags-3.xz Binary files differindex f10b8d00..a9b1f983 100644 --- a/tests/files/bad-1-stream_flags-3.xz +++ b/tests/files/bad-1-stream_flags-3.xz diff --git a/tests/files/bad-1-vli-1.xz b/tests/files/bad-1-vli-1.xz Binary files differindex e2a08b9e..6514ab12 100644 --- a/tests/files/bad-1-vli-1.xz +++ b/tests/files/bad-1-vli-1.xz diff --git a/tests/files/bad-1-vli-2.xz 
b/tests/files/bad-1-vli-2.xz Binary files differindex 604e8186..c16941b4 100644 --- a/tests/files/bad-1-vli-2.xz +++ b/tests/files/bad-1-vli-2.xz diff --git a/tests/files/bad-2-compressed_data_padding.xz b/tests/files/bad-2-compressed_data_padding.xz Binary files differindex 1d0f58b7..382d0476 100644 --- a/tests/files/bad-2-compressed_data_padding.xz +++ b/tests/files/bad-2-compressed_data_padding.xz diff --git a/tests/files/bad-2-index-1.xz b/tests/files/bad-2-index-1.xz Binary files differindex 42efda0d..f51ed214 100644 --- a/tests/files/bad-2-index-1.xz +++ b/tests/files/bad-2-index-1.xz diff --git a/tests/files/bad-2-index-2.xz b/tests/files/bad-2-index-2.xz Binary files differindex ee5e89c2..d7d00ff3 100644 --- a/tests/files/bad-2-index-2.xz +++ b/tests/files/bad-2-index-2.xz diff --git a/tests/files/bad-2-index-3.xz b/tests/files/bad-2-index-3.xz Binary files differindex f51eaad8..62428b87 100644 --- a/tests/files/bad-2-index-3.xz +++ b/tests/files/bad-2-index-3.xz diff --git a/tests/files/bad-2-index-4.xz b/tests/files/bad-2-index-4.xz Binary files differindex 1b5b78e4..9cf2df65 100644 --- a/tests/files/bad-2-index-4.xz +++ b/tests/files/bad-2-index-4.xz diff --git a/tests/files/bad-2-index-5.xz b/tests/files/bad-2-index-5.xz Binary files differnew file mode 100644 index 00000000..0a792709 --- /dev/null +++ b/tests/files/bad-2-index-5.xz diff --git a/tests/files/good-1-3delta-lzma2.xz b/tests/files/good-1-3delta-lzma2.xz Binary files differindex 3edb4805..a0be1d00 100644 --- a/tests/files/good-1-3delta-lzma2.xz +++ b/tests/files/good-1-3delta-lzma2.xz diff --git a/tests/files/good-1-block_header-1.xz b/tests/files/good-1-block_header-1.xz Binary files differindex c0cc7200..fea5ad2f 100644 --- a/tests/files/good-1-block_header-1.xz +++ b/tests/files/good-1-block_header-1.xz diff --git a/tests/files/good-1-block_header-2.xz b/tests/files/good-1-block_header-2.xz Binary files differindex 5eb8575c..6b5dcb34 100644 --- a/tests/files/good-1-block_header-2.xz +++ 
b/tests/files/good-1-block_header-2.xz diff --git a/tests/files/good-1-block_header-3.xz b/tests/files/good-1-block_header-3.xz Binary files differindex 5e8dc78b..15653120 100644 --- a/tests/files/good-1-block_header-3.xz +++ b/tests/files/good-1-block_header-3.xz diff --git a/tests/files/good-1-check-crc32.xz b/tests/files/good-1-check-crc32.xz Binary files differindex 949a215d..6c89593d 100644 --- a/tests/files/good-1-check-crc32.xz +++ b/tests/files/good-1-check-crc32.xz diff --git a/tests/files/good-1-check-crc64.xz b/tests/files/good-1-check-crc64.xz Binary files differindex 6102c945..5a9915d2 100644 --- a/tests/files/good-1-check-crc64.xz +++ b/tests/files/good-1-check-crc64.xz diff --git a/tests/files/good-1-check-none.xz b/tests/files/good-1-check-none.xz Binary files differindex aab057d7..1e85faf3 100644 --- a/tests/files/good-1-check-none.xz +++ b/tests/files/good-1-check-none.xz diff --git a/tests/files/good-1-check-sha256.xz b/tests/files/good-1-check-sha256.xz Binary files differindex 141a4818..fdc556b6 100644 --- a/tests/files/good-1-check-sha256.xz +++ b/tests/files/good-1-check-sha256.xz diff --git a/tests/files/good-1-delta-lzma2.tiff.xz b/tests/files/good-1-delta-lzma2.tiff.xz Binary files differindex 744fb8d1..1f033bc5 100644 --- a/tests/files/good-1-delta-lzma2.tiff.xz +++ b/tests/files/good-1-delta-lzma2.tiff.xz diff --git a/tests/files/good-1-lzma2-1.xz b/tests/files/good-1-lzma2-1.xz Binary files differindex 96aff26e..d8d6489c 100644 --- a/tests/files/good-1-lzma2-1.xz +++ b/tests/files/good-1-lzma2-1.xz diff --git a/tests/files/good-1-lzma2-2.xz b/tests/files/good-1-lzma2-2.xz Binary files differindex 19631d48..7e8cdf1b 100644 --- a/tests/files/good-1-lzma2-2.xz +++ b/tests/files/good-1-lzma2-2.xz diff --git a/tests/files/good-1-lzma2-3.xz b/tests/files/good-1-lzma2-3.xz Binary files differindex 11c84ee6..c4c72be6 100644 --- a/tests/files/good-1-lzma2-3.xz +++ b/tests/files/good-1-lzma2-3.xz diff --git a/tests/files/good-1-sparc-lzma2.xz 
b/tests/files/good-1-sparc-lzma2.xz Binary files differindex 9111e39b..5c1c8600 100644 --- a/tests/files/good-1-sparc-lzma2.xz +++ b/tests/files/good-1-sparc-lzma2.xz diff --git a/tests/files/good-1-x86-lzma2.xz b/tests/files/good-1-x86-lzma2.xz Binary files differindex 4fbd0ee4..2c4a471a 100644 --- a/tests/files/good-1-x86-lzma2.xz +++ b/tests/files/good-1-x86-lzma2.xz diff --git a/tests/files/good-2-lzma2.xz b/tests/files/good-2-lzma2.xz Binary files differindex dd34b66a..bed5085c 100644 --- a/tests/files/good-2-lzma2.xz +++ b/tests/files/good-2-lzma2.xz diff --git a/tests/files/unsupported-block_header.xz b/tests/files/unsupported-block_header.xz Binary files differindex ee697e4d..38304429 100644 --- a/tests/files/unsupported-block_header.xz +++ b/tests/files/unsupported-block_header.xz diff --git a/tests/files/unsupported-check.xz b/tests/files/unsupported-check.xz Binary files differindex e2fe486b..c28355e7 100644 --- a/tests/files/unsupported-check.xz +++ b/tests/files/unsupported-check.xz diff --git a/tests/files/unsupported-filter_flags-1.xz b/tests/files/unsupported-filter_flags-1.xz Binary files differindex b4acab24..48b93731 100644 --- a/tests/files/unsupported-filter_flags-1.xz +++ b/tests/files/unsupported-filter_flags-1.xz diff --git a/tests/files/unsupported-filter_flags-2.xz b/tests/files/unsupported-filter_flags-2.xz Binary files differindex fa907dc2..c283359d 100644 --- a/tests/files/unsupported-filter_flags-2.xz +++ b/tests/files/unsupported-filter_flags-2.xz diff --git a/tests/files/unsupported-filter_flags-3.xz b/tests/files/unsupported-filter_flags-3.xz Binary files differindex 939ab5fe..26084984 100644 --- a/tests/files/unsupported-filter_flags-3.xz +++ b/tests/files/unsupported-filter_flags-3.xz diff --git a/tests/test_block_header.c b/tests/test_block_header.c index 4d398095..5c0f8b9a 100644 --- a/tests/test_block_header.c +++ b/tests/test_block_header.c @@ -89,7 +89,7 @@ code(void) { expect(lzma_block_header_encode(&known_options, buf) == 
LZMA_OK); - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; memcrap(filters, sizeof(filters)); memcrap(&decoded_options, sizeof(decoded_options)); @@ -108,7 +108,7 @@ code(void) != LZMA_VLI_UNKNOWN; ++i) expect(known_options.filters[i].id == filters[i].id); - for (size_t i = 0; i < LZMA_BLOCK_FILTERS_MAX; ++i) + for (size_t i = 0; i < LZMA_FILTERS_MAX; ++i) free(decoded_options.filters[i].options); } @@ -137,12 +137,18 @@ test1(void) known_options.check = 999; // Some invalid value, which gets ignored. expect(lzma_block_header_size(&known_options) == LZMA_OK); - known_options.compressed_size = 5; // Not a multiple of four. - expect(lzma_block_header_size(&known_options) == LZMA_PROG_ERROR); + known_options.compressed_size = 5; + expect(lzma_block_header_size(&known_options) == LZMA_OK); known_options.compressed_size = 0; // Cannot be zero. expect(lzma_block_header_size(&known_options) == LZMA_PROG_ERROR); + // LZMA_VLI_MAX is too big to keep the total size of the Block + // a valid VLI, but lzma_block_header_size() is not meant + // to validate it. (lzma_block_header_encode() must validate it.) 
+ known_options.compressed_size = LZMA_VLI_MAX; + expect(lzma_block_header_size(&known_options) == LZMA_OK); + known_options.compressed_size = LZMA_VLI_UNKNOWN; known_options.uncompressed_size = 0; expect(lzma_block_header_size(&known_options) == LZMA_OK); @@ -192,7 +198,7 @@ test3(void) known_options.header_size += 4; expect(lzma_block_header_encode(&known_options, buf) == LZMA_OK); - lzma_filter filters[LZMA_BLOCK_FILTERS_MAX + 1]; + lzma_filter filters[LZMA_FILTERS_MAX + 1]; decoded_options.header_size = known_options.header_size; decoded_options.check = known_options.check; decoded_options.filters = filters; diff --git a/tests/test_index.c b/tests/test_index.c index 8a88dd31..d9d4e047 100644 --- a/tests/test_index.c +++ b/tests/test_index.c @@ -34,9 +34,9 @@ create_small(void) { lzma_index *i = lzma_index_init(NULL, NULL); expect(i != NULL); - expect(lzma_index_append(i, NULL, 400, 555) == LZMA_OK); - expect(lzma_index_append(i, NULL, 600, 777) == LZMA_OK); - expect(lzma_index_append(i, NULL, 800, 999) == LZMA_OK); + expect(lzma_index_append(i, NULL, 101, 555) == LZMA_OK); + expect(lzma_index_append(i, NULL, 602, 777) == LZMA_OK); + expect(lzma_index_append(i, NULL, 804, 999) == LZMA_OK); return i; } @@ -55,9 +55,9 @@ create_big(void) uint32_t n = 11; for (size_t j = 0; j < count; ++j) { n = 7019 * n + 7607; - const uint32_t t = (n * 3011) & ~UINT32_C(3); + const uint32_t t = n * 3011; expect(lzma_index_append(i, NULL, t, n) == LZMA_OK); - total_size += t; + total_size += (t + 3) & ~LZMA_VLI_C(3); uncompressed_size += n; } @@ -184,7 +184,7 @@ test_code(lzma_index *i) lzma_index_rewind(i); lzma_index_record r; while (!lzma_index_read(i, &r)) - expect(lzma_index_hash_append(h, r.total_size, + expect(lzma_index_hash_append(h, r.unpadded_size, r.uncompressed_size) == LZMA_OK); size_t pos = 0; while (pos < index_size - 1) @@ -302,7 +302,7 @@ test_locate(void) expect(!lzma_index_locate(i, &r, 0)); expect(r.total_size == 32); expect(r.uncompressed_size == 5); - 
expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16); expect(r.uncompressed_offset == 0); // Still cannot find anything past the end. @@ -314,31 +314,31 @@ test_locate(void) expect(!lzma_index_locate(i, &r, 0)); expect(r.total_size == 32); expect(r.uncompressed_size == 5); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16); expect(r.uncompressed_offset == 0); expect(!lzma_index_read(i, &r)); expect(r.total_size == 40); expect(r.uncompressed_size == 11); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16 + 32); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16 + 32); expect(r.uncompressed_offset == 5); expect(!lzma_index_locate(i, &r, 2)); expect(r.total_size == 32); expect(r.uncompressed_size == 5); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16); expect(r.uncompressed_offset == 0); expect(!lzma_index_locate(i, &r, 5)); expect(r.total_size == 40); expect(r.uncompressed_size == 11); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16 + 32); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16 + 32); expect(r.uncompressed_offset == 5); expect(!lzma_index_locate(i, &r, 5 + 11 - 1)); expect(r.total_size == 40); expect(r.uncompressed_size == 11); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 16 + 32); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 16 + 32); expect(r.uncompressed_offset == 5); expect(lzma_index_locate(i, &r, 5 + 11)); @@ -357,27 +357,27 @@ test_locate(void) expect(!lzma_index_locate(i, &r, 0)); expect(r.total_size == 4 + 8); expect(r.uncompressed_size == 4); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE); expect(r.uncompressed_offset == 0); expect(!lzma_index_locate(i, &r, 3)); expect(r.total_size == 4 + 8); expect(r.uncompressed_size == 4); - expect(r.stream_offset = 
LZMA_STREAM_HEADER_SIZE); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE); expect(r.uncompressed_offset == 0); // Second Record expect(!lzma_index_locate(i, &r, 4)); expect(r.total_size == 2 * 4 + 8); expect(r.uncompressed_size == 2 * 4); - expect(r.stream_offset = LZMA_STREAM_HEADER_SIZE + 4 + 8); + expect(r.stream_offset == LZMA_STREAM_HEADER_SIZE + 4 + 8); expect(r.uncompressed_offset == 4); // Last Record expect(!lzma_index_locate(i, &r, lzma_index_uncompressed_size(i) - 1)); expect(r.total_size == 4 * 5555 + 8); expect(r.uncompressed_size == 4 * 5555); - expect(r.stream_offset = lzma_index_total_size(i) + expect(r.stream_offset == lzma_index_total_size(i) + LZMA_STREAM_HEADER_SIZE - 4 * 5555 - 8); expect(r.uncompressed_offset == lzma_index_uncompressed_size(i) - 4 * 5555); @@ -452,15 +452,7 @@ test_corrupt(void) lzma_stream strm = LZMA_STREAM_INIT; lzma_index *i = create_empty(); - expect(lzma_index_append(i, NULL, 7, 1) == LZMA_OK); - expect(lzma_index_encoder(&strm, i) == LZMA_OK); - succeed(coder_loop(&strm, NULL, 0, buf, 2, LZMA_PROG_ERROR, LZMA_RUN)); - lzma_index_end(i, NULL); - - i = create_empty(); - expect(lzma_index_append(i, NULL, 0, 1) == LZMA_OK); - expect(lzma_index_encoder(&strm, i) == LZMA_OK); - succeed(coder_loop(&strm, NULL, 0, buf, 2, LZMA_PROG_ERROR, LZMA_RUN)); + expect(lzma_index_append(i, NULL, 0, 1) == LZMA_PROG_ERROR); lzma_index_end(i, NULL); // Create a valid Index and corrupt it in different ways. |